/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/console.h>
#include <linux/slab.h>
#include <linux/iommu.h>
#include <linux/pci.h>
#include <linux/devcoredump.h>
#include <generated/utsrelease.h>
#include <linux/pci-p2pdma.h>
#include <linux/apple-gmux.h>

#include <drm/drm_aperture.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_fb_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "nv.h"
#include "bif/bif_4_1_d.h"
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
#include "amdgpu_pmu.h"
#include "amdgpu_fru_eeprom.h"
#include "amdgpu_reset.h"

#include <linux/suspend.h>
#include <drm/task_barrier.h>
#include <linux/pm_runtime.h>

#include <drm/drm_drv.h>

#if IS_ENABLED(CONFIG_X86)
#include <asm/intel-family.h>
#endif

MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");

#define AMDGPU_RESUME_MS		2000
#define AMDGPU_MAX_RETRY_LIMIT		2
#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)

static const struct drm_driver amdgpu_kms_driver;

const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"ARCTURUS",
	"RENOIR",
	"ALDEBARAN",
	"NAVI10",
	"CYAN_SKILLFISH",
	"NAVI14",
	"NAVI12",
	"SIENNA_CICHLID",
	"NAVY_FLOUNDER",
	"VANGOGH",
	"DIMGREY_CAVEFISH",
	"BEIGE_GOBY",
	"YELLOW_CARP",
	"IP DISCOVERY",
	"LAST",
};

/**
 * DOC: pcie_replay_count
 *
 * The amdgpu driver provides a sysfs API for reporting the total number
 * of PCIe replays (NAKs).
 * The file pcie_replay_count is used for this and returns the total
 * number of replays as a sum of the NAKs generated and NAKs received.
 */

static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);
	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);

	return sysfs_emit(buf, "%llu\n", cnt);
}

static DEVICE_ATTR(pcie_replay_count, 0444,
		   amdgpu_device_get_pcie_replay_count, NULL);

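/*
 * Illustrative usage from userspace (not part of the driver): the attribute
 * above is read through sysfs, e.g. on a hypothetical card0:
 *
 *   $ cat /sys/class/drm/card0/device/pcie_replay_count
 *   0
 *
 * The exact path depends on how the device is enumerated.
 */
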
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);


/**
 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with ATPX power control,
 * otherwise return false.
 */
bool amdgpu_device_supports_px(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);

	if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
		return true;
	return false;
}

/**
 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with ACPI power control,
 * otherwise return false.
 */
bool amdgpu_device_supports_boco(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);

	if (adev->has_pr3 ||
	    ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
		return true;
	return false;
}

/**
 * amdgpu_device_supports_baco - Does the device support BACO
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device supports BACO,
 * otherwise return false.
 */
bool amdgpu_device_supports_baco(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);

	return amdgpu_asic_supports_baco(adev);
}

/**
 * amdgpu_device_supports_smart_shift - Is the device dGPU with
 * smart shift support
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with Smart Shift support,
 * otherwise returns false.
 */
bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
{
	return (amdgpu_device_supports_boco(dev) &&
		amdgpu_acpi_is_power_shift_control_supported());
}

/*
 * VRAM access helper functions
 */

/**
 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size, sizeof(@buf) must be > @size
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
			     void *buf, size_t size, bool write)
{
	unsigned long flags;
	uint32_t hi = ~0, tmp = 0;
	uint32_t *data = buf;
	uint64_t last;
	int idx;

	if (!drm_dev_enter(adev_to_drm(adev), &idx))
		return;

	BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));

	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
	for (last = pos + size; pos < last; pos += 4) {
		tmp = pos >> 31;

		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
		if (tmp != hi) {
			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
			hi = tmp;
		}
		if (write)
			WREG32_NO_KIQ(mmMM_DATA, *data++);
		else
			*data++ = RREG32_NO_KIQ(mmMM_DATA);
	}

	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	drm_dev_exit(idx);
}

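/*
 * Note on the indexed access used above (as implemented in the loop): the low
 * 31 bits of the VRAM offset go into MM_INDEX with bit 31 set to select the
 * MM aperture, the remaining high bits go into MM_INDEX_HI, and each MM_DATA
 * access then transfers one dword at that offset.
 */
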
/**
 * amdgpu_device_aper_access - access vram by vram aperture
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size, sizeof(@buf) must be > @size
 * @write: true - write to vram, otherwise - read from vram
 *
 * The return value means how many bytes have been transferred.
 */
size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
				 void *buf, size_t size, bool write)
{
#ifdef CONFIG_64BIT
	void __iomem *addr;
	size_t count = 0;
	uint64_t last;

	if (!adev->mman.aper_base_kaddr)
		return 0;

	last = min(pos + size, adev->gmc.visible_vram_size);
	if (last > pos) {
		addr = adev->mman.aper_base_kaddr + pos;
		count = last - pos;

		if (write) {
			memcpy_toio(addr, buf, count);
			/* Make sure HDP write cache flush happens without any reordering
			 * after the system memory contents are sent over PCIe device
			 */
			mb();
			amdgpu_device_flush_hdp(adev, NULL);
		} else {
			amdgpu_device_invalidate_hdp(adev, NULL);
			/* Make sure HDP read cache is invalidated before issuing a read
			 * to the PCIe device
			 */
			mb();
			memcpy_fromio(buf, addr, count);
		}

	}

	return count;
#else
	return 0;
#endif
}

/**
 * amdgpu_device_vram_access - read/write a buffer in vram
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size, sizeof(@buf) must be > @size
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
			       void *buf, size_t size, bool write)
{
	size_t count;

	/* try using the vram aperture to access vram first */
	count = amdgpu_device_aper_access(adev, pos, buf, size, write);
	size -= count;
	if (size) {
		/* use MM to access the rest of vram */
		pos += count;
		buf += count;
		amdgpu_device_mm_access(adev, pos, buf, size, write);
	}
}

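/*
 * Illustrative call of the helper above (assumed caller context): read one
 * dword from VRAM offset 0 into a local buffer.
 *
 *   uint32_t val;
 *
 *   amdgpu_device_vram_access(adev, 0, &val, sizeof(val), false);
 */
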
/*
 * register access helper functions.
 */

/* Check if hw access should be skipped because of hotplug or device error */
bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
{
	if (adev->no_hw_access)
		return true;

#ifdef CONFIG_LOCKDEP
	/*
	 * This is a bit complicated to understand, so worth a comment. What we assert
	 * here is that the GPU reset is not running on another thread in parallel.
	 *
	 * For this we trylock the read side of the reset semaphore, if that succeeds
	 * we know that the reset is not running in parallel.
	 *
	 * If the trylock fails we assert that we are either already holding the read
	 * side of the lock or are the reset thread itself and hold the write side of
	 * the lock.
	 */
	if (in_task()) {
		if (down_read_trylock(&adev->reset_domain->sem))
			up_read(&adev->reset_domain->sem);
		else
			lockdep_assert_held(&adev->reset_domain->sem);
	}
#endif
	return false;
}

/**
 * amdgpu_device_rreg - read a memory mapped IO or indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
			    uint32_t reg, uint32_t acc_flags)
{
	uint32_t ret;

	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	if ((reg * 4) < adev->rmmio_size) {
		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
		    amdgpu_sriov_runtime(adev) &&
		    down_read_trylock(&adev->reset_domain->sem)) {
			ret = amdgpu_kiq_rreg(adev, reg);
			up_read(&adev->reset_domain->sem);
		} else {
			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
		}
	} else {
		ret = adev->pcie_rreg(adev, reg * 4);
	}

	trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);

	return ret;
}

/*
 * MMIO register read with bytes helper functions
 * @offset: bytes offset from MMIO start
 */

/**
 * amdgpu_mm_rreg8 - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 *
 * Returns the 8 bit value from the offset specified.
 */
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
{
	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	if (offset < adev->rmmio_size)
		return (readb(adev->rmmio + offset));
	BUG();
}

/*
 * MMIO register write with bytes helper functions
 * @offset: bytes offset from MMIO start
 * @value: the value to be written to the register
 */

/**
 * amdgpu_mm_wreg8 - write a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
{
	if (amdgpu_device_skip_hw_access(adev))
		return;

	if (offset < adev->rmmio_size)
		writeb(value, adev->rmmio + offset);
	else
		BUG();
}

/**
 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_device_wreg(struct amdgpu_device *adev,
			uint32_t reg, uint32_t v,
			uint32_t acc_flags)
{
	if (amdgpu_device_skip_hw_access(adev))
		return;

	if ((reg * 4) < adev->rmmio_size) {
		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
		    amdgpu_sriov_runtime(adev) &&
		    down_read_trylock(&adev->reset_domain->sem)) {
			amdgpu_kiq_wreg(adev, reg, v);
			up_read(&adev->reset_domain->sem);
		} else {
			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
		}
	} else {
		adev->pcie_wreg(adev, reg * 4, v);
	}

	trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
}

/**
 * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
 *
 * @adev: amdgpu_device pointer
 * @reg: mmio/rlc register
 * @v: value to write
 * @xcc_id: xcc accelerated compute core id
 *
 * This function is invoked only for the debugfs register access.
 */
void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
			     uint32_t reg, uint32_t v,
			     uint32_t xcc_id)
{
	if (amdgpu_device_skip_hw_access(adev))
		return;

	if (amdgpu_sriov_fullaccess(adev) &&
	    adev->gfx.rlc.funcs &&
	    adev->gfx.rlc.funcs->is_rlcg_access_range) {
		if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
			return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
	} else if ((reg * 4) >= adev->rmmio_size) {
		adev->pcie_wreg(adev, reg * 4, v);
	} else {
		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
	}
}

/**
 * amdgpu_device_indirect_rreg - read an indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register address to read from
 *
 * Returns the value of indirect register @reg_addr
 */
u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
				u32 reg_addr)
{
	unsigned long flags, pcie_index, pcie_data;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;
	u32 r;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	r = readl(pcie_data_offset);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return r;
}

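/*
 * The indirect accessors above and below all follow the same index/data
 * pattern: the target address is written to the PCIE index register, a
 * readl() flushes the posted write, and the data register is then read or
 * written while holding pcie_idx_lock.
 */
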
u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
				    u64 reg_addr)
{
	unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
	u32 r;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_index_hi_offset;
	void __iomem *pcie_data_offset;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
	if (adev->nbio.funcs->get_pcie_index_hi_offset)
		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
	else
		pcie_index_hi = 0;

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
	if (pcie_index_hi != 0)
		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
				pcie_index_hi * 4;

	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	if (pcie_index_hi != 0) {
		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}
	r = readl(pcie_data_offset);

	/* clear the high bits */
	if (pcie_index_hi != 0) {
		writel(0, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}

	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return r;
}

/**
 * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register address to read from
 *
 * Returns the value of indirect register @reg_addr
 */
u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
				  u32 reg_addr)
{
	unsigned long flags, pcie_index, pcie_data;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;
	u64 r;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	/* read low 32 bits */
	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	r = readl(pcie_data_offset);
	/* read high 32 bits */
	writel(reg_addr + 4, pcie_index_offset);
	readl(pcie_index_offset);
	r |= ((u64)readl(pcie_data_offset) << 32);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return r;
}

/**
 * amdgpu_device_indirect_wreg - write an indirect register address
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register offset
 * @reg_data: indirect register data
 *
 */
void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
				 u32 reg_addr, u32 reg_data)
{
	unsigned long flags, pcie_index, pcie_data;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	writel(reg_data, pcie_data_offset);
	readl(pcie_data_offset);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
				     u64 reg_addr, u32 reg_data)
{
	unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_index_hi_offset;
	void __iomem *pcie_data_offset;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
	if (adev->nbio.funcs->get_pcie_index_hi_offset)
		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
	else
		pcie_index_hi = 0;

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
	if (pcie_index_hi != 0)
		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
				pcie_index_hi * 4;

	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	if (pcie_index_hi != 0) {
		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}
	writel(reg_data, pcie_data_offset);
	readl(pcie_data_offset);

	/* clear the high bits */
	if (pcie_index_hi != 0) {
		writel(0, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}

	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

/**
 * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register offset
 * @reg_data: indirect register data
 *
 */
void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
				   u32 reg_addr, u64 reg_data)
{
	unsigned long flags, pcie_index, pcie_data;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	/* write low 32 bits */
	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
	readl(pcie_data_offset);
	/* write high 32 bits */
	writel(reg_addr + 4, pcie_index_offset);
	readl(pcie_index_offset);
	writel((u32)(reg_data >> 32), pcie_data_offset);
	readl(pcie_data_offset);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

/**
 * amdgpu_device_get_rev_id - query device rev_id
 *
 * @adev: amdgpu_device pointer
 *
 * Return device rev_id
 */
u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
{
	return adev->nbio.funcs->get_rev_id(adev);
}

/**
 * amdgpu_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
	BUG();
	return 0;
}

static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
		  reg, v);
	BUG();
}

static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
{
	DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg64 - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
{
	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_block_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu_device pointer
 * @block: offset of instance
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
					  uint32_t block, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
		  reg, block);
	BUG();
	return 0;
}

/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
				      uint32_t block,
				      uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
		  reg, block, v);
	BUG();
}

/**
 * amdgpu_device_asic_init - Wrapper for atom asic_init
 *
 * @adev: amdgpu_device pointer
 *
 * Does any asic specific work and then calls atom asic init.
 */
static int amdgpu_device_asic_init(struct amdgpu_device *adev)
{
	amdgpu_asic_pre_asic_init(adev);

	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) ||
	    adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0))
		return amdgpu_atomfirmware_asic_init(adev, true);
	else
		return amdgpu_atom_asic_init(adev->mode_info.atom_context);
}

/**
 * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
 *
 * @adev: amdgpu_device pointer
 *
 * Allocates a scratch page of VRAM for use by various things in the
 * driver.
 */
static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
{
	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
				       AMDGPU_GEM_DOMAIN_VRAM |
				       AMDGPU_GEM_DOMAIN_GTT,
				       &adev->mem_scratch.robj,
				       &adev->mem_scratch.gpu_addr,
				       (void **)&adev->mem_scratch.ptr);
}

/**
 * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
 *
 * @adev: amdgpu_device pointer
 *
 * Frees the VRAM scratch page.
 */
static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
}

/**
 * amdgpu_device_program_register_sequence - program an array of registers.
 *
 * @adev: amdgpu_device pointer
 * @registers: pointer to the register array
 * @array_size: size of the register array
 *
 * Programs an array of registers with and/or masks.
 * This is a helper for setting golden registers.
 */
void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
					     const u32 *registers,
					     const u32 array_size)
{
	u32 tmp, reg, and_mask, or_mask;
	int i;

	if (array_size % 3)
		return;

	for (i = 0; i < array_size; i += 3) {
		reg = registers[i + 0];
		and_mask = registers[i + 1];
		or_mask = registers[i + 2];

		if (and_mask == 0xffffffff) {
			tmp = or_mask;
		} else {
			tmp = RREG32(reg);
			tmp &= ~and_mask;
			if (adev->family >= AMDGPU_FAMILY_AI)
				tmp |= (or_mask & and_mask);
			else
				tmp |= or_mask;
		}
		WREG32(reg, tmp);
	}
}

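/*
 * Illustrative golden-register table for the helper above (offsets and masks
 * are made up): entries are {reg, and_mask, or_mask} triplets.
 *
 *   static const u32 example_golden_settings[] = {
 *           0x1234, 0xffffffff, 0x00000001,
 *           0x5678, 0x0000ff00, 0x00001200,
 *   };
 *
 *   amdgpu_device_program_register_sequence(adev, example_golden_settings,
 *                                           ARRAY_SIZE(example_golden_settings));
 */
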
/**
 * amdgpu_device_pci_config_reset - reset the GPU
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using the pci config reset sequence.
 * Only applicable to asics prior to vega10.
 */
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
{
	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
}

/**
 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
 */
int amdgpu_device_pci_reset(struct amdgpu_device *adev)
{
	return pci_reset_function(adev->pdev);
}

/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */

/**
 * amdgpu_device_wb_fini - Disable Writeback and free memory
 *
 * @adev: amdgpu_device pointer
 *
 * Disables Writeback and frees the Writeback memory (all asics).
 * Used at driver shutdown.
 */
static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
{
	if (adev->wb.wb_obj) {
		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
				      &adev->wb.gpu_addr,
				      (void **)&adev->wb.wb);
		adev->wb.wb_obj = NULL;
	}
}

/**
 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
 *
 * @adev: amdgpu_device pointer
 *
 * Initializes writeback and allocates writeback memory (all asics).
 * Used at driver startup.
 * Returns 0 on success or an -error on failure.
 */
static int amdgpu_device_wb_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->wb.wb_obj == NULL) {
		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
					    (void **)&adev->wb.wb);
		if (r) {
			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
			return r;
		}

		adev->wb.num_wb = AMDGPU_MAX_WB;
		memset(&adev->wb.used, 0, sizeof(adev->wb.used));

		/* clear wb memory */
		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
	}

	return 0;
}

/**
 * amdgpu_device_wb_get - Allocate a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Allocate a wb slot for use by the driver (all asics).
 * Returns 0 on success or -EINVAL on failure.
 */
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
{
	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);

	if (offset < adev->wb.num_wb) {
		__set_bit(offset, adev->wb.used);
		*wb = offset << 3; /* convert to dw offset */
		return 0;
	} else {
		return -EINVAL;
	}
}

/**
 * amdgpu_device_wb_free - Free a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Free a wb slot allocated for use by the driver (all asics)
 */
void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
{
	wb >>= 3;
	if (wb < adev->wb.num_wb)
		__clear_bit(wb, adev->wb.used);
}

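/*
 * Illustrative pairing of the two helpers above (assumed caller context):
 *
 *   u32 wb;
 *
 *   if (!amdgpu_device_wb_get(adev, &wb)) {
 *           ... use the dword-offset slot "wb" ...
 *           amdgpu_device_wb_free(adev, wb);
 *   }
 */
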
/**
 * amdgpu_device_resize_fb_bar - try to resize FB BAR
 *
 * @adev: amdgpu_device pointer
 *
 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 * to fail, but if any of the BARs is not accessible after the resize we abort
 * driver loading by returning -ENODEV.
 */
int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
{
	int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
	struct pci_bus *root;
	struct resource *res;
	unsigned int i;
	u16 cmd;
	int r;

	if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
		return 0;

	/* Bypass for VF */
	if (amdgpu_sriov_vf(adev))
		return 0;

	/* skip if the bios has already enabled large BAR */
	if (adev->gmc.real_vram_size &&
	    (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
		return 0;

	/* Check if the root BUS has 64bit memory resources */
	root = adev->pdev->bus;
	while (root->parent)
		root = root->parent;

	pci_bus_for_each_resource(root, res, i) {
		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
		    res->start > 0x100000000ull)
			break;
	}

	/* Trying to resize is pointless without a root hub window above 4GB */
	if (!res)
		return 0;

	/* Limit the BAR size to what is available */
	rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
			rbar_size);

	/* Disable memory decoding while we change the BAR addresses and size */
	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
	pci_write_config_word(adev->pdev, PCI_COMMAND,
			      cmd & ~PCI_COMMAND_MEMORY);

	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
	amdgpu_doorbell_fini(adev);
	if (adev->asic_type >= CHIP_BONAIRE)
		pci_release_resource(adev->pdev, 2);

	pci_release_resource(adev->pdev, 0);

	r = pci_resize_resource(adev->pdev, 0, rbar_size);
	if (r == -ENOSPC)
		DRM_INFO("Not enough PCI address space for a large BAR.");
	else if (r && r != -ENOTSUPP)
		DRM_ERROR("Problem resizing BAR0 (%d).", r);

	pci_assign_unassigned_bus_resources(adev->pdev->bus);

	/* When the doorbell or fb BAR isn't available we have no chance of
	 * using the device.
	 */
	r = amdgpu_doorbell_init(adev);
	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
		return -ENODEV;

	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);

	return 0;
}

static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
{
	if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
		return false;

	return true;
}

/*
 * GPU helpers function.
 */
/**
 * amdgpu_device_need_post - check if the hw need post or not
 *
 * @adev: amdgpu_device pointer
 *
 * Check if the asic has been initialized (all asics) at driver startup
 * or post is needed if hw reset is performed.
 * Returns true if need or false if not.
 */
bool amdgpu_device_need_post(struct amdgpu_device *adev)
{
	uint32_t reg;

	if (amdgpu_sriov_vf(adev))
		return false;

	if (!amdgpu_device_read_bios(adev))
		return false;

	if (amdgpu_passthrough(adev)) {
		/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
		 * some old smc fw still needs the driver to do a vPost, otherwise the gpu hangs.
		 * smc fw versions above 22.15 don't have this flaw, so we force
		 * vPost to be executed for smc versions below 22.15.
		 */
		if (adev->asic_type == CHIP_FIJI) {
			int err;
			uint32_t fw_ver;

			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
			/* force vPost if error occurred */
			if (err)
				return true;

			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
			if (fw_ver < 0x00160e00)
				return true;
		}
	}

	/* Don't post if we need to reset whole hive on init */
	if (adev->gmc.xgmi.pending_reset)
		return false;

	if (adev->has_hw_reset) {
		adev->has_hw_reset = false;
		return true;
	}

	/* bios scratch used on CIK+ */
	if (adev->asic_type >= CHIP_BONAIRE)
		return amdgpu_atombios_scratch_need_asic_init(adev);

	/* check MEM_SIZE for older asics */
	reg = amdgpu_asic_get_config_memsize(adev);

	if ((reg != 0) && (reg != 0xffffffff))
		return false;

	return true;
}

/*
 * On APUs with >= 64GB white flickering has been observed w/ SG enabled.
 * Disable S/G on such systems until we have a proper fix.
 * https://gitlab.freedesktop.org/drm/amd/-/issues/2354
 * https://gitlab.freedesktop.org/drm/amd/-/issues/2735
 */
bool amdgpu_sg_display_supported(struct amdgpu_device *adev)
{
	switch (amdgpu_sg_display) {
	case -1:
		break;
	case 0:
		return false;
	case 1:
		return true;
	default:
		return false;
	}
	if ((totalram_pages() << (PAGE_SHIFT - 10)) +
	    (adev->gmc.real_vram_size / 1024) >= 64000000) {
		DRM_WARN("Disabling S/G due to >=64GB RAM\n");
		return false;
	}
	return true;
}

/*
 * Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic
 * speed switching. Until we have confirmation from Intel that a specific host
 * supports it, it's safer that we keep it disabled for all.
 *
 * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
 * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
 */
bool amdgpu_device_pcie_dynamic_switching_supported(void)
{
#if IS_ENABLED(CONFIG_X86)
	struct cpuinfo_x86 *c = &cpu_data(0);

	if (c->x86_vendor == X86_VENDOR_INTEL)
		return false;
#endif
	return true;
}

/**
 * amdgpu_device_should_use_aspm - check if the device should program ASPM
 *
 * @adev: amdgpu_device pointer
 *
 * Confirm whether the module parameter and pcie bridge agree that ASPM should
 * be set for this device.
 *
 * Returns true if it should be used or false if not.
 */
bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
{
	switch (amdgpu_aspm) {
	case -1:
		break;
	case 0:
		return false;
	case 1:
		return true;
	default:
		return false;
	}
	return pcie_aspm_enabled(adev->pdev);
}

bool amdgpu_device_aspm_support_quirk(void)
{
#if IS_ENABLED(CONFIG_X86)
	struct cpuinfo_x86 *c = &cpu_data(0);

	return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
#else
	return true;
#endif
}

/* if we get transitioned to only one device, take VGA back */
/**
 * amdgpu_device_vga_set_decode - enable/disable vga decode
 *
 * @pdev: PCI device pointer
 * @state: enable/disable vga decode
 *
 * Enable/disable vga decode (all asics).
 * Returns VGA resource flags.
 */
static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
		bool state)
{
	struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));

	amdgpu_asic_set_vga_state(adev, state);
	if (state)
		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
	else
		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
}

/**
 * amdgpu_device_check_block_size - validate the vm block size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm block size specified via module parameter.
 * The vm block size defines number of bits in page table versus page directory,
 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
 * page table and the remaining bits are in the page directory.
 */
static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
{
	/* defines number of bits in page table versus page directory,
	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
	 * page table and the remaining bits are in the page directory
	 */
	if (amdgpu_vm_block_size == -1)
		return;

	if (amdgpu_vm_block_size < 9) {
		dev_warn(adev->dev, "VM page table size (%d) too small\n",
			 amdgpu_vm_block_size);
		amdgpu_vm_block_size = -1;
	}
}

/**
 * amdgpu_device_check_vm_size - validate the vm size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm size in GB specified via module parameter.
 * The VM size is the size of the GPU virtual memory space in GB.
 */
static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
{
	/* no need to check the default value */
	if (amdgpu_vm_size == -1)
		return;

	if (amdgpu_vm_size < 1) {
		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
			 amdgpu_vm_size);
		amdgpu_vm_size = -1;
	}
}

static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
{
	struct sysinfo si;
	bool is_os_64 = (sizeof(void *) == 8);
	uint64_t total_memory;
	uint64_t dram_size_seven_GB = 0x1B8000000;
	uint64_t dram_size_three_GB = 0xB8000000;

	if (amdgpu_smu_memory_pool_size == 0)
		return;

	if (!is_os_64) {
		DRM_WARN("Not 64-bit OS, feature not supported\n");
		goto def_value;
	}
	si_meminfo(&si);
	total_memory = (uint64_t)si.totalram * si.mem_unit;

	if ((amdgpu_smu_memory_pool_size == 1) ||
	    (amdgpu_smu_memory_pool_size == 2)) {
		if (total_memory < dram_size_three_GB)
			goto def_value1;
	} else if ((amdgpu_smu_memory_pool_size == 4) ||
		   (amdgpu_smu_memory_pool_size == 8)) {
		if (total_memory < dram_size_seven_GB)
			goto def_value1;
	} else {
		DRM_WARN("Smu memory pool size not supported\n");
		goto def_value;
	}
	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;

	return;

def_value1:
	DRM_WARN("Not enough system memory\n");
def_value:
	adev->pm.smu_prv_buffer_size = 0;
}

static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
{
	if (!(adev->flags & AMD_IS_APU) ||
	    adev->asic_type < CHIP_RAVEN)
		return 0;

	switch (adev->asic_type) {
	case CHIP_RAVEN:
		if (adev->pdev->device == 0x15dd)
			adev->apu_flags |= AMD_APU_IS_RAVEN;
		if (adev->pdev->device == 0x15d8)
			adev->apu_flags |= AMD_APU_IS_PICASSO;
		break;
	case CHIP_RENOIR:
		if ((adev->pdev->device == 0x1636) ||
		    (adev->pdev->device == 0x164c))
			adev->apu_flags |= AMD_APU_IS_RENOIR;
		else
			adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
		break;
	case CHIP_VANGOGH:
		adev->apu_flags |= AMD_APU_IS_VANGOGH;
		break;
	case CHIP_YELLOW_CARP:
		break;
	case CHIP_CYAN_SKILLFISH:
		if ((adev->pdev->device == 0x13FE) ||
		    (adev->pdev->device == 0x143F))
			adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
		break;
	default:
		break;
	}

	return 0;
}

/**
 * amdgpu_device_check_arguments - validate module params
 *
 * @adev: amdgpu_device pointer
 *
 * Validates certain module parameters and updates
 * the associated values used by the driver (all asics).
 */
static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
{
	if (amdgpu_sched_jobs < 4) {
		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = 4;
	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
	}

	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
		/* gart size must be greater or equal to 32M */
		dev_warn(adev->dev, "gart size (%d) too small\n",
			 amdgpu_gart_size);
		amdgpu_gart_size = -1;
	}

	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
		/* gtt size must be greater or equal to 32M */
		dev_warn(adev->dev, "gtt size (%d) too small\n",
			 amdgpu_gtt_size);
		amdgpu_gtt_size = -1;
	}

	/* valid range is between 4 and 9 inclusive */
	if (amdgpu_vm_fragment_size != -1 &&
	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
		dev_warn(adev->dev, "valid range is between 4 and 9\n");
		amdgpu_vm_fragment_size = -1;
	}

	if (amdgpu_sched_hw_submission < 2) {
		dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
			 amdgpu_sched_hw_submission);
		amdgpu_sched_hw_submission = 2;
	} else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
		dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
			 amdgpu_sched_hw_submission);
		amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
	}

	if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
		dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
		amdgpu_reset_method = -1;
	}

	amdgpu_device_check_smu_prv_buffer_size(adev);

	amdgpu_device_check_vm_size(adev);

	amdgpu_device_check_block_size(adev);

	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);

	return 0;
}

/**
 * amdgpu_switcheroo_set_state - set switcheroo state
 *
 * @pdev: pci dev pointer
 * @state: vga_switcheroo state
 *
 * Callback for the switcheroo driver. Suspends or resumes
 * the asics before or after it is powered up using ACPI methods.
 */
static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
					enum vga_switcheroo_state state)
{
	struct drm_device *dev = pci_get_drvdata(pdev);
	int r;

	if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
		return;

	if (state == VGA_SWITCHEROO_ON) {
		pr_info("switched on\n");
		/* don't suspend or resume card normally */
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;

		pci_set_power_state(pdev, PCI_D0);
		amdgpu_device_load_pci_state(pdev);
		r = pci_enable_device(pdev);
		if (r)
			DRM_WARN("pci_enable_device failed (%d)\n", r);
		amdgpu_device_resume(dev, true);

		dev->switch_power_state = DRM_SWITCH_POWER_ON;
	} else {
		pr_info("switched off\n");
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
		amdgpu_device_suspend(dev, true);
		amdgpu_device_cache_pci_state(pdev);
		/* Shut down the device */
		pci_disable_device(pdev);
		pci_set_power_state(pdev, PCI_D3cold);
		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
	}
}

/**
 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
 *
 * @pdev: pci dev pointer
 *
 * Callback for the switcheroo driver. Check if the switcheroo
 * state can be changed.
 * Returns true if the state can be changed, false if not.
 */
static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);

	/*
	 * FIXME: open_count is protected by drm_global_mutex but that would lead to
	 * locking inversion with the driver load path. And the access here is
	 * completely racy anyway. So don't bother with locking for now.
	 */
	return atomic_read(&dev->open_count) == 0;
}

static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
	.set_gpu_state = amdgpu_switcheroo_set_state,
	.reprobe = NULL,
	.can_switch = amdgpu_switcheroo_can_switch,
};

/**
 * amdgpu_device_ip_set_clockgating_state - set the CG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: clockgating state (gate or ungate)
 *
 * Sets the requested clockgating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_clockgating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

/**
 * amdgpu_device_ip_set_powergating_state - set the PG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: powergating state (gate or ungate)
 *
 * Sets the requested powergating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_powergating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_powergating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

/**
 * amdgpu_device_ip_get_clockgating_state - get the CG state
 *
 * @adev: amdgpu_device pointer
 * @flags: clockgating feature flags
 *
 * Walks the list of IPs on the device and updates the clockgating
 * flags for each IP.
 * Updates @flags with the feature flags for each hardware IP where
 * clockgating is enabled.
 */
void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
					    u64 *flags)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
	}
}

/**
 * amdgpu_device_ip_wait_for_idle - wait for idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Waits for the requested hardware IP to be idle.
 * Returns 0 for success or a negative error code on failure.
 */
int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
				   enum amd_ip_block_type block_type)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type) {
			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
			if (r)
				return r;
			break;
		}
	}
	return 0;

}

/**
 * amdgpu_device_ip_is_idle - is the hardware IP idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Check if the hardware IP is idle or not.
 * Returns true if the IP is idle, false if not.
 */
bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
			      enum amd_ip_block_type block_type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type)
			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
	}
	return true;
}

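/*
 * Illustrative usage sketch (hypothetical caller): pairing the idle check
 * with the wait helper for a single block type, e.g. GMC, before touching
 * its state.
 *
 *	if (!amdgpu_device_ip_is_idle(adev, AMD_IP_BLOCK_TYPE_GMC)) {
 *		int r = amdgpu_device_ip_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GMC);
 *
 *		if (r)
 *			return r;
 *	}
 */
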
/**
 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
 *
 * @adev: amdgpu_device pointer
 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Returns a pointer to the hardware IP block structure
 * if it exists for the asic, otherwise NULL.
 */
struct amdgpu_ip_block *
amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
			      enum amd_ip_block_type type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++)
		if (adev->ip_blocks[i].version->type == type)
			return &adev->ip_blocks[i];

	return NULL;
}

/**
 * amdgpu_device_ip_block_version_cmp
 *
 * @adev: amdgpu_device pointer
 * @type: enum amd_ip_block_type
 * @major: major version
 * @minor: minor version
 *
 * return 0 if equal or greater
 * return 1 if smaller or the ip_block doesn't exist
 */
int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
				       enum amd_ip_block_type type,
				       u32 major, u32 minor)
{
	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);

	if (ip_block && ((ip_block->version->major > major) ||
			((ip_block->version->major == major) &&
			(ip_block->version->minor >= minor))))
		return 0;

	return 1;
}

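/*
 * Illustrative usage sketch (hypothetical caller): looking up an IP block
 * and gating code on a minimum IP block version. The SMC type and the 11.0
 * version number are examples only.
 *
 *	struct amdgpu_ip_block *ip =
 *		amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_SMC);
 *
 *	if (ip && !amdgpu_device_ip_block_version_cmp(adev,
 *						      AMD_IP_BLOCK_TYPE_SMC,
 *						      11, 0)) {
 *		... the SMC block is present at version 11.0 or newer ...
 *	}
 */
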
/**
 * amdgpu_device_ip_block_add
 *
 * @adev: amdgpu_device pointer
 * @ip_block_version: pointer to the IP to add
 *
 * Adds the IP block driver information to the collection of IPs
 * on the asic.
 */
int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
			       const struct amdgpu_ip_block_version *ip_block_version)
{
	if (!ip_block_version)
		return -EINVAL;

	switch (ip_block_version->type) {
	case AMD_IP_BLOCK_TYPE_VCN:
		if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
			return 0;
		break;
	case AMD_IP_BLOCK_TYPE_JPEG:
		if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
			return 0;
		break;
	default:
		break;
	}

	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
		 ip_block_version->funcs->name);

	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;

	return 0;
}

/**
 * amdgpu_device_enable_virtual_display - enable virtual display feature
 *
 * @adev: amdgpu_device pointer
 *
 * Enables the virtual display feature if the user has enabled it via
 * the module parameter virtual_display. This feature provides a virtual
 * display hardware on headless boards or in virtualized environments.
 * This function parses and validates the configuration string specified by
 * the user and configures the virtual display configuration (number of
 * virtual connectors, crtcs, etc.) specified.
 */
static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
{
	adev->enable_virtual_display = false;

	if (amdgpu_virtual_display) {
		const char *pci_address_name = pci_name(adev->pdev);
		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;

		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
		pciaddstr_tmp = pciaddstr;
		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
			pciaddname = strsep(&pciaddname_tmp, ",");
			if (!strcmp("all", pciaddname)
			    || !strcmp(pci_address_name, pciaddname)) {
				long num_crtc;
				int res = -1;

				adev->enable_virtual_display = true;

				if (pciaddname_tmp)
					res = kstrtol(pciaddname_tmp, 10,
						      &num_crtc);

				if (!res) {
					if (num_crtc < 1)
						num_crtc = 1;
					if (num_crtc > 6)
						num_crtc = 6;
					adev->mode_info.num_crtc = num_crtc;
				} else {
					adev->mode_info.num_crtc = 1;
				}
				break;
			}
		}

		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
			 amdgpu_virtual_display, pci_address_name,
			 adev->enable_virtual_display, adev->mode_info.num_crtc);

		kfree(pciaddstr);
	}
}

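/*
 * Module parameter format handled above (derived from the parsing logic;
 * the PCI addresses are examples only): a semicolon-separated list of
 * "<pci-bus-id>,<num_crtc>" entries, where "all" matches every device, e.g.
 *
 *	amdgpu.virtual_display=0000:03:00.0,2
 *	amdgpu.virtual_display=all,1
 *	amdgpu.virtual_display=0000:03:00.0,4;0000:04:00.0,1
 *
 * num_crtc is clamped to the range 1..6 and defaults to 1 when omitted or
 * unparsable.
 */
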
void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
{
	if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
		adev->mode_info.num_crtc = 1;
		adev->enable_virtual_display = true;
		DRM_INFO("virtual_display:%d, num_crtc:%d\n",
			 adev->enable_virtual_display, adev->mode_info.num_crtc);
	}
}

/**
 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
 *
 * @adev: amdgpu_device pointer
 *
 * Parses the asic configuration parameters specified in the gpu info
 * firmware and makes them available to the driver for use in configuring
 * the asic.
 * Returns 0 on success, -EINVAL on failure.
 */
e2a75f88
AD
1908static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1909{
e2a75f88 1910 const char *chip_name;
c0a43457 1911 char fw_name[40];
e2a75f88
AD
1912 int err;
1913 const struct gpu_info_firmware_header_v1_0 *hdr;
1914
ab4fe3e1
HR
1915 adev->firmware.gpu_info_fw = NULL;
1916
72de33f8 1917 if (adev->mman.discovery_bin) {
cc375d8c
TY
1918 /*
1919 * FIXME: The bounding box is still needed by Navi12, so
e24d0e91 1920 * temporarily read it from gpu_info firmware. Should be dropped
cc375d8c
TY
1921 * when DAL no longer needs it.
1922 */
1923 if (adev->asic_type != CHIP_NAVI12)
1924 return 0;
258620d0
AD
1925 }
1926
e2a75f88 1927 switch (adev->asic_type) {
e2a75f88
AD
1928 default:
1929 return 0;
1930 case CHIP_VEGA10:
1931 chip_name = "vega10";
1932 break;
3f76dced
AD
1933 case CHIP_VEGA12:
1934 chip_name = "vega12";
1935 break;
2d2e5e7e 1936 case CHIP_RAVEN:
54f78a76 1937 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
54c4d17e 1938 chip_name = "raven2";
54f78a76 1939 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
741deade 1940 chip_name = "picasso";
54c4d17e
FX
1941 else
1942 chip_name = "raven";
2d2e5e7e 1943 break;
65e60f6e
LM
1944 case CHIP_ARCTURUS:
1945 chip_name = "arcturus";
1946 break;
42b325e5
XY
1947 case CHIP_NAVI12:
1948 chip_name = "navi12";
1949 break;
e2a75f88
AD
1950 }
1951
1952 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
b31d3063 1953 err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
e2a75f88
AD
1954 if (err) {
1955 dev_err(adev->dev,
b31d3063 1956 "Failed to get gpu_info firmware \"%s\"\n",
e2a75f88
AD
1957 fw_name);
1958 goto out;
1959 }
1960
ab4fe3e1 1961 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
1962 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1963
1964 switch (hdr->version_major) {
1965 case 1:
1966 {
1967 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 1968 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
1969 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1970
		/*
		 * Should be dropped when DAL no longer needs it.
		 */
1974 if (adev->asic_type == CHIP_NAVI12)
ec51d3fa
XY
1975 goto parse_soc_bounding_box;
1976
b5ab16bf
AD
1977 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1978 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1979 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1980 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 1981 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
1982 le32_to_cpu(gpu_info_fw->gc_num_tccs);
1983 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1984 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1985 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1986 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 1987 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
1988 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1989 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
1990 adev->gfx.cu_info.max_waves_per_simd =
1991 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1992 adev->gfx.cu_info.max_scratch_slots_per_cu =
1993 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1994 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 1995 if (hdr->version_minor >= 1) {
35c2e910
HZ
1996 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1997 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1998 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1999 adev->gfx.config.num_sc_per_sh =
2000 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2001 adev->gfx.config.num_packer_per_sc =
2002 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2003 }
ec51d3fa
XY
2004
parse_soc_bounding_box:
		/*
		 * soc bounding box info is not integrated in discovery table,
		 * we always need to parse it from gpu info firmware if needed.
		 */
48321c3d
HW
2010 if (hdr->version_minor == 2) {
2011 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2012 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2013 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2014 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2015 }
e2a75f88
AD
2016 break;
2017 }
2018 default:
2019 dev_err(adev->dev,
2020 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2021 err = -EINVAL;
2022 goto out;
2023 }
2024out:
e2a75f88
AD
2025 return err;
2026}
2027
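/*
 * Rough layout of the gpu_info .bin as implied by the parser above (a
 * summary of the fields referenced there, not a normative description of
 * the firmware format):
 *
 *	common ucode header                            - ucode_array_offset_bytes
 *	gpu_info_firmware_header_v1_0                  - version_major / version_minor
 *	gpu_info_firmware_v1_0 at data + offset        - gc_num_se, gc_num_cu_per_sh, ...
 *	version_minor >= 1 adds num_sc_per_sh / num_packer_per_sc (v1_1)
 *	version_minor == 2 adds the soc_bounding_box consumed by DAL for Navi12 (v1_2)
 */
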
/**
 * amdgpu_device_ip_early_init - run early init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Early initialization pass for hardware IPs. The hardware IPs that make
 * up each asic are discovered, and each IP's early_init callback is run. This
 * is the first stage in initializing the asic.
 * Returns 0 on success, negative error code on failure.
 */
06ec9070 2038static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 2039{
901e2be2
AD
2040 struct drm_device *dev = adev_to_drm(adev);
2041 struct pci_dev *parent;
aaa36a97 2042 int i, r;
ced69502 2043 bool total;
d38ceaf9 2044
483ef985 2045 amdgpu_device_enable_virtual_display(adev);
a6be7570 2046
00a979f3 2047 if (amdgpu_sriov_vf(adev)) {
00a979f3 2048 r = amdgpu_virt_request_full_gpu(adev, true);
aaa36a97
AD
2049 if (r)
2050 return r;
00a979f3
WS
2051 }
2052
d38ceaf9 2053 switch (adev->asic_type) {
33f34802
KW
2054#ifdef CONFIG_DRM_AMDGPU_SI
2055 case CHIP_VERDE:
2056 case CHIP_TAHITI:
2057 case CHIP_PITCAIRN:
2058 case CHIP_OLAND:
2059 case CHIP_HAINAN:
295d0daf 2060 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
2061 r = si_set_ip_blocks(adev);
2062 if (r)
2063 return r;
2064 break;
2065#endif
a2e73f56
AD
2066#ifdef CONFIG_DRM_AMDGPU_CIK
2067 case CHIP_BONAIRE:
2068 case CHIP_HAWAII:
2069 case CHIP_KAVERI:
2070 case CHIP_KABINI:
2071 case CHIP_MULLINS:
e1ad2d53 2072 if (adev->flags & AMD_IS_APU)
a2e73f56 2073 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
2074 else
2075 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
2076
2077 r = cik_set_ip_blocks(adev);
2078 if (r)
2079 return r;
2080 break;
2081#endif
da87c30b
AD
2082 case CHIP_TOPAZ:
2083 case CHIP_TONGA:
2084 case CHIP_FIJI:
2085 case CHIP_POLARIS10:
2086 case CHIP_POLARIS11:
2087 case CHIP_POLARIS12:
2088 case CHIP_VEGAM:
2089 case CHIP_CARRIZO:
2090 case CHIP_STONEY:
2091 if (adev->flags & AMD_IS_APU)
2092 adev->family = AMDGPU_FAMILY_CZ;
2093 else
2094 adev->family = AMDGPU_FAMILY_VI;
2095
2096 r = vi_set_ip_blocks(adev);
2097 if (r)
2098 return r;
2099 break;
d38ceaf9 2100 default:
63352b7f
AD
2101 r = amdgpu_discovery_set_ip_blocks(adev);
2102 if (r)
2103 return r;
2104 break;
d38ceaf9
AD
2105 }
2106
901e2be2
AD
2107 if (amdgpu_has_atpx() &&
2108 (amdgpu_is_atpx_hybrid() ||
2109 amdgpu_has_atpx_dgpu_power_cntl()) &&
2110 ((adev->flags & AMD_IS_APU) == 0) &&
2111 !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
2112 adev->flags |= AMD_IS_PX;
2113
85ac2021
AD
2114 if (!(adev->flags & AMD_IS_APU)) {
2115 parent = pci_upstream_bridge(adev->pdev);
2116 adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2117 }
901e2be2 2118
1884734a 2119
3b94fb10 2120 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 2121 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 2122 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
4215a119
HC
2123 if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2124 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
00f54b97 2125
ced69502 2126 total = true;
d38ceaf9
AD
2127 for (i = 0; i < adev->num_ip_blocks; i++) {
2128 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
0c451baf 2129 DRM_WARN("disabled ip block: %d <%s>\n",
ed8cf00c 2130 i, adev->ip_blocks[i].version->funcs->name);
a1255107 2131 adev->ip_blocks[i].status.valid = false;
d38ceaf9 2132 } else {
a1255107
AD
2133 if (adev->ip_blocks[i].version->funcs->early_init) {
2134 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 2135 if (r == -ENOENT) {
a1255107 2136 adev->ip_blocks[i].status.valid = false;
2c1a2784 2137 } else if (r) {
a1255107
AD
2138 DRM_ERROR("early_init of IP block <%s> failed %d\n",
2139 adev->ip_blocks[i].version->funcs->name, r);
ced69502 2140 total = false;
2c1a2784 2141 } else {
a1255107 2142 adev->ip_blocks[i].status.valid = true;
2c1a2784 2143 }
974e6b64 2144 } else {
a1255107 2145 adev->ip_blocks[i].status.valid = true;
d38ceaf9 2146 }
d38ceaf9 2147 }
21a249ca
AD
2148 /* get the vbios after the asic_funcs are set up */
2149 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
2150 r = amdgpu_device_parse_gpu_info_fw(adev);
2151 if (r)
2152 return r;
2153
21a249ca 2154 /* Read BIOS */
9535a86a
SZ
2155 if (amdgpu_device_read_bios(adev)) {
2156 if (!amdgpu_get_bios(adev))
2157 return -EINVAL;
21a249ca 2158
9535a86a
SZ
2159 r = amdgpu_atombios_init(adev);
2160 if (r) {
2161 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2162 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2163 return r;
2164 }
21a249ca 2165 }

			/* get pf2vf msg info at its earliest time */
			if (amdgpu_sriov_vf(adev))
				amdgpu_virt_init_data_exchange(adev);

21a249ca 2171 }
d38ceaf9 2172 }
ced69502
ML
2173 if (!total)
2174 return -ENODEV;
d38ceaf9 2175
00fa4035 2176 amdgpu_amdkfd_device_probe(adev);
395d1fb9
NH
2177 adev->cg_flags &= amdgpu_cg_mask;
2178 adev->pg_flags &= amdgpu_pg_mask;
2179
d38ceaf9
AD
2180 return 0;
2181}
2182
static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.sw)
			continue;
		if (adev->ip_blocks[i].status.hw)
			continue;
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
		    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
			if (r) {
				DRM_ERROR("hw_init of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
			adev->ip_blocks[i].status.hw = true;
		}
	}

	return 0;
}

static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.sw)
			continue;
		if (adev->ip_blocks[i].status.hw)
			continue;
		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
		if (r) {
			DRM_ERROR("hw_init of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
			return r;
		}
		adev->ip_blocks[i].status.hw = true;
	}

	return 0;
}

7a3e0bb2
RZ
2229static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2230{
2231 int r = 0;
2232 int i;
80f41f84 2233 uint32_t smu_version;
7a3e0bb2
RZ
2234
2235 if (adev->asic_type >= CHIP_VEGA10) {
2236 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
2237 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2238 continue;
2239
e3c1b071 2240 if (!adev->ip_blocks[i].status.sw)
2241 continue;
2242
482f0e53
ML
2243 /* no need to do the fw loading again if already done*/
2244 if (adev->ip_blocks[i].status.hw == true)
2245 break;
2246
53b3f8f4 2247 if (amdgpu_in_reset(adev) || adev->in_suspend) {
482f0e53
ML
2248 r = adev->ip_blocks[i].version->funcs->resume(adev);
2249 if (r) {
2250 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 2251 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
2252 return r;
2253 }
2254 } else {
2255 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2256 if (r) {
2257 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2258 adev->ip_blocks[i].version->funcs->name, r);
2259 return r;
7a3e0bb2 2260 }
7a3e0bb2 2261 }
482f0e53
ML
2262
2263 adev->ip_blocks[i].status.hw = true;
2264 break;
7a3e0bb2
RZ
2265 }
2266 }
482f0e53 2267
8973d9ec
ED
2268 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2269 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 2270
80f41f84 2271 return r;
7a3e0bb2
RZ
2272}
2273
5fd8518d
AG
2274static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2275{
2276 long timeout;
2277 int r, i;
2278
2279 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2280 struct amdgpu_ring *ring = adev->rings[i];
2281
2282 /* No need to setup the GPU scheduler for rings that don't need it */
2283 if (!ring || ring->no_scheduler)
2284 continue;
2285
2286 switch (ring->funcs->type) {
2287 case AMDGPU_RING_TYPE_GFX:
2288 timeout = adev->gfx_timeout;
2289 break;
2290 case AMDGPU_RING_TYPE_COMPUTE:
2291 timeout = adev->compute_timeout;
2292 break;
2293 case AMDGPU_RING_TYPE_SDMA:
2294 timeout = adev->sdma_timeout;
2295 break;
2296 default:
2297 timeout = adev->video_timeout;
2298 break;
2299 }
2300
2301 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
11f25c84 2302 ring->num_hw_submission, 0,
8ab62eda
JG
2303 timeout, adev->reset_domain->wq,
2304 ring->sched_score, ring->name,
2305 adev->dev);
5fd8518d
AG
2306 if (r) {
2307 DRM_ERROR("Failed to create scheduler on ring %s.\n",
2308 ring->name);
2309 return r;
2310 }
2311 }
2312
d425c6f4
JZ
2313 amdgpu_xcp_update_partition_sched_list(adev);
2314
5fd8518d
AG
2315 return 0;
2316}
2317
2318
e3ecdffa
AD
2319/**
2320 * amdgpu_device_ip_init - run init for hardware IPs
2321 *
2322 * @adev: amdgpu_device pointer
2323 *
2324 * Main initialization pass for hardware IPs. The list of all the hardware
2325 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2326 * are run. sw_init initializes the software state associated with each IP
2327 * and hw_init initializes the hardware associated with each IP.
2328 * Returns 0 on success, negative error code on failure.
2329 */
06ec9070 2330static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
2331{
2332 int i, r;
2333
c030f2e4 2334 r = amdgpu_ras_init(adev);
2335 if (r)
2336 return r;
2337
d38ceaf9 2338 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2339 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2340 continue;
a1255107 2341 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 2342 if (r) {
a1255107
AD
2343 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2344 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 2345 goto init_failed;
2c1a2784 2346 }
a1255107 2347 adev->ip_blocks[i].status.sw = true;
bfca0289 2348
c1c39032
AD
2349 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2350 /* need to do common hw init early so everything is set up for gmc */
2351 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2352 if (r) {
2353 DRM_ERROR("hw_init %d failed %d\n", i, r);
2354 goto init_failed;
2355 }
2356 adev->ip_blocks[i].status.hw = true;
2357 } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2358 /* need to do gmc hw init early so we can allocate gpu mem */
892deb48
VS
2359 /* Try to reserve bad pages early */
2360 if (amdgpu_sriov_vf(adev))
2361 amdgpu_virt_exchange_data(adev);
2362
7ccfd79f 2363 r = amdgpu_device_mem_scratch_init(adev);
2c1a2784 2364 if (r) {
7ccfd79f 2365 DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
72d3f592 2366 goto init_failed;
2c1a2784 2367 }
a1255107 2368 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
2369 if (r) {
2370 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 2371 goto init_failed;
2c1a2784 2372 }
06ec9070 2373 r = amdgpu_device_wb_init(adev);
2c1a2784 2374 if (r) {
06ec9070 2375 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 2376 goto init_failed;
2c1a2784 2377 }
a1255107 2378 adev->ip_blocks[i].status.hw = true;
2493664f
ML
2379
2380 /* right after GMC hw init, we create CSA */
02ff519e 2381 if (adev->gfx.mcbp) {
1e256e27 2382 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
58ab2c08
CK
2383 AMDGPU_GEM_DOMAIN_VRAM |
2384 AMDGPU_GEM_DOMAIN_GTT,
2385 AMDGPU_CSA_SIZE);
2493664f
ML
2386 if (r) {
2387 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 2388 goto init_failed;
2493664f
ML
2389 }
2390 }
d38ceaf9
AD
2391 }
2392 }
2393
c9ffa427 2394 if (amdgpu_sriov_vf(adev))
22c16d25 2395 amdgpu_virt_init_data_exchange(adev);
c9ffa427 2396
533aed27
AG
2397 r = amdgpu_ib_pool_init(adev);
2398 if (r) {
2399 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2400 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2401 goto init_failed;
2402 }
2403
c8963ea4
RZ
2404 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2405 if (r)
72d3f592 2406 goto init_failed;
0a4f2520
RZ
2407
2408 r = amdgpu_device_ip_hw_init_phase1(adev);
2409 if (r)
72d3f592 2410 goto init_failed;
0a4f2520 2411
7a3e0bb2
RZ
2412 r = amdgpu_device_fw_loading(adev);
2413 if (r)
72d3f592 2414 goto init_failed;
7a3e0bb2 2415
0a4f2520
RZ
2416 r = amdgpu_device_ip_hw_init_phase2(adev);
2417 if (r)
72d3f592 2418 goto init_failed;
d38ceaf9 2419
121a2bc6
AG
2420 /*
2421 * retired pages will be loaded from eeprom and reserved here,
2422 * it should be called after amdgpu_device_ip_hw_init_phase2 since
2423 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2424 * for I2C communication which only true at this point.
b82e65a9
GC
2425 *
2426 * amdgpu_ras_recovery_init may fail, but the upper only cares the
2427 * failure from bad gpu situation and stop amdgpu init process
2428 * accordingly. For other failed cases, it will still release all
2429 * the resource and print error message, rather than returning one
2430 * negative value to upper level.
121a2bc6
AG
2431 *
2432 * Note: theoretically, this should be called before all vram allocations
2433 * to protect retired page from abusing
2434 */
b82e65a9
GC
2435 r = amdgpu_ras_recovery_init(adev);
2436 if (r)
2437 goto init_failed;
121a2bc6 2438
cfbb6b00
AG
2439 /**
2440 * In case of XGMI grab extra reference for reset domain for this device
2441 */
a4c63caf 2442 if (adev->gmc.xgmi.num_physical_nodes > 1) {
cfbb6b00 2443 if (amdgpu_xgmi_add_device(adev) == 0) {
46c67660 2444 if (!amdgpu_sriov_vf(adev)) {
2efc30f0
VC
2445 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2446
dfd0287b
LH
2447 if (WARN_ON(!hive)) {
2448 r = -ENOENT;
2449 goto init_failed;
2450 }
2451
46c67660 2452 if (!hive->reset_domain ||
2453 !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2454 r = -ENOENT;
2455 amdgpu_put_xgmi_hive(hive);
2456 goto init_failed;
2457 }
2458
2459 /* Drop the early temporary reset domain we created for device */
2460 amdgpu_reset_put_reset_domain(adev->reset_domain);
2461 adev->reset_domain = hive->reset_domain;
9dfa4860 2462 amdgpu_put_xgmi_hive(hive);
cfbb6b00 2463 }
a4c63caf
AG
2464 }
2465 }
2466
5fd8518d
AG
2467 r = amdgpu_device_init_schedulers(adev);
2468 if (r)
2469 goto init_failed;
e3c1b071 2470
2471 /* Don't init kfd if whole hive need to be reset during init */
84b4dd3f
PY
2472 if (!adev->gmc.xgmi.pending_reset) {
2473 kgd2kfd_init_zone_device(adev);
e3c1b071 2474 amdgpu_amdkfd_device_init(adev);
84b4dd3f 2475 }
c6332b97 2476
bd607166
KR
2477 amdgpu_fru_get_product_info(adev);
2478
72d3f592 2479init_failed:
c6332b97 2480
72d3f592 2481 return r;
d38ceaf9
AD
2482}
2483
/**
 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
 *
 * @adev: amdgpu_device pointer
 *
 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
 * this function before a GPU reset. If the value is retained after a
 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
 */
static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
{
	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
}

/**
 * amdgpu_device_check_vram_lost - check if vram is valid
 *
 * @adev: amdgpu_device pointer
 *
 * Checks the reset magic value written to the gart pointer in VRAM.
 * The driver calls this after a GPU reset to see if the contents of
 * VRAM are lost or not.
 * returns true if vram is lost, false if not.
 */
static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
{
	if (memcmp(adev->gart.ptr, adev->reset_magic,
		   AMDGPU_RESET_MAGIC_NUM))
		return true;

	if (!amdgpu_in_reset(adev))
		return false;

	/*
	 * For all ASICs with baco/mode1 reset, the VRAM is
	 * always assumed to be lost.
	 */
	switch (amdgpu_asic_reset_method(adev)) {
	case AMD_RESET_METHOD_BACO:
	case AMD_RESET_METHOD_MODE1:
		return true;
	default:
		return false;
	}
}

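/*
 * Usage pattern of the reset magic, as wired up elsewhere in this file
 * (sketch only): amdgpu_device_fill_reset_magic() copies
 * AMDGPU_RESET_MAGIC_NUM bytes from adev->gart.ptr during late init, and
 * amdgpu_device_check_vram_lost() compares them again after a reset:
 *
 *	amdgpu_device_fill_reset_magic(adev);
 *	... GPU reset happens ...
 *	if (amdgpu_device_check_vram_lost(adev))
 *		... VRAM contents must be treated as lost and re-created ...
 */
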
/**
 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
 *
 * @adev: amdgpu_device pointer
 * @state: clockgating state (gate or ungate)
 *
 * The list of all the hardware IPs that make up the asic is walked and the
 * set_clockgating_state callbacks are run.
 * The late initialization pass enables clockgating for hardware IPs;
 * the fini and suspend passes disable it.
 * Returns 0 on success, negative error code on failure.
 */

5d89bb2d
LL
2543int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2544 enum amd_clockgating_state state)
d38ceaf9 2545{
1112a46b 2546 int i, j, r;
d38ceaf9 2547
4a2ba394
SL
2548 if (amdgpu_emu_mode == 1)
2549 return 0;
2550
1112a46b
RZ
2551 for (j = 0; j < adev->num_ip_blocks; j++) {
2552 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2553 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2554 continue;
47198eb7 2555 /* skip CG for GFX, SDMA on S0ix */
5d70a549 2556 if (adev->in_s0ix &&
47198eb7
AD
2557 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2558 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2559 continue;
4a446d55 2560 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2561 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2562 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2563 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2564 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2565 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2566 /* enable clockgating to save power */
a1255107 2567 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2568 state);
4a446d55
AD
2569 if (r) {
2570 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2571 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2572 return r;
2573 }
b0b00ff1 2574 }
d38ceaf9 2575 }
06b18f61 2576
c9f96fd5
RZ
2577 return 0;
2578}
2579
5d89bb2d
LL
2580int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2581 enum amd_powergating_state state)
c9f96fd5 2582{
1112a46b 2583 int i, j, r;
06b18f61 2584
c9f96fd5
RZ
2585 if (amdgpu_emu_mode == 1)
2586 return 0;
2587
1112a46b
RZ
2588 for (j = 0; j < adev->num_ip_blocks; j++) {
2589 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2590 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5 2591 continue;
47198eb7 2592 /* skip PG for GFX, SDMA on S0ix */
5d70a549 2593 if (adev->in_s0ix &&
47198eb7
AD
2594 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2595 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2596 continue;
		/* skip PG for VCE/UVD, it's handled specially */
2598 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2599 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2600 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2601 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2602 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2603 /* enable powergating to save power */
2604 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2605 state);
c9f96fd5
RZ
2606 if (r) {
2607 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2608 adev->ip_blocks[i].version->funcs->name, r);
2609 return r;
2610 }
2611 }
2612 }
2dc80b00
S
2613 return 0;
2614}
2615
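/*
 * Note on ordering (restating how these helpers are called elsewhere in
 * this file): late init gates clockgating before powergating,
 *
 *	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
 *	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
 *
 * while fini and suspend ungate in the reverse order,
 *
 *	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
 *	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
 */
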
beff74bc
AD
2616static int amdgpu_device_enable_mgpu_fan_boost(void)
2617{
2618 struct amdgpu_gpu_instance *gpu_ins;
2619 struct amdgpu_device *adev;
2620 int i, ret = 0;
2621
2622 mutex_lock(&mgpu_info.mutex);
2623
2624 /*
2625 * MGPU fan boost feature should be enabled
2626 * only when there are two or more dGPUs in
2627 * the system
2628 */
2629 if (mgpu_info.num_dgpu < 2)
2630 goto out;
2631
2632 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2633 gpu_ins = &(mgpu_info.gpu_ins[i]);
2634 adev = gpu_ins->adev;
2635 if (!(adev->flags & AMD_IS_APU) &&
f10bb940 2636 !gpu_ins->mgpu_fan_enabled) {
beff74bc
AD
2637 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2638 if (ret)
2639 break;
2640
2641 gpu_ins->mgpu_fan_enabled = 1;
2642 }
2643 }
2644
2645out:
2646 mutex_unlock(&mgpu_info.mutex);
2647
2648 return ret;
2649}
2650
/**
 * amdgpu_device_ip_late_init - run late init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Late initialization pass for hardware IPs. The list of all the hardware
 * IPs that make up the asic is walked and the late_init callbacks are run.
 * late_init covers any special initialization that an IP requires
 * after all of them have been initialized or something that needs to happen
 * late in the init process.
 * Returns 0 on success, negative error code on failure.
 */
06ec9070 2663static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2664{
60599a03 2665 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2666 int i = 0, r;
2667
2668 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2669 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2670 continue;
2671 if (adev->ip_blocks[i].version->funcs->late_init) {
2672 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2673 if (r) {
2674 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2675 adev->ip_blocks[i].version->funcs->name, r);
2676 return r;
2677 }
2dc80b00 2678 }
73f847db 2679 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2680 }
2681
867e24ca 2682 r = amdgpu_ras_late_init(adev);
2683 if (r) {
2684 DRM_ERROR("amdgpu_ras_late_init failed %d", r);
2685 return r;
2686 }
2687
a891d239
DL
2688 amdgpu_ras_set_error_query_ready(adev, true);
2689
1112a46b
RZ
2690 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2691 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2692
06ec9070 2693 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2694
beff74bc
AD
2695 r = amdgpu_device_enable_mgpu_fan_boost();
2696 if (r)
2697 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2698
4da8b639 2699 /* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */
47fc644f
SS
2700 if (amdgpu_passthrough(adev) &&
2701 ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
2702 adev->asic_type == CHIP_ALDEBARAN))
bc143d8b 2703 amdgpu_dpm_handle_passthrough_sbr(adev, true);
60599a03
EQ
2704
2705 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2706 mutex_lock(&mgpu_info.mutex);
2707
2708 /*
2709 * Reset device p-state to low as this was booted with high.
2710 *
2711 * This should be performed only after all devices from the same
2712 * hive get initialized.
2713 *
2714 * However, it's unknown how many device in the hive in advance.
2715 * As this is counted one by one during devices initializations.
2716 *
2717 * So, we wait for all XGMI interlinked devices initialized.
2718 * This may bring some delays as those devices may come from
2719 * different hives. But that should be OK.
2720 */
2721 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2722 for (i = 0; i < mgpu_info.num_gpu; i++) {
2723 gpu_instance = &(mgpu_info.gpu_ins[i]);
2724 if (gpu_instance->adev->flags & AMD_IS_APU)
2725 continue;
2726
d84a430d
JK
2727 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2728 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
2729 if (r) {
2730 DRM_ERROR("pstate setting failed (%d).\n", r);
2731 break;
2732 }
2733 }
2734 }
2735
2736 mutex_unlock(&mgpu_info.mutex);
2737 }
2738
d38ceaf9
AD
2739 return 0;
2740}
2741
/**
 * amdgpu_device_smu_fini_early - smu hw_fini wrapper
 *
 * @adev: amdgpu_device pointer
 *
 * For ASICs that need to disable the SMC first
 */
static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
{
	int i, r;

	if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))
		return;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.hw)
			continue;
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
			r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
			/* XXX handle errors */
			if (r) {
				DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
			}
			adev->ip_blocks[i].status.hw = false;
			break;
		}
	}
}

e9669fb7 2772static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
d38ceaf9
AD
2773{
2774 int i, r;
2775
e9669fb7
AG
2776 for (i = 0; i < adev->num_ip_blocks; i++) {
2777 if (!adev->ip_blocks[i].version->funcs->early_fini)
2778 continue;
5278a159 2779
e9669fb7
AG
2780 r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
2781 if (r) {
2782 DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
2783 adev->ip_blocks[i].version->funcs->name, r);
2784 }
2785 }
c030f2e4 2786
05df1f01 2787 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2788 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2789
7270e895
TY
2790 amdgpu_amdkfd_suspend(adev, false);
2791
	/* Workaround for ASICs that need to disable the SMC first */
	amdgpu_device_smu_fini_early(adev);

d38ceaf9 2795 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2796 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2797 continue;
8201a67a 2798
a1255107 2799 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2800 /* XXX handle errors */
2c1a2784 2801 if (r) {
a1255107
AD
2802 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2803 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2804 }
8201a67a 2805
a1255107 2806 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2807 }
2808
6effad8a
GC
2809 if (amdgpu_sriov_vf(adev)) {
2810 if (amdgpu_virt_release_full_gpu(adev, false))
2811 DRM_ERROR("failed to release exclusive mode on fini\n");
2812 }
2813
e9669fb7
AG
2814 return 0;
2815}
2816
2817/**
2818 * amdgpu_device_ip_fini - run fini for hardware IPs
2819 *
2820 * @adev: amdgpu_device pointer
2821 *
2822 * Main teardown pass for hardware IPs. The list of all the hardware
2823 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2824 * are run. hw_fini tears down the hardware associated with each IP
2825 * and sw_fini tears down any software state associated with each IP.
2826 * Returns 0 on success, negative error code on failure.
2827 */
2828static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2829{
2830 int i, r;
2831
2832 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2833 amdgpu_virt_release_ras_err_handler_data(adev);
2834
e9669fb7
AG
2835 if (adev->gmc.xgmi.num_physical_nodes > 1)
2836 amdgpu_xgmi_remove_device(adev);
2837
c004d44e 2838 amdgpu_amdkfd_device_fini_sw(adev);
9950cda2 2839
d38ceaf9 2840 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2841 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2842 continue;
c12aba3a
ML
2843
2844 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2845 amdgpu_ucode_free_bo(adev);
1e256e27 2846 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a 2847 amdgpu_device_wb_fini(adev);
7ccfd79f 2848 amdgpu_device_mem_scratch_fini(adev);
533aed27 2849 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2850 }
2851
a1255107 2852 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2853 /* XXX handle errors */
2c1a2784 2854 if (r) {
a1255107
AD
2855 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2856 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2857 }
a1255107
AD
2858 adev->ip_blocks[i].status.sw = false;
2859 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2860 }
2861
a6dcfd9c 2862 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2863 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2864 continue;
a1255107
AD
2865 if (adev->ip_blocks[i].version->funcs->late_fini)
2866 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2867 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2868 }
2869
c030f2e4 2870 amdgpu_ras_fini(adev);
2871
d38ceaf9
AD
2872 return 0;
2873}
2874
e3ecdffa 2875/**
beff74bc 2876 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2877 *
1112a46b 2878 * @work: work_struct.
e3ecdffa 2879 */
beff74bc 2880static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2881{
2882 struct amdgpu_device *adev =
beff74bc 2883 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2884 int r;
2885
2886 r = amdgpu_ib_ring_tests(adev);
2887 if (r)
2888 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2889}
2890
1e317b99
RZ
2891static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2892{
2893 struct amdgpu_device *adev =
2894 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2895
90a92662
MD
2896 WARN_ON_ONCE(adev->gfx.gfx_off_state);
2897 WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
2898
2899 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2900 adev->gfx.gfx_off_state = true;
1e317b99
RZ
2901}
2902
e3ecdffa 2903/**
e7854a03 2904 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2905 *
2906 * @adev: amdgpu_device pointer
2907 *
2908 * Main suspend function for hardware IPs. The list of all the hardware
2909 * IPs that make up the asic is walked, clockgating is disabled and the
2910 * suspend callbacks are run. suspend puts the hardware and software state
2911 * in each IP into a state suitable for suspend.
2912 * Returns 0 on success, negative error code on failure.
2913 */
e7854a03
AD
2914static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2915{
2916 int i, r;
2917
50ec83f0
AD
2918 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2919 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2920
b31d6ada
EQ
2921 /*
2922 * Per PMFW team's suggestion, driver needs to handle gfxoff
2923 * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
2924 * scenario. Add the missing df cstate disablement here.
2925 */
2926 if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
2927 dev_warn(adev->dev, "Failed to disallow df cstate");
2928
e7854a03
AD
2929 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2930 if (!adev->ip_blocks[i].status.valid)
2931 continue;
2b9f7848 2932
e7854a03 2933 /* displays are handled separately */
2b9f7848
ND
2934 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
2935 continue;
2936
2937 /* XXX handle errors */
2938 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2939 /* XXX handle errors */
2940 if (r) {
2941 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2942 adev->ip_blocks[i].version->funcs->name, r);
2943 return r;
e7854a03 2944 }
2b9f7848
ND
2945
2946 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
2947 }
2948
e7854a03
AD
2949 return 0;
2950}
2951
2952/**
2953 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2954 *
2955 * @adev: amdgpu_device pointer
2956 *
2957 * Main suspend function for hardware IPs. The list of all the hardware
2958 * IPs that make up the asic is walked, clockgating is disabled and the
2959 * suspend callbacks are run. suspend puts the hardware and software state
2960 * in each IP into a state suitable for suspend.
2961 * Returns 0 on success, negative error code on failure.
2962 */
2963static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2964{
2965 int i, r;
2966
557f42a2 2967 if (adev->in_s0ix)
bc143d8b 2968 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
34416931 2969
d38ceaf9 2970 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2971 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2972 continue;
e7854a03
AD
2973 /* displays are handled in phase1 */
2974 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2975 continue;
bff77e86
LM
2976 /* PSP lost connection when err_event_athub occurs */
2977 if (amdgpu_ras_intr_triggered() &&
2978 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2979 adev->ip_blocks[i].status.hw = false;
2980 continue;
2981 }
e3c1b071 2982
2983 /* skip unnecessary suspend if we do not initialize them yet */
2984 if (adev->gmc.xgmi.pending_reset &&
2985 !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2986 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
2987 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2988 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
2989 adev->ip_blocks[i].status.hw = false;
2990 continue;
2991 }
557f42a2 2992
afa6646b 2993 /* skip suspend of gfx/mes and psp for S0ix
32ff160d
AD
2994 * gfx is in gfxoff state, so on resume it will exit gfxoff just
2995 * like at runtime. PSP is also part of the always on hardware
2996 * so no need to suspend it.
2997 */
557f42a2 2998 if (adev->in_s0ix &&
32ff160d 2999 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
afa6646b
AD
3000 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3001 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
557f42a2
AD
3002 continue;
3003
2a7798ea
AD
3004 /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3005 if (adev->in_s0ix &&
3006 (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 0, 0)) &&
3007 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3008 continue;
3009
e11c7750
TH
3010 /* Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot.
3011 * These are in TMR, hence are expected to be reused by PSP-TOS to reload
3012 * from this location and RLC Autoload automatically also gets loaded
3013 * from here based on PMFW -> PSP message during re-init sequence.
3014 * Therefore, the psp suspend & resume should be skipped to avoid destroy
3015 * the TMR and reload FWs again for IMU enabled APU ASICs.
3016 */
3017 if (amdgpu_in_reset(adev) &&
3018 (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3019 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3020 continue;
3021
d38ceaf9 3022 /* XXX handle errors */
a1255107 3023 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 3024 /* XXX handle errors */
2c1a2784 3025 if (r) {
a1255107
AD
3026 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3027 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 3028 }
876923fb 3029 adev->ip_blocks[i].status.hw = false;
a3a09142 3030 /* handle putting the SMC in the appropriate state */
47fc644f 3031 if (!amdgpu_sriov_vf(adev)) {
86b93fd6
JZ
3032 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3033 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3034 if (r) {
3035 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3036 adev->mp1_state, r);
3037 return r;
3038 }
a3a09142
AD
3039 }
3040 }
d38ceaf9
AD
3041 }
3042
3043 return 0;
3044}
3045
e7854a03
AD
3046/**
3047 * amdgpu_device_ip_suspend - run suspend for hardware IPs
3048 *
3049 * @adev: amdgpu_device pointer
3050 *
3051 * Main suspend function for hardware IPs. The list of all the hardware
3052 * IPs that make up the asic is walked, clockgating is disabled and the
3053 * suspend callbacks are run. suspend puts the hardware and software state
3054 * in each IP into a state suitable for suspend.
3055 * Returns 0 on success, negative error code on failure.
3056 */
3057int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3058{
3059 int r;
3060
3c73683c
JC
3061 if (amdgpu_sriov_vf(adev)) {
3062 amdgpu_virt_fini_data_exchange(adev);
e7819644 3063 amdgpu_virt_request_full_gpu(adev, false);
3c73683c 3064 }
e7819644 3065
e7854a03
AD
3066 r = amdgpu_device_ip_suspend_phase1(adev);
3067 if (r)
3068 return r;
3069 r = amdgpu_device_ip_suspend_phase2(adev);
3070
e7819644
YT
3071 if (amdgpu_sriov_vf(adev))
3072 amdgpu_virt_release_full_gpu(adev, false);
3073
e7854a03
AD
3074 return r;
3075}
3076
06ec9070 3077static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3078{
3079 int i, r;
3080
2cb681b6 3081 static enum amd_ip_block_type ip_order[] = {
2cb681b6 3082 AMD_IP_BLOCK_TYPE_COMMON,
c1c39032 3083 AMD_IP_BLOCK_TYPE_GMC,
39186aef 3084 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
3085 AMD_IP_BLOCK_TYPE_IH,
3086 };
a90ad3c2 3087
95ea3dbc 3088 for (i = 0; i < adev->num_ip_blocks; i++) {
2cb681b6
ML
3089 int j;
3090 struct amdgpu_ip_block *block;
a90ad3c2 3091
4cd2a96d
J
3092 block = &adev->ip_blocks[i];
3093 block->status.hw = false;
2cb681b6 3094
4cd2a96d 3095 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
2cb681b6 3096
4cd2a96d 3097 if (block->version->type != ip_order[j] ||
2cb681b6
ML
3098 !block->status.valid)
3099 continue;
3100
3101 r = block->version->funcs->hw_init(adev);
0aaeefcc 3102 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3103 if (r)
3104 return r;
482f0e53 3105 block->status.hw = true;
a90ad3c2
ML
3106 }
3107 }
3108
3109 return 0;
3110}
3111
06ec9070 3112static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3113{
3114 int i, r;
3115
2cb681b6
ML
3116 static enum amd_ip_block_type ip_order[] = {
3117 AMD_IP_BLOCK_TYPE_SMC,
3118 AMD_IP_BLOCK_TYPE_DCE,
3119 AMD_IP_BLOCK_TYPE_GFX,
3120 AMD_IP_BLOCK_TYPE_SDMA,
ec64350d 3121 AMD_IP_BLOCK_TYPE_MES,
257deb8c 3122 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07 3123 AMD_IP_BLOCK_TYPE_VCE,
d2cdc014
YZ
3124 AMD_IP_BLOCK_TYPE_VCN,
3125 AMD_IP_BLOCK_TYPE_JPEG
2cb681b6 3126 };
a90ad3c2 3127
2cb681b6
ML
3128 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3129 int j;
3130 struct amdgpu_ip_block *block;
a90ad3c2 3131
2cb681b6
ML
3132 for (j = 0; j < adev->num_ip_blocks; j++) {
3133 block = &adev->ip_blocks[j];
3134
3135 if (block->version->type != ip_order[i] ||
482f0e53
ML
3136 !block->status.valid ||
3137 block->status.hw)
2cb681b6
ML
3138 continue;
3139
895bd048
JZ
3140 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3141 r = block->version->funcs->resume(adev);
3142 else
3143 r = block->version->funcs->hw_init(adev);
3144
0aaeefcc 3145 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3146 if (r)
3147 return r;
482f0e53 3148 block->status.hw = true;
a90ad3c2
ML
3149 }
3150 }
3151
3152 return 0;
3153}
3154
e3ecdffa
AD
3155/**
3156 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3157 *
3158 * @adev: amdgpu_device pointer
3159 *
3160 * First resume function for hardware IPs. The list of all the hardware
3161 * IPs that make up the asic is walked and the resume callbacks are run for
3162 * COMMON, GMC, and IH. resume puts the hardware into a functional state
3163 * after a suspend and updates the software state as necessary. This
3164 * function is also used for restoring the GPU after a GPU reset.
3165 * Returns 0 on success, negative error code on failure.
3166 */
06ec9070 3167static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
3168{
3169 int i, r;
3170
a90ad3c2 3171 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3172 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 3173 continue;
a90ad3c2 3174 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3175 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
d7274ec7
BZ
3176 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3177 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
482f0e53 3178
fcf0649f
CZ
3179 r = adev->ip_blocks[i].version->funcs->resume(adev);
3180 if (r) {
3181 DRM_ERROR("resume of IP block <%s> failed %d\n",
3182 adev->ip_blocks[i].version->funcs->name, r);
3183 return r;
3184 }
482f0e53 3185 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
3186 }
3187 }
3188
3189 return 0;
3190}
3191
e3ecdffa
AD
3192/**
3193 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3194 *
3195 * @adev: amdgpu_device pointer
3196 *
 * Second resume function for hardware IPs. The list of all the hardware
3198 * IPs that make up the asic is walked and the resume callbacks are run for
3199 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
3200 * functional state after a suspend and updates the software state as
3201 * necessary. This function is also used for restoring the GPU after a GPU
3202 * reset.
3203 * Returns 0 on success, negative error code on failure.
3204 */
06ec9070 3205static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3206{
3207 int i, r;
3208
3209 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3210 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 3211 continue;
fcf0649f 3212 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3213 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
3214 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3215 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 3216 continue;
a1255107 3217 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 3218 if (r) {
a1255107
AD
3219 DRM_ERROR("resume of IP block <%s> failed %d\n",
3220 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 3221 return r;
2c1a2784 3222 }
482f0e53 3223 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
3224 }
3225
3226 return 0;
3227}
3228
e3ecdffa
AD
3229/**
3230 * amdgpu_device_ip_resume - run resume for hardware IPs
3231 *
3232 * @adev: amdgpu_device pointer
3233 *
3234 * Main resume function for hardware IPs. The hardware IPs
3235 * are split into two resume functions because they are
b8920e1e 3236 * also used in recovering from a GPU reset and some additional
 * steps need to be taken between them. In this case (S3/S4) they are
3238 * run sequentially.
3239 * Returns 0 on success, negative error code on failure.
3240 */
06ec9070 3241static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
3242{
3243 int r;
3244
06ec9070 3245 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
3246 if (r)
3247 return r;
7a3e0bb2
RZ
3248
3249 r = amdgpu_device_fw_loading(adev);
3250 if (r)
3251 return r;
3252
06ec9070 3253 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
3254
3255 return r;
3256}
3257
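/*
 * Resume ordering summary (restating what the helpers above implement):
 * phase1 brings up COMMON, GMC and IH (plus PSP when running SR-IOV),
 * amdgpu_device_fw_loading() then reloads firmware, and phase2 resumes
 * every remaining block. S3/S4 resume runs these back to back; GPU reset
 * recovery inserts additional steps between them.
 */
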
e3ecdffa
AD
3258/**
3259 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3260 *
3261 * @adev: amdgpu_device pointer
3262 *
3263 * Query the VBIOS data tables to determine if the board supports SR-IOV.
3264 */
4e99a44e 3265static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 3266{
6867e1b5
ML
3267 if (amdgpu_sriov_vf(adev)) {
3268 if (adev->is_atom_fw) {
58ff791a 3269 if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
6867e1b5
ML
3270 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3271 } else {
3272 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3273 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3274 }
3275
3276 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3277 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 3278 }
048765ad
AR
3279}
3280
e3ecdffa
AD
3281/**
3282 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3283 *
3284 * @asic_type: AMD asic type
3285 *
 * Check if there is DC (new modesetting infrastructure) support for an asic.
3287 * returns true if DC has support, false if not.
3288 */
4562236b
HW
3289bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3290{
3291 switch (asic_type) {
0637d417
AD
3292#ifdef CONFIG_DRM_AMDGPU_SI
3293 case CHIP_HAINAN:
3294#endif
3295 case CHIP_TOPAZ:
3296 /* chips with no display hardware */
3297 return false;
4562236b 3298#if defined(CONFIG_DRM_AMD_DC)
64200c46
MR
3299 case CHIP_TAHITI:
3300 case CHIP_PITCAIRN:
3301 case CHIP_VERDE:
3302 case CHIP_OLAND:
2d32ffd6
AD
3303 /*
3304 * We have systems in the wild with these ASICs that require
3305 * LVDS and VGA support which is not supported with DC.
3306 *
3307 * Fallback to the non-DC driver here by default so as not to
3308 * cause regressions.
3309 */
3310#if defined(CONFIG_DRM_AMD_DC_SI)
3311 return amdgpu_dc > 0;
3312#else
3313 return false;
64200c46 3314#endif
4562236b 3315 case CHIP_BONAIRE:
0d6fbccb 3316 case CHIP_KAVERI:
367e6687
AD
3317 case CHIP_KABINI:
3318 case CHIP_MULLINS:
d9fda248
HW
3319 /*
3320 * We have systems in the wild with these ASICs that require
b5a0168e 3321 * VGA support which is not supported with DC.
d9fda248
HW
3322 *
3323 * Fallback to the non-DC driver here by default so as not to
3324 * cause regressions.
3325 */
3326 return amdgpu_dc > 0;
f7f12b25 3327 default:
fd187853 3328 return amdgpu_dc != 0;
f7f12b25 3329#else
4562236b 3330 default:
93b09a9a 3331 if (amdgpu_dc > 0)
b8920e1e 3332 DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
4562236b 3333 return false;
f7f12b25 3334#endif
4562236b
HW
3335 }
3336}
3337
3338/**
3339 * amdgpu_device_has_dc_support - check if dc is supported
3340 *
982a820b 3341 * @adev: amdgpu_device pointer
4562236b
HW
3342 *
3343 * Returns true for supported, false for not supported
3344 */
3345bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3346{
25263da3 3347 if (adev->enable_virtual_display ||
abaf210c 3348 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
2555039d
XY
3349 return false;
3350
4562236b
HW
3351 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3352}
3353
d4535e2c
AG
3354static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3355{
3356 struct amdgpu_device *adev =
3357 container_of(__work, struct amdgpu_device, xgmi_reset_work);
d95e8e97 3358 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
d4535e2c 3359
c6a6e2db
AG
3360 /* It's a bug to not have a hive within this function */
3361 if (WARN_ON(!hive))
3362 return;
3363
3364 /*
3365 * Use task barrier to synchronize all xgmi reset works across the
3366 * hive. task_barrier_enter and task_barrier_exit will block
3367 * until all the threads running the xgmi reset works reach
3368 * those points. task_barrier_full will do both blocks.
3369 */
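 /*
  * An illustrative shape of the rendezvous (hypothetical worker outline,
  * not driver code): each node in the hive runs
  *     task_barrier_enter(&hive->tb);   blocks until every node arrives
  *     ... enter BACO ...
  *     task_barrier_exit(&hive->tb);    blocks again before leaving BACO
  * while task_barrier_full(&hive->tb) performs both rendezvous points back
  * to back ahead of the full ASIC reset taken in the else branch below.
  */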
3370 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3371
3372 task_barrier_enter(&hive->tb);
4a580877 3373 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
c6a6e2db
AG
3374
3375 if (adev->asic_reset_res)
3376 goto fail;
3377
3378 task_barrier_exit(&hive->tb);
4a580877 3379 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
c6a6e2db
AG
3380
3381 if (adev->asic_reset_res)
3382 goto fail;
43c4d576 3383
5e67bba3 3384 if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
3385 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
3386 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
c6a6e2db
AG
3387 } else {
3388
3389 task_barrier_full(&hive->tb);
3390 adev->asic_reset_res = amdgpu_asic_reset(adev);
3391 }
ce316fa5 3392
c6a6e2db 3393fail:
d4535e2c 3394 if (adev->asic_reset_res)
fed184e9 3395 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
4a580877 3396 adev->asic_reset_res, adev_to_drm(adev)->unique);
d95e8e97 3397 amdgpu_put_xgmi_hive(hive);
d4535e2c
AG
3398}
3399
71f98027
AD
3400static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3401{
3402 char *input = amdgpu_lockup_timeout;
3403 char *timeout_setting = NULL;
3404 int index = 0;
3405 long timeout;
3406 int ret = 0;
3407
3408 /*
67387dfe
AD
 3409 * By default, the timeout for non-compute jobs is 10000 ms
 3410 * and 60000 ms for compute jobs.
71f98027 3411 * In SR-IOV mode, the compute timeout is 60000 ms only when one VF
b7b2a316 3412 * owns all resources (pp_one_vf), otherwise it is 10000 ms.
71f98027
AD
3413 */
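 /*
  * The parsing loop below accepts up to four comma-separated values in the
  * order gfx,compute,sdma,video. For example (hypothetical values),
  * lockup_timeout=10000,60000,10000,5000 sets those four timeouts in ms;
  * 0 keeps a ring's default, a negative value disables the timeout entirely,
  * and a single value applies to every non-compute ring (and to compute as
  * well under SR-IOV or passthrough).
  */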
3414 adev->gfx_timeout = msecs_to_jiffies(10000);
3415 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
9882e278
ED
3416 if (amdgpu_sriov_vf(adev))
3417 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3418 msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
71f98027 3419 else
67387dfe 3420 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027 3421
f440ff44 3422 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 3423 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 3424 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
3425 ret = kstrtol(timeout_setting, 0, &timeout);
3426 if (ret)
3427 return ret;
3428
3429 if (timeout == 0) {
3430 index++;
3431 continue;
3432 } else if (timeout < 0) {
3433 timeout = MAX_SCHEDULE_TIMEOUT;
127aedf9
CK
3434 dev_warn(adev->dev, "lockup timeout disabled");
3435 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
71f98027
AD
3436 } else {
3437 timeout = msecs_to_jiffies(timeout);
3438 }
3439
3440 switch (index++) {
3441 case 0:
3442 adev->gfx_timeout = timeout;
3443 break;
3444 case 1:
3445 adev->compute_timeout = timeout;
3446 break;
3447 case 2:
3448 adev->sdma_timeout = timeout;
3449 break;
3450 case 3:
3451 adev->video_timeout = timeout;
3452 break;
3453 default:
3454 break;
3455 }
3456 }
3457 /*
3458 * There is only one value specified and
3459 * it should apply to all non-compute jobs.
3460 */
bcccee89 3461 if (index == 1) {
71f98027 3462 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
3463 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3464 adev->compute_timeout = adev->gfx_timeout;
3465 }
71f98027
AD
3466 }
3467
3468 return ret;
3469}
d4535e2c 3470
4a74c38c
PY
3471/**
3472 * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3473 *
3474 * @adev: amdgpu_device pointer
3475 *
 3476 * RAM is direct mapped to the GPU if the IOMMU is not enabled or is in passthrough mode.
3477 */
3478static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3479{
3480 struct iommu_domain *domain;
3481
3482 domain = iommu_get_domain_for_dev(adev->dev);
3483 if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3484 adev->ram_is_direct_mapped = true;
3485}
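/*
 * Background for the check above: iommu_get_domain_for_dev() returning NULL
 * means no IOMMU translation is active for the device, and an
 * IOMMU_DOMAIN_IDENTITY domain is a pass-through mapping; in both cases DMA
 * addresses equal physical addresses, i.e. system RAM is direct mapped.
 */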
3486
77f3a5cd 3487static const struct attribute *amdgpu_dev_attributes[] = {
77f3a5cd
ND
3488 &dev_attr_pcie_replay_count.attr,
3489 NULL
3490};
3491
02ff519e
AD
3492static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
3493{
3494 if (amdgpu_mcbp == 1)
3495 adev->gfx.mcbp = true;
1e9e15dc
JZ
3496 else if (amdgpu_mcbp == 0)
3497 adev->gfx.mcbp = false;
3498 else if ((adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 0, 0)) &&
3499 (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 0, 0)) &&
3500 adev->gfx.num_gfx_rings)
50a7c876
AD
3501 adev->gfx.mcbp = true;
3502
02ff519e
AD
3503 if (amdgpu_sriov_vf(adev))
3504 adev->gfx.mcbp = true;
3505
3506 if (adev->gfx.mcbp)
3507 DRM_INFO("MCBP is enabled\n");
3508}
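/*
 * In short, the policy above: amdgpu_mcbp=1 forces mid-command-buffer
 * preemption (MCBP) on, 0 forces it off, and the default auto-enables it on
 * GC 9.x parts that expose gfx rings as well as on all SR-IOV VFs.
 */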
3509
d38ceaf9
AD
3510/**
3511 * amdgpu_device_init - initialize the driver
3512 *
3513 * @adev: amdgpu_device pointer
d38ceaf9
AD
3514 * @flags: driver flags
3515 *
3516 * Initializes the driver info and hw (all asics).
3517 * Returns 0 for success or an error on failure.
3518 * Called at driver startup.
3519 */
3520int amdgpu_device_init(struct amdgpu_device *adev,
d38ceaf9
AD
3521 uint32_t flags)
3522{
8aba21b7
LT
3523 struct drm_device *ddev = adev_to_drm(adev);
3524 struct pci_dev *pdev = adev->pdev;
d38ceaf9 3525 int r, i;
b98c6299 3526 bool px = false;
95844d20 3527 u32 max_MBps;
59e9fff1 3528 int tmp;
d38ceaf9
AD
3529
3530 adev->shutdown = false;
d38ceaf9 3531 adev->flags = flags;
4e66d7d2
YZ
3532
3533 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3534 adev->asic_type = amdgpu_force_asic_type;
3535 else
3536 adev->asic_type = flags & AMD_ASIC_MASK;
3537
d38ceaf9 3538 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 3539 if (amdgpu_emu_mode == 1)
8bdab6bb 3540 adev->usec_timeout *= 10;
770d13b1 3541 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
3542 adev->accel_working = false;
3543 adev->num_rings = 0;
68ce8b24 3544 RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
d38ceaf9
AD
3545 adev->mman.buffer_funcs = NULL;
3546 adev->mman.buffer_funcs_ring = NULL;
3547 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 3548 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 3549 adev->gmc.gmc_funcs = NULL;
7bd939d0 3550 adev->harvest_ip_mask = 0x0;
f54d1867 3551 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 3552 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
3553
3554 adev->smc_rreg = &amdgpu_invalid_rreg;
3555 adev->smc_wreg = &amdgpu_invalid_wreg;
3556 adev->pcie_rreg = &amdgpu_invalid_rreg;
3557 adev->pcie_wreg = &amdgpu_invalid_wreg;
0c552ed3
LM
3558 adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
3559 adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
36b9a952
HR
3560 adev->pciep_rreg = &amdgpu_invalid_rreg;
3561 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
3562 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3563 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
3564 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3565 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3566 adev->didt_rreg = &amdgpu_invalid_rreg;
3567 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
3568 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3569 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
3570 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3571 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3572
3e39ab90
AD
3573 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3574 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3575 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
3576
 3577 /* mutex initializations are all done here so we
b8920e1e
SS
 3578 * can call these functions again later without locking issues
3579 */
0e5ca0d1 3580 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
3581 mutex_init(&adev->pm.mutex);
3582 mutex_init(&adev->gfx.gpu_clock_mutex);
3583 mutex_init(&adev->srbm_mutex);
b8866c26 3584 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3585 mutex_init(&adev->gfx.gfx_off_mutex);
98a54e88 3586 mutex_init(&adev->gfx.partition_mutex);
d38ceaf9 3587 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3588 mutex_init(&adev->mn_lock);
e23b74aa 3589 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3590 hash_init(adev->mn_hash);
32eaeae0 3591 mutex_init(&adev->psp.mutex);
bd052211 3592 mutex_init(&adev->notifier_lock);
8cda7a4f 3593 mutex_init(&adev->pm.stable_pstate_ctx_lock);
f113cc32 3594 mutex_init(&adev->benchmark_mutex);
d38ceaf9 3595
ab3b9de6 3596 amdgpu_device_init_apu_flags(adev);
9f6a7857 3597
912dfc84
EQ
3598 r = amdgpu_device_check_arguments(adev);
3599 if (r)
3600 return r;
d38ceaf9 3601
d38ceaf9
AD
3602 spin_lock_init(&adev->mmio_idx_lock);
3603 spin_lock_init(&adev->smc_idx_lock);
3604 spin_lock_init(&adev->pcie_idx_lock);
3605 spin_lock_init(&adev->uvd_ctx_idx_lock);
3606 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3607 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3608 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3609 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3610 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3611
0c4e7fa5
CZ
3612 INIT_LIST_HEAD(&adev->shadow_list);
3613 mutex_init(&adev->shadow_list_lock);
3614
655ce9cb 3615 INIT_LIST_HEAD(&adev->reset_list);
3616
6492e1b0 3617 INIT_LIST_HEAD(&adev->ras_list);
3618
beff74bc
AD
3619 INIT_DELAYED_WORK(&adev->delayed_init_work,
3620 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3621 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3622 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3623
d4535e2c
AG
3624 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3625
d23ee13f 3626 adev->gfx.gfx_off_req_count = 1;
0ad7347a
AA
3627 adev->gfx.gfx_off_residency = 0;
3628 adev->gfx.gfx_off_entrycount = 0;
b6e79d9a 3629 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3630
b265bdbd
EQ
3631 atomic_set(&adev->throttling_logging_enabled, 1);
3632 /*
3633 * If throttling continues, logging will be performed every minute
3634 * to avoid log flooding. "-1" is subtracted since the thermal
3635 * throttling interrupt comes every second. Thus, the total logging
 3636 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
3637 * for throttling interrupt) = 60 seconds.
3638 */
3639 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3640 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
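 /*
  * ratelimit_state_init(rs, interval, burst) allows at most "burst" messages
  * per "interval" jiffies, so the (60 - 1) * HZ / 1 pair above emits at most
  * one throttling message every 59 seconds.
  */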
3641
0fa49558
AX
3642 /* Registers mapping */
3643 /* TODO: block userspace mapping of io register */
da69c161
KW
3644 if (adev->asic_type >= CHIP_BONAIRE) {
3645 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3646 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3647 } else {
3648 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3649 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3650 }
d38ceaf9 3651
6c08e0ef
EQ
3652 for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3653 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3654
d38ceaf9 3655 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
b8920e1e 3656 if (!adev->rmmio)
d38ceaf9 3657 return -ENOMEM;
b8920e1e 3658
d38ceaf9 3659 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
b8920e1e 3660 DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size);
d38ceaf9 3661
436afdfa
PY
3662 /*
 3663 * The reset domain needs to be present early, before the XGMI hive is
 3664 * discovered (if any) and initialized, so the reset sem and in_gpu_reset
 3665 * flag can be used early during init and before calling RREG32.
3666 */
3667 adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3668 if (!adev->reset_domain)
3669 return -ENOMEM;
3670
3aa0115d
ML
3671 /* detect hw virtualization here */
3672 amdgpu_detect_virtualization(adev);
3673
04e85958
TL
3674 amdgpu_device_get_pcie_info(adev);
3675
dffa11b4
ML
3676 r = amdgpu_device_get_job_timeout_settings(adev);
3677 if (r) {
3678 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4ef87d8f 3679 return r;
a190d1c7
XY
3680 }
3681
d38ceaf9 3682 /* early init functions */
06ec9070 3683 r = amdgpu_device_ip_early_init(adev);
d38ceaf9 3684 if (r)
4ef87d8f 3685 return r;
d38ceaf9 3686
02ff519e
AD
3687 amdgpu_device_set_mcbp(adev);
3688
b7cdb41e
ML
3689 /* Get rid of things like offb */
3690 r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
3691 if (r)
3692 return r;
3693
4d33e704
SK
3694 /* Enable TMZ based on IP_VERSION */
3695 amdgpu_gmc_tmz_set(adev);
3696
957b0787 3697 amdgpu_gmc_noretry_set(adev);
4a0165f0
VS
 3698 /* Need to get XGMI info early to decide the reset behavior */
3699 if (adev->gmc.xgmi.supported) {
3700 r = adev->gfxhub.funcs->get_xgmi_info(adev);
3701 if (r)
3702 return r;
3703 }
3704
8e6d0b69 3705 /* enable PCIE atomic ops */
b4520bfd
GW
3706 if (amdgpu_sriov_vf(adev)) {
3707 if (adev->virt.fw_reserve.p_pf2vf)
3708 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
3709 adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
3710 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
0e768043
YZ
 3711 /* APUs with GFX9 and newer don't rely on PCIe atomics; an internal
 3712 * path natively supports atomics, so set have_atomics_support to true.
3713 */
b4520bfd
GW
3714 } else if ((adev->flags & AMD_IS_APU) &&
3715 (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))) {
0e768043 3716 adev->have_atomics_support = true;
b4520bfd 3717 } else {
8e6d0b69 3718 adev->have_atomics_support =
3719 !pci_enable_atomic_ops_to_root(adev->pdev,
3720 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3721 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
b4520bfd
GW
3722 }
3723
8e6d0b69 3724 if (!adev->have_atomics_support)
3725 dev_info(adev->dev, "PCIE atomic ops is not supported\n");
3726
6585661d 3727 /* doorbell bar mapping and doorbell index init*/
43c064db 3728 amdgpu_doorbell_init(adev);
6585661d 3729
9475a943
SL
3730 if (amdgpu_emu_mode == 1) {
3731 /* post the asic on emulation mode */
3732 emu_soc_asic_init(adev);
bfca0289 3733 goto fence_driver_init;
9475a943 3734 }
bfca0289 3735
04442bf7
LL
3736 amdgpu_reset_init(adev);
3737
4e99a44e 3738 /* detect if we are with an SRIOV vbios */
b4520bfd
GW
3739 if (adev->bios)
3740 amdgpu_device_detect_sriov_bios(adev);
048765ad 3741
95e8e59e
AD
3742 /* check if we need to reset the asic
3743 * E.g., driver was not cleanly unloaded previously, etc.
3744 */
f14899fd 3745 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
e3c1b071 3746 if (adev->gmc.xgmi.num_physical_nodes) {
3747 dev_info(adev->dev, "Pending hive reset.\n");
3748 adev->gmc.xgmi.pending_reset = true;
 3749 /* Only init the blocks necessary for SMU to handle the reset */
3750 for (i = 0; i < adev->num_ip_blocks; i++) {
3751 if (!adev->ip_blocks[i].status.valid)
3752 continue;
3753 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3754 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3755 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3756 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
751f43e7 3757 DRM_DEBUG("IP %s disabled for hw_init.\n",
e3c1b071 3758 adev->ip_blocks[i].version->funcs->name);
3759 adev->ip_blocks[i].status.hw = true;
3760 }
3761 }
3762 } else {
59e9fff1 3763 tmp = amdgpu_reset_method;
3764 /* It should do a default reset when loading or reloading the driver,
3765 * regardless of the module parameter reset_method.
3766 */
3767 amdgpu_reset_method = AMD_RESET_METHOD_NONE;
e3c1b071 3768 r = amdgpu_asic_reset(adev);
59e9fff1 3769 amdgpu_reset_method = tmp;
e3c1b071 3770 if (r) {
3771 dev_err(adev->dev, "asic reset on init failed\n");
3772 goto failed;
3773 }
95e8e59e
AD
3774 }
3775 }
3776
d38ceaf9 3777 /* Post card if necessary */
39c640c0 3778 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3779 if (!adev->bios) {
bec86378 3780 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3781 r = -EINVAL;
3782 goto failed;
d38ceaf9 3783 }
bec86378 3784 DRM_INFO("GPU posting now...\n");
4d2997ab 3785 r = amdgpu_device_asic_init(adev);
4e99a44e
ML
3786 if (r) {
3787 dev_err(adev->dev, "gpu post error!\n");
3788 goto failed;
3789 }
d38ceaf9
AD
3790 }
3791
9535a86a
SZ
3792 if (adev->bios) {
3793 if (adev->is_atom_fw) {
3794 /* Initialize clocks */
3795 r = amdgpu_atomfirmware_get_clock_info(adev);
3796 if (r) {
3797 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
3798 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3799 goto failed;
3800 }
3801 } else {
3802 /* Initialize clocks */
3803 r = amdgpu_atombios_get_clock_info(adev);
3804 if (r) {
3805 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
3806 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3807 goto failed;
3808 }
3809 /* init i2c buses */
3810 if (!amdgpu_device_has_dc_support(adev))
3811 amdgpu_atombios_i2c_init(adev);
a5bde2f9 3812 }
2c1a2784 3813 }
d38ceaf9 3814
bfca0289 3815fence_driver_init:
d38ceaf9 3816 /* Fence driver */
067f44c8 3817 r = amdgpu_fence_driver_sw_init(adev);
2c1a2784 3818 if (r) {
067f44c8 3819 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
e23b74aa 3820 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3821 goto failed;
2c1a2784 3822 }
d38ceaf9
AD
3823
3824 /* init the mode config */
4a580877 3825 drm_mode_config_init(adev_to_drm(adev));
d38ceaf9 3826
06ec9070 3827 r = amdgpu_device_ip_init(adev);
d38ceaf9 3828 if (r) {
06ec9070 3829 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3830 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
970fd197 3831 goto release_ras_con;
d38ceaf9
AD
3832 }
3833
8d35a259
LG
3834 amdgpu_fence_driver_hw_init(adev);
3835
d69b8971
YZ
3836 dev_info(adev->dev,
3837 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
3838 adev->gfx.config.max_shader_engines,
3839 adev->gfx.config.max_sh_per_se,
3840 adev->gfx.config.max_cu_per_sh,
3841 adev->gfx.cu_info.number);
3842
d38ceaf9
AD
3843 adev->accel_working = true;
3844
e59c0205
AX
3845 amdgpu_vm_check_compute_bug(adev);
3846
95844d20
MO
3847 /* Initialize the buffer migration limit. */
3848 if (amdgpu_moverate >= 0)
3849 max_MBps = amdgpu_moverate;
3850 else
3851 max_MBps = 8; /* Allow 8 MB/s. */
3852 /* Get a log2 for easy divisions. */
3853 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3854
184d8384
LL
3855 r = amdgpu_atombios_sysfs_init(adev);
3856 if (r)
3857 drm_err(&adev->ddev,
3858 "registering atombios sysfs failed (%d).\n", r);
3859
d2f52ac8 3860 r = amdgpu_pm_sysfs_init(adev);
53e9d836
GC
3861 if (r)
3862 DRM_ERROR("registering pm sysfs failed (%d).\n", r);
d2f52ac8 3863
5bb23532 3864 r = amdgpu_ucode_sysfs_init(adev);
7c868b59
YT
3865 if (r) {
3866 adev->ucode_sysfs_en = false;
5bb23532 3867 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
7c868b59
YT
3868 } else
3869 adev->ucode_sysfs_en = true;
5bb23532 3870
b0adca4d
EQ
3871 /*
3872 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
 3873 * Otherwise the mgpu fan boost feature will be skipped because the
 3874 * gpu instance count would be too low.
3875 */
3876 amdgpu_register_gpu_instance(adev);
3877
d38ceaf9
AD
3878 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3879 * explicit gating rather than handling it automatically.
3880 */
e3c1b071 3881 if (!adev->gmc.xgmi.pending_reset) {
3882 r = amdgpu_device_ip_late_init(adev);
3883 if (r) {
3884 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
3885 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
970fd197 3886 goto release_ras_con;
e3c1b071 3887 }
3888 /* must succeed. */
3889 amdgpu_ras_resume(adev);
3890 queue_delayed_work(system_wq, &adev->delayed_init_work,
3891 msecs_to_jiffies(AMDGPU_RESUME_MS));
2c1a2784 3892 }
d38ceaf9 3893
38eecbe0
CL
3894 if (amdgpu_sriov_vf(adev)) {
3895 amdgpu_virt_release_full_gpu(adev, true);
2c738637 3896 flush_delayed_work(&adev->delayed_init_work);
38eecbe0 3897 }
2c738637 3898
77f3a5cd 3899 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
5aea5327 3900 if (r)
77f3a5cd 3901 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166 3902
7957ec80
LL
3903 amdgpu_fru_sysfs_init(adev);
3904
d155bef0
AB
3905 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3906 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
3907 if (r)
3908 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3909
c1dd4aa6
AG
3910 /* Have stored pci confspace at hand for restore in sudden PCI error */
3911 if (amdgpu_device_cache_pci_state(adev->pdev))
3912 pci_restore_state(pdev);
3913
8c3dd61c
KHF
3914 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
3915 /* this will fail for cards that aren't VGA class devices, just
b8920e1e
SS
3916 * ignore it
3917 */
8c3dd61c 3918 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
bf44e8ce 3919 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
8c3dd61c 3920
d37a3929
OC
3921 px = amdgpu_device_supports_px(ddev);
3922
3923 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
3924 apple_gmux_detect(NULL, NULL)))
8c3dd61c
KHF
3925 vga_switcheroo_register_client(adev->pdev,
3926 &amdgpu_switcheroo_ops, px);
d37a3929
OC
3927
3928 if (px)
8c3dd61c 3929 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
8c3dd61c 3930
e3c1b071 3931 if (adev->gmc.xgmi.pending_reset)
3932 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
3933 msecs_to_jiffies(AMDGPU_RESUME_MS));
3934
4a74c38c
PY
3935 amdgpu_device_check_iommu_direct_map(adev);
3936
d38ceaf9 3937 return 0;
83ba126a 3938
970fd197 3939release_ras_con:
38eecbe0
CL
3940 if (amdgpu_sriov_vf(adev))
3941 amdgpu_virt_release_full_gpu(adev, true);
3942
3943 /* failed in exclusive mode due to timeout */
3944 if (amdgpu_sriov_vf(adev) &&
3945 !amdgpu_sriov_runtime(adev) &&
3946 amdgpu_virt_mmio_blocked(adev) &&
3947 !amdgpu_virt_wait_reset(adev)) {
3948 dev_err(adev->dev, "VF exclusive mode timeout\n");
3949 /* Don't send request since VF is inactive. */
3950 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3951 adev->virt.ops = NULL;
3952 r = -EAGAIN;
3953 }
970fd197
SY
3954 amdgpu_release_ras_context(adev);
3955
83ba126a 3956failed:
89041940 3957 amdgpu_vf_error_trans_all(adev);
8840a387 3958
83ba126a 3959 return r;
d38ceaf9
AD
3960}
3961
07775fc1
AG
3962static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
3963{
62d5f9f7 3964
07775fc1
AG
3965 /* Clear all CPU mappings pointing to this device */
3966 unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
3967
3968 /* Unmap all mapped bars - Doorbell, registers and VRAM */
43c064db 3969 amdgpu_doorbell_fini(adev);
07775fc1
AG
3970
3971 iounmap(adev->rmmio);
3972 adev->rmmio = NULL;
3973 if (adev->mman.aper_base_kaddr)
3974 iounmap(adev->mman.aper_base_kaddr);
3975 adev->mman.aper_base_kaddr = NULL;
3976
3977 /* Memory manager related */
a0ba1279 3978 if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
07775fc1
AG
3979 arch_phys_wc_del(adev->gmc.vram_mtrr);
3980 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
3981 }
3982}
3983
d38ceaf9 3984/**
bbe04dec 3985 * amdgpu_device_fini_hw - tear down the driver
d38ceaf9
AD
3986 *
3987 * @adev: amdgpu_device pointer
3988 *
3989 * Tear down the driver info (all asics).
3990 * Called at driver shutdown.
3991 */
72c8c97b 3992void amdgpu_device_fini_hw(struct amdgpu_device *adev)
d38ceaf9 3993{
aac89168 3994 dev_info(adev->dev, "amdgpu: finishing device.\n");
9f875167 3995 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 3996 adev->shutdown = true;
9f875167 3997
752c683d
ML
 3998 /* make sure the IB tests have finished before entering exclusive mode
 3999 * to avoid preempting them
b8920e1e 4000 */
519b8b76 4001 if (amdgpu_sriov_vf(adev)) {
752c683d 4002 amdgpu_virt_request_full_gpu(adev, false);
519b8b76
BZ
4003 amdgpu_virt_fini_data_exchange(adev);
4004 }
752c683d 4005
e5b03032
ML
4006 /* disable all interrupts */
4007 amdgpu_irq_disable_all(adev);
47fc644f 4008 if (adev->mode_info.mode_config_initialized) {
1053b9c9 4009 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4a580877 4010 drm_helper_force_disable_all(adev_to_drm(adev));
ff97cba8 4011 else
4a580877 4012 drm_atomic_helper_shutdown(adev_to_drm(adev));
ff97cba8 4013 }
8d35a259 4014 amdgpu_fence_driver_hw_fini(adev);
72c8c97b 4015
cd3a8a59 4016 if (adev->mman.initialized)
9bff18d1 4017 drain_workqueue(adev->mman.bdev.wq);
98f56188 4018
53e9d836 4019 if (adev->pm.sysfs_initialized)
7c868b59 4020 amdgpu_pm_sysfs_fini(adev);
72c8c97b
AG
4021 if (adev->ucode_sysfs_en)
4022 amdgpu_ucode_sysfs_fini(adev);
4023 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
7957ec80 4024 amdgpu_fru_sysfs_fini(adev);
72c8c97b 4025
232d1d43
SY
4026 /* disable ras feature must before hw fini */
4027 amdgpu_ras_pre_fini(adev);
4028
e9669fb7 4029 amdgpu_device_ip_fini_early(adev);
d10d0daa 4030
a3848df6
YW
4031 amdgpu_irq_fini_hw(adev);
4032
b6fd6e0f
SK
4033 if (adev->mman.initialized)
4034 ttm_device_clear_dma_mappings(&adev->mman.bdev);
894c6890 4035
d10d0daa 4036 amdgpu_gart_dummy_page_fini(adev);
07775fc1 4037
39934d3e
VP
4038 if (drm_dev_is_unplugged(adev_to_drm(adev)))
4039 amdgpu_device_unmap_mmio(adev);
87172e89 4040
72c8c97b
AG
4041}
4042
4043void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4044{
62d5f9f7 4045 int idx;
d37a3929 4046 bool px;
62d5f9f7 4047
8d35a259 4048 amdgpu_fence_driver_sw_fini(adev);
a5c5d8d5 4049 amdgpu_device_ip_fini(adev);
b31d3063 4050 amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
d38ceaf9 4051 adev->accel_working = false;
68ce8b24 4052 dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
04442bf7
LL
4053
4054 amdgpu_reset_fini(adev);
4055
d38ceaf9 4056 /* free i2c buses */
4562236b
HW
4057 if (!amdgpu_device_has_dc_support(adev))
4058 amdgpu_i2c_fini(adev);
bfca0289
SL
4059
4060 if (amdgpu_emu_mode != 1)
4061 amdgpu_atombios_fini(adev);
4062
d38ceaf9
AD
4063 kfree(adev->bios);
4064 adev->bios = NULL;
d37a3929
OC
4065
4066 px = amdgpu_device_supports_px(adev_to_drm(adev));
4067
4068 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
4069 apple_gmux_detect(NULL, NULL)))
84c8b22e 4070 vga_switcheroo_unregister_client(adev->pdev);
d37a3929
OC
4071
4072 if (px)
83ba126a 4073 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d37a3929 4074
38d6be81 4075 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
b8779475 4076 vga_client_unregister(adev->pdev);
e9bc1bf7 4077
62d5f9f7
LS
4078 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4079
4080 iounmap(adev->rmmio);
4081 adev->rmmio = NULL;
43c064db 4082 amdgpu_doorbell_fini(adev);
62d5f9f7
LS
4083 drm_dev_exit(idx);
4084 }
4085
d155bef0
AB
4086 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4087 amdgpu_pmu_fini(adev);
72de33f8 4088 if (adev->mman.discovery_bin)
a190d1c7 4089 amdgpu_discovery_fini(adev);
72c8c97b 4090
cfbb6b00
AG
4091 amdgpu_reset_put_reset_domain(adev->reset_domain);
4092 adev->reset_domain = NULL;
4093
72c8c97b
AG
4094 kfree(adev->pci_state);
4095
d38ceaf9
AD
4096}
4097
58144d28
ND
4098/**
4099 * amdgpu_device_evict_resources - evict device resources
4100 * @adev: amdgpu device object
4101 *
 4102 * Evicts all TTM device resources (VRAM BOs, GART table) from the LRU list
4103 * of the vram memory type. Mainly used for evicting device resources
4104 * at suspend time.
4105 *
4106 */
7863c155 4107static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
58144d28 4108{
7863c155
ML
4109 int ret;
4110
e53d9665
ML
4111 /* No need to evict vram on APUs for suspend to ram or s2idle */
4112 if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
7863c155 4113 return 0;
58144d28 4114
7863c155
ML
4115 ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4116 if (ret)
58144d28 4117 DRM_WARN("evicting device resources failed\n");
7863c155 4118 return ret;
58144d28 4119}
d38ceaf9
AD
4120
4121/*
4122 * Suspend & resume.
4123 */
4124/**
810ddc3a 4125 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 4126 *
87e3f136 4127 * @dev: drm dev pointer
87e3f136 4128 * @fbcon : notify the fbdev of suspend
d38ceaf9
AD
4129 *
4130 * Puts the hw in the suspend state (all asics).
4131 * Returns 0 for success or an error on failure.
4132 * Called at driver suspend.
4133 */
de185019 4134int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9 4135{
a2e15b0e 4136 struct amdgpu_device *adev = drm_to_adev(dev);
d7274ec7 4137 int r = 0;
d38ceaf9 4138
d38ceaf9
AD
4139 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4140 return 0;
4141
44779b43 4142 adev->in_suspend = true;
3fa8f89d 4143
47ea2076
SF
4144 /* Evict the majority of BOs before grabbing the full access */
4145 r = amdgpu_device_evict_resources(adev);
4146 if (r)
4147 return r;
4148
d7274ec7
BZ
4149 if (amdgpu_sriov_vf(adev)) {
4150 amdgpu_virt_fini_data_exchange(adev);
4151 r = amdgpu_virt_request_full_gpu(adev, false);
4152 if (r)
4153 return r;
4154 }
4155
3fa8f89d
S
4156 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4157 DRM_WARN("smart shift update failed\n");
4158
5f818173 4159 if (fbcon)
087451f3 4160 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
5f818173 4161
beff74bc 4162 cancel_delayed_work_sync(&adev->delayed_init_work);
0dee7263 4163 flush_delayed_work(&adev->gfx.gfx_off_delay_work);
a5459475 4164
5e6932fe 4165 amdgpu_ras_suspend(adev);
4166
2196927b 4167 amdgpu_device_ip_suspend_phase1(adev);
fe1053b7 4168
c004d44e 4169 if (!adev->in_s0ix)
5d3a2d95 4170 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
94fa5660 4171
7863c155
ML
4172 r = amdgpu_device_evict_resources(adev);
4173 if (r)
4174 return r;
d38ceaf9 4175
8d35a259 4176 amdgpu_fence_driver_hw_fini(adev);
d38ceaf9 4177
2196927b 4178 amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 4179
d7274ec7
BZ
4180 if (amdgpu_sriov_vf(adev))
4181 amdgpu_virt_release_full_gpu(adev, false);
4182
d38ceaf9
AD
4183 return 0;
4184}
4185
4186/**
810ddc3a 4187 * amdgpu_device_resume - initiate device resume
d38ceaf9 4188 *
87e3f136 4189 * @dev: drm dev pointer
87e3f136 4190 * @fbcon : notify the fbdev of resume
d38ceaf9
AD
4191 *
4192 * Bring the hw back to operating state (all asics).
4193 * Returns 0 for success or an error on failure.
4194 * Called at driver resume.
4195 */
de185019 4196int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9 4197{
1348969a 4198 struct amdgpu_device *adev = drm_to_adev(dev);
03161a6e 4199 int r = 0;
d38ceaf9 4200
d7274ec7
BZ
4201 if (amdgpu_sriov_vf(adev)) {
4202 r = amdgpu_virt_request_full_gpu(adev, true);
4203 if (r)
4204 return r;
4205 }
4206
d38ceaf9
AD
4207 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4208 return 0;
4209
62498733 4210 if (adev->in_s0ix)
bc143d8b 4211 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
628c36d7 4212
d38ceaf9 4213 /* post card */
39c640c0 4214 if (amdgpu_device_need_post(adev)) {
4d2997ab 4215 r = amdgpu_device_asic_init(adev);
74b0b157 4216 if (r)
aac89168 4217 dev_err(adev->dev, "amdgpu asic init failed\n");
74b0b157 4218 }
d38ceaf9 4219
06ec9070 4220 r = amdgpu_device_ip_resume(adev);
d7274ec7 4221
e6707218 4222 if (r) {
aac89168 4223 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
3c22c1ea 4224 goto exit;
e6707218 4225 }
8d35a259 4226 amdgpu_fence_driver_hw_init(adev);
5ceb54c6 4227
06ec9070 4228 r = amdgpu_device_ip_late_init(adev);
03161a6e 4229 if (r)
3c22c1ea 4230 goto exit;
d38ceaf9 4231
beff74bc
AD
4232 queue_delayed_work(system_wq, &adev->delayed_init_work,
4233 msecs_to_jiffies(AMDGPU_RESUME_MS));
4234
c004d44e 4235 if (!adev->in_s0ix) {
5d3a2d95
AD
4236 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4237 if (r)
3c22c1ea 4238 goto exit;
5d3a2d95 4239 }
756e6880 4240
3c22c1ea
SF
4241exit:
4242 if (amdgpu_sriov_vf(adev)) {
4243 amdgpu_virt_init_data_exchange(adev);
4244 amdgpu_virt_release_full_gpu(adev, true);
4245 }
4246
4247 if (r)
4248 return r;
4249
96a5d8d4 4250 /* Make sure IB tests flushed */
beff74bc 4251 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 4252
a2e15b0e 4253 if (fbcon)
087451f3 4254 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
d38ceaf9 4255
5e6932fe 4256 amdgpu_ras_resume(adev);
4257
d09ef243
AD
4258 if (adev->mode_info.num_crtc) {
4259 /*
4260 * Most of the connector probing functions try to acquire runtime pm
4261 * refs to ensure that the GPU is powered on when connector polling is
4262 * performed. Since we're calling this from a runtime PM callback,
4263 * trying to acquire rpm refs will cause us to deadlock.
4264 *
4265 * Since we're guaranteed to be holding the rpm lock, it's safe to
4266 * temporarily disable the rpm helpers so this doesn't deadlock us.
4267 */
23a1a9e5 4268#ifdef CONFIG_PM
d09ef243 4269 dev->dev->power.disable_depth++;
23a1a9e5 4270#endif
d09ef243
AD
4271 if (!adev->dc_enabled)
4272 drm_helper_hpd_irq_event(dev);
4273 else
4274 drm_kms_helper_hotplug_event(dev);
23a1a9e5 4275#ifdef CONFIG_PM
d09ef243 4276 dev->dev->power.disable_depth--;
23a1a9e5 4277#endif
d09ef243 4278 }
44779b43
RZ
4279 adev->in_suspend = false;
4280
dc907c9d
JX
4281 if (adev->enable_mes)
4282 amdgpu_mes_self_test(adev);
4283
3fa8f89d
S
4284 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4285 DRM_WARN("smart shift update failed\n");
4286
4d3b9ae5 4287 return 0;
d38ceaf9
AD
4288}
4289
e3ecdffa
AD
4290/**
4291 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4292 *
4293 * @adev: amdgpu_device pointer
4294 *
4295 * The list of all the hardware IPs that make up the asic is walked and
4296 * the check_soft_reset callbacks are run. check_soft_reset determines
4297 * if the asic is still hung or not.
4298 * Returns true if any of the IPs are still in a hung state, false if not.
4299 */
06ec9070 4300static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
4301{
4302 int i;
4303 bool asic_hang = false;
4304
f993d628
ML
4305 if (amdgpu_sriov_vf(adev))
4306 return true;
4307
8bc04c29
AD
4308 if (amdgpu_asic_need_full_reset(adev))
4309 return true;
4310
63fbf42f 4311 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4312 if (!adev->ip_blocks[i].status.valid)
63fbf42f 4313 continue;
a1255107
AD
4314 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4315 adev->ip_blocks[i].status.hang =
4316 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4317 if (adev->ip_blocks[i].status.hang) {
aac89168 4318 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
4319 asic_hang = true;
4320 }
4321 }
4322 return asic_hang;
4323}
4324
e3ecdffa
AD
4325/**
4326 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4327 *
4328 * @adev: amdgpu_device pointer
4329 *
4330 * The list of all the hardware IPs that make up the asic is walked and the
4331 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
4332 * handles any IP specific hardware or software state changes that are
4333 * necessary for a soft reset to succeed.
4334 * Returns 0 on success, negative error code on failure.
4335 */
06ec9070 4336static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
4337{
4338 int i, r = 0;
4339
4340 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4341 if (!adev->ip_blocks[i].status.valid)
d31a501e 4342 continue;
a1255107
AD
4343 if (adev->ip_blocks[i].status.hang &&
4344 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4345 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
4346 if (r)
4347 return r;
4348 }
4349 }
4350
4351 return 0;
4352}
4353
e3ecdffa
AD
4354/**
4355 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4356 *
4357 * @adev: amdgpu_device pointer
4358 *
4359 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
4360 * reset is necessary to recover.
4361 * Returns true if a full asic reset is required, false if not.
4362 */
06ec9070 4363static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 4364{
da146d3b
AD
4365 int i;
4366
8bc04c29
AD
4367 if (amdgpu_asic_need_full_reset(adev))
4368 return true;
4369
da146d3b 4370 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4371 if (!adev->ip_blocks[i].status.valid)
da146d3b 4372 continue;
a1255107
AD
4373 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4374 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4375 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
4376 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4377 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 4378 if (adev->ip_blocks[i].status.hang) {
aac89168 4379 dev_info(adev->dev, "Some block need full reset!\n");
da146d3b
AD
4380 return true;
4381 }
4382 }
35d782fe
CZ
4383 }
4384 return false;
4385}
4386
e3ecdffa
AD
4387/**
4388 * amdgpu_device_ip_soft_reset - do a soft reset
4389 *
4390 * @adev: amdgpu_device pointer
4391 *
4392 * The list of all the hardware IPs that make up the asic is walked and the
4393 * soft_reset callbacks are run if the block is hung. soft_reset handles any
4394 * IP specific hardware or software state changes that are necessary to soft
4395 * reset the IP.
4396 * Returns 0 on success, negative error code on failure.
4397 */
06ec9070 4398static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4399{
4400 int i, r = 0;
4401
4402 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4403 if (!adev->ip_blocks[i].status.valid)
35d782fe 4404 continue;
a1255107
AD
4405 if (adev->ip_blocks[i].status.hang &&
4406 adev->ip_blocks[i].version->funcs->soft_reset) {
4407 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
4408 if (r)
4409 return r;
4410 }
4411 }
4412
4413 return 0;
4414}
4415
e3ecdffa
AD
4416/**
4417 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4418 *
4419 * @adev: amdgpu_device pointer
4420 *
4421 * The list of all the hardware IPs that make up the asic is walked and the
4422 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
4423 * handles any IP specific hardware or software state changes that are
4424 * necessary after the IP has been soft reset.
4425 * Returns 0 on success, negative error code on failure.
4426 */
06ec9070 4427static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4428{
4429 int i, r = 0;
4430
4431 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4432 if (!adev->ip_blocks[i].status.valid)
35d782fe 4433 continue;
a1255107
AD
4434 if (adev->ip_blocks[i].status.hang &&
4435 adev->ip_blocks[i].version->funcs->post_soft_reset)
4436 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
4437 if (r)
4438 return r;
4439 }
4440
4441 return 0;
4442}
4443
e3ecdffa 4444/**
c33adbc7 4445 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
4446 *
4447 * @adev: amdgpu_device pointer
4448 *
4449 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
4450 * restore things like GPUVM page tables after a GPU reset where
4451 * the contents of VRAM might be lost.
403009bf
CK
4452 *
4453 * Returns:
4454 * 0 on success, negative error code on failure.
e3ecdffa 4455 */
c33adbc7 4456static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 4457{
c41d1cf6 4458 struct dma_fence *fence = NULL, *next = NULL;
403009bf 4459 struct amdgpu_bo *shadow;
e18aaea7 4460 struct amdgpu_bo_vm *vmbo;
403009bf 4461 long r = 1, tmo;
c41d1cf6
ML
4462
4463 if (amdgpu_sriov_runtime(adev))
b045d3af 4464 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
4465 else
4466 tmo = msecs_to_jiffies(100);
4467
aac89168 4468 dev_info(adev->dev, "recover vram bo from shadow start\n");
c41d1cf6 4469 mutex_lock(&adev->shadow_list_lock);
e18aaea7 4470 list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4994d1f0
LC
4471 /* If vm is compute context or adev is APU, shadow will be NULL */
4472 if (!vmbo->shadow)
4473 continue;
4474 shadow = vmbo->shadow;
4475
403009bf 4476 /* No need to recover an evicted BO */
d3116756
CK
4477 if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4478 shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4479 shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
403009bf
CK
4480 continue;
4481
4482 r = amdgpu_bo_restore_shadow(shadow, &next);
4483 if (r)
4484 break;
4485
c41d1cf6 4486 if (fence) {
1712fb1a 4487 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
4488 dma_fence_put(fence);
4489 fence = next;
1712fb1a 4490 if (tmo == 0) {
4491 r = -ETIMEDOUT;
c41d1cf6 4492 break;
1712fb1a 4493 } else if (tmo < 0) {
4494 r = tmo;
4495 break;
4496 }
403009bf
CK
4497 } else {
4498 fence = next;
c41d1cf6 4499 }
c41d1cf6
ML
4500 }
4501 mutex_unlock(&adev->shadow_list_lock);
4502
403009bf
CK
4503 if (fence)
4504 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
4505 dma_fence_put(fence);
4506
1712fb1a 4507 if (r < 0 || tmo <= 0) {
aac89168 4508 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
4509 return -EIO;
4510 }
c41d1cf6 4511
aac89168 4512 dev_info(adev->dev, "recover vram bo from shadow done\n");
403009bf 4513 return 0;
c41d1cf6
ML
4514}
4515
a90ad3c2 4516
e3ecdffa 4517/**
06ec9070 4518 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e 4519 *
982a820b 4520 * @adev: amdgpu_device pointer
87e3f136 4521 * @from_hypervisor: request from hypervisor
5740682e
ML
4522 *
 4523 * Do a VF FLR and reinitialize the ASIC.
3f48c681 4524 * Returns 0 on success, negative error code on failure.
e3ecdffa
AD
4525 */
4526static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4527 bool from_hypervisor)
5740682e
ML
4528{
4529 int r;
a5f67c93 4530 struct amdgpu_hive_info *hive = NULL;
7258fa31 4531 int retry_limit = 0;
5740682e 4532
7258fa31 4533retry:
c004d44e 4534 amdgpu_amdkfd_pre_reset(adev);
428890a3 4535
5740682e
ML
4536 if (from_hypervisor)
4537 r = amdgpu_virt_request_full_gpu(adev, true);
4538 else
4539 r = amdgpu_virt_reset_gpu(adev);
4540 if (r)
4541 return r;
f734b213 4542 amdgpu_irq_gpu_reset_resume_helper(adev);
a90ad3c2 4543
83f24a8f
HC
 4544 /* some SW cleanup the VF needs to do before recovery */
4545 amdgpu_virt_post_reset(adev);
4546
a90ad3c2 4547 /* Resume IP prior to SMC */
06ec9070 4548 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
4549 if (r)
4550 goto error;
a90ad3c2 4551
c9ffa427 4552 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 4553
7a3e0bb2
RZ
4554 r = amdgpu_device_fw_loading(adev);
4555 if (r)
4556 return r;
4557
a90ad3c2 4558 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 4559 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
4560 if (r)
4561 goto error;
a90ad3c2 4562
a5f67c93
ZL
4563 hive = amdgpu_get_xgmi_hive(adev);
4564 /* Update PSP FW topology after reset */
4565 if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4566 r = amdgpu_xgmi_update_topology(hive, adev);
4567
4568 if (hive)
4569 amdgpu_put_xgmi_hive(hive);
4570
4571 if (!r) {
a5f67c93 4572 r = amdgpu_ib_ring_tests(adev);
9c12f5cd 4573
c004d44e 4574 amdgpu_amdkfd_post_reset(adev);
a5f67c93 4575 }
a90ad3c2 4576
abc34253 4577error:
c41d1cf6 4578 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 4579 amdgpu_inc_vram_lost(adev);
c33adbc7 4580 r = amdgpu_device_recover_vram(adev);
a90ad3c2 4581 }
437f3e0b 4582 amdgpu_virt_release_full_gpu(adev, true);
a90ad3c2 4583
7258fa31
SK
4584 if (AMDGPU_RETRY_SRIOV_RESET(r)) {
4585 if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
4586 retry_limit++;
4587 goto retry;
4588 } else
4589 DRM_ERROR("GPU reset retry is beyond the retry limit\n");
4590 }
4591
a90ad3c2
ML
4592 return r;
4593}
4594
9a1cddd6 4595/**
 4596 * amdgpu_device_has_job_running - check if there is any job in the pending list
4597 *
982a820b 4598 * @adev: amdgpu_device pointer
9a1cddd6 4599 *
 4600 * check if there is any job in the scheduler's pending list
4601 */
4602bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4603{
4604 int i;
4605 struct drm_sched_job *job;
4606
4607 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4608 struct amdgpu_ring *ring = adev->rings[i];
4609
4610 if (!ring || !ring->sched.thread)
4611 continue;
4612
4613 spin_lock(&ring->sched.job_list_lock);
6efa4b46
LT
4614 job = list_first_entry_or_null(&ring->sched.pending_list,
4615 struct drm_sched_job, list);
9a1cddd6 4616 spin_unlock(&ring->sched.job_list_lock);
4617 if (job)
4618 return true;
4619 }
4620 return false;
4621}
4622
12938fad
CK
4623/**
4624 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4625 *
982a820b 4626 * @adev: amdgpu_device pointer
12938fad
CK
4627 *
4628 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4629 * a hung GPU.
4630 */
4631bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4632{
12938fad 4633
3ba7b418
AG
4634 if (amdgpu_gpu_recovery == 0)
4635 goto disabled;
4636
1a11a65d
YC
4637 /* Skip soft reset check in fatal error mode */
4638 if (!amdgpu_ras_is_poison_mode_supported(adev))
4639 return true;
4640
3ba7b418
AG
4641 if (amdgpu_sriov_vf(adev))
4642 return true;
4643
4644 if (amdgpu_gpu_recovery == -1) {
4645 switch (adev->asic_type) {
b3523c45
AD
4646#ifdef CONFIG_DRM_AMDGPU_SI
4647 case CHIP_VERDE:
4648 case CHIP_TAHITI:
4649 case CHIP_PITCAIRN:
4650 case CHIP_OLAND:
4651 case CHIP_HAINAN:
4652#endif
4653#ifdef CONFIG_DRM_AMDGPU_CIK
4654 case CHIP_KAVERI:
4655 case CHIP_KABINI:
4656 case CHIP_MULLINS:
4657#endif
4658 case CHIP_CARRIZO:
4659 case CHIP_STONEY:
4660 case CHIP_CYAN_SKILLFISH:
3ba7b418 4661 goto disabled;
b3523c45
AD
4662 default:
4663 break;
3ba7b418 4664 }
12938fad
CK
4665 }
4666
4667 return true;
3ba7b418
AG
4668
4669disabled:
aac89168 4670 dev_info(adev->dev, "GPU recovery disabled.\n");
3ba7b418 4671 return false;
12938fad
CK
4672}
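/*
 * Net effect of the checks above: gpu_recovery=0 disables recovery, any
 * positive value enables it unconditionally, and the default of -1 enables
 * it on everything except the SI/CIK era and the few other ASICs listed in
 * the switch. SR-IOV VFs and devices without RAS poison-mode support always
 * take the recovery path unless recovery was explicitly disabled.
 */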
4673
5c03e584
FX
4674int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
4675{
47fc644f
SS
4676 u32 i;
4677 int ret = 0;
5c03e584 4678
47fc644f 4679 amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5c03e584 4680
47fc644f 4681 dev_info(adev->dev, "GPU mode1 reset\n");
5c03e584 4682
47fc644f
SS
4683 /* disable BM */
4684 pci_clear_master(adev->pdev);
5c03e584 4685
47fc644f 4686 amdgpu_device_cache_pci_state(adev->pdev);
5c03e584 4687
47fc644f
SS
4688 if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
4689 dev_info(adev->dev, "GPU smu mode1 reset\n");
4690 ret = amdgpu_dpm_mode1_reset(adev);
4691 } else {
4692 dev_info(adev->dev, "GPU psp mode1 reset\n");
4693 ret = psp_gpu_reset(adev);
4694 }
5c03e584 4695
47fc644f
SS
4696 if (ret)
4697 dev_err(adev->dev, "GPU mode1 reset failed\n");
5c03e584 4698
47fc644f 4699 amdgpu_device_load_pci_state(adev->pdev);
5c03e584 4700
47fc644f
SS
4701 /* wait for asic to come out of reset */
4702 for (i = 0; i < adev->usec_timeout; i++) {
4703 u32 memsize = adev->nbio.funcs->get_memsize(adev);
5c03e584 4704
47fc644f
SS
4705 if (memsize != 0xffffffff)
4706 break;
4707 udelay(1);
4708 }
5c03e584 4709
47fc644f
SS
4710 amdgpu_atombios_scratch_regs_engine_hung(adev, false);
4711 return ret;
5c03e584 4712}
5c6dd71e 4713
e3c1b071 4714int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
04442bf7 4715 struct amdgpu_reset_context *reset_context)
26bc5340 4716{
5c1e6fa4 4717 int i, r = 0;
04442bf7
LL
4718 struct amdgpu_job *job = NULL;
4719 bool need_full_reset =
4720 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4721
4722 if (reset_context->reset_req_dev == adev)
4723 job = reset_context->job;
71182665 4724
b602ca5f
TZ
4725 if (amdgpu_sriov_vf(adev)) {
4726 /* stop the data exchange thread */
4727 amdgpu_virt_fini_data_exchange(adev);
4728 }
4729
9e225fb9
AG
4730 amdgpu_fence_driver_isr_toggle(adev, true);
4731
71182665 4732 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
4733 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4734 struct amdgpu_ring *ring = adev->rings[i];
4735
51687759 4736 if (!ring || !ring->sched.thread)
0875dc9e 4737 continue;
5740682e 4738
b8920e1e
SS
 4739 /* Clear the job fences from the fence driver before force_completion so
 4740 * that only NULL and VM flush fences are left in the fence driver.
4741 */
5c1e6fa4 4742 amdgpu_fence_driver_clear_job_fences(ring);
c530b02f 4743
2f9d4084
ML
4744 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
4745 amdgpu_fence_driver_force_completion(ring);
0875dc9e 4746 }
d38ceaf9 4747
9e225fb9
AG
4748 amdgpu_fence_driver_isr_toggle(adev, false);
4749
ff99849b 4750 if (job && job->vm)
222b5f04
AG
4751 drm_sched_increase_karma(&job->base);
4752
04442bf7 4753 r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
404b277b 4754 /* If reset handler not implemented, continue; otherwise return */
b8920e1e 4755 if (r == -EOPNOTSUPP)
404b277b
LL
4756 r = 0;
4757 else
04442bf7
LL
4758 return r;
4759
1d721ed6 4760 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
4761 if (!amdgpu_sriov_vf(adev)) {
4762
4763 if (!need_full_reset)
4764 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4765
360cd081
LG
4766 if (!need_full_reset && amdgpu_gpu_recovery &&
4767 amdgpu_device_ip_check_soft_reset(adev)) {
26bc5340
AG
4768 amdgpu_device_ip_pre_soft_reset(adev);
4769 r = amdgpu_device_ip_soft_reset(adev);
4770 amdgpu_device_ip_post_soft_reset(adev);
4771 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 4772 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
26bc5340
AG
4773 need_full_reset = true;
4774 }
4775 }
4776
4777 if (need_full_reset)
4778 r = amdgpu_device_ip_suspend(adev);
04442bf7
LL
4779 if (need_full_reset)
4780 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4781 else
4782 clear_bit(AMDGPU_NEED_FULL_RESET,
4783 &reset_context->flags);
26bc5340
AG
4784 }
4785
4786 return r;
4787}
4788
15fd09a0
SA
4789static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
4790{
15fd09a0
SA
4791 int i;
4792
38a15ad9 4793 lockdep_assert_held(&adev->reset_domain->sem);
15fd09a0
SA
4794
4795 for (i = 0; i < adev->num_regs; i++) {
651d7ee6
SA
4796 adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]);
4797 trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i],
4798 adev->reset_dump_reg_value[i]);
15fd09a0
SA
4799 }
4800
4801 return 0;
4802}
4803
3d8785f6
SA
4804#ifdef CONFIG_DEV_COREDUMP
4805static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
4806 size_t count, void *data, size_t datalen)
4807{
4808 struct drm_printer p;
4809 struct amdgpu_device *adev = data;
4810 struct drm_print_iterator iter;
4811 int i;
4812
4813 iter.data = buffer;
4814 iter.offset = 0;
4815 iter.start = offset;
4816 iter.remain = count;
4817
4818 p = drm_coredump_printer(&iter);
4819
4820 drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
4821 drm_printf(&p, "kernel: " UTS_RELEASE "\n");
4822 drm_printf(&p, "module: " KBUILD_MODNAME "\n");
4823 drm_printf(&p, "time: %lld.%09ld\n", adev->reset_time.tv_sec, adev->reset_time.tv_nsec);
4824 if (adev->reset_task_info.pid)
4825 drm_printf(&p, "process_name: %s PID: %d\n",
4826 adev->reset_task_info.process_name,
4827 adev->reset_task_info.pid);
4828
4829 if (adev->reset_vram_lost)
4830 drm_printf(&p, "VRAM is lost due to GPU reset!\n");
4831 if (adev->num_regs) {
4832 drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n");
4833
4834 for (i = 0; i < adev->num_regs; i++)
4835 drm_printf(&p, "0x%08x: 0x%08x\n",
4836 adev->reset_dump_reg_list[i],
4837 adev->reset_dump_reg_value[i]);
4838 }
4839
4840 return count - iter.remain;
4841}
4842
4843static void amdgpu_devcoredump_free(void *data)
4844{
4845}
4846
4847static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev)
4848{
4849 struct drm_device *dev = adev_to_drm(adev);
4850
4851 ktime_get_ts64(&adev->reset_time);
4852 dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_KERNEL,
4853 amdgpu_devcoredump_read, amdgpu_devcoredump_free);
4854}
4855#endif
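/*
 * Once captured, the coredump registered above is normally read from user
 * space through the devcoredump class device (assumed path:
 * /sys/class/devcoredump/devcd<N>/data) and is released automatically after
 * a timeout or once user space writes to that node.
 */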
4856
04442bf7
LL
4857int amdgpu_do_asic_reset(struct list_head *device_list_handle,
4858 struct amdgpu_reset_context *reset_context)
26bc5340
AG
4859{
4860 struct amdgpu_device *tmp_adev = NULL;
04442bf7 4861 bool need_full_reset, skip_hw_reset, vram_lost = false;
26bc5340 4862 int r = 0;
f5c7e779 4863 bool gpu_reset_for_dev_remove = 0;
26bc5340 4864
04442bf7
LL
4865 /* Try reset handler method first */
4866 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
4867 reset_list);
15fd09a0 4868 amdgpu_reset_reg_dumps(tmp_adev);
0a83bb35
LL
4869
4870 reset_context->reset_device_list = device_list_handle;
04442bf7 4871 r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
404b277b 4872 /* If reset handler not implemented, continue; otherwise return */
b8920e1e 4873 if (r == -EOPNOTSUPP)
404b277b
LL
4874 r = 0;
4875 else
04442bf7
LL
4876 return r;
4877
4878 /* Reset handler not implemented, use the default method */
4879 need_full_reset =
4880 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4881 skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
4882
f5c7e779
YC
4883 gpu_reset_for_dev_remove =
4884 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
4885 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4886
26bc5340 4887 /*
655ce9cb 4888 * ASIC reset has to be done on all XGMI hive nodes ASAP
26bc5340
AG
4889 * to allow proper links negotiation in FW (within 1 sec)
4890 */
7ac71382 4891 if (!skip_hw_reset && need_full_reset) {
655ce9cb 4892 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
041a62bc 4893 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 4894 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
e3c1b071 4895 tmp_adev->gmc.xgmi.pending_reset = false;
c96cf282 4896 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
4897 r = -EALREADY;
4898 } else
4899 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 4900
041a62bc 4901 if (r) {
aac89168 4902 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4a580877 4903 r, adev_to_drm(tmp_adev)->unique);
041a62bc 4904 break;
ce316fa5
LM
4905 }
4906 }
4907
041a62bc
AG
4908 /* For XGMI wait for all resets to complete before proceed */
4909 if (!r) {
655ce9cb 4910 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
ce316fa5
LM
4911 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4912 flush_work(&tmp_adev->xgmi_reset_work);
4913 r = tmp_adev->asic_reset_res;
4914 if (r)
4915 break;
ce316fa5
LM
4916 }
4917 }
4918 }
ce316fa5 4919 }
26bc5340 4920
43c4d576 4921 if (!r && amdgpu_ras_intr_triggered()) {
655ce9cb 4922 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5e67bba3 4923 if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops &&
4924 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
4925 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev);
43c4d576
JC
4926 }
4927
00eaa571 4928 amdgpu_ras_intr_cleared();
43c4d576 4929 }
00eaa571 4930
f5c7e779
YC
4931 /* Since the mode1 reset affects base ip blocks, the
4932 * phase1 ip blocks need to be resumed. Otherwise there
4933 * will be a BIOS signature error and the psp bootloader
4934 * can't load kdb on the next amdgpu install.
4935 */
4936 if (gpu_reset_for_dev_remove) {
4937 list_for_each_entry(tmp_adev, device_list_handle, reset_list)
4938 amdgpu_device_ip_resume_phase1(tmp_adev);
4939
4940 goto end;
4941 }
4942
655ce9cb 4943 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
26bc5340
AG
4944 if (need_full_reset) {
4945 /* post card */
e3c1b071 4946 r = amdgpu_device_asic_init(tmp_adev);
4947 if (r) {
aac89168 4948 dev_warn(tmp_adev->dev, "asic atom init failed!");
e3c1b071 4949 } else {
26bc5340 4950 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
9cec53c1 4951
26bc5340
AG
4952 r = amdgpu_device_ip_resume_phase1(tmp_adev);
4953 if (r)
4954 goto out;
4955
4956 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
3d8785f6
SA
4957#ifdef CONFIG_DEV_COREDUMP
4958 tmp_adev->reset_vram_lost = vram_lost;
4959 memset(&tmp_adev->reset_task_info, 0,
4960 sizeof(tmp_adev->reset_task_info));
4961 if (reset_context->job && reset_context->job->vm)
4962 tmp_adev->reset_task_info =
4963 reset_context->job->vm->task_info;
4964 amdgpu_reset_capture_coredumpm(tmp_adev);
4965#endif
26bc5340 4966 if (vram_lost) {
77e7f829 4967 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 4968 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
4969 }
4970
26bc5340
AG
4971 r = amdgpu_device_fw_loading(tmp_adev);
4972 if (r)
4973 return r;
4974
4975 r = amdgpu_device_ip_resume_phase2(tmp_adev);
4976 if (r)
4977 goto out;
4978
4979 if (vram_lost)
4980 amdgpu_device_fill_reset_magic(tmp_adev);
4981
fdafb359
EQ
4982 /*
 4983	 * Add this ASIC back as tracked since the reset has already
 4984	 * completed successfully.
4985 */
4986 amdgpu_register_gpu_instance(tmp_adev);
4987
04442bf7
LL
4988 if (!reset_context->hive &&
4989 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
e3c1b071 4990 amdgpu_xgmi_add_device(tmp_adev);
4991
7c04ca50 4992 r = amdgpu_device_ip_late_init(tmp_adev);
4993 if (r)
4994 goto out;
4995
087451f3 4996 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
565d1941 4997
e8fbaf03
GC
4998 /*
 4999	 * The GPU enters a bad state once the number of faulty
 5000	 * pages reported by ECC reaches the threshold, and RAS
 5001	 * recovery is scheduled next. So add one check here to
 5002	 * break recovery if the threshold has indeed been
 5003	 * exceeded, and remind the user to retire this GPU or
 5004	 * to set a bigger bad_page_threshold value so that the
 5005	 * issue is fixed the next time the driver is probed
 5006	 * again.
5007 */
11003c68 5008 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
e8fbaf03
GC
5009 /* must succeed. */
5010 amdgpu_ras_resume(tmp_adev);
5011 } else {
5012 r = -EINVAL;
5013 goto out;
5014 }
e79a04d5 5015
26bc5340 5016 /* Update PSP FW topology after reset */
04442bf7
LL
5017 if (reset_context->hive &&
5018 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5019 r = amdgpu_xgmi_update_topology(
5020 reset_context->hive, tmp_adev);
26bc5340
AG
5021 }
5022 }
5023
26bc5340
AG
5024out:
5025 if (!r) {
5026 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5027 r = amdgpu_ib_ring_tests(tmp_adev);
5028 if (r) {
5029 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
26bc5340
AG
5030 need_full_reset = true;
5031 r = -EAGAIN;
5032 goto end;
5033 }
5034 }
5035
5036 if (!r)
5037 r = amdgpu_device_recover_vram(tmp_adev);
5038 else
5039 tmp_adev->asic_reset_res = r;
5040 }
5041
5042end:
04442bf7
LL
5043 if (need_full_reset)
5044 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5045 else
5046 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340
AG
5047 return r;
5048}
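/*
 * A note on the return contract above: if the post-reset IB ring tests
 * fail, need_full_reset is set in reset_context->flags and -EAGAIN is
 * returned; amdgpu_device_gpu_recover() treats -EAGAIN as a request to
 * retry the whole pre-reset/reset sequence with a full reset.
 */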
5049
e923be99 5050static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
26bc5340 5051{
5740682e 5052
a3a09142
AD
5053 switch (amdgpu_asic_reset_method(adev)) {
5054 case AMD_RESET_METHOD_MODE1:
5055 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5056 break;
5057 case AMD_RESET_METHOD_MODE2:
5058 adev->mp1_state = PP_MP1_STATE_RESET;
5059 break;
5060 default:
5061 adev->mp1_state = PP_MP1_STATE_NONE;
5062 break;
5063 }
26bc5340 5064}
d38ceaf9 5065
e923be99 5066static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
26bc5340 5067{
89041940 5068 amdgpu_vf_error_trans_all(adev);
a3a09142 5069 adev->mp1_state = PP_MP1_STATE_NONE;
91fb309d
HC
5070}
5071
3f12acc8
EQ
5072static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5073{
5074 struct pci_dev *p = NULL;
5075
5076 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5077 adev->pdev->bus->number, 1);
5078 if (p) {
5079 pm_runtime_enable(&(p->dev));
5080 pm_runtime_resume(&(p->dev));
5081 }
b85e285e
YY
5082
5083 pci_dev_put(p);
3f12acc8
EQ
5084}
5085
5086static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5087{
5088 enum amd_reset_method reset_method;
5089 struct pci_dev *p = NULL;
5090 u64 expires;
5091
5092 /*
 5093	 * For now, only BACO and mode1 reset are confirmed to
 5094	 * suffer from the audio issue if audio is not suspended properly.
5095 */
5096 reset_method = amdgpu_asic_reset_method(adev);
5097 if ((reset_method != AMD_RESET_METHOD_BACO) &&
5098 (reset_method != AMD_RESET_METHOD_MODE1))
5099 return -EINVAL;
5100
5101 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5102 adev->pdev->bus->number, 1);
5103 if (!p)
5104 return -ENODEV;
5105
5106 expires = pm_runtime_autosuspend_expiration(&(p->dev));
5107 if (!expires)
5108 /*
5109 * If we cannot get the audio device autosuspend delay,
 5110	 * a fixed 4s interval will be used. Since 3s is the
 5111	 * audio controller's default autosuspend delay setting,
 5112	 * the 4s used here is guaranteed to cover it.
5113 */
54b7feb9 5114 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
5115
5116 while (!pm_runtime_status_suspended(&(p->dev))) {
5117 if (!pm_runtime_suspend(&(p->dev)))
5118 break;
5119
5120 if (expires < ktime_get_mono_fast_ns()) {
5121 dev_warn(adev->dev, "failed to suspend display audio\n");
b85e285e 5122 pci_dev_put(p);
3f12acc8
EQ
5123 /* TODO: abort the succeeding gpu reset? */
5124 return -ETIMEDOUT;
5125 }
5126 }
5127
5128 pm_runtime_disable(&(p->dev));
5129
b85e285e 5130 pci_dev_put(p);
3f12acc8
EQ
5131 return 0;
5132}
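/*
 * The suspend helper above deliberately leaves runtime PM disabled on
 * the audio function so it cannot autoresume while the GPU is being
 * reset; amdgpu_device_resume_display_audio() re-enables and resumes it
 * once recovery is finished (tracked by the audio_suspended flag in
 * amdgpu_device_gpu_recover()).
 */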
5133
d193b12b 5134static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
247c7b0d
AG
5135{
5136 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5137
5138#if defined(CONFIG_DEBUG_FS)
5139 if (!amdgpu_sriov_vf(adev))
5140 cancel_work(&adev->reset_work);
5141#endif
5142
5143 if (adev->kfd.dev)
5144 cancel_work(&adev->kfd.reset_work);
5145
5146 if (amdgpu_sriov_vf(adev))
5147 cancel_work(&adev->virt.flr_work);
5148
5149 if (con && adev->ras_enabled)
5150 cancel_work(&con->recovery_work);
5151
5152}
5153
26bc5340 5154/**
6e9c65f7 5155 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
26bc5340 5156 *
982a820b 5157 * @adev: amdgpu_device pointer
26bc5340 5158 * @job: which job trigger hang
80bd2de1 5159 * @reset_context: amdgpu reset context pointer
26bc5340
AG
5160 *
 5161	 * Attempt to reset the GPU if it has hung (all ASICs).
 5162	 * Attempts a soft reset or full reset and reinitializes the ASIC.
5163 * Returns 0 for success or an error on failure.
5164 */
5165
cf727044 5166int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
f1549c09
LG
5167 struct amdgpu_job *job,
5168 struct amdgpu_reset_context *reset_context)
26bc5340 5169{
1d721ed6 5170 struct list_head device_list, *device_list_handle = NULL;
7dd8c205 5171 bool job_signaled = false;
26bc5340 5172 struct amdgpu_hive_info *hive = NULL;
26bc5340 5173 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 5174 int i, r = 0;
bb5c7235 5175 bool need_emergency_restart = false;
3f12acc8 5176 bool audio_suspended = false;
f5c7e779
YC
5177 bool gpu_reset_for_dev_remove = false;
5178
5179 gpu_reset_for_dev_remove =
5180 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5181 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340 5182
6e3cd2a9 5183 /*
bb5c7235
WS
5184 * Special case: RAS triggered and full reset isn't supported
5185 */
5186 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5187
d5ea093e
AG
5188 /*
5189 * Flush RAM to disk so that after reboot
 5190	 * the user can read the log and see why the system rebooted.
5191 */
bb5c7235 5192 if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
5193 DRM_WARN("Emergency reboot.");
5194
5195 ksys_sync_helper();
5196 emergency_restart();
5197 }
5198
b823821f 5199 dev_info(adev->dev, "GPU %s begin!\n",
bb5c7235 5200 need_emergency_restart ? "jobs stop":"reset");
26bc5340 5201
175ac6ec
ZL
5202 if (!amdgpu_sriov_vf(adev))
5203 hive = amdgpu_get_xgmi_hive(adev);
681260df 5204 if (hive)
53b3f8f4 5205 mutex_lock(&hive->hive_lock);
26bc5340 5206
f1549c09
LG
5207 reset_context->job = job;
5208 reset_context->hive = hive;
9e94d22c
EQ
5209 /*
5210 * Build list of devices to reset.
 5211	 * In case we are in XGMI hive mode, re-sort the device list
 5212	 * to put adev in the first position.
5213 */
5214 INIT_LIST_HEAD(&device_list);
175ac6ec 5215 if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
83d29a5f 5216 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
655ce9cb 5217 list_add_tail(&tmp_adev->reset_list, &device_list);
83d29a5f
YC
5218 if (gpu_reset_for_dev_remove && adev->shutdown)
5219 tmp_adev->shutdown = true;
5220 }
655ce9cb 5221 if (!list_is_first(&adev->reset_list, &device_list))
5222 list_rotate_to_front(&adev->reset_list, &device_list);
5223 device_list_handle = &device_list;
26bc5340 5224 } else {
655ce9cb 5225 list_add_tail(&adev->reset_list, &device_list);
26bc5340
AG
5226 device_list_handle = &device_list;
5227 }
5228
e923be99
AG
5229 /* We need to lock reset domain only once both for XGMI and single device */
5230 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5231 reset_list);
3675c2f2 5232 amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
e923be99 5233
1d721ed6 5234 /* block all schedulers and reset given job's ring */
655ce9cb 5235 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f287a3c5 5236
e923be99 5237 amdgpu_device_set_mp1_state(tmp_adev);
f287a3c5 5238
3f12acc8
EQ
5239 /*
 5240	 * Try to put the audio codec into the suspend state
 5241	 * before the gpu reset starts.
 5242	 *
 5243	 * The power domain of the graphics device is shared
 5244	 * with the AZ (audio) power domain. Without this,
 5245	 * we may change the audio hardware from behind
 5246	 * the audio driver's back, which will trigger
 5247	 * audio codec errors.
5248 */
5249 if (!amdgpu_device_suspend_display_audio(tmp_adev))
5250 audio_suspended = true;
5251
9e94d22c
EQ
5252 amdgpu_ras_set_error_query_ready(tmp_adev, false);
5253
52fb44cf
EQ
5254 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5255
c004d44e 5256 if (!amdgpu_sriov_vf(tmp_adev))
428890a3 5257 amdgpu_amdkfd_pre_reset(tmp_adev);
9e94d22c 5258
12ffa55d
AG
5259 /*
 5260	 * Mark the ASICs to be reset as untracked first,
 5261	 * and add them back after the reset completes.
5262 */
5263 amdgpu_unregister_gpu_instance(tmp_adev);
5264
163d4cd2 5265 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
565d1941 5266
f1c1314b 5267 /* disable ras on ALL IPs */
bb5c7235 5268 if (!need_emergency_restart &&
b823821f 5269 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 5270 amdgpu_ras_suspend(tmp_adev);
5271
1d721ed6
AG
5272 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5273 struct amdgpu_ring *ring = tmp_adev->rings[i];
5274
5275 if (!ring || !ring->sched.thread)
5276 continue;
5277
0b2d2c2e 5278 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 5279
bb5c7235 5280 if (need_emergency_restart)
7c6e68c7 5281 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6 5282 }
8f8c80f4 5283 atomic_inc(&tmp_adev->gpu_reset_counter);
1d721ed6
AG
5284 }
5285
bb5c7235 5286 if (need_emergency_restart)
7c6e68c7
AG
5287 goto skip_sched_resume;
5288
1d721ed6
AG
5289 /*
5290 * Must check guilty signal here since after this point all old
5291 * HW fences are force signaled.
5292 *
5293 * job->base holds a reference to parent fence
5294 */
f6a3f660 5295 if (job && dma_fence_is_signaled(&job->hw_fence)) {
1d721ed6 5296 job_signaled = true;
1d721ed6
AG
5297 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5298 goto skip_hw_reset;
5299 }
5300
26bc5340 5301retry: /* Rest of adevs pre asic reset from XGMI hive. */
655ce9cb 5302 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f5c7e779
YC
5303 if (gpu_reset_for_dev_remove) {
 5304	 /* Workaround for ASICs that need to disable the SMC first */
5305 amdgpu_device_smu_fini_early(tmp_adev);
5306 }
f1549c09 5307 r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
26bc5340
AG
 5309	 /* TODO: Should we stop? */
5309 if (r) {
aac89168 5310 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
4a580877 5311 r, adev_to_drm(tmp_adev)->unique);
26bc5340
AG
5312 tmp_adev->asic_reset_res = r;
5313 }
247c7b0d
AG
5314
5315 /*
5316 * Drop all pending non scheduler resets. Scheduler resets
5317 * were already dropped during drm_sched_stop
5318 */
d193b12b 5319 amdgpu_device_stop_pending_resets(tmp_adev);
26bc5340
AG
5320 }
5321
5322 /* Actual ASIC resets if needed.*/
4f30d920 5323 /* Host driver will handle XGMI hive reset for SRIOV */
26bc5340
AG
5324 if (amdgpu_sriov_vf(adev)) {
5325 r = amdgpu_device_reset_sriov(adev, job ? false : true);
5326 if (r)
5327 adev->asic_reset_res = r;
950d6425 5328
28606c4e
YC
 5329	 /* Aldebaran and gfx_11_0_3 support RAS in SRIOV, so RAS needs to be resumed during reset */
5330 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
5331 adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3))
950d6425 5332 amdgpu_ras_resume(adev);
26bc5340 5333 } else {
f1549c09 5334 r = amdgpu_do_asic_reset(device_list_handle, reset_context);
b98a1648 5336	 if (r == -EAGAIN)
26bc5340 5336 goto retry;
f5c7e779
YC
5337
5338 if (!r && gpu_reset_for_dev_remove)
5339 goto recover_end;
26bc5340
AG
5340 }
5341
1d721ed6
AG
5342skip_hw_reset:
5343
26bc5340 5344 /* Post ASIC reset for all devs .*/
655ce9cb 5345 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
7c6e68c7 5346
1d721ed6
AG
5347 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5348 struct amdgpu_ring *ring = tmp_adev->rings[i];
5349
5350 if (!ring || !ring->sched.thread)
5351 continue;
5352
6868a2c4 5353 drm_sched_start(&ring->sched, true);
1d721ed6
AG
5354 }
5355
693073a0 5356 if (adev->enable_mes && adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3))
ed67f729
JX
5357 amdgpu_mes_self_test(tmp_adev);
5358
b8920e1e 5359 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
4a580877 5360 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
1d721ed6 5361
7258fa31
SK
5362 if (tmp_adev->asic_reset_res)
5363 r = tmp_adev->asic_reset_res;
5364
1d721ed6 5365 tmp_adev->asic_reset_res = 0;
26bc5340
AG
5366
5367 if (r) {
5368 /* bad news, how to tell it to userspace ? */
12ffa55d 5369 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
5370 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5371 } else {
12ffa55d 5372 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
3fa8f89d
S
5373 if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5374 DRM_WARN("smart shift update failed\n");
26bc5340 5375 }
7c6e68c7 5376 }
26bc5340 5377
7c6e68c7 5378skip_sched_resume:
655ce9cb 5379 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
428890a3 5380 /* unlock kfd: SRIOV would do it separately */
c004d44e 5381 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
428890a3 5382 amdgpu_amdkfd_post_reset(tmp_adev);
8e2712e7 5383
 5384	 /* kfd_post_reset will do nothing if the kfd device is not initialized;
 5385	 * bring up kfd here if it was not initialized before
5386 */
 5387	 if (!tmp_adev->kfd.init_complete)
 5388	 amdgpu_amdkfd_device_init(tmp_adev);
5389
3f12acc8
EQ
5390 if (audio_suspended)
5391 amdgpu_device_resume_display_audio(tmp_adev);
e923be99
AG
5392
5393 amdgpu_device_unset_mp1_state(tmp_adev);
d293470e
YC
5394
5395 amdgpu_ras_set_error_query_ready(tmp_adev, true);
26bc5340
AG
5396 }
5397
f5c7e779 5398recover_end:
e923be99
AG
5399 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5400 reset_list);
5401 amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5402
9e94d22c 5403 if (hive) {
9e94d22c 5404 mutex_unlock(&hive->hive_lock);
d95e8e97 5405 amdgpu_put_xgmi_hive(hive);
9e94d22c 5406 }
26bc5340 5407
f287a3c5 5408 if (r)
26bc5340 5409 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
ab9a0b1f
AG
5410
5411 atomic_set(&adev->reset_domain->reset_res, r);
d38ceaf9
AD
5412 return r;
5413}
5414
e3ecdffa
AD
5415/**
 5416	 * amdgpu_device_get_pcie_info - fetch PCIe info about the PCIe slot
5417 *
5418 * @adev: amdgpu_device pointer
5419 *
 5420	 * Fetches and stores in the driver the PCIe capabilities (gen speed
5421 * and lanes) of the slot the device is in. Handles APUs and
5422 * virtualized environments where PCIE config space may not be available.
5423 */
5494d864 5424static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 5425{
5d9a6330 5426 struct pci_dev *pdev;
c5313457
HK
5427 enum pci_bus_speed speed_cap, platform_speed_cap;
5428 enum pcie_link_width platform_link_width;
d0dd7f0c 5429
cd474ba0
AD
5430 if (amdgpu_pcie_gen_cap)
5431 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 5432
cd474ba0
AD
5433 if (amdgpu_pcie_lane_cap)
5434 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 5435
cd474ba0 5436 /* covers APUs as well */
04e85958 5437 if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
cd474ba0
AD
5438 if (adev->pm.pcie_gen_mask == 0)
5439 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5440 if (adev->pm.pcie_mlw_mask == 0)
5441 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 5442 return;
cd474ba0 5443 }
d0dd7f0c 5444
c5313457
HK
5445 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5446 return;
5447
dbaa922b
AD
5448 pcie_bandwidth_available(adev->pdev, NULL,
5449 &platform_speed_cap, &platform_link_width);
c5313457 5450
cd474ba0 5451 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
5452 /* asic caps */
5453 pdev = adev->pdev;
5454 speed_cap = pcie_get_speed_cap(pdev);
5455 if (speed_cap == PCI_SPEED_UNKNOWN) {
5456 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
5457 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5458 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 5459 } else {
2b3a1f51
FX
5460 if (speed_cap == PCIE_SPEED_32_0GT)
5461 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5462 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5463 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5464 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5465 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5466 else if (speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5467 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5468 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5469 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5470 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5471 else if (speed_cap == PCIE_SPEED_8_0GT)
5472 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5473 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5474 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5475 else if (speed_cap == PCIE_SPEED_5_0GT)
5476 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5477 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5478 else
5479 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5480 }
5481 /* platform caps */
c5313457 5482 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
5483 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5484 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5485 } else {
2b3a1f51
FX
5486 if (platform_speed_cap == PCIE_SPEED_32_0GT)
5487 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5488 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5489 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5490 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5491 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5492 else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5493 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5494 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5495 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5496 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 5497 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
5498 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5499 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5500 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 5501 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
5502 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5503 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5504 else
5505 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5506
cd474ba0
AD
5507 }
5508 }
5509 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 5510 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
5511 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5512 } else {
c5313457 5513 switch (platform_link_width) {
5d9a6330 5514 case PCIE_LNK_X32:
cd474ba0
AD
5515 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5516 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5517 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5518 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5519 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5520 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5521 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5522 break;
5d9a6330 5523 case PCIE_LNK_X16:
cd474ba0
AD
5524 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5525 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5526 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5527 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5528 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5529 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5530 break;
5d9a6330 5531 case PCIE_LNK_X12:
cd474ba0
AD
5532 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5533 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5534 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5535 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5536 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5537 break;
5d9a6330 5538 case PCIE_LNK_X8:
cd474ba0
AD
5539 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5540 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5541 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5542 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5543 break;
5d9a6330 5544 case PCIE_LNK_X4:
cd474ba0
AD
5545 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5546 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5547 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5548 break;
5d9a6330 5549 case PCIE_LNK_X2:
cd474ba0
AD
5550 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5551 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5552 break;
5d9a6330 5553 case PCIE_LNK_X1:
cd474ba0
AD
5554 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5555 break;
5556 default:
5557 break;
5558 }
d0dd7f0c
AD
5559 }
5560 }
5561}
d38ceaf9 5562
08a2fd23
RE
5563/**
5564 * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
5565 *
5566 * @adev: amdgpu_device pointer
5567 * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
5568 *
5569 * Return true if @peer_adev can access (DMA) @adev through the PCIe
5570 * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
5571 * @peer_adev.
5572 */
5573bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
5574 struct amdgpu_device *peer_adev)
5575{
5576#ifdef CONFIG_HSA_AMD_P2P
5577 uint64_t address_mask = peer_adev->dev->dma_mask ?
5578 ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
5579 resource_size_t aper_limit =
5580 adev->gmc.aper_base + adev->gmc.aper_size - 1;
bb66ecbf
LL
5581 bool p2p_access =
5582 !adev->gmc.xgmi.connected_to_cpu &&
5583 !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
08a2fd23
RE
5584
5585 return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
5586 adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
5587 !(adev->gmc.aper_base & address_mask ||
5588 aper_limit & address_mask));
5589#else
5590 return false;
5591#endif
5592}
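/*
 * In practice this check gates PCIe P2P DMA between two amdgpu devices:
 * the exporting device must expose all of its VRAM through a
 * host-visible BAR (real_vram_size == visible_vram_size) and the BAR
 * range must be addressable under the peer's DMA mask.
 */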
5593
361dbd01
AD
5594int amdgpu_device_baco_enter(struct drm_device *dev)
5595{
1348969a 5596 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5597 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01 5598
6ab68650 5599 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
5600 return -ENOTSUPP;
5601
8ab0d6f0 5602 if (ras && adev->ras_enabled &&
acdae216 5603 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5604 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
5605
9530273e 5606 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
5607}
5608
5609int amdgpu_device_baco_exit(struct drm_device *dev)
5610{
1348969a 5611 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5612 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 5613 int ret = 0;
361dbd01 5614
6ab68650 5615 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
5616 return -ENOTSUPP;
5617
9530273e
EQ
5618 ret = amdgpu_dpm_baco_exit(adev);
5619 if (ret)
5620 return ret;
7a22677b 5621
8ab0d6f0 5622 if (ras && adev->ras_enabled &&
acdae216 5623 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5624 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
5625
1bece222
CL
5626 if (amdgpu_passthrough(adev) &&
5627 adev->nbio.funcs->clear_doorbell_interrupt)
5628 adev->nbio.funcs->clear_doorbell_interrupt(adev);
5629
7a22677b 5630 return 0;
361dbd01 5631}
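/*
 * The BACO enter/exit pair above also brackets the doorbell interrupt:
 * with RAS enabled, doorbell interrupts are disabled before entering
 * BACO and re-enabled only after a successful exit, so they are never
 * left armed while the device sits in the BACO power state.
 */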
c9a6b82f
AG
5632
5633/**
5634 * amdgpu_pci_error_detected - Called when a PCI error is detected.
5635 * @pdev: PCI device struct
5636 * @state: PCI channel state
5637 *
5638 * Description: Called when a PCI error is detected.
5639 *
5640 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
5641 */
5642pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
5643{
5644 struct drm_device *dev = pci_get_drvdata(pdev);
5645 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5646 int i;
c9a6b82f
AG
5647
5648 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
5649
6894305c
AG
5650 if (adev->gmc.xgmi.num_physical_nodes > 1) {
5651 DRM_WARN("No support for XGMI hive yet...");
5652 return PCI_ERS_RESULT_DISCONNECT;
5653 }
5654
e17e27f9
GC
5655 adev->pci_channel_state = state;
5656
c9a6b82f
AG
5657 switch (state) {
5658 case pci_channel_io_normal:
5659 return PCI_ERS_RESULT_CAN_RECOVER;
acd89fca 5660 /* Fatal error, prepare for slot reset */
8a11d283
TZ
5661 case pci_channel_io_frozen:
5662 /*
d0fb18b5 5663 * Locking adev->reset_domain->sem will prevent any external access
acd89fca
AG
5664 * to GPU during PCI error recovery
5665 */
3675c2f2 5666 amdgpu_device_lock_reset_domain(adev->reset_domain);
e923be99 5667 amdgpu_device_set_mp1_state(adev);
acd89fca
AG
5668
5669 /*
5670 * Block any work scheduling as we do for regular GPU reset
5671 * for the duration of the recovery
5672 */
5673 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5674 struct amdgpu_ring *ring = adev->rings[i];
5675
5676 if (!ring || !ring->sched.thread)
5677 continue;
5678
5679 drm_sched_stop(&ring->sched, NULL);
5680 }
8f8c80f4 5681 atomic_inc(&adev->gpu_reset_counter);
c9a6b82f
AG
5682 return PCI_ERS_RESULT_NEED_RESET;
5683 case pci_channel_io_perm_failure:
5684 /* Permanent error, prepare for device removal */
5685 return PCI_ERS_RESULT_DISCONNECT;
5686 }
5687
5688 return PCI_ERS_RESULT_NEED_RESET;
5689}
5690
5691/**
5692 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
5693 * @pdev: pointer to PCI device
5694 */
5695pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
5696{
5697
5698 DRM_INFO("PCI error: mmio enabled callback!!\n");
5699
5700 /* TODO - dump whatever for debugging purposes */
5701
 5702	 /* This is called only if amdgpu_pci_error_detected returns
 5703	 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
 5704	 * works, so there is no need to reset the slot.
5705 */
5706
5707 return PCI_ERS_RESULT_RECOVERED;
5708}
5709
5710/**
5711 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
5712 * @pdev: PCI device struct
5713 *
5714 * Description: This routine is called by the pci error recovery
5715 * code after the PCI slot has been reset, just before we
5716 * should resume normal operations.
5717 */
5718pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
5719{
5720 struct drm_device *dev = pci_get_drvdata(pdev);
5721 struct amdgpu_device *adev = drm_to_adev(dev);
362c7b91 5722 int r, i;
04442bf7 5723 struct amdgpu_reset_context reset_context;
362c7b91 5724 u32 memsize;
7ac71382 5725 struct list_head device_list;
c9a6b82f
AG
5726
5727 DRM_INFO("PCI error: slot reset callback!!\n");
5728
04442bf7
LL
5729 memset(&reset_context, 0, sizeof(reset_context));
5730
7ac71382 5731 INIT_LIST_HEAD(&device_list);
655ce9cb 5732 list_add_tail(&adev->reset_list, &device_list);
7ac71382 5733
362c7b91
AG
5734 /* wait for asic to come out of reset */
5735 msleep(500);
5736
7ac71382 5737 /* Restore PCI confspace */
c1dd4aa6 5738 amdgpu_device_load_pci_state(pdev);
c9a6b82f 5739
362c7b91
AG
5740 /* confirm ASIC came out of reset */
5741 for (i = 0; i < adev->usec_timeout; i++) {
5742 memsize = amdgpu_asic_get_config_memsize(adev);
5743
5744 if (memsize != 0xffffffff)
5745 break;
5746 udelay(1);
5747 }
5748 if (memsize == 0xffffffff) {
5749 r = -ETIME;
5750 goto out;
5751 }
5752
04442bf7
LL
5753 reset_context.method = AMD_RESET_METHOD_NONE;
5754 reset_context.reset_req_dev = adev;
5755 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
5756 set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
5757
7afefb81 5758 adev->no_hw_access = true;
04442bf7 5759 r = amdgpu_device_pre_asic_reset(adev, &reset_context);
7afefb81 5760 adev->no_hw_access = false;
c9a6b82f
AG
5761 if (r)
5762 goto out;
5763
04442bf7 5764 r = amdgpu_do_asic_reset(&device_list, &reset_context);
c9a6b82f
AG
5765
5766out:
c9a6b82f 5767 if (!r) {
c1dd4aa6
AG
5768 if (amdgpu_device_cache_pci_state(adev->pdev))
5769 pci_restore_state(adev->pdev);
5770
c9a6b82f
AG
5771 DRM_INFO("PCIe error recovery succeeded\n");
5772 } else {
5773 DRM_ERROR("PCIe error recovery failed, err:%d", r);
e923be99
AG
5774 amdgpu_device_unset_mp1_state(adev);
5775 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f
AG
5776 }
5777
5778 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
5779}
5780
5781/**
5782 * amdgpu_pci_resume() - resume normal ops after PCI reset
5783 * @pdev: pointer to PCI device
5784 *
 5785	 * Called when the error recovery driver tells us that it's
505199a3 5786 * OK to resume normal operation.
c9a6b82f
AG
5787 */
5788void amdgpu_pci_resume(struct pci_dev *pdev)
5789{
5790 struct drm_device *dev = pci_get_drvdata(pdev);
5791 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5792 int i;
c9a6b82f 5793
c9a6b82f
AG
5794
5795 DRM_INFO("PCI error: resume callback!!\n");
acd89fca 5796
e17e27f9
GC
5797 /* Only continue execution for the case of pci_channel_io_frozen */
5798 if (adev->pci_channel_state != pci_channel_io_frozen)
5799 return;
5800
acd89fca
AG
5801 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5802 struct amdgpu_ring *ring = adev->rings[i];
5803
5804 if (!ring || !ring->sched.thread)
5805 continue;
5806
acd89fca
AG
5807 drm_sched_start(&ring->sched, true);
5808 }
5809
e923be99
AG
5810 amdgpu_device_unset_mp1_state(adev);
5811 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f 5812}
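/*
 * Taken together, amdgpu_pci_error_detected(), amdgpu_pci_mmio_enabled(),
 * amdgpu_pci_slot_reset() and amdgpu_pci_resume() implement the PCI
 * error-recovery callbacks: detection stops the schedulers and locks the
 * reset domain for the frozen-channel case, slot_reset restores config
 * space and reruns the ASIC reset path, and resume restarts the
 * schedulers and drops the reset-domain lock again.
 */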
c1dd4aa6
AG
5813
5814bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
5815{
5816 struct drm_device *dev = pci_get_drvdata(pdev);
5817 struct amdgpu_device *adev = drm_to_adev(dev);
5818 int r;
5819
5820 r = pci_save_state(pdev);
5821 if (!r) {
5822 kfree(adev->pci_state);
5823
5824 adev->pci_state = pci_store_saved_state(pdev);
5825
5826 if (!adev->pci_state) {
5827 DRM_ERROR("Failed to store PCI saved state");
5828 return false;
5829 }
5830 } else {
5831 DRM_WARN("Failed to save PCI state, err:%d\n", r);
5832 return false;
5833 }
5834
5835 return true;
5836}
5837
5838bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
5839{
5840 struct drm_device *dev = pci_get_drvdata(pdev);
5841 struct amdgpu_device *adev = drm_to_adev(dev);
5842 int r;
5843
5844 if (!adev->pci_state)
5845 return false;
5846
5847 r = pci_load_saved_state(pdev, adev->pci_state);
5848
5849 if (!r) {
5850 pci_restore_state(pdev);
5851 } else {
5852 DRM_WARN("Failed to load PCI state, err:%d\n", r);
5853 return false;
5854 }
5855
5856 return true;
5857}
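/*
 * amdgpu_device_cache_pci_state() and amdgpu_device_load_pci_state()
 * keep a saved copy of the PCI config space in adev->pci_state so it
 * can be restored after events that clobber it, such as the slot reset
 * handled in amdgpu_pci_slot_reset() above.
 */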
5858
810085dd
EH
5859void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
5860 struct amdgpu_ring *ring)
5861{
5862#ifdef CONFIG_X86_64
b818a5d3 5863 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
5864 return;
5865#endif
5866 if (adev->gmc.xgmi.connected_to_cpu)
5867 return;
5868
5869 if (ring && ring->funcs->emit_hdp_flush)
5870 amdgpu_ring_emit_hdp_flush(ring);
5871 else
5872 amdgpu_asic_flush_hdp(adev, ring);
5873}
c1dd4aa6 5874
810085dd
EH
5875void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
5876 struct amdgpu_ring *ring)
5877{
5878#ifdef CONFIG_X86_64
b818a5d3 5879 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
5880 return;
5881#endif
5882 if (adev->gmc.xgmi.connected_to_cpu)
5883 return;
c1dd4aa6 5884
810085dd
EH
5885 amdgpu_asic_invalidate_hdp(adev, ring);
5886}
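/*
 * Both HDP helpers above return early on bare-metal APUs (on x86-64)
 * and on parts whose XGMI link is coherent with the CPU, where the HDP
 * cache is not in the CPU access path; otherwise the flush/invalidate
 * is issued either as a ring packet or through the ASIC callback.
 */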
34f3a4a9 5887
89a7a870
AG
5888int amdgpu_in_reset(struct amdgpu_device *adev)
5889{
5890 return atomic_read(&adev->reset_domain->in_gpu_reset);
53a17b6b
TZ
5891}
5892
34f3a4a9
LY
5893/**
5894 * amdgpu_device_halt() - bring hardware to some kind of halt state
5895 *
5896 * @adev: amdgpu_device pointer
5897 *
5898 * Bring hardware to some kind of halt state so that no one can touch it
 5899	 * any more. It helps to maintain the error context when an error occurs.
 5900	 * Compared to a simple hang, the system will stay stable at least for SSH
 5901	 * access. Then it should be trivial to inspect the hardware state and
 5902	 * see what's going on. Implemented as follows:
5903 *
 5904	 * 1. drm_dev_unplug() makes the device inaccessible to user space (IOCTLs, etc.),
5905 * clears all CPU mappings to device, disallows remappings through page faults
5906 * 2. amdgpu_irq_disable_all() disables all interrupts
5907 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
 5908	 * 4. set adev->no_hw_access to avoid potential crashes after step 5
5909 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
5910 * 6. pci_disable_device() and pci_wait_for_pending_transaction()
5911 * flush any in flight DMA operations
5912 */
5913void amdgpu_device_halt(struct amdgpu_device *adev)
5914{
5915 struct pci_dev *pdev = adev->pdev;
e0f943b4 5916 struct drm_device *ddev = adev_to_drm(adev);
34f3a4a9 5917
2c1c7ba4 5918 amdgpu_xcp_dev_unplug(adev);
34f3a4a9
LY
5919 drm_dev_unplug(ddev);
5920
5921 amdgpu_irq_disable_all(adev);
5922
5923 amdgpu_fence_driver_hw_fini(adev);
5924
5925 adev->no_hw_access = true;
5926
5927 amdgpu_device_unmap_mmio(adev);
5928
5929 pci_disable_device(pdev);
5930 pci_wait_for_pending_transaction(pdev);
5931}
86700a40
XD
5932
5933u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
5934 u32 reg)
5935{
5936 unsigned long flags, address, data;
5937 u32 r;
5938
5939 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
5940 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
5941
5942 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
5943 WREG32(address, reg * 4);
5944 (void)RREG32(address);
5945 r = RREG32(data);
5946 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
5947 return r;
5948}
5949
5950void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
5951 u32 reg, u32 v)
5952{
5953 unsigned long flags, address, data;
5954
5955 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
5956 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
5957
5958 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
5959 WREG32(address, reg * 4);
5960 (void)RREG32(address);
5961 WREG32(data, v);
5962 (void)RREG32(data);
5963 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
5964}
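/*
 * The two PCIe port accessors above use the index/data register pair
 * exposed by the NBIO block: the port register offset is written to the
 * index register and the value is then read or written through the data
 * register, with read-backs to flush the posted writes, all under
 * pcie_idx_lock so concurrent accesses cannot interleave.
 */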
68ce8b24
CK
5965
5966/**
5967 * amdgpu_device_switch_gang - switch to a new gang
5968 * @adev: amdgpu_device pointer
5969 * @gang: the gang to switch to
5970 *
5971 * Try to switch to a new gang.
5972 * Returns: NULL if we switched to the new gang or a reference to the current
5973 * gang leader.
5974 */
5975struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
5976 struct dma_fence *gang)
5977{
5978 struct dma_fence *old = NULL;
5979
5980 do {
5981 dma_fence_put(old);
5982 rcu_read_lock();
5983 old = dma_fence_get_rcu_safe(&adev->gang_submit);
5984 rcu_read_unlock();
5985
5986 if (old == gang)
5987 break;
5988
5989 if (!dma_fence_is_signaled(old))
5990 return old;
5991
5992 } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
5993 old, gang) != old);
5994
5995 dma_fence_put(old);
5996 return NULL;
5997}
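/*
 * A minimal, hypothetical usage sketch for the gang switch (only
 * amdgpu_device_switch_gang() itself is taken from the code above; the
 * other names are illustrative): a submitter retries until the switch
 * succeeds, waiting on whichever gang leader is still running:
 *
 *	struct dma_fence *old;
 *	long r;
 *
 *	while ((old = amdgpu_device_switch_gang(adev, new_gang_fence))) {
 *		r = dma_fence_wait(old, true);
 *		dma_fence_put(old);
 *		if (r)
 *			return r;
 *	}
 */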
220c8cc8
AD
5998
5999bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6000{
6001 switch (adev->asic_type) {
6002#ifdef CONFIG_DRM_AMDGPU_SI
6003 case CHIP_HAINAN:
6004#endif
6005 case CHIP_TOPAZ:
6006 /* chips with no display hardware */
6007 return false;
6008#ifdef CONFIG_DRM_AMDGPU_SI
6009 case CHIP_TAHITI:
6010 case CHIP_PITCAIRN:
6011 case CHIP_VERDE:
6012 case CHIP_OLAND:
6013#endif
6014#ifdef CONFIG_DRM_AMDGPU_CIK
6015 case CHIP_BONAIRE:
6016 case CHIP_HAWAII:
6017 case CHIP_KAVERI:
6018 case CHIP_KABINI:
6019 case CHIP_MULLINS:
6020#endif
6021 case CHIP_TONGA:
6022 case CHIP_FIJI:
6023 case CHIP_POLARIS10:
6024 case CHIP_POLARIS11:
6025 case CHIP_POLARIS12:
6026 case CHIP_VEGAM:
6027 case CHIP_CARRIZO:
6028 case CHIP_STONEY:
6029 /* chips with display hardware */
6030 return true;
6031 default:
6032 /* IP discovery */
6033 if (!adev->ip_versions[DCE_HWIP][0] ||
6034 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6035 return false;
6036 return true;
6037 }
6038}
81283fee
JZ
6039
6040uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
6041 uint32_t inst, uint32_t reg_addr, char reg_name[],
6042 uint32_t expected_value, uint32_t mask)
6043{
6044 uint32_t ret = 0;
6045 uint32_t old_ = 0;
6046 uint32_t tmp_ = RREG32(reg_addr);
6047 uint32_t loop = adev->usec_timeout;
6048
6049 while ((tmp_ & (mask)) != (expected_value)) {
6050 if (old_ != tmp_) {
6051 loop = adev->usec_timeout;
6052 old_ = tmp_;
6053 } else
6054 udelay(1);
6055 tmp_ = RREG32(reg_addr);
6056 loop--;
6057 if (!loop) {
 6058	 DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08x\n",
6059 inst, reg_name, (uint32_t)expected_value,
6060 (uint32_t)(tmp_ & (mask)));
6061 ret = -ETIMEDOUT;
6062 break;
6063 }
6064 }
6065 return ret;
6066}
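/*
 * A hypothetical usage sketch for the polling helper above (the
 * register name and values are illustrative): wait until bit 0 of a
 * status register reads back as 1, giving up if adev->usec_timeout
 * polls pass without the register value changing:
 *
 *	uint32_t r;
 *
 *	r = amdgpu_device_wait_on_rreg(adev, 0, regFOO_STATUS,
 *				       "FOO_STATUS", 0x1, 0x1);
 *	if (r)
 *		return -ETIMEDOUT;
 */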