/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/console.h>
#include <linux/slab.h>
#include <linux/iommu.h>
#include <linux/pci.h>
#include <linux/devcoredump.h>
#include <generated/utsrelease.h>
#include <linux/pci-p2pdma.h>
#include <linux/apple-gmux.h>

#include <drm/drm_aperture.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_fb_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "nv.h"
#include "bif/bif_4_1_d.h"
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
#include "amdgpu_pmu.h"
#include "amdgpu_fru_eeprom.h"
#include "amdgpu_reset.h"

#include <linux/suspend.h>
#include <drm/task_barrier.h>
#include <linux/pm_runtime.h>

#include <drm/drm_drv.h>

#if IS_ENABLED(CONFIG_X86)
#include <asm/intel-family.h>
#endif

MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");

#define AMDGPU_RESUME_MS		2000
#define AMDGPU_MAX_RETRY_LIMIT		2
#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)

static const struct drm_driver amdgpu_kms_driver;

const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"ARCTURUS",
	"RENOIR",
	"ALDEBARAN",
	"NAVI10",
	"CYAN_SKILLFISH",
	"NAVI14",
	"NAVI12",
	"SIENNA_CICHLID",
	"NAVY_FLOUNDER",
	"VANGOGH",
	"DIMGREY_CAVEFISH",
	"BEIGE_GOBY",
	"YELLOW_CARP",
	"IP DISCOVERY",
	"LAST",
};

/**
 * DOC: pcie_replay_count
 *
 * The amdgpu driver provides a sysfs API for reporting the total number
 * of PCIe replays (NAKs).
 * The file pcie_replay_count is used for this and returns the total
 * number of replays as a sum of the NAKs generated and NAKs received.
 */

static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);
	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);

	return sysfs_emit(buf, "%llu\n", cnt);
}

static DEVICE_ATTR(pcie_replay_count, 0444,
		   amdgpu_device_get_pcie_replay_count, NULL);

static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);

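/*
 * Illustrative note (added for clarity, not part of the original sources):
 * once the driver is bound, the attribute above is exposed through sysfs on
 * the GPU's PCI device, so the replay counter can typically be read from
 * userspace with something like:
 *
 *	$ cat /sys/class/drm/card0/device/pcie_replay_count
 *
 * The exact card index depends on the system configuration.
 */
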
/**
 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with ATPX power control,
 * otherwise return false.
 */
bool amdgpu_device_supports_px(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);

	if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
		return true;
	return false;
}

/**
 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with ACPI power control,
 * otherwise return false.
 */
bool amdgpu_device_supports_boco(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);

	if (adev->has_pr3 ||
	    ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
		return true;
	return false;
}

/**
 * amdgpu_device_supports_baco - Does the device support BACO
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device supports BACO,
 * otherwise return false.
 */
bool amdgpu_device_supports_baco(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);

	return amdgpu_asic_supports_baco(adev);
}

/**
 * amdgpu_device_supports_smart_shift - Is the device dGPU with
 * smart shift support
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with Smart Shift support,
 * otherwise returns false.
 */
bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
{
	return (amdgpu_device_supports_boco(dev) &&
		amdgpu_acpi_is_power_shift_control_supported());
}

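/*
 * Illustrative sketch only (the real policy lives in the runtime-PM setup
 * code elsewhere in the driver): a caller picking a runtime power-off
 * strategy would typically consult these predicates in order, e.g.:
 *
 *	if (amdgpu_device_supports_px(ddev))
 *		;	// ATPX switches the dGPU off
 *	else if (amdgpu_device_supports_boco(ddev))
 *		;	// ACPI (_PR3 / hybrid) handles the power off
 *	else if (amdgpu_device_supports_baco(ddev))
 *		;	// fall back to BACO
 */
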
/*
 * VRAM access helper functions
 */

/**
 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size, sizeof(@buf) must be > @size
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
			     void *buf, size_t size, bool write)
{
	unsigned long flags;
	uint32_t hi = ~0, tmp = 0;
	uint32_t *data = buf;
	uint64_t last;
	int idx;

	if (!drm_dev_enter(adev_to_drm(adev), &idx))
		return;

	BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));

	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
	for (last = pos + size; pos < last; pos += 4) {
		tmp = pos >> 31;

		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
		if (tmp != hi) {
			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
			hi = tmp;
		}
		if (write)
			WREG32_NO_KIQ(mmMM_DATA, *data++);
		else
			*data++ = RREG32_NO_KIQ(mmMM_DATA);
	}

	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	drm_dev_exit(idx);
}

/**
 * amdgpu_device_aper_access - access vram by vram aperture
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size, sizeof(@buf) must be > @size
 * @write: true - write to vram, otherwise - read from vram
 *
 * The return value means how many bytes have been transferred.
 */
size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
				 void *buf, size_t size, bool write)
{
#ifdef CONFIG_64BIT
	void __iomem *addr;
	size_t count = 0;
	uint64_t last;

	if (!adev->mman.aper_base_kaddr)
		return 0;

	last = min(pos + size, adev->gmc.visible_vram_size);
	if (last > pos) {
		addr = adev->mman.aper_base_kaddr + pos;
		count = last - pos;

		if (write) {
			memcpy_toio(addr, buf, count);
			/* Make sure HDP write cache flush happens without any reordering
			 * after the system memory contents are sent over PCIe device
			 */
			mb();
			amdgpu_device_flush_hdp(adev, NULL);
		} else {
			amdgpu_device_invalidate_hdp(adev, NULL);
			/* Make sure HDP read cache is invalidated before issuing a read
			 * to the PCIe device
			 */
			mb();
			memcpy_fromio(buf, addr, count);
		}

	}

	return count;
#else
	return 0;
#endif
}

/**
 * amdgpu_device_vram_access - read/write a buffer in vram
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size, sizeof(@buf) must be > @size
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
			       void *buf, size_t size, bool write)
{
	size_t count;

	/* try using the vram aperture to access vram first */
	count = amdgpu_device_aper_access(adev, pos, buf, size, write);
	size -= count;
	if (size) {
		/* use MM to access the rest of vram */
		pos += count;
		buf += count;
		amdgpu_device_mm_access(adev, pos, buf, size, write);
	}
}

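/*
 * Usage sketch (illustrative, assumes a valid and initialized adev): read
 * the first 16 bytes of VRAM into a local buffer and write them back.
 *
 *	uint32_t data[4];
 *
 *	amdgpu_device_vram_access(adev, 0, data, sizeof(data), false);
 *	amdgpu_device_vram_access(adev, 0, data, sizeof(data), true);
 *
 * Offsets and sizes must be 4-byte aligned because the MM_INDEX/MM_DATA
 * fallback path transfers one dword at a time.
 */
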
/*
 * register access helper functions.
 */

/* Check if hw access should be skipped because of hotplug or device error */
bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
{
	if (adev->no_hw_access)
		return true;

#ifdef CONFIG_LOCKDEP
	/*
	 * This is a bit complicated to understand, so worth a comment. What we assert
	 * here is that the GPU reset is not running on another thread in parallel.
	 *
	 * For this we trylock the read side of the reset semaphore, if that succeeds
	 * we know that the reset is not running in parallel.
	 *
	 * If the trylock fails we assert that we are either already holding the read
	 * side of the lock or are the reset thread itself and hold the write side of
	 * the lock.
	 */
	if (in_task()) {
		if (down_read_trylock(&adev->reset_domain->sem))
			up_read(&adev->reset_domain->sem);
		else
			lockdep_assert_held(&adev->reset_domain->sem);
	}
#endif
	return false;
}

/**
 * amdgpu_device_rreg - read a memory mapped IO or indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
			    uint32_t reg, uint32_t acc_flags)
{
	uint32_t ret;

	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	if ((reg * 4) < adev->rmmio_size) {
		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
		    amdgpu_sriov_runtime(adev) &&
		    down_read_trylock(&adev->reset_domain->sem)) {
			ret = amdgpu_kiq_rreg(adev, reg);
			up_read(&adev->reset_domain->sem);
		} else {
			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
		}
	} else {
		ret = adev->pcie_rreg(adev, reg * 4);
	}

	trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);

	return ret;
}

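/*
 * Illustrative example (not part of the original file): most register
 * accesses in the driver reach this helper through wrapper macros; a direct
 * call that must bypass the KIQ path - for instance from code that already
 * holds the reset domain lock - would pass AMDGPU_REGS_NO_KIQ:
 *
 *	u32 val = amdgpu_device_rreg(adev, reg_offset, AMDGPU_REGS_NO_KIQ);
 *
 * ("reg_offset" here is just a placeholder for a dword-aligned offset.)
 */
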
/*
 * MMIO register read with bytes helper functions
 * @offset: bytes offset from MMIO start
 */

/**
 * amdgpu_mm_rreg8 - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 *
 * Returns the 8 bit value from the offset specified.
 */
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
{
	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	if (offset < adev->rmmio_size)
		return (readb(adev->rmmio + offset));
	BUG();
}

/*
 * MMIO register write with bytes helper functions
 * @offset: bytes offset from MMIO start
 * @value: the value want to be written to the register
 */

/**
 * amdgpu_mm_wreg8 - write a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
{
	if (amdgpu_device_skip_hw_access(adev))
		return;

	if (offset < adev->rmmio_size)
		writeb(value, adev->rmmio + offset);
	else
		BUG();
}

/**
 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_device_wreg(struct amdgpu_device *adev,
			uint32_t reg, uint32_t v,
			uint32_t acc_flags)
{
	if (amdgpu_device_skip_hw_access(adev))
		return;

	if ((reg * 4) < adev->rmmio_size) {
		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
		    amdgpu_sriov_runtime(adev) &&
		    down_read_trylock(&adev->reset_domain->sem)) {
			amdgpu_kiq_wreg(adev, reg, v);
			up_read(&adev->reset_domain->sem);
		} else {
			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
		}
	} else {
		adev->pcie_wreg(adev, reg * 4, v);
	}

	trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
}

/**
 * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
 *
 * @adev: amdgpu_device pointer
 * @reg: mmio/rlc register
 * @v: value to write
 * @xcc_id: xcc accelerated compute core id
 *
 * This function is invoked only for the debugfs register access.
 */
void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
			     uint32_t reg, uint32_t v,
			     uint32_t xcc_id)
{
	if (amdgpu_device_skip_hw_access(adev))
		return;

	if (amdgpu_sriov_fullaccess(adev) &&
	    adev->gfx.rlc.funcs &&
	    adev->gfx.rlc.funcs->is_rlcg_access_range) {
		if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
			return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
	} else if ((reg * 4) >= adev->rmmio_size) {
		adev->pcie_wreg(adev, reg * 4, v);
	} else {
		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
	}
}

/**
 * amdgpu_device_indirect_rreg - read an indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register address to read from
 *
 * Returns the value of indirect register @reg_addr
 */
u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
				u32 reg_addr)
{
	unsigned long flags, pcie_index, pcie_data;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;
	u32 r;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	r = readl(pcie_data_offset);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return r;
}

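/*
 * Background note (added for clarity): the helper above implements the
 * classic index/data pair protocol - the target offset is written to the
 * PCIE index register and the value is then transferred through the PCIE
 * data register, with dummy readl() calls to flush the posted writes.
 * ASIC code typically just routes its pcie_rreg/pcie_wreg callbacks to
 * these helpers; a hypothetical wiring could look like:
 *
 *	adev->pcie_rreg = amdgpu_device_indirect_rreg;
 *	adev->pcie_wreg = amdgpu_device_indirect_wreg;
 */
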
u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
				    u64 reg_addr)
{
	unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
	u32 r;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_index_hi_offset;
	void __iomem *pcie_data_offset;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
	if (adev->nbio.funcs->get_pcie_index_hi_offset)
		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
	else
		pcie_index_hi = 0;

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
	if (pcie_index_hi != 0)
		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
				pcie_index_hi * 4;

	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	if (pcie_index_hi != 0) {
		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}
	r = readl(pcie_data_offset);

	/* clear the high bits */
	if (pcie_index_hi != 0) {
		writel(0, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}

	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return r;
}

/**
 * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register address to read from
 *
 * Returns the value of indirect register @reg_addr
 */
u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
				  u32 reg_addr)
{
	unsigned long flags, pcie_index, pcie_data;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;
	u64 r;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	/* read low 32 bits */
	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	r = readl(pcie_data_offset);
	/* read high 32 bits */
	writel(reg_addr + 4, pcie_index_offset);
	readl(pcie_index_offset);
	r |= ((u64)readl(pcie_data_offset) << 32);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return r;
}

/**
 * amdgpu_device_indirect_wreg - write an indirect register address
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register offset
 * @reg_data: indirect register data
 *
 */
void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
				 u32 reg_addr, u32 reg_data)
{
	unsigned long flags, pcie_index, pcie_data;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	writel(reg_data, pcie_data_offset);
	readl(pcie_data_offset);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
				     u64 reg_addr, u32 reg_data)
{
	unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_index_hi_offset;
	void __iomem *pcie_data_offset;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
	if (adev->nbio.funcs->get_pcie_index_hi_offset)
		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
	else
		pcie_index_hi = 0;

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
	if (pcie_index_hi != 0)
		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
				pcie_index_hi * 4;

	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	if (pcie_index_hi != 0) {
		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}
	writel(reg_data, pcie_data_offset);
	readl(pcie_data_offset);

	/* clear the high bits */
	if (pcie_index_hi != 0) {
		writel(0, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}

	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

/**
 * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register offset
 * @reg_data: indirect register data
 *
 */
void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
				   u32 reg_addr, u64 reg_data)
{
	unsigned long flags, pcie_index, pcie_data;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	/* write low 32 bits */
	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
	readl(pcie_data_offset);
	/* write high 32 bits */
	writel(reg_addr + 4, pcie_index_offset);
	readl(pcie_index_offset);
	writel((u32)(reg_data >> 32), pcie_data_offset);
	readl(pcie_data_offset);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

/**
 * amdgpu_device_get_rev_id - query device rev_id
 *
 * @adev: amdgpu_device pointer
 *
 * Return device rev_id
 */
u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
{
	return adev->nbio.funcs->get_rev_id(adev);
}

/**
 * amdgpu_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
	BUG();
	return 0;
}

static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
		  reg, v);
	BUG();
}

static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
{
	DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg64 - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
{
	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_block_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu_device pointer
 * @block: offset of instance
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
					  uint32_t block, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
		  reg, block);
	BUG();
	return 0;
}

/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
				      uint32_t block,
				      uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
		  reg, block, v);
	BUG();
}

/**
 * amdgpu_device_asic_init - Wrapper for atom asic_init
 *
 * @adev: amdgpu_device pointer
 *
 * Does any asic specific work and then calls atom asic init.
 */
static int amdgpu_device_asic_init(struct amdgpu_device *adev)
{
	int ret;

	amdgpu_asic_pre_asic_init(adev);

	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) ||
	    adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) {
		amdgpu_psp_wait_for_bootloader(adev);
		ret = amdgpu_atomfirmware_asic_init(adev, true);
		return ret;
	} else {
		return amdgpu_atom_asic_init(adev->mode_info.atom_context);
	}

	return 0;
}

/**
 * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
 *
 * @adev: amdgpu_device pointer
 *
 * Allocates a scratch page of VRAM for use by various things in the
 * driver.
 */
static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
{
	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
				       AMDGPU_GEM_DOMAIN_VRAM |
				       AMDGPU_GEM_DOMAIN_GTT,
				       &adev->mem_scratch.robj,
				       &adev->mem_scratch.gpu_addr,
				       (void **)&adev->mem_scratch.ptr);
}

/**
 * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
 *
 * @adev: amdgpu_device pointer
 *
 * Frees the VRAM scratch page.
 */
static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
}

/**
 * amdgpu_device_program_register_sequence - program an array of registers.
 *
 * @adev: amdgpu_device pointer
 * @registers: pointer to the register array
 * @array_size: size of the register array
 *
 * Programs an array of registers with AND and OR masks.
 * This is a helper for setting golden registers.
 */
void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
					     const u32 *registers,
					     const u32 array_size)
{
	u32 tmp, reg, and_mask, or_mask;
	int i;

	if (array_size % 3)
		return;

	for (i = 0; i < array_size; i += 3) {
		reg = registers[i + 0];
		and_mask = registers[i + 1];
		or_mask = registers[i + 2];

		if (and_mask == 0xffffffff) {
			tmp = or_mask;
		} else {
			tmp = RREG32(reg);
			tmp &= ~and_mask;
			if (adev->family >= AMDGPU_FAMILY_AI)
				tmp |= (or_mask & and_mask);
			else
				tmp |= or_mask;
		}
		WREG32(reg, tmp);
	}
}

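/*
 * Illustrative example (register names and values are made up): the array
 * is a flat list of {offset, and_mask, or_mask} triplets, so a
 * golden-register table and its programming call would look like:
 *
 *	static const u32 golden_settings_example[] = {
 *		mmREG_A, 0xffffffff, 0x00000100,	// full replace with 0x100
 *		mmREG_B, 0x0000ff00, 0x00003400,	// read-modify-write of byte 1
 *	};
 *
 *	amdgpu_device_program_register_sequence(adev,
 *						golden_settings_example,
 *						ARRAY_SIZE(golden_settings_example));
 */
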
/**
 * amdgpu_device_pci_config_reset - reset the GPU
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using the pci config reset sequence.
 * Only applicable to asics prior to vega10.
 */
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
{
	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
}

/**
 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
 */
int amdgpu_device_pci_reset(struct amdgpu_device *adev)
{
	return pci_reset_function(adev->pdev);
}

/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */

/**
 * amdgpu_device_wb_fini - Disable Writeback and free memory
 *
 * @adev: amdgpu_device pointer
 *
 * Disables Writeback and frees the Writeback memory (all asics).
 * Used at driver shutdown.
 */
static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
{
	if (adev->wb.wb_obj) {
		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
				      &adev->wb.gpu_addr,
				      (void **)&adev->wb.wb);
		adev->wb.wb_obj = NULL;
	}
}

1021}
1022
1023/**
03f2abb0 1024 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
d38ceaf9
AD
1025 *
1026 * @adev: amdgpu_device pointer
1027 *
455a7bc2 1028 * Initializes writeback and allocates writeback memory (all asics).
d38ceaf9
AD
1029 * Used at driver startup.
1030 * Returns 0 on success or an -error on failure.
1031 */
06ec9070 1032static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
1033{
1034 int r;
1035
1036 if (adev->wb.wb_obj == NULL) {
97407b63
AD
1037 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1038 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
1039 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1040 &adev->wb.wb_obj, &adev->wb.gpu_addr,
1041 (void **)&adev->wb.wb);
d38ceaf9
AD
1042 if (r) {
1043 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1044 return r;
1045 }
d38ceaf9
AD
1046
1047 adev->wb.num_wb = AMDGPU_MAX_WB;
1048 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1049
1050 /* clear wb memory */
73469585 1051 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
1052 }
1053
1054 return 0;
1055}
1056
1057/**
131b4b36 1058 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
1059 *
1060 * @adev: amdgpu_device pointer
1061 * @wb: wb index
1062 *
1063 * Allocate a wb slot for use by the driver (all asics).
1064 * Returns 0 on success or -EINVAL on failure.
1065 */
131b4b36 1066int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9
AD
1067{
1068 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 1069
97407b63 1070 if (offset < adev->wb.num_wb) {
7014285a 1071 __set_bit(offset, adev->wb.used);
63ae07ca 1072 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
1073 return 0;
1074 } else {
1075 return -EINVAL;
1076 }
1077}
1078
d38ceaf9 1079/**
131b4b36 1080 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
1081 *
1082 * @adev: amdgpu_device pointer
1083 * @wb: wb index
1084 *
1085 * Free a wb slot allocated for use by the driver (all asics)
1086 */
131b4b36 1087void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 1088{
73469585 1089 wb >>= 3;
d38ceaf9 1090 if (wb < adev->wb.num_wb)
73469585 1091 __clear_bit(wb, adev->wb.used);
d38ceaf9
AD
1092}
1093
d6895ad3
CK
1094/**
1095 * amdgpu_device_resize_fb_bar - try to resize FB BAR
1096 *
1097 * @adev: amdgpu_device pointer
1098 *
1099 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1100 * to fail, but if any of the BARs is not accessible after the size we abort
1101 * driver loading by returning -ENODEV.
1102 */
1103int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1104{
453f617a 1105 int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
31b8adab
CK
1106 struct pci_bus *root;
1107 struct resource *res;
b8920e1e 1108 unsigned int i;
d6895ad3
CK
1109 u16 cmd;
1110 int r;
1111
822130b5
AB
1112 if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1113 return 0;
1114
0c03b912 1115 /* Bypass for VF */
1116 if (amdgpu_sriov_vf(adev))
1117 return 0;
1118
b7221f2b
AD
1119 /* skip if the bios has already enabled large BAR */
1120 if (adev->gmc.real_vram_size &&
1121 (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1122 return 0;
1123
31b8adab
CK
1124 /* Check if the root BUS has 64bit memory resources */
1125 root = adev->pdev->bus;
1126 while (root->parent)
1127 root = root->parent;
1128
1129 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 1130 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
1131 res->start > 0x100000000ull)
1132 break;
1133 }
1134
1135 /* Trying to resize is pointless without a root hub window above 4GB */
1136 if (!res)
1137 return 0;
1138
453f617a
ND
1139 /* Limit the BAR size to what is available */
1140 rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1141 rbar_size);
1142
d6895ad3
CK
1143 /* Disable memory decoding while we change the BAR addresses and size */
1144 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1145 pci_write_config_word(adev->pdev, PCI_COMMAND,
1146 cmd & ~PCI_COMMAND_MEMORY);
1147
1148 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
43c064db 1149 amdgpu_doorbell_fini(adev);
d6895ad3
CK
1150 if (adev->asic_type >= CHIP_BONAIRE)
1151 pci_release_resource(adev->pdev, 2);
1152
1153 pci_release_resource(adev->pdev, 0);
1154
1155 r = pci_resize_resource(adev->pdev, 0, rbar_size);
1156 if (r == -ENOSPC)
1157 DRM_INFO("Not enough PCI address space for a large BAR.");
1158 else if (r && r != -ENOTSUPP)
1159 DRM_ERROR("Problem resizing BAR0 (%d).", r);
1160
1161 pci_assign_unassigned_bus_resources(adev->pdev->bus);
1162
1163 /* When the doorbell or fb BAR isn't available we have no chance of
1164 * using the device.
1165 */
43c064db 1166 r = amdgpu_doorbell_init(adev);
d6895ad3
CK
1167 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1168 return -ENODEV;
1169
1170 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1171
1172 return 0;
1173}
a05502e5 1174
9535a86a
SZ
1175static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
1176{
b8920e1e 1177 if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
9535a86a 1178 return false;
9535a86a
SZ
1179
1180 return true;
1181}
1182
d38ceaf9
AD
1183/*
1184 * GPU helpers function.
1185 */
1186/**
39c640c0 1187 * amdgpu_device_need_post - check if the hw need post or not
d38ceaf9
AD
1188 *
1189 * @adev: amdgpu_device pointer
1190 *
c836fec5
JQ
1191 * Check if the asic has been initialized (all asics) at driver startup
1192 * or post is needed if hw reset is performed.
1193 * Returns true if need or false if not.
d38ceaf9 1194 */
39c640c0 1195bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
1196{
1197 uint32_t reg;
1198
bec86378
ML
1199 if (amdgpu_sriov_vf(adev))
1200 return false;
1201
9535a86a
SZ
1202 if (!amdgpu_device_read_bios(adev))
1203 return false;
1204
bec86378 1205 if (amdgpu_passthrough(adev)) {
1da2c326
ML
1206 /* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
1207 * some old smc fw still need driver do vPost otherwise gpu hang, while
1208 * those smc fw version above 22.15 doesn't have this flaw, so we force
1209 * vpost executed for smc version below 22.15
bec86378
ML
1210 */
1211 if (adev->asic_type == CHIP_FIJI) {
1212 int err;
1213 uint32_t fw_ver;
b8920e1e 1214
bec86378
ML
1215 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1216 /* force vPost if error occured */
1217 if (err)
1218 return true;
1219
1220 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1da2c326
ML
1221 if (fw_ver < 0x00160e00)
1222 return true;
bec86378 1223 }
bec86378 1224 }
91fe77eb 1225
e3c1b071 1226 /* Don't post if we need to reset whole hive on init */
1227 if (adev->gmc.xgmi.pending_reset)
1228 return false;
1229
91fe77eb 1230 if (adev->has_hw_reset) {
1231 adev->has_hw_reset = false;
1232 return true;
1233 }
1234
1235 /* bios scratch used on CIK+ */
1236 if (adev->asic_type >= CHIP_BONAIRE)
1237 return amdgpu_atombios_scratch_need_asic_init(adev);
1238
1239 /* check MEM_SIZE for older asics */
1240 reg = amdgpu_asic_get_config_memsize(adev);
1241
1242 if ((reg != 0) && (reg != 0xffffffff))
1243 return false;
1244
1245 return true;
bec86378
ML
1246}
1247
70e64c4d
ML
1248/*
1249 * On APUs with >= 64GB white flickering has been observed w/ SG enabled.
1250 * Disable S/G on such systems until we have a proper fix.
1251 * https://gitlab.freedesktop.org/drm/amd/-/issues/2354
1252 * https://gitlab.freedesktop.org/drm/amd/-/issues/2735
1253 */
1254bool amdgpu_sg_display_supported(struct amdgpu_device *adev)
1255{
1256 switch (amdgpu_sg_display) {
1257 case -1:
1258 break;
1259 case 0:
1260 return false;
1261 case 1:
1262 return true;
1263 default:
1264 return false;
1265 }
1266 if ((totalram_pages() << (PAGE_SHIFT - 10)) +
1267 (adev->gmc.real_vram_size / 1024) >= 64000000) {
1268 DRM_WARN("Disabling S/G due to >=64GB RAM\n");
1269 return false;
1270 }
1271 return true;
1272}
1273
5d1eb4c4
ML
1274/*
1275 * Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic
1276 * speed switching. Until we have confirmation from Intel that a specific host
1277 * supports it, it's safer that we keep it disabled for all.
1278 *
1279 * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1280 * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1281 */
1282bool amdgpu_device_pcie_dynamic_switching_supported(void)
1283{
1284#if IS_ENABLED(CONFIG_X86)
1285 struct cpuinfo_x86 *c = &cpu_data(0);
1286
1287 if (c->x86_vendor == X86_VENDOR_INTEL)
1288 return false;
1289#endif
1290 return true;
1291}
1292
0ab5d711
ML
1293/**
1294 * amdgpu_device_should_use_aspm - check if the device should program ASPM
1295 *
1296 * @adev: amdgpu_device pointer
1297 *
1298 * Confirm whether the module parameter and pcie bridge agree that ASPM should
1299 * be set for this device.
1300 *
1301 * Returns true if it should be used or false if not.
1302 */
1303bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1304{
1305 switch (amdgpu_aspm) {
1306 case -1:
1307 break;
1308 case 0:
1309 return false;
1310 case 1:
1311 return true;
1312 default:
1313 return false;
1314 }
1315 return pcie_aspm_enabled(adev->pdev);
1316}
1317
3ad5dcfe
KHF
1318bool amdgpu_device_aspm_support_quirk(void)
1319{
1320#if IS_ENABLED(CONFIG_X86)
1321 struct cpuinfo_x86 *c = &cpu_data(0);
1322
1323 return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
1324#else
1325 return true;
1326#endif
1327}
1328
d38ceaf9
AD
1329/* if we get transitioned to only one device, take VGA back */
1330/**
06ec9070 1331 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9 1332 *
bf44e8ce 1333 * @pdev: PCI device pointer
d38ceaf9
AD
1334 * @state: enable/disable vga decode
1335 *
1336 * Enable/disable vga decode (all asics).
1337 * Returns VGA resource flags.
1338 */
bf44e8ce
CH
1339static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1340 bool state)
d38ceaf9 1341{
bf44e8ce 1342 struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
b8920e1e 1343
d38ceaf9
AD
1344 amdgpu_asic_set_vga_state(adev, state);
1345 if (state)
1346 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1347 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1348 else
1349 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1350}
1351
e3ecdffa
AD
1352/**
1353 * amdgpu_device_check_block_size - validate the vm block size
1354 *
1355 * @adev: amdgpu_device pointer
1356 *
1357 * Validates the vm block size specified via module parameter.
1358 * The vm block size defines number of bits in page table versus page directory,
1359 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1360 * page table and the remaining bits are in the page directory.
1361 */
06ec9070 1362static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
1363{
1364 /* defines number of bits in page table versus page directory,
1365 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
b8920e1e
SS
1366 * page table and the remaining bits are in the page directory
1367 */
bab4fee7
JZ
1368 if (amdgpu_vm_block_size == -1)
1369 return;
a1adf8be 1370
bab4fee7 1371 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
1372 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1373 amdgpu_vm_block_size);
97489129 1374 amdgpu_vm_block_size = -1;
a1adf8be 1375 }
a1adf8be
CZ
1376}
1377
e3ecdffa
AD
1378/**
1379 * amdgpu_device_check_vm_size - validate the vm size
1380 *
1381 * @adev: amdgpu_device pointer
1382 *
1383 * Validates the vm size in GB specified via module parameter.
1384 * The VM size is the size of the GPU virtual memory space in GB.
1385 */
06ec9070 1386static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 1387{
64dab074
AD
1388 /* no need to check the default value */
1389 if (amdgpu_vm_size == -1)
1390 return;
1391
83ca145d
ZJ
1392 if (amdgpu_vm_size < 1) {
1393 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1394 amdgpu_vm_size);
f3368128 1395 amdgpu_vm_size = -1;
83ca145d 1396 }
83ca145d
ZJ
1397}
1398
7951e376
RZ
1399static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1400{
1401 struct sysinfo si;
a9d4fe2f 1402 bool is_os_64 = (sizeof(void *) == 8);
7951e376
RZ
1403 uint64_t total_memory;
1404 uint64_t dram_size_seven_GB = 0x1B8000000;
1405 uint64_t dram_size_three_GB = 0xB8000000;
1406
1407 if (amdgpu_smu_memory_pool_size == 0)
1408 return;
1409
1410 if (!is_os_64) {
1411 DRM_WARN("Not 64-bit OS, feature not supported\n");
1412 goto def_value;
1413 }
1414 si_meminfo(&si);
1415 total_memory = (uint64_t)si.totalram * si.mem_unit;
1416
1417 if ((amdgpu_smu_memory_pool_size == 1) ||
1418 (amdgpu_smu_memory_pool_size == 2)) {
1419 if (total_memory < dram_size_three_GB)
1420 goto def_value1;
1421 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1422 (amdgpu_smu_memory_pool_size == 8)) {
1423 if (total_memory < dram_size_seven_GB)
1424 goto def_value1;
1425 } else {
1426 DRM_WARN("Smu memory pool size not supported\n");
1427 goto def_value;
1428 }
1429 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1430
1431 return;
1432
1433def_value1:
1434 DRM_WARN("No enough system memory\n");
1435def_value:
1436 adev->pm.smu_prv_buffer_size = 0;
1437}
1438
9f6a7857
HR
1439static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1440{
1441 if (!(adev->flags & AMD_IS_APU) ||
1442 adev->asic_type < CHIP_RAVEN)
1443 return 0;
1444
1445 switch (adev->asic_type) {
1446 case CHIP_RAVEN:
1447 if (adev->pdev->device == 0x15dd)
1448 adev->apu_flags |= AMD_APU_IS_RAVEN;
1449 if (adev->pdev->device == 0x15d8)
1450 adev->apu_flags |= AMD_APU_IS_PICASSO;
1451 break;
1452 case CHIP_RENOIR:
1453 if ((adev->pdev->device == 0x1636) ||
1454 (adev->pdev->device == 0x164c))
1455 adev->apu_flags |= AMD_APU_IS_RENOIR;
1456 else
1457 adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1458 break;
1459 case CHIP_VANGOGH:
1460 adev->apu_flags |= AMD_APU_IS_VANGOGH;
1461 break;
1462 case CHIP_YELLOW_CARP:
1463 break;
d0f56dc2 1464 case CHIP_CYAN_SKILLFISH:
dfcc3e8c
AD
1465 if ((adev->pdev->device == 0x13FE) ||
1466 (adev->pdev->device == 0x143F))
d0f56dc2
TZ
1467 adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1468 break;
9f6a7857 1469 default:
4eaf21b7 1470 break;
9f6a7857
HR
1471 }
1472
1473 return 0;
1474}
1475
d38ceaf9 1476/**
06ec9070 1477 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
1478 *
1479 * @adev: amdgpu_device pointer
1480 *
1481 * Validates certain module parameters and updates
1482 * the associated values used by the driver (all asics).
1483 */
912dfc84 1484static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1485{
5b011235
CZ
1486 if (amdgpu_sched_jobs < 4) {
1487 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1488 amdgpu_sched_jobs);
1489 amdgpu_sched_jobs = 4;
47fc644f 1490 } else if (!is_power_of_2(amdgpu_sched_jobs)) {
5b011235
CZ
1491 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1492 amdgpu_sched_jobs);
1493 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1494 }
d38ceaf9 1495
83e74db6 1496 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
1497 /* gart size must be greater or equal to 32M */
1498 dev_warn(adev->dev, "gart size (%d) too small\n",
1499 amdgpu_gart_size);
83e74db6 1500 amdgpu_gart_size = -1;
d38ceaf9
AD
1501 }
1502
36d38372 1503 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1504 /* gtt size must be greater or equal to 32M */
36d38372
CK
1505 dev_warn(adev->dev, "gtt size (%d) too small\n",
1506 amdgpu_gtt_size);
1507 amdgpu_gtt_size = -1;
d38ceaf9
AD
1508 }
1509
d07f14be
RH
1510 /* valid range is between 4 and 9 inclusive */
1511 if (amdgpu_vm_fragment_size != -1 &&
1512 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1513 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1514 amdgpu_vm_fragment_size = -1;
1515 }
1516
5d5bd5e3
KW
1517 if (amdgpu_sched_hw_submission < 2) {
1518 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1519 amdgpu_sched_hw_submission);
1520 amdgpu_sched_hw_submission = 2;
1521 } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1522 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1523 amdgpu_sched_hw_submission);
1524 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1525 }
1526
2656fd23
AG
1527 if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1528 dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1529 amdgpu_reset_method = -1;
1530 }
1531
7951e376
RZ
1532 amdgpu_device_check_smu_prv_buffer_size(adev);
1533
06ec9070 1534 amdgpu_device_check_vm_size(adev);
d38ceaf9 1535
06ec9070 1536 amdgpu_device_check_block_size(adev);
6a7f76e7 1537
19aede77 1538 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84 1539
e3c00faa 1540 return 0;
d38ceaf9
AD
1541}
1542
1543/**
1544 * amdgpu_switcheroo_set_state - set switcheroo state
1545 *
1546 * @pdev: pci dev pointer
1694467b 1547 * @state: vga_switcheroo state
d38ceaf9 1548 *
12024b17 1549 * Callback for the switcheroo driver. Suspends or resumes
d38ceaf9
AD
1550 * the asics before or after it is powered up using ACPI methods.
1551 */
8aba21b7
LT
1552static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1553 enum vga_switcheroo_state state)
d38ceaf9
AD
1554{
1555 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 1556 int r;
d38ceaf9 1557
b98c6299 1558 if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
1559 return;
1560
1561 if (state == VGA_SWITCHEROO_ON) {
dd4fa6c1 1562 pr_info("switched on\n");
d38ceaf9
AD
1563 /* don't suspend or resume card normally */
1564 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1565
8f66090b
TZ
1566 pci_set_power_state(pdev, PCI_D0);
1567 amdgpu_device_load_pci_state(pdev);
1568 r = pci_enable_device(pdev);
de185019
AD
1569 if (r)
1570 DRM_WARN("pci_enable_device failed (%d)\n", r);
1571 amdgpu_device_resume(dev, true);
d38ceaf9 1572
d38ceaf9 1573 dev->switch_power_state = DRM_SWITCH_POWER_ON;
d38ceaf9 1574 } else {
dd4fa6c1 1575 pr_info("switched off\n");
d38ceaf9 1576 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
de185019 1577 amdgpu_device_suspend(dev, true);
8f66090b 1578 amdgpu_device_cache_pci_state(pdev);
de185019 1579 /* Shut down the device */
8f66090b
TZ
1580 pci_disable_device(pdev);
1581 pci_set_power_state(pdev, PCI_D3cold);
d38ceaf9
AD
1582 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1583 }
1584}
1585
1586/**
1587 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1588 *
1589 * @pdev: pci dev pointer
1590 *
1591 * Callback for the switcheroo driver. Check of the switcheroo
1592 * state can be changed.
1593 * Returns true if the state can be changed, false if not.
1594 */
1595static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1596{
1597 struct drm_device *dev = pci_get_drvdata(pdev);
1598
b8920e1e 1599 /*
d38ceaf9
AD
1600 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1601 * locking inversion with the driver load path. And the access here is
1602 * completely racy anyway. So don't bother with locking for now.
1603 */
7e13ad89 1604 return atomic_read(&dev->open_count) == 0;
d38ceaf9
AD
1605}
1606
1607static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1608 .set_gpu_state = amdgpu_switcheroo_set_state,
1609 .reprobe = NULL,
1610 .can_switch = amdgpu_switcheroo_can_switch,
1611};
1612
e3ecdffa
AD
1613/**
1614 * amdgpu_device_ip_set_clockgating_state - set the CG state
1615 *
87e3f136 1616 * @dev: amdgpu_device pointer
e3ecdffa
AD
1617 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1618 * @state: clockgating state (gate or ungate)
1619 *
1620 * Sets the requested clockgating state for all instances of
1621 * the hardware IP specified.
1622 * Returns the error code from the last instance.
1623 */
43fa561f 1624int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1625 enum amd_ip_block_type block_type,
1626 enum amd_clockgating_state state)
d38ceaf9 1627{
43fa561f 1628 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1629 int i, r = 0;
1630
1631 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1632 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1633 continue;
c722865a
RZ
1634 if (adev->ip_blocks[i].version->type != block_type)
1635 continue;
1636 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1637 continue;
1638 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1639 (void *)adev, state);
1640 if (r)
1641 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1642 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1643 }
1644 return r;
1645}
1646
e3ecdffa
AD
1647/**
1648 * amdgpu_device_ip_set_powergating_state - set the PG state
1649 *
87e3f136 1650 * @dev: amdgpu_device pointer
e3ecdffa
AD
1651 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1652 * @state: powergating state (gate or ungate)
1653 *
1654 * Sets the requested powergating state for all instances of
1655 * the hardware IP specified.
1656 * Returns the error code from the last instance.
1657 */
43fa561f 1658int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1659 enum amd_ip_block_type block_type,
1660 enum amd_powergating_state state)
d38ceaf9 1661{
43fa561f 1662 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1663 int i, r = 0;
1664
1665 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1666 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1667 continue;
c722865a
RZ
1668 if (adev->ip_blocks[i].version->type != block_type)
1669 continue;
1670 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1671 continue;
1672 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1673 (void *)adev, state);
1674 if (r)
1675 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1676 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1677 }
1678 return r;
1679}
1680
e3ecdffa
AD
1681/**
1682 * amdgpu_device_ip_get_clockgating_state - get the CG state
1683 *
1684 * @adev: amdgpu_device pointer
1685 * @flags: clockgating feature flags
1686 *
1687 * Walks the list of IPs on the device and updates the clockgating
1688 * flags for each IP.
1689 * Updates @flags with the feature flags for each hardware IP where
1690 * clockgating is enabled.
1691 */
2990a1fc 1692void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
25faeddc 1693 u64 *flags)
6cb2d4e4
HR
1694{
1695 int i;
1696
1697 for (i = 0; i < adev->num_ip_blocks; i++) {
1698 if (!adev->ip_blocks[i].status.valid)
1699 continue;
1700 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1701 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1702 }
1703}
1704
e3ecdffa
AD
1705/**
1706 * amdgpu_device_ip_wait_for_idle - wait for idle
1707 *
1708 * @adev: amdgpu_device pointer
1709 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1710 *
1711 * Waits for the request hardware IP to be idle.
1712 * Returns 0 for success or a negative error code on failure.
1713 */
2990a1fc
AD
1714int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1715 enum amd_ip_block_type block_type)
5dbbb60b
AD
1716{
1717 int i, r;
1718
1719 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1720 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1721 continue;
a1255107
AD
1722 if (adev->ip_blocks[i].version->type == block_type) {
1723 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1724 if (r)
1725 return r;
1726 break;
1727 }
1728 }
1729 return 0;
1730
1731}
1732
e3ecdffa
AD
1733/**
1734 * amdgpu_device_ip_is_idle - is the hardware IP idle
1735 *
1736 * @adev: amdgpu_device pointer
1737 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1738 *
1739 * Check if the hardware IP is idle or not.
1740 * Returns true if the IP is idle, false if not.
1741 */
2990a1fc
AD
1742bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1743 enum amd_ip_block_type block_type)
5dbbb60b
AD
1744{
1745 int i;
1746
1747 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1748 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1749 continue;
a1255107
AD
1750 if (adev->ip_blocks[i].version->type == block_type)
1751 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1752 }
1753 return true;
1754
1755}
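
/*
 * Editor's note: illustrative sketch, not part of the original file. Shows the
 * usual pairing of the two helpers above: poll for idleness first and, if the
 * block is busy, wait for it to drain; the wrapper is hypothetical.
 */
static int example_quiesce_gfx(struct amdgpu_device *adev)
{
	if (amdgpu_device_ip_is_idle(adev, AMD_IP_BLOCK_TYPE_GFX))
		return 0;

	/* returns 0 on success or the error reported by the GFX block */
	return amdgpu_device_ip_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GFX);
}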
1756
e3ecdffa
AD
1757/**
1758 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1759 *
1760 * @adev: amdgpu_device pointer
87e3f136 1761 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1762 *
1763 * Returns a pointer to the hardware IP block structure
1764 * if it exists for the asic, otherwise NULL.
1765 */
2990a1fc
AD
1766struct amdgpu_ip_block *
1767amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1768 enum amd_ip_block_type type)
d38ceaf9
AD
1769{
1770 int i;
1771
1772 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1773 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1774 return &adev->ip_blocks[i];
1775
1776 return NULL;
1777}
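
/*
 * Editor's note: illustrative sketch, not part of the original file. Shows a
 * typical lookup through amdgpu_device_ip_get_ip_block(); the wrapper is
 * hypothetical.
 */
static u32 example_gfx_major_version(struct amdgpu_device *adev)
{
	struct amdgpu_ip_block *block =
		amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);

	/* NULL means the asic has no GFX IP block registered */
	return block ? block->version->major : 0;
}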
1778
1779/**
2990a1fc 1780 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1781 *
1782 * @adev: amdgpu_device pointer
5fc3aeeb 1783 * @type: enum amd_ip_block_type
d38ceaf9
AD
1784 * @major: major version
1785 * @minor: minor version
1786 *
1787 * Returns 0 if the IP block's version is equal to or greater than the one given,
1788 * or 1 if it is smaller or the ip_block doesn't exist.
1789 */
2990a1fc
AD
1790int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1791 enum amd_ip_block_type type,
1792 u32 major, u32 minor)
d38ceaf9 1793{
2990a1fc 1794 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1795
a1255107
AD
1796 if (ip_block && ((ip_block->version->major > major) ||
1797 ((ip_block->version->major == major) &&
1798 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1799 return 0;
1800
1801 return 1;
1802}
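
/*
 * Editor's note: illustrative sketch, not part of the original file. Shows how
 * the comparison helper can gate a feature on a minimum IP version; the
 * wrapper is hypothetical.
 */
static bool example_smu_is_at_least_11_0(struct amdgpu_device *adev)
{
	/* 0 means "equal or greater", 1 means "smaller or not present" */
	return amdgpu_device_ip_block_version_cmp(adev,
			AMD_IP_BLOCK_TYPE_SMC, 11, 0) == 0;
}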
1803
a1255107 1804/**
2990a1fc 1805 * amdgpu_device_ip_block_add
a1255107
AD
1806 *
1807 * @adev: amdgpu_device pointer
1808 * @ip_block_version: pointer to the IP to add
1809 *
1810 * Adds the IP block driver information to the collection of IPs
1811 * on the asic.
1812 */
2990a1fc
AD
1813int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1814 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1815{
1816 if (!ip_block_version)
1817 return -EINVAL;
1818
7bd939d0
LG
1819 switch (ip_block_version->type) {
1820 case AMD_IP_BLOCK_TYPE_VCN:
1821 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
1822 return 0;
1823 break;
1824 case AMD_IP_BLOCK_TYPE_JPEG:
1825 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
1826 return 0;
1827 break;
1828 default:
1829 break;
1830 }
1831
e966a725 1832 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1833 ip_block_version->funcs->name);
1834
a1255107
AD
1835 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1836
1837 return 0;
1838}
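
/*
 * Editor's note: illustrative sketch, not part of the original file. A
 * per-ASIC *_set_ip_blocks() routine registers its blocks in hw-init order
 * with the helper above; vi_common_ip_block is a real block exported by vi.h,
 * the rest of the sequence is elided here.
 */
static int example_set_ip_blocks(struct amdgpu_device *adev)
{
	int r;

	r = amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
	if (r)
		return r;

	/* ...followed by GMC, IH, PSP, SMC, DCE, GFX, SDMA, etc. */
	return 0;
}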
1839
e3ecdffa
AD
1840/**
1841 * amdgpu_device_enable_virtual_display - enable virtual display feature
1842 *
1843 * @adev: amdgpu_device pointer
1844 *
1845 * Enables the virtual display feature if the user has enabled it via
1846 * the module parameter virtual_display. This feature provides virtual
1847 * display hardware on headless boards or in virtualized environments.
1848 * This function parses and validates the configuration string specified by
1849 * the user and configures the virtual display configuration (number of
1850 * virtual connectors, crtcs, etc.) specified.
1851 */
483ef985 1852static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1853{
1854 adev->enable_virtual_display = false;
1855
1856 if (amdgpu_virtual_display) {
8f66090b 1857 const char *pci_address_name = pci_name(adev->pdev);
0f66356d 1858 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1859
1860 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1861 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1862 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1863 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1864 if (!strcmp("all", pciaddname)
1865 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1866 long num_crtc;
1867 int res = -1;
1868
9accf2fd 1869 adev->enable_virtual_display = true;
0f66356d
ED
1870
1871 if (pciaddname_tmp)
1872 res = kstrtol(pciaddname_tmp, 10,
1873 &num_crtc);
1874
1875 if (!res) {
1876 if (num_crtc < 1)
1877 num_crtc = 1;
1878 if (num_crtc > 6)
1879 num_crtc = 6;
1880 adev->mode_info.num_crtc = num_crtc;
1881 } else {
1882 adev->mode_info.num_crtc = 1;
1883 }
9accf2fd
ED
1884 break;
1885 }
1886 }
1887
0f66356d
ED
1888 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1889 amdgpu_virtual_display, pci_address_name,
1890 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
1891
1892 kfree(pciaddstr);
1893 }
1894}
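
/*
 * Editor's note: illustrative example, not part of the original file. The
 * string parsed above is a semicolon-separated list of
 * "pci_address,num_crtcs" entries, or "all" to match every device, e.g. on
 * the kernel command line:
 *
 *   amdgpu.virtual_display=0000:04:00.0,2
 *   amdgpu.virtual_display=all,1
 *
 * num_crtcs is optional and is clamped to the 1..6 range by the code above;
 * the PCI addresses shown are placeholders.
 */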
1895
25263da3
AD
1896void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
1897{
1898 if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
1899 adev->mode_info.num_crtc = 1;
1900 adev->enable_virtual_display = true;
1901 DRM_INFO("virtual_display:%d, num_crtc:%d\n",
1902 adev->enable_virtual_display, adev->mode_info.num_crtc);
1903 }
1904}
1905
e3ecdffa
AD
1906/**
1907 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1908 *
1909 * @adev: amdgpu_device pointer
1910 *
1911 * Parses the asic configuration parameters specified in the gpu info
1912 * firmware and makes them available to the driver for use in configuring
1913 * the asic.
1914 * Returns 0 on success, -EINVAL on failure.
1915 */
e2a75f88
AD
1916static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1917{
e2a75f88 1918 const char *chip_name;
c0a43457 1919 char fw_name[40];
e2a75f88
AD
1920 int err;
1921 const struct gpu_info_firmware_header_v1_0 *hdr;
1922
ab4fe3e1
HR
1923 adev->firmware.gpu_info_fw = NULL;
1924
72de33f8 1925 if (adev->mman.discovery_bin) {
cc375d8c
TY
1926 /*
1927 * FIXME: The bounding box is still needed by Navi12, so
e24d0e91 1928 * temporarily read it from gpu_info firmware. Should be dropped
cc375d8c
TY
1929 * when DAL no longer needs it.
1930 */
1931 if (adev->asic_type != CHIP_NAVI12)
1932 return 0;
258620d0
AD
1933 }
1934
e2a75f88 1935 switch (adev->asic_type) {
e2a75f88
AD
1936 default:
1937 return 0;
1938 case CHIP_VEGA10:
1939 chip_name = "vega10";
1940 break;
3f76dced
AD
1941 case CHIP_VEGA12:
1942 chip_name = "vega12";
1943 break;
2d2e5e7e 1944 case CHIP_RAVEN:
54f78a76 1945 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
54c4d17e 1946 chip_name = "raven2";
54f78a76 1947 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
741deade 1948 chip_name = "picasso";
54c4d17e
FX
1949 else
1950 chip_name = "raven";
2d2e5e7e 1951 break;
65e60f6e
LM
1952 case CHIP_ARCTURUS:
1953 chip_name = "arcturus";
1954 break;
42b325e5
XY
1955 case CHIP_NAVI12:
1956 chip_name = "navi12";
1957 break;
e2a75f88
AD
1958 }
1959
1960 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
b31d3063 1961 err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
e2a75f88
AD
1962 if (err) {
1963 dev_err(adev->dev,
b31d3063 1964 "Failed to get gpu_info firmware \"%s\"\n",
e2a75f88
AD
1965 fw_name);
1966 goto out;
1967 }
1968
ab4fe3e1 1969 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
1970 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1971
1972 switch (hdr->version_major) {
1973 case 1:
1974 {
1975 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 1976 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
1977 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1978
cc375d8c
TY
1979 /*
1980 * Should be dropped when DAL no longer needs it.
1981 */
1982 if (adev->asic_type == CHIP_NAVI12)
ec51d3fa
XY
1983 goto parse_soc_bounding_box;
1984
b5ab16bf
AD
1985 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1986 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1987 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1988 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 1989 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
1990 le32_to_cpu(gpu_info_fw->gc_num_tccs);
1991 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1992 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1993 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1994 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 1995 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
1996 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1997 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
1998 adev->gfx.cu_info.max_waves_per_simd =
1999 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2000 adev->gfx.cu_info.max_scratch_slots_per_cu =
2001 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2002 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 2003 if (hdr->version_minor >= 1) {
35c2e910
HZ
2004 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2005 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2006 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2007 adev->gfx.config.num_sc_per_sh =
2008 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2009 adev->gfx.config.num_packer_per_sc =
2010 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2011 }
ec51d3fa
XY
2012
2013parse_soc_bounding_box:
ec51d3fa
XY
2014 /*
2015 * soc bounding box info is not integrated into the discovery table,
258620d0 2016 * we always need to parse it from gpu info firmware if needed.
ec51d3fa 2017 */
48321c3d
HW
2018 if (hdr->version_minor == 2) {
2019 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2020 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2021 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2022 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2023 }
e2a75f88
AD
2024 break;
2025 }
2026 default:
2027 dev_err(adev->dev,
2028 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2029 err = -EINVAL;
2030 goto out;
2031 }
2032out:
e2a75f88
AD
2033 return err;
2034}
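
/*
 * Editor's note: illustrative sketch, not part of the original file. Once the
 * gpu_info firmware has been parsed above, the rest of the driver simply
 * reads the cached values; the helper below is hypothetical.
 */
static u32 example_max_cu_count(struct amdgpu_device *adev)
{
	/* upper bound: shader engines * shader arrays per SE * CUs per SH */
	return adev->gfx.config.max_shader_engines *
	       adev->gfx.config.max_sh_per_se *
	       adev->gfx.config.max_cu_per_sh;
}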
2035
e3ecdffa
AD
2036/**
2037 * amdgpu_device_ip_early_init - run early init for hardware IPs
2038 *
2039 * @adev: amdgpu_device pointer
2040 *
2041 * Early initialization pass for hardware IPs. The hardware IPs that make
2042 * up each asic are discovered and each IP's early_init callback is run. This
2043 * is the first stage in initializing the asic.
2044 * Returns 0 on success, negative error code on failure.
2045 */
06ec9070 2046static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 2047{
901e2be2
AD
2048 struct drm_device *dev = adev_to_drm(adev);
2049 struct pci_dev *parent;
aaa36a97 2050 int i, r;
ced69502 2051 bool total;
d38ceaf9 2052
483ef985 2053 amdgpu_device_enable_virtual_display(adev);
a6be7570 2054
00a979f3 2055 if (amdgpu_sriov_vf(adev)) {
00a979f3 2056 r = amdgpu_virt_request_full_gpu(adev, true);
aaa36a97
AD
2057 if (r)
2058 return r;
00a979f3
WS
2059 }
2060
d38ceaf9 2061 switch (adev->asic_type) {
33f34802
KW
2062#ifdef CONFIG_DRM_AMDGPU_SI
2063 case CHIP_VERDE:
2064 case CHIP_TAHITI:
2065 case CHIP_PITCAIRN:
2066 case CHIP_OLAND:
2067 case CHIP_HAINAN:
295d0daf 2068 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
2069 r = si_set_ip_blocks(adev);
2070 if (r)
2071 return r;
2072 break;
2073#endif
a2e73f56
AD
2074#ifdef CONFIG_DRM_AMDGPU_CIK
2075 case CHIP_BONAIRE:
2076 case CHIP_HAWAII:
2077 case CHIP_KAVERI:
2078 case CHIP_KABINI:
2079 case CHIP_MULLINS:
e1ad2d53 2080 if (adev->flags & AMD_IS_APU)
a2e73f56 2081 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
2082 else
2083 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
2084
2085 r = cik_set_ip_blocks(adev);
2086 if (r)
2087 return r;
2088 break;
2089#endif
da87c30b
AD
2090 case CHIP_TOPAZ:
2091 case CHIP_TONGA:
2092 case CHIP_FIJI:
2093 case CHIP_POLARIS10:
2094 case CHIP_POLARIS11:
2095 case CHIP_POLARIS12:
2096 case CHIP_VEGAM:
2097 case CHIP_CARRIZO:
2098 case CHIP_STONEY:
2099 if (adev->flags & AMD_IS_APU)
2100 adev->family = AMDGPU_FAMILY_CZ;
2101 else
2102 adev->family = AMDGPU_FAMILY_VI;
2103
2104 r = vi_set_ip_blocks(adev);
2105 if (r)
2106 return r;
2107 break;
d38ceaf9 2108 default:
63352b7f
AD
2109 r = amdgpu_discovery_set_ip_blocks(adev);
2110 if (r)
2111 return r;
2112 break;
d38ceaf9
AD
2113 }
2114
901e2be2
AD
2115 if (amdgpu_has_atpx() &&
2116 (amdgpu_is_atpx_hybrid() ||
2117 amdgpu_has_atpx_dgpu_power_cntl()) &&
2118 ((adev->flags & AMD_IS_APU) == 0) &&
2119 !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
2120 adev->flags |= AMD_IS_PX;
2121
85ac2021
AD
2122 if (!(adev->flags & AMD_IS_APU)) {
2123 parent = pci_upstream_bridge(adev->pdev);
2124 adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2125 }
901e2be2 2126
1884734a 2127
3b94fb10 2128 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 2129 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 2130 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
4215a119
HC
2131 if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2132 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
00f54b97 2133
ced69502 2134 total = true;
d38ceaf9
AD
2135 for (i = 0; i < adev->num_ip_blocks; i++) {
2136 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
0c451baf 2137 DRM_WARN("disabled ip block: %d <%s>\n",
ed8cf00c 2138 i, adev->ip_blocks[i].version->funcs->name);
a1255107 2139 adev->ip_blocks[i].status.valid = false;
d38ceaf9 2140 } else {
a1255107
AD
2141 if (adev->ip_blocks[i].version->funcs->early_init) {
2142 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 2143 if (r == -ENOENT) {
a1255107 2144 adev->ip_blocks[i].status.valid = false;
2c1a2784 2145 } else if (r) {
a1255107
AD
2146 DRM_ERROR("early_init of IP block <%s> failed %d\n",
2147 adev->ip_blocks[i].version->funcs->name, r);
ced69502 2148 total = false;
2c1a2784 2149 } else {
a1255107 2150 adev->ip_blocks[i].status.valid = true;
2c1a2784 2151 }
974e6b64 2152 } else {
a1255107 2153 adev->ip_blocks[i].status.valid = true;
d38ceaf9 2154 }
d38ceaf9 2155 }
21a249ca
AD
2156 /* get the vbios after the asic_funcs are set up */
2157 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
2158 r = amdgpu_device_parse_gpu_info_fw(adev);
2159 if (r)
2160 return r;
2161
21a249ca 2162 /* Read BIOS */
9535a86a
SZ
2163 if (amdgpu_device_read_bios(adev)) {
2164 if (!amdgpu_get_bios(adev))
2165 return -EINVAL;
21a249ca 2166
9535a86a
SZ
2167 r = amdgpu_atombios_init(adev);
2168 if (r) {
2169 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2170 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2171 return r;
2172 }
21a249ca 2173 }
77eabc6f
PJZ
2174
2175 /* get pf2vf msg info at its earliest time */
2176 if (amdgpu_sriov_vf(adev))
2177 amdgpu_virt_init_data_exchange(adev);
2178
21a249ca 2179 }
d38ceaf9 2180 }
ced69502
ML
2181 if (!total)
2182 return -ENODEV;
d38ceaf9 2183
00fa4035 2184 amdgpu_amdkfd_device_probe(adev);
395d1fb9
NH
2185 adev->cg_flags &= amdgpu_cg_mask;
2186 adev->pg_flags &= amdgpu_pg_mask;
2187
d38ceaf9
AD
2188 return 0;
2189}
2190
0a4f2520
RZ
2191static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2192{
2193 int i, r;
2194
2195 for (i = 0; i < adev->num_ip_blocks; i++) {
2196 if (!adev->ip_blocks[i].status.sw)
2197 continue;
2198 if (adev->ip_blocks[i].status.hw)
2199 continue;
2200 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 2201 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
2202 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2203 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2204 if (r) {
2205 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2206 adev->ip_blocks[i].version->funcs->name, r);
2207 return r;
2208 }
2209 adev->ip_blocks[i].status.hw = true;
2210 }
2211 }
2212
2213 return 0;
2214}
2215
2216static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2217{
2218 int i, r;
2219
2220 for (i = 0; i < adev->num_ip_blocks; i++) {
2221 if (!adev->ip_blocks[i].status.sw)
2222 continue;
2223 if (adev->ip_blocks[i].status.hw)
2224 continue;
2225 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2226 if (r) {
2227 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2228 adev->ip_blocks[i].version->funcs->name, r);
2229 return r;
2230 }
2231 adev->ip_blocks[i].status.hw = true;
2232 }
2233
2234 return 0;
2235}
2236
7a3e0bb2
RZ
2237static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2238{
2239 int r = 0;
2240 int i;
80f41f84 2241 uint32_t smu_version;
7a3e0bb2
RZ
2242
2243 if (adev->asic_type >= CHIP_VEGA10) {
2244 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
2245 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2246 continue;
2247
e3c1b071 2248 if (!adev->ip_blocks[i].status.sw)
2249 continue;
2250
482f0e53
ML
2251 /* no need to do the fw loading again if already done*/
2252 if (adev->ip_blocks[i].status.hw == true)
2253 break;
2254
53b3f8f4 2255 if (amdgpu_in_reset(adev) || adev->in_suspend) {
482f0e53
ML
2256 r = adev->ip_blocks[i].version->funcs->resume(adev);
2257 if (r) {
2258 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 2259 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
2260 return r;
2261 }
2262 } else {
2263 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2264 if (r) {
2265 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2266 adev->ip_blocks[i].version->funcs->name, r);
2267 return r;
7a3e0bb2 2268 }
7a3e0bb2 2269 }
482f0e53
ML
2270
2271 adev->ip_blocks[i].status.hw = true;
2272 break;
7a3e0bb2
RZ
2273 }
2274 }
482f0e53 2275
8973d9ec
ED
2276 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2277 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 2278
80f41f84 2279 return r;
7a3e0bb2
RZ
2280}
2281
5fd8518d
AG
2282static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2283{
2284 long timeout;
2285 int r, i;
2286
2287 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2288 struct amdgpu_ring *ring = adev->rings[i];
2289
2290 /* No need to setup the GPU scheduler for rings that don't need it */
2291 if (!ring || ring->no_scheduler)
2292 continue;
2293
2294 switch (ring->funcs->type) {
2295 case AMDGPU_RING_TYPE_GFX:
2296 timeout = adev->gfx_timeout;
2297 break;
2298 case AMDGPU_RING_TYPE_COMPUTE:
2299 timeout = adev->compute_timeout;
2300 break;
2301 case AMDGPU_RING_TYPE_SDMA:
2302 timeout = adev->sdma_timeout;
2303 break;
2304 default:
2305 timeout = adev->video_timeout;
2306 break;
2307 }
2308
2309 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
11f25c84 2310 ring->num_hw_submission, 0,
8ab62eda
JG
2311 timeout, adev->reset_domain->wq,
2312 ring->sched_score, ring->name,
2313 adev->dev);
5fd8518d
AG
2314 if (r) {
2315 DRM_ERROR("Failed to create scheduler on ring %s.\n",
2316 ring->name);
2317 return r;
2318 }
2319 }
2320
d425c6f4
JZ
2321 amdgpu_xcp_update_partition_sched_list(adev);
2322
5fd8518d
AG
2323 return 0;
2324}
2325
2326
e3ecdffa
AD
2327/**
2328 * amdgpu_device_ip_init - run init for hardware IPs
2329 *
2330 * @adev: amdgpu_device pointer
2331 *
2332 * Main initialization pass for hardware IPs. The list of all the hardware
2333 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2334 * are run. sw_init initializes the software state associated with each IP
2335 * and hw_init initializes the hardware associated with each IP.
2336 * Returns 0 on success, negative error code on failure.
2337 */
06ec9070 2338static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
2339{
2340 int i, r;
2341
c030f2e4 2342 r = amdgpu_ras_init(adev);
2343 if (r)
2344 return r;
2345
d38ceaf9 2346 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2347 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2348 continue;
a1255107 2349 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 2350 if (r) {
a1255107
AD
2351 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2352 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 2353 goto init_failed;
2c1a2784 2354 }
a1255107 2355 adev->ip_blocks[i].status.sw = true;
bfca0289 2356
c1c39032
AD
2357 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2358 /* need to do common hw init early so everything is set up for gmc */
2359 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2360 if (r) {
2361 DRM_ERROR("hw_init %d failed %d\n", i, r);
2362 goto init_failed;
2363 }
2364 adev->ip_blocks[i].status.hw = true;
2365 } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2366 /* need to do gmc hw init early so we can allocate gpu mem */
892deb48
VS
2367 /* Try to reserve bad pages early */
2368 if (amdgpu_sriov_vf(adev))
2369 amdgpu_virt_exchange_data(adev);
2370
7ccfd79f 2371 r = amdgpu_device_mem_scratch_init(adev);
2c1a2784 2372 if (r) {
7ccfd79f 2373 DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
72d3f592 2374 goto init_failed;
2c1a2784 2375 }
a1255107 2376 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
2377 if (r) {
2378 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 2379 goto init_failed;
2c1a2784 2380 }
06ec9070 2381 r = amdgpu_device_wb_init(adev);
2c1a2784 2382 if (r) {
06ec9070 2383 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 2384 goto init_failed;
2c1a2784 2385 }
a1255107 2386 adev->ip_blocks[i].status.hw = true;
2493664f
ML
2387
2388 /* right after GMC hw init, we create CSA */
02ff519e 2389 if (adev->gfx.mcbp) {
1e256e27 2390 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
58ab2c08
CK
2391 AMDGPU_GEM_DOMAIN_VRAM |
2392 AMDGPU_GEM_DOMAIN_GTT,
2393 AMDGPU_CSA_SIZE);
2493664f
ML
2394 if (r) {
2395 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 2396 goto init_failed;
2493664f
ML
2397 }
2398 }
d38ceaf9
AD
2399 }
2400 }
2401
c9ffa427 2402 if (amdgpu_sriov_vf(adev))
22c16d25 2403 amdgpu_virt_init_data_exchange(adev);
c9ffa427 2404
533aed27
AG
2405 r = amdgpu_ib_pool_init(adev);
2406 if (r) {
2407 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2408 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2409 goto init_failed;
2410 }
2411
c8963ea4
RZ
2412 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2413 if (r)
72d3f592 2414 goto init_failed;
0a4f2520
RZ
2415
2416 r = amdgpu_device_ip_hw_init_phase1(adev);
2417 if (r)
72d3f592 2418 goto init_failed;
0a4f2520 2419
7a3e0bb2
RZ
2420 r = amdgpu_device_fw_loading(adev);
2421 if (r)
72d3f592 2422 goto init_failed;
7a3e0bb2 2423
0a4f2520
RZ
2424 r = amdgpu_device_ip_hw_init_phase2(adev);
2425 if (r)
72d3f592 2426 goto init_failed;
d38ceaf9 2427
121a2bc6
AG
2428 /*
2429 * retired pages will be loaded from eeprom and reserved here,
2430 * it should be called after amdgpu_device_ip_hw_init_phase2 since
2431 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2432 * for I2C communication, which is only true at this point.
b82e65a9
GC
2433 *
2434 * amdgpu_ras_recovery_init may fail, but the upper layers only care about
2435 * failures caused by a bad gpu situation and stop the amdgpu init process
2436 * accordingly. For other failure cases, it will still release all the
2437 * resources and print an error message, rather than returning a
2438 * negative value to the upper level.
121a2bc6
AG
2439 *
2440 * Note: theoretically, this should be called before all vram allocations
2441 * to protect retired pages from being reused
2442 */
b82e65a9
GC
2443 r = amdgpu_ras_recovery_init(adev);
2444 if (r)
2445 goto init_failed;
121a2bc6 2446
cfbb6b00
AG
2447 /**
2448 * In case of XGMI grab extra reference for reset domain for this device
2449 */
a4c63caf 2450 if (adev->gmc.xgmi.num_physical_nodes > 1) {
cfbb6b00 2451 if (amdgpu_xgmi_add_device(adev) == 0) {
46c67660 2452 if (!amdgpu_sriov_vf(adev)) {
2efc30f0
VC
2453 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2454
dfd0287b
LH
2455 if (WARN_ON(!hive)) {
2456 r = -ENOENT;
2457 goto init_failed;
2458 }
2459
46c67660 2460 if (!hive->reset_domain ||
2461 !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2462 r = -ENOENT;
2463 amdgpu_put_xgmi_hive(hive);
2464 goto init_failed;
2465 }
2466
2467 /* Drop the early temporary reset domain we created for device */
2468 amdgpu_reset_put_reset_domain(adev->reset_domain);
2469 adev->reset_domain = hive->reset_domain;
9dfa4860 2470 amdgpu_put_xgmi_hive(hive);
cfbb6b00 2471 }
a4c63caf
AG
2472 }
2473 }
2474
5fd8518d
AG
2475 r = amdgpu_device_init_schedulers(adev);
2476 if (r)
2477 goto init_failed;
e3c1b071 2478
2479 /* Don't init kfd if whole hive need to be reset during init */
84b4dd3f
PY
2480 if (!adev->gmc.xgmi.pending_reset) {
2481 kgd2kfd_init_zone_device(adev);
e3c1b071 2482 amdgpu_amdkfd_device_init(adev);
84b4dd3f 2483 }
c6332b97 2484
bd607166
KR
2485 amdgpu_fru_get_product_info(adev);
2486
72d3f592 2487init_failed:
c6332b97 2488
72d3f592 2489 return r;
d38ceaf9
AD
2490}
2491
e3ecdffa
AD
2492/**
2493 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2494 *
2495 * @adev: amdgpu_device pointer
2496 *
2497 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2498 * this function before a GPU reset. If the value is retained after a
2499 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2500 */
06ec9070 2501static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2502{
2503 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2504}
2505
e3ecdffa
AD
2506/**
2507 * amdgpu_device_check_vram_lost - check if vram is valid
2508 *
2509 * @adev: amdgpu_device pointer
2510 *
2511 * Checks the reset magic value written to the gart pointer in VRAM.
2512 * The driver calls this after a GPU reset to see if the contents of
2513 * VRAM are lost or not.
2514 * Returns true if vram is lost, false if not.
2515 */
06ec9070 2516static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2517{
dadce777
EQ
2518 if (memcmp(adev->gart.ptr, adev->reset_magic,
2519 AMDGPU_RESET_MAGIC_NUM))
2520 return true;
2521
53b3f8f4 2522 if (!amdgpu_in_reset(adev))
dadce777
EQ
2523 return false;
2524
2525 /*
2526 * For all ASICs with baco/mode1 reset, the VRAM is
2527 * always assumed to be lost.
2528 */
2529 switch (amdgpu_asic_reset_method(adev)) {
2530 case AMD_RESET_METHOD_BACO:
2531 case AMD_RESET_METHOD_MODE1:
2532 return true;
2533 default:
2534 return false;
2535 }
0c49e0b8
CZ
2536}
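
/*
 * Editor's note: illustrative sketch, not part of the original file. Shows how
 * the two helpers above bracket a reset: stash the magic beforehand, then
 * compare it afterwards; the reset call is only a placeholder for the real
 * recovery path.
 */
static bool example_vram_survived_reset(struct amdgpu_device *adev)
{
	amdgpu_device_fill_reset_magic(adev);

	if (amdgpu_asic_reset(adev))
		return false;

	/* check_vram_lost() returns true when the magic (and thus VRAM) is gone */
	return !amdgpu_device_check_vram_lost(adev);
}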
2537
e3ecdffa 2538/**
1112a46b 2539 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2540 *
2541 * @adev: amdgpu_device pointer
b8b72130 2542 * @state: clockgating state (gate or ungate)
e3ecdffa 2543 *
e3ecdffa 2544 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2545 * set_clockgating_state callbacks are run.
2546 * On late init, this pass enables clockgating for the hardware IPs;
2547 * on fini or suspend, it disables clockgating for the hardware IPs.
e3ecdffa
AD
2548 * Returns 0 on success, negative error code on failure.
2549 */
fdd34271 2550
5d89bb2d
LL
2551int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2552 enum amd_clockgating_state state)
d38ceaf9 2553{
1112a46b 2554 int i, j, r;
d38ceaf9 2555
4a2ba394
SL
2556 if (amdgpu_emu_mode == 1)
2557 return 0;
2558
1112a46b
RZ
2559 for (j = 0; j < adev->num_ip_blocks; j++) {
2560 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2561 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2562 continue;
47198eb7 2563 /* skip CG for GFX, SDMA on S0ix */
5d70a549 2564 if (adev->in_s0ix &&
47198eb7
AD
2565 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2566 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2567 continue;
4a446d55 2568 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2569 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2570 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2571 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2572 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2573 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2574 /* enable clockgating to save power */
a1255107 2575 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2576 state);
4a446d55
AD
2577 if (r) {
2578 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2579 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2580 return r;
2581 }
b0b00ff1 2582 }
d38ceaf9 2583 }
06b18f61 2584
c9f96fd5
RZ
2585 return 0;
2586}
2587
5d89bb2d
LL
2588int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2589 enum amd_powergating_state state)
c9f96fd5 2590{
1112a46b 2591 int i, j, r;
06b18f61 2592
c9f96fd5
RZ
2593 if (amdgpu_emu_mode == 1)
2594 return 0;
2595
1112a46b
RZ
2596 for (j = 0; j < adev->num_ip_blocks; j++) {
2597 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2598 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5 2599 continue;
47198eb7 2600 /* skip PG for GFX, SDMA on S0ix */
5d70a549 2601 if (adev->in_s0ix &&
47198eb7
AD
2602 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2603 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2604 continue;
c9f96fd5
RZ
2605 /* skip CG for VCE/UVD, it's handled specially */
2606 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2607 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2608 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2609 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2610 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2611 /* enable powergating to save power */
2612 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2613 state);
c9f96fd5
RZ
2614 if (r) {
2615 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2616 adev->ip_blocks[i].version->funcs->name, r);
2617 return r;
2618 }
2619 }
2620 }
2dc80b00
S
2621 return 0;
2622}
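
/*
 * Editor's note: illustrative sketch, not part of the original file. The two
 * helpers above are normally driven as a pair: gate on late init, ungate on
 * fini/suspend; the wrapper is hypothetical.
 */
static int example_toggle_power_savings(struct amdgpu_device *adev, bool enable)
{
	int r;

	r = amdgpu_device_set_cg_state(adev,
			enable ? AMD_CG_STATE_GATE : AMD_CG_STATE_UNGATE);
	if (r)
		return r;

	return amdgpu_device_set_pg_state(adev,
			enable ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
}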
2623
beff74bc
AD
2624static int amdgpu_device_enable_mgpu_fan_boost(void)
2625{
2626 struct amdgpu_gpu_instance *gpu_ins;
2627 struct amdgpu_device *adev;
2628 int i, ret = 0;
2629
2630 mutex_lock(&mgpu_info.mutex);
2631
2632 /*
2633 * MGPU fan boost feature should be enabled
2634 * only when there are two or more dGPUs in
2635 * the system
2636 */
2637 if (mgpu_info.num_dgpu < 2)
2638 goto out;
2639
2640 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2641 gpu_ins = &(mgpu_info.gpu_ins[i]);
2642 adev = gpu_ins->adev;
2643 if (!(adev->flags & AMD_IS_APU) &&
f10bb940 2644 !gpu_ins->mgpu_fan_enabled) {
beff74bc
AD
2645 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2646 if (ret)
2647 break;
2648
2649 gpu_ins->mgpu_fan_enabled = 1;
2650 }
2651 }
2652
2653out:
2654 mutex_unlock(&mgpu_info.mutex);
2655
2656 return ret;
2657}
2658
e3ecdffa
AD
2659/**
2660 * amdgpu_device_ip_late_init - run late init for hardware IPs
2661 *
2662 * @adev: amdgpu_device pointer
2663 *
2664 * Late initialization pass for hardware IPs. The list of all the hardware
2665 * IPs that make up the asic is walked and the late_init callbacks are run.
2666 * late_init covers any special initialization that an IP requires
2667 * after all of the IPs have been initialized or something that needs to happen
2668 * late in the init process.
2669 * Returns 0 on success, negative error code on failure.
2670 */
06ec9070 2671static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2672{
60599a03 2673 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2674 int i = 0, r;
2675
2676 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2677 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2678 continue;
2679 if (adev->ip_blocks[i].version->funcs->late_init) {
2680 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2681 if (r) {
2682 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2683 adev->ip_blocks[i].version->funcs->name, r);
2684 return r;
2685 }
2dc80b00 2686 }
73f847db 2687 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2688 }
2689
867e24ca 2690 r = amdgpu_ras_late_init(adev);
2691 if (r) {
2692 DRM_ERROR("amdgpu_ras_late_init failed %d", r);
2693 return r;
2694 }
2695
a891d239
DL
2696 amdgpu_ras_set_error_query_ready(adev, true);
2697
1112a46b
RZ
2698 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2699 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2700
06ec9070 2701 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2702
beff74bc
AD
2703 r = amdgpu_device_enable_mgpu_fan_boost();
2704 if (r)
2705 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2706
4da8b639 2707 /* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */
47fc644f
SS
2708 if (amdgpu_passthrough(adev) &&
2709 ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
2710 adev->asic_type == CHIP_ALDEBARAN))
bc143d8b 2711 amdgpu_dpm_handle_passthrough_sbr(adev, true);
60599a03
EQ
2712
2713 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2714 mutex_lock(&mgpu_info.mutex);
2715
2716 /*
2717 * Reset device p-state to low as this was booted with high.
2718 *
2719 * This should be performed only after all devices from the same
2720 * hive get initialized.
2721 *
2722 * However, it's unknown in advance how many devices are in the hive,
2723 * as they are counted one by one during device initialization.
2724 *
2725 * So, we wait for all XGMI interlinked devices initialized.
2726 * This may bring some delays as those devices may come from
2727 * different hives. But that should be OK.
2728 */
2729 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2730 for (i = 0; i < mgpu_info.num_gpu; i++) {
2731 gpu_instance = &(mgpu_info.gpu_ins[i]);
2732 if (gpu_instance->adev->flags & AMD_IS_APU)
2733 continue;
2734
d84a430d
JK
2735 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2736 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
2737 if (r) {
2738 DRM_ERROR("pstate setting failed (%d).\n", r);
2739 break;
2740 }
2741 }
2742 }
2743
2744 mutex_unlock(&mgpu_info.mutex);
2745 }
2746
d38ceaf9
AD
2747 return 0;
2748}
2749
613aa3ea
LY
2750/**
2751 * amdgpu_device_smu_fini_early - smu hw_fini wrapper
2752 *
2753 * @adev: amdgpu_device pointer
2754 *
2755 * For ASICs that need to disable SMC first
2756 */
2757static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
2758{
2759 int i, r;
2760
2761 if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))
2762 return;
2763
2764 for (i = 0; i < adev->num_ip_blocks; i++) {
2765 if (!adev->ip_blocks[i].status.hw)
2766 continue;
2767 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2768 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2769 /* XXX handle errors */
2770 if (r) {
2771 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2772 adev->ip_blocks[i].version->funcs->name, r);
2773 }
2774 adev->ip_blocks[i].status.hw = false;
2775 break;
2776 }
2777 }
2778}
2779
e9669fb7 2780static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
d38ceaf9
AD
2781{
2782 int i, r;
2783
e9669fb7
AG
2784 for (i = 0; i < adev->num_ip_blocks; i++) {
2785 if (!adev->ip_blocks[i].version->funcs->early_fini)
2786 continue;
5278a159 2787
e9669fb7
AG
2788 r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
2789 if (r) {
2790 DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
2791 adev->ip_blocks[i].version->funcs->name, r);
2792 }
2793 }
c030f2e4 2794
05df1f01 2795 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2796 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2797
7270e895
TY
2798 amdgpu_amdkfd_suspend(adev, false);
2799
613aa3ea
LY
2800 /* Workaround for ASICs that need to disable SMC first */
2801 amdgpu_device_smu_fini_early(adev);
3e96dbfd 2802
d38ceaf9 2803 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2804 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2805 continue;
8201a67a 2806
a1255107 2807 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2808 /* XXX handle errors */
2c1a2784 2809 if (r) {
a1255107
AD
2810 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2811 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2812 }
8201a67a 2813
a1255107 2814 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2815 }
2816
6effad8a
GC
2817 if (amdgpu_sriov_vf(adev)) {
2818 if (amdgpu_virt_release_full_gpu(adev, false))
2819 DRM_ERROR("failed to release exclusive mode on fini\n");
2820 }
2821
e9669fb7
AG
2822 return 0;
2823}
2824
2825/**
2826 * amdgpu_device_ip_fini - run fini for hardware IPs
2827 *
2828 * @adev: amdgpu_device pointer
2829 *
2830 * Main teardown pass for hardware IPs. The list of all the hardware
2831 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2832 * are run. hw_fini tears down the hardware associated with each IP
2833 * and sw_fini tears down any software state associated with each IP.
2834 * Returns 0 on success, negative error code on failure.
2835 */
2836static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2837{
2838 int i, r;
2839
2840 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2841 amdgpu_virt_release_ras_err_handler_data(adev);
2842
e9669fb7
AG
2843 if (adev->gmc.xgmi.num_physical_nodes > 1)
2844 amdgpu_xgmi_remove_device(adev);
2845
c004d44e 2846 amdgpu_amdkfd_device_fini_sw(adev);
9950cda2 2847
d38ceaf9 2848 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2849 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2850 continue;
c12aba3a
ML
2851
2852 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2853 amdgpu_ucode_free_bo(adev);
1e256e27 2854 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a 2855 amdgpu_device_wb_fini(adev);
7ccfd79f 2856 amdgpu_device_mem_scratch_fini(adev);
533aed27 2857 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2858 }
2859
a1255107 2860 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2861 /* XXX handle errors */
2c1a2784 2862 if (r) {
a1255107
AD
2863 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2864 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2865 }
a1255107
AD
2866 adev->ip_blocks[i].status.sw = false;
2867 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2868 }
2869
a6dcfd9c 2870 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2871 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2872 continue;
a1255107
AD
2873 if (adev->ip_blocks[i].version->funcs->late_fini)
2874 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2875 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2876 }
2877
c030f2e4 2878 amdgpu_ras_fini(adev);
2879
d38ceaf9
AD
2880 return 0;
2881}
2882
e3ecdffa 2883/**
beff74bc 2884 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2885 *
1112a46b 2886 * @work: work_struct.
e3ecdffa 2887 */
beff74bc 2888static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2889{
2890 struct amdgpu_device *adev =
beff74bc 2891 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2892 int r;
2893
2894 r = amdgpu_ib_ring_tests(adev);
2895 if (r)
2896 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2897}
2898
1e317b99
RZ
2899static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2900{
2901 struct amdgpu_device *adev =
2902 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2903
90a92662
MD
2904 WARN_ON_ONCE(adev->gfx.gfx_off_state);
2905 WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
2906
2907 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2908 adev->gfx.gfx_off_state = true;
1e317b99
RZ
2909}
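
/*
 * Editor's note: illustrative sketch, not part of the original file. The work
 * item above is normally armed from amdgpu_gfx_off_ctrl() once the last
 * GFXOFF-disable request is dropped; this simplified version omits the
 * request counting and locking, and the delay value is a placeholder.
 */
static void example_arm_gfx_off(struct amdgpu_device *adev)
{
	schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
			      msecs_to_jiffies(100));
}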
2910
e3ecdffa 2911/**
e7854a03 2912 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2913 *
2914 * @adev: amdgpu_device pointer
2915 *
2916 * Main suspend function for hardware IPs. The list of all the hardware
2917 * IPs that make up the asic is walked, clockgating is disabled and the
2918 * suspend callbacks are run. suspend puts the hardware and software state
2919 * in each IP into a state suitable for suspend.
2920 * Returns 0 on success, negative error code on failure.
2921 */
e7854a03
AD
2922static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2923{
2924 int i, r;
2925
50ec83f0
AD
2926 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2927 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2928
b31d6ada
EQ
2929 /*
2930 * Per the PMFW team's suggestion, the driver needs to disable the gfxoff
2931 * and df cstate features for gpu reset (e.g. Mode1Reset)
2932 * scenarios. Add the missing df cstate disablement here.
2933 */
2934 if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
2935 dev_warn(adev->dev, "Failed to disallow df cstate");
2936
e7854a03
AD
2937 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2938 if (!adev->ip_blocks[i].status.valid)
2939 continue;
2b9f7848 2940
e7854a03 2941 /* displays are handled separately */
2b9f7848
ND
2942 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
2943 continue;
2944
2945 /* XXX handle errors */
2946 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2947 /* XXX handle errors */
2948 if (r) {
2949 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2950 adev->ip_blocks[i].version->funcs->name, r);
2951 return r;
e7854a03 2952 }
2b9f7848
ND
2953
2954 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
2955 }
2956
e7854a03
AD
2957 return 0;
2958}
2959
2960/**
2961 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2962 *
2963 * @adev: amdgpu_device pointer
2964 *
2965 * Main suspend function for hardware IPs. The list of all the hardware
2966 * IPs that make up the asic is walked, clockgating is disabled and the
2967 * suspend callbacks are run. suspend puts the hardware and software state
2968 * in each IP into a state suitable for suspend.
2969 * Returns 0 on success, negative error code on failure.
2970 */
2971static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2972{
2973 int i, r;
2974
557f42a2 2975 if (adev->in_s0ix)
bc143d8b 2976 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
34416931 2977
d38ceaf9 2978 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2979 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2980 continue;
e7854a03
AD
2981 /* displays are handled in phase1 */
2982 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2983 continue;
bff77e86
LM
2984 /* PSP lost connection when err_event_athub occurs */
2985 if (amdgpu_ras_intr_triggered() &&
2986 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2987 adev->ip_blocks[i].status.hw = false;
2988 continue;
2989 }
e3c1b071 2990
2991 /* skip unnecessary suspend if we have not initialized them yet */
2992 if (adev->gmc.xgmi.pending_reset &&
2993 !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2994 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
2995 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2996 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
2997 adev->ip_blocks[i].status.hw = false;
2998 continue;
2999 }
557f42a2 3000
afa6646b 3001 /* skip suspend of gfx/mes and psp for S0ix
32ff160d
AD
3002 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3003 * like at runtime. PSP is also part of the always on hardware
3004 * so no need to suspend it.
3005 */
557f42a2 3006 if (adev->in_s0ix &&
32ff160d 3007 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
afa6646b
AD
3008 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3009 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
557f42a2
AD
3010 continue;
3011
2a7798ea
AD
3012 /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3013 if (adev->in_s0ix &&
3014 (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 0, 0)) &&
3015 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3016 continue;
3017
e11c7750
TH
3018 /* Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot.
3019 * These are in TMR, hence are expected to be reused by PSP-TOS to reload
3020 * from this location and RLC Autoload automatically also gets loaded
3021 * from here based on PMFW -> PSP message during re-init sequence.
3022 * Therefore, the psp suspend & resume should be skipped to avoid destroy
3023 * the TMR and reload FWs again for IMU enabled APU ASICs.
3024 */
3025 if (amdgpu_in_reset(adev) &&
3026 (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3027 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3028 continue;
3029
d38ceaf9 3030 /* XXX handle errors */
a1255107 3031 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 3032 /* XXX handle errors */
2c1a2784 3033 if (r) {
a1255107
AD
3034 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3035 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 3036 }
876923fb 3037 adev->ip_blocks[i].status.hw = false;
a3a09142 3038 /* handle putting the SMC in the appropriate state */
47fc644f 3039 if (!amdgpu_sriov_vf(adev)) {
86b93fd6
JZ
3040 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3041 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3042 if (r) {
3043 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3044 adev->mp1_state, r);
3045 return r;
3046 }
a3a09142
AD
3047 }
3048 }
d38ceaf9
AD
3049 }
3050
3051 return 0;
3052}
3053
e7854a03
AD
3054/**
3055 * amdgpu_device_ip_suspend - run suspend for hardware IPs
3056 *
3057 * @adev: amdgpu_device pointer
3058 *
3059 * Main suspend function for hardware IPs. The list of all the hardware
3060 * IPs that make up the asic is walked, clockgating is disabled and the
3061 * suspend callbacks are run. suspend puts the hardware and software state
3062 * in each IP into a state suitable for suspend.
3063 * Returns 0 on success, negative error code on failure.
3064 */
3065int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3066{
3067 int r;
3068
3c73683c
JC
3069 if (amdgpu_sriov_vf(adev)) {
3070 amdgpu_virt_fini_data_exchange(adev);
e7819644 3071 amdgpu_virt_request_full_gpu(adev, false);
3c73683c 3072 }
e7819644 3073
e7854a03
AD
3074 r = amdgpu_device_ip_suspend_phase1(adev);
3075 if (r)
3076 return r;
3077 r = amdgpu_device_ip_suspend_phase2(adev);
3078
e7819644
YT
3079 if (amdgpu_sriov_vf(adev))
3080 amdgpu_virt_release_full_gpu(adev, false);
3081
e7854a03
AD
3082 return r;
3083}
3084
06ec9070 3085static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3086{
3087 int i, r;
3088
2cb681b6 3089 static enum amd_ip_block_type ip_order[] = {
2cb681b6 3090 AMD_IP_BLOCK_TYPE_COMMON,
c1c39032 3091 AMD_IP_BLOCK_TYPE_GMC,
39186aef 3092 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
3093 AMD_IP_BLOCK_TYPE_IH,
3094 };
a90ad3c2 3095
95ea3dbc 3096 for (i = 0; i < adev->num_ip_blocks; i++) {
2cb681b6
ML
3097 int j;
3098 struct amdgpu_ip_block *block;
a90ad3c2 3099
4cd2a96d
J
3100 block = &adev->ip_blocks[i];
3101 block->status.hw = false;
2cb681b6 3102
4cd2a96d 3103 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
2cb681b6 3104
4cd2a96d 3105 if (block->version->type != ip_order[j] ||
2cb681b6
ML
3106 !block->status.valid)
3107 continue;
3108
3109 r = block->version->funcs->hw_init(adev);
0aaeefcc 3110 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3111 if (r)
3112 return r;
482f0e53 3113 block->status.hw = true;
a90ad3c2
ML
3114 }
3115 }
3116
3117 return 0;
3118}
3119
06ec9070 3120static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3121{
3122 int i, r;
3123
2cb681b6
ML
3124 static enum amd_ip_block_type ip_order[] = {
3125 AMD_IP_BLOCK_TYPE_SMC,
3126 AMD_IP_BLOCK_TYPE_DCE,
3127 AMD_IP_BLOCK_TYPE_GFX,
3128 AMD_IP_BLOCK_TYPE_SDMA,
ec64350d 3129 AMD_IP_BLOCK_TYPE_MES,
257deb8c 3130 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07 3131 AMD_IP_BLOCK_TYPE_VCE,
d2cdc014
YZ
3132 AMD_IP_BLOCK_TYPE_VCN,
3133 AMD_IP_BLOCK_TYPE_JPEG
2cb681b6 3134 };
a90ad3c2 3135
2cb681b6
ML
3136 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3137 int j;
3138 struct amdgpu_ip_block *block;
a90ad3c2 3139
2cb681b6
ML
3140 for (j = 0; j < adev->num_ip_blocks; j++) {
3141 block = &adev->ip_blocks[j];
3142
3143 if (block->version->type != ip_order[i] ||
482f0e53
ML
3144 !block->status.valid ||
3145 block->status.hw)
2cb681b6
ML
3146 continue;
3147
895bd048
JZ
3148 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3149 r = block->version->funcs->resume(adev);
3150 else
3151 r = block->version->funcs->hw_init(adev);
3152
0aaeefcc 3153 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3154 if (r)
3155 return r;
482f0e53 3156 block->status.hw = true;
a90ad3c2
ML
3157 }
3158 }
3159
3160 return 0;
3161}
3162
e3ecdffa
AD
3163/**
3164 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3165 *
3166 * @adev: amdgpu_device pointer
3167 *
3168 * First resume function for hardware IPs. The list of all the hardware
3169 * IPs that make up the asic is walked and the resume callbacks are run for
3170 * COMMON, GMC, and IH. resume puts the hardware into a functional state
3171 * after a suspend and updates the software state as necessary. This
3172 * function is also used for restoring the GPU after a GPU reset.
3173 * Returns 0 on success, negative error code on failure.
3174 */
06ec9070 3175static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
3176{
3177 int i, r;
3178
a90ad3c2 3179 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3180 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 3181 continue;
a90ad3c2 3182 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3183 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
d7274ec7
BZ
3184 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3185 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
482f0e53 3186
fcf0649f
CZ
3187 r = adev->ip_blocks[i].version->funcs->resume(adev);
3188 if (r) {
3189 DRM_ERROR("resume of IP block <%s> failed %d\n",
3190 adev->ip_blocks[i].version->funcs->name, r);
3191 return r;
3192 }
482f0e53 3193 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
3194 }
3195 }
3196
3197 return 0;
3198}
3199
e3ecdffa
AD
3200/**
3201 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3202 *
3203 * @adev: amdgpu_device pointer
3204 *
3205 * Second resume function for hardware IPs. The list of all the hardware
3206 * IPs that make up the asic is walked and the resume callbacks are run for
3207 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
3208 * functional state after a suspend and updates the software state as
3209 * necessary. This function is also used for restoring the GPU after a GPU
3210 * reset.
3211 * Returns 0 on success, negative error code on failure.
3212 */
06ec9070 3213static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3214{
3215 int i, r;
3216
3217 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3218 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 3219 continue;
fcf0649f 3220 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3221 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
3222 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3223 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 3224 continue;
a1255107 3225 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 3226 if (r) {
a1255107
AD
3227 DRM_ERROR("resume of IP block <%s> failed %d\n",
3228 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 3229 return r;
2c1a2784 3230 }
482f0e53 3231 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
3232 }
3233
3234 return 0;
3235}
3236
e3ecdffa
AD
3237/**
3238 * amdgpu_device_ip_resume - run resume for hardware IPs
3239 *
3240 * @adev: amdgpu_device pointer
3241 *
3242 * Main resume function for hardware IPs. The hardware IPs
3243 * are split into two resume functions because they are
b8920e1e 3244 * also used in recovering from a GPU reset and some additional
e3ecdffa
AD
3245 * steps need to be taken between them. In this case (S3/S4) they are
3246 * run sequentially.
3247 * Returns 0 on success, negative error code on failure.
3248 */
06ec9070 3249static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
3250{
3251 int r;
3252
06ec9070 3253 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
3254 if (r)
3255 return r;
7a3e0bb2
RZ
3256
3257 r = amdgpu_device_fw_loading(adev);
3258 if (r)
3259 return r;
3260
06ec9070 3261 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
3262
3263 return r;
3264}
3265
e3ecdffa
AD
3266/**
3267 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3268 *
3269 * @adev: amdgpu_device pointer
3270 *
3271 * Query the VBIOS data tables to determine if the board supports SR-IOV.
3272 */
4e99a44e 3273static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 3274{
6867e1b5
ML
3275 if (amdgpu_sriov_vf(adev)) {
3276 if (adev->is_atom_fw) {
58ff791a 3277 if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
6867e1b5
ML
3278 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3279 } else {
3280 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3281 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3282 }
3283
3284 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3285 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 3286 }
048765ad
AR
3287}
3288
e3ecdffa
AD
3289/**
3290 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3291 *
3292 * @asic_type: AMD asic type
3293 *
3294 * Check if there is DC (new modesetting infrastructure) support for an asic.
3295 * returns true if DC has support, false if not.
3296 */
4562236b
HW
3297bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3298{
3299 switch (asic_type) {
0637d417
AD
3300#ifdef CONFIG_DRM_AMDGPU_SI
3301 case CHIP_HAINAN:
3302#endif
3303 case CHIP_TOPAZ:
3304 /* chips with no display hardware */
3305 return false;
4562236b 3306#if defined(CONFIG_DRM_AMD_DC)
64200c46
MR
3307 case CHIP_TAHITI:
3308 case CHIP_PITCAIRN:
3309 case CHIP_VERDE:
3310 case CHIP_OLAND:
2d32ffd6
AD
3311 /*
3312 * We have systems in the wild with these ASICs that require
3313 * LVDS and VGA support which is not supported with DC.
3314 *
3315 * Fallback to the non-DC driver here by default so as not to
3316 * cause regressions.
3317 */
3318#if defined(CONFIG_DRM_AMD_DC_SI)
3319 return amdgpu_dc > 0;
3320#else
3321 return false;
64200c46 3322#endif
4562236b 3323 case CHIP_BONAIRE:
0d6fbccb 3324 case CHIP_KAVERI:
367e6687
AD
3325 case CHIP_KABINI:
3326 case CHIP_MULLINS:
d9fda248
HW
3327 /*
3328 * We have systems in the wild with these ASICs that require
b5a0168e 3329 * VGA support which is not supported with DC.
d9fda248
HW
3330 *
3331 * Fallback to the non-DC driver here by default so as not to
3332 * cause regressions.
3333 */
3334 return amdgpu_dc > 0;
f7f12b25 3335 default:
fd187853 3336 return amdgpu_dc != 0;
f7f12b25 3337#else
4562236b 3338 default:
93b09a9a 3339 if (amdgpu_dc > 0)
b8920e1e 3340 DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
4562236b 3341 return false;
f7f12b25 3342#endif
4562236b
HW
3343 }
3344}
3345
3346/**
3347 * amdgpu_device_has_dc_support - check if dc is supported
3348 *
982a820b 3349 * @adev: amdgpu_device pointer
4562236b
HW
3350 *
3351 * Returns true for supported, false for not supported
3352 */
3353bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3354{
25263da3 3355 if (adev->enable_virtual_display ||
abaf210c 3356 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
2555039d
XY
3357 return false;
3358
4562236b
HW
3359 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3360}
3361
d4535e2c
AG
3362static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3363{
3364 struct amdgpu_device *adev =
3365 container_of(__work, struct amdgpu_device, xgmi_reset_work);
d95e8e97 3366 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
d4535e2c 3367
c6a6e2db
AG
3368 /* It's a bug to not have a hive within this function */
3369 if (WARN_ON(!hive))
3370 return;
3371
3372 /*
3373 * Use task barrier to synchronize all xgmi reset works across the
3374 * hive. task_barrier_enter and task_barrier_exit will block
3375 * until all the threads running the xgmi reset works reach
3376 * those points. task_barrier_full will do both blocks.
3377 */
3378 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3379
3380 task_barrier_enter(&hive->tb);
4a580877 3381 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
c6a6e2db
AG
3382
3383 if (adev->asic_reset_res)
3384 goto fail;
3385
3386 task_barrier_exit(&hive->tb);
4a580877 3387 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
c6a6e2db
AG
3388
3389 if (adev->asic_reset_res)
3390 goto fail;
43c4d576 3391
5e67bba3 3392 if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
3393 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
3394 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
c6a6e2db
AG
3395 } else {
3396
3397 task_barrier_full(&hive->tb);
3398 adev->asic_reset_res = amdgpu_asic_reset(adev);
3399 }
ce316fa5 3400
c6a6e2db 3401fail:
d4535e2c 3402 if (adev->asic_reset_res)
fed184e9 3403 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
4a580877 3404 adev->asic_reset_res, adev_to_drm(adev)->unique);
d95e8e97 3405 amdgpu_put_xgmi_hive(hive);
d4535e2c
AG
3406}
3407
71f98027
AD
3408static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3409{
3410 char *input = amdgpu_lockup_timeout;
3411 char *timeout_setting = NULL;
3412 int index = 0;
3413 long timeout;
3414 int ret = 0;
3415
3416 /*
67387dfe
AD
3417 * By default the timeout for non-compute jobs is 10000 ms
3418 * and 60000 ms for compute jobs.
71f98027 3419 * Under SR-IOV the compute timeout is 60000 ms only when one VF
b7b2a316 3420 * owns all PP resources; otherwise it is 10000 ms.
71f98027
AD
3421 */
3422 adev->gfx_timeout = msecs_to_jiffies(10000);
3423 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
9882e278
ED
3424 if (amdgpu_sriov_vf(adev))
3425 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3426 msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
71f98027 3427 else
67387dfe 3428 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027 3429
f440ff44 3430 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 3431 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 3432 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
3433 ret = kstrtol(timeout_setting, 0, &timeout);
3434 if (ret)
3435 return ret;
3436
3437 if (timeout == 0) {
3438 index++;
3439 continue;
3440 } else if (timeout < 0) {
3441 timeout = MAX_SCHEDULE_TIMEOUT;
127aedf9
CK
3442 dev_warn(adev->dev, "lockup timeout disabled");
3443 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
71f98027
AD
3444 } else {
3445 timeout = msecs_to_jiffies(timeout);
3446 }
3447
3448 switch (index++) {
3449 case 0:
3450 adev->gfx_timeout = timeout;
3451 break;
3452 case 1:
3453 adev->compute_timeout = timeout;
3454 break;
3455 case 2:
3456 adev->sdma_timeout = timeout;
3457 break;
3458 case 3:
3459 adev->video_timeout = timeout;
3460 break;
3461 default:
3462 break;
3463 }
3464 }
3465 /*
3466 * There is only one value specified and
3467 * it should apply to all non-compute jobs.
3468 */
bcccee89 3469 if (index == 1) {
71f98027 3470 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
3471 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3472 adev->compute_timeout = adev->gfx_timeout;
3473 }
71f98027
AD
3474 }
3475
3476 return ret;
3477}
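/*
 * Illustrative, standalone userspace sketch (not part of amdgpu): how a
 * lockup_timeout string such as "10000,60000,10000,10000" maps onto the
 * four job classes handled by the switch above. Uses standard C
 * strtok/strtol instead of the kernel's strsep/kstrtol helpers.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	char input[] = "10000,60000,10000,10000";
	const char *job[] = { "gfx", "compute", "sdma", "video" };
	int index = 0;

	for (char *tok = strtok(input, ","); tok && index < 4;
	     tok = strtok(NULL, ",")) {
		long timeout = strtol(tok, NULL, 0);

		/* 0 keeps the default, a negative value disables the timeout */
		printf("%s timeout: %ld ms\n", job[index++], timeout);
	}
	return 0;
}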
d4535e2c 3478
4a74c38c
PY
3479/**
3480 * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3481 *
3482 * @adev: amdgpu_device pointer
3483 *
3484 * RAM is directly mapped to the GPU if the IOMMU is disabled or in passthrough mode
3485 */
3486static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3487{
3488 struct iommu_domain *domain;
3489
3490 domain = iommu_get_domain_for_dev(adev->dev);
3491 if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3492 adev->ram_is_direct_mapped = true;
3493}
3494
77f3a5cd 3495static const struct attribute *amdgpu_dev_attributes[] = {
77f3a5cd
ND
3496 &dev_attr_pcie_replay_count.attr,
3497 NULL
3498};
3499
02ff519e
AD
3500static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
3501{
3502 if (amdgpu_mcbp == 1)
3503 adev->gfx.mcbp = true;
1e9e15dc
JZ
3504 else if (amdgpu_mcbp == 0)
3505 adev->gfx.mcbp = false;
3506 else if ((adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 0, 0)) &&
3507 (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 0, 0)) &&
3508 adev->gfx.num_gfx_rings)
50a7c876
AD
3509 adev->gfx.mcbp = true;
3510
02ff519e
AD
3511 if (amdgpu_sriov_vf(adev))
3512 adev->gfx.mcbp = true;
3513
3514 if (adev->gfx.mcbp)
3515 DRM_INFO("MCBP is enabled\n");
3516}
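/*
 * Illustrative note, not part of the upstream file: amdgpu_mcbp mirrors the
 * amdgpu.mcbp module parameter. Per the logic above, 1 forces mid command
 * buffer preemption on, 0 forces it off, and any other value (the auto
 * default) enables it for gfx9-generation GC IPs with gfx rings; SR-IOV
 * always enables it.
 */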
3517
d38ceaf9
AD
3518/**
3519 * amdgpu_device_init - initialize the driver
3520 *
3521 * @adev: amdgpu_device pointer
d38ceaf9
AD
3522 * @flags: driver flags
3523 *
3524 * Initializes the driver info and hw (all asics).
3525 * Returns 0 for success or an error on failure.
3526 * Called at driver startup.
3527 */
3528int amdgpu_device_init(struct amdgpu_device *adev,
d38ceaf9
AD
3529 uint32_t flags)
3530{
8aba21b7
LT
3531 struct drm_device *ddev = adev_to_drm(adev);
3532 struct pci_dev *pdev = adev->pdev;
d38ceaf9 3533 int r, i;
b98c6299 3534 bool px = false;
95844d20 3535 u32 max_MBps;
59e9fff1 3536 int tmp;
d38ceaf9
AD
3537
3538 adev->shutdown = false;
d38ceaf9 3539 adev->flags = flags;
4e66d7d2
YZ
3540
3541 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3542 adev->asic_type = amdgpu_force_asic_type;
3543 else
3544 adev->asic_type = flags & AMD_ASIC_MASK;
3545
d38ceaf9 3546 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 3547 if (amdgpu_emu_mode == 1)
8bdab6bb 3548 adev->usec_timeout *= 10;
770d13b1 3549 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
3550 adev->accel_working = false;
3551 adev->num_rings = 0;
68ce8b24 3552 RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
d38ceaf9
AD
3553 adev->mman.buffer_funcs = NULL;
3554 adev->mman.buffer_funcs_ring = NULL;
3555 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 3556 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 3557 adev->gmc.gmc_funcs = NULL;
7bd939d0 3558 adev->harvest_ip_mask = 0x0;
f54d1867 3559 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 3560 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
3561
3562 adev->smc_rreg = &amdgpu_invalid_rreg;
3563 adev->smc_wreg = &amdgpu_invalid_wreg;
3564 adev->pcie_rreg = &amdgpu_invalid_rreg;
3565 adev->pcie_wreg = &amdgpu_invalid_wreg;
0c552ed3
LM
3566 adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
3567 adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
36b9a952
HR
3568 adev->pciep_rreg = &amdgpu_invalid_rreg;
3569 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
3570 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3571 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
3572 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3573 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3574 adev->didt_rreg = &amdgpu_invalid_rreg;
3575 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
3576 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3577 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
3578 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3579 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3580
3e39ab90
AD
3581 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3582 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3583 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
3584
3585 /* mutex initializations are all done here so we
b8920e1e
SS
3586 * can call functions again without running into locking issues
3587 */
0e5ca0d1 3588 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
3589 mutex_init(&adev->pm.mutex);
3590 mutex_init(&adev->gfx.gpu_clock_mutex);
3591 mutex_init(&adev->srbm_mutex);
b8866c26 3592 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3593 mutex_init(&adev->gfx.gfx_off_mutex);
98a54e88 3594 mutex_init(&adev->gfx.partition_mutex);
d38ceaf9 3595 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3596 mutex_init(&adev->mn_lock);
e23b74aa 3597 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3598 hash_init(adev->mn_hash);
32eaeae0 3599 mutex_init(&adev->psp.mutex);
bd052211 3600 mutex_init(&adev->notifier_lock);
8cda7a4f 3601 mutex_init(&adev->pm.stable_pstate_ctx_lock);
f113cc32 3602 mutex_init(&adev->benchmark_mutex);
d38ceaf9 3603
ab3b9de6 3604 amdgpu_device_init_apu_flags(adev);
9f6a7857 3605
912dfc84
EQ
3606 r = amdgpu_device_check_arguments(adev);
3607 if (r)
3608 return r;
d38ceaf9 3609
d38ceaf9
AD
3610 spin_lock_init(&adev->mmio_idx_lock);
3611 spin_lock_init(&adev->smc_idx_lock);
3612 spin_lock_init(&adev->pcie_idx_lock);
3613 spin_lock_init(&adev->uvd_ctx_idx_lock);
3614 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3615 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3616 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3617 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3618 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3619
0c4e7fa5
CZ
3620 INIT_LIST_HEAD(&adev->shadow_list);
3621 mutex_init(&adev->shadow_list_lock);
3622
655ce9cb 3623 INIT_LIST_HEAD(&adev->reset_list);
3624
6492e1b0 3625 INIT_LIST_HEAD(&adev->ras_list);
3626
3e38b634
EQ
3627 INIT_LIST_HEAD(&adev->pm.od_kobj_list);
3628
beff74bc
AD
3629 INIT_DELAYED_WORK(&adev->delayed_init_work,
3630 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3631 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3632 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3633
d4535e2c
AG
3634 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3635
d23ee13f 3636 adev->gfx.gfx_off_req_count = 1;
0ad7347a
AA
3637 adev->gfx.gfx_off_residency = 0;
3638 adev->gfx.gfx_off_entrycount = 0;
b6e79d9a 3639 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3640
b265bdbd
EQ
3641 atomic_set(&adev->throttling_logging_enabled, 1);
3642 /*
3643 * If throttling continues, logging will be performed every minute
3644 * to avoid log flooding. "-1" is subtracted since the thermal
3645 * throttling interrupt comes every second. Thus, the total logging
3646 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
3647 * for throttling interrupt) = 60 seconds.
3648 */
3649 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3650 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3651
0fa49558
AX
3652 /* Registers mapping */
3653 /* TODO: block userspace mapping of io register */
da69c161
KW
3654 if (adev->asic_type >= CHIP_BONAIRE) {
3655 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3656 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3657 } else {
3658 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3659 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3660 }
d38ceaf9 3661
6c08e0ef
EQ
3662 for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3663 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3664
d38ceaf9 3665 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
b8920e1e 3666 if (!adev->rmmio)
d38ceaf9 3667 return -ENOMEM;
b8920e1e 3668
d38ceaf9 3669 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
b8920e1e 3670 DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size);
d38ceaf9 3671
436afdfa
PY
3672 /*
3673 * The reset domain needs to be present early, before any XGMI hive is
3674 * discovered and initialized, so the reset semaphore and in_gpu_reset flag
3675 * can be used early during init and before the first RREG32 call.
3676 */
3677 adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3678 if (!adev->reset_domain)
3679 return -ENOMEM;
3680
3aa0115d
ML
3681 /* detect hw virtualization here */
3682 amdgpu_detect_virtualization(adev);
3683
04e85958
TL
3684 amdgpu_device_get_pcie_info(adev);
3685
dffa11b4
ML
3686 r = amdgpu_device_get_job_timeout_settings(adev);
3687 if (r) {
3688 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4ef87d8f 3689 return r;
a190d1c7
XY
3690 }
3691
d38ceaf9 3692 /* early init functions */
06ec9070 3693 r = amdgpu_device_ip_early_init(adev);
d38ceaf9 3694 if (r)
4ef87d8f 3695 return r;
d38ceaf9 3696
02ff519e
AD
3697 amdgpu_device_set_mcbp(adev);
3698
b7cdb41e
ML
3699 /* Get rid of things like offb */
3700 r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
3701 if (r)
3702 return r;
3703
4d33e704
SK
3704 /* Enable TMZ based on IP_VERSION */
3705 amdgpu_gmc_tmz_set(adev);
3706
957b0787 3707 amdgpu_gmc_noretry_set(adev);
4a0165f0
VS
3708 /* Need to get xgmi info early to decide the reset behavior */
3709 if (adev->gmc.xgmi.supported) {
3710 r = adev->gfxhub.funcs->get_xgmi_info(adev);
3711 if (r)
3712 return r;
3713 }
3714
8e6d0b69 3715 /* enable PCIE atomic ops */
b4520bfd
GW
3716 if (amdgpu_sriov_vf(adev)) {
3717 if (adev->virt.fw_reserve.p_pf2vf)
3718 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
3719 adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
3720 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
0e768043
YZ
3721 /* APUs with gfx9 onwards don't rely on PCIe atomics; their internal
3722 * path natively supports atomics, so set have_atomics_support to true.
3723 */
b4520bfd
GW
3724 } else if ((adev->flags & AMD_IS_APU) &&
3725 (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))) {
0e768043 3726 adev->have_atomics_support = true;
b4520bfd 3727 } else {
8e6d0b69 3728 adev->have_atomics_support =
3729 !pci_enable_atomic_ops_to_root(adev->pdev,
3730 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3731 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
b4520bfd
GW
3732 }
3733
8e6d0b69 3734 if (!adev->have_atomics_support)
3735 dev_info(adev->dev, "PCIE atomic ops is not supported\n");
3736
6585661d 3737 /* doorbell bar mapping and doorbell index init */
43c064db 3738 amdgpu_doorbell_init(adev);
6585661d 3739
9475a943
SL
3740 if (amdgpu_emu_mode == 1) {
3741 /* post the asic on emulation mode */
3742 emu_soc_asic_init(adev);
bfca0289 3743 goto fence_driver_init;
9475a943 3744 }
bfca0289 3745
04442bf7
LL
3746 amdgpu_reset_init(adev);
3747
4e99a44e 3748 /* detect whether we have an SR-IOV vBIOS */
b4520bfd
GW
3749 if (adev->bios)
3750 amdgpu_device_detect_sriov_bios(adev);
048765ad 3751
95e8e59e
AD
3752 /* check if we need to reset the asic
3753 * E.g., driver was not cleanly unloaded previously, etc.
3754 */
f14899fd 3755 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
e3c1b071 3756 if (adev->gmc.xgmi.num_physical_nodes) {
3757 dev_info(adev->dev, "Pending hive reset.\n");
3758 adev->gmc.xgmi.pending_reset = true;
3759 /* Only need to init the blocks necessary for SMU to handle the reset */
3760 for (i = 0; i < adev->num_ip_blocks; i++) {
3761 if (!adev->ip_blocks[i].status.valid)
3762 continue;
3763 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3764 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3765 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3766 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
751f43e7 3767 DRM_DEBUG("IP %s disabled for hw_init.\n",
e3c1b071 3768 adev->ip_blocks[i].version->funcs->name);
3769 adev->ip_blocks[i].status.hw = true;
3770 }
3771 }
3772 } else {
59e9fff1 3773 tmp = amdgpu_reset_method;
3774 /* It should do a default reset when loading or reloading the driver,
3775 * regardless of the module parameter reset_method.
3776 */
3777 amdgpu_reset_method = AMD_RESET_METHOD_NONE;
e3c1b071 3778 r = amdgpu_asic_reset(adev);
59e9fff1 3779 amdgpu_reset_method = tmp;
e3c1b071 3780 if (r) {
3781 dev_err(adev->dev, "asic reset on init failed\n");
3782 goto failed;
3783 }
95e8e59e
AD
3784 }
3785 }
3786
d38ceaf9 3787 /* Post card if necessary */
39c640c0 3788 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3789 if (!adev->bios) {
bec86378 3790 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3791 r = -EINVAL;
3792 goto failed;
d38ceaf9 3793 }
bec86378 3794 DRM_INFO("GPU posting now...\n");
4d2997ab 3795 r = amdgpu_device_asic_init(adev);
4e99a44e
ML
3796 if (r) {
3797 dev_err(adev->dev, "gpu post error!\n");
3798 goto failed;
3799 }
d38ceaf9
AD
3800 }
3801
9535a86a
SZ
3802 if (adev->bios) {
3803 if (adev->is_atom_fw) {
3804 /* Initialize clocks */
3805 r = amdgpu_atomfirmware_get_clock_info(adev);
3806 if (r) {
3807 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
3808 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3809 goto failed;
3810 }
3811 } else {
3812 /* Initialize clocks */
3813 r = amdgpu_atombios_get_clock_info(adev);
3814 if (r) {
3815 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
3816 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3817 goto failed;
3818 }
3819 /* init i2c buses */
3820 if (!amdgpu_device_has_dc_support(adev))
3821 amdgpu_atombios_i2c_init(adev);
a5bde2f9 3822 }
2c1a2784 3823 }
d38ceaf9 3824
bfca0289 3825fence_driver_init:
d38ceaf9 3826 /* Fence driver */
067f44c8 3827 r = amdgpu_fence_driver_sw_init(adev);
2c1a2784 3828 if (r) {
067f44c8 3829 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
e23b74aa 3830 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3831 goto failed;
2c1a2784 3832 }
d38ceaf9
AD
3833
3834 /* init the mode config */
4a580877 3835 drm_mode_config_init(adev_to_drm(adev));
d38ceaf9 3836
06ec9070 3837 r = amdgpu_device_ip_init(adev);
d38ceaf9 3838 if (r) {
06ec9070 3839 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3840 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
970fd197 3841 goto release_ras_con;
d38ceaf9
AD
3842 }
3843
8d35a259
LG
3844 amdgpu_fence_driver_hw_init(adev);
3845
d69b8971
YZ
3846 dev_info(adev->dev,
3847 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
3848 adev->gfx.config.max_shader_engines,
3849 adev->gfx.config.max_sh_per_se,
3850 adev->gfx.config.max_cu_per_sh,
3851 adev->gfx.cu_info.number);
3852
d38ceaf9
AD
3853 adev->accel_working = true;
3854
e59c0205
AX
3855 amdgpu_vm_check_compute_bug(adev);
3856
95844d20
MO
3857 /* Initialize the buffer migration limit. */
3858 if (amdgpu_moverate >= 0)
3859 max_MBps = amdgpu_moverate;
3860 else
3861 max_MBps = 8; /* Allow 8 MB/s. */
3862 /* Get a log2 for easy divisions. */
3863 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
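	/* e.g. max_MBps = 8 gives log2_max_MBps = 3, so later rate checks can
	 * shift by this value instead of dividing by the MB/s budget.
	 */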
3864
184d8384
LL
3865 r = amdgpu_atombios_sysfs_init(adev);
3866 if (r)
3867 drm_err(&adev->ddev,
3868 "registering atombios sysfs failed (%d).\n", r);
3869
d2f52ac8 3870 r = amdgpu_pm_sysfs_init(adev);
53e9d836
GC
3871 if (r)
3872 DRM_ERROR("registering pm sysfs failed (%d).\n", r);
d2f52ac8 3873
5bb23532 3874 r = amdgpu_ucode_sysfs_init(adev);
7c868b59
YT
3875 if (r) {
3876 adev->ucode_sysfs_en = false;
5bb23532 3877 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
7c868b59
YT
3878 } else
3879 adev->ucode_sysfs_en = true;
5bb23532 3880
b0adca4d
EQ
3881 /*
3882 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3883 * Otherwise the mgpu fan boost feature will be skipped because the
3884 * gpu instance count would be too low.
3885 */
3886 amdgpu_register_gpu_instance(adev);
3887
d38ceaf9
AD
3888 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3889 * explicit gating rather than handling it automatically.
3890 */
e3c1b071 3891 if (!adev->gmc.xgmi.pending_reset) {
3892 r = amdgpu_device_ip_late_init(adev);
3893 if (r) {
3894 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
3895 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
970fd197 3896 goto release_ras_con;
e3c1b071 3897 }
3898 /* must succeed. */
3899 amdgpu_ras_resume(adev);
3900 queue_delayed_work(system_wq, &adev->delayed_init_work,
3901 msecs_to_jiffies(AMDGPU_RESUME_MS));
2c1a2784 3902 }
d38ceaf9 3903
38eecbe0
CL
3904 if (amdgpu_sriov_vf(adev)) {
3905 amdgpu_virt_release_full_gpu(adev, true);
2c738637 3906 flush_delayed_work(&adev->delayed_init_work);
38eecbe0 3907 }
2c738637 3908
77f3a5cd 3909 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
5aea5327 3910 if (r)
77f3a5cd 3911 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166 3912
7957ec80
LL
3913 amdgpu_fru_sysfs_init(adev);
3914
d155bef0
AB
3915 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3916 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
3917 if (r)
3918 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3919
c1dd4aa6
AG
3920 /* Keep the stored PCI config space at hand for restore after a sudden PCI error */
3921 if (amdgpu_device_cache_pci_state(adev->pdev))
3922 pci_restore_state(pdev);
3923
8c3dd61c
KHF
3924 /* if we have more than one VGA card, disable the amdgpu VGA resources */
3925 /* this will fail for cards that aren't VGA class devices, just
b8920e1e
SS
3926 * ignore it
3927 */
8c3dd61c 3928 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
bf44e8ce 3929 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
8c3dd61c 3930
d37a3929
OC
3931 px = amdgpu_device_supports_px(ddev);
3932
3933 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
3934 apple_gmux_detect(NULL, NULL)))
8c3dd61c
KHF
3935 vga_switcheroo_register_client(adev->pdev,
3936 &amdgpu_switcheroo_ops, px);
d37a3929
OC
3937
3938 if (px)
8c3dd61c 3939 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
8c3dd61c 3940
e3c1b071 3941 if (adev->gmc.xgmi.pending_reset)
3942 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
3943 msecs_to_jiffies(AMDGPU_RESUME_MS));
3944
4a74c38c
PY
3945 amdgpu_device_check_iommu_direct_map(adev);
3946
d38ceaf9 3947 return 0;
83ba126a 3948
970fd197 3949release_ras_con:
38eecbe0
CL
3950 if (amdgpu_sriov_vf(adev))
3951 amdgpu_virt_release_full_gpu(adev, true);
3952
3953 /* failed in exclusive mode due to timeout */
3954 if (amdgpu_sriov_vf(adev) &&
3955 !amdgpu_sriov_runtime(adev) &&
3956 amdgpu_virt_mmio_blocked(adev) &&
3957 !amdgpu_virt_wait_reset(adev)) {
3958 dev_err(adev->dev, "VF exclusive mode timeout\n");
3959 /* Don't send request since VF is inactive. */
3960 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3961 adev->virt.ops = NULL;
3962 r = -EAGAIN;
3963 }
970fd197
SY
3964 amdgpu_release_ras_context(adev);
3965
83ba126a 3966failed:
89041940 3967 amdgpu_vf_error_trans_all(adev);
8840a387 3968
83ba126a 3969 return r;
d38ceaf9
AD
3970}
3971
07775fc1
AG
3972static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
3973{
62d5f9f7 3974
07775fc1
AG
3975 /* Clear all CPU mappings pointing to this device */
3976 unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
3977
3978 /* Unmap all mapped bars - Doorbell, registers and VRAM */
43c064db 3979 amdgpu_doorbell_fini(adev);
07775fc1
AG
3980
3981 iounmap(adev->rmmio);
3982 adev->rmmio = NULL;
3983 if (adev->mman.aper_base_kaddr)
3984 iounmap(adev->mman.aper_base_kaddr);
3985 adev->mman.aper_base_kaddr = NULL;
3986
3987 /* Memory manager related */
a0ba1279 3988 if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
07775fc1
AG
3989 arch_phys_wc_del(adev->gmc.vram_mtrr);
3990 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
3991 }
3992}
3993
d38ceaf9 3994/**
bbe04dec 3995 * amdgpu_device_fini_hw - tear down the driver
d38ceaf9
AD
3996 *
3997 * @adev: amdgpu_device pointer
3998 *
3999 * Tear down the driver info (all asics).
4000 * Called at driver shutdown.
4001 */
72c8c97b 4002void amdgpu_device_fini_hw(struct amdgpu_device *adev)
d38ceaf9 4003{
aac89168 4004 dev_info(adev->dev, "amdgpu: finishing device.\n");
9f875167 4005 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 4006 adev->shutdown = true;
9f875167 4007
752c683d
ML
4008 /* make sure the IB tests have finished before entering exclusive mode
4009 * to avoid preempting an IB test
b8920e1e 4010 */
519b8b76 4011 if (amdgpu_sriov_vf(adev)) {
752c683d 4012 amdgpu_virt_request_full_gpu(adev, false);
519b8b76
BZ
4013 amdgpu_virt_fini_data_exchange(adev);
4014 }
752c683d 4015
e5b03032
ML
4016 /* disable all interrupts */
4017 amdgpu_irq_disable_all(adev);
47fc644f 4018 if (adev->mode_info.mode_config_initialized) {
1053b9c9 4019 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4a580877 4020 drm_helper_force_disable_all(adev_to_drm(adev));
ff97cba8 4021 else
4a580877 4022 drm_atomic_helper_shutdown(adev_to_drm(adev));
ff97cba8 4023 }
8d35a259 4024 amdgpu_fence_driver_hw_fini(adev);
72c8c97b 4025
cd3a8a59 4026 if (adev->mman.initialized)
9bff18d1 4027 drain_workqueue(adev->mman.bdev.wq);
98f56188 4028
53e9d836 4029 if (adev->pm.sysfs_initialized)
7c868b59 4030 amdgpu_pm_sysfs_fini(adev);
72c8c97b
AG
4031 if (adev->ucode_sysfs_en)
4032 amdgpu_ucode_sysfs_fini(adev);
4033 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
7957ec80 4034 amdgpu_fru_sysfs_fini(adev);
72c8c97b 4035
232d1d43
SY
4036 /* RAS features must be disabled before hw fini */
4037 amdgpu_ras_pre_fini(adev);
4038
e9669fb7 4039 amdgpu_device_ip_fini_early(adev);
d10d0daa 4040
a3848df6
YW
4041 amdgpu_irq_fini_hw(adev);
4042
b6fd6e0f
SK
4043 if (adev->mman.initialized)
4044 ttm_device_clear_dma_mappings(&adev->mman.bdev);
894c6890 4045
d10d0daa 4046 amdgpu_gart_dummy_page_fini(adev);
07775fc1 4047
39934d3e
VP
4048 if (drm_dev_is_unplugged(adev_to_drm(adev)))
4049 amdgpu_device_unmap_mmio(adev);
87172e89 4050
72c8c97b
AG
4051}
4052
4053void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4054{
62d5f9f7 4055 int idx;
d37a3929 4056 bool px;
62d5f9f7 4057
8d35a259 4058 amdgpu_fence_driver_sw_fini(adev);
a5c5d8d5 4059 amdgpu_device_ip_fini(adev);
b31d3063 4060 amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
d38ceaf9 4061 adev->accel_working = false;
68ce8b24 4062 dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
04442bf7
LL
4063
4064 amdgpu_reset_fini(adev);
4065
d38ceaf9 4066 /* free i2c buses */
4562236b
HW
4067 if (!amdgpu_device_has_dc_support(adev))
4068 amdgpu_i2c_fini(adev);
bfca0289
SL
4069
4070 if (amdgpu_emu_mode != 1)
4071 amdgpu_atombios_fini(adev);
4072
d38ceaf9
AD
4073 kfree(adev->bios);
4074 adev->bios = NULL;
d37a3929
OC
4075
4076 px = amdgpu_device_supports_px(adev_to_drm(adev));
4077
4078 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
4079 apple_gmux_detect(NULL, NULL)))
84c8b22e 4080 vga_switcheroo_unregister_client(adev->pdev);
d37a3929
OC
4081
4082 if (px)
83ba126a 4083 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d37a3929 4084
38d6be81 4085 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
b8779475 4086 vga_client_unregister(adev->pdev);
e9bc1bf7 4087
62d5f9f7
LS
4088 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4089
4090 iounmap(adev->rmmio);
4091 adev->rmmio = NULL;
43c064db 4092 amdgpu_doorbell_fini(adev);
62d5f9f7
LS
4093 drm_dev_exit(idx);
4094 }
4095
d155bef0
AB
4096 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4097 amdgpu_pmu_fini(adev);
72de33f8 4098 if (adev->mman.discovery_bin)
a190d1c7 4099 amdgpu_discovery_fini(adev);
72c8c97b 4100
cfbb6b00
AG
4101 amdgpu_reset_put_reset_domain(adev->reset_domain);
4102 adev->reset_domain = NULL;
4103
72c8c97b
AG
4104 kfree(adev->pci_state);
4105
d38ceaf9
AD
4106}
4107
58144d28
ND
4108/**
4109 * amdgpu_device_evict_resources - evict device resources
4110 * @adev: amdgpu device object
4111 *
4112 * Evicts all ttm device resources (vram BOs, gart table) from the lru list
4113 * of the vram memory type. Mainly used for evicting device resources
4114 * at suspend time.
4115 *
4116 */
7863c155 4117static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
58144d28 4118{
7863c155
ML
4119 int ret;
4120
e53d9665
ML
4121 /* No need to evict vram on APUs for suspend to ram or s2idle */
4122 if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
7863c155 4123 return 0;
58144d28 4124
7863c155
ML
4125 ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4126 if (ret)
58144d28 4127 DRM_WARN("evicting device resources failed\n");
7863c155 4128 return ret;
58144d28 4129}
d38ceaf9
AD
4130
4131/*
4132 * Suspend & resume.
4133 */
4134/**
810ddc3a 4135 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 4136 *
87e3f136 4137 * @dev: drm dev pointer
87e3f136 4138 * @fbcon: notify the fbdev of suspend
d38ceaf9
AD
4139 *
4140 * Puts the hw in the suspend state (all asics).
4141 * Returns 0 for success or an error on failure.
4142 * Called at driver suspend.
4143 */
de185019 4144int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9 4145{
a2e15b0e 4146 struct amdgpu_device *adev = drm_to_adev(dev);
d7274ec7 4147 int r = 0;
d38ceaf9 4148
d38ceaf9
AD
4149 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4150 return 0;
4151
44779b43 4152 adev->in_suspend = true;
3fa8f89d 4153
47ea2076
SF
4154 /* Evict the majority of BOs before grabbing full access */
4155 r = amdgpu_device_evict_resources(adev);
4156 if (r)
4157 return r;
4158
d7274ec7
BZ
4159 if (amdgpu_sriov_vf(adev)) {
4160 amdgpu_virt_fini_data_exchange(adev);
4161 r = amdgpu_virt_request_full_gpu(adev, false);
4162 if (r)
4163 return r;
4164 }
4165
3fa8f89d
S
4166 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4167 DRM_WARN("smart shift update failed\n");
4168
5f818173 4169 if (fbcon)
087451f3 4170 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
5f818173 4171
beff74bc 4172 cancel_delayed_work_sync(&adev->delayed_init_work);
0dee7263 4173 flush_delayed_work(&adev->gfx.gfx_off_delay_work);
a5459475 4174
5e6932fe 4175 amdgpu_ras_suspend(adev);
4176
2196927b 4177 amdgpu_device_ip_suspend_phase1(adev);
fe1053b7 4178
c004d44e 4179 if (!adev->in_s0ix)
5d3a2d95 4180 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
94fa5660 4181
7863c155
ML
4182 r = amdgpu_device_evict_resources(adev);
4183 if (r)
4184 return r;
d38ceaf9 4185
8d35a259 4186 amdgpu_fence_driver_hw_fini(adev);
d38ceaf9 4187
2196927b 4188 amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 4189
d7274ec7
BZ
4190 if (amdgpu_sriov_vf(adev))
4191 amdgpu_virt_release_full_gpu(adev, false);
4192
d38ceaf9
AD
4193 return 0;
4194}
4195
4196/**
810ddc3a 4197 * amdgpu_device_resume - initiate device resume
d38ceaf9 4198 *
87e3f136 4199 * @dev: drm dev pointer
87e3f136 4200 * @fbcon: notify the fbdev of resume
d38ceaf9
AD
4201 *
4202 * Bring the hw back to operating state (all asics).
4203 * Returns 0 for success or an error on failure.
4204 * Called at driver resume.
4205 */
de185019 4206int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9 4207{
1348969a 4208 struct amdgpu_device *adev = drm_to_adev(dev);
03161a6e 4209 int r = 0;
d38ceaf9 4210
d7274ec7
BZ
4211 if (amdgpu_sriov_vf(adev)) {
4212 r = amdgpu_virt_request_full_gpu(adev, true);
4213 if (r)
4214 return r;
4215 }
4216
d38ceaf9
AD
4217 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4218 return 0;
4219
62498733 4220 if (adev->in_s0ix)
bc143d8b 4221 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
628c36d7 4222
d38ceaf9 4223 /* post card */
39c640c0 4224 if (amdgpu_device_need_post(adev)) {
4d2997ab 4225 r = amdgpu_device_asic_init(adev);
74b0b157 4226 if (r)
aac89168 4227 dev_err(adev->dev, "amdgpu asic init failed\n");
74b0b157 4228 }
d38ceaf9 4229
06ec9070 4230 r = amdgpu_device_ip_resume(adev);
d7274ec7 4231
e6707218 4232 if (r) {
aac89168 4233 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
3c22c1ea 4234 goto exit;
e6707218 4235 }
8d35a259 4236 amdgpu_fence_driver_hw_init(adev);
5ceb54c6 4237
06ec9070 4238 r = amdgpu_device_ip_late_init(adev);
03161a6e 4239 if (r)
3c22c1ea 4240 goto exit;
d38ceaf9 4241
beff74bc
AD
4242 queue_delayed_work(system_wq, &adev->delayed_init_work,
4243 msecs_to_jiffies(AMDGPU_RESUME_MS));
4244
c004d44e 4245 if (!adev->in_s0ix) {
5d3a2d95
AD
4246 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4247 if (r)
3c22c1ea 4248 goto exit;
5d3a2d95 4249 }
756e6880 4250
3c22c1ea
SF
4251exit:
4252 if (amdgpu_sriov_vf(adev)) {
4253 amdgpu_virt_init_data_exchange(adev);
4254 amdgpu_virt_release_full_gpu(adev, true);
4255 }
4256
4257 if (r)
4258 return r;
4259
96a5d8d4 4260 /* Make sure IB tests flushed */
beff74bc 4261 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 4262
a2e15b0e 4263 if (fbcon)
087451f3 4264 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
d38ceaf9 4265
5e6932fe 4266 amdgpu_ras_resume(adev);
4267
d09ef243
AD
4268 if (adev->mode_info.num_crtc) {
4269 /*
4270 * Most of the connector probing functions try to acquire runtime pm
4271 * refs to ensure that the GPU is powered on when connector polling is
4272 * performed. Since we're calling this from a runtime PM callback,
4273 * trying to acquire rpm refs will cause us to deadlock.
4274 *
4275 * Since we're guaranteed to be holding the rpm lock, it's safe to
4276 * temporarily disable the rpm helpers so this doesn't deadlock us.
4277 */
23a1a9e5 4278#ifdef CONFIG_PM
d09ef243 4279 dev->dev->power.disable_depth++;
23a1a9e5 4280#endif
d09ef243
AD
4281 if (!adev->dc_enabled)
4282 drm_helper_hpd_irq_event(dev);
4283 else
4284 drm_kms_helper_hotplug_event(dev);
23a1a9e5 4285#ifdef CONFIG_PM
d09ef243 4286 dev->dev->power.disable_depth--;
23a1a9e5 4287#endif
d09ef243 4288 }
44779b43
RZ
4289 adev->in_suspend = false;
4290
dc907c9d
JX
4291 if (adev->enable_mes)
4292 amdgpu_mes_self_test(adev);
4293
3fa8f89d
S
4294 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4295 DRM_WARN("smart shift update failed\n");
4296
4d3b9ae5 4297 return 0;
d38ceaf9
AD
4298}
4299
e3ecdffa
AD
4300/**
4301 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4302 *
4303 * @adev: amdgpu_device pointer
4304 *
4305 * The list of all the hardware IPs that make up the asic is walked and
4306 * the check_soft_reset callbacks are run. check_soft_reset determines
4307 * if the asic is still hung or not.
4308 * Returns true if any of the IPs are still in a hung state, false if not.
4309 */
06ec9070 4310static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
4311{
4312 int i;
4313 bool asic_hang = false;
4314
f993d628
ML
4315 if (amdgpu_sriov_vf(adev))
4316 return true;
4317
8bc04c29
AD
4318 if (amdgpu_asic_need_full_reset(adev))
4319 return true;
4320
63fbf42f 4321 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4322 if (!adev->ip_blocks[i].status.valid)
63fbf42f 4323 continue;
a1255107
AD
4324 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4325 adev->ip_blocks[i].status.hang =
4326 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4327 if (adev->ip_blocks[i].status.hang) {
aac89168 4328 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
4329 asic_hang = true;
4330 }
4331 }
4332 return asic_hang;
4333}
4334
e3ecdffa
AD
4335/**
4336 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4337 *
4338 * @adev: amdgpu_device pointer
4339 *
4340 * The list of all the hardware IPs that make up the asic is walked and the
4341 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
4342 * handles any IP specific hardware or software state changes that are
4343 * necessary for a soft reset to succeed.
4344 * Returns 0 on success, negative error code on failure.
4345 */
06ec9070 4346static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
4347{
4348 int i, r = 0;
4349
4350 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4351 if (!adev->ip_blocks[i].status.valid)
d31a501e 4352 continue;
a1255107
AD
4353 if (adev->ip_blocks[i].status.hang &&
4354 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4355 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
4356 if (r)
4357 return r;
4358 }
4359 }
4360
4361 return 0;
4362}
4363
e3ecdffa
AD
4364/**
4365 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4366 *
4367 * @adev: amdgpu_device pointer
4368 *
4369 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
4370 * reset is necessary to recover.
4371 * Returns true if a full asic reset is required, false if not.
4372 */
06ec9070 4373static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 4374{
da146d3b
AD
4375 int i;
4376
8bc04c29
AD
4377 if (amdgpu_asic_need_full_reset(adev))
4378 return true;
4379
da146d3b 4380 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4381 if (!adev->ip_blocks[i].status.valid)
da146d3b 4382 continue;
a1255107
AD
4383 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4384 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4385 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
4386 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4387 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 4388 if (adev->ip_blocks[i].status.hang) {
aac89168 4389 dev_info(adev->dev, "Some block need full reset!\n");
da146d3b
AD
4390 return true;
4391 }
4392 }
35d782fe
CZ
4393 }
4394 return false;
4395}
4396
e3ecdffa
AD
4397/**
4398 * amdgpu_device_ip_soft_reset - do a soft reset
4399 *
4400 * @adev: amdgpu_device pointer
4401 *
4402 * The list of all the hardware IPs that make up the asic is walked and the
4403 * soft_reset callbacks are run if the block is hung. soft_reset handles any
4404 * IP specific hardware or software state changes that are necessary to soft
4405 * reset the IP.
4406 * Returns 0 on success, negative error code on failure.
4407 */
06ec9070 4408static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4409{
4410 int i, r = 0;
4411
4412 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4413 if (!adev->ip_blocks[i].status.valid)
35d782fe 4414 continue;
a1255107
AD
4415 if (adev->ip_blocks[i].status.hang &&
4416 adev->ip_blocks[i].version->funcs->soft_reset) {
4417 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
4418 if (r)
4419 return r;
4420 }
4421 }
4422
4423 return 0;
4424}
4425
e3ecdffa
AD
4426/**
4427 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4428 *
4429 * @adev: amdgpu_device pointer
4430 *
4431 * The list of all the hardware IPs that make up the asic is walked and the
4432 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
4433 * handles any IP specific hardware or software state changes that are
4434 * necessary after the IP has been soft reset.
4435 * Returns 0 on success, negative error code on failure.
4436 */
06ec9070 4437static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4438{
4439 int i, r = 0;
4440
4441 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4442 if (!adev->ip_blocks[i].status.valid)
35d782fe 4443 continue;
a1255107
AD
4444 if (adev->ip_blocks[i].status.hang &&
4445 adev->ip_blocks[i].version->funcs->post_soft_reset)
4446 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
4447 if (r)
4448 return r;
4449 }
4450
4451 return 0;
4452}
4453
e3ecdffa 4454/**
c33adbc7 4455 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
4456 *
4457 * @adev: amdgpu_device pointer
4458 *
4459 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
4460 * restore things like GPUVM page tables after a GPU reset where
4461 * the contents of VRAM might be lost.
403009bf
CK
4462 *
4463 * Returns:
4464 * 0 on success, negative error code on failure.
e3ecdffa 4465 */
c33adbc7 4466static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 4467{
c41d1cf6 4468 struct dma_fence *fence = NULL, *next = NULL;
403009bf 4469 struct amdgpu_bo *shadow;
e18aaea7 4470 struct amdgpu_bo_vm *vmbo;
403009bf 4471 long r = 1, tmo;
c41d1cf6
ML
4472
4473 if (amdgpu_sriov_runtime(adev))
b045d3af 4474 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
4475 else
4476 tmo = msecs_to_jiffies(100);
4477
aac89168 4478 dev_info(adev->dev, "recover vram bo from shadow start\n");
c41d1cf6 4479 mutex_lock(&adev->shadow_list_lock);
e18aaea7 4480 list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4994d1f0
LC
4481 /* If vm is compute context or adev is APU, shadow will be NULL */
4482 if (!vmbo->shadow)
4483 continue;
4484 shadow = vmbo->shadow;
4485
403009bf 4486 /* No need to recover an evicted BO */
d3116756
CK
4487 if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4488 shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4489 shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
403009bf
CK
4490 continue;
4491
4492 r = amdgpu_bo_restore_shadow(shadow, &next);
4493 if (r)
4494 break;
4495
c41d1cf6 4496 if (fence) {
1712fb1a 4497 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
4498 dma_fence_put(fence);
4499 fence = next;
1712fb1a 4500 if (tmo == 0) {
4501 r = -ETIMEDOUT;
c41d1cf6 4502 break;
1712fb1a 4503 } else if (tmo < 0) {
4504 r = tmo;
4505 break;
4506 }
403009bf
CK
4507 } else {
4508 fence = next;
c41d1cf6 4509 }
c41d1cf6
ML
4510 }
4511 mutex_unlock(&adev->shadow_list_lock);
4512
403009bf
CK
4513 if (fence)
4514 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
4515 dma_fence_put(fence);
4516
1712fb1a 4517 if (r < 0 || tmo <= 0) {
aac89168 4518 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
4519 return -EIO;
4520 }
c41d1cf6 4521
aac89168 4522 dev_info(adev->dev, "recover vram bo from shadow done\n");
403009bf 4523 return 0;
c41d1cf6
ML
4524}
4525
a90ad3c2 4526
e3ecdffa 4527/**
06ec9070 4528 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e 4529 *
982a820b 4530 * @adev: amdgpu_device pointer
87e3f136 4531 * @from_hypervisor: request from hypervisor
5740682e
ML
4532 *
4533 * Do a VF FLR and reinitialize the ASIC.
3f48c681 4534 * Returns 0 on success, negative error code on failure.
e3ecdffa
AD
4535 */
4536static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4537 bool from_hypervisor)
5740682e
ML
4538{
4539 int r;
a5f67c93 4540 struct amdgpu_hive_info *hive = NULL;
7258fa31 4541 int retry_limit = 0;
5740682e 4542
7258fa31 4543retry:
c004d44e 4544 amdgpu_amdkfd_pre_reset(adev);
428890a3 4545
5740682e
ML
4546 if (from_hypervisor)
4547 r = amdgpu_virt_request_full_gpu(adev, true);
4548 else
4549 r = amdgpu_virt_reset_gpu(adev);
4550 if (r)
4551 return r;
f734b213 4552 amdgpu_irq_gpu_reset_resume_helper(adev);
a90ad3c2 4553
83f24a8f
HC
4554 /* some sw clean up VF needs to do before recover */
4555 amdgpu_virt_post_reset(adev);
4556
a90ad3c2 4557 /* Resume IP prior to SMC */
06ec9070 4558 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
4559 if (r)
4560 goto error;
a90ad3c2 4561
c9ffa427 4562 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 4563
7a3e0bb2
RZ
4564 r = amdgpu_device_fw_loading(adev);
4565 if (r)
4566 return r;
4567
a90ad3c2 4568 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 4569 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
4570 if (r)
4571 goto error;
a90ad3c2 4572
a5f67c93
ZL
4573 hive = amdgpu_get_xgmi_hive(adev);
4574 /* Update PSP FW topology after reset */
4575 if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4576 r = amdgpu_xgmi_update_topology(hive, adev);
4577
4578 if (hive)
4579 amdgpu_put_xgmi_hive(hive);
4580
4581 if (!r) {
a5f67c93 4582 r = amdgpu_ib_ring_tests(adev);
9c12f5cd 4583
c004d44e 4584 amdgpu_amdkfd_post_reset(adev);
a5f67c93 4585 }
a90ad3c2 4586
abc34253 4587error:
c41d1cf6 4588 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 4589 amdgpu_inc_vram_lost(adev);
c33adbc7 4590 r = amdgpu_device_recover_vram(adev);
a90ad3c2 4591 }
437f3e0b 4592 amdgpu_virt_release_full_gpu(adev, true);
a90ad3c2 4593
7258fa31
SK
4594 if (AMDGPU_RETRY_SRIOV_RESET(r)) {
4595 if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
4596 retry_limit++;
4597 goto retry;
4598 } else
4599 DRM_ERROR("GPU reset retry is beyond the retry limit\n");
4600 }
4601
a90ad3c2
ML
4602 return r;
4603}
4604
9a1cddd6 4605/**
4606 * amdgpu_device_has_job_running - check if there is any job in the pending list
4607 *
982a820b 4608 * @adev: amdgpu_device pointer
9a1cddd6 4609 *
4610 * check if there is any job in the pending list
4611 */
4612bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4613{
4614 int i;
4615 struct drm_sched_job *job;
4616
4617 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4618 struct amdgpu_ring *ring = adev->rings[i];
4619
4620 if (!ring || !ring->sched.thread)
4621 continue;
4622
4623 spin_lock(&ring->sched.job_list_lock);
6efa4b46
LT
4624 job = list_first_entry_or_null(&ring->sched.pending_list,
4625 struct drm_sched_job, list);
9a1cddd6 4626 spin_unlock(&ring->sched.job_list_lock);
4627 if (job)
4628 return true;
4629 }
4630 return false;
4631}
4632
12938fad
CK
4633/**
4634 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4635 *
982a820b 4636 * @adev: amdgpu_device pointer
12938fad
CK
4637 *
4638 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4639 * a hung GPU.
4640 */
4641bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4642{
12938fad 4643
3ba7b418
AG
4644 if (amdgpu_gpu_recovery == 0)
4645 goto disabled;
4646
1a11a65d
YC
4647 /* Skip soft reset check in fatal error mode */
4648 if (!amdgpu_ras_is_poison_mode_supported(adev))
4649 return true;
4650
3ba7b418
AG
4651 if (amdgpu_sriov_vf(adev))
4652 return true;
4653
4654 if (amdgpu_gpu_recovery == -1) {
4655 switch (adev->asic_type) {
b3523c45
AD
4656#ifdef CONFIG_DRM_AMDGPU_SI
4657 case CHIP_VERDE:
4658 case CHIP_TAHITI:
4659 case CHIP_PITCAIRN:
4660 case CHIP_OLAND:
4661 case CHIP_HAINAN:
4662#endif
4663#ifdef CONFIG_DRM_AMDGPU_CIK
4664 case CHIP_KAVERI:
4665 case CHIP_KABINI:
4666 case CHIP_MULLINS:
4667#endif
4668 case CHIP_CARRIZO:
4669 case CHIP_STONEY:
4670 case CHIP_CYAN_SKILLFISH:
3ba7b418 4671 goto disabled;
b3523c45
AD
4672 default:
4673 break;
3ba7b418 4674 }
12938fad
CK
4675 }
4676
4677 return true;
3ba7b418
AG
4678
4679disabled:
aac89168 4680 dev_info(adev->dev, "GPU recovery disabled.\n");
3ba7b418 4681 return false;
12938fad
CK
4682}
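/*
 * Illustrative note, not part of the upstream file: this check mirrors the
 * amdgpu.gpu_recovery module parameter. Per the logic above, 0 disables GPU
 * recovery outright, SR-IOV VFs always attempt recovery, and the auto
 * default (-1) disables it only for the legacy ASICs listed in the switch.
 */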
4683
5c03e584
FX
4684int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
4685{
47fc644f
SS
4686 u32 i;
4687 int ret = 0;
5c03e584 4688
47fc644f 4689 amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5c03e584 4690
47fc644f 4691 dev_info(adev->dev, "GPU mode1 reset\n");
5c03e584 4692
47fc644f
SS
4693 /* disable BM */
4694 pci_clear_master(adev->pdev);
5c03e584 4695
47fc644f 4696 amdgpu_device_cache_pci_state(adev->pdev);
5c03e584 4697
47fc644f
SS
4698 if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
4699 dev_info(adev->dev, "GPU smu mode1 reset\n");
4700 ret = amdgpu_dpm_mode1_reset(adev);
4701 } else {
4702 dev_info(adev->dev, "GPU psp mode1 reset\n");
4703 ret = psp_gpu_reset(adev);
4704 }
5c03e584 4705
47fc644f 4706 if (ret)
2c0f880a 4707 goto mode1_reset_failed;
5c03e584 4708
47fc644f 4709 amdgpu_device_load_pci_state(adev->pdev);
15c5c5f5
LL
4710 ret = amdgpu_psp_wait_for_bootloader(adev);
4711 if (ret)
2c0f880a 4712 goto mode1_reset_failed;
5c03e584 4713
47fc644f
SS
4714 /* wait for asic to come out of reset */
4715 for (i = 0; i < adev->usec_timeout; i++) {
4716 u32 memsize = adev->nbio.funcs->get_memsize(adev);
5c03e584 4717
47fc644f
SS
4718 if (memsize != 0xffffffff)
4719 break;
4720 udelay(1);
4721 }
5c03e584 4722
2c0f880a
HZ
4723 if (i >= adev->usec_timeout) {
4724 ret = -ETIMEDOUT;
4725 goto mode1_reset_failed;
4726 }
4727
47fc644f 4728 amdgpu_atombios_scratch_regs_engine_hung(adev, false);
15c5c5f5 4729
2c0f880a
HZ
4730 return 0;
4731
4732mode1_reset_failed:
4733 dev_err(adev->dev, "GPU mode1 reset failed\n");
47fc644f 4734 return ret;
5c03e584 4735}
5c6dd71e 4736
e3c1b071 4737int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
04442bf7 4738 struct amdgpu_reset_context *reset_context)
26bc5340 4739{
5c1e6fa4 4740 int i, r = 0;
04442bf7
LL
4741 struct amdgpu_job *job = NULL;
4742 bool need_full_reset =
4743 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4744
4745 if (reset_context->reset_req_dev == adev)
4746 job = reset_context->job;
71182665 4747
b602ca5f
TZ
4748 if (amdgpu_sriov_vf(adev)) {
4749 /* stop the data exchange thread */
4750 amdgpu_virt_fini_data_exchange(adev);
4751 }
4752
9e225fb9
AG
4753 amdgpu_fence_driver_isr_toggle(adev, true);
4754
71182665 4755 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
4756 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4757 struct amdgpu_ring *ring = adev->rings[i];
4758
51687759 4759 if (!ring || !ring->sched.thread)
0875dc9e 4760 continue;
5740682e 4761
b8920e1e
SS
4762 /* Clear the job fences from the fence drv to avoid force_completion on
4763 * them; this leaves only NULL and the vm flush fence in the fence drv
4764 */
5c1e6fa4 4765 amdgpu_fence_driver_clear_job_fences(ring);
c530b02f 4766
2f9d4084
ML
4767 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
4768 amdgpu_fence_driver_force_completion(ring);
0875dc9e 4769 }
d38ceaf9 4770
9e225fb9
AG
4771 amdgpu_fence_driver_isr_toggle(adev, false);
4772
ff99849b 4773 if (job && job->vm)
222b5f04
AG
4774 drm_sched_increase_karma(&job->base);
4775
04442bf7 4776 r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
404b277b 4777 /* If reset handler not implemented, continue; otherwise return */
b8920e1e 4778 if (r == -EOPNOTSUPP)
404b277b
LL
4779 r = 0;
4780 else
04442bf7
LL
4781 return r;
4782
1d721ed6 4783 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
4784 if (!amdgpu_sriov_vf(adev)) {
4785
4786 if (!need_full_reset)
4787 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4788
360cd081
LG
4789 if (!need_full_reset && amdgpu_gpu_recovery &&
4790 amdgpu_device_ip_check_soft_reset(adev)) {
26bc5340
AG
4791 amdgpu_device_ip_pre_soft_reset(adev);
4792 r = amdgpu_device_ip_soft_reset(adev);
4793 amdgpu_device_ip_post_soft_reset(adev);
4794 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 4795 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
26bc5340
AG
4796 need_full_reset = true;
4797 }
4798 }
4799
4800 if (need_full_reset)
4801 r = amdgpu_device_ip_suspend(adev);
04442bf7
LL
4802 if (need_full_reset)
4803 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4804 else
4805 clear_bit(AMDGPU_NEED_FULL_RESET,
4806 &reset_context->flags);
26bc5340
AG
4807 }
4808
4809 return r;
4810}
4811
15fd09a0
SA
4812static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
4813{
15fd09a0
SA
4814 int i;
4815
38a15ad9 4816 lockdep_assert_held(&adev->reset_domain->sem);
15fd09a0
SA
4817
4818 for (i = 0; i < adev->num_regs; i++) {
651d7ee6
SA
4819 adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]);
4820 trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i],
4821 adev->reset_dump_reg_value[i]);
15fd09a0
SA
4822 }
4823
4824 return 0;
4825}
4826
3d8785f6
SA
4827#ifdef CONFIG_DEV_COREDUMP
4828static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
4829 size_t count, void *data, size_t datalen)
4830{
4831 struct drm_printer p;
4832 struct amdgpu_device *adev = data;
4833 struct drm_print_iterator iter;
4834 int i;
4835
4836 iter.data = buffer;
4837 iter.offset = 0;
4838 iter.start = offset;
4839 iter.remain = count;
4840
4841 p = drm_coredump_printer(&iter);
4842
4843 drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
4844 drm_printf(&p, "kernel: " UTS_RELEASE "\n");
4845 drm_printf(&p, "module: " KBUILD_MODNAME "\n");
4846 drm_printf(&p, "time: %lld.%09ld\n", adev->reset_time.tv_sec, adev->reset_time.tv_nsec);
4847 if (adev->reset_task_info.pid)
4848 drm_printf(&p, "process_name: %s PID: %d\n",
4849 adev->reset_task_info.process_name,
4850 adev->reset_task_info.pid);
4851
4852 if (adev->reset_vram_lost)
4853 drm_printf(&p, "VRAM is lost due to GPU reset!\n");
4854 if (adev->num_regs) {
4855 drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n");
4856
4857 for (i = 0; i < adev->num_regs; i++)
4858 drm_printf(&p, "0x%08x: 0x%08x\n",
4859 adev->reset_dump_reg_list[i],
4860 adev->reset_dump_reg_value[i]);
4861 }
4862
4863 return count - iter.remain;
4864}
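/*
 * Illustrative example of the text emitted by the printer above; all values
 * below are made up and not taken from any real dump:
 *
 *   **** AMDGPU Device Coredump ****
 *   kernel: 6.6.0
 *   module: amdgpu
 *   time: 1700000000.123456789
 *   process_name: glxgears PID: 1234
 *   VRAM is lost due to GPU reset!
 *   AMDGPU register dumps:
 *   Offset: Value:
 *   0x00001234: 0xdeadbeef
 */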
4865
4866static void amdgpu_devcoredump_free(void *data)
4867{
4868}
4869
4870static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev)
4871{
4872 struct drm_device *dev = adev_to_drm(adev);
4873
4874 ktime_get_ts64(&adev->reset_time);
d68ccdb2 4875 dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_NOWAIT,
3d8785f6
SA
4876 amdgpu_devcoredump_read, amdgpu_devcoredump_free);
4877}
4878#endif
4879
04442bf7
LL
4880int amdgpu_do_asic_reset(struct list_head *device_list_handle,
4881 struct amdgpu_reset_context *reset_context)
26bc5340
AG
4882{
4883 struct amdgpu_device *tmp_adev = NULL;
04442bf7 4884 bool need_full_reset, skip_hw_reset, vram_lost = false;
26bc5340 4885 int r = 0;
f5c7e779 4886 bool gpu_reset_for_dev_remove = 0;
26bc5340 4887
04442bf7
LL
4888 /* Try reset handler method first */
4889 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
4890 reset_list);
15fd09a0 4891 amdgpu_reset_reg_dumps(tmp_adev);
0a83bb35
LL
4892
4893 reset_context->reset_device_list = device_list_handle;
04442bf7 4894 r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
404b277b 4895 /* If reset handler not implemented, continue; otherwise return */
b8920e1e 4896 if (r == -EOPNOTSUPP)
404b277b
LL
4897 r = 0;
4898 else
04442bf7
LL
4899 return r;
4900
4901 /* Reset handler not implemented, use the default method */
4902 need_full_reset =
4903 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4904 skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
4905
f5c7e779
YC
4906 gpu_reset_for_dev_remove =
4907 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
4908 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4909
26bc5340 4910 /*
655ce9cb 4911 * ASIC reset has to be done on all XGMI hive nodes ASAP
26bc5340
AG
4912 * to allow proper links negotiation in FW (within 1 sec)
4913 */
7ac71382 4914 if (!skip_hw_reset && need_full_reset) {
655ce9cb 4915 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
041a62bc 4916 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 4917 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
e3c1b071 4918 tmp_adev->gmc.xgmi.pending_reset = false;
c96cf282 4919 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
4920 r = -EALREADY;
4921 } else
4922 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 4923
041a62bc 4924 if (r) {
aac89168 4925 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4a580877 4926 r, adev_to_drm(tmp_adev)->unique);
041a62bc 4927 break;
ce316fa5
LM
4928 }
4929 }
4930
041a62bc
AG
4931 /* For XGMI wait for all resets to complete before proceed */
4932 if (!r) {
655ce9cb 4933 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
ce316fa5
LM
4934 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4935 flush_work(&tmp_adev->xgmi_reset_work);
4936 r = tmp_adev->asic_reset_res;
4937 if (r)
4938 break;
ce316fa5
LM
4939 }
4940 }
4941 }
ce316fa5 4942 }
26bc5340 4943
43c4d576 4944 if (!r && amdgpu_ras_intr_triggered()) {
655ce9cb 4945 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5e67bba3 4946 if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops &&
4947 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
4948 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev);
43c4d576
JC
4949 }
4950
00eaa571 4951 amdgpu_ras_intr_cleared();
43c4d576 4952 }
00eaa571 4953
f5c7e779
YC
4954 /* Since the mode1 reset affects base ip blocks, the
4955 * phase1 ip blocks need to be resumed. Otherwise there
4956 * will be a BIOS signature error and the psp bootloader
4957 * can't load kdb on the next amdgpu install.
4958 */
4959 if (gpu_reset_for_dev_remove) {
4960 list_for_each_entry(tmp_adev, device_list_handle, reset_list)
4961 amdgpu_device_ip_resume_phase1(tmp_adev);
4962
4963 goto end;
4964 }
4965
655ce9cb 4966 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
26bc5340
AG
4967 if (need_full_reset) {
4968 /* post card */
e3c1b071 4969 r = amdgpu_device_asic_init(tmp_adev);
4970 if (r) {
aac89168 4971 dev_warn(tmp_adev->dev, "asic atom init failed!");
e3c1b071 4972 } else {
26bc5340 4973 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
9cec53c1 4974
26bc5340
AG
4975 r = amdgpu_device_ip_resume_phase1(tmp_adev);
4976 if (r)
4977 goto out;
4978
4979 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
3d8785f6
SA
4980#ifdef CONFIG_DEV_COREDUMP
4981 tmp_adev->reset_vram_lost = vram_lost;
4982 memset(&tmp_adev->reset_task_info, 0,
4983 sizeof(tmp_adev->reset_task_info));
4984 if (reset_context->job && reset_context->job->vm)
4985 tmp_adev->reset_task_info =
4986 reset_context->job->vm->task_info;
4987 amdgpu_reset_capture_coredumpm(tmp_adev);
4988#endif
26bc5340 4989 if (vram_lost) {
77e7f829 4990 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 4991 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
4992 }
4993
26bc5340
AG
4994 r = amdgpu_device_fw_loading(tmp_adev);
4995 if (r)
4996 return r;
4997
4998 r = amdgpu_device_ip_resume_phase2(tmp_adev);
4999 if (r)
5000 goto out;
5001
5002 if (vram_lost)
5003 amdgpu_device_fill_reset_magic(tmp_adev);
5004
fdafb359
EQ
5005 /*
 5006	 * Add this ASIC back to the tracked list, since the reset
 5007	 * completed successfully.
5008 */
5009 amdgpu_register_gpu_instance(tmp_adev);
5010
04442bf7
LL
5011 if (!reset_context->hive &&
5012 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
e3c1b071 5013 amdgpu_xgmi_add_device(tmp_adev);
5014
7c04ca50 5015 r = amdgpu_device_ip_late_init(tmp_adev);
5016 if (r)
5017 goto out;
5018
087451f3 5019 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
565d1941 5020
e8fbaf03
GC
5021 /*
 5022	 * The GPU enters a bad state once the number of faulty
 5023	 * pages retired by ECC reaches the threshold, and RAS
 5024	 * recovery is scheduled next. So add a check here to
 5025	 * break recovery if the bad page threshold has indeed
 5026	 * been exceeded, and remind the user to retire this GPU
 5027	 * or to set a bigger bad_page_threshold value to work
 5028	 * around the problem the next time the driver is
 5029	 * probed.
5030 */
11003c68 5031 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
e8fbaf03
GC
5032 /* must succeed. */
5033 amdgpu_ras_resume(tmp_adev);
5034 } else {
5035 r = -EINVAL;
5036 goto out;
5037 }
e79a04d5 5038
26bc5340 5039 /* Update PSP FW topology after reset */
04442bf7
LL
5040 if (reset_context->hive &&
5041 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5042 r = amdgpu_xgmi_update_topology(
5043 reset_context->hive, tmp_adev);
26bc5340
AG
5044 }
5045 }
5046
26bc5340
AG
5047out:
5048 if (!r) {
5049 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5050 r = amdgpu_ib_ring_tests(tmp_adev);
5051 if (r) {
5052 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
26bc5340
AG
5053 need_full_reset = true;
5054 r = -EAGAIN;
5055 goto end;
5056 }
5057 }
5058
5059 if (!r)
5060 r = amdgpu_device_recover_vram(tmp_adev);
5061 else
5062 tmp_adev->asic_reset_res = r;
5063 }
5064
5065end:
04442bf7
LL
5066 if (need_full_reset)
5067 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5068 else
5069 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340
AG
5070 return r;
5071}
5072
e923be99 5073static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
26bc5340 5074{
5740682e 5075
a3a09142
AD
5076 switch (amdgpu_asic_reset_method(adev)) {
5077 case AMD_RESET_METHOD_MODE1:
5078 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5079 break;
5080 case AMD_RESET_METHOD_MODE2:
5081 adev->mp1_state = PP_MP1_STATE_RESET;
5082 break;
5083 default:
5084 adev->mp1_state = PP_MP1_STATE_NONE;
5085 break;
5086 }
26bc5340 5087}
d38ceaf9 5088
e923be99 5089static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
26bc5340 5090{
89041940 5091 amdgpu_vf_error_trans_all(adev);
a3a09142 5092 adev->mp1_state = PP_MP1_STATE_NONE;
91fb309d
HC
5093}
5094
3f12acc8
EQ
5095static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5096{
5097 struct pci_dev *p = NULL;
5098
5099 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5100 adev->pdev->bus->number, 1);
5101 if (p) {
5102 pm_runtime_enable(&(p->dev));
5103 pm_runtime_resume(&(p->dev));
5104 }
b85e285e
YY
5105
5106 pci_dev_put(p);
3f12acc8
EQ
5107}
5108
5109static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5110{
5111 enum amd_reset_method reset_method;
5112 struct pci_dev *p = NULL;
5113 u64 expires;
5114
5115 /*
 5116	 * For now, only BACO and mode1 reset are confirmed to
 5117	 * suffer the audio issue if audio is not properly suspended.
5118 */
5119 reset_method = amdgpu_asic_reset_method(adev);
5120 if ((reset_method != AMD_RESET_METHOD_BACO) &&
5121 (reset_method != AMD_RESET_METHOD_MODE1))
5122 return -EINVAL;
5123
5124 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5125 adev->pdev->bus->number, 1);
5126 if (!p)
5127 return -ENODEV;
5128
5129 expires = pm_runtime_autosuspend_expiration(&(p->dev));
5130 if (!expires)
5131 /*
 5132		 * If we cannot get the audio device autosuspend delay,
 5133		 * a fixed 4s interval is used. Since 3s is the audio
 5134		 * controller's default autosuspend delay, the 4s used
 5135		 * here is guaranteed to cover it.
5136 */
54b7feb9 5137 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
5138
5139 while (!pm_runtime_status_suspended(&(p->dev))) {
5140 if (!pm_runtime_suspend(&(p->dev)))
5141 break;
5142
5143 if (expires < ktime_get_mono_fast_ns()) {
5144 dev_warn(adev->dev, "failed to suspend display audio\n");
b85e285e 5145 pci_dev_put(p);
3f12acc8
EQ
5146 /* TODO: abort the succeeding gpu reset? */
5147 return -ETIMEDOUT;
5148 }
5149 }
5150
5151 pm_runtime_disable(&(p->dev));
5152
b85e285e 5153 pci_dev_put(p);
3f12acc8
EQ
5154 return 0;
5155}
5156
d193b12b 5157static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
247c7b0d
AG
5158{
5159 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5160
5161#if defined(CONFIG_DEBUG_FS)
5162 if (!amdgpu_sriov_vf(adev))
5163 cancel_work(&adev->reset_work);
5164#endif
5165
5166 if (adev->kfd.dev)
5167 cancel_work(&adev->kfd.reset_work);
5168
5169 if (amdgpu_sriov_vf(adev))
5170 cancel_work(&adev->virt.flr_work);
5171
5172 if (con && adev->ras_enabled)
5173 cancel_work(&con->recovery_work);
5174
5175}
5176
26bc5340 5177/**
6e9c65f7 5178 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
26bc5340 5179 *
982a820b 5180 * @adev: amdgpu_device pointer
26bc5340 5181 * @job: which job trigger hang
80bd2de1 5182 * @reset_context: amdgpu reset context pointer
26bc5340
AG
5183 *
 5184 * Attempt to reset the GPU if it has hung (all ASICs).
 5185 * Attempts a soft reset or full reset and reinitializes the ASIC.
 5186 * Returns 0 for success or an error on failure.
5187 */
5188
cf727044 5189int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
f1549c09
LG
5190 struct amdgpu_job *job,
5191 struct amdgpu_reset_context *reset_context)
26bc5340 5192{
1d721ed6 5193 struct list_head device_list, *device_list_handle = NULL;
7dd8c205 5194 bool job_signaled = false;
26bc5340 5195 struct amdgpu_hive_info *hive = NULL;
26bc5340 5196 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 5197 int i, r = 0;
bb5c7235 5198 bool need_emergency_restart = false;
3f12acc8 5199 bool audio_suspended = false;
f5c7e779
YC
5200 bool gpu_reset_for_dev_remove = false;
5201
5202 gpu_reset_for_dev_remove =
5203 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5204 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340 5205
6e3cd2a9 5206 /*
bb5c7235
WS
5207 * Special case: RAS triggered and full reset isn't supported
5208 */
5209 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5210
d5ea093e
AG
5211 /*
5212 * Flush RAM to disk so that after reboot
5213 * the user can read log and see why the system rebooted.
5214 */
bb5c7235 5215 if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
5216 DRM_WARN("Emergency reboot.");
5217
5218 ksys_sync_helper();
5219 emergency_restart();
5220 }
5221
b823821f 5222 dev_info(adev->dev, "GPU %s begin!\n",
bb5c7235 5223 need_emergency_restart ? "jobs stop":"reset");
26bc5340 5224
175ac6ec
ZL
5225 if (!amdgpu_sriov_vf(adev))
5226 hive = amdgpu_get_xgmi_hive(adev);
681260df 5227 if (hive)
53b3f8f4 5228 mutex_lock(&hive->hive_lock);
26bc5340 5229
f1549c09
LG
5230 reset_context->job = job;
5231 reset_context->hive = hive;
9e94d22c
EQ
5232 /*
5233 * Build list of devices to reset.
 5234	 * In case we are in XGMI hive mode, re-sort the device list
 5235	 * to put adev in the 1st position.
5236 */
5237 INIT_LIST_HEAD(&device_list);
175ac6ec 5238 if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
83d29a5f 5239 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
655ce9cb 5240 list_add_tail(&tmp_adev->reset_list, &device_list);
83d29a5f
YC
5241 if (gpu_reset_for_dev_remove && adev->shutdown)
5242 tmp_adev->shutdown = true;
5243 }
655ce9cb 5244 if (!list_is_first(&adev->reset_list, &device_list))
5245 list_rotate_to_front(&adev->reset_list, &device_list);
5246 device_list_handle = &device_list;
26bc5340 5247 } else {
655ce9cb 5248 list_add_tail(&adev->reset_list, &device_list);
26bc5340
AG
5249 device_list_handle = &device_list;
5250 }
5251
e923be99
AG
5252 /* We need to lock reset domain only once both for XGMI and single device */
5253 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5254 reset_list);
3675c2f2 5255 amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
e923be99 5256
1d721ed6 5257 /* block all schedulers and reset given job's ring */
655ce9cb 5258 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f287a3c5 5259
e923be99 5260 amdgpu_device_set_mp1_state(tmp_adev);
f287a3c5 5261
3f12acc8
EQ
5262 /*
 5263		 * Try to put the audio codec into suspend state
 5264		 * before the gpu reset starts.
 5265		 *
 5266		 * The power domain of the graphics device is shared
 5267		 * with the AZ (audio) power domain. Without this,
 5268		 * we may change the audio hardware from behind
 5269		 * the audio driver's back, which will trigger
 5270		 * some audio codec errors.
5271 */
5272 if (!amdgpu_device_suspend_display_audio(tmp_adev))
5273 audio_suspended = true;
5274
9e94d22c
EQ
5275 amdgpu_ras_set_error_query_ready(tmp_adev, false);
5276
52fb44cf
EQ
5277 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5278
c004d44e 5279 if (!amdgpu_sriov_vf(tmp_adev))
428890a3 5280 amdgpu_amdkfd_pre_reset(tmp_adev);
9e94d22c 5281
12ffa55d
AG
5282 /*
 5283		 * Mark these ASICs to be reset as untracked first,
 5284		 * and add them back after the reset has completed.
5285 */
5286 amdgpu_unregister_gpu_instance(tmp_adev);
5287
163d4cd2 5288 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
565d1941 5289
f1c1314b 5290 /* disable ras on ALL IPs */
bb5c7235 5291 if (!need_emergency_restart &&
b823821f 5292 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 5293 amdgpu_ras_suspend(tmp_adev);
5294
1d721ed6
AG
5295 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5296 struct amdgpu_ring *ring = tmp_adev->rings[i];
5297
5298 if (!ring || !ring->sched.thread)
5299 continue;
5300
0b2d2c2e 5301 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 5302
bb5c7235 5303 if (need_emergency_restart)
7c6e68c7 5304 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6 5305 }
8f8c80f4 5306 atomic_inc(&tmp_adev->gpu_reset_counter);
1d721ed6
AG
5307 }
5308
bb5c7235 5309 if (need_emergency_restart)
7c6e68c7
AG
5310 goto skip_sched_resume;
5311
1d721ed6
AG
5312 /*
5313 * Must check guilty signal here since after this point all old
5314 * HW fences are force signaled.
5315 *
5316 * job->base holds a reference to parent fence
5317 */
f6a3f660 5318 if (job && dma_fence_is_signaled(&job->hw_fence)) {
1d721ed6 5319 job_signaled = true;
1d721ed6
AG
5320 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5321 goto skip_hw_reset;
5322 }
5323
26bc5340 5324retry: /* Rest of adevs pre asic reset from XGMI hive. */
655ce9cb 5325 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f5c7e779
YC
5326 if (gpu_reset_for_dev_remove) {
 5327			/* Workaround for ASICs that need to disable SMC first */
5328 amdgpu_device_smu_fini_early(tmp_adev);
5329 }
f1549c09 5330 r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
26bc5340
AG
 5331		/* TODO: Should we stop? */
5332 if (r) {
aac89168 5333 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
4a580877 5334 r, adev_to_drm(tmp_adev)->unique);
26bc5340
AG
5335 tmp_adev->asic_reset_res = r;
5336 }
247c7b0d
AG
5337
5338 /*
 5339		 * Drop all pending non-scheduler resets. Scheduler resets
 5340		 * were already dropped during drm_sched_stop.
5341 */
d193b12b 5342 amdgpu_device_stop_pending_resets(tmp_adev);
26bc5340
AG
5343 }
5344
5345 /* Actual ASIC resets if needed.*/
4f30d920 5346 /* Host driver will handle XGMI hive reset for SRIOV */
26bc5340
AG
5347 if (amdgpu_sriov_vf(adev)) {
5348 r = amdgpu_device_reset_sriov(adev, job ? false : true);
5349 if (r)
5350 adev->asic_reset_res = r;
950d6425 5351
28606c4e
YC
 5352		/* Aldebaran and gfx_11_0_3 support RAS in SRIOV, so RAS needs to be resumed during reset */
5353 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
5354 adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3))
950d6425 5355 amdgpu_ras_resume(adev);
26bc5340 5356 } else {
f1549c09 5357 r = amdgpu_do_asic_reset(device_list_handle, reset_context);
b98a1648 5358 if (r && r == -EAGAIN)
26bc5340 5359 goto retry;
f5c7e779
YC
5360
5361 if (!r && gpu_reset_for_dev_remove)
5362 goto recover_end;
26bc5340
AG
5363 }
5364
1d721ed6
AG
5365skip_hw_reset:
5366
26bc5340 5367 /* Post ASIC reset for all devs .*/
655ce9cb 5368 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
7c6e68c7 5369
1d721ed6
AG
5370 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5371 struct amdgpu_ring *ring = tmp_adev->rings[i];
5372
5373 if (!ring || !ring->sched.thread)
5374 continue;
5375
6868a2c4 5376 drm_sched_start(&ring->sched, true);
1d721ed6
AG
5377 }
5378
693073a0 5379 if (adev->enable_mes && adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3))
ed67f729
JX
5380 amdgpu_mes_self_test(tmp_adev);
5381
b8920e1e 5382 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
4a580877 5383 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
1d721ed6 5384
7258fa31
SK
5385 if (tmp_adev->asic_reset_res)
5386 r = tmp_adev->asic_reset_res;
5387
1d721ed6 5388 tmp_adev->asic_reset_res = 0;
26bc5340
AG
5389
5390 if (r) {
5391 /* bad news, how to tell it to userspace ? */
12ffa55d 5392 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
5393 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5394 } else {
12ffa55d 5395 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
3fa8f89d
S
5396 if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5397 DRM_WARN("smart shift update failed\n");
26bc5340 5398 }
7c6e68c7 5399 }
26bc5340 5400
7c6e68c7 5401skip_sched_resume:
655ce9cb 5402 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
428890a3 5403 /* unlock kfd: SRIOV would do it separately */
c004d44e 5404 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
428890a3 5405 amdgpu_amdkfd_post_reset(tmp_adev);
8e2712e7 5406
 5407		/* kfd_post_reset will do nothing if the kfd device is not initialized,
 5408		 * so bring up kfd here if it was not initialized before.
5409 */
5410 if (!adev->kfd.init_complete)
5411 amdgpu_amdkfd_device_init(adev);
5412
3f12acc8
EQ
5413 if (audio_suspended)
5414 amdgpu_device_resume_display_audio(tmp_adev);
e923be99
AG
5415
5416 amdgpu_device_unset_mp1_state(tmp_adev);
d293470e
YC
5417
5418 amdgpu_ras_set_error_query_ready(tmp_adev, true);
26bc5340
AG
5419 }
5420
f5c7e779 5421recover_end:
e923be99
AG
5422 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5423 reset_list);
5424 amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5425
9e94d22c 5426 if (hive) {
9e94d22c 5427 mutex_unlock(&hive->hive_lock);
d95e8e97 5428 amdgpu_put_xgmi_hive(hive);
9e94d22c 5429 }
26bc5340 5430
f287a3c5 5431 if (r)
26bc5340 5432 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
ab9a0b1f
AG
5433
5434 atomic_set(&adev->reset_domain->reset_res, r);
d38ceaf9
AD
5435 return r;
5436}
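/*
 * Editor's illustrative sketch (not part of the original file): one way a
 * hang handler might request recovery, mirroring how reset_context is
 * filled in the PCI slot-reset path below. example_trigger_recovery() is
 * a hypothetical helper, not an existing amdgpu function.
 */
static int example_trigger_recovery(struct amdgpu_device *adev,
				    struct amdgpu_job *job)
{
	struct amdgpu_reset_context reset_context;

	memset(&reset_context, 0, sizeof(reset_context));
	reset_context.method = AMD_RESET_METHOD_NONE;	/* let the driver pick */
	reset_context.reset_req_dev = adev;
	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);

	/* job and hive members are filled in by amdgpu_device_gpu_recover() */
	return amdgpu_device_gpu_recover(adev, job, &reset_context);
}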
5437
e3ecdffa
AD
5438/**
 5439 * amdgpu_device_get_pcie_info - fetch PCIe info about the PCIe slot
5440 *
5441 * @adev: amdgpu_device pointer
5442 *
 5443 * Fetches and stores in the driver the PCIe capabilities (gen speed
5444 * and lanes) of the slot the device is in. Handles APUs and
5445 * virtualized environments where PCIE config space may not be available.
5446 */
5494d864 5447static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 5448{
5d9a6330 5449 struct pci_dev *pdev;
c5313457
HK
5450 enum pci_bus_speed speed_cap, platform_speed_cap;
5451 enum pcie_link_width platform_link_width;
d0dd7f0c 5452
cd474ba0
AD
5453 if (amdgpu_pcie_gen_cap)
5454 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 5455
cd474ba0
AD
5456 if (amdgpu_pcie_lane_cap)
5457 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 5458
cd474ba0 5459 /* covers APUs as well */
04e85958 5460 if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
cd474ba0
AD
5461 if (adev->pm.pcie_gen_mask == 0)
5462 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5463 if (adev->pm.pcie_mlw_mask == 0)
5464 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 5465 return;
cd474ba0 5466 }
d0dd7f0c 5467
c5313457
HK
5468 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5469 return;
5470
dbaa922b
AD
5471 pcie_bandwidth_available(adev->pdev, NULL,
5472 &platform_speed_cap, &platform_link_width);
c5313457 5473
cd474ba0 5474 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
5475 /* asic caps */
5476 pdev = adev->pdev;
5477 speed_cap = pcie_get_speed_cap(pdev);
5478 if (speed_cap == PCI_SPEED_UNKNOWN) {
5479 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
5480 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5481 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 5482 } else {
2b3a1f51
FX
5483 if (speed_cap == PCIE_SPEED_32_0GT)
5484 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5485 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5486 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5487 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5488 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5489 else if (speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5490 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5491 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5492 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5493 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5494 else if (speed_cap == PCIE_SPEED_8_0GT)
5495 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5496 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5497 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5498 else if (speed_cap == PCIE_SPEED_5_0GT)
5499 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5500 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5501 else
5502 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5503 }
5504 /* platform caps */
c5313457 5505 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
5506 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5507 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5508 } else {
2b3a1f51
FX
5509 if (platform_speed_cap == PCIE_SPEED_32_0GT)
5510 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5511 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5512 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5513 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5514 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5515 else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5516 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5517 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5518 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5519 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 5520 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
5521 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5522 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5523 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 5524 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
5525 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5526 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5527 else
5528 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5529
cd474ba0
AD
5530 }
5531 }
5532 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 5533 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
5534 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5535 } else {
c5313457 5536 switch (platform_link_width) {
5d9a6330 5537 case PCIE_LNK_X32:
cd474ba0
AD
5538 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5539 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5540 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5541 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5542 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5543 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5544 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5545 break;
5d9a6330 5546 case PCIE_LNK_X16:
cd474ba0
AD
5547 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5548 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5549 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5550 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5551 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5552 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5553 break;
5d9a6330 5554 case PCIE_LNK_X12:
cd474ba0
AD
5555 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5556 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5557 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5558 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5559 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5560 break;
5d9a6330 5561 case PCIE_LNK_X8:
cd474ba0
AD
5562 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5563 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5564 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5565 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5566 break;
5d9a6330 5567 case PCIE_LNK_X4:
cd474ba0
AD
5568 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5569 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5570 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5571 break;
5d9a6330 5572 case PCIE_LNK_X2:
cd474ba0
AD
5573 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5574 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5575 break;
5d9a6330 5576 case PCIE_LNK_X1:
cd474ba0
AD
5577 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5578 break;
5579 default:
5580 break;
5581 }
d0dd7f0c
AD
5582 }
5583 }
5584}
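/*
 * Editor's illustrative sketch (not part of the original file): once the
 * masks above are populated, callers can test them, e.g. to see whether a
 * Gen4 x16 link is supported. example_link_is_gen4_x16() is hypothetical.
 */
static bool example_link_is_gen4_x16(struct amdgpu_device *adev)
{
	return (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4) &&
	       (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X16);
}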
d38ceaf9 5585
08a2fd23
RE
5586/**
5587 * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
5588 *
5589 * @adev: amdgpu_device pointer
5590 * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
5591 *
5592 * Return true if @peer_adev can access (DMA) @adev through the PCIe
5593 * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
5594 * @peer_adev.
5595 */
5596bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
5597 struct amdgpu_device *peer_adev)
5598{
5599#ifdef CONFIG_HSA_AMD_P2P
5600 uint64_t address_mask = peer_adev->dev->dma_mask ?
5601 ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
5602 resource_size_t aper_limit =
5603 adev->gmc.aper_base + adev->gmc.aper_size - 1;
bb66ecbf
LL
5604 bool p2p_access =
5605 !adev->gmc.xgmi.connected_to_cpu &&
5606 !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
08a2fd23
RE
5607
5608 return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
5609 adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
5610 !(adev->gmc.aper_base & address_mask ||
5611 aper_limit & address_mask));
5612#else
5613 return false;
5614#endif
5615}
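/*
 * Editor's illustrative sketch (not part of the original file): a caller
 * that wants to map @adev's VRAM for DMA by a peer would gate the setup on
 * this check and fall back to staging through system memory otherwise.
 * example_setup_p2p() is a hypothetical helper.
 */
static int example_setup_p2p(struct amdgpu_device *adev,
			     struct amdgpu_device *peer_adev)
{
	if (!amdgpu_device_is_peer_accessible(adev, peer_adev))
		return -EOPNOTSUPP;	/* caller falls back to staging copies */

	/* ... map @adev's VRAM BAR for DMA by @peer_adev ... */
	return 0;
}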
5616
361dbd01
AD
5617int amdgpu_device_baco_enter(struct drm_device *dev)
5618{
1348969a 5619 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5620 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01 5621
6ab68650 5622 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
5623 return -ENOTSUPP;
5624
8ab0d6f0 5625 if (ras && adev->ras_enabled &&
acdae216 5626 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5627 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
5628
9530273e 5629 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
5630}
5631
5632int amdgpu_device_baco_exit(struct drm_device *dev)
5633{
1348969a 5634 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5635 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 5636 int ret = 0;
361dbd01 5637
6ab68650 5638 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
5639 return -ENOTSUPP;
5640
9530273e
EQ
5641 ret = amdgpu_dpm_baco_exit(adev);
5642 if (ret)
5643 return ret;
7a22677b 5644
8ab0d6f0 5645 if (ras && adev->ras_enabled &&
acdae216 5646 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5647 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
5648
1bece222
CL
5649 if (amdgpu_passthrough(adev) &&
5650 adev->nbio.funcs->clear_doorbell_interrupt)
5651 adev->nbio.funcs->clear_doorbell_interrupt(adev);
5652
7a22677b 5653 return 0;
361dbd01 5654}
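/*
 * Editor's illustrative sketch (not part of the original file): BACO entry
 * and exit are expected to be paired around an idle period; the real
 * runtime-PM hooks in amdgpu_drv.c are more involved.
 * example_runtime_cycle() is a hypothetical helper.
 */
static int example_runtime_cycle(struct drm_device *dev)
{
	int r = amdgpu_device_baco_enter(dev);

	if (r)	/* e.g. -ENOTSUPP when the device has no BACO support */
		return r;

	/* ... device sits in BACO while idle ... */

	return amdgpu_device_baco_exit(dev);
}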
c9a6b82f
AG
5655
5656/**
5657 * amdgpu_pci_error_detected - Called when a PCI error is detected.
5658 * @pdev: PCI device struct
5659 * @state: PCI channel state
5660 *
5661 * Description: Called when a PCI error is detected.
5662 *
5663 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
5664 */
5665pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
5666{
5667 struct drm_device *dev = pci_get_drvdata(pdev);
5668 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5669 int i;
c9a6b82f
AG
5670
5671 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
5672
6894305c
AG
5673 if (adev->gmc.xgmi.num_physical_nodes > 1) {
5674 DRM_WARN("No support for XGMI hive yet...");
5675 return PCI_ERS_RESULT_DISCONNECT;
5676 }
5677
e17e27f9
GC
5678 adev->pci_channel_state = state;
5679
c9a6b82f
AG
5680 switch (state) {
5681 case pci_channel_io_normal:
5682 return PCI_ERS_RESULT_CAN_RECOVER;
acd89fca 5683 /* Fatal error, prepare for slot reset */
8a11d283
TZ
5684 case pci_channel_io_frozen:
5685 /*
d0fb18b5 5686 * Locking adev->reset_domain->sem will prevent any external access
acd89fca
AG
5687 * to GPU during PCI error recovery
5688 */
3675c2f2 5689 amdgpu_device_lock_reset_domain(adev->reset_domain);
e923be99 5690 amdgpu_device_set_mp1_state(adev);
acd89fca
AG
5691
5692 /*
5693 * Block any work scheduling as we do for regular GPU reset
5694 * for the duration of the recovery
5695 */
5696 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5697 struct amdgpu_ring *ring = adev->rings[i];
5698
5699 if (!ring || !ring->sched.thread)
5700 continue;
5701
5702 drm_sched_stop(&ring->sched, NULL);
5703 }
8f8c80f4 5704 atomic_inc(&adev->gpu_reset_counter);
c9a6b82f
AG
5705 return PCI_ERS_RESULT_NEED_RESET;
5706 case pci_channel_io_perm_failure:
5707 /* Permanent error, prepare for device removal */
5708 return PCI_ERS_RESULT_DISCONNECT;
5709 }
5710
5711 return PCI_ERS_RESULT_NEED_RESET;
5712}
5713
5714/**
5715 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
5716 * @pdev: pointer to PCI device
5717 */
5718pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
5719{
5720
5721 DRM_INFO("PCI error: mmio enabled callback!!\n");
5722
5723 /* TODO - dump whatever for debugging purposes */
5724
 5726	/* This is called only if amdgpu_pci_error_detected returns
 5727	 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
 5728	 * works, so there is no need to reset the slot.
5728 */
5729
5730 return PCI_ERS_RESULT_RECOVERED;
5731}
5732
5733/**
5734 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
5735 * @pdev: PCI device struct
5736 *
5737 * Description: This routine is called by the pci error recovery
5738 * code after the PCI slot has been reset, just before we
5739 * should resume normal operations.
5740 */
5741pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
5742{
5743 struct drm_device *dev = pci_get_drvdata(pdev);
5744 struct amdgpu_device *adev = drm_to_adev(dev);
362c7b91 5745 int r, i;
04442bf7 5746 struct amdgpu_reset_context reset_context;
362c7b91 5747 u32 memsize;
7ac71382 5748 struct list_head device_list;
c9a6b82f
AG
5749
5750 DRM_INFO("PCI error: slot reset callback!!\n");
5751
04442bf7
LL
5752 memset(&reset_context, 0, sizeof(reset_context));
5753
7ac71382 5754 INIT_LIST_HEAD(&device_list);
655ce9cb 5755 list_add_tail(&adev->reset_list, &device_list);
7ac71382 5756
362c7b91
AG
5757 /* wait for asic to come out of reset */
5758 msleep(500);
5759
7ac71382 5760 /* Restore PCI confspace */
c1dd4aa6 5761 amdgpu_device_load_pci_state(pdev);
c9a6b82f 5762
362c7b91
AG
5763 /* confirm ASIC came out of reset */
5764 for (i = 0; i < adev->usec_timeout; i++) {
5765 memsize = amdgpu_asic_get_config_memsize(adev);
5766
5767 if (memsize != 0xffffffff)
5768 break;
5769 udelay(1);
5770 }
5771 if (memsize == 0xffffffff) {
5772 r = -ETIME;
5773 goto out;
5774 }
5775
04442bf7
LL
5776 reset_context.method = AMD_RESET_METHOD_NONE;
5777 reset_context.reset_req_dev = adev;
5778 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
5779 set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
5780
7afefb81 5781 adev->no_hw_access = true;
04442bf7 5782 r = amdgpu_device_pre_asic_reset(adev, &reset_context);
7afefb81 5783 adev->no_hw_access = false;
c9a6b82f
AG
5784 if (r)
5785 goto out;
5786
04442bf7 5787 r = amdgpu_do_asic_reset(&device_list, &reset_context);
c9a6b82f
AG
5788
5789out:
c9a6b82f 5790 if (!r) {
c1dd4aa6
AG
5791 if (amdgpu_device_cache_pci_state(adev->pdev))
5792 pci_restore_state(adev->pdev);
5793
c9a6b82f
AG
5794 DRM_INFO("PCIe error recovery succeeded\n");
5795 } else {
5796 DRM_ERROR("PCIe error recovery failed, err:%d", r);
e923be99
AG
5797 amdgpu_device_unset_mp1_state(adev);
5798 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f
AG
5799 }
5800
5801 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
5802}
5803
5804/**
5805 * amdgpu_pci_resume() - resume normal ops after PCI reset
5806 * @pdev: pointer to PCI device
5807 *
 5808 * Called when the error recovery driver tells us that it's
505199a3 5809 * OK to resume normal operation.
c9a6b82f
AG
5810 */
5811void amdgpu_pci_resume(struct pci_dev *pdev)
5812{
5813 struct drm_device *dev = pci_get_drvdata(pdev);
5814 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5815 int i;
c9a6b82f 5816
c9a6b82f
AG
5817
5818 DRM_INFO("PCI error: resume callback!!\n");
acd89fca 5819
e17e27f9
GC
5820 /* Only continue execution for the case of pci_channel_io_frozen */
5821 if (adev->pci_channel_state != pci_channel_io_frozen)
5822 return;
5823
acd89fca
AG
5824 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5825 struct amdgpu_ring *ring = adev->rings[i];
5826
5827 if (!ring || !ring->sched.thread)
5828 continue;
5829
acd89fca
AG
5830 drm_sched_start(&ring->sched, true);
5831 }
5832
e923be99
AG
5833 amdgpu_device_unset_mp1_state(adev);
5834 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f 5835}
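/*
 * Editor's illustrative sketch (not part of the original file): the four
 * callbacks above are meant to be wired into a struct pci_error_handlers
 * referenced from the driver's struct pci_driver; the actual hookup lives
 * in amdgpu_drv.c and the instance name used here is illustrative only.
 */
static const struct pci_error_handlers example_pci_err_handlers = {
	.error_detected	= amdgpu_pci_error_detected,
	.mmio_enabled	= amdgpu_pci_mmio_enabled,
	.slot_reset	= amdgpu_pci_slot_reset,
	.resume		= amdgpu_pci_resume,
};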
c1dd4aa6
AG
5836
5837bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
5838{
5839 struct drm_device *dev = pci_get_drvdata(pdev);
5840 struct amdgpu_device *adev = drm_to_adev(dev);
5841 int r;
5842
5843 r = pci_save_state(pdev);
5844 if (!r) {
5845 kfree(adev->pci_state);
5846
5847 adev->pci_state = pci_store_saved_state(pdev);
5848
5849 if (!adev->pci_state) {
5850 DRM_ERROR("Failed to store PCI saved state");
5851 return false;
5852 }
5853 } else {
5854 DRM_WARN("Failed to save PCI state, err:%d\n", r);
5855 return false;
5856 }
5857
5858 return true;
5859}
5860
5861bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
5862{
5863 struct drm_device *dev = pci_get_drvdata(pdev);
5864 struct amdgpu_device *adev = drm_to_adev(dev);
5865 int r;
5866
5867 if (!adev->pci_state)
5868 return false;
5869
5870 r = pci_load_saved_state(pdev, adev->pci_state);
5871
5872 if (!r) {
5873 pci_restore_state(pdev);
5874 } else {
5875 DRM_WARN("Failed to load PCI state, err:%d\n", r);
5876 return false;
5877 }
5878
5879 return true;
5880}
5881
810085dd
EH
5882void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
5883 struct amdgpu_ring *ring)
5884{
5885#ifdef CONFIG_X86_64
b818a5d3 5886 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
5887 return;
5888#endif
5889 if (adev->gmc.xgmi.connected_to_cpu)
5890 return;
5891
5892 if (ring && ring->funcs->emit_hdp_flush)
5893 amdgpu_ring_emit_hdp_flush(ring);
5894 else
5895 amdgpu_asic_flush_hdp(adev, ring);
5896}
c1dd4aa6 5897
810085dd
EH
5898void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
5899 struct amdgpu_ring *ring)
5900{
5901#ifdef CONFIG_X86_64
b818a5d3 5902 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
5903 return;
5904#endif
5905 if (adev->gmc.xgmi.connected_to_cpu)
5906 return;
c1dd4aa6 5907
810085dd
EH
5908 amdgpu_asic_invalidate_hdp(adev, ring);
5909}
34f3a4a9 5910
89a7a870
AG
5911int amdgpu_in_reset(struct amdgpu_device *adev)
5912{
5913 return atomic_read(&adev->reset_domain->in_gpu_reset);
53a17b6b
TZ
5914}
5915
34f3a4a9
LY
5916/**
5917 * amdgpu_device_halt() - bring hardware to some kind of halt state
5918 *
5919 * @adev: amdgpu_device pointer
5920 *
5921 * Bring hardware to some kind of halt state so that no one can touch it
 5922 * any more. This helps to preserve the error context when an error occurs.
 5923 * Compared to a simple hang, the system will stay stable at least for SSH
 5924 * access. It should then be trivial to inspect the hardware state and
 5925 * see what's going on. Implemented as follows:
5926 *
5927 * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
5928 * clears all CPU mappings to device, disallows remappings through page faults
5929 * 2. amdgpu_irq_disable_all() disables all interrupts
5930 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
 5931 * 4. set adev->no_hw_access to avoid potential crashes after step 5
5932 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
5933 * 6. pci_disable_device() and pci_wait_for_pending_transaction()
5934 * flush any in flight DMA operations
5935 */
5936void amdgpu_device_halt(struct amdgpu_device *adev)
5937{
5938 struct pci_dev *pdev = adev->pdev;
e0f943b4 5939 struct drm_device *ddev = adev_to_drm(adev);
34f3a4a9 5940
2c1c7ba4 5941 amdgpu_xcp_dev_unplug(adev);
34f3a4a9
LY
5942 drm_dev_unplug(ddev);
5943
5944 amdgpu_irq_disable_all(adev);
5945
5946 amdgpu_fence_driver_hw_fini(adev);
5947
5948 adev->no_hw_access = true;
5949
5950 amdgpu_device_unmap_mmio(adev);
5951
5952 pci_disable_device(pdev);
5953 pci_wait_for_pending_transaction(pdev);
5954}
86700a40
XD
5955
5956u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
5957 u32 reg)
5958{
5959 unsigned long flags, address, data;
5960 u32 r;
5961
5962 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
5963 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
5964
5965 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
5966 WREG32(address, reg * 4);
5967 (void)RREG32(address);
5968 r = RREG32(data);
5969 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
5970 return r;
5971}
5972
5973void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
5974 u32 reg, u32 v)
5975{
5976 unsigned long flags, address, data;
5977
5978 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
5979 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
5980
5981 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
5982 WREG32(address, reg * 4);
5983 (void)RREG32(address);
5984 WREG32(data, v);
5985 (void)RREG32(data);
5986 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
5987}
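/*
 * Editor's illustrative sketch (not part of the original file): the two
 * helpers above provide indexed access to PCIe port registers, so a
 * read-modify-write looks like this. EXAMPLE_PORT_REG and
 * EXAMPLE_FIELD_MASK are placeholders, not real register definitions.
 */
#define EXAMPLE_PORT_REG	0x10	/* placeholder register index */
#define EXAMPLE_FIELD_MASK	0x3u

static void example_pcie_port_rmw(struct amdgpu_device *adev, u32 field)
{
	u32 v = amdgpu_device_pcie_port_rreg(adev, EXAMPLE_PORT_REG);

	v = (v & ~EXAMPLE_FIELD_MASK) | (field & EXAMPLE_FIELD_MASK);
	amdgpu_device_pcie_port_wreg(adev, EXAMPLE_PORT_REG, v);
}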
68ce8b24
CK
5988
5989/**
5990 * amdgpu_device_switch_gang - switch to a new gang
5991 * @adev: amdgpu_device pointer
5992 * @gang: the gang to switch to
5993 *
5994 * Try to switch to a new gang.
5995 * Returns: NULL if we switched to the new gang or a reference to the current
5996 * gang leader.
5997 */
5998struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
5999 struct dma_fence *gang)
6000{
6001 struct dma_fence *old = NULL;
6002
6003 do {
6004 dma_fence_put(old);
6005 rcu_read_lock();
6006 old = dma_fence_get_rcu_safe(&adev->gang_submit);
6007 rcu_read_unlock();
6008
6009 if (old == gang)
6010 break;
6011
6012 if (!dma_fence_is_signaled(old))
6013 return old;
6014
6015 } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
6016 old, gang) != old);
6017
6018 dma_fence_put(old);
6019 return NULL;
6020}
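/*
 * Editor's illustrative sketch (not part of the original file): a submitter
 * that wants its fence to become the new gang leader retries until the
 * previous gang has signaled; the returned reference must be dropped.
 * example_become_gang_leader() is hypothetical; real callers typically add
 * the old fence as a dependency instead of waiting synchronously.
 */
static void example_become_gang_leader(struct amdgpu_device *adev,
				       struct dma_fence *my_gang)
{
	struct dma_fence *old;

	while ((old = amdgpu_device_switch_gang(adev, my_gang))) {
		dma_fence_wait(old, false);
		dma_fence_put(old);
	}
}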
220c8cc8
AD
6021
6022bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6023{
6024 switch (adev->asic_type) {
6025#ifdef CONFIG_DRM_AMDGPU_SI
6026 case CHIP_HAINAN:
6027#endif
6028 case CHIP_TOPAZ:
6029 /* chips with no display hardware */
6030 return false;
6031#ifdef CONFIG_DRM_AMDGPU_SI
6032 case CHIP_TAHITI:
6033 case CHIP_PITCAIRN:
6034 case CHIP_VERDE:
6035 case CHIP_OLAND:
6036#endif
6037#ifdef CONFIG_DRM_AMDGPU_CIK
6038 case CHIP_BONAIRE:
6039 case CHIP_HAWAII:
6040 case CHIP_KAVERI:
6041 case CHIP_KABINI:
6042 case CHIP_MULLINS:
6043#endif
6044 case CHIP_TONGA:
6045 case CHIP_FIJI:
6046 case CHIP_POLARIS10:
6047 case CHIP_POLARIS11:
6048 case CHIP_POLARIS12:
6049 case CHIP_VEGAM:
6050 case CHIP_CARRIZO:
6051 case CHIP_STONEY:
6052 /* chips with display hardware */
6053 return true;
6054 default:
6055 /* IP discovery */
6056 if (!adev->ip_versions[DCE_HWIP][0] ||
6057 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6058 return false;
6059 return true;
6060 }
6061}
81283fee
JZ
6062
6063uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
6064 uint32_t inst, uint32_t reg_addr, char reg_name[],
6065 uint32_t expected_value, uint32_t mask)
6066{
6067 uint32_t ret = 0;
6068 uint32_t old_ = 0;
6069 uint32_t tmp_ = RREG32(reg_addr);
6070 uint32_t loop = adev->usec_timeout;
6071
6072 while ((tmp_ & (mask)) != (expected_value)) {
6073 if (old_ != tmp_) {
6074 loop = adev->usec_timeout;
6075 old_ = tmp_;
6076 } else
6077 udelay(1);
6078 tmp_ = RREG32(reg_addr);
6079 loop--;
6080 if (!loop) {
 6081			DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08x\n",
6082 inst, reg_name, (uint32_t)expected_value,
6083 (uint32_t)(tmp_ & (mask)));
6084 ret = -ETIMEDOUT;
6085 break;
6086 }
6087 }
6088 return ret;
6089}
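/*
 * Editor's illustrative sketch (not part of the original file): polling a
 * status register until a ready bit is set, bounded by adev->usec_timeout.
 * EXAMPLE_STATUS_REG and EXAMPLE_READY_BIT are placeholders, not real
 * register definitions.
 */
#define EXAMPLE_STATUS_REG	0x1234	/* placeholder register offset */
#define EXAMPLE_READY_BIT	(1u << 0)

static int example_wait_for_ready(struct amdgpu_device *adev)
{
	return amdgpu_device_wait_on_rreg(adev, 0, EXAMPLE_STATUS_REG,
					  "EXAMPLE_STATUS", EXAMPLE_READY_BIT,
					  EXAMPLE_READY_BIT);
}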