drm/amd: Add a module parameter for seamless boot
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c (from linux-2.6-block.git)
d38ceaf9
AD
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
b1ddf548 28#include <linux/power_supply.h>
0875dc9e 29#include <linux/kthread.h>
fdf2f6c5 30#include <linux/module.h>
d38ceaf9
AD
31#include <linux/console.h>
32#include <linux/slab.h>
4a74c38c 33#include <linux/iommu.h>
901e2be2 34#include <linux/pci.h>
3d8785f6
SA
35#include <linux/devcoredump.h>
36#include <generated/utsrelease.h>
08a2fd23 37#include <linux/pci-p2pdma.h>
d37a3929 38#include <linux/apple-gmux.h>
fdf2f6c5 39
b7cdb41e 40#include <drm/drm_aperture.h>
4562236b 41#include <drm/drm_atomic_helper.h>
973ad627 42#include <drm/drm_crtc_helper.h>
45b64fd9 43#include <drm/drm_fb_helper.h>
fcd70cd3 44#include <drm/drm_probe_helper.h>
d38ceaf9
AD
45#include <drm/amdgpu_drm.h>
46#include <linux/vgaarb.h>
47#include <linux/vga_switcheroo.h>
48#include <linux/efi.h>
49#include "amdgpu.h"
f4b373f4 50#include "amdgpu_trace.h"
d38ceaf9
AD
51#include "amdgpu_i2c.h"
52#include "atom.h"
53#include "amdgpu_atombios.h"
a5bde2f9 54#include "amdgpu_atomfirmware.h"
d0dd7f0c 55#include "amd_pcie.h"
33f34802
KW
56#ifdef CONFIG_DRM_AMDGPU_SI
57#include "si.h"
58#endif
a2e73f56
AD
59#ifdef CONFIG_DRM_AMDGPU_CIK
60#include "cik.h"
61#endif
aaa36a97 62#include "vi.h"
460826e6 63#include "soc15.h"
0a5b8c7b 64#include "nv.h"
d38ceaf9 65#include "bif/bif_4_1_d.h"
bec86378 66#include <linux/firmware.h>
89041940 67#include "amdgpu_vf_error.h"
d38ceaf9 68
ba997709 69#include "amdgpu_amdkfd.h"
d2f52ac8 70#include "amdgpu_pm.h"
d38ceaf9 71
5183411b 72#include "amdgpu_xgmi.h"
c030f2e4 73#include "amdgpu_ras.h"
9c7c85f7 74#include "amdgpu_pmu.h"
bd607166 75#include "amdgpu_fru_eeprom.h"
04442bf7 76#include "amdgpu_reset.h"
5183411b 77
d5ea093e 78#include <linux/suspend.h>
c6a6e2db 79#include <drm/task_barrier.h>
3f12acc8 80#include <linux/pm_runtime.h>
d5ea093e 81
f89f8c6b
AG
82#include <drm/drm_drv.h>
83
3ad5dcfe
KHF
84#if IS_ENABLED(CONFIG_X86)
85#include <asm/intel-family.h>
86#endif
87
e2a75f88 88MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
3f76dced 89MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
2d2e5e7e 90MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
ad5a67a7 91MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
54c4d17e 92MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
65e60f6e 93MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
42b325e5 94MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
e2a75f88 95
2dc80b00 96#define AMDGPU_RESUME_MS 2000
7258fa31
SK
97#define AMDGPU_MAX_RETRY_LIMIT 2
98#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
2dc80b00 99
b7cdb41e
ML
100static const struct drm_driver amdgpu_kms_driver;
101
050091ab 102const char *amdgpu_asic_name[] = {
da69c161
KW
103 "TAHITI",
104 "PITCAIRN",
105 "VERDE",
106 "OLAND",
107 "HAINAN",
d38ceaf9
AD
108 "BONAIRE",
109 "KAVERI",
110 "KABINI",
111 "HAWAII",
112 "MULLINS",
113 "TOPAZ",
114 "TONGA",
48299f95 115 "FIJI",
d38ceaf9 116 "CARRIZO",
139f4917 117 "STONEY",
2cc0c0b5
FC
118 "POLARIS10",
119 "POLARIS11",
c4642a47 120 "POLARIS12",
48ff108d 121 "VEGAM",
d4196f01 122 "VEGA10",
8fab806a 123 "VEGA12",
956fcddc 124 "VEGA20",
2ca8a5d2 125 "RAVEN",
d6c3b24e 126 "ARCTURUS",
1eee4228 127 "RENOIR",
d46b417a 128 "ALDEBARAN",
852a6626 129 "NAVI10",
d0f56dc2 130 "CYAN_SKILLFISH",
87dbad02 131 "NAVI14",
9802f5d7 132 "NAVI12",
ccaf72d3 133 "SIENNA_CICHLID",
ddd8fbe7 134 "NAVY_FLOUNDER",
4f1e9a76 135 "VANGOGH",
a2468e04 136 "DIMGREY_CAVEFISH",
6f169591 137 "BEIGE_GOBY",
ee9236b7 138 "YELLOW_CARP",
3ae695d6 139 "IP DISCOVERY",
d38ceaf9
AD
140 "LAST",
141};
142
dcea6e65
KR
143/**
144 * DOC: pcie_replay_count
145 *
146 * The amdgpu driver provides a sysfs API for reporting the total number
147 * of PCIe replays (NAKs).
148 * The file pcie_replay_count is used for this and returns the total
149 * number of replays as a sum of the NAKs generated and the NAKs received.
150 */
151
152static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
153 struct device_attribute *attr, char *buf)
154{
155 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 156 struct amdgpu_device *adev = drm_to_adev(ddev);
dcea6e65
KR
157 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
158
36000c7a 159 return sysfs_emit(buf, "%llu\n", cnt);
dcea6e65
KR
160}
161
b8920e1e 162static DEVICE_ATTR(pcie_replay_count, 0444,
dcea6e65
KR
163 amdgpu_device_get_pcie_replay_count, NULL);
164
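/*
 * Usage note: the attribute above is a plain read-only DEVICE_ATTR, so
 * userspace normally reads it as a sysfs file on the PCI device, e.g.
 * /sys/bus/pci/devices/<bdf>/pcie_replay_count (the exact path depends on
 * where the attribute is registered, which is not shown in this excerpt,
 * so treat it as an assumption).
 */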
5494d864
AD
165static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
166
bd607166 167
fd496ca8 168/**
b98c6299 169 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
fd496ca8
AD
170 *
171 * @dev: drm_device pointer
172 *
b98c6299 173 * Returns true if the device is a dGPU with ATPX power control,
fd496ca8
AD
174 * otherwise return false.
175 */
b98c6299 176bool amdgpu_device_supports_px(struct drm_device *dev)
fd496ca8
AD
177{
178 struct amdgpu_device *adev = drm_to_adev(dev);
179
b98c6299 180 if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
fd496ca8
AD
181 return true;
182 return false;
183}
184
e3ecdffa 185/**
0330b848 186 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
e3ecdffa
AD
187 *
188 * @dev: drm_device pointer
189 *
b98c6299 190 * Returns true if the device is a dGPU with ACPI power control,
e3ecdffa
AD
191 * otherwise return false.
192 */
31af062a 193bool amdgpu_device_supports_boco(struct drm_device *dev)
d38ceaf9 194{
1348969a 195 struct amdgpu_device *adev = drm_to_adev(dev);
d38ceaf9 196
b98c6299
AD
197 if (adev->has_pr3 ||
198 ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
d38ceaf9
AD
199 return true;
200 return false;
201}
202
a69cba42
AD
203/**
204 * amdgpu_device_supports_baco - Does the device support BACO
205 *
206 * @dev: drm_device pointer
207 *
208 * Returns true if the device supports BACO,
209 * otherwise return false.
210 */
211bool amdgpu_device_supports_baco(struct drm_device *dev)
212{
1348969a 213 struct amdgpu_device *adev = drm_to_adev(dev);
a69cba42
AD
214
215 return amdgpu_asic_supports_baco(adev);
216}
217
3fa8f89d
S
218/**
219 * amdgpu_device_supports_smart_shift - Is the device dGPU with
220 * smart shift support
221 *
222 * @dev: drm_device pointer
223 *
224 * Returns true if the device is a dGPU with Smart Shift support,
225 * otherwise returns false.
226 */
227bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
228{
229 return (amdgpu_device_supports_boco(dev) &&
230 amdgpu_acpi_is_power_shift_control_supported());
231}
232
6e3cd2a9
MCC
233/*
234 * VRAM access helper functions
235 */
236
e35e2b11 237/**
048af66b 238 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
e35e2b11
TY
239 *
240 * @adev: amdgpu_device pointer
241 * @pos: offset of the buffer in vram
242 * @buf: virtual address of the buffer in system memory
243 * @size: read/write size, sizeof(@buf) must be > @size
244 * @write: true - write to vram, otherwise - read from vram
245 */
048af66b
KW
246void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
247 void *buf, size_t size, bool write)
e35e2b11 248{
e35e2b11 249 unsigned long flags;
048af66b
KW
250 uint32_t hi = ~0, tmp = 0;
251 uint32_t *data = buf;
ce05ac56 252 uint64_t last;
f89f8c6b 253 int idx;
ce05ac56 254
c58a863b 255 if (!drm_dev_enter(adev_to_drm(adev), &idx))
f89f8c6b 256 return;
9d11eb0d 257
048af66b
KW
258 BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
259
260 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
261 for (last = pos + size; pos < last; pos += 4) {
262 tmp = pos >> 31;
263
264 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
265 if (tmp != hi) {
266 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
267 hi = tmp;
268 }
269 if (write)
270 WREG32_NO_KIQ(mmMM_DATA, *data++);
271 else
272 *data++ = RREG32_NO_KIQ(mmMM_DATA);
273 }
274
275 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
276 drm_dev_exit(idx);
277}
278
279/**
bbe04dec 280 * amdgpu_device_aper_access - access vram by the vram aperture
048af66b
KW
281 *
282 * @adev: amdgpu_device pointer
283 * @pos: offset of the buffer in vram
284 * @buf: virtual address of the buffer in system memory
285 * @size: read/write size, sizeof(@buf) must be > @size
286 * @write: true - write to vram, otherwise - read from vram
287 *
288 * The return value is the number of bytes transferred.
289 */
290size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
291 void *buf, size_t size, bool write)
292{
9d11eb0d 293#ifdef CONFIG_64BIT
048af66b
KW
294 void __iomem *addr;
295 size_t count = 0;
296 uint64_t last;
297
298 if (!adev->mman.aper_base_kaddr)
299 return 0;
300
9d11eb0d
CK
301 last = min(pos + size, adev->gmc.visible_vram_size);
302 if (last > pos) {
048af66b
KW
303 addr = adev->mman.aper_base_kaddr + pos;
304 count = last - pos;
9d11eb0d
CK
305
306 if (write) {
307 memcpy_toio(addr, buf, count);
4c452b5c
SS
308 /* Make sure HDP write cache flush happens without any reordering
309 * after the system memory contents are sent over PCIe device
310 */
9d11eb0d 311 mb();
810085dd 312 amdgpu_device_flush_hdp(adev, NULL);
9d11eb0d 313 } else {
810085dd 314 amdgpu_device_invalidate_hdp(adev, NULL);
4c452b5c
SS
315 /* Make sure HDP read cache is invalidated before issuing a read
316 * to the PCIe device
317 */
9d11eb0d
CK
318 mb();
319 memcpy_fromio(buf, addr, count);
320 }
321
9d11eb0d 322 }
048af66b
KW
323
324 return count;
325#else
326 return 0;
9d11eb0d 327#endif
048af66b 328}
9d11eb0d 329
048af66b
KW
330/**
331 * amdgpu_device_vram_access - read/write a buffer in vram
332 *
333 * @adev: amdgpu_device pointer
334 * @pos: offset of the buffer in vram
335 * @buf: virtual address of the buffer in system memory
336 * @size: read/write size, sizeof(@buf) must be > @size
337 * @write: true - write to vram, otherwise - read from vram
338 */
339void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
340 void *buf, size_t size, bool write)
341{
342 size_t count;
e35e2b11 343
048af66b
KW
344 /* try to use the vram aperture to access vram first */
345 count = amdgpu_device_aper_access(adev, pos, buf, size, write);
346 size -= count;
347 if (size) {
348 /* use MM to access the rest of vram */
349 pos += count;
350 buf += count;
351 amdgpu_device_mm_access(adev, pos, buf, size, write);
e35e2b11
TY
352 }
353}
354
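/*
 * A minimal usage sketch of amdgpu_device_vram_access() (illustrative only;
 * the offset and data below are made-up values): write a small 4-byte-aligned
 * buffer into VRAM and read it back through whichever path is available.
 */
static void example_vram_roundtrip(struct amdgpu_device *adev)
{
	u32 pattern[4] = { 0xdeadbeef, 0xcafebabe, 0x00000000, 0x12345678 };
	u32 readback[4] = { };
	loff_t offset = 0x1000;	/* hypothetical VRAM offset */

	/* copy the buffer into VRAM ... */
	amdgpu_device_vram_access(adev, offset, pattern, sizeof(pattern), true);
	/* ... and read the same range back into system memory */
	amdgpu_device_vram_access(adev, offset, readback, sizeof(readback), false);
}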
d38ceaf9 355/*
f7ee1874 356 * register access helper functions.
d38ceaf9 357 */
56b53c0b
DL
358
359/* Check if hw access should be skipped because of hotplug or device error */
360bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
361{
7afefb81 362 if (adev->no_hw_access)
56b53c0b
DL
363 return true;
364
365#ifdef CONFIG_LOCKDEP
366 /*
367 * This is a bit complicated to understand, so worth a comment. What we assert
368 * here is that the GPU reset is not running on another thread in parallel.
369 *
370 * For this we trylock the read side of the reset semaphore, if that succeeds
371 * we know that the reset is not running in parallel.
372 *
373 * If the trylock fails we assert that we are either already holding the read
374 * side of the lock or are the reset thread itself and hold the write side of
375 * the lock.
376 */
377 if (in_task()) {
d0fb18b5
AG
378 if (down_read_trylock(&adev->reset_domain->sem))
379 up_read(&adev->reset_domain->sem);
56b53c0b 380 else
d0fb18b5 381 lockdep_assert_held(&adev->reset_domain->sem);
56b53c0b
DL
382 }
383#endif
384 return false;
385}
386
e3ecdffa 387/**
f7ee1874 388 * amdgpu_device_rreg - read a memory mapped IO or indirect register
e3ecdffa
AD
389 *
390 * @adev: amdgpu_device pointer
391 * @reg: dword aligned register offset
392 * @acc_flags: access flags which require special behavior
393 *
394 * Returns the 32 bit value from the offset specified.
395 */
f7ee1874
HZ
396uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
397 uint32_t reg, uint32_t acc_flags)
d38ceaf9 398{
f4b373f4
TSD
399 uint32_t ret;
400
56b53c0b 401 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
402 return 0;
403
f7ee1874
HZ
404 if ((reg * 4) < adev->rmmio_size) {
405 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
406 amdgpu_sriov_runtime(adev) &&
d0fb18b5 407 down_read_trylock(&adev->reset_domain->sem)) {
f7ee1874 408 ret = amdgpu_kiq_rreg(adev, reg);
d0fb18b5 409 up_read(&adev->reset_domain->sem);
f7ee1874
HZ
410 } else {
411 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
412 }
413 } else {
414 ret = adev->pcie_rreg(adev, reg * 4);
81202807 415 }
bc992ba5 416
f7ee1874 417 trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
e78b579d 418
f4b373f4 419 return ret;
d38ceaf9
AD
420}
421
421a2a30
ML
422/*
423 * MMIO register read with bytes helper functions
424 * @offset: byte offset from MMIO start
b8920e1e 425 */
421a2a30 426
e3ecdffa
AD
427/**
428 * amdgpu_mm_rreg8 - read a memory mapped IO register
429 *
430 * @adev: amdgpu_device pointer
431 * @offset: byte aligned register offset
432 *
433 * Returns the 8 bit value from the offset specified.
434 */
7cbbc745
AG
435uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
436{
56b53c0b 437 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
438 return 0;
439
421a2a30
ML
440 if (offset < adev->rmmio_size)
441 return (readb(adev->rmmio + offset));
442 BUG();
443}
444
445/*
446 * MMIO register write with bytes helper functions
447 * @offset: byte offset from MMIO start
448 * @value: the value to be written to the register
b8920e1e
SS
449 */
450
e3ecdffa
AD
451/**
452 * amdgpu_mm_wreg8 - write to a memory mapped IO register
453 *
454 * @adev: amdgpu_device pointer
455 * @offset: byte aligned register offset
456 * @value: 8 bit value to write
457 *
458 * Writes the value specified to the offset specified.
459 */
7cbbc745
AG
460void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
461{
56b53c0b 462 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
463 return;
464
421a2a30
ML
465 if (offset < adev->rmmio_size)
466 writeb(value, adev->rmmio + offset);
467 else
468 BUG();
469}
470
e3ecdffa 471/**
f7ee1874 472 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
e3ecdffa
AD
473 *
474 * @adev: amdgpu_device pointer
475 * @reg: dword aligned register offset
476 * @v: 32 bit value to write to the register
477 * @acc_flags: access flags which require special behavior
478 *
479 * Writes the value specified to the offset specified.
480 */
f7ee1874
HZ
481void amdgpu_device_wreg(struct amdgpu_device *adev,
482 uint32_t reg, uint32_t v,
483 uint32_t acc_flags)
d38ceaf9 484{
56b53c0b 485 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
486 return;
487
f7ee1874
HZ
488 if ((reg * 4) < adev->rmmio_size) {
489 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
490 amdgpu_sriov_runtime(adev) &&
d0fb18b5 491 down_read_trylock(&adev->reset_domain->sem)) {
f7ee1874 492 amdgpu_kiq_wreg(adev, reg, v);
d0fb18b5 493 up_read(&adev->reset_domain->sem);
f7ee1874
HZ
494 } else {
495 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
496 }
497 } else {
498 adev->pcie_wreg(adev, reg * 4, v);
81202807 499 }
bc992ba5 500
f7ee1874 501 trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
2e0cc4d4 502}
d38ceaf9 503
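/*
 * A short sketch of how the two helpers above are normally used together
 * (illustrative only; driver code usually reaches them through the
 * RREG32()/WREG32() style macros): read-modify-write of a register offset
 * passed in by the caller.
 */
static void example_rmw_register(struct amdgpu_device *adev, uint32_t reg)
{
	uint32_t tmp;

	tmp = amdgpu_device_rreg(adev, reg, 0);
	tmp |= 0x1;	/* set a hypothetical enable bit */
	amdgpu_device_wreg(adev, reg, tmp, 0);
}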
03f2abb0 504/**
4cc9f86f 505 * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
2e0cc4d4 506 *
71579346
RB
507 * @adev: amdgpu_device pointer
508 * @reg: mmio/rlc register
509 * @v: value to write
8057a9d6 510 * @xcc_id: xcc accelerated compute core id
71579346
RB
511 *
512 * this function is invoked only for the debugfs register access
03f2abb0 513 */
f7ee1874 514void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
8ed49dd1
VL
515 uint32_t reg, uint32_t v,
516 uint32_t xcc_id)
2e0cc4d4 517{
56b53c0b 518 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
519 return;
520
2e0cc4d4 521 if (amdgpu_sriov_fullaccess(adev) &&
f7ee1874
HZ
522 adev->gfx.rlc.funcs &&
523 adev->gfx.rlc.funcs->is_rlcg_access_range) {
2e0cc4d4 524 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
8ed49dd1 525 return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
4cc9f86f
TSD
526 } else if ((reg * 4) >= adev->rmmio_size) {
527 adev->pcie_wreg(adev, reg * 4, v);
f7ee1874
HZ
528 } else {
529 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
47ed4e1c 530 }
d38ceaf9
AD
531}
532
1bba3683
HZ
533/**
534 * amdgpu_device_indirect_rreg - read an indirect register
535 *
536 * @adev: amdgpu_device pointer
22f453fb 537 * @reg_addr: indirect register address to read from
1bba3683
HZ
538 *
539 * Returns the value of indirect register @reg_addr
540 */
541u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
1bba3683
HZ
542 u32 reg_addr)
543{
65ba96e9 544 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
545 void __iomem *pcie_index_offset;
546 void __iomem *pcie_data_offset;
65ba96e9
HZ
547 u32 r;
548
549 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
550 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1bba3683
HZ
551
552 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
553 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
554 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
555
556 writel(reg_addr, pcie_index_offset);
557 readl(pcie_index_offset);
558 r = readl(pcie_data_offset);
559 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
560
561 return r;
562}
563
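/*
 * Note on the indirect helpers in this block: they all follow the same PCIE
 * index/data pair protocol -- write the target address to the index register,
 * read the index register back to flush the posted write, then access the
 * data register, all under pcie_idx_lock.  The _ext and 64-bit variants only
 * add a high index register and/or a second data cycle on top of this.
 */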
0c552ed3
LM
564u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
565 u64 reg_addr)
566{
567 unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
568 u32 r;
569 void __iomem *pcie_index_offset;
570 void __iomem *pcie_index_hi_offset;
571 void __iomem *pcie_data_offset;
572
573 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
574 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
d57e24aa 575 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
0c552ed3
LM
576 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
577 else
578 pcie_index_hi = 0;
579
580 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
581 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
582 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
583 if (pcie_index_hi != 0)
584 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
585 pcie_index_hi * 4;
586
587 writel(reg_addr, pcie_index_offset);
588 readl(pcie_index_offset);
589 if (pcie_index_hi != 0) {
590 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
591 readl(pcie_index_hi_offset);
592 }
593 r = readl(pcie_data_offset);
594
595 /* clear the high bits */
596 if (pcie_index_hi != 0) {
597 writel(0, pcie_index_hi_offset);
598 readl(pcie_index_hi_offset);
599 }
600
601 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
602
603 return r;
604}
605
1bba3683
HZ
606/**
607 * amdgpu_device_indirect_rreg64 - read a 64-bit indirect register
608 *
609 * @adev: amdgpu_device pointer
22f453fb 610 * @reg_addr: indirect register address to read from
1bba3683
HZ
611 *
612 * Returns the value of indirect register @reg_addr
613 */
614u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
1bba3683
HZ
615 u32 reg_addr)
616{
65ba96e9 617 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
618 void __iomem *pcie_index_offset;
619 void __iomem *pcie_data_offset;
65ba96e9
HZ
620 u64 r;
621
622 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
623 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1bba3683
HZ
624
625 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
626 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
627 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
628
629 /* read low 32 bits */
630 writel(reg_addr, pcie_index_offset);
631 readl(pcie_index_offset);
632 r = readl(pcie_data_offset);
633 /* read high 32 bits */
634 writel(reg_addr + 4, pcie_index_offset);
635 readl(pcie_index_offset);
636 r |= ((u64)readl(pcie_data_offset) << 32);
637 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
638
639 return r;
640}
641
a76b2870
CL
642u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
643 u64 reg_addr)
644{
645 unsigned long flags, pcie_index, pcie_data;
646 unsigned long pcie_index_hi = 0;
647 void __iomem *pcie_index_offset;
648 void __iomem *pcie_index_hi_offset;
649 void __iomem *pcie_data_offset;
650 u64 r;
651
652 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
653 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
654 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
655 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
656
657 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
658 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
659 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
660 if (pcie_index_hi != 0)
661 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
662 pcie_index_hi * 4;
663
664 /* read low 32 bits */
665 writel(reg_addr, pcie_index_offset);
666 readl(pcie_index_offset);
667 if (pcie_index_hi != 0) {
668 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
669 readl(pcie_index_hi_offset);
670 }
671 r = readl(pcie_data_offset);
672 /* read high 32 bits */
673 writel(reg_addr + 4, pcie_index_offset);
674 readl(pcie_index_offset);
675 if (pcie_index_hi != 0) {
676 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
677 readl(pcie_index_hi_offset);
678 }
679 r |= ((u64)readl(pcie_data_offset) << 32);
680
681 /* clear the high bits */
682 if (pcie_index_hi != 0) {
683 writel(0, pcie_index_hi_offset);
684 readl(pcie_index_hi_offset);
685 }
686
687 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
688
689 return r;
690}
691
1bba3683
HZ
692/**
693 * amdgpu_device_indirect_wreg - write to an indirect register
694 *
695 * @adev: amdgpu_device pointer
1bba3683
HZ
696 * @reg_addr: indirect register offset
697 * @reg_data: indirect register data
698 *
699 */
700void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
1bba3683
HZ
701 u32 reg_addr, u32 reg_data)
702{
65ba96e9 703 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
704 void __iomem *pcie_index_offset;
705 void __iomem *pcie_data_offset;
706
65ba96e9
HZ
707 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
708 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
709
1bba3683
HZ
710 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
711 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
712 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
713
714 writel(reg_addr, pcie_index_offset);
715 readl(pcie_index_offset);
716 writel(reg_data, pcie_data_offset);
717 readl(pcie_data_offset);
718 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
719}
720
0c552ed3
LM
721void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
722 u64 reg_addr, u32 reg_data)
723{
724 unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
725 void __iomem *pcie_index_offset;
726 void __iomem *pcie_index_hi_offset;
727 void __iomem *pcie_data_offset;
728
729 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
730 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
d57e24aa 731 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
0c552ed3
LM
732 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
733 else
734 pcie_index_hi = 0;
735
736 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
737 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
738 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
739 if (pcie_index_hi != 0)
740 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
741 pcie_index_hi * 4;
742
743 writel(reg_addr, pcie_index_offset);
744 readl(pcie_index_offset);
745 if (pcie_index_hi != 0) {
746 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
747 readl(pcie_index_hi_offset);
748 }
749 writel(reg_data, pcie_data_offset);
750 readl(pcie_data_offset);
751
752 /* clear the high bits */
753 if (pcie_index_hi != 0) {
754 writel(0, pcie_index_hi_offset);
755 readl(pcie_index_hi_offset);
756 }
757
758 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
759}
760
1bba3683
HZ
761/**
762 * amdgpu_device_indirect_wreg64 - write a 64-bit value to an indirect register
763 *
764 * @adev: amdgpu_device pointer
1bba3683
HZ
765 * @reg_addr: indirect register offset
766 * @reg_data: indirect register data
767 *
768 */
769void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
1bba3683
HZ
770 u32 reg_addr, u64 reg_data)
771{
65ba96e9 772 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
773 void __iomem *pcie_index_offset;
774 void __iomem *pcie_data_offset;
775
65ba96e9
HZ
776 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
777 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
778
1bba3683
HZ
779 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
780 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
781 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
782
783 /* write low 32 bits */
784 writel(reg_addr, pcie_index_offset);
785 readl(pcie_index_offset);
786 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
787 readl(pcie_data_offset);
788 /* write high 32 bits */
789 writel(reg_addr + 4, pcie_index_offset);
790 readl(pcie_index_offset);
791 writel((u32)(reg_data >> 32), pcie_data_offset);
792 readl(pcie_data_offset);
793 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
794}
795
a76b2870
CL
796void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
797 u64 reg_addr, u64 reg_data)
798{
799 unsigned long flags, pcie_index, pcie_data;
800 unsigned long pcie_index_hi = 0;
801 void __iomem *pcie_index_offset;
802 void __iomem *pcie_index_hi_offset;
803 void __iomem *pcie_data_offset;
804
805 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
806 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
807 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
808 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
809
810 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
811 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
812 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
813 if (pcie_index_hi != 0)
814 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
815 pcie_index_hi * 4;
816
817 /* write low 32 bits */
818 writel(reg_addr, pcie_index_offset);
819 readl(pcie_index_offset);
820 if (pcie_index_hi != 0) {
821 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
822 readl(pcie_index_hi_offset);
823 }
824 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
825 readl(pcie_data_offset);
826 /* write high 32 bits */
827 writel(reg_addr + 4, pcie_index_offset);
828 readl(pcie_index_offset);
829 if (pcie_index_hi != 0) {
830 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
831 readl(pcie_index_hi_offset);
832 }
833 writel((u32)(reg_data >> 32), pcie_data_offset);
834 readl(pcie_data_offset);
835
836 /* clear the high bits */
837 if (pcie_index_hi != 0) {
838 writel(0, pcie_index_hi_offset);
839 readl(pcie_index_hi_offset);
840 }
841
842 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
843}
844
dabc114e
HZ
845/**
846 * amdgpu_device_get_rev_id - query device rev_id
847 *
848 * @adev: amdgpu_device pointer
849 *
850 * Return device rev_id
851 */
852u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
853{
854 return adev->nbio.funcs->get_rev_id(adev);
855}
856
d38ceaf9
AD
857/**
858 * amdgpu_invalid_rreg - dummy reg read function
859 *
982a820b 860 * @adev: amdgpu_device pointer
d38ceaf9
AD
861 * @reg: offset of register
862 *
863 * Dummy register read function. Used for register blocks
864 * that certain asics don't have (all asics).
865 * Returns the value in the register.
866 */
867static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
868{
869 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
870 BUG();
871 return 0;
872}
873
0c552ed3
LM
874static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
875{
876 DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
877 BUG();
878 return 0;
879}
880
d38ceaf9
AD
881/**
882 * amdgpu_invalid_wreg - dummy reg write function
883 *
982a820b 884 * @adev: amdgpu_device pointer
d38ceaf9
AD
885 * @reg: offset of register
886 * @v: value to write to the register
887 *
888 * Dummy register write function. Used for register blocks
889 * that certain asics don't have (all asics).
890 */
891static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
892{
893 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
894 reg, v);
895 BUG();
896}
897
0c552ed3
LM
898static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
899{
900 DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
901 reg, v);
902 BUG();
903}
904
4fa1c6a6
TZ
905/**
906 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
907 *
982a820b 908 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
909 * @reg: offset of register
910 *
911 * Dummy register read function. Used for register blocks
912 * that certain asics don't have (all asics).
913 * Returns the value in the register.
914 */
915static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
916{
917 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
918 BUG();
919 return 0;
920}
921
a76b2870
CL
922static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg)
923{
924 DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
925 BUG();
926 return 0;
927}
928
4fa1c6a6
TZ
929/**
930 * amdgpu_invalid_wreg64 - dummy reg write function
931 *
982a820b 932 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
933 * @reg: offset of register
934 * @v: value to write to the register
935 *
936 * Dummy register write function. Used for register blocks
937 * that certain asics don't have (all asics).
938 */
939static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
940{
941 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
942 reg, v);
943 BUG();
944}
945
a76b2870
CL
946static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v)
947{
948 DRM_ERROR("Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n",
949 reg, v);
950 BUG();
951}
952
d38ceaf9
AD
953/**
954 * amdgpu_block_invalid_rreg - dummy reg read function
955 *
982a820b 956 * @adev: amdgpu_device pointer
d38ceaf9
AD
957 * @block: offset of instance
958 * @reg: offset of register
959 *
960 * Dummy register read function. Used for register blocks
961 * that certain asics don't have (all asics).
962 * Returns the value in the register.
963 */
964static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
965 uint32_t block, uint32_t reg)
966{
967 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
968 reg, block);
969 BUG();
970 return 0;
971}
972
973/**
974 * amdgpu_block_invalid_wreg - dummy reg write function
975 *
982a820b 976 * @adev: amdgpu_device pointer
d38ceaf9
AD
977 * @block: offset of instance
978 * @reg: offset of register
979 * @v: value to write to the register
980 *
981 * Dummy register write function. Used for register blocks
982 * that certain asics don't have (all asics).
983 */
984static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
985 uint32_t block,
986 uint32_t reg, uint32_t v)
987{
988 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
989 reg, block, v);
990 BUG();
991}
992
4d2997ab
AD
993/**
994 * amdgpu_device_asic_init - Wrapper for atom asic_init
995 *
982a820b 996 * @adev: amdgpu_device pointer
4d2997ab
AD
997 *
998 * Does any asic specific work and then calls atom asic init.
999 */
1000static int amdgpu_device_asic_init(struct amdgpu_device *adev)
1001{
15c5c5f5
LL
1002 int ret;
1003
4d2997ab
AD
1004 amdgpu_asic_pre_asic_init(adev);
1005
4e8303cf
LL
1006 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
1007 amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
15c5c5f5
LL
1008 amdgpu_psp_wait_for_bootloader(adev);
1009 ret = amdgpu_atomfirmware_asic_init(adev, true);
1010 return ret;
1011 } else {
85d1bcc6 1012 return amdgpu_atom_asic_init(adev->mode_info.atom_context);
15c5c5f5
LL
1013 }
1014
1015 return 0;
4d2997ab
AD
1016}
1017
e3ecdffa 1018/**
7ccfd79f 1019 * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
e3ecdffa 1020 *
982a820b 1021 * @adev: amdgpu_device pointer
e3ecdffa
AD
1022 *
1023 * Allocates a scratch page of VRAM for use by various things in the
1024 * driver.
1025 */
7ccfd79f 1026static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
d38ceaf9 1027{
7ccfd79f
CK
1028 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
1029 AMDGPU_GEM_DOMAIN_VRAM |
1030 AMDGPU_GEM_DOMAIN_GTT,
1031 &adev->mem_scratch.robj,
1032 &adev->mem_scratch.gpu_addr,
1033 (void **)&adev->mem_scratch.ptr);
d38ceaf9
AD
1034}
1035
e3ecdffa 1036/**
7ccfd79f 1037 * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
e3ecdffa 1038 *
982a820b 1039 * @adev: amdgpu_device pointer
e3ecdffa
AD
1040 *
1041 * Frees the VRAM scratch page.
1042 */
7ccfd79f 1043static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 1044{
7ccfd79f 1045 amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
d38ceaf9
AD
1046}
1047
1048/**
9c3f2b54 1049 * amdgpu_device_program_register_sequence - program an array of registers.
d38ceaf9
AD
1050 *
1051 * @adev: amdgpu_device pointer
1052 * @registers: pointer to the register array
1053 * @array_size: size of the register array
1054 *
b8920e1e 1055 * Programs an array of registers with and/or masks.
d38ceaf9
AD
1056 * This is a helper for setting golden registers.
1057 */
9c3f2b54
AD
1058void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
1059 const u32 *registers,
1060 const u32 array_size)
d38ceaf9
AD
1061{
1062 u32 tmp, reg, and_mask, or_mask;
1063 int i;
1064
1065 if (array_size % 3)
1066 return;
1067
47fc644f 1068 for (i = 0; i < array_size; i += 3) {
d38ceaf9
AD
1069 reg = registers[i + 0];
1070 and_mask = registers[i + 1];
1071 or_mask = registers[i + 2];
1072
1073 if (and_mask == 0xffffffff) {
1074 tmp = or_mask;
1075 } else {
1076 tmp = RREG32(reg);
1077 tmp &= ~and_mask;
e0d07657
HZ
1078 if (adev->family >= AMDGPU_FAMILY_AI)
1079 tmp |= (or_mask & and_mask);
1080 else
1081 tmp |= or_mask;
d38ceaf9
AD
1082 }
1083 WREG32(reg, tmp);
1084 }
1085}
1086
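/*
 * A minimal sketch of how a golden register list is laid out and applied
 * (the offsets and masks below are made-up example values, not real golden
 * settings): each entry is a {reg, and_mask, or_mask} triplet, and an
 * and_mask of 0xffffffff overrides the register with or_mask directly.
 */
static const u32 example_golden_settings[] = {
	/* reg,         and_mask,    or_mask */
	0x0000317c, 0xffffffff, 0x00000039,
	0x00009508, 0xfffffffc, 0x00000002,
};

static void example_apply_golden(struct amdgpu_device *adev)
{
	amdgpu_device_program_register_sequence(adev,
						example_golden_settings,
						ARRAY_SIZE(example_golden_settings));
}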
e3ecdffa
AD
1087/**
1088 * amdgpu_device_pci_config_reset - reset the GPU
1089 *
1090 * @adev: amdgpu_device pointer
1091 *
1092 * Resets the GPU using the pci config reset sequence.
1093 * Only applicable to asics prior to vega10.
1094 */
8111c387 1095void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
1096{
1097 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
1098}
1099
af484df8
AD
1100/**
1101 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1102 *
1103 * @adev: amdgpu_device pointer
1104 *
1105 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1106 */
1107int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1108{
1109 return pci_reset_function(adev->pdev);
1110}
1111
d38ceaf9 1112/*
06ec9070 1113 * amdgpu_device_wb_*()
455a7bc2 1114 * Writeback is the method by which the GPU updates special pages in memory
ea81a173 1115 * with the status of certain GPU events (fences, ring pointers, etc.).
d38ceaf9
AD
1116 */
1117
1118/**
06ec9070 1119 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
1120 *
1121 * @adev: amdgpu_device pointer
1122 *
1123 * Disables Writeback and frees the Writeback memory (all asics).
1124 * Used at driver shutdown.
1125 */
06ec9070 1126static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1127{
1128 if (adev->wb.wb_obj) {
a76ed485
AD
1129 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1130 &adev->wb.gpu_addr,
1131 (void **)&adev->wb.wb);
d38ceaf9
AD
1132 adev->wb.wb_obj = NULL;
1133 }
1134}
1135
1136/**
03f2abb0 1137 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
d38ceaf9
AD
1138 *
1139 * @adev: amdgpu_device pointer
1140 *
455a7bc2 1141 * Initializes writeback and allocates writeback memory (all asics).
d38ceaf9
AD
1142 * Used at driver startup.
1143 * Returns 0 on success or a negative error code on failure.
1144 */
06ec9070 1145static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
1146{
1147 int r;
1148
1149 if (adev->wb.wb_obj == NULL) {
97407b63
AD
1150 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1151 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
1152 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1153 &adev->wb.wb_obj, &adev->wb.gpu_addr,
1154 (void **)&adev->wb.wb);
d38ceaf9
AD
1155 if (r) {
1156 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1157 return r;
1158 }
d38ceaf9
AD
1159
1160 adev->wb.num_wb = AMDGPU_MAX_WB;
1161 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1162
1163 /* clear wb memory */
73469585 1164 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
1165 }
1166
1167 return 0;
1168}
1169
1170/**
131b4b36 1171 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
1172 *
1173 * @adev: amdgpu_device pointer
1174 * @wb: wb index
1175 *
1176 * Allocate a wb slot for use by the driver (all asics).
1177 * Returns 0 on success or -EINVAL on failure.
1178 */
131b4b36 1179int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9
AD
1180{
1181 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 1182
97407b63 1183 if (offset < adev->wb.num_wb) {
7014285a 1184 __set_bit(offset, adev->wb.used);
63ae07ca 1185 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
1186 return 0;
1187 } else {
1188 return -EINVAL;
1189 }
1190}
1191
d38ceaf9 1192/**
131b4b36 1193 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
1194 *
1195 * @adev: amdgpu_device pointer
1196 * @wb: wb index
1197 *
1198 * Free a wb slot allocated for use by the driver (all asics)
1199 */
131b4b36 1200void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 1201{
73469585 1202 wb >>= 3;
d38ceaf9 1203 if (wb < adev->wb.num_wb)
73469585 1204 __clear_bit(wb, adev->wb.used);
d38ceaf9
AD
1205}
1206
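/*
 * A minimal sketch of the writeback slot lifecycle (illustrative only; a real
 * user such as a ring or fence holds the slot for its whole lifetime):
 * allocate a dword slot, let the GPU write to it while the CPU reads it back,
 * then free it.
 */
static int example_wb_usage(struct amdgpu_device *adev)
{
	u32 index;
	int r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	/* Assumed layout, following the dword-offset conversion in
	 * amdgpu_device_wb_get() above:
	 *   GPU-visible address of the slot: adev->wb.gpu_addr + index * 4
	 *   CPU-visible value of the slot:   adev->wb.wb[index]
	 */

	amdgpu_device_wb_free(adev, index);
	return 0;
}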
d6895ad3
CK
1207/**
1208 * amdgpu_device_resize_fb_bar - try to resize FB BAR
1209 *
1210 * @adev: amdgpu_device pointer
1211 *
1212 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1213 * to fail, but if any of the BARs is not accessible after the resize we abort
1214 * driver loading by returning -ENODEV.
1215 */
1216int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1217{
453f617a 1218 int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
31b8adab
CK
1219 struct pci_bus *root;
1220 struct resource *res;
b8920e1e 1221 unsigned int i;
d6895ad3
CK
1222 u16 cmd;
1223 int r;
1224
822130b5
AB
1225 if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1226 return 0;
1227
0c03b912 1228 /* Bypass for VF */
1229 if (amdgpu_sriov_vf(adev))
1230 return 0;
1231
b7221f2b
AD
1232 /* skip if the bios has already enabled large BAR */
1233 if (adev->gmc.real_vram_size &&
1234 (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1235 return 0;
1236
31b8adab
CK
1237 /* Check if the root BUS has 64bit memory resources */
1238 root = adev->pdev->bus;
1239 while (root->parent)
1240 root = root->parent;
1241
1242 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 1243 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
1244 res->start > 0x100000000ull)
1245 break;
1246 }
1247
1248 /* Trying to resize is pointless without a root hub window above 4GB */
1249 if (!res)
1250 return 0;
1251
453f617a
ND
1252 /* Limit the BAR size to what is available */
1253 rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1254 rbar_size);
1255
d6895ad3
CK
1256 /* Disable memory decoding while we change the BAR addresses and size */
1257 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1258 pci_write_config_word(adev->pdev, PCI_COMMAND,
1259 cmd & ~PCI_COMMAND_MEMORY);
1260
1261 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
43c064db 1262 amdgpu_doorbell_fini(adev);
d6895ad3
CK
1263 if (adev->asic_type >= CHIP_BONAIRE)
1264 pci_release_resource(adev->pdev, 2);
1265
1266 pci_release_resource(adev->pdev, 0);
1267
1268 r = pci_resize_resource(adev->pdev, 0, rbar_size);
1269 if (r == -ENOSPC)
1270 DRM_INFO("Not enough PCI address space for a large BAR.");
1271 else if (r && r != -ENOTSUPP)
1272 DRM_ERROR("Problem resizing BAR0 (%d).", r);
1273
1274 pci_assign_unassigned_bus_resources(adev->pdev->bus);
1275
1276 /* When the doorbell or fb BAR isn't available we have no chance of
1277 * using the device.
1278 */
43c064db 1279 r = amdgpu_doorbell_init(adev);
d6895ad3
CK
1280 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1281 return -ENODEV;
1282
1283 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1284
1285 return 0;
1286}
a05502e5 1287
9535a86a
SZ
1288static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
1289{
b8920e1e 1290 if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
9535a86a 1291 return false;
9535a86a
SZ
1292
1293 return true;
1294}
1295
d38ceaf9
AD
1296/*
1297 * GPU helper functions.
1298 */
1299/**
39c640c0 1300 * amdgpu_device_need_post - check whether the hw needs to be posted
d38ceaf9
AD
1301 *
1302 * @adev: amdgpu_device pointer
1303 *
c836fec5
JQ
1304 * Check if the asic has been initialized (all asics) at driver startup
1305 * or post is needed if hw reset is performed.
1306 * Returns true if post is needed or false if not.
d38ceaf9 1307 */
39c640c0 1308bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
1309{
1310 uint32_t reg;
1311
bec86378
ML
1312 if (amdgpu_sriov_vf(adev))
1313 return false;
1314
9535a86a
SZ
1315 if (!amdgpu_device_read_bios(adev))
1316 return false;
1317
bec86378 1318 if (amdgpu_passthrough(adev)) {
1da2c326
ML
1319 /* For FIJI: In the whole GPU pass-through virtualization case, after a VM
1320 * reboot some old SMC firmware still needs the driver to do a vPost or the
1321 * GPU will hang. SMC firmware versions above 22.15 don't have this flaw, so
1322 * force vPost for SMC versions below 22.15
bec86378
ML
1323 */
1324 if (adev->asic_type == CHIP_FIJI) {
1325 int err;
1326 uint32_t fw_ver;
b8920e1e 1327
bec86378
ML
1328 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1329 /* force vPost if an error occurred */
1330 if (err)
1331 return true;
1332
1333 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1da2c326
ML
1334 if (fw_ver < 0x00160e00)
1335 return true;
bec86378 1336 }
bec86378 1337 }
91fe77eb 1338
e3c1b071 1339 /* Don't post if we need to reset whole hive on init */
1340 if (adev->gmc.xgmi.pending_reset)
1341 return false;
1342
91fe77eb 1343 if (adev->has_hw_reset) {
1344 adev->has_hw_reset = false;
1345 return true;
1346 }
1347
1348 /* bios scratch used on CIK+ */
1349 if (adev->asic_type >= CHIP_BONAIRE)
1350 return amdgpu_atombios_scratch_need_asic_init(adev);
1351
1352 /* check MEM_SIZE for older asics */
1353 reg = amdgpu_asic_get_config_memsize(adev);
1354
1355 if ((reg != 0) && (reg != 0xffffffff))
1356 return false;
1357
1358 return true;
70e64c4d
ML
1359}
1360
bb0f8429
ML
1361/*
1362 * Check whether seamless boot is supported.
1363 *
1364 * So far we only support seamless boot on select ASICs.
1365 * If everything goes well, we may consider expanding
1366 * seamless boot to other ASICs.
1367 */
1368bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
1369{
5dc270d3
ML
1370 switch (amdgpu_seamless) {
1371 case -1:
1372 break;
1373 case 1:
1374 return true;
1375 case 0:
1376 return false;
1377 default:
1378 DRM_ERROR("Invalid value for amdgpu.seamless: %d\n",
1379 amdgpu_seamless);
1380 return false;
1381 }
1382
1383 if (adev->mman.keep_stolen_vga_memory)
1384 return false;
1385
bb0f8429
ML
1386 switch (adev->ip_versions[DCE_HWIP][0]) {
1387 case IP_VERSION(3, 0, 1):
5dc270d3 1388 return true;
bb0f8429
ML
1389 default:
1390 break;
1391 }
1392
1393 return false;
1394}
1395
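/*
 * Note: amdgpu_seamless is the module parameter this function consumes
 * (amdgpu.seamless on the kernel command line): -1 falls through to the
 * per-ASIC checks above, 1 forces seamless boot on, 0 forces it off, and
 * any other value is rejected with an error and treated as off.
 */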
5d1eb4c4
ML
1396/*
1397 * Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic
1398 * speed switching. Until we have confirmation from Intel that a specific host
1399 * supports it, it's safer that we keep it disabled for all.
1400 *
1401 * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1402 * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1403 */
1404bool amdgpu_device_pcie_dynamic_switching_supported(void)
1405{
1406#if IS_ENABLED(CONFIG_X86)
1407 struct cpuinfo_x86 *c = &cpu_data(0);
1408
1409 if (c->x86_vendor == X86_VENDOR_INTEL)
1410 return false;
1411#endif
1412 return true;
1413}
1414
0ab5d711
ML
1415/**
1416 * amdgpu_device_should_use_aspm - check if the device should program ASPM
1417 *
1418 * @adev: amdgpu_device pointer
1419 *
1420 * Confirm whether the module parameter and pcie bridge agree that ASPM should
1421 * be set for this device.
1422 *
1423 * Returns true if it should be used or false if not.
1424 */
1425bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1426{
1427 switch (amdgpu_aspm) {
1428 case -1:
1429 break;
1430 case 0:
1431 return false;
1432 case 1:
1433 return true;
1434 default:
1435 return false;
1436 }
1437 return pcie_aspm_enabled(adev->pdev);
1438}
1439
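/*
 * Note: amdgpu_aspm follows the same tri-state convention: 0 disables ASPM,
 * 1 enables it, and the default of -1 defers to pcie_aspm_enabled(), i.e.
 * whether the PCIe core already has ASPM active for adev->pdev.
 */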
3ad5dcfe
KHF
1440bool amdgpu_device_aspm_support_quirk(void)
1441{
1442#if IS_ENABLED(CONFIG_X86)
1443 struct cpuinfo_x86 *c = &cpu_data(0);
1444
1445 return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
1446#else
1447 return true;
1448#endif
1449}
1450
d38ceaf9
AD
1451/* if we get transitioned to only one device, take VGA back */
1452/**
06ec9070 1453 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9 1454 *
bf44e8ce 1455 * @pdev: PCI device pointer
d38ceaf9
AD
1456 * @state: enable/disable vga decode
1457 *
1458 * Enable/disable vga decode (all asics).
1459 * Returns VGA resource flags.
1460 */
bf44e8ce
CH
1461static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1462 bool state)
d38ceaf9 1463{
bf44e8ce 1464 struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
b8920e1e 1465
d38ceaf9
AD
1466 amdgpu_asic_set_vga_state(adev, state);
1467 if (state)
1468 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1469 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1470 else
1471 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1472}
1473
e3ecdffa
AD
1474/**
1475 * amdgpu_device_check_block_size - validate the vm block size
1476 *
1477 * @adev: amdgpu_device pointer
1478 *
1479 * Validates the vm block size specified via module parameter.
1480 * The vm block size defines number of bits in page table versus page directory,
1481 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1482 * page table and the remaining bits are in the page directory.
1483 */
06ec9070 1484static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
1485{
1486 /* defines number of bits in page table versus page directory,
1487 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
b8920e1e
SS
1488 * page table and the remaining bits are in the page directory
1489 */
bab4fee7
JZ
1490 if (amdgpu_vm_block_size == -1)
1491 return;
a1adf8be 1492
bab4fee7 1493 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
1494 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1495 amdgpu_vm_block_size);
97489129 1496 amdgpu_vm_block_size = -1;
a1adf8be 1497 }
a1adf8be
CZ
1498}
1499
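/*
 * Worked example for the check above: with the minimum block size of 9, a
 * single page table covers 2^9 pages * 4KB/page = 2MB of virtual address
 * space, and the remaining address bits are resolved by the page directory
 * levels.
 */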
e3ecdffa
AD
1500/**
1501 * amdgpu_device_check_vm_size - validate the vm size
1502 *
1503 * @adev: amdgpu_device pointer
1504 *
1505 * Validates the vm size in GB specified via module parameter.
1506 * The VM size is the size of the GPU virtual memory space in GB.
1507 */
06ec9070 1508static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 1509{
64dab074
AD
1510 /* no need to check the default value */
1511 if (amdgpu_vm_size == -1)
1512 return;
1513
83ca145d
ZJ
1514 if (amdgpu_vm_size < 1) {
1515 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1516 amdgpu_vm_size);
f3368128 1517 amdgpu_vm_size = -1;
83ca145d 1518 }
83ca145d
ZJ
1519}
1520
7951e376
RZ
1521static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1522{
1523 struct sysinfo si;
a9d4fe2f 1524 bool is_os_64 = (sizeof(void *) == 8);
7951e376
RZ
1525 uint64_t total_memory;
1526 uint64_t dram_size_seven_GB = 0x1B8000000;
1527 uint64_t dram_size_three_GB = 0xB8000000;
1528
1529 if (amdgpu_smu_memory_pool_size == 0)
1530 return;
1531
1532 if (!is_os_64) {
1533 DRM_WARN("Not 64-bit OS, feature not supported\n");
1534 goto def_value;
1535 }
1536 si_meminfo(&si);
1537 total_memory = (uint64_t)si.totalram * si.mem_unit;
1538
1539 if ((amdgpu_smu_memory_pool_size == 1) ||
1540 (amdgpu_smu_memory_pool_size == 2)) {
1541 if (total_memory < dram_size_three_GB)
1542 goto def_value1;
1543 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1544 (amdgpu_smu_memory_pool_size == 8)) {
1545 if (total_memory < dram_size_seven_GB)
1546 goto def_value1;
1547 } else {
1548 DRM_WARN("Smu memory pool size not supported\n");
1549 goto def_value;
1550 }
1551 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1552
1553 return;
1554
1555def_value1:
1556 DRM_WARN("No enough system memory\n");
1557def_value:
1558 adev->pm.smu_prv_buffer_size = 0;
1559}
1560
9f6a7857
HR
1561static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1562{
1563 if (!(adev->flags & AMD_IS_APU) ||
1564 adev->asic_type < CHIP_RAVEN)
1565 return 0;
1566
1567 switch (adev->asic_type) {
1568 case CHIP_RAVEN:
1569 if (adev->pdev->device == 0x15dd)
1570 adev->apu_flags |= AMD_APU_IS_RAVEN;
1571 if (adev->pdev->device == 0x15d8)
1572 adev->apu_flags |= AMD_APU_IS_PICASSO;
1573 break;
1574 case CHIP_RENOIR:
1575 if ((adev->pdev->device == 0x1636) ||
1576 (adev->pdev->device == 0x164c))
1577 adev->apu_flags |= AMD_APU_IS_RENOIR;
1578 else
1579 adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1580 break;
1581 case CHIP_VANGOGH:
1582 adev->apu_flags |= AMD_APU_IS_VANGOGH;
1583 break;
1584 case CHIP_YELLOW_CARP:
1585 break;
d0f56dc2 1586 case CHIP_CYAN_SKILLFISH:
dfcc3e8c
AD
1587 if ((adev->pdev->device == 0x13FE) ||
1588 (adev->pdev->device == 0x143F))
d0f56dc2
TZ
1589 adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1590 break;
9f6a7857 1591 default:
4eaf21b7 1592 break;
9f6a7857
HR
1593 }
1594
1595 return 0;
1596}
1597
d38ceaf9 1598/**
06ec9070 1599 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
1600 *
1601 * @adev: amdgpu_device pointer
1602 *
1603 * Validates certain module parameters and updates
1604 * the associated values used by the driver (all asics).
1605 */
912dfc84 1606static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1607{
5b011235
CZ
1608 if (amdgpu_sched_jobs < 4) {
1609 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1610 amdgpu_sched_jobs);
1611 amdgpu_sched_jobs = 4;
47fc644f 1612 } else if (!is_power_of_2(amdgpu_sched_jobs)) {
5b011235
CZ
1613 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1614 amdgpu_sched_jobs);
1615 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1616 }
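	/* Example of the clamping just above: amdgpu.sched_jobs=2 is below the
	 * minimum and is raised to 4, while amdgpu.sched_jobs=5 is not a power
	 * of two and is rounded up to 8.
	 */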
d38ceaf9 1617
83e74db6 1618 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
1619 /* gart size must be greater or equal to 32M */
1620 dev_warn(adev->dev, "gart size (%d) too small\n",
1621 amdgpu_gart_size);
83e74db6 1622 amdgpu_gart_size = -1;
d38ceaf9
AD
1623 }
1624
36d38372 1625 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1626 /* gtt size must be greater or equal to 32M */
36d38372
CK
1627 dev_warn(adev->dev, "gtt size (%d) too small\n",
1628 amdgpu_gtt_size);
1629 amdgpu_gtt_size = -1;
d38ceaf9
AD
1630 }
1631
d07f14be
RH
1632 /* valid range is between 4 and 9 inclusive */
1633 if (amdgpu_vm_fragment_size != -1 &&
1634 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1635 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1636 amdgpu_vm_fragment_size = -1;
1637 }
1638
5d5bd5e3
KW
1639 if (amdgpu_sched_hw_submission < 2) {
1640 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1641 amdgpu_sched_hw_submission);
1642 amdgpu_sched_hw_submission = 2;
1643 } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1644 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1645 amdgpu_sched_hw_submission);
1646 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1647 }
1648
2656fd23
AG
1649 if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1650 dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1651 amdgpu_reset_method = -1;
1652 }
1653
7951e376
RZ
1654 amdgpu_device_check_smu_prv_buffer_size(adev);
1655
06ec9070 1656 amdgpu_device_check_vm_size(adev);
d38ceaf9 1657
06ec9070 1658 amdgpu_device_check_block_size(adev);
6a7f76e7 1659
19aede77 1660 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84 1661
e3c00faa 1662 return 0;
d38ceaf9
AD
1663}
1664
1665/**
1666 * amdgpu_switcheroo_set_state - set switcheroo state
1667 *
1668 * @pdev: pci dev pointer
1694467b 1669 * @state: vga_switcheroo state
d38ceaf9 1670 *
12024b17 1671 * Callback for the switcheroo driver. Suspends or resumes
d38ceaf9
AD
1672 * the asics before or after it is powered up using ACPI methods.
1673 */
8aba21b7
LT
1674static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1675 enum vga_switcheroo_state state)
d38ceaf9
AD
1676{
1677 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 1678 int r;
d38ceaf9 1679
b98c6299 1680 if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
1681 return;
1682
1683 if (state == VGA_SWITCHEROO_ON) {
dd4fa6c1 1684 pr_info("switched on\n");
d38ceaf9
AD
1685 /* don't suspend or resume card normally */
1686 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1687
8f66090b
TZ
1688 pci_set_power_state(pdev, PCI_D0);
1689 amdgpu_device_load_pci_state(pdev);
1690 r = pci_enable_device(pdev);
de185019
AD
1691 if (r)
1692 DRM_WARN("pci_enable_device failed (%d)\n", r);
1693 amdgpu_device_resume(dev, true);
d38ceaf9 1694
d38ceaf9 1695 dev->switch_power_state = DRM_SWITCH_POWER_ON;
d38ceaf9 1696 } else {
dd4fa6c1 1697 pr_info("switched off\n");
d38ceaf9 1698 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
de185019 1699 amdgpu_device_suspend(dev, true);
8f66090b 1700 amdgpu_device_cache_pci_state(pdev);
de185019 1701 /* Shut down the device */
8f66090b
TZ
1702 pci_disable_device(pdev);
1703 pci_set_power_state(pdev, PCI_D3cold);
d38ceaf9
AD
1704 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1705 }
1706}
1707
1708/**
1709 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1710 *
1711 * @pdev: pci dev pointer
1712 *
1713 * Callback for the switcheroo driver. Checks whether the switcheroo
1714 * state can be changed.
1715 * Returns true if the state can be changed, false if not.
1716 */
1717static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1718{
1719 struct drm_device *dev = pci_get_drvdata(pdev);
1720
b8920e1e 1721 /*
d38ceaf9
AD
1722 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1723 * locking inversion with the driver load path. And the access here is
1724 * completely racy anyway. So don't bother with locking for now.
1725 */
7e13ad89 1726 return atomic_read(&dev->open_count) == 0;
d38ceaf9
AD
1727}
1728
1729static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1730 .set_gpu_state = amdgpu_switcheroo_set_state,
1731 .reprobe = NULL,
1732 .can_switch = amdgpu_switcheroo_can_switch,
1733};
1734
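/*
 * Illustrative note (registration is done elsewhere in the driver, not
 * shown here): this ops table is what gets handed to the vga_switcheroo
 * core, typically via something like
 *
 *	vga_switcheroo_register_client(adev->pdev, &amdgpu_switcheroo_ops, px);
 *
 * after which the core invokes the callbacks above when the mux switches.
 */
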
e3ecdffa
AD
1735/**
1736 * amdgpu_device_ip_set_clockgating_state - set the CG state
1737 *
87e3f136 1738 * @dev: amdgpu_device pointer
e3ecdffa
AD
1739 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1740 * @state: clockgating state (gate or ungate)
1741 *
1742 * Sets the requested clockgating state for all instances of
1743 * the hardware IP specified.
1744 * Returns the error code from the last instance.
1745 */
43fa561f 1746int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1747 enum amd_ip_block_type block_type,
1748 enum amd_clockgating_state state)
d38ceaf9 1749{
43fa561f 1750 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1751 int i, r = 0;
1752
1753 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1754 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1755 continue;
c722865a
RZ
1756 if (adev->ip_blocks[i].version->type != block_type)
1757 continue;
1758 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1759 continue;
1760 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1761 (void *)adev, state);
1762 if (r)
1763 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1764 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1765 }
1766 return r;
1767}
1768
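/*
 * Illustrative example only: a caller that wants to ungate clocks for every
 * GFX instance could use
 *
 *	r = amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
 *						   AMD_CG_STATE_UNGATE);
 *
 * and would get back the error code reported by the last matching instance.
 */
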
e3ecdffa
AD
1769/**
1770 * amdgpu_device_ip_set_powergating_state - set the PG state
1771 *
87e3f136 1772 * @dev: amdgpu_device pointer
e3ecdffa
AD
1773 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1774 * @state: powergating state (gate or ungate)
1775 *
1776 * Sets the requested powergating state for all instances of
1777 * the hardware IP specified.
1778 * Returns the error code from the last instance.
1779 */
43fa561f 1780int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1781 enum amd_ip_block_type block_type,
1782 enum amd_powergating_state state)
d38ceaf9 1783{
43fa561f 1784 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1785 int i, r = 0;
1786
1787 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1788 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1789 continue;
c722865a
RZ
1790 if (adev->ip_blocks[i].version->type != block_type)
1791 continue;
1792 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1793 continue;
1794 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1795 (void *)adev, state);
1796 if (r)
1797 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1798 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1799 }
1800 return r;
1801}
1802
e3ecdffa
AD
1803/**
1804 * amdgpu_device_ip_get_clockgating_state - get the CG state
1805 *
1806 * @adev: amdgpu_device pointer
1807 * @flags: clockgating feature flags
1808 *
1809 * Walks the list of IPs on the device and updates the clockgating
1810 * flags for each IP.
1811 * Updates @flags with the feature flags for each hardware IP where
1812 * clockgating is enabled.
1813 */
2990a1fc 1814void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
25faeddc 1815 u64 *flags)
6cb2d4e4
HR
1816{
1817 int i;
1818
1819 for (i = 0; i < adev->num_ip_blocks; i++) {
1820 if (!adev->ip_blocks[i].status.valid)
1821 continue;
1822 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1823 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1824 }
1825}
1826
e3ecdffa
AD
1827/**
1828 * amdgpu_device_ip_wait_for_idle - wait for idle
1829 *
1830 * @adev: amdgpu_device pointer
1831 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1832 *
1833 * Waits for the requested hardware IP to be idle.
1834 * Returns 0 for success or a negative error code on failure.
1835 */
2990a1fc
AD
1836int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1837 enum amd_ip_block_type block_type)
5dbbb60b
AD
1838{
1839 int i, r;
1840
1841 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1842 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1843 continue;
a1255107
AD
1844 if (adev->ip_blocks[i].version->type == block_type) {
1845 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1846 if (r)
1847 return r;
1848 break;
1849 }
1850 }
1851 return 0;
1852
1853}
1854
e3ecdffa
AD
1855/**
1856 * amdgpu_device_ip_is_idle - is the hardware IP idle
1857 *
1858 * @adev: amdgpu_device pointer
1859 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1860 *
1861 * Check if the hardware IP is idle or not.
1862 * Returns true if the IP is idle, false if not.
1863 */
2990a1fc
AD
1864bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1865 enum amd_ip_block_type block_type)
5dbbb60b
AD
1866{
1867 int i;
1868
1869 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1870 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1871 continue;
a1255107
AD
1872 if (adev->ip_blocks[i].version->type == block_type)
1873 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1874 }
1875 return true;
1876
1877}
1878
e3ecdffa
AD
1879/**
1880 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1881 *
1882 * @adev: amdgpu_device pointer
87e3f136 1883 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1884 *
1885 * Returns a pointer to the hardware IP block structure
1886 * if it exists for the asic, otherwise NULL.
1887 */
2990a1fc
AD
1888struct amdgpu_ip_block *
1889amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1890 enum amd_ip_block_type type)
d38ceaf9
AD
1891{
1892 int i;
1893
1894 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1895 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1896 return &adev->ip_blocks[i];
1897
1898 return NULL;
1899}
1900
1901/**
2990a1fc 1902 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1903 *
1904 * @adev: amdgpu_device pointer
5fc3aeeb 1905 * @type: enum amd_ip_block_type
d38ceaf9
AD
1906 * @major: major version
1907 * @minor: minor version
1908 *
1909 * return 0 if equal or greater
1910 * return 1 if smaller or the ip_block doesn't exist
1911 */
2990a1fc
AD
1912int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1913 enum amd_ip_block_type type,
1914 u32 major, u32 minor)
d38ceaf9 1915{
2990a1fc 1916 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1917
a1255107
AD
1918 if (ip_block && ((ip_block->version->major > major) ||
1919 ((ip_block->version->major == major) &&
1920 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1921 return 0;
1922
1923 return 1;
1924}
1925
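/*
 * Illustrative example only: a caller that requires at least SMC 7.0 could
 * test
 *
 *	if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_SMC, 7, 0))
 *		use_newer_smc_feature(adev);
 *
 * where use_newer_smc_feature() is a hypothetical helper; a return of 1
 * means the SMC block is older than 7.0 or not present at all.
 */
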
a1255107 1926/**
2990a1fc 1927 * amdgpu_device_ip_block_add
a1255107
AD
1928 *
1929 * @adev: amdgpu_device pointer
1930 * @ip_block_version: pointer to the IP to add
1931 *
1932 * Adds the IP block driver information to the collection of IPs
1933 * on the asic.
1934 */
2990a1fc
AD
1935int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1936 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1937{
1938 if (!ip_block_version)
1939 return -EINVAL;
1940
7bd939d0
LG
1941 switch (ip_block_version->type) {
1942 case AMD_IP_BLOCK_TYPE_VCN:
1943 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
1944 return 0;
1945 break;
1946 case AMD_IP_BLOCK_TYPE_JPEG:
1947 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
1948 return 0;
1949 break;
1950 default:
1951 break;
1952 }
1953
e966a725 1954 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1955 ip_block_version->funcs->name);
1956
a1255107
AD
1957 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1958
1959 return 0;
1960}
1961
e3ecdffa
AD
1962/**
1963 * amdgpu_device_enable_virtual_display - enable virtual display feature
1964 *
1965 * @adev: amdgpu_device pointer
1966 *
1967 * Enables the virtual display feature if the user has enabled it via
1968 * the module parameter virtual_display. This feature provides virtual
1969 * display hardware on headless boards or in virtualized environments.
1970 * This function parses and validates the configuration string specified by
1971 * the user and configures the virtual display configuration (number of
1972 * virtual connectors, crtcs, etc.) specified.
1973 */
483ef985 1974static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1975{
1976 adev->enable_virtual_display = false;
1977
1978 if (amdgpu_virtual_display) {
8f66090b 1979 const char *pci_address_name = pci_name(adev->pdev);
0f66356d 1980 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1981
1982 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1983 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1984 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1985 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1986 if (!strcmp("all", pciaddname)
1987 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1988 long num_crtc;
1989 int res = -1;
1990
9accf2fd 1991 adev->enable_virtual_display = true;
0f66356d
ED
1992
1993 if (pciaddname_tmp)
1994 res = kstrtol(pciaddname_tmp, 10,
1995 &num_crtc);
1996
1997 if (!res) {
1998 if (num_crtc < 1)
1999 num_crtc = 1;
2000 if (num_crtc > 6)
2001 num_crtc = 6;
2002 adev->mode_info.num_crtc = num_crtc;
2003 } else {
2004 adev->mode_info.num_crtc = 1;
2005 }
9accf2fd
ED
2006 break;
2007 }
2008 }
2009
0f66356d
ED
2010 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
2011 amdgpu_virtual_display, pci_address_name,
2012 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
2013
2014 kfree(pciaddstr);
2015 }
2016}
2017
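/*
 * Illustrative note: per the parsing above, virtual_display takes a
 * semicolon-separated list of PCI addresses, each with an optional crtc
 * count after a comma, e.g.
 *
 *	amdgpu.virtual_display=0000:01:00.0,2;0000:02:00.0,1
 *
 * or simply "all" to enable it for every device; the crtc count is clamped
 * to the 1..6 range.
 */
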
25263da3
AD
2018void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
2019{
2020 if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
2021 adev->mode_info.num_crtc = 1;
2022 adev->enable_virtual_display = true;
2023 DRM_INFO("virtual_display:%d, num_crtc:%d\n",
2024 adev->enable_virtual_display, adev->mode_info.num_crtc);
2025 }
2026}
2027
e3ecdffa
AD
2028/**
2029 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
2030 *
2031 * @adev: amdgpu_device pointer
2032 *
2033 * Parses the asic configuration parameters specified in the gpu info
2034 * firmware and makes them available to the driver for use in configuring
2035 * the asic.
2036 * Returns 0 on success, -EINVAL on failure.
2037 */
e2a75f88
AD
2038static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
2039{
e2a75f88 2040 const char *chip_name;
c0a43457 2041 char fw_name[40];
e2a75f88
AD
2042 int err;
2043 const struct gpu_info_firmware_header_v1_0 *hdr;
2044
ab4fe3e1
HR
2045 adev->firmware.gpu_info_fw = NULL;
2046
72de33f8 2047 if (adev->mman.discovery_bin) {
cc375d8c
TY
2048 /*
2049 * FIXME: The bounding box is still needed by Navi12, so
e24d0e91 2050 * temporarily read it from gpu_info firmware. Should be dropped
cc375d8c
TY
2051 * when DAL no longer needs it.
2052 */
2053 if (adev->asic_type != CHIP_NAVI12)
2054 return 0;
258620d0
AD
2055 }
2056
e2a75f88 2057 switch (adev->asic_type) {
e2a75f88
AD
2058 default:
2059 return 0;
2060 case CHIP_VEGA10:
2061 chip_name = "vega10";
2062 break;
3f76dced
AD
2063 case CHIP_VEGA12:
2064 chip_name = "vega12";
2065 break;
2d2e5e7e 2066 case CHIP_RAVEN:
54f78a76 2067 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
54c4d17e 2068 chip_name = "raven2";
54f78a76 2069 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
741deade 2070 chip_name = "picasso";
54c4d17e
FX
2071 else
2072 chip_name = "raven";
2d2e5e7e 2073 break;
65e60f6e
LM
2074 case CHIP_ARCTURUS:
2075 chip_name = "arcturus";
2076 break;
42b325e5
XY
2077 case CHIP_NAVI12:
2078 chip_name = "navi12";
2079 break;
e2a75f88
AD
2080 }
2081
2082 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
b31d3063 2083 err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
e2a75f88
AD
2084 if (err) {
2085 dev_err(adev->dev,
b31d3063 2086 "Failed to get gpu_info firmware \"%s\"\n",
e2a75f88
AD
2087 fw_name);
2088 goto out;
2089 }
2090
ab4fe3e1 2091 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
2092 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
2093
2094 switch (hdr->version_major) {
2095 case 1:
2096 {
2097 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 2098 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
2099 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2100
cc375d8c
TY
2101 /*
2102 * Should be dropped when DAL no longer needs it.
2103 */
2104 if (adev->asic_type == CHIP_NAVI12)
ec51d3fa
XY
2105 goto parse_soc_bounding_box;
2106
b5ab16bf
AD
2107 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2108 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2109 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2110 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 2111 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
2112 le32_to_cpu(gpu_info_fw->gc_num_tccs);
2113 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2114 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2115 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2116 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 2117 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
2118 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2119 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
2120 adev->gfx.cu_info.max_waves_per_simd =
2121 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2122 adev->gfx.cu_info.max_scratch_slots_per_cu =
2123 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2124 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 2125 if (hdr->version_minor >= 1) {
35c2e910
HZ
2126 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2127 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2128 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2129 adev->gfx.config.num_sc_per_sh =
2130 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2131 adev->gfx.config.num_packer_per_sc =
2132 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2133 }
ec51d3fa
XY
2134
2135parse_soc_bounding_box:
ec51d3fa
XY
2136 /*
2137 * soc bounding box info is not integrated in the discovery table,
258620d0 2138 * we always need to parse it from gpu info firmware if needed.
ec51d3fa 2139 */
48321c3d
HW
2140 if (hdr->version_minor == 2) {
2141 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2142 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2143 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2144 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2145 }
e2a75f88
AD
2146 break;
2147 }
2148 default:
2149 dev_err(adev->dev,
2150 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2151 err = -EINVAL;
2152 goto out;
2153 }
2154out:
e2a75f88
AD
2155 return err;
2156}
2157
e3ecdffa
AD
2158/**
2159 * amdgpu_device_ip_early_init - run early init for hardware IPs
2160 *
2161 * @adev: amdgpu_device pointer
2162 *
2163 * Early initialization pass for hardware IPs. The hardware IPs that make
2164 * up each asic are discovered and each IP's early_init callback is run. This
2165 * is the first stage in initializing the asic.
2166 * Returns 0 on success, negative error code on failure.
2167 */
06ec9070 2168static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 2169{
901e2be2
AD
2170 struct drm_device *dev = adev_to_drm(adev);
2171 struct pci_dev *parent;
aaa36a97 2172 int i, r;
ced69502 2173 bool total;
d38ceaf9 2174
483ef985 2175 amdgpu_device_enable_virtual_display(adev);
a6be7570 2176
00a979f3 2177 if (amdgpu_sriov_vf(adev)) {
00a979f3 2178 r = amdgpu_virt_request_full_gpu(adev, true);
aaa36a97
AD
2179 if (r)
2180 return r;
00a979f3
WS
2181 }
2182
d38ceaf9 2183 switch (adev->asic_type) {
33f34802
KW
2184#ifdef CONFIG_DRM_AMDGPU_SI
2185 case CHIP_VERDE:
2186 case CHIP_TAHITI:
2187 case CHIP_PITCAIRN:
2188 case CHIP_OLAND:
2189 case CHIP_HAINAN:
295d0daf 2190 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
2191 r = si_set_ip_blocks(adev);
2192 if (r)
2193 return r;
2194 break;
2195#endif
a2e73f56
AD
2196#ifdef CONFIG_DRM_AMDGPU_CIK
2197 case CHIP_BONAIRE:
2198 case CHIP_HAWAII:
2199 case CHIP_KAVERI:
2200 case CHIP_KABINI:
2201 case CHIP_MULLINS:
e1ad2d53 2202 if (adev->flags & AMD_IS_APU)
a2e73f56 2203 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
2204 else
2205 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
2206
2207 r = cik_set_ip_blocks(adev);
2208 if (r)
2209 return r;
2210 break;
2211#endif
da87c30b
AD
2212 case CHIP_TOPAZ:
2213 case CHIP_TONGA:
2214 case CHIP_FIJI:
2215 case CHIP_POLARIS10:
2216 case CHIP_POLARIS11:
2217 case CHIP_POLARIS12:
2218 case CHIP_VEGAM:
2219 case CHIP_CARRIZO:
2220 case CHIP_STONEY:
2221 if (adev->flags & AMD_IS_APU)
2222 adev->family = AMDGPU_FAMILY_CZ;
2223 else
2224 adev->family = AMDGPU_FAMILY_VI;
2225
2226 r = vi_set_ip_blocks(adev);
2227 if (r)
2228 return r;
2229 break;
d38ceaf9 2230 default:
63352b7f
AD
2231 r = amdgpu_discovery_set_ip_blocks(adev);
2232 if (r)
2233 return r;
2234 break;
d38ceaf9
AD
2235 }
2236
901e2be2
AD
2237 if (amdgpu_has_atpx() &&
2238 (amdgpu_is_atpx_hybrid() ||
2239 amdgpu_has_atpx_dgpu_power_cntl()) &&
2240 ((adev->flags & AMD_IS_APU) == 0) &&
2241 !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
2242 adev->flags |= AMD_IS_PX;
2243
85ac2021
AD
2244 if (!(adev->flags & AMD_IS_APU)) {
2245 parent = pci_upstream_bridge(adev->pdev);
2246 adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2247 }
901e2be2 2248
1884734a 2249
3b94fb10 2250 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 2251 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 2252 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
4215a119
HC
2253 if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2254 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
00f54b97 2255
ced69502 2256 total = true;
d38ceaf9
AD
2257 for (i = 0; i < adev->num_ip_blocks; i++) {
2258 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
0c451baf 2259 DRM_WARN("disabled ip block: %d <%s>\n",
ed8cf00c 2260 i, adev->ip_blocks[i].version->funcs->name);
a1255107 2261 adev->ip_blocks[i].status.valid = false;
d38ceaf9 2262 } else {
a1255107
AD
2263 if (adev->ip_blocks[i].version->funcs->early_init) {
2264 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 2265 if (r == -ENOENT) {
a1255107 2266 adev->ip_blocks[i].status.valid = false;
2c1a2784 2267 } else if (r) {
a1255107
AD
2268 DRM_ERROR("early_init of IP block <%s> failed %d\n",
2269 adev->ip_blocks[i].version->funcs->name, r);
ced69502 2270 total = false;
2c1a2784 2271 } else {
a1255107 2272 adev->ip_blocks[i].status.valid = true;
2c1a2784 2273 }
974e6b64 2274 } else {
a1255107 2275 adev->ip_blocks[i].status.valid = true;
d38ceaf9 2276 }
d38ceaf9 2277 }
21a249ca
AD
2278 /* get the vbios after the asic_funcs are set up */
2279 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
2280 r = amdgpu_device_parse_gpu_info_fw(adev);
2281 if (r)
2282 return r;
2283
21a249ca 2284 /* Read BIOS */
9535a86a
SZ
2285 if (amdgpu_device_read_bios(adev)) {
2286 if (!amdgpu_get_bios(adev))
2287 return -EINVAL;
21a249ca 2288
9535a86a
SZ
2289 r = amdgpu_atombios_init(adev);
2290 if (r) {
2291 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2292 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2293 return r;
2294 }
21a249ca 2295 }
77eabc6f
PJZ
2296
2297 /*get pf2vf msg info at it's earliest time*/
2298 if (amdgpu_sriov_vf(adev))
2299 amdgpu_virt_init_data_exchange(adev);
2300
21a249ca 2301 }
d38ceaf9 2302 }
ced69502
ML
2303 if (!total)
2304 return -ENODEV;
d38ceaf9 2305
00fa4035 2306 amdgpu_amdkfd_device_probe(adev);
395d1fb9
NH
2307 adev->cg_flags &= amdgpu_cg_mask;
2308 adev->pg_flags &= amdgpu_pg_mask;
2309
d38ceaf9
AD
2310 return 0;
2311}
2312
0a4f2520
RZ
2313static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2314{
2315 int i, r;
2316
2317 for (i = 0; i < adev->num_ip_blocks; i++) {
2318 if (!adev->ip_blocks[i].status.sw)
2319 continue;
2320 if (adev->ip_blocks[i].status.hw)
2321 continue;
2322 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 2323 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
2324 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2325 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2326 if (r) {
2327 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2328 adev->ip_blocks[i].version->funcs->name, r);
2329 return r;
2330 }
2331 adev->ip_blocks[i].status.hw = true;
2332 }
2333 }
2334
2335 return 0;
2336}
2337
2338static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2339{
2340 int i, r;
2341
2342 for (i = 0; i < adev->num_ip_blocks; i++) {
2343 if (!adev->ip_blocks[i].status.sw)
2344 continue;
2345 if (adev->ip_blocks[i].status.hw)
2346 continue;
2347 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2348 if (r) {
2349 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2350 adev->ip_blocks[i].version->funcs->name, r);
2351 return r;
2352 }
2353 adev->ip_blocks[i].status.hw = true;
2354 }
2355
2356 return 0;
2357}
2358
7a3e0bb2
RZ
2359static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2360{
2361 int r = 0;
2362 int i;
80f41f84 2363 uint32_t smu_version;
7a3e0bb2
RZ
2364
2365 if (adev->asic_type >= CHIP_VEGA10) {
2366 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
2367 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2368 continue;
2369
e3c1b071 2370 if (!adev->ip_blocks[i].status.sw)
2371 continue;
2372
482f0e53
ML
2373 /* no need to do the fw loading again if already done*/
2374 if (adev->ip_blocks[i].status.hw == true)
2375 break;
2376
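			/* on reset or resume from suspend call resume(), otherwise do a full PSP hw_init */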
53b3f8f4 2377 if (amdgpu_in_reset(adev) || adev->in_suspend) {
482f0e53
ML
2378 r = adev->ip_blocks[i].version->funcs->resume(adev);
2379 if (r) {
2380 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 2381 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
2382 return r;
2383 }
2384 } else {
2385 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2386 if (r) {
2387 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2388 adev->ip_blocks[i].version->funcs->name, r);
2389 return r;
7a3e0bb2 2390 }
7a3e0bb2 2391 }
482f0e53
ML
2392
2393 adev->ip_blocks[i].status.hw = true;
2394 break;
7a3e0bb2
RZ
2395 }
2396 }
482f0e53 2397
8973d9ec
ED
2398 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2399 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 2400
80f41f84 2401 return r;
7a3e0bb2
RZ
2402}
2403
5fd8518d
AG
2404static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2405{
2406 long timeout;
2407 int r, i;
2408
2409 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2410 struct amdgpu_ring *ring = adev->rings[i];
2411
2412 /* No need to setup the GPU scheduler for rings that don't need it */
2413 if (!ring || ring->no_scheduler)
2414 continue;
2415
2416 switch (ring->funcs->type) {
2417 case AMDGPU_RING_TYPE_GFX:
2418 timeout = adev->gfx_timeout;
2419 break;
2420 case AMDGPU_RING_TYPE_COMPUTE:
2421 timeout = adev->compute_timeout;
2422 break;
2423 case AMDGPU_RING_TYPE_SDMA:
2424 timeout = adev->sdma_timeout;
2425 break;
2426 default:
2427 timeout = adev->video_timeout;
2428 break;
2429 }
2430
2431 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
11f25c84 2432 ring->num_hw_submission, 0,
8ab62eda
JG
2433 timeout, adev->reset_domain->wq,
2434 ring->sched_score, ring->name,
2435 adev->dev);
5fd8518d
AG
2436 if (r) {
2437 DRM_ERROR("Failed to create scheduler on ring %s.\n",
2438 ring->name);
2439 return r;
2440 }
2441 }
2442
d425c6f4
JZ
2443 amdgpu_xcp_update_partition_sched_list(adev);
2444
5fd8518d
AG
2445 return 0;
2446}
2447
2448
e3ecdffa
AD
2449/**
2450 * amdgpu_device_ip_init - run init for hardware IPs
2451 *
2452 * @adev: amdgpu_device pointer
2453 *
2454 * Main initialization pass for hardware IPs. The list of all the hardware
2455 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2456 * are run. sw_init initializes the software state associated with each IP
2457 * and hw_init initializes the hardware associated with each IP.
2458 * Returns 0 on success, negative error code on failure.
2459 */
06ec9070 2460static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
2461{
2462 int i, r;
2463
c030f2e4 2464 r = amdgpu_ras_init(adev);
2465 if (r)
2466 return r;
2467
d38ceaf9 2468 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2469 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2470 continue;
a1255107 2471 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 2472 if (r) {
a1255107
AD
2473 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2474 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 2475 goto init_failed;
2c1a2784 2476 }
a1255107 2477 adev->ip_blocks[i].status.sw = true;
bfca0289 2478
c1c39032
AD
2479 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2480 /* need to do common hw init early so everything is set up for gmc */
2481 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2482 if (r) {
2483 DRM_ERROR("hw_init %d failed %d\n", i, r);
2484 goto init_failed;
2485 }
2486 adev->ip_blocks[i].status.hw = true;
2487 } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2488 /* need to do gmc hw init early so we can allocate gpu mem */
892deb48
VS
2489 /* Try to reserve bad pages early */
2490 if (amdgpu_sriov_vf(adev))
2491 amdgpu_virt_exchange_data(adev);
2492
7ccfd79f 2493 r = amdgpu_device_mem_scratch_init(adev);
2c1a2784 2494 if (r) {
7ccfd79f 2495 DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
72d3f592 2496 goto init_failed;
2c1a2784 2497 }
a1255107 2498 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
2499 if (r) {
2500 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 2501 goto init_failed;
2c1a2784 2502 }
06ec9070 2503 r = amdgpu_device_wb_init(adev);
2c1a2784 2504 if (r) {
06ec9070 2505 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 2506 goto init_failed;
2c1a2784 2507 }
a1255107 2508 adev->ip_blocks[i].status.hw = true;
2493664f
ML
2509
2510 /* right after GMC hw init, we create CSA */
02ff519e 2511 if (adev->gfx.mcbp) {
1e256e27 2512 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
58ab2c08
CK
2513 AMDGPU_GEM_DOMAIN_VRAM |
2514 AMDGPU_GEM_DOMAIN_GTT,
2515 AMDGPU_CSA_SIZE);
2493664f
ML
2516 if (r) {
2517 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 2518 goto init_failed;
2493664f
ML
2519 }
2520 }
d38ceaf9
AD
2521 }
2522 }
2523
c9ffa427 2524 if (amdgpu_sriov_vf(adev))
22c16d25 2525 amdgpu_virt_init_data_exchange(adev);
c9ffa427 2526
533aed27
AG
2527 r = amdgpu_ib_pool_init(adev);
2528 if (r) {
2529 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2530 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2531 goto init_failed;
2532 }
2533
c8963ea4
RZ
2534 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2535 if (r)
72d3f592 2536 goto init_failed;
0a4f2520
RZ
2537
2538 r = amdgpu_device_ip_hw_init_phase1(adev);
2539 if (r)
72d3f592 2540 goto init_failed;
0a4f2520 2541
7a3e0bb2
RZ
2542 r = amdgpu_device_fw_loading(adev);
2543 if (r)
72d3f592 2544 goto init_failed;
7a3e0bb2 2545
0a4f2520
RZ
2546 r = amdgpu_device_ip_hw_init_phase2(adev);
2547 if (r)
72d3f592 2548 goto init_failed;
d38ceaf9 2549
121a2bc6
AG
2550 /*
2551 * retired pages will be loaded from eeprom and reserved here,
2552 * it should be called after amdgpu_device_ip_hw_init_phase2 since
2553 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2554 * for I2C communication, which is only true at this point.
b82e65a9
GC
2555 *
2556 * amdgpu_ras_recovery_init may fail, but the upper level only cares about
2557 * the failure from a bad gpu situation and stops the amdgpu init process
2558 * accordingly. For other failure cases, it will still release all
2559 * the resources and print an error message, rather than returning a
2560 * negative value to the upper level.
121a2bc6
AG
2561 *
2562 * Note: theoretically, this should be called before all vram allocations
2563 * to protect retired pages from being reused.
2564 */
b82e65a9
GC
2565 r = amdgpu_ras_recovery_init(adev);
2566 if (r)
2567 goto init_failed;
121a2bc6 2568
cfbb6b00
AG
2569 /**
2570 * In case of XGMI grab extra reference for reset domain for this device
2571 */
a4c63caf 2572 if (adev->gmc.xgmi.num_physical_nodes > 1) {
cfbb6b00 2573 if (amdgpu_xgmi_add_device(adev) == 0) {
46c67660 2574 if (!amdgpu_sriov_vf(adev)) {
2efc30f0
VC
2575 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2576
dfd0287b
LH
2577 if (WARN_ON(!hive)) {
2578 r = -ENOENT;
2579 goto init_failed;
2580 }
2581
46c67660 2582 if (!hive->reset_domain ||
2583 !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2584 r = -ENOENT;
2585 amdgpu_put_xgmi_hive(hive);
2586 goto init_failed;
2587 }
2588
2589 /* Drop the early temporary reset domain we created for device */
2590 amdgpu_reset_put_reset_domain(adev->reset_domain);
2591 adev->reset_domain = hive->reset_domain;
9dfa4860 2592 amdgpu_put_xgmi_hive(hive);
cfbb6b00 2593 }
a4c63caf
AG
2594 }
2595 }
2596
5fd8518d
AG
2597 r = amdgpu_device_init_schedulers(adev);
2598 if (r)
2599 goto init_failed;
e3c1b071 2600
2601 /* Don't init kfd if whole hive need to be reset during init */
84b4dd3f
PY
2602 if (!adev->gmc.xgmi.pending_reset) {
2603 kgd2kfd_init_zone_device(adev);
e3c1b071 2604 amdgpu_amdkfd_device_init(adev);
84b4dd3f 2605 }
c6332b97 2606
bd607166
KR
2607 amdgpu_fru_get_product_info(adev);
2608
72d3f592 2609init_failed:
c6332b97 2610
72d3f592 2611 return r;
d38ceaf9
AD
2612}
2613
e3ecdffa
AD
2614/**
2615 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2616 *
2617 * @adev: amdgpu_device pointer
2618 *
2619 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2620 * this function before a GPU reset. If the value is retained after a
2621 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2622 */
06ec9070 2623static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2624{
2625 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2626}
2627
e3ecdffa
AD
2628/**
2629 * amdgpu_device_check_vram_lost - check if vram is valid
2630 *
2631 * @adev: amdgpu_device pointer
2632 *
2633 * Checks the reset magic value written to the gart pointer in VRAM.
2634 * The driver calls this after a GPU reset to see if the contents of
2635 * VRAM are lost or not.
2636 * returns true if vram is lost, false if not.
2637 */
06ec9070 2638static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2639{
dadce777
EQ
2640 if (memcmp(adev->gart.ptr, adev->reset_magic,
2641 AMDGPU_RESET_MAGIC_NUM))
2642 return true;
2643
53b3f8f4 2644 if (!amdgpu_in_reset(adev))
dadce777
EQ
2645 return false;
2646
2647 /*
2648 * For all ASICs with baco/mode1 reset, the VRAM is
2649 * always assumed to be lost.
2650 */
2651 switch (amdgpu_asic_reset_method(adev)) {
2652 case AMD_RESET_METHOD_BACO:
2653 case AMD_RESET_METHOD_MODE1:
2654 return true;
2655 default:
2656 return false;
2657 }
0c49e0b8
CZ
2658}
2659
e3ecdffa 2660/**
1112a46b 2661 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2662 *
2663 * @adev: amdgpu_device pointer
b8b72130 2664 * @state: clockgating state (gate or ungate)
e3ecdffa 2665 *
e3ecdffa 2666 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2667 * set_clockgating_state callbacks are run.
2668 * The late-init pass enables clockgating for the hardware IPs; the fini
2669 * or suspend pass disables it.
e3ecdffa
AD
2670 * Returns 0 on success, negative error code on failure.
2671 */
fdd34271 2672
5d89bb2d
LL
2673int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2674 enum amd_clockgating_state state)
d38ceaf9 2675{
1112a46b 2676 int i, j, r;
d38ceaf9 2677
4a2ba394
SL
2678 if (amdgpu_emu_mode == 1)
2679 return 0;
2680
1112a46b
RZ
2681 for (j = 0; j < adev->num_ip_blocks; j++) {
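		/* walk the IP list forward when gating, in reverse when ungating */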
2682 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2683 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2684 continue;
47198eb7 2685 /* skip CG for GFX, SDMA on S0ix */
5d70a549 2686 if (adev->in_s0ix &&
47198eb7
AD
2687 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2688 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2689 continue;
4a446d55 2690 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2691 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2692 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2693 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2694 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2695 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2696 /* enable clockgating to save power */
a1255107 2697 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2698 state);
4a446d55
AD
2699 if (r) {
2700 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2701 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2702 return r;
2703 }
b0b00ff1 2704 }
d38ceaf9 2705 }
06b18f61 2706
c9f96fd5
RZ
2707 return 0;
2708}
2709
5d89bb2d
LL
2710int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2711 enum amd_powergating_state state)
c9f96fd5 2712{
1112a46b 2713 int i, j, r;
06b18f61 2714
c9f96fd5
RZ
2715 if (amdgpu_emu_mode == 1)
2716 return 0;
2717
1112a46b
RZ
2718 for (j = 0; j < adev->num_ip_blocks; j++) {
2719 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2720 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5 2721 continue;
47198eb7 2722 /* skip PG for GFX, SDMA on S0ix */
5d70a549 2723 if (adev->in_s0ix &&
47198eb7
AD
2724 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2725 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2726 continue;
c9f96fd5
RZ
2727 /* skip CG for VCE/UVD, it's handled specially */
2728 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2729 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2730 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2731 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2732 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2733 /* enable powergating to save power */
2734 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2735 state);
c9f96fd5
RZ
2736 if (r) {
2737 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2738 adev->ip_blocks[i].version->funcs->name, r);
2739 return r;
2740 }
2741 }
2742 }
2dc80b00
S
2743 return 0;
2744}
2745
beff74bc
AD
2746static int amdgpu_device_enable_mgpu_fan_boost(void)
2747{
2748 struct amdgpu_gpu_instance *gpu_ins;
2749 struct amdgpu_device *adev;
2750 int i, ret = 0;
2751
2752 mutex_lock(&mgpu_info.mutex);
2753
2754 /*
2755 * MGPU fan boost feature should be enabled
2756 * only when there are two or more dGPUs in
2757 * the system
2758 */
2759 if (mgpu_info.num_dgpu < 2)
2760 goto out;
2761
2762 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2763 gpu_ins = &(mgpu_info.gpu_ins[i]);
2764 adev = gpu_ins->adev;
2765 if (!(adev->flags & AMD_IS_APU) &&
f10bb940 2766 !gpu_ins->mgpu_fan_enabled) {
beff74bc
AD
2767 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2768 if (ret)
2769 break;
2770
2771 gpu_ins->mgpu_fan_enabled = 1;
2772 }
2773 }
2774
2775out:
2776 mutex_unlock(&mgpu_info.mutex);
2777
2778 return ret;
2779}
2780
e3ecdffa
AD
2781/**
2782 * amdgpu_device_ip_late_init - run late init for hardware IPs
2783 *
2784 * @adev: amdgpu_device pointer
2785 *
2786 * Late initialization pass for hardware IPs. The list of all the hardware
2787 * IPs that make up the asic is walked and the late_init callbacks are run.
2788 * late_init covers any special initialization that an IP requires
2789 * after all of them have been initialized or something that needs to happen
2790 * late in the init process.
2791 * Returns 0 on success, negative error code on failure.
2792 */
06ec9070 2793static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2794{
60599a03 2795 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2796 int i = 0, r;
2797
2798 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2799 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2800 continue;
2801 if (adev->ip_blocks[i].version->funcs->late_init) {
2802 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2803 if (r) {
2804 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2805 adev->ip_blocks[i].version->funcs->name, r);
2806 return r;
2807 }
2dc80b00 2808 }
73f847db 2809 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2810 }
2811
867e24ca 2812 r = amdgpu_ras_late_init(adev);
2813 if (r) {
2814 DRM_ERROR("amdgpu_ras_late_init failed %d", r);
2815 return r;
2816 }
2817
a891d239
DL
2818 amdgpu_ras_set_error_query_ready(adev, true);
2819
1112a46b
RZ
2820 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2821 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2822
06ec9070 2823 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2824
beff74bc
AD
2825 r = amdgpu_device_enable_mgpu_fan_boost();
2826 if (r)
2827 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2828
4da8b639 2829 /* For passthrough configuration on arcturus and aldebaran, enable special handling for SBR */
47fc644f
SS
2830 if (amdgpu_passthrough(adev) &&
2831 ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
2832 adev->asic_type == CHIP_ALDEBARAN))
bc143d8b 2833 amdgpu_dpm_handle_passthrough_sbr(adev, true);
60599a03
EQ
2834
2835 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2836 mutex_lock(&mgpu_info.mutex);
2837
2838 /*
2839 * Reset device p-state to low as this was booted with high.
2840 *
2841 * This should be performed only after all devices from the same
2842 * hive get initialized.
2843 *
2844 * However, it's unknown in advance how many devices are in the hive,
2845 * as this is counted one by one during device initialization.
2846 *
2847 * So, we wait for all XGMI interlinked devices initialized.
2848 * This may bring some delays as those devices may come from
2849 * different hives. But that should be OK.
2850 */
2851 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2852 for (i = 0; i < mgpu_info.num_gpu; i++) {
2853 gpu_instance = &(mgpu_info.gpu_ins[i]);
2854 if (gpu_instance->adev->flags & AMD_IS_APU)
2855 continue;
2856
d84a430d
JK
2857 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2858 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
2859 if (r) {
2860 DRM_ERROR("pstate setting failed (%d).\n", r);
2861 break;
2862 }
2863 }
2864 }
2865
2866 mutex_unlock(&mgpu_info.mutex);
2867 }
2868
d38ceaf9
AD
2869 return 0;
2870}
2871
613aa3ea
LY
2872/**
2873 * amdgpu_device_smu_fini_early - smu hw_fini wrapper
2874 *
2875 * @adev: amdgpu_device pointer
2876 *
2877 * For ASICs that need to disable the SMC first
2878 */
2879static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
2880{
2881 int i, r;
2882
4e8303cf 2883 if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
613aa3ea
LY
2884 return;
2885
2886 for (i = 0; i < adev->num_ip_blocks; i++) {
2887 if (!adev->ip_blocks[i].status.hw)
2888 continue;
2889 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2890 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2891 /* XXX handle errors */
2892 if (r) {
2893 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2894 adev->ip_blocks[i].version->funcs->name, r);
2895 }
2896 adev->ip_blocks[i].status.hw = false;
2897 break;
2898 }
2899 }
2900}
2901
e9669fb7 2902static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
d38ceaf9
AD
2903{
2904 int i, r;
2905
e9669fb7
AG
2906 for (i = 0; i < adev->num_ip_blocks; i++) {
2907 if (!adev->ip_blocks[i].version->funcs->early_fini)
2908 continue;
5278a159 2909
e9669fb7
AG
2910 r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
2911 if (r) {
2912 DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
2913 adev->ip_blocks[i].version->funcs->name, r);
2914 }
2915 }
c030f2e4 2916
05df1f01 2917 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2918 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2919
7270e895
TY
2920 amdgpu_amdkfd_suspend(adev, false);
2921
613aa3ea
LY
2922 /* Workaround for ASICs that need to disable the SMC first */
2923 amdgpu_device_smu_fini_early(adev);
3e96dbfd 2924
d38ceaf9 2925 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2926 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2927 continue;
8201a67a 2928
a1255107 2929 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2930 /* XXX handle errors */
2c1a2784 2931 if (r) {
a1255107
AD
2932 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2933 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2934 }
8201a67a 2935
a1255107 2936 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2937 }
2938
6effad8a
GC
2939 if (amdgpu_sriov_vf(adev)) {
2940 if (amdgpu_virt_release_full_gpu(adev, false))
2941 DRM_ERROR("failed to release exclusive mode on fini\n");
2942 }
2943
e9669fb7
AG
2944 return 0;
2945}
2946
2947/**
2948 * amdgpu_device_ip_fini - run fini for hardware IPs
2949 *
2950 * @adev: amdgpu_device pointer
2951 *
2952 * Main teardown pass for hardware IPs. The list of all the hardware
2953 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2954 * are run. hw_fini tears down the hardware associated with each IP
2955 * and sw_fini tears down any software state associated with each IP.
2956 * Returns 0 on success, negative error code on failure.
2957 */
2958static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2959{
2960 int i, r;
2961
2962 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2963 amdgpu_virt_release_ras_err_handler_data(adev);
2964
e9669fb7
AG
2965 if (adev->gmc.xgmi.num_physical_nodes > 1)
2966 amdgpu_xgmi_remove_device(adev);
2967
c004d44e 2968 amdgpu_amdkfd_device_fini_sw(adev);
9950cda2 2969
d38ceaf9 2970 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2971 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2972 continue;
c12aba3a
ML
2973
2974 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2975 amdgpu_ucode_free_bo(adev);
1e256e27 2976 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a 2977 amdgpu_device_wb_fini(adev);
7ccfd79f 2978 amdgpu_device_mem_scratch_fini(adev);
533aed27 2979 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2980 }
2981
a1255107 2982 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2983 /* XXX handle errors */
2c1a2784 2984 if (r) {
a1255107
AD
2985 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2986 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2987 }
a1255107
AD
2988 adev->ip_blocks[i].status.sw = false;
2989 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2990 }
2991
a6dcfd9c 2992 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2993 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2994 continue;
a1255107
AD
2995 if (adev->ip_blocks[i].version->funcs->late_fini)
2996 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2997 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2998 }
2999
c030f2e4 3000 amdgpu_ras_fini(adev);
3001
d38ceaf9
AD
3002 return 0;
3003}
3004
e3ecdffa 3005/**
beff74bc 3006 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 3007 *
1112a46b 3008 * @work: work_struct.
e3ecdffa 3009 */
beff74bc 3010static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
3011{
3012 struct amdgpu_device *adev =
beff74bc 3013 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
3014 int r;
3015
3016 r = amdgpu_ib_ring_tests(adev);
3017 if (r)
3018 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
3019}
3020
1e317b99
RZ
3021static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
3022{
3023 struct amdgpu_device *adev =
3024 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
3025
90a92662
MD
3026 WARN_ON_ONCE(adev->gfx.gfx_off_state);
3027 WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
3028
3029 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
3030 adev->gfx.gfx_off_state = true;
1e317b99
RZ
3031}
3032
e3ecdffa 3033/**
e7854a03 3034 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
3035 *
3036 * @adev: amdgpu_device pointer
3037 *
3038 * Main suspend function for hardware IPs. The list of all the hardware
3039 * IPs that make up the asic is walked, clockgating is disabled and the
3040 * suspend callbacks are run. suspend puts the hardware and software state
3041 * in each IP into a state suitable for suspend.
3042 * Returns 0 on success, negative error code on failure.
3043 */
e7854a03
AD
3044static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
3045{
3046 int i, r;
3047
50ec83f0
AD
3048 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3049 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 3050
b31d6ada
EQ
3051 /*
3052 * Per PMFW team's suggestion, driver needs to handle gfxoff
3053 * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
3054 * scenario. Add the missing df cstate disablement here.
3055 */
3056 if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
3057 dev_warn(adev->dev, "Failed to disallow df cstate");
3058
e7854a03
AD
3059 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3060 if (!adev->ip_blocks[i].status.valid)
3061 continue;
2b9f7848 3062
e7854a03 3063 /* displays are handled separately */
2b9f7848
ND
3064 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
3065 continue;
3066
3067 /* XXX handle errors */
3068 r = adev->ip_blocks[i].version->funcs->suspend(adev);
3069 /* XXX handle errors */
3070 if (r) {
3071 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3072 adev->ip_blocks[i].version->funcs->name, r);
3073 return r;
e7854a03 3074 }
2b9f7848
ND
3075
3076 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
3077 }
3078
e7854a03
AD
3079 return 0;
3080}
3081
3082/**
3083 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3084 *
3085 * @adev: amdgpu_device pointer
3086 *
3087 * Main suspend function for hardware IPs. The list of all the hardware
3088 * IPs that make up the asic is walked, clockgating is disabled and the
3089 * suspend callbacks are run. suspend puts the hardware and software state
3090 * in each IP into a state suitable for suspend.
3091 * Returns 0 on success, negative error code on failure.
3092 */
3093static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3094{
3095 int i, r;
3096
557f42a2 3097 if (adev->in_s0ix)
bc143d8b 3098 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
34416931 3099
d38ceaf9 3100 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3101 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 3102 continue;
e7854a03
AD
3103 /* displays are handled in phase1 */
3104 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3105 continue;
bff77e86
LM
3106 /* PSP lost connection when err_event_athub occurs */
3107 if (amdgpu_ras_intr_triggered() &&
3108 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3109 adev->ip_blocks[i].status.hw = false;
3110 continue;
3111 }
e3c1b071 3112
3113 /* skip unnecessary suspend if we do not initialize them yet */
3114 if (adev->gmc.xgmi.pending_reset &&
3115 !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3116 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
3117 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3118 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
3119 adev->ip_blocks[i].status.hw = false;
3120 continue;
3121 }
557f42a2 3122
afa6646b 3123 /* skip suspend of gfx/mes and psp for S0ix
32ff160d
AD
3124 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3125 * like at runtime. PSP is also part of the always on hardware
3126 * so no need to suspend it.
3127 */
557f42a2 3128 if (adev->in_s0ix &&
32ff160d 3129 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
afa6646b
AD
3130 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3131 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
557f42a2
AD
3132 continue;
3133
2a7798ea
AD
3134 /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3135 if (adev->in_s0ix &&
4e8303cf
LL
3136 (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
3137 IP_VERSION(5, 0, 0)) &&
3138 (adev->ip_blocks[i].version->type ==
3139 AMD_IP_BLOCK_TYPE_SDMA))
2a7798ea
AD
3140 continue;
3141
e11c7750
TH
3142 /* Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot.
3143 * These are in TMR, hence are expected to be reused by PSP-TOS to reload
3144 * from this location and RLC Autoload automatically also gets loaded
3145 * from here based on PMFW -> PSP message during re-init sequence.
3146 * Therefore, the psp suspend & resume should be skipped to avoid destroying
3147 * the TMR and reloading FWs again for IMU enabled APU ASICs.
3148 */
3149 if (amdgpu_in_reset(adev) &&
3150 (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3151 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3152 continue;
3153
d38ceaf9 3154 /* XXX handle errors */
a1255107 3155 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 3156 /* XXX handle errors */
2c1a2784 3157 if (r) {
a1255107
AD
3158 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3159 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 3160 }
876923fb 3161 adev->ip_blocks[i].status.hw = false;
a3a09142 3162 /* handle putting the SMC in the appropriate state */
47fc644f 3163 if (!amdgpu_sriov_vf(adev)) {
86b93fd6
JZ
3164 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3165 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3166 if (r) {
3167 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3168 adev->mp1_state, r);
3169 return r;
3170 }
a3a09142
AD
3171 }
3172 }
d38ceaf9
AD
3173 }
3174
3175 return 0;
3176}
3177
e7854a03
AD
3178/**
3179 * amdgpu_device_ip_suspend - run suspend for hardware IPs
3180 *
3181 * @adev: amdgpu_device pointer
3182 *
3183 * Main suspend function for hardware IPs. The list of all the hardware
3184 * IPs that make up the asic is walked, clockgating is disabled and the
3185 * suspend callbacks are run. suspend puts the hardware and software state
3186 * in each IP into a state suitable for suspend.
3187 * Returns 0 on success, negative error code on failure.
3188 */
3189int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3190{
3191 int r;
3192
3c73683c
JC
3193 if (amdgpu_sriov_vf(adev)) {
3194 amdgpu_virt_fini_data_exchange(adev);
e7819644 3195 amdgpu_virt_request_full_gpu(adev, false);
3c73683c 3196 }
e7819644 3197
e7854a03
AD
3198 r = amdgpu_device_ip_suspend_phase1(adev);
3199 if (r)
3200 return r;
3201 r = amdgpu_device_ip_suspend_phase2(adev);
3202
e7819644
YT
3203 if (amdgpu_sriov_vf(adev))
3204 amdgpu_virt_release_full_gpu(adev, false);
3205
e7854a03
AD
3206 return r;
3207}
3208
06ec9070 3209static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3210{
3211 int i, r;
3212
2cb681b6 3213 static enum amd_ip_block_type ip_order[] = {
2cb681b6 3214 AMD_IP_BLOCK_TYPE_COMMON,
c1c39032 3215 AMD_IP_BLOCK_TYPE_GMC,
39186aef 3216 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
3217 AMD_IP_BLOCK_TYPE_IH,
3218 };
a90ad3c2 3219
95ea3dbc 3220 for (i = 0; i < adev->num_ip_blocks; i++) {
2cb681b6
ML
3221 int j;
3222 struct amdgpu_ip_block *block;
a90ad3c2 3223
4cd2a96d
J
3224 block = &adev->ip_blocks[i];
3225 block->status.hw = false;
2cb681b6 3226
4cd2a96d 3227 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
2cb681b6 3228
4cd2a96d 3229 if (block->version->type != ip_order[j] ||
2cb681b6
ML
3230 !block->status.valid)
3231 continue;
3232
3233 r = block->version->funcs->hw_init(adev);
0aaeefcc 3234 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3235 if (r)
3236 return r;
482f0e53 3237 block->status.hw = true;
a90ad3c2
ML
3238 }
3239 }
3240
3241 return 0;
3242}
3243
06ec9070 3244static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3245{
3246 int i, r;
3247
2cb681b6
ML
3248 static enum amd_ip_block_type ip_order[] = {
3249 AMD_IP_BLOCK_TYPE_SMC,
3250 AMD_IP_BLOCK_TYPE_DCE,
3251 AMD_IP_BLOCK_TYPE_GFX,
3252 AMD_IP_BLOCK_TYPE_SDMA,
ec64350d 3253 AMD_IP_BLOCK_TYPE_MES,
257deb8c 3254 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07 3255 AMD_IP_BLOCK_TYPE_VCE,
d2cdc014
YZ
3256 AMD_IP_BLOCK_TYPE_VCN,
3257 AMD_IP_BLOCK_TYPE_JPEG
2cb681b6 3258 };
a90ad3c2 3259
2cb681b6
ML
3260 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3261 int j;
3262 struct amdgpu_ip_block *block;
a90ad3c2 3263
2cb681b6
ML
3264 for (j = 0; j < adev->num_ip_blocks; j++) {
3265 block = &adev->ip_blocks[j];
3266
3267 if (block->version->type != ip_order[i] ||
482f0e53
ML
3268 !block->status.valid ||
3269 block->status.hw)
2cb681b6
ML
3270 continue;
3271
895bd048
JZ
3272 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3273 r = block->version->funcs->resume(adev);
3274 else
3275 r = block->version->funcs->hw_init(adev);
3276
0aaeefcc 3277 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3278 if (r)
3279 return r;
482f0e53 3280 block->status.hw = true;
a90ad3c2
ML
3281 }
3282 }
3283
3284 return 0;
3285}
3286
e3ecdffa
AD
3287/**
3288 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3289 *
3290 * @adev: amdgpu_device pointer
3291 *
3292 * First resume function for hardware IPs. The list of all the hardware
3293 * IPs that make up the asic is walked and the resume callbacks are run for
3294 * COMMON, GMC, and IH. resume puts the hardware into a functional state
3295 * after a suspend and updates the software state as necessary. This
3296 * function is also used for restoring the GPU after a GPU reset.
3297 * Returns 0 on success, negative error code on failure.
3298 */
06ec9070 3299static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
3300{
3301 int i, r;
3302
a90ad3c2 3303 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3304 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 3305 continue;
a90ad3c2 3306 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3307 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
d7274ec7
BZ
3308 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3309 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
482f0e53 3310
fcf0649f
CZ
3311 r = adev->ip_blocks[i].version->funcs->resume(adev);
3312 if (r) {
3313 DRM_ERROR("resume of IP block <%s> failed %d\n",
3314 adev->ip_blocks[i].version->funcs->name, r);
3315 return r;
3316 }
482f0e53 3317 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
3318 }
3319 }
3320
3321 return 0;
3322}
3323
e3ecdffa
AD
3324/**
3325 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3326 *
3327 * @adev: amdgpu_device pointer
3328 *
3329 * Second resume function for hardware IPs. The list of all the hardware
3330 * IPs that make up the asic is walked and the resume callbacks are run for
3331 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
3332 * functional state after a suspend and updates the software state as
3333 * necessary. This function is also used for restoring the GPU after a GPU
3334 * reset.
3335 * Returns 0 on success, negative error code on failure.
3336 */
06ec9070 3337static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3338{
3339 int i, r;
3340
3341 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3342 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 3343 continue;
fcf0649f 3344 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3345 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
3346 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3347 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 3348 continue;
a1255107 3349 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 3350 if (r) {
a1255107
AD
3351 DRM_ERROR("resume of IP block <%s> failed %d\n",
3352 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 3353 return r;
2c1a2784 3354 }
482f0e53 3355 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
3356 }
3357
3358 return 0;
3359}
3360
e3ecdffa
AD
3361/**
3362 * amdgpu_device_ip_resume - run resume for hardware IPs
3363 *
3364 * @adev: amdgpu_device pointer
3365 *
3366 * Main resume function for hardware IPs. The hardware IPs
3367 * are split into two resume functions because they are
b8920e1e 3368 * also used in recovering from a GPU reset and some additional
e3ecdffa
AD
 3369 * steps need to be taken between them. In this case (S3/S4) they are
3370 * run sequentially.
3371 * Returns 0 on success, negative error code on failure.
3372 */
06ec9070 3373static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
3374{
3375 int r;
3376
06ec9070 3377 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
3378 if (r)
3379 return r;
7a3e0bb2
RZ
3380
3381 r = amdgpu_device_fw_loading(adev);
3382 if (r)
3383 return r;
3384
06ec9070 3385 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
3386
3387 return r;
3388}
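/*
 * Illustrative call order (this mirrors the function body above, it adds no
 * new driver logic):
 *
 *   amdgpu_device_ip_resume_phase1(adev);  // COMMON, GMC, IH (+ PSP on SR-IOV VFs)
 *   amdgpu_device_fw_loading(adev);        // firmware is loaded between the phases
 *   amdgpu_device_ip_resume_phase2(adev);  // all remaining IP blocks
 */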
3389
e3ecdffa
AD
3390/**
3391 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3392 *
3393 * @adev: amdgpu_device pointer
3394 *
3395 * Query the VBIOS data tables to determine if the board supports SR-IOV.
3396 */
4e99a44e 3397static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 3398{
6867e1b5
ML
3399 if (amdgpu_sriov_vf(adev)) {
3400 if (adev->is_atom_fw) {
58ff791a 3401 if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
6867e1b5
ML
3402 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3403 } else {
3404 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3405 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3406 }
3407
3408 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3409 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 3410 }
048765ad
AR
3411}
3412
e3ecdffa
AD
3413/**
3414 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3415 *
3416 * @asic_type: AMD asic type
3417 *
 3418 * Check if there is DC (new modesetting infrastructure) support for an asic.
 3419 * Returns true if DC has support, false if not.
3420 */
4562236b
HW
3421bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3422{
3423 switch (asic_type) {
0637d417
AD
3424#ifdef CONFIG_DRM_AMDGPU_SI
3425 case CHIP_HAINAN:
3426#endif
3427 case CHIP_TOPAZ:
3428 /* chips with no display hardware */
3429 return false;
4562236b 3430#if defined(CONFIG_DRM_AMD_DC)
64200c46
MR
3431 case CHIP_TAHITI:
3432 case CHIP_PITCAIRN:
3433 case CHIP_VERDE:
3434 case CHIP_OLAND:
2d32ffd6
AD
3435 /*
3436 * We have systems in the wild with these ASICs that require
3437 * LVDS and VGA support which is not supported with DC.
3438 *
3439 * Fallback to the non-DC driver here by default so as not to
3440 * cause regressions.
3441 */
3442#if defined(CONFIG_DRM_AMD_DC_SI)
3443 return amdgpu_dc > 0;
3444#else
3445 return false;
64200c46 3446#endif
4562236b 3447 case CHIP_BONAIRE:
0d6fbccb 3448 case CHIP_KAVERI:
367e6687
AD
3449 case CHIP_KABINI:
3450 case CHIP_MULLINS:
d9fda248
HW
3451 /*
3452 * We have systems in the wild with these ASICs that require
b5a0168e 3453 * VGA support which is not supported with DC.
d9fda248
HW
3454 *
3455 * Fallback to the non-DC driver here by default so as not to
3456 * cause regressions.
3457 */
3458 return amdgpu_dc > 0;
f7f12b25 3459 default:
fd187853 3460 return amdgpu_dc != 0;
f7f12b25 3461#else
4562236b 3462 default:
93b09a9a 3463 if (amdgpu_dc > 0)
b8920e1e 3464 DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
4562236b 3465 return false;
f7f12b25 3466#endif
4562236b
HW
3467 }
3468}
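/*
 * Illustrative summary of the amdgpu.dc module parameter as evaluated above
 * (informal, the concrete values are examples only):
 *
 *   amdgpu.dc=1    force-enable DC, including the SI/CIK parts above where it
 *                  is opt-in (amdgpu_dc > 0)
 *   amdgpu.dc=0    disable DC and fall back to the legacy display code
 *   amdgpu.dc=-1   default: automatic; DC is used where supported but stays
 *                  off on the opt-in parts and on chips with no display hw
 */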
3469
3470/**
3471 * amdgpu_device_has_dc_support - check if dc is supported
3472 *
982a820b 3473 * @adev: amdgpu_device pointer
4562236b
HW
3474 *
3475 * Returns true for supported, false for not supported
3476 */
3477bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3478{
25263da3 3479 if (adev->enable_virtual_display ||
abaf210c 3480 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
2555039d
XY
3481 return false;
3482
4562236b
HW
3483 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3484}
3485
d4535e2c
AG
3486static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3487{
3488 struct amdgpu_device *adev =
3489 container_of(__work, struct amdgpu_device, xgmi_reset_work);
d95e8e97 3490 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
d4535e2c 3491
c6a6e2db
AG
3492 /* It's a bug to not have a hive within this function */
3493 if (WARN_ON(!hive))
3494 return;
3495
3496 /*
3497 * Use task barrier to synchronize all xgmi reset works across the
3498 * hive. task_barrier_enter and task_barrier_exit will block
3499 * until all the threads running the xgmi reset works reach
3500 * those points. task_barrier_full will do both blocks.
3501 */
3502 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3503
3504 task_barrier_enter(&hive->tb);
4a580877 3505 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
c6a6e2db
AG
3506
3507 if (adev->asic_reset_res)
3508 goto fail;
3509
3510 task_barrier_exit(&hive->tb);
4a580877 3511 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
c6a6e2db
AG
3512
3513 if (adev->asic_reset_res)
3514 goto fail;
43c4d576 3515
5e67bba3 3516 if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
3517 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
3518 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
c6a6e2db
AG
3519 } else {
3520
3521 task_barrier_full(&hive->tb);
3522 adev->asic_reset_res = amdgpu_asic_reset(adev);
3523 }
ce316fa5 3524
c6a6e2db 3525fail:
d4535e2c 3526 if (adev->asic_reset_res)
fed184e9 3527 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
4a580877 3528 adev->asic_reset_res, adev_to_drm(adev)->unique);
d95e8e97 3529 amdgpu_put_xgmi_hive(hive);
d4535e2c
AG
3530}
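/*
 * Note on the synchronization above: in the BACO case, task_barrier_enter()
 * and task_barrier_exit() ensure every node of the XGMI hive has entered BACO
 * before any node leaves it, so the whole hive is reset as a unit; for other
 * reset methods a single task_barrier_full() before amdgpu_asic_reset() is
 * sufficient.
 */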
3531
71f98027
AD
3532static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3533{
3534 char *input = amdgpu_lockup_timeout;
3535 char *timeout_setting = NULL;
3536 int index = 0;
3537 long timeout;
3538 int ret = 0;
3539
3540 /*
67387dfe
AD
 3541 * By default the timeout for non-compute jobs is 10000
 3542 * and 60000 for compute jobs.
71f98027 3543 * In SR-IOV or passthrough mode, the timeout for compute
b7b2a316 3544 * jobs is 60000 by default.
71f98027
AD
3545 */
3546 adev->gfx_timeout = msecs_to_jiffies(10000);
3547 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
9882e278
ED
3548 if (amdgpu_sriov_vf(adev))
3549 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3550 msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
71f98027 3551 else
67387dfe 3552 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027 3553
f440ff44 3554 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 3555 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 3556 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
3557 ret = kstrtol(timeout_setting, 0, &timeout);
3558 if (ret)
3559 return ret;
3560
3561 if (timeout == 0) {
3562 index++;
3563 continue;
3564 } else if (timeout < 0) {
3565 timeout = MAX_SCHEDULE_TIMEOUT;
127aedf9
CK
3566 dev_warn(adev->dev, "lockup timeout disabled");
3567 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
71f98027
AD
3568 } else {
3569 timeout = msecs_to_jiffies(timeout);
3570 }
3571
3572 switch (index++) {
3573 case 0:
3574 adev->gfx_timeout = timeout;
3575 break;
3576 case 1:
3577 adev->compute_timeout = timeout;
3578 break;
3579 case 2:
3580 adev->sdma_timeout = timeout;
3581 break;
3582 case 3:
3583 adev->video_timeout = timeout;
3584 break;
3585 default:
3586 break;
3587 }
3588 }
3589 /*
3590 * There is only one value specified and
3591 * it should apply to all non-compute jobs.
3592 */
bcccee89 3593 if (index == 1) {
71f98027 3594 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
3595 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3596 adev->compute_timeout = adev->gfx_timeout;
3597 }
71f98027
AD
3598 }
3599
3600 return ret;
3601}
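/*
 * Example of the amdgpu.lockup_timeout format parsed above (values in ms;
 * the concrete numbers are only illustrative):
 *
 *   amdgpu.lockup_timeout=10000,60000,10000,10000
 *                          gfx   compute sdma  video
 *
 * A single value applies to all non-compute queues (and to compute as well
 * under SR-IOV or passthrough); 0 keeps the default for that slot and a
 * negative value disables the timeout entirely.
 */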
d4535e2c 3602
4a74c38c
PY
3603/**
3604 * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3605 *
3606 * @adev: amdgpu_device pointer
3607 *
 3608 * RAM is direct mapped to the GPU if the IOMMU is not enabled or is in passthrough mode
3609 */
3610static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3611{
3612 struct iommu_domain *domain;
3613
3614 domain = iommu_get_domain_for_dev(adev->dev);
3615 if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3616 adev->ram_is_direct_mapped = true;
3617}
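/*
 * A brief note on the check above: with no IOMMU, or with an identity
 * (passthrough) domain, DMA addresses equal physical addresses, so system
 * RAM is effectively direct-mapped from the GPU's point of view, which is
 * what ram_is_direct_mapped records.
 */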
3618
77f3a5cd 3619static const struct attribute *amdgpu_dev_attributes[] = {
77f3a5cd
ND
3620 &dev_attr_pcie_replay_count.attr,
3621 NULL
3622};
3623
02ff519e
AD
3624static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
3625{
3626 if (amdgpu_mcbp == 1)
3627 adev->gfx.mcbp = true;
1e9e15dc
JZ
3628 else if (amdgpu_mcbp == 0)
3629 adev->gfx.mcbp = false;
4e8303cf
LL
3630 else if ((amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 0, 0)) &&
3631 (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 0, 0)) &&
1e9e15dc 3632 adev->gfx.num_gfx_rings)
50a7c876
AD
3633 adev->gfx.mcbp = true;
3634
02ff519e
AD
3635 if (amdgpu_sriov_vf(adev))
3636 adev->gfx.mcbp = true;
3637
3638 if (adev->gfx.mcbp)
3639 DRM_INFO("MCBP is enabled\n");
3640}
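/*
 * Summary of the amdgpu.mcbp policy implemented above (informal, matching the
 * checks rather than adding new ones):
 *
 *   amdgpu.mcbp=1     force mid-command-buffer preemption on
 *   amdgpu.mcbp=0     force it off
 *   any other value   auto: enabled on GFX9 (GC 9.x) parts that expose gfx rings
 *   SR-IOV VFs        always enabled, regardless of the parameter
 */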
3641
d38ceaf9
AD
3642/**
3643 * amdgpu_device_init - initialize the driver
3644 *
3645 * @adev: amdgpu_device pointer
d38ceaf9
AD
3646 * @flags: driver flags
3647 *
3648 * Initializes the driver info and hw (all asics).
3649 * Returns 0 for success or an error on failure.
3650 * Called at driver startup.
3651 */
3652int amdgpu_device_init(struct amdgpu_device *adev,
d38ceaf9
AD
3653 uint32_t flags)
3654{
8aba21b7
LT
3655 struct drm_device *ddev = adev_to_drm(adev);
3656 struct pci_dev *pdev = adev->pdev;
d38ceaf9 3657 int r, i;
b98c6299 3658 bool px = false;
95844d20 3659 u32 max_MBps;
59e9fff1 3660 int tmp;
d38ceaf9
AD
3661
3662 adev->shutdown = false;
d38ceaf9 3663 adev->flags = flags;
4e66d7d2
YZ
3664
3665 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3666 adev->asic_type = amdgpu_force_asic_type;
3667 else
3668 adev->asic_type = flags & AMD_ASIC_MASK;
3669
d38ceaf9 3670 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 3671 if (amdgpu_emu_mode == 1)
8bdab6bb 3672 adev->usec_timeout *= 10;
770d13b1 3673 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
3674 adev->accel_working = false;
3675 adev->num_rings = 0;
68ce8b24 3676 RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
d38ceaf9
AD
3677 adev->mman.buffer_funcs = NULL;
3678 adev->mman.buffer_funcs_ring = NULL;
3679 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 3680 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 3681 adev->gmc.gmc_funcs = NULL;
7bd939d0 3682 adev->harvest_ip_mask = 0x0;
f54d1867 3683 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 3684 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
3685
3686 adev->smc_rreg = &amdgpu_invalid_rreg;
3687 adev->smc_wreg = &amdgpu_invalid_wreg;
3688 adev->pcie_rreg = &amdgpu_invalid_rreg;
3689 adev->pcie_wreg = &amdgpu_invalid_wreg;
0c552ed3
LM
3690 adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
3691 adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
36b9a952
HR
3692 adev->pciep_rreg = &amdgpu_invalid_rreg;
3693 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
3694 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3695 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
a76b2870
CL
3696 adev->pcie_rreg64_ext = &amdgpu_invalid_rreg64_ext;
3697 adev->pcie_wreg64_ext = &amdgpu_invalid_wreg64_ext;
d38ceaf9
AD
3698 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3699 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3700 adev->didt_rreg = &amdgpu_invalid_rreg;
3701 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
3702 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3703 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
3704 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3705 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3706
3e39ab90
AD
3707 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3708 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3709 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
3710
3711 /* mutex initialization are all done here so we
b8920e1e
SS
3712 * can recall function without having locking issues
3713 */
0e5ca0d1 3714 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
3715 mutex_init(&adev->pm.mutex);
3716 mutex_init(&adev->gfx.gpu_clock_mutex);
3717 mutex_init(&adev->srbm_mutex);
b8866c26 3718 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3719 mutex_init(&adev->gfx.gfx_off_mutex);
98a54e88 3720 mutex_init(&adev->gfx.partition_mutex);
d38ceaf9 3721 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3722 mutex_init(&adev->mn_lock);
e23b74aa 3723 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3724 hash_init(adev->mn_hash);
32eaeae0 3725 mutex_init(&adev->psp.mutex);
bd052211 3726 mutex_init(&adev->notifier_lock);
8cda7a4f 3727 mutex_init(&adev->pm.stable_pstate_ctx_lock);
f113cc32 3728 mutex_init(&adev->benchmark_mutex);
d38ceaf9 3729
ab3b9de6 3730 amdgpu_device_init_apu_flags(adev);
9f6a7857 3731
912dfc84
EQ
3732 r = amdgpu_device_check_arguments(adev);
3733 if (r)
3734 return r;
d38ceaf9 3735
d38ceaf9
AD
3736 spin_lock_init(&adev->mmio_idx_lock);
3737 spin_lock_init(&adev->smc_idx_lock);
3738 spin_lock_init(&adev->pcie_idx_lock);
3739 spin_lock_init(&adev->uvd_ctx_idx_lock);
3740 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3741 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3742 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3743 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3744 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3745
0c4e7fa5
CZ
3746 INIT_LIST_HEAD(&adev->shadow_list);
3747 mutex_init(&adev->shadow_list_lock);
3748
655ce9cb 3749 INIT_LIST_HEAD(&adev->reset_list);
3750
6492e1b0 3751 INIT_LIST_HEAD(&adev->ras_list);
3752
3e38b634
EQ
3753 INIT_LIST_HEAD(&adev->pm.od_kobj_list);
3754
beff74bc
AD
3755 INIT_DELAYED_WORK(&adev->delayed_init_work,
3756 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3757 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3758 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3759
d4535e2c
AG
3760 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3761
d23ee13f 3762 adev->gfx.gfx_off_req_count = 1;
0ad7347a
AA
3763 adev->gfx.gfx_off_residency = 0;
3764 adev->gfx.gfx_off_entrycount = 0;
b6e79d9a 3765 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3766
b265bdbd
EQ
3767 atomic_set(&adev->throttling_logging_enabled, 1);
3768 /*
3769 * If throttling continues, logging will be performed every minute
3770 * to avoid log flooding. "-1" is subtracted since the thermal
3771 * throttling interrupt comes every second. Thus, the total logging
 3772 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
3773 * for throttling interrupt) = 60 seconds.
3774 */
3775 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3776 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3777
0fa49558
AX
3778 /* Registers mapping */
3779 /* TODO: block userspace mapping of io register */
da69c161
KW
3780 if (adev->asic_type >= CHIP_BONAIRE) {
3781 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3782 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3783 } else {
3784 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3785 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3786 }
d38ceaf9 3787
6c08e0ef
EQ
3788 for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3789 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3790
d38ceaf9 3791 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
b8920e1e 3792 if (!adev->rmmio)
d38ceaf9 3793 return -ENOMEM;
b8920e1e 3794
d38ceaf9 3795 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
b8920e1e 3796 DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size);
d38ceaf9 3797
436afdfa
PY
3798 /*
3799 * Reset domain needs to be present early, before XGMI hive discovered
 3800 * (if any) and initialized to use reset sem and in_gpu reset flag
3801 * early on during init and before calling to RREG32.
3802 */
3803 adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3804 if (!adev->reset_domain)
3805 return -ENOMEM;
3806
3aa0115d
ML
3807 /* detect hw virtualization here */
3808 amdgpu_detect_virtualization(adev);
3809
04e85958
TL
3810 amdgpu_device_get_pcie_info(adev);
3811
dffa11b4
ML
3812 r = amdgpu_device_get_job_timeout_settings(adev);
3813 if (r) {
3814 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4ef87d8f 3815 return r;
a190d1c7
XY
3816 }
3817
d38ceaf9 3818 /* early init functions */
06ec9070 3819 r = amdgpu_device_ip_early_init(adev);
d38ceaf9 3820 if (r)
4ef87d8f 3821 return r;
d38ceaf9 3822
02ff519e
AD
3823 amdgpu_device_set_mcbp(adev);
3824
b7cdb41e
ML
3825 /* Get rid of things like offb */
3826 r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
3827 if (r)
3828 return r;
3829
4d33e704
SK
3830 /* Enable TMZ based on IP_VERSION */
3831 amdgpu_gmc_tmz_set(adev);
3832
957b0787 3833 amdgpu_gmc_noretry_set(adev);
4a0165f0
VS
 3834 /* Need to get xgmi info early to decide the reset behavior */
3835 if (adev->gmc.xgmi.supported) {
3836 r = adev->gfxhub.funcs->get_xgmi_info(adev);
3837 if (r)
3838 return r;
3839 }
3840
8e6d0b69 3841 /* enable PCIE atomic ops */
b4520bfd
GW
3842 if (amdgpu_sriov_vf(adev)) {
3843 if (adev->virt.fw_reserve.p_pf2vf)
3844 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
3845 adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
3846 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
0e768043
YZ
 3847 /* APUs with gfx9 onwards don't rely on PCIe atomics; rather, the
 3848 * internal path natively supports atomics, so set have_atomics_support to true.
3849 */
b4520bfd 3850 } else if ((adev->flags & AMD_IS_APU) &&
4e8303cf
LL
3851 (amdgpu_ip_version(adev, GC_HWIP, 0) >
3852 IP_VERSION(9, 0, 0))) {
0e768043 3853 adev->have_atomics_support = true;
b4520bfd 3854 } else {
8e6d0b69 3855 adev->have_atomics_support =
3856 !pci_enable_atomic_ops_to_root(adev->pdev,
3857 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3858 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
b4520bfd
GW
3859 }
3860
8e6d0b69 3861 if (!adev->have_atomics_support)
 3862 dev_info(adev->dev, "PCIE atomic ops are not supported\n");
3863
6585661d 3864 /* doorbell bar mapping and doorbell index init*/
43c064db 3865 amdgpu_doorbell_init(adev);
6585661d 3866
9475a943
SL
3867 if (amdgpu_emu_mode == 1) {
3868 /* post the asic on emulation mode */
3869 emu_soc_asic_init(adev);
bfca0289 3870 goto fence_driver_init;
9475a943 3871 }
bfca0289 3872
04442bf7
LL
3873 amdgpu_reset_init(adev);
3874
4e99a44e 3875 /* detect if we are with an SRIOV vbios */
b4520bfd
GW
3876 if (adev->bios)
3877 amdgpu_device_detect_sriov_bios(adev);
048765ad 3878
95e8e59e
AD
3879 /* check if we need to reset the asic
3880 * E.g., driver was not cleanly unloaded previously, etc.
3881 */
f14899fd 3882 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
e3c1b071 3883 if (adev->gmc.xgmi.num_physical_nodes) {
3884 dev_info(adev->dev, "Pending hive reset.\n");
3885 adev->gmc.xgmi.pending_reset = true;
 3886 /* Only need to init the necessary blocks for SMU to handle the reset */
3887 for (i = 0; i < adev->num_ip_blocks; i++) {
3888 if (!adev->ip_blocks[i].status.valid)
3889 continue;
3890 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3891 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3892 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3893 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
751f43e7 3894 DRM_DEBUG("IP %s disabled for hw_init.\n",
e3c1b071 3895 adev->ip_blocks[i].version->funcs->name);
3896 adev->ip_blocks[i].status.hw = true;
3897 }
3898 }
3899 } else {
59e9fff1 3900 tmp = amdgpu_reset_method;
3901 /* It should do a default reset when loading or reloading the driver,
3902 * regardless of the module parameter reset_method.
3903 */
3904 amdgpu_reset_method = AMD_RESET_METHOD_NONE;
e3c1b071 3905 r = amdgpu_asic_reset(adev);
59e9fff1 3906 amdgpu_reset_method = tmp;
e3c1b071 3907 if (r) {
3908 dev_err(adev->dev, "asic reset on init failed\n");
3909 goto failed;
3910 }
95e8e59e
AD
3911 }
3912 }
3913
d38ceaf9 3914 /* Post card if necessary */
39c640c0 3915 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3916 if (!adev->bios) {
bec86378 3917 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3918 r = -EINVAL;
3919 goto failed;
d38ceaf9 3920 }
bec86378 3921 DRM_INFO("GPU posting now...\n");
4d2997ab 3922 r = amdgpu_device_asic_init(adev);
4e99a44e
ML
3923 if (r) {
3924 dev_err(adev->dev, "gpu post error!\n");
3925 goto failed;
3926 }
d38ceaf9
AD
3927 }
3928
9535a86a
SZ
3929 if (adev->bios) {
3930 if (adev->is_atom_fw) {
3931 /* Initialize clocks */
3932 r = amdgpu_atomfirmware_get_clock_info(adev);
3933 if (r) {
3934 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
3935 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3936 goto failed;
3937 }
3938 } else {
3939 /* Initialize clocks */
3940 r = amdgpu_atombios_get_clock_info(adev);
3941 if (r) {
3942 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
3943 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3944 goto failed;
3945 }
3946 /* init i2c buses */
3947 if (!amdgpu_device_has_dc_support(adev))
3948 amdgpu_atombios_i2c_init(adev);
a5bde2f9 3949 }
2c1a2784 3950 }
d38ceaf9 3951
bfca0289 3952fence_driver_init:
d38ceaf9 3953 /* Fence driver */
067f44c8 3954 r = amdgpu_fence_driver_sw_init(adev);
2c1a2784 3955 if (r) {
067f44c8 3956 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
e23b74aa 3957 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3958 goto failed;
2c1a2784 3959 }
d38ceaf9
AD
3960
3961 /* init the mode config */
4a580877 3962 drm_mode_config_init(adev_to_drm(adev));
d38ceaf9 3963
06ec9070 3964 r = amdgpu_device_ip_init(adev);
d38ceaf9 3965 if (r) {
06ec9070 3966 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3967 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
970fd197 3968 goto release_ras_con;
d38ceaf9
AD
3969 }
3970
8d35a259
LG
3971 amdgpu_fence_driver_hw_init(adev);
3972
d69b8971
YZ
3973 dev_info(adev->dev,
3974 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
3975 adev->gfx.config.max_shader_engines,
3976 adev->gfx.config.max_sh_per_se,
3977 adev->gfx.config.max_cu_per_sh,
3978 adev->gfx.cu_info.number);
3979
d38ceaf9
AD
3980 adev->accel_working = true;
3981
e59c0205
AX
3982 amdgpu_vm_check_compute_bug(adev);
3983
95844d20
MO
3984 /* Initialize the buffer migration limit. */
3985 if (amdgpu_moverate >= 0)
3986 max_MBps = amdgpu_moverate;
3987 else
3988 max_MBps = 8; /* Allow 8 MB/s. */
3989 /* Get a log2 for easy divisions. */
3990 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3991
b0adca4d
EQ
3992 /*
3993 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
 3994 * Otherwise the mgpu fan boost feature will be skipped because the
 3995 * gpu instance count would be too low.
3996 */
3997 amdgpu_register_gpu_instance(adev);
3998
d38ceaf9
AD
3999 /* enable clockgating, etc. after ib tests, etc. since some blocks require
4000 * explicit gating rather than handling it automatically.
4001 */
e3c1b071 4002 if (!adev->gmc.xgmi.pending_reset) {
4003 r = amdgpu_device_ip_late_init(adev);
4004 if (r) {
4005 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
4006 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
970fd197 4007 goto release_ras_con;
e3c1b071 4008 }
4009 /* must succeed. */
4010 amdgpu_ras_resume(adev);
4011 queue_delayed_work(system_wq, &adev->delayed_init_work,
4012 msecs_to_jiffies(AMDGPU_RESUME_MS));
2c1a2784 4013 }
d38ceaf9 4014
38eecbe0
CL
4015 if (amdgpu_sriov_vf(adev)) {
4016 amdgpu_virt_release_full_gpu(adev, true);
2c738637 4017 flush_delayed_work(&adev->delayed_init_work);
38eecbe0 4018 }
2c738637 4019
90bcb9b5
EQ
4020 /*
 4021 * Register these sysfs interfaces after `late_init`, since some of the
 4022 * operations performed in `late_init` might affect the creation of the
 4023 * sysfs interfaces.
4024 */
4025 r = amdgpu_atombios_sysfs_init(adev);
4026 if (r)
4027 drm_err(&adev->ddev,
4028 "registering atombios sysfs failed (%d).\n", r);
4029
4030 r = amdgpu_pm_sysfs_init(adev);
4031 if (r)
4032 DRM_ERROR("registering pm sysfs failed (%d).\n", r);
4033
4034 r = amdgpu_ucode_sysfs_init(adev);
4035 if (r) {
4036 adev->ucode_sysfs_en = false;
4037 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
4038 } else
4039 adev->ucode_sysfs_en = true;
4040
77f3a5cd 4041 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
5aea5327 4042 if (r)
77f3a5cd 4043 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166 4044
7957ec80
LL
4045 amdgpu_fru_sysfs_init(adev);
4046
d155bef0
AB
4047 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4048 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
4049 if (r)
4050 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
4051
c1dd4aa6
AG
4052 /* Have stored pci confspace at hand for restore in sudden PCI error */
4053 if (amdgpu_device_cache_pci_state(adev->pdev))
4054 pci_restore_state(pdev);
4055
8c3dd61c
KHF
4056 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
4057 /* this will fail for cards that aren't VGA class devices, just
b8920e1e
SS
4058 * ignore it
4059 */
8c3dd61c 4060 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
bf44e8ce 4061 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
8c3dd61c 4062
d37a3929
OC
4063 px = amdgpu_device_supports_px(ddev);
4064
4065 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
4066 apple_gmux_detect(NULL, NULL)))
8c3dd61c
KHF
4067 vga_switcheroo_register_client(adev->pdev,
4068 &amdgpu_switcheroo_ops, px);
d37a3929
OC
4069
4070 if (px)
8c3dd61c 4071 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
8c3dd61c 4072
e3c1b071 4073 if (adev->gmc.xgmi.pending_reset)
4074 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
4075 msecs_to_jiffies(AMDGPU_RESUME_MS));
4076
4a74c38c
PY
4077 amdgpu_device_check_iommu_direct_map(adev);
4078
d38ceaf9 4079 return 0;
83ba126a 4080
970fd197 4081release_ras_con:
38eecbe0
CL
4082 if (amdgpu_sriov_vf(adev))
4083 amdgpu_virt_release_full_gpu(adev, true);
4084
4085 /* failed in exclusive mode due to timeout */
4086 if (amdgpu_sriov_vf(adev) &&
4087 !amdgpu_sriov_runtime(adev) &&
4088 amdgpu_virt_mmio_blocked(adev) &&
4089 !amdgpu_virt_wait_reset(adev)) {
4090 dev_err(adev->dev, "VF exclusive mode timeout\n");
4091 /* Don't send request since VF is inactive. */
4092 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4093 adev->virt.ops = NULL;
4094 r = -EAGAIN;
4095 }
970fd197
SY
4096 amdgpu_release_ras_context(adev);
4097
83ba126a 4098failed:
89041940 4099 amdgpu_vf_error_trans_all(adev);
8840a387 4100
83ba126a 4101 return r;
d38ceaf9
AD
4102}
4103
07775fc1
AG
4104static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
4105{
62d5f9f7 4106
07775fc1
AG
4107 /* Clear all CPU mappings pointing to this device */
4108 unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
4109
4110 /* Unmap all mapped bars - Doorbell, registers and VRAM */
43c064db 4111 amdgpu_doorbell_fini(adev);
07775fc1
AG
4112
4113 iounmap(adev->rmmio);
4114 adev->rmmio = NULL;
4115 if (adev->mman.aper_base_kaddr)
4116 iounmap(adev->mman.aper_base_kaddr);
4117 adev->mman.aper_base_kaddr = NULL;
4118
4119 /* Memory manager related */
a0ba1279 4120 if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
07775fc1
AG
4121 arch_phys_wc_del(adev->gmc.vram_mtrr);
4122 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4123 }
4124}
4125
d38ceaf9 4126/**
bbe04dec 4127 * amdgpu_device_fini_hw - tear down the driver
d38ceaf9
AD
4128 *
4129 * @adev: amdgpu_device pointer
4130 *
4131 * Tear down the driver info (all asics).
4132 * Called at driver shutdown.
4133 */
72c8c97b 4134void amdgpu_device_fini_hw(struct amdgpu_device *adev)
d38ceaf9 4135{
aac89168 4136 dev_info(adev->dev, "amdgpu: finishing device.\n");
9f875167 4137 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 4138 adev->shutdown = true;
9f875167 4139
752c683d
ML
 4140 /* make sure IB tests have finished before entering exclusive mode
 4141 * to avoid preempting the IB tests
b8920e1e 4142 */
519b8b76 4143 if (amdgpu_sriov_vf(adev)) {
752c683d 4144 amdgpu_virt_request_full_gpu(adev, false);
519b8b76
BZ
4145 amdgpu_virt_fini_data_exchange(adev);
4146 }
752c683d 4147
e5b03032
ML
4148 /* disable all interrupts */
4149 amdgpu_irq_disable_all(adev);
47fc644f 4150 if (adev->mode_info.mode_config_initialized) {
1053b9c9 4151 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4a580877 4152 drm_helper_force_disable_all(adev_to_drm(adev));
ff97cba8 4153 else
4a580877 4154 drm_atomic_helper_shutdown(adev_to_drm(adev));
ff97cba8 4155 }
8d35a259 4156 amdgpu_fence_driver_hw_fini(adev);
72c8c97b 4157
cd3a8a59 4158 if (adev->mman.initialized)
9bff18d1 4159 drain_workqueue(adev->mman.bdev.wq);
98f56188 4160
53e9d836 4161 if (adev->pm.sysfs_initialized)
7c868b59 4162 amdgpu_pm_sysfs_fini(adev);
72c8c97b
AG
4163 if (adev->ucode_sysfs_en)
4164 amdgpu_ucode_sysfs_fini(adev);
4165 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
7957ec80 4166 amdgpu_fru_sysfs_fini(adev);
72c8c97b 4167
232d1d43
SY
4168 /* disable ras feature must before hw fini */
4169 amdgpu_ras_pre_fini(adev);
4170
e9669fb7 4171 amdgpu_device_ip_fini_early(adev);
d10d0daa 4172
a3848df6
YW
4173 amdgpu_irq_fini_hw(adev);
4174
b6fd6e0f
SK
4175 if (adev->mman.initialized)
4176 ttm_device_clear_dma_mappings(&adev->mman.bdev);
894c6890 4177
d10d0daa 4178 amdgpu_gart_dummy_page_fini(adev);
07775fc1 4179
39934d3e
VP
4180 if (drm_dev_is_unplugged(adev_to_drm(adev)))
4181 amdgpu_device_unmap_mmio(adev);
87172e89 4182
72c8c97b
AG
4183}
4184
4185void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4186{
62d5f9f7 4187 int idx;
d37a3929 4188 bool px;
62d5f9f7 4189
8d35a259 4190 amdgpu_fence_driver_sw_fini(adev);
a5c5d8d5 4191 amdgpu_device_ip_fini(adev);
b31d3063 4192 amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
d38ceaf9 4193 adev->accel_working = false;
68ce8b24 4194 dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
04442bf7
LL
4195
4196 amdgpu_reset_fini(adev);
4197
d38ceaf9 4198 /* free i2c buses */
4562236b
HW
4199 if (!amdgpu_device_has_dc_support(adev))
4200 amdgpu_i2c_fini(adev);
bfca0289
SL
4201
4202 if (amdgpu_emu_mode != 1)
4203 amdgpu_atombios_fini(adev);
4204
d38ceaf9
AD
4205 kfree(adev->bios);
4206 adev->bios = NULL;
d37a3929
OC
4207
4208 px = amdgpu_device_supports_px(adev_to_drm(adev));
4209
4210 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
4211 apple_gmux_detect(NULL, NULL)))
84c8b22e 4212 vga_switcheroo_unregister_client(adev->pdev);
d37a3929
OC
4213
4214 if (px)
83ba126a 4215 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d37a3929 4216
38d6be81 4217 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
b8779475 4218 vga_client_unregister(adev->pdev);
e9bc1bf7 4219
62d5f9f7
LS
4220 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4221
4222 iounmap(adev->rmmio);
4223 adev->rmmio = NULL;
43c064db 4224 amdgpu_doorbell_fini(adev);
62d5f9f7
LS
4225 drm_dev_exit(idx);
4226 }
4227
d155bef0
AB
4228 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4229 amdgpu_pmu_fini(adev);
72de33f8 4230 if (adev->mman.discovery_bin)
a190d1c7 4231 amdgpu_discovery_fini(adev);
72c8c97b 4232
cfbb6b00
AG
4233 amdgpu_reset_put_reset_domain(adev->reset_domain);
4234 adev->reset_domain = NULL;
4235
72c8c97b
AG
4236 kfree(adev->pci_state);
4237
d38ceaf9
AD
4238}
4239
58144d28
ND
4240/**
4241 * amdgpu_device_evict_resources - evict device resources
4242 * @adev: amdgpu device object
4243 *
 4244 * Evicts all ttm device resources (vram BOs, gart table) from the lru list
4245 * of the vram memory type. Mainly used for evicting device resources
4246 * at suspend time.
4247 *
4248 */
7863c155 4249static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
58144d28 4250{
7863c155
ML
4251 int ret;
4252
e53d9665
ML
4253 /* No need to evict vram on APUs for suspend to ram or s2idle */
4254 if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
7863c155 4255 return 0;
58144d28 4256
7863c155
ML
4257 ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4258 if (ret)
58144d28 4259 DRM_WARN("evicting device resources failed\n");
7863c155 4260 return ret;
58144d28 4261}
d38ceaf9
AD
4262
4263/*
4264 * Suspend & resume.
4265 */
4266/**
810ddc3a 4267 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 4268 *
87e3f136 4269 * @dev: drm dev pointer
87e3f136 4270 * @fbcon : notify the fbdev of suspend
d38ceaf9
AD
4271 *
4272 * Puts the hw in the suspend state (all asics).
4273 * Returns 0 for success or an error on failure.
4274 * Called at driver suspend.
4275 */
de185019 4276int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9 4277{
a2e15b0e 4278 struct amdgpu_device *adev = drm_to_adev(dev);
d7274ec7 4279 int r = 0;
d38ceaf9 4280
d38ceaf9
AD
4281 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4282 return 0;
4283
44779b43 4284 adev->in_suspend = true;
3fa8f89d 4285
47ea2076
SF
4286 /* Evict the majority of BOs before grabbing the full access */
4287 r = amdgpu_device_evict_resources(adev);
4288 if (r)
4289 return r;
4290
d7274ec7
BZ
4291 if (amdgpu_sriov_vf(adev)) {
4292 amdgpu_virt_fini_data_exchange(adev);
4293 r = amdgpu_virt_request_full_gpu(adev, false);
4294 if (r)
4295 return r;
4296 }
4297
3fa8f89d
S
4298 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4299 DRM_WARN("smart shift update failed\n");
4300
5f818173 4301 if (fbcon)
087451f3 4302 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
5f818173 4303
beff74bc 4304 cancel_delayed_work_sync(&adev->delayed_init_work);
0dee7263 4305 flush_delayed_work(&adev->gfx.gfx_off_delay_work);
a5459475 4306
5e6932fe 4307 amdgpu_ras_suspend(adev);
4308
2196927b 4309 amdgpu_device_ip_suspend_phase1(adev);
fe1053b7 4310
c004d44e 4311 if (!adev->in_s0ix)
5d3a2d95 4312 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
94fa5660 4313
7863c155
ML
4314 r = amdgpu_device_evict_resources(adev);
4315 if (r)
4316 return r;
d38ceaf9 4317
8d35a259 4318 amdgpu_fence_driver_hw_fini(adev);
d38ceaf9 4319
2196927b 4320 amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 4321
d7274ec7
BZ
4322 if (amdgpu_sriov_vf(adev))
4323 amdgpu_virt_release_full_gpu(adev, false);
4324
d38ceaf9
AD
4325 return 0;
4326}
4327
4328/**
810ddc3a 4329 * amdgpu_device_resume - initiate device resume
d38ceaf9 4330 *
87e3f136 4331 * @dev: drm dev pointer
87e3f136 4332 * @fbcon : notify the fbdev of resume
d38ceaf9
AD
4333 *
4334 * Bring the hw back to operating state (all asics).
4335 * Returns 0 for success or an error on failure.
4336 * Called at driver resume.
4337 */
de185019 4338int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9 4339{
1348969a 4340 struct amdgpu_device *adev = drm_to_adev(dev);
03161a6e 4341 int r = 0;
d38ceaf9 4342
d7274ec7
BZ
4343 if (amdgpu_sriov_vf(adev)) {
4344 r = amdgpu_virt_request_full_gpu(adev, true);
4345 if (r)
4346 return r;
4347 }
4348
d38ceaf9
AD
4349 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4350 return 0;
4351
62498733 4352 if (adev->in_s0ix)
bc143d8b 4353 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
628c36d7 4354
d38ceaf9 4355 /* post card */
39c640c0 4356 if (amdgpu_device_need_post(adev)) {
4d2997ab 4357 r = amdgpu_device_asic_init(adev);
74b0b157 4358 if (r)
aac89168 4359 dev_err(adev->dev, "amdgpu asic init failed\n");
74b0b157 4360 }
d38ceaf9 4361
06ec9070 4362 r = amdgpu_device_ip_resume(adev);
d7274ec7 4363
e6707218 4364 if (r) {
aac89168 4365 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
3c22c1ea 4366 goto exit;
e6707218 4367 }
8d35a259 4368 amdgpu_fence_driver_hw_init(adev);
5ceb54c6 4369
06ec9070 4370 r = amdgpu_device_ip_late_init(adev);
03161a6e 4371 if (r)
3c22c1ea 4372 goto exit;
d38ceaf9 4373
beff74bc
AD
4374 queue_delayed_work(system_wq, &adev->delayed_init_work,
4375 msecs_to_jiffies(AMDGPU_RESUME_MS));
4376
c004d44e 4377 if (!adev->in_s0ix) {
5d3a2d95
AD
4378 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4379 if (r)
3c22c1ea 4380 goto exit;
5d3a2d95 4381 }
756e6880 4382
3c22c1ea
SF
4383exit:
4384 if (amdgpu_sriov_vf(adev)) {
4385 amdgpu_virt_init_data_exchange(adev);
4386 amdgpu_virt_release_full_gpu(adev, true);
4387 }
4388
4389 if (r)
4390 return r;
4391
96a5d8d4 4392 /* Make sure IB tests flushed */
beff74bc 4393 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 4394
a2e15b0e 4395 if (fbcon)
087451f3 4396 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
d38ceaf9 4397
5e6932fe 4398 amdgpu_ras_resume(adev);
4399
d09ef243
AD
4400 if (adev->mode_info.num_crtc) {
4401 /*
4402 * Most of the connector probing functions try to acquire runtime pm
4403 * refs to ensure that the GPU is powered on when connector polling is
4404 * performed. Since we're calling this from a runtime PM callback,
4405 * trying to acquire rpm refs will cause us to deadlock.
4406 *
4407 * Since we're guaranteed to be holding the rpm lock, it's safe to
4408 * temporarily disable the rpm helpers so this doesn't deadlock us.
4409 */
23a1a9e5 4410#ifdef CONFIG_PM
d09ef243 4411 dev->dev->power.disable_depth++;
23a1a9e5 4412#endif
d09ef243
AD
4413 if (!adev->dc_enabled)
4414 drm_helper_hpd_irq_event(dev);
4415 else
4416 drm_kms_helper_hotplug_event(dev);
23a1a9e5 4417#ifdef CONFIG_PM
d09ef243 4418 dev->dev->power.disable_depth--;
23a1a9e5 4419#endif
d09ef243 4420 }
44779b43
RZ
4421 adev->in_suspend = false;
4422
dc907c9d
JX
4423 if (adev->enable_mes)
4424 amdgpu_mes_self_test(adev);
4425
3fa8f89d
S
4426 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4427 DRM_WARN("smart shift update failed\n");
4428
4d3b9ae5 4429 return 0;
d38ceaf9
AD
4430}
4431
e3ecdffa
AD
4432/**
4433 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4434 *
4435 * @adev: amdgpu_device pointer
4436 *
4437 * The list of all the hardware IPs that make up the asic is walked and
4438 * the check_soft_reset callbacks are run. check_soft_reset determines
4439 * if the asic is still hung or not.
4440 * Returns true if any of the IPs are still in a hung state, false if not.
4441 */
06ec9070 4442static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
4443{
4444 int i;
4445 bool asic_hang = false;
4446
f993d628
ML
4447 if (amdgpu_sriov_vf(adev))
4448 return true;
4449
8bc04c29
AD
4450 if (amdgpu_asic_need_full_reset(adev))
4451 return true;
4452
63fbf42f 4453 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4454 if (!adev->ip_blocks[i].status.valid)
63fbf42f 4455 continue;
a1255107
AD
4456 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4457 adev->ip_blocks[i].status.hang =
4458 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4459 if (adev->ip_blocks[i].status.hang) {
aac89168 4460 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
4461 asic_hang = true;
4462 }
4463 }
4464 return asic_hang;
4465}
4466
e3ecdffa
AD
4467/**
4468 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4469 *
4470 * @adev: amdgpu_device pointer
4471 *
4472 * The list of all the hardware IPs that make up the asic is walked and the
4473 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
4474 * handles any IP specific hardware or software state changes that are
4475 * necessary for a soft reset to succeed.
4476 * Returns 0 on success, negative error code on failure.
4477 */
06ec9070 4478static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
4479{
4480 int i, r = 0;
4481
4482 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4483 if (!adev->ip_blocks[i].status.valid)
d31a501e 4484 continue;
a1255107
AD
4485 if (adev->ip_blocks[i].status.hang &&
4486 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4487 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
4488 if (r)
4489 return r;
4490 }
4491 }
4492
4493 return 0;
4494}
4495
e3ecdffa
AD
4496/**
4497 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4498 *
4499 * @adev: amdgpu_device pointer
4500 *
4501 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
4502 * reset is necessary to recover.
4503 * Returns true if a full asic reset is required, false if not.
4504 */
06ec9070 4505static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 4506{
da146d3b
AD
4507 int i;
4508
8bc04c29
AD
4509 if (amdgpu_asic_need_full_reset(adev))
4510 return true;
4511
da146d3b 4512 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4513 if (!adev->ip_blocks[i].status.valid)
da146d3b 4514 continue;
a1255107
AD
4515 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4516 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4517 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
4518 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4519 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 4520 if (adev->ip_blocks[i].status.hang) {
aac89168 4521 dev_info(adev->dev, "Some blocks need a full reset!\n");
da146d3b
AD
4522 return true;
4523 }
4524 }
35d782fe
CZ
4525 }
4526 return false;
4527}
4528
e3ecdffa
AD
4529/**
4530 * amdgpu_device_ip_soft_reset - do a soft reset
4531 *
4532 * @adev: amdgpu_device pointer
4533 *
4534 * The list of all the hardware IPs that make up the asic is walked and the
4535 * soft_reset callbacks are run if the block is hung. soft_reset handles any
4536 * IP specific hardware or software state changes that are necessary to soft
4537 * reset the IP.
4538 * Returns 0 on success, negative error code on failure.
4539 */
06ec9070 4540static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4541{
4542 int i, r = 0;
4543
4544 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4545 if (!adev->ip_blocks[i].status.valid)
35d782fe 4546 continue;
a1255107
AD
4547 if (adev->ip_blocks[i].status.hang &&
4548 adev->ip_blocks[i].version->funcs->soft_reset) {
4549 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
4550 if (r)
4551 return r;
4552 }
4553 }
4554
4555 return 0;
4556}
4557
e3ecdffa
AD
4558/**
4559 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4560 *
4561 * @adev: amdgpu_device pointer
4562 *
4563 * The list of all the hardware IPs that make up the asic is walked and the
4564 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
4565 * handles any IP specific hardware or software state changes that are
4566 * necessary after the IP has been soft reset.
4567 * Returns 0 on success, negative error code on failure.
4568 */
06ec9070 4569static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4570{
4571 int i, r = 0;
4572
4573 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4574 if (!adev->ip_blocks[i].status.valid)
35d782fe 4575 continue;
a1255107
AD
4576 if (adev->ip_blocks[i].status.hang &&
4577 adev->ip_blocks[i].version->funcs->post_soft_reset)
4578 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
4579 if (r)
4580 return r;
4581 }
4582
4583 return 0;
4584}
4585
e3ecdffa 4586/**
c33adbc7 4587 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
4588 *
4589 * @adev: amdgpu_device pointer
4590 *
4591 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
4592 * restore things like GPUVM page tables after a GPU reset where
4593 * the contents of VRAM might be lost.
403009bf
CK
4594 *
4595 * Returns:
4596 * 0 on success, negative error code on failure.
e3ecdffa 4597 */
c33adbc7 4598static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 4599{
c41d1cf6 4600 struct dma_fence *fence = NULL, *next = NULL;
403009bf 4601 struct amdgpu_bo *shadow;
e18aaea7 4602 struct amdgpu_bo_vm *vmbo;
403009bf 4603 long r = 1, tmo;
c41d1cf6
ML
4604
4605 if (amdgpu_sriov_runtime(adev))
b045d3af 4606 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
4607 else
4608 tmo = msecs_to_jiffies(100);
4609
aac89168 4610 dev_info(adev->dev, "recover vram bo from shadow start\n");
c41d1cf6 4611 mutex_lock(&adev->shadow_list_lock);
e18aaea7 4612 list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4994d1f0
LC
4613 /* If vm is compute context or adev is APU, shadow will be NULL */
4614 if (!vmbo->shadow)
4615 continue;
4616 shadow = vmbo->shadow;
4617
403009bf 4618 /* No need to recover an evicted BO */
d3116756
CK
4619 if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4620 shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4621 shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
403009bf
CK
4622 continue;
4623
4624 r = amdgpu_bo_restore_shadow(shadow, &next);
4625 if (r)
4626 break;
4627
c41d1cf6 4628 if (fence) {
1712fb1a 4629 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
4630 dma_fence_put(fence);
4631 fence = next;
1712fb1a 4632 if (tmo == 0) {
4633 r = -ETIMEDOUT;
c41d1cf6 4634 break;
1712fb1a 4635 } else if (tmo < 0) {
4636 r = tmo;
4637 break;
4638 }
403009bf
CK
4639 } else {
4640 fence = next;
c41d1cf6 4641 }
c41d1cf6
ML
4642 }
4643 mutex_unlock(&adev->shadow_list_lock);
4644
403009bf
CK
4645 if (fence)
4646 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
4647 dma_fence_put(fence);
4648
1712fb1a 4649 if (r < 0 || tmo <= 0) {
aac89168 4650 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
4651 return -EIO;
4652 }
c41d1cf6 4653
aac89168 4654 dev_info(adev->dev, "recover vram bo from shadow done\n");
403009bf 4655 return 0;
c41d1cf6
ML
4656}
4657
a90ad3c2 4658
e3ecdffa 4659/**
06ec9070 4660 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e 4661 *
982a820b 4662 * @adev: amdgpu_device pointer
87e3f136 4663 * @from_hypervisor: request from hypervisor
5740682e
ML
4664 *
 4665 * do VF FLR and reinitialize the ASIC
3f48c681 4666 * return 0 means succeeded, otherwise failed
e3ecdffa
AD
4667 */
4668static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4669 bool from_hypervisor)
5740682e
ML
4670{
4671 int r;
a5f67c93 4672 struct amdgpu_hive_info *hive = NULL;
7258fa31 4673 int retry_limit = 0;
5740682e 4674
7258fa31 4675retry:
c004d44e 4676 amdgpu_amdkfd_pre_reset(adev);
428890a3 4677
5740682e
ML
4678 if (from_hypervisor)
4679 r = amdgpu_virt_request_full_gpu(adev, true);
4680 else
4681 r = amdgpu_virt_reset_gpu(adev);
4682 if (r)
4683 return r;
f734b213 4684 amdgpu_irq_gpu_reset_resume_helper(adev);
a90ad3c2 4685
83f24a8f
HC
 4686 /* some SW cleanup the VF needs to do before recovery */
4687 amdgpu_virt_post_reset(adev);
4688
a90ad3c2 4689 /* Resume IP prior to SMC */
06ec9070 4690 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
4691 if (r)
4692 goto error;
a90ad3c2 4693
c9ffa427 4694 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 4695
7a3e0bb2
RZ
4696 r = amdgpu_device_fw_loading(adev);
4697 if (r)
4698 return r;
4699
a90ad3c2 4700 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 4701 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
4702 if (r)
4703 goto error;
a90ad3c2 4704
a5f67c93
ZL
4705 hive = amdgpu_get_xgmi_hive(adev);
4706 /* Update PSP FW topology after reset */
4707 if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4708 r = amdgpu_xgmi_update_topology(hive, adev);
4709
4710 if (hive)
4711 amdgpu_put_xgmi_hive(hive);
4712
4713 if (!r) {
a5f67c93 4714 r = amdgpu_ib_ring_tests(adev);
9c12f5cd 4715
c004d44e 4716 amdgpu_amdkfd_post_reset(adev);
a5f67c93 4717 }
a90ad3c2 4718
abc34253 4719error:
c41d1cf6 4720 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 4721 amdgpu_inc_vram_lost(adev);
c33adbc7 4722 r = amdgpu_device_recover_vram(adev);
a90ad3c2 4723 }
437f3e0b 4724 amdgpu_virt_release_full_gpu(adev, true);
a90ad3c2 4725
7258fa31
SK
4726 if (AMDGPU_RETRY_SRIOV_RESET(r)) {
4727 if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
4728 retry_limit++;
4729 goto retry;
4730 } else
4731 DRM_ERROR("GPU reset retry is beyond the retry limit\n");
4732 }
4733
a90ad3c2
ML
4734 return r;
4735}
4736
9a1cddd6 4737/**
 4738 * amdgpu_device_has_job_running - check if there is any job in the pending list
4739 *
982a820b 4740 * @adev: amdgpu_device pointer
9a1cddd6 4741 *
 4742 * check if there is any job in the pending list
4743 */
4744bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4745{
4746 int i;
4747 struct drm_sched_job *job;
4748
4749 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4750 struct amdgpu_ring *ring = adev->rings[i];
4751
4752 if (!ring || !ring->sched.thread)
4753 continue;
4754
4755 spin_lock(&ring->sched.job_list_lock);
6efa4b46
LT
4756 job = list_first_entry_or_null(&ring->sched.pending_list,
4757 struct drm_sched_job, list);
9a1cddd6 4758 spin_unlock(&ring->sched.job_list_lock);
4759 if (job)
4760 return true;
4761 }
4762 return false;
4763}
4764
12938fad
CK
4765/**
4766 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4767 *
982a820b 4768 * @adev: amdgpu_device pointer
12938fad
CK
4769 *
4770 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4771 * a hung GPU.
4772 */
4773bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4774{
12938fad 4775
3ba7b418
AG
4776 if (amdgpu_gpu_recovery == 0)
4777 goto disabled;
4778
1a11a65d
YC
4779 /* Skip soft reset check in fatal error mode */
4780 if (!amdgpu_ras_is_poison_mode_supported(adev))
4781 return true;
4782
3ba7b418
AG
4783 if (amdgpu_sriov_vf(adev))
4784 return true;
4785
4786 if (amdgpu_gpu_recovery == -1) {
4787 switch (adev->asic_type) {
b3523c45
AD
4788#ifdef CONFIG_DRM_AMDGPU_SI
4789 case CHIP_VERDE:
4790 case CHIP_TAHITI:
4791 case CHIP_PITCAIRN:
4792 case CHIP_OLAND:
4793 case CHIP_HAINAN:
4794#endif
4795#ifdef CONFIG_DRM_AMDGPU_CIK
4796 case CHIP_KAVERI:
4797 case CHIP_KABINI:
4798 case CHIP_MULLINS:
4799#endif
4800 case CHIP_CARRIZO:
4801 case CHIP_STONEY:
4802 case CHIP_CYAN_SKILLFISH:
3ba7b418 4803 goto disabled;
b3523c45
AD
4804 default:
4805 break;
3ba7b418 4806 }
12938fad
CK
4807 }
4808
4809 return true;
3ba7b418
AG
4810
4811disabled:
aac89168 4812 dev_info(adev->dev, "GPU recovery disabled.\n");
3ba7b418 4813 return false;
12938fad
CK
4814}
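/*
 * The amdgpu.gpu_recovery parameter checked above works roughly as follows
 * (an informal summary of the logic, not additional policy):
 *
 *   amdgpu.gpu_recovery=0    never attempt GPU recovery
 *   amdgpu.gpu_recovery=1    always attempt recovery
 *   amdgpu.gpu_recovery=-1   auto (default): recovery is enabled except on the
 *                            older SI/CIK/Carrizo-class parts listed above
 *
 * Unless explicitly disabled, SR-IOV VFs and RAS fatal-error mode always
 * report that recovery should be attempted.
 */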
4815
5c03e584
FX
4816int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
4817{
47fc644f
SS
4818 u32 i;
4819 int ret = 0;
5c03e584 4820
47fc644f 4821 amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5c03e584 4822
47fc644f 4823 dev_info(adev->dev, "GPU mode1 reset\n");
5c03e584 4824
47fc644f
SS
4825 /* disable BM */
4826 pci_clear_master(adev->pdev);
5c03e584 4827
47fc644f 4828 amdgpu_device_cache_pci_state(adev->pdev);
5c03e584 4829
47fc644f
SS
4830 if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
4831 dev_info(adev->dev, "GPU smu mode1 reset\n");
4832 ret = amdgpu_dpm_mode1_reset(adev);
4833 } else {
4834 dev_info(adev->dev, "GPU psp mode1 reset\n");
4835 ret = psp_gpu_reset(adev);
4836 }
5c03e584 4837
47fc644f 4838 if (ret)
2c0f880a 4839 goto mode1_reset_failed;
5c03e584 4840
47fc644f 4841 amdgpu_device_load_pci_state(adev->pdev);
15c5c5f5
LL
4842 ret = amdgpu_psp_wait_for_bootloader(adev);
4843 if (ret)
2c0f880a 4844 goto mode1_reset_failed;
5c03e584 4845
47fc644f
SS
4846 /* wait for asic to come out of reset */
4847 for (i = 0; i < adev->usec_timeout; i++) {
4848 u32 memsize = adev->nbio.funcs->get_memsize(adev);
5c03e584 4849
47fc644f
SS
4850 if (memsize != 0xffffffff)
4851 break;
4852 udelay(1);
4853 }
5c03e584 4854
2c0f880a
HZ
4855 if (i >= adev->usec_timeout) {
4856 ret = -ETIMEDOUT;
4857 goto mode1_reset_failed;
4858 }
4859
47fc644f 4860 amdgpu_atombios_scratch_regs_engine_hung(adev, false);
15c5c5f5 4861
2c0f880a
HZ
4862 return 0;
4863
4864mode1_reset_failed:
4865 dev_err(adev->dev, "GPU mode1 reset failed\n");
47fc644f 4866 return ret;
5c03e584 4867}
5c6dd71e 4868
e3c1b071 4869int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
04442bf7 4870 struct amdgpu_reset_context *reset_context)
26bc5340 4871{
5c1e6fa4 4872 int i, r = 0;
04442bf7
LL
4873 struct amdgpu_job *job = NULL;
4874 bool need_full_reset =
4875 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4876
4877 if (reset_context->reset_req_dev == adev)
4878 job = reset_context->job;
71182665 4879
b602ca5f
TZ
4880 if (amdgpu_sriov_vf(adev)) {
4881 /* stop the data exchange thread */
4882 amdgpu_virt_fini_data_exchange(adev);
4883 }
4884
9e225fb9
AG
4885 amdgpu_fence_driver_isr_toggle(adev, true);
4886
71182665 4887 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
4888 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4889 struct amdgpu_ring *ring = adev->rings[i];
4890
51687759 4891 if (!ring || !ring->sched.thread)
0875dc9e 4892 continue;
5740682e 4893
b8920e1e
SS
 4894 /* Clear job fences from the fence driver to avoid force_completion
 4895 * leaving NULL and vm flush fences in the fence driver
4896 */
5c1e6fa4 4897 amdgpu_fence_driver_clear_job_fences(ring);
c530b02f 4898
2f9d4084
ML
4899 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
4900 amdgpu_fence_driver_force_completion(ring);
0875dc9e 4901 }
d38ceaf9 4902
9e225fb9
AG
4903 amdgpu_fence_driver_isr_toggle(adev, false);
4904
ff99849b 4905 if (job && job->vm)
222b5f04
AG
4906 drm_sched_increase_karma(&job->base);
4907
04442bf7 4908 r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
404b277b 4909 /* If reset handler not implemented, continue; otherwise return */
b8920e1e 4910 if (r == -EOPNOTSUPP)
404b277b
LL
4911 r = 0;
4912 else
04442bf7
LL
4913 return r;
4914
1d721ed6 4915 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
4916 if (!amdgpu_sriov_vf(adev)) {
4917
4918 if (!need_full_reset)
4919 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4920
360cd081
LG
4921 if (!need_full_reset && amdgpu_gpu_recovery &&
4922 amdgpu_device_ip_check_soft_reset(adev)) {
26bc5340
AG
4923 amdgpu_device_ip_pre_soft_reset(adev);
4924 r = amdgpu_device_ip_soft_reset(adev);
4925 amdgpu_device_ip_post_soft_reset(adev);
4926 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 4927 dev_info(adev->dev, "soft reset failed, will fall back to full reset!\n");
26bc5340
AG
4928 need_full_reset = true;
4929 }
4930 }
4931
4932 if (need_full_reset)
4933 r = amdgpu_device_ip_suspend(adev);
04442bf7
LL
4934 if (need_full_reset)
4935 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4936 else
4937 clear_bit(AMDGPU_NEED_FULL_RESET,
4938 &reset_context->flags);
26bc5340
AG
4939 }
4940
4941 return r;
4942}
4943
15fd09a0
SA
4944static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
4945{
15fd09a0
SA
4946 int i;
4947
38a15ad9 4948 lockdep_assert_held(&adev->reset_domain->sem);
15fd09a0
SA
4949
4950 for (i = 0; i < adev->num_regs; i++) {
651d7ee6
SA
4951 adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]);
4952 trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i],
4953 adev->reset_dump_reg_value[i]);
15fd09a0
SA
4954 }
4955
4956 return 0;
4957}
4958
3d8785f6
SA
4959#ifdef CONFIG_DEV_COREDUMP
4960static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
4961 size_t count, void *data, size_t datalen)
4962{
4963 struct drm_printer p;
4964 struct amdgpu_device *adev = data;
4965 struct drm_print_iterator iter;
4966 int i;
4967
4968 iter.data = buffer;
4969 iter.offset = 0;
4970 iter.start = offset;
4971 iter.remain = count;
4972
4973 p = drm_coredump_printer(&iter);
4974
4975 drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
4976 drm_printf(&p, "kernel: " UTS_RELEASE "\n");
4977 drm_printf(&p, "module: " KBUILD_MODNAME "\n");
4978 drm_printf(&p, "time: %lld.%09ld\n", adev->reset_time.tv_sec, adev->reset_time.tv_nsec);
4979 if (adev->reset_task_info.pid)
4980 drm_printf(&p, "process_name: %s PID: %d\n",
4981 adev->reset_task_info.process_name,
4982 adev->reset_task_info.pid);
4983
4984 if (adev->reset_vram_lost)
4985 drm_printf(&p, "VRAM is lost due to GPU reset!\n");
4986 if (adev->num_regs) {
4987 drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n");
4988
4989 for (i = 0; i < adev->num_regs; i++)
4990 drm_printf(&p, "0x%08x: 0x%08x\n",
4991 adev->reset_dump_reg_list[i],
4992 adev->reset_dump_reg_value[i]);
4993 }
4994
4995 return count - iter.remain;
4996}
4997
4998static void amdgpu_devcoredump_free(void *data)
4999{
5000}
5001
5002static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev)
5003{
5004 struct drm_device *dev = adev_to_drm(adev);
5005
5006 ktime_get_ts64(&adev->reset_time);
d68ccdb2 5007 dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_NOWAIT,
3d8785f6
SA
5008 amdgpu_devcoredump_read, amdgpu_devcoredump_free);
5009}
5010#endif
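/*
 * Note (illustrative, not part of the original file): once registered via
 * dev_coredumpm() above, the dump generated by amdgpu_devcoredump_read()
 * is exposed to userspace by the devcoredump framework and is typically
 * readable from a node such as /sys/class/devcoredump/devcdN/data until
 * it expires or is dismissed by writing to that node.
 */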
5011
04442bf7
LL
5012int amdgpu_do_asic_reset(struct list_head *device_list_handle,
5013 struct amdgpu_reset_context *reset_context)
26bc5340
AG
5014{
5015 struct amdgpu_device *tmp_adev = NULL;
04442bf7 5016 bool need_full_reset, skip_hw_reset, vram_lost = false;
26bc5340 5017 int r = 0;
f5c7e779 5018 bool gpu_reset_for_dev_remove = 0;
26bc5340 5019
04442bf7
LL
5020 /* Try reset handler method first */
5021 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5022 reset_list);
15fd09a0 5023 amdgpu_reset_reg_dumps(tmp_adev);
0a83bb35
LL
5024
5025 reset_context->reset_device_list = device_list_handle;
04442bf7 5026 r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
404b277b 5027 /* If reset handler not implemented, continue; otherwise return */
b8920e1e 5028 if (r == -EOPNOTSUPP)
404b277b
LL
5029 r = 0;
5030 else
04442bf7
LL
5031 return r;
5032
5033 /* Reset handler not implemented, use the default method */
5034 need_full_reset =
5035 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5036 skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
5037
f5c7e779
YC
5038 gpu_reset_for_dev_remove =
5039 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5040 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5041
26bc5340 5042 /*
655ce9cb 5043 * ASIC reset has to be done on all XGMI hive nodes ASAP
26bc5340
AG
5044 * to allow proper link negotiation in FW (within 1 sec)
5045 */
7ac71382 5046 if (!skip_hw_reset && need_full_reset) {
655ce9cb 5047 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
041a62bc 5048 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 5049 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
e3c1b071 5050 tmp_adev->gmc.xgmi.pending_reset = false;
c96cf282 5051 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
5052 r = -EALREADY;
5053 } else
5054 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 5055
041a62bc 5056 if (r) {
aac89168 5057 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4a580877 5058 r, adev_to_drm(tmp_adev)->unique);
041a62bc 5059 break;
ce316fa5
LM
5060 }
5061 }
5062
041a62bc
AG
5063 /* For XGMI wait for all resets to complete before proceed */
5064 if (!r) {
655ce9cb 5065 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
ce316fa5
LM
5066 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5067 flush_work(&tmp_adev->xgmi_reset_work);
5068 r = tmp_adev->asic_reset_res;
5069 if (r)
5070 break;
ce316fa5
LM
5071 }
5072 }
5073 }
ce316fa5 5074 }
26bc5340 5075
43c4d576 5076 if (!r && amdgpu_ras_intr_triggered()) {
655ce9cb 5077 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5e67bba3 5078 if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops &&
5079 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
5080 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev);
43c4d576
JC
5081 }
5082
00eaa571 5083 amdgpu_ras_intr_cleared();
43c4d576 5084 }
00eaa571 5085
f5c7e779
YC
5086 /* Since the mode1 reset affects base ip blocks, the
5087 * phase1 ip blocks need to be resumed. Otherwise there
5088 * will be a BIOS signature error and the psp bootloader
5089 * can't load kdb on the next amdgpu install.
5090 */
5091 if (gpu_reset_for_dev_remove) {
5092 list_for_each_entry(tmp_adev, device_list_handle, reset_list)
5093 amdgpu_device_ip_resume_phase1(tmp_adev);
5094
5095 goto end;
5096 }
5097
655ce9cb 5098 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
26bc5340
AG
5099 if (need_full_reset) {
5100 /* post card */
e3c1b071 5101 r = amdgpu_device_asic_init(tmp_adev);
5102 if (r) {
aac89168 5103 dev_warn(tmp_adev->dev, "asic atom init failed!");
e3c1b071 5104 } else {
26bc5340 5105 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
9cec53c1 5106
26bc5340
AG
5107 r = amdgpu_device_ip_resume_phase1(tmp_adev);
5108 if (r)
5109 goto out;
5110
5111 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
3d8785f6
SA
5112#ifdef CONFIG_DEV_COREDUMP
5113 tmp_adev->reset_vram_lost = vram_lost;
5114 memset(&tmp_adev->reset_task_info, 0,
5115 sizeof(tmp_adev->reset_task_info));
5116 if (reset_context->job && reset_context->job->vm)
5117 tmp_adev->reset_task_info =
5118 reset_context->job->vm->task_info;
5119 amdgpu_reset_capture_coredumpm(tmp_adev);
5120#endif
26bc5340 5121 if (vram_lost) {
77e7f829 5122 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 5123 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
5124 }
5125
26bc5340
AG
5126 r = amdgpu_device_fw_loading(tmp_adev);
5127 if (r)
5128 return r;
5129
5130 r = amdgpu_device_ip_resume_phase2(tmp_adev);
5131 if (r)
5132 goto out;
5133
5134 if (vram_lost)
5135 amdgpu_device_fill_reset_magic(tmp_adev);
5136
fdafb359
EQ
5137 /*
5138 * Add this ASIC back as tracked since the reset has already
5139 * completed successfully.
5140 */
5141 amdgpu_register_gpu_instance(tmp_adev);
5142
04442bf7
LL
5143 if (!reset_context->hive &&
5144 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
e3c1b071 5145 amdgpu_xgmi_add_device(tmp_adev);
5146
7c04ca50 5147 r = amdgpu_device_ip_late_init(tmp_adev);
5148 if (r)
5149 goto out;
5150
087451f3 5151 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
565d1941 5152
e8fbaf03
GC
5153 /*
5154 * The GPU enters a bad state once the number of faulty
5155 * pages detected by ECC reaches the threshold, after
5156 * which RAS recovery is scheduled. So add one check
5157 * here to break recovery if the count indeed exceeds
5158 * the bad page threshold, and remind the user to
5159 * retire this GPU or to set a bigger
5160 * bad_page_threshold value to fix this the next time
5161 * the driver is probed.
5162 */
11003c68 5163 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
e8fbaf03
GC
5164 /* must succeed. */
5165 amdgpu_ras_resume(tmp_adev);
5166 } else {
5167 r = -EINVAL;
5168 goto out;
5169 }
e79a04d5 5170
26bc5340 5171 /* Update PSP FW topology after reset */
04442bf7
LL
5172 if (reset_context->hive &&
5173 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5174 r = amdgpu_xgmi_update_topology(
5175 reset_context->hive, tmp_adev);
26bc5340
AG
5176 }
5177 }
5178
26bc5340
AG
5179out:
5180 if (!r) {
5181 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5182 r = amdgpu_ib_ring_tests(tmp_adev);
5183 if (r) {
5184 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
26bc5340
AG
5185 need_full_reset = true;
5186 r = -EAGAIN;
5187 goto end;
5188 }
5189 }
5190
5191 if (!r)
5192 r = amdgpu_device_recover_vram(tmp_adev);
5193 else
5194 tmp_adev->asic_reset_res = r;
5195 }
5196
5197end:
04442bf7
LL
5198 if (need_full_reset)
5199 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5200 else
5201 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340
AG
5202 return r;
5203}
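/*
 * Minimal sketch (an assumption, not part of the original file) of how a
 * single-device caller can drive amdgpu_do_asic_reset(): build a one-entry
 * reset list, fill an amdgpu_reset_context and request a full reset. It
 * mirrors the pattern used by amdgpu_pci_slot_reset() further below; real
 * callers usually run amdgpu_device_pre_asic_reset() on each device first,
 * as that function does.
 */
static int example_full_reset_single_device(struct amdgpu_device *adev)
{
	struct amdgpu_reset_context reset_context;
	struct list_head device_list;

	memset(&reset_context, 0, sizeof(reset_context));
	INIT_LIST_HEAD(&device_list);
	list_add_tail(&adev->reset_list, &device_list);

	/* no specific handler requested; ask for a full (hard) reset */
	reset_context.method = AMD_RESET_METHOD_NONE;
	reset_context.reset_req_dev = adev;
	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);

	return amdgpu_do_asic_reset(&device_list, &reset_context);
}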
5204
e923be99 5205static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
26bc5340 5206{
5740682e 5207
a3a09142
AD
5208 switch (amdgpu_asic_reset_method(adev)) {
5209 case AMD_RESET_METHOD_MODE1:
5210 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5211 break;
5212 case AMD_RESET_METHOD_MODE2:
5213 adev->mp1_state = PP_MP1_STATE_RESET;
5214 break;
5215 default:
5216 adev->mp1_state = PP_MP1_STATE_NONE;
5217 break;
5218 }
26bc5340 5219}
d38ceaf9 5220
e923be99 5221static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
26bc5340 5222{
89041940 5223 amdgpu_vf_error_trans_all(adev);
a3a09142 5224 adev->mp1_state = PP_MP1_STATE_NONE;
91fb309d
HC
5225}
5226
3f12acc8
EQ
5227static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5228{
5229 struct pci_dev *p = NULL;
5230
5231 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5232 adev->pdev->bus->number, 1);
5233 if (p) {
5234 pm_runtime_enable(&(p->dev));
5235 pm_runtime_resume(&(p->dev));
5236 }
b85e285e
YY
5237
5238 pci_dev_put(p);
3f12acc8
EQ
5239}
5240
5241static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5242{
5243 enum amd_reset_method reset_method;
5244 struct pci_dev *p = NULL;
5245 u64 expires;
5246
5247 /*
5248 * For now, only BACO and mode1 reset are confirmed
5249 * to suffer from the audio issue if not properly suspended.
5250 */
5251 reset_method = amdgpu_asic_reset_method(adev);
5252 if ((reset_method != AMD_RESET_METHOD_BACO) &&
5253 (reset_method != AMD_RESET_METHOD_MODE1))
5254 return -EINVAL;
5255
5256 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5257 adev->pdev->bus->number, 1);
5258 if (!p)
5259 return -ENODEV;
5260
5261 expires = pm_runtime_autosuspend_expiration(&(p->dev));
5262 if (!expires)
5263 /*
5264 * If we cannot get the audio device autosuspend delay,
5265 * a fixed 4s interval will be used. Since 3s is the audio
5266 * controller's default autosuspend delay setting, the 4s
5267 * used here is guaranteed to cover it.
5268 */
54b7feb9 5269 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
5270
5271 while (!pm_runtime_status_suspended(&(p->dev))) {
5272 if (!pm_runtime_suspend(&(p->dev)))
5273 break;
5274
5275 if (expires < ktime_get_mono_fast_ns()) {
5276 dev_warn(adev->dev, "failed to suspend display audio\n");
b85e285e 5277 pci_dev_put(p);
3f12acc8
EQ
5278 /* TODO: abort the succeeding gpu reset? */
5279 return -ETIMEDOUT;
5280 }
5281 }
5282
5283 pm_runtime_disable(&(p->dev));
5284
b85e285e 5285 pci_dev_put(p);
3f12acc8
EQ
5286 return 0;
5287}
5288
d193b12b 5289static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
247c7b0d
AG
5290{
5291 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5292
5293#if defined(CONFIG_DEBUG_FS)
5294 if (!amdgpu_sriov_vf(adev))
5295 cancel_work(&adev->reset_work);
5296#endif
5297
5298 if (adev->kfd.dev)
5299 cancel_work(&adev->kfd.reset_work);
5300
5301 if (amdgpu_sriov_vf(adev))
5302 cancel_work(&adev->virt.flr_work);
5303
5304 if (con && adev->ras_enabled)
5305 cancel_work(&con->recovery_work);
5306
5307}
5308
26bc5340 5309/**
6e9c65f7 5310 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
26bc5340 5311 *
982a820b 5312 * @adev: amdgpu_device pointer
26bc5340 5313 * @job: which job triggered the hang
80bd2de1 5314 * @reset_context: amdgpu reset context pointer
26bc5340
AG
5315 *
5316 * Attempt to reset the GPU if it has hung (all ASICs).
5317 * Attempt a soft reset or full reset and reinitialize the ASIC.
5318 * Returns 0 for success or an error on failure.
5319 */
5320
cf727044 5321int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
f1549c09
LG
5322 struct amdgpu_job *job,
5323 struct amdgpu_reset_context *reset_context)
26bc5340 5324{
1d721ed6 5325 struct list_head device_list, *device_list_handle = NULL;
7dd8c205 5326 bool job_signaled = false;
26bc5340 5327 struct amdgpu_hive_info *hive = NULL;
26bc5340 5328 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 5329 int i, r = 0;
bb5c7235 5330 bool need_emergency_restart = false;
3f12acc8 5331 bool audio_suspended = false;
f5c7e779
YC
5332 bool gpu_reset_for_dev_remove = false;
5333
5334 gpu_reset_for_dev_remove =
5335 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5336 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340 5337
6e3cd2a9 5338 /*
bb5c7235
WS
5339 * Special case: RAS triggered and full reset isn't supported
5340 */
5341 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5342
d5ea093e
AG
5343 /*
5344 * Flush RAM to disk so that after reboot
5345 * the user can read log and see why the system rebooted.
5346 */
bb5c7235 5347 if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
5348 DRM_WARN("Emergency reboot.");
5349
5350 ksys_sync_helper();
5351 emergency_restart();
5352 }
5353
b823821f 5354 dev_info(adev->dev, "GPU %s begin!\n",
bb5c7235 5355 need_emergency_restart ? "jobs stop":"reset");
26bc5340 5356
175ac6ec
ZL
5357 if (!amdgpu_sriov_vf(adev))
5358 hive = amdgpu_get_xgmi_hive(adev);
681260df 5359 if (hive)
53b3f8f4 5360 mutex_lock(&hive->hive_lock);
26bc5340 5361
f1549c09
LG
5362 reset_context->job = job;
5363 reset_context->hive = hive;
9e94d22c
EQ
5364 /*
5365 * Build list of devices to reset.
5366 * In case we are in XGMI hive mode, resort the device list
5367 * to put adev in the 1st position.
5368 */
5369 INIT_LIST_HEAD(&device_list);
175ac6ec 5370 if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
83d29a5f 5371 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
655ce9cb 5372 list_add_tail(&tmp_adev->reset_list, &device_list);
83d29a5f
YC
5373 if (gpu_reset_for_dev_remove && adev->shutdown)
5374 tmp_adev->shutdown = true;
5375 }
655ce9cb 5376 if (!list_is_first(&adev->reset_list, &device_list))
5377 list_rotate_to_front(&adev->reset_list, &device_list);
5378 device_list_handle = &device_list;
26bc5340 5379 } else {
655ce9cb 5380 list_add_tail(&adev->reset_list, &device_list);
26bc5340
AG
5381 device_list_handle = &device_list;
5382 }
5383
e923be99
AG
5384 /* We need to lock reset domain only once both for XGMI and single device */
5385 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5386 reset_list);
3675c2f2 5387 amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
e923be99 5388
1d721ed6 5389 /* block all schedulers and reset given job's ring */
655ce9cb 5390 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f287a3c5 5391
e923be99 5392 amdgpu_device_set_mp1_state(tmp_adev);
f287a3c5 5393
3f12acc8
EQ
5394 /*
5395 * Try to put the audio codec into suspend state
5396 * before gpu reset started.
5397 *
5398 * Because the power domain of the graphics device is
5399 * shared with the AZ power domain, without this we may
5400 * change the audio hardware from behind the audio
5401 * driver's back. That would trigger some audio codec
5402 * errors.
5403 */
5404 if (!amdgpu_device_suspend_display_audio(tmp_adev))
5405 audio_suspended = true;
5406
9e94d22c
EQ
5407 amdgpu_ras_set_error_query_ready(tmp_adev, false);
5408
52fb44cf
EQ
5409 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5410
c004d44e 5411 if (!amdgpu_sriov_vf(tmp_adev))
428890a3 5412 amdgpu_amdkfd_pre_reset(tmp_adev);
9e94d22c 5413
12ffa55d
AG
5414 /*
5415 * Mark these ASICs to be reset as untracked first,
5416 * and add them back after the reset has completed.
5417 */
5418 amdgpu_unregister_gpu_instance(tmp_adev);
5419
163d4cd2 5420 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
565d1941 5421
f1c1314b 5422 /* disable ras on ALL IPs */
bb5c7235 5423 if (!need_emergency_restart &&
b823821f 5424 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 5425 amdgpu_ras_suspend(tmp_adev);
5426
1d721ed6
AG
5427 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5428 struct amdgpu_ring *ring = tmp_adev->rings[i];
5429
5430 if (!ring || !ring->sched.thread)
5431 continue;
5432
0b2d2c2e 5433 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 5434
bb5c7235 5435 if (need_emergency_restart)
7c6e68c7 5436 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6 5437 }
8f8c80f4 5438 atomic_inc(&tmp_adev->gpu_reset_counter);
1d721ed6
AG
5439 }
5440
bb5c7235 5441 if (need_emergency_restart)
7c6e68c7
AG
5442 goto skip_sched_resume;
5443
1d721ed6
AG
5444 /*
5445 * Must check guilty signal here since after this point all old
5446 * HW fences are force signaled.
5447 *
5448 * job->base holds a reference to parent fence
5449 */
f6a3f660 5450 if (job && dma_fence_is_signaled(&job->hw_fence)) {
1d721ed6 5451 job_signaled = true;
1d721ed6
AG
5452 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5453 goto skip_hw_reset;
5454 }
5455
26bc5340 5456retry: /* Rest of adevs pre asic reset from XGMI hive. */
655ce9cb 5457 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f5c7e779
YC
5458 if (gpu_reset_for_dev_remove) {
5459 /* Workaround for ASICs that need to disable SMC first */
5460 amdgpu_device_smu_fini_early(tmp_adev);
5461 }
f1549c09 5462 r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
26bc5340
AG
5463 /* TODO: Should we stop? */
5464 if (r) {
aac89168 5465 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
4a580877 5466 r, adev_to_drm(tmp_adev)->unique);
26bc5340
AG
5467 tmp_adev->asic_reset_res = r;
5468 }
247c7b0d
AG
5469
5470 /*
5471 * Drop all pending non-scheduler resets. Scheduler resets
5472 * were already dropped during drm_sched_stop
5473 */
d193b12b 5474 amdgpu_device_stop_pending_resets(tmp_adev);
26bc5340
AG
5475 }
5476
5477 /* Actual ASIC resets if needed.*/
4f30d920 5478 /* Host driver will handle XGMI hive reset for SRIOV */
26bc5340
AG
5479 if (amdgpu_sriov_vf(adev)) {
5480 r = amdgpu_device_reset_sriov(adev, job ? false : true);
5481 if (r)
5482 adev->asic_reset_res = r;
950d6425 5483
28606c4e 5484 /* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */
4e8303cf
LL
5485 if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
5486 IP_VERSION(9, 4, 2) ||
5487 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
950d6425 5488 amdgpu_ras_resume(adev);
26bc5340 5489 } else {
f1549c09 5490 r = amdgpu_do_asic_reset(device_list_handle, reset_context);
b98a1648 5491 if (r && r == -EAGAIN)
26bc5340 5492 goto retry;
f5c7e779
YC
5493
5494 if (!r && gpu_reset_for_dev_remove)
5495 goto recover_end;
26bc5340
AG
5496 }
5497
1d721ed6
AG
5498skip_hw_reset:
5499
26bc5340 5500 /* Post ASIC reset for all devs .*/
655ce9cb 5501 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
7c6e68c7 5502
1d721ed6
AG
5503 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5504 struct amdgpu_ring *ring = tmp_adev->rings[i];
5505
5506 if (!ring || !ring->sched.thread)
5507 continue;
5508
6868a2c4 5509 drm_sched_start(&ring->sched, true);
1d721ed6
AG
5510 }
5511
4e8303cf
LL
5512 if (adev->enable_mes &&
5513 amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(11, 0, 3))
ed67f729
JX
5514 amdgpu_mes_self_test(tmp_adev);
5515
b8920e1e 5516 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
4a580877 5517 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
1d721ed6 5518
7258fa31
SK
5519 if (tmp_adev->asic_reset_res)
5520 r = tmp_adev->asic_reset_res;
5521
1d721ed6 5522 tmp_adev->asic_reset_res = 0;
26bc5340
AG
5523
5524 if (r) {
5525 /* bad news, how to tell it to userspace ? */
12ffa55d 5526 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
5527 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5528 } else {
12ffa55d 5529 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
3fa8f89d
S
5530 if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5531 DRM_WARN("smart shift update failed\n");
26bc5340 5532 }
7c6e68c7 5533 }
26bc5340 5534
7c6e68c7 5535skip_sched_resume:
655ce9cb 5536 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
428890a3 5537 /* unlock kfd: SRIOV would do it separately */
c004d44e 5538 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
428890a3 5539 amdgpu_amdkfd_post_reset(tmp_adev);
8e2712e7 5540
5541 /* kfd_post_reset will do nothing if kfd device is not initialized,
5542 * need to bring up kfd here if it was not initialized before
5543 */
5544 if (!adev->kfd.init_complete)
5545 amdgpu_amdkfd_device_init(adev);
5546
3f12acc8
EQ
5547 if (audio_suspended)
5548 amdgpu_device_resume_display_audio(tmp_adev);
e923be99
AG
5549
5550 amdgpu_device_unset_mp1_state(tmp_adev);
d293470e
YC
5551
5552 amdgpu_ras_set_error_query_ready(tmp_adev, true);
26bc5340
AG
5553 }
5554
f5c7e779 5555recover_end:
e923be99
AG
5556 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5557 reset_list);
5558 amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5559
9e94d22c 5560 if (hive) {
9e94d22c 5561 mutex_unlock(&hive->hive_lock);
d95e8e97 5562 amdgpu_put_xgmi_hive(hive);
9e94d22c 5563 }
26bc5340 5564
f287a3c5 5565 if (r)
26bc5340 5566 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
ab9a0b1f
AG
5567
5568 atomic_set(&adev->reset_domain->reset_res, r);
d38ceaf9
AD
5569 return r;
5570}
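/*
 * Illustrative sketch (an assumption, not part of the original file) of
 * how a hang handler, e.g. a job timeout callback, might enter the
 * recovery path above. Only fields used elsewhere in this file are
 * filled in; real callers may set additional reset_context flags.
 */
static void example_recover_from_job_hang(struct amdgpu_device *adev,
					  struct amdgpu_job *job)
{
	struct amdgpu_reset_context reset_context;

	memset(&reset_context, 0, sizeof(reset_context));
	reset_context.method = AMD_RESET_METHOD_NONE;	/* let the driver decide */
	reset_context.reset_req_dev = adev;
	clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);

	if (amdgpu_device_gpu_recover(adev, job, &reset_context))
		dev_err(adev->dev, "example: GPU recovery failed\n");
}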
5571
e3ecdffa
AD
5572/**
5573 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
5574 *
5575 * @adev: amdgpu_device pointer
5576 *
5577 * Fetches and stores in the driver the PCIE capabilities (gen speed
5578 * and lanes) of the slot the device is in. Handles APUs and
5579 * virtualized environments where PCIE config space may not be available.
5580 */
5494d864 5581static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 5582{
5d9a6330 5583 struct pci_dev *pdev;
c5313457
HK
5584 enum pci_bus_speed speed_cap, platform_speed_cap;
5585 enum pcie_link_width platform_link_width;
d0dd7f0c 5586
cd474ba0
AD
5587 if (amdgpu_pcie_gen_cap)
5588 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 5589
cd474ba0
AD
5590 if (amdgpu_pcie_lane_cap)
5591 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 5592
cd474ba0 5593 /* covers APUs as well */
04e85958 5594 if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
cd474ba0
AD
5595 if (adev->pm.pcie_gen_mask == 0)
5596 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5597 if (adev->pm.pcie_mlw_mask == 0)
5598 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 5599 return;
cd474ba0 5600 }
d0dd7f0c 5601
c5313457
HK
5602 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5603 return;
5604
dbaa922b
AD
5605 pcie_bandwidth_available(adev->pdev, NULL,
5606 &platform_speed_cap, &platform_link_width);
c5313457 5607
cd474ba0 5608 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
5609 /* asic caps */
5610 pdev = adev->pdev;
5611 speed_cap = pcie_get_speed_cap(pdev);
5612 if (speed_cap == PCI_SPEED_UNKNOWN) {
5613 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
5614 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5615 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 5616 } else {
2b3a1f51
FX
5617 if (speed_cap == PCIE_SPEED_32_0GT)
5618 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5619 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5620 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5621 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5622 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5623 else if (speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5624 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5625 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5626 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5627 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5628 else if (speed_cap == PCIE_SPEED_8_0GT)
5629 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5630 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5631 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5632 else if (speed_cap == PCIE_SPEED_5_0GT)
5633 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5634 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5635 else
5636 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5637 }
5638 /* platform caps */
c5313457 5639 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
5640 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5641 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5642 } else {
2b3a1f51
FX
5643 if (platform_speed_cap == PCIE_SPEED_32_0GT)
5644 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5645 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5646 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5647 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5648 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5649 else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5650 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5651 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5652 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5653 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 5654 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
5655 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5656 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5657 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 5658 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
5659 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5660 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5661 else
5662 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5663
cd474ba0
AD
5664 }
5665 }
5666 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 5667 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
5668 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5669 } else {
c5313457 5670 switch (platform_link_width) {
5d9a6330 5671 case PCIE_LNK_X32:
cd474ba0
AD
5672 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5673 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5674 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5675 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5676 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5677 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5678 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5679 break;
5d9a6330 5680 case PCIE_LNK_X16:
cd474ba0
AD
5681 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5682 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5683 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5684 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5685 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5686 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5687 break;
5d9a6330 5688 case PCIE_LNK_X12:
cd474ba0
AD
5689 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5690 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5691 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5692 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5693 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5694 break;
5d9a6330 5695 case PCIE_LNK_X8:
cd474ba0
AD
5696 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5697 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5698 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5699 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5700 break;
5d9a6330 5701 case PCIE_LNK_X4:
cd474ba0
AD
5702 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5703 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5704 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5705 break;
5d9a6330 5706 case PCIE_LNK_X2:
cd474ba0
AD
5707 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5708 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5709 break;
5d9a6330 5710 case PCIE_LNK_X1:
cd474ba0
AD
5711 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5712 break;
5713 default:
5714 break;
5715 }
d0dd7f0c
AD
5716 }
5717 }
5718}
d38ceaf9 5719
08a2fd23
RE
5720/**
5721 * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
5722 *
5723 * @adev: amdgpu_device pointer
5724 * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
5725 *
5726 * Return true if @peer_adev can access (DMA) @adev through the PCIe
5727 * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
5728 * @peer_adev.
5729 */
5730bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
5731 struct amdgpu_device *peer_adev)
5732{
5733#ifdef CONFIG_HSA_AMD_P2P
5734 uint64_t address_mask = peer_adev->dev->dma_mask ?
5735 ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
5736 resource_size_t aper_limit =
5737 adev->gmc.aper_base + adev->gmc.aper_size - 1;
bb66ecbf
LL
5738 bool p2p_access =
5739 !adev->gmc.xgmi.connected_to_cpu &&
5740 !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
08a2fd23
RE
5741
5742 return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
5743 adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
5744 !(adev->gmc.aper_base & address_mask ||
5745 aper_limit & address_mask));
5746#else
5747 return false;
5748#endif
5749}
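/*
 * Worked example (illustrative): if @peer_adev advertises a 40-bit DMA
 * mask, address_mask above becomes ~((1ULL << 40) - 1), so the check
 * only passes when both aper_base and aper_limit sit below 1ULL << 40
 * and the whole VRAM is CPU-visible (real_vram_size ==
 * visible_vram_size, i.e. a "large BAR" configuration).
 */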
5750
361dbd01
AD
5751int amdgpu_device_baco_enter(struct drm_device *dev)
5752{
1348969a 5753 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5754 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01 5755
6ab68650 5756 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
5757 return -ENOTSUPP;
5758
8ab0d6f0 5759 if (ras && adev->ras_enabled &&
acdae216 5760 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5761 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
5762
9530273e 5763 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
5764}
5765
5766int amdgpu_device_baco_exit(struct drm_device *dev)
5767{
1348969a 5768 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5769 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 5770 int ret = 0;
361dbd01 5771
6ab68650 5772 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
5773 return -ENOTSUPP;
5774
9530273e
EQ
5775 ret = amdgpu_dpm_baco_exit(adev);
5776 if (ret)
5777 return ret;
7a22677b 5778
8ab0d6f0 5779 if (ras && adev->ras_enabled &&
acdae216 5780 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5781 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
5782
1bece222
CL
5783 if (amdgpu_passthrough(adev) &&
5784 adev->nbio.funcs->clear_doorbell_interrupt)
5785 adev->nbio.funcs->clear_doorbell_interrupt(adev);
5786
7a22677b 5787 return 0;
361dbd01 5788}
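/*
 * Sketch (an assumption, not part of the original file) of the intended
 * pairing of the two helpers above in a runtime power-management style
 * path: enter BACO when the device goes idle and exit it on wake-up.
 */
static int example_baco_idle_cycle(struct drm_device *dev)
{
	int r;

	r = amdgpu_device_baco_enter(dev);
	if (r)
		return r;

	/* ... device sits in BACO (mostly powered off) while idle ... */

	return amdgpu_device_baco_exit(dev);
}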
c9a6b82f
AG
5789
5790/**
5791 * amdgpu_pci_error_detected - Called when a PCI error is detected.
5792 * @pdev: PCI device struct
5793 * @state: PCI channel state
5794 *
5795 * Description: Called when a PCI error is detected.
5796 *
5797 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
5798 */
5799pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
5800{
5801 struct drm_device *dev = pci_get_drvdata(pdev);
5802 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5803 int i;
c9a6b82f
AG
5804
5805 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
5806
6894305c
AG
5807 if (adev->gmc.xgmi.num_physical_nodes > 1) {
5808 DRM_WARN("No support for XGMI hive yet...");
5809 return PCI_ERS_RESULT_DISCONNECT;
5810 }
5811
e17e27f9
GC
5812 adev->pci_channel_state = state;
5813
c9a6b82f
AG
5814 switch (state) {
5815 case pci_channel_io_normal:
5816 return PCI_ERS_RESULT_CAN_RECOVER;
acd89fca 5817 /* Fatal error, prepare for slot reset */
8a11d283
TZ
5818 case pci_channel_io_frozen:
5819 /*
d0fb18b5 5820 * Locking adev->reset_domain->sem will prevent any external access
acd89fca
AG
5821 * to GPU during PCI error recovery
5822 */
3675c2f2 5823 amdgpu_device_lock_reset_domain(adev->reset_domain);
e923be99 5824 amdgpu_device_set_mp1_state(adev);
acd89fca
AG
5825
5826 /*
5827 * Block any work scheduling as we do for regular GPU reset
5828 * for the duration of the recovery
5829 */
5830 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5831 struct amdgpu_ring *ring = adev->rings[i];
5832
5833 if (!ring || !ring->sched.thread)
5834 continue;
5835
5836 drm_sched_stop(&ring->sched, NULL);
5837 }
8f8c80f4 5838 atomic_inc(&adev->gpu_reset_counter);
c9a6b82f
AG
5839 return PCI_ERS_RESULT_NEED_RESET;
5840 case pci_channel_io_perm_failure:
5841 /* Permanent error, prepare for device removal */
5842 return PCI_ERS_RESULT_DISCONNECT;
5843 }
5844
5845 return PCI_ERS_RESULT_NEED_RESET;
5846}
5847
5848/**
5849 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
5850 * @pdev: pointer to PCI device
5851 */
5852pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
5853{
5854
5855 DRM_INFO("PCI error: mmio enabled callback!!\n");
5856
5857 /* TODO - dump whatever for debugging purposes */
5858
5859 /* This is called only if amdgpu_pci_error_detected returns
5860 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
5861 * works, no need to reset slot.
5862 */
5863
5864 return PCI_ERS_RESULT_RECOVERED;
5865}
5866
5867/**
5868 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
5869 * @pdev: PCI device struct
5870 *
5871 * Description: This routine is called by the pci error recovery
5872 * code after the PCI slot has been reset, just before we
5873 * should resume normal operations.
5874 */
5875pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
5876{
5877 struct drm_device *dev = pci_get_drvdata(pdev);
5878 struct amdgpu_device *adev = drm_to_adev(dev);
362c7b91 5879 int r, i;
04442bf7 5880 struct amdgpu_reset_context reset_context;
362c7b91 5881 u32 memsize;
7ac71382 5882 struct list_head device_list;
c9a6b82f
AG
5883
5884 DRM_INFO("PCI error: slot reset callback!!\n");
5885
04442bf7
LL
5886 memset(&reset_context, 0, sizeof(reset_context));
5887
7ac71382 5888 INIT_LIST_HEAD(&device_list);
655ce9cb 5889 list_add_tail(&adev->reset_list, &device_list);
7ac71382 5890
362c7b91
AG
5891 /* wait for asic to come out of reset */
5892 msleep(500);
5893
7ac71382 5894 /* Restore PCI confspace */
c1dd4aa6 5895 amdgpu_device_load_pci_state(pdev);
c9a6b82f 5896
362c7b91
AG
5897 /* confirm ASIC came out of reset */
5898 for (i = 0; i < adev->usec_timeout; i++) {
5899 memsize = amdgpu_asic_get_config_memsize(adev);
5900
5901 if (memsize != 0xffffffff)
5902 break;
5903 udelay(1);
5904 }
5905 if (memsize == 0xffffffff) {
5906 r = -ETIME;
5907 goto out;
5908 }
5909
04442bf7
LL
5910 reset_context.method = AMD_RESET_METHOD_NONE;
5911 reset_context.reset_req_dev = adev;
5912 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
5913 set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
5914
7afefb81 5915 adev->no_hw_access = true;
04442bf7 5916 r = amdgpu_device_pre_asic_reset(adev, &reset_context);
7afefb81 5917 adev->no_hw_access = false;
c9a6b82f
AG
5918 if (r)
5919 goto out;
5920
04442bf7 5921 r = amdgpu_do_asic_reset(&device_list, &reset_context);
c9a6b82f
AG
5922
5923out:
c9a6b82f 5924 if (!r) {
c1dd4aa6
AG
5925 if (amdgpu_device_cache_pci_state(adev->pdev))
5926 pci_restore_state(adev->pdev);
5927
c9a6b82f
AG
5928 DRM_INFO("PCIe error recovery succeeded\n");
5929 } else {
5930 DRM_ERROR("PCIe error recovery failed, err:%d", r);
e923be99
AG
5931 amdgpu_device_unset_mp1_state(adev);
5932 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f
AG
5933 }
5934
5935 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
5936}
5937
5938/**
5939 * amdgpu_pci_resume() - resume normal ops after PCI reset
5940 * @pdev: pointer to PCI device
5941 *
5942 * Called when the error recovery driver tells us that its
505199a3 5943 * OK to resume normal operation.
c9a6b82f
AG
5944 */
5945void amdgpu_pci_resume(struct pci_dev *pdev)
5946{
5947 struct drm_device *dev = pci_get_drvdata(pdev);
5948 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5949 int i;
c9a6b82f 5950
c9a6b82f
AG
5951
5952 DRM_INFO("PCI error: resume callback!!\n");
acd89fca 5953
e17e27f9
GC
5954 /* Only continue execution for the case of pci_channel_io_frozen */
5955 if (adev->pci_channel_state != pci_channel_io_frozen)
5956 return;
5957
acd89fca
AG
5958 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5959 struct amdgpu_ring *ring = adev->rings[i];
5960
5961 if (!ring || !ring->sched.thread)
5962 continue;
5963
acd89fca
AG
5964 drm_sched_start(&ring->sched, true);
5965 }
5966
e923be99
AG
5967 amdgpu_device_unset_mp1_state(adev);
5968 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f 5969}
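/*
 * Wiring sketch (an assumption, not part of the original file): the four
 * PCI error callbacks above are normally plugged into a struct
 * pci_error_handlers that is registered with the PCI driver, roughly:
 */
static const struct pci_error_handlers example_amdgpu_pci_err_handler = {
	.error_detected	= amdgpu_pci_error_detected,
	.mmio_enabled	= amdgpu_pci_mmio_enabled,
	.slot_reset	= amdgpu_pci_slot_reset,
	.resume		= amdgpu_pci_resume,
};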
c1dd4aa6
AG
5970
5971bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
5972{
5973 struct drm_device *dev = pci_get_drvdata(pdev);
5974 struct amdgpu_device *adev = drm_to_adev(dev);
5975 int r;
5976
5977 r = pci_save_state(pdev);
5978 if (!r) {
5979 kfree(adev->pci_state);
5980
5981 adev->pci_state = pci_store_saved_state(pdev);
5982
5983 if (!adev->pci_state) {
5984 DRM_ERROR("Failed to store PCI saved state");
5985 return false;
5986 }
5987 } else {
5988 DRM_WARN("Failed to save PCI state, err:%d\n", r);
5989 return false;
5990 }
5991
5992 return true;
5993}
5994
5995bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
5996{
5997 struct drm_device *dev = pci_get_drvdata(pdev);
5998 struct amdgpu_device *adev = drm_to_adev(dev);
5999 int r;
6000
6001 if (!adev->pci_state)
6002 return false;
6003
6004 r = pci_load_saved_state(pdev, adev->pci_state);
6005
6006 if (!r) {
6007 pci_restore_state(pdev);
6008 } else {
6009 DRM_WARN("Failed to load PCI state, err:%d\n", r);
6010 return false;
6011 }
6012
6013 return true;
6014}
6015
810085dd
EH
6016void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
6017 struct amdgpu_ring *ring)
6018{
6019#ifdef CONFIG_X86_64
b818a5d3 6020 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
6021 return;
6022#endif
6023 if (adev->gmc.xgmi.connected_to_cpu)
6024 return;
6025
6026 if (ring && ring->funcs->emit_hdp_flush)
6027 amdgpu_ring_emit_hdp_flush(ring);
6028 else
6029 amdgpu_asic_flush_hdp(adev, ring);
6030}
c1dd4aa6 6031
810085dd
EH
6032void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
6033 struct amdgpu_ring *ring)
6034{
6035#ifdef CONFIG_X86_64
b818a5d3 6036 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
6037 return;
6038#endif
6039 if (adev->gmc.xgmi.connected_to_cpu)
6040 return;
c1dd4aa6 6041
810085dd
EH
6042 amdgpu_asic_invalidate_hdp(adev, ring);
6043}
34f3a4a9 6044
89a7a870
AG
6045int amdgpu_in_reset(struct amdgpu_device *adev)
6046{
6047 return atomic_read(&adev->reset_domain->in_gpu_reset);
53a17b6b
TZ
6048}
6049
34f3a4a9
LY
6050/**
6051 * amdgpu_device_halt() - bring hardware to some kind of halt state
6052 *
6053 * @adev: amdgpu_device pointer
6054 *
6055 * Bring hardware to some kind of halt state so that no one can touch it
6056 * any more. It helps to maintain the error context when an error occurs.
6057 * Compared to a simple hang, the system will stay stable at least for SSH
6058 * access. Then it should be trivial to inspect the hardware state and
6059 * see what's going on. Implemented as follows:
6060 *
6061 * 1. drm_dev_unplug() makes device inaccessible to user space (IOCTLs, etc),
6062 * clears all CPU mappings to device, disallows remappings through page faults
6063 * 2. amdgpu_irq_disable_all() disables all interrupts
6064 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
6065 * 4. set adev->no_hw_access to avoid potential crashes after step 5
6066 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
6067 * 6. pci_disable_device() and pci_wait_for_pending_transaction()
6068 * flush any in flight DMA operations
6069 */
6070void amdgpu_device_halt(struct amdgpu_device *adev)
6071{
6072 struct pci_dev *pdev = adev->pdev;
e0f943b4 6073 struct drm_device *ddev = adev_to_drm(adev);
34f3a4a9 6074
2c1c7ba4 6075 amdgpu_xcp_dev_unplug(adev);
34f3a4a9
LY
6076 drm_dev_unplug(ddev);
6077
6078 amdgpu_irq_disable_all(adev);
6079
6080 amdgpu_fence_driver_hw_fini(adev);
6081
6082 adev->no_hw_access = true;
6083
6084 amdgpu_device_unmap_mmio(adev);
6085
6086 pci_disable_device(pdev);
6087 pci_wait_for_pending_transaction(pdev);
6088}
86700a40
XD
6089
6090u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
6091 u32 reg)
6092{
6093 unsigned long flags, address, data;
6094 u32 r;
6095
6096 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6097 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6098
6099 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6100 WREG32(address, reg * 4);
6101 (void)RREG32(address);
6102 r = RREG32(data);
6103 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6104 return r;
6105}
6106
6107void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
6108 u32 reg, u32 v)
6109{
6110 unsigned long flags, address, data;
6111
6112 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6113 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6114
6115 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6116 WREG32(address, reg * 4);
6117 (void)RREG32(address);
6118 WREG32(data, v);
6119 (void)RREG32(data);
6120 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6121}
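/*
 * Usage sketch (an assumption, not part of the original file): the two
 * helpers above form an index/data access pair, so a read-modify-write
 * of a PCIe port register reduces to the following.
 */
static void example_pcie_port_set_bits(struct amdgpu_device *adev,
				       u32 reg, u32 set_bits)
{
	u32 v;

	v = amdgpu_device_pcie_port_rreg(adev, reg);
	v |= set_bits;
	amdgpu_device_pcie_port_wreg(adev, reg, v);
}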
68ce8b24
CK
6122
6123/**
6124 * amdgpu_device_switch_gang - switch to a new gang
6125 * @adev: amdgpu_device pointer
6126 * @gang: the gang to switch to
6127 *
6128 * Try to switch to a new gang.
6129 * Returns: NULL if we switched to the new gang or a reference to the current
6130 * gang leader.
6131 */
6132struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
6133 struct dma_fence *gang)
6134{
6135 struct dma_fence *old = NULL;
6136
6137 do {
6138 dma_fence_put(old);
6139 rcu_read_lock();
6140 old = dma_fence_get_rcu_safe(&adev->gang_submit);
6141 rcu_read_unlock();
6142
6143 if (old == gang)
6144 break;
6145
6146 if (!dma_fence_is_signaled(old))
6147 return old;
6148
6149 } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
6150 old, gang) != old);
6151
6152 dma_fence_put(old);
6153 return NULL;
6154}
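/*
 * Illustrative sketch (an assumption, not part of the original file): a
 * submitter that wants to run work under a new gang leader keeps calling
 * amdgpu_device_switch_gang() until the previous leader has signaled; a
 * NULL return means the switch took effect. Real callers may instead
 * schedule behind the returned fence rather than blocking on it.
 */
static int example_switch_to_gang(struct amdgpu_device *adev,
				  struct dma_fence *gang)
{
	struct dma_fence *old;

	while ((old = amdgpu_device_switch_gang(adev, gang))) {
		long r = dma_fence_wait(old, false);

		dma_fence_put(old);
		if (r < 0)
			return r;
	}

	/* ... safe to push jobs that belong to the new gang ... */
	return 0;
}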
220c8cc8
AD
6155
6156bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6157{
6158 switch (adev->asic_type) {
6159#ifdef CONFIG_DRM_AMDGPU_SI
6160 case CHIP_HAINAN:
6161#endif
6162 case CHIP_TOPAZ:
6163 /* chips with no display hardware */
6164 return false;
6165#ifdef CONFIG_DRM_AMDGPU_SI
6166 case CHIP_TAHITI:
6167 case CHIP_PITCAIRN:
6168 case CHIP_VERDE:
6169 case CHIP_OLAND:
6170#endif
6171#ifdef CONFIG_DRM_AMDGPU_CIK
6172 case CHIP_BONAIRE:
6173 case CHIP_HAWAII:
6174 case CHIP_KAVERI:
6175 case CHIP_KABINI:
6176 case CHIP_MULLINS:
6177#endif
6178 case CHIP_TONGA:
6179 case CHIP_FIJI:
6180 case CHIP_POLARIS10:
6181 case CHIP_POLARIS11:
6182 case CHIP_POLARIS12:
6183 case CHIP_VEGAM:
6184 case CHIP_CARRIZO:
6185 case CHIP_STONEY:
6186 /* chips with display hardware */
6187 return true;
6188 default:
6189 /* IP discovery */
4e8303cf 6190 if (!amdgpu_ip_version(adev, DCE_HWIP, 0) ||
220c8cc8
AD
6191 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6192 return false;
6193 return true;
6194 }
6195}
81283fee
JZ
6196
6197uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
6198 uint32_t inst, uint32_t reg_addr, char reg_name[],
6199 uint32_t expected_value, uint32_t mask)
6200{
6201 uint32_t ret = 0;
6202 uint32_t old_ = 0;
6203 uint32_t tmp_ = RREG32(reg_addr);
6204 uint32_t loop = adev->usec_timeout;
6205
6206 while ((tmp_ & (mask)) != (expected_value)) {
6207 if (old_ != tmp_) {
6208 loop = adev->usec_timeout;
6209 old_ = tmp_;
6210 } else
6211 udelay(1);
6212 tmp_ = RREG32(reg_addr);
6213 loop--;
6214 if (!loop) {
6215 DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08x\n",
6216 inst, reg_name, (uint32_t)expected_value,
6217 (uint32_t)(tmp_ & (mask)));
6218 ret = -ETIMEDOUT;
6219 break;
6220 }
6221 }
6222 return ret;
6223}
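/*
 * Usage sketch (an assumption; the register name and values are made up):
 * poll a status register until the masked bits reach the expected value,
 * with the helper above rearming its timeout whenever the value changes.
 */
static int example_wait_for_idle_bit(struct amdgpu_device *adev,
				     uint32_t status_reg)
{
	/* wait for bit 0 of the hypothetical status register to clear */
	return amdgpu_device_wait_on_rreg(adev, 0, status_reg, "STATUS",
					  0x0, 0x1);
}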