drm/amd: Drop error message about failing to load DMUB firmware
[linux-2.6-block.git] drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
b1ddf548 28#include <linux/power_supply.h>
0875dc9e 29#include <linux/kthread.h>
fdf2f6c5 30#include <linux/module.h>
d38ceaf9
AD
31#include <linux/console.h>
32#include <linux/slab.h>
4a74c38c 33#include <linux/iommu.h>
901e2be2 34#include <linux/pci.h>
3d8785f6
SA
35#include <linux/devcoredump.h>
36#include <generated/utsrelease.h>
08a2fd23 37#include <linux/pci-p2pdma.h>
d37a3929 38#include <linux/apple-gmux.h>
fdf2f6c5 39
b7cdb41e 40#include <drm/drm_aperture.h>
4562236b 41#include <drm/drm_atomic_helper.h>
973ad627 42#include <drm/drm_crtc_helper.h>
45b64fd9 43#include <drm/drm_fb_helper.h>
fcd70cd3 44#include <drm/drm_probe_helper.h>
d38ceaf9
AD
45#include <drm/amdgpu_drm.h>
46#include <linux/vgaarb.h>
47#include <linux/vga_switcheroo.h>
48#include <linux/efi.h>
49#include "amdgpu.h"
f4b373f4 50#include "amdgpu_trace.h"
d38ceaf9
AD
51#include "amdgpu_i2c.h"
52#include "atom.h"
53#include "amdgpu_atombios.h"
a5bde2f9 54#include "amdgpu_atomfirmware.h"
d0dd7f0c 55#include "amd_pcie.h"
33f34802
KW
56#ifdef CONFIG_DRM_AMDGPU_SI
57#include "si.h"
58#endif
a2e73f56
AD
59#ifdef CONFIG_DRM_AMDGPU_CIK
60#include "cik.h"
61#endif
aaa36a97 62#include "vi.h"
460826e6 63#include "soc15.h"
0a5b8c7b 64#include "nv.h"
d38ceaf9 65#include "bif/bif_4_1_d.h"
bec86378 66#include <linux/firmware.h>
89041940 67#include "amdgpu_vf_error.h"
d38ceaf9 68
ba997709 69#include "amdgpu_amdkfd.h"
d2f52ac8 70#include "amdgpu_pm.h"
d38ceaf9 71
5183411b 72#include "amdgpu_xgmi.h"
c030f2e4 73#include "amdgpu_ras.h"
9c7c85f7 74#include "amdgpu_pmu.h"
bd607166 75#include "amdgpu_fru_eeprom.h"
04442bf7 76#include "amdgpu_reset.h"
5183411b 77
d5ea093e 78#include <linux/suspend.h>
c6a6e2db 79#include <drm/task_barrier.h>
3f12acc8 80#include <linux/pm_runtime.h>
d5ea093e 81
f89f8c6b
AG
82#include <drm/drm_drv.h>
83
3ad5dcfe
KHF
84#if IS_ENABLED(CONFIG_X86)
85#include <asm/intel-family.h>
86#endif
87
e2a75f88 88MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
3f76dced 89MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
2d2e5e7e 90MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
ad5a67a7 91MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
54c4d17e 92MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
65e60f6e 93MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
42b325e5 94MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
e2a75f88 95
2dc80b00 96#define AMDGPU_RESUME_MS 2000
7258fa31
SK
97#define AMDGPU_MAX_RETRY_LIMIT 2
98#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
2dc80b00 99
b7cdb41e
ML
100static const struct drm_driver amdgpu_kms_driver;
101
050091ab 102const char *amdgpu_asic_name[] = {
da69c161
KW
103 "TAHITI",
104 "PITCAIRN",
105 "VERDE",
106 "OLAND",
107 "HAINAN",
d38ceaf9
AD
108 "BONAIRE",
109 "KAVERI",
110 "KABINI",
111 "HAWAII",
112 "MULLINS",
113 "TOPAZ",
114 "TONGA",
48299f95 115 "FIJI",
d38ceaf9 116 "CARRIZO",
139f4917 117 "STONEY",
2cc0c0b5
FC
118 "POLARIS10",
119 "POLARIS11",
c4642a47 120 "POLARIS12",
48ff108d 121 "VEGAM",
d4196f01 122 "VEGA10",
8fab806a 123 "VEGA12",
956fcddc 124 "VEGA20",
2ca8a5d2 125 "RAVEN",
d6c3b24e 126 "ARCTURUS",
1eee4228 127 "RENOIR",
d46b417a 128 "ALDEBARAN",
852a6626 129 "NAVI10",
d0f56dc2 130 "CYAN_SKILLFISH",
87dbad02 131 "NAVI14",
9802f5d7 132 "NAVI12",
ccaf72d3 133 "SIENNA_CICHLID",
ddd8fbe7 134 "NAVY_FLOUNDER",
4f1e9a76 135 "VANGOGH",
a2468e04 136 "DIMGREY_CAVEFISH",
6f169591 137 "BEIGE_GOBY",
ee9236b7 138 "YELLOW_CARP",
3ae695d6 139 "IP DISCOVERY",
d38ceaf9
AD
140 "LAST",
141};
142
/**
 * DOC: pcie_replay_count
 *
 * The amdgpu driver provides a sysfs API for reporting the total number
 * of PCIe replays (NAKs).
 * The file pcie_replay_count is used for this and returns the total
 * number of replays as the sum of the NAKs generated and the NAKs received.
 */

152static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
153 struct device_attribute *attr, char *buf)
154{
155 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 156 struct amdgpu_device *adev = drm_to_adev(ddev);
dcea6e65
KR
157 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
158
36000c7a 159 return sysfs_emit(buf, "%llu\n", cnt);
dcea6e65
KR
160}
161
b8920e1e 162static DEVICE_ATTR(pcie_replay_count, 0444,
dcea6e65
KR
163 amdgpu_device_get_pcie_replay_count, NULL);
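/*
 * Illustrative usage from userspace (the exact sysfs path is an assumption and
 * depends on the card index on a given system):
 *
 *   cat /sys/class/drm/card0/device/pcie_replay_count
 */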
164
5494d864
AD
165static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
166
bd607166 167
fd496ca8 168/**
b98c6299 169 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
fd496ca8
AD
170 *
171 * @dev: drm_device pointer
172 *
b98c6299 173 * Returns true if the device is a dGPU with ATPX power control,
fd496ca8
AD
174 * otherwise return false.
175 */
b98c6299 176bool amdgpu_device_supports_px(struct drm_device *dev)
fd496ca8
AD
177{
178 struct amdgpu_device *adev = drm_to_adev(dev);
179
b98c6299 180 if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
fd496ca8
AD
181 return true;
182 return false;
183}
184
e3ecdffa 185/**
0330b848 186 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
e3ecdffa
AD
187 *
188 * @dev: drm_device pointer
189 *
b98c6299 190 * Returns true if the device is a dGPU with ACPI power control,
e3ecdffa
AD
191 * otherwise return false.
192 */
31af062a 193bool amdgpu_device_supports_boco(struct drm_device *dev)
d38ceaf9 194{
1348969a 195 struct amdgpu_device *adev = drm_to_adev(dev);
d38ceaf9 196
b98c6299
AD
197 if (adev->has_pr3 ||
198 ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
d38ceaf9
AD
199 return true;
200 return false;
201}
202
/**
 * amdgpu_device_supports_baco - Does the device support BACO
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device supports BACO,
 * otherwise returns false.
 */
211bool amdgpu_device_supports_baco(struct drm_device *dev)
212{
1348969a 213 struct amdgpu_device *adev = drm_to_adev(dev);
a69cba42
AD
214
215 return amdgpu_asic_supports_baco(adev);
216}
217
3fa8f89d
S
218/**
219 * amdgpu_device_supports_smart_shift - Is the device dGPU with
220 * smart shift support
221 *
222 * @dev: drm_device pointer
223 *
224 * Returns true if the device is a dGPU with Smart Shift support,
225 * otherwise returns false.
226 */
227bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
228{
229 return (amdgpu_device_supports_boco(dev) &&
230 amdgpu_acpi_is_power_shift_control_supported());
231}
232
6e3cd2a9
MCC
233/*
234 * VRAM access helper functions
235 */
236
/**
 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size; sizeof(@buf) must be > @size
 * @write: true - write to vram, otherwise - read from vram
 */
246void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
247 void *buf, size_t size, bool write)
e35e2b11 248{
e35e2b11 249 unsigned long flags;
048af66b
KW
250 uint32_t hi = ~0, tmp = 0;
251 uint32_t *data = buf;
ce05ac56 252 uint64_t last;
f89f8c6b 253 int idx;
ce05ac56 254
c58a863b 255 if (!drm_dev_enter(adev_to_drm(adev), &idx))
f89f8c6b 256 return;
9d11eb0d 257
048af66b
KW
258 BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
259
260 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
261 for (last = pos + size; pos < last; pos += 4) {
262 tmp = pos >> 31;
263
264 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
265 if (tmp != hi) {
266 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
267 hi = tmp;
268 }
269 if (write)
270 WREG32_NO_KIQ(mmMM_DATA, *data++);
271 else
272 *data++ = RREG32_NO_KIQ(mmMM_DATA);
273 }
274
275 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
276 drm_dev_exit(idx);
277}
278
/**
 * amdgpu_device_aper_access - access vram by vram aperture
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size; sizeof(@buf) must be > @size
 * @write: true - write to vram, otherwise - read from vram
 *
 * The return value is the number of bytes actually transferred.
 */
290size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
291 void *buf, size_t size, bool write)
292{
9d11eb0d 293#ifdef CONFIG_64BIT
048af66b
KW
294 void __iomem *addr;
295 size_t count = 0;
296 uint64_t last;
297
298 if (!adev->mman.aper_base_kaddr)
299 return 0;
300
9d11eb0d
CK
301 last = min(pos + size, adev->gmc.visible_vram_size);
302 if (last > pos) {
048af66b
KW
303 addr = adev->mman.aper_base_kaddr + pos;
304 count = last - pos;
9d11eb0d
CK
305
306 if (write) {
307 memcpy_toio(addr, buf, count);
4c452b5c
SS
308 /* Make sure HDP write cache flush happens without any reordering
309 * after the system memory contents are sent over PCIe device
310 */
9d11eb0d 311 mb();
810085dd 312 amdgpu_device_flush_hdp(adev, NULL);
9d11eb0d 313 } else {
810085dd 314 amdgpu_device_invalidate_hdp(adev, NULL);
4c452b5c
SS
315 /* Make sure HDP read cache is invalidated before issuing a read
316 * to the PCIe device
317 */
9d11eb0d
CK
318 mb();
319 memcpy_fromio(buf, addr, count);
320 }
321
9d11eb0d 322 }
048af66b
KW
323
324 return count;
325#else
326 return 0;
9d11eb0d 327#endif
048af66b 328}
9d11eb0d 329
/**
 * amdgpu_device_vram_access - read/write a buffer in vram
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size; sizeof(@buf) must be > @size
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
			       void *buf, size_t size, bool write)
{
	size_t count;

	/* try using the vram aperture to access vram first */
	count = amdgpu_device_aper_access(adev, pos, buf, size, write);
	size -= count;
	if (size) {
		/* use MM_INDEX/MM_DATA to access the rest of vram */
		pos += count;
		buf += count;
		amdgpu_device_mm_access(adev, pos, buf, size, write);
	}
}
354
d38ceaf9 355/*
f7ee1874 356 * register access helper functions.
d38ceaf9 357 */
56b53c0b
DL
358
359/* Check if hw access should be skipped because of hotplug or device error */
360bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
361{
7afefb81 362 if (adev->no_hw_access)
56b53c0b
DL
363 return true;
364
365#ifdef CONFIG_LOCKDEP
	/*
	 * This is a bit complicated to understand, so worth a comment. What we assert
	 * here is that the GPU reset is not running on another thread in parallel.
	 *
	 * For this we trylock the read side of the reset semaphore; if that succeeds
	 * we know that the reset is not running in parallel.
	 *
	 * If the trylock fails we assert that we are either already holding the read
	 * side of the lock or are the reset thread itself and hold the write side of
	 * the lock.
	 */
377 if (in_task()) {
d0fb18b5
AG
378 if (down_read_trylock(&adev->reset_domain->sem))
379 up_read(&adev->reset_domain->sem);
56b53c0b 380 else
d0fb18b5 381 lockdep_assert_held(&adev->reset_domain->sem);
56b53c0b
DL
382 }
383#endif
384 return false;
385}
386
e3ecdffa 387/**
f7ee1874 388 * amdgpu_device_rreg - read a memory mapped IO or indirect register
e3ecdffa
AD
389 *
390 * @adev: amdgpu_device pointer
391 * @reg: dword aligned register offset
392 * @acc_flags: access flags which require special behavior
393 *
394 * Returns the 32 bit value from the offset specified.
395 */
f7ee1874
HZ
396uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
397 uint32_t reg, uint32_t acc_flags)
d38ceaf9 398{
f4b373f4
TSD
399 uint32_t ret;
400
56b53c0b 401 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
402 return 0;
403
f7ee1874
HZ
404 if ((reg * 4) < adev->rmmio_size) {
405 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
406 amdgpu_sriov_runtime(adev) &&
d0fb18b5 407 down_read_trylock(&adev->reset_domain->sem)) {
f7ee1874 408 ret = amdgpu_kiq_rreg(adev, reg);
d0fb18b5 409 up_read(&adev->reset_domain->sem);
f7ee1874
HZ
410 } else {
411 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
412 }
413 } else {
414 ret = adev->pcie_rreg(adev, reg * 4);
81202807 415 }
bc992ba5 416
f7ee1874 417 trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
e78b579d 418
f4b373f4 419 return ret;
d38ceaf9
AD
420}
421
/*
 * MMIO register read with bytes helper functions
 * @offset: bytes offset from MMIO start
 */

427/**
428 * amdgpu_mm_rreg8 - read a memory mapped IO register
429 *
430 * @adev: amdgpu_device pointer
431 * @offset: byte aligned register offset
432 *
433 * Returns the 8 bit value from the offset specified.
434 */
7cbbc745
AG
435uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
436{
56b53c0b 437 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
438 return 0;
439
421a2a30
ML
440 if (offset < adev->rmmio_size)
441 return (readb(adev->rmmio + offset));
442 BUG();
443}
444
/*
 * MMIO register write with bytes helper functions
 * @offset: bytes offset from MMIO start
 * @value: the value to be written to the register
 */

/**
 * amdgpu_mm_wreg8 - write a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
460void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
461{
56b53c0b 462 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
463 return;
464
421a2a30
ML
465 if (offset < adev->rmmio_size)
466 writeb(value, adev->rmmio + offset);
467 else
468 BUG();
469}
470
e3ecdffa 471/**
f7ee1874 472 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
e3ecdffa
AD
473 *
474 * @adev: amdgpu_device pointer
475 * @reg: dword aligned register offset
476 * @v: 32 bit value to write to the register
477 * @acc_flags: access flags which require special behavior
478 *
479 * Writes the value specified to the offset specified.
480 */
f7ee1874
HZ
481void amdgpu_device_wreg(struct amdgpu_device *adev,
482 uint32_t reg, uint32_t v,
483 uint32_t acc_flags)
d38ceaf9 484{
56b53c0b 485 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
486 return;
487
f7ee1874
HZ
488 if ((reg * 4) < adev->rmmio_size) {
489 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
490 amdgpu_sriov_runtime(adev) &&
d0fb18b5 491 down_read_trylock(&adev->reset_domain->sem)) {
f7ee1874 492 amdgpu_kiq_wreg(adev, reg, v);
d0fb18b5 493 up_read(&adev->reset_domain->sem);
f7ee1874
HZ
494 } else {
495 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
496 }
497 } else {
498 adev->pcie_wreg(adev, reg * 4, v);
81202807 499 }
bc992ba5 500
f7ee1874 501 trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
2e0cc4d4 502}
d38ceaf9 503
03f2abb0 504/**
4cc9f86f 505 * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
2e0cc4d4 506 *
71579346
RB
507 * @adev: amdgpu_device pointer
508 * @reg: mmio/rlc register
509 * @v: value to write
8057a9d6 510 * @xcc_id: xcc accelerated compute core id
71579346
RB
511 *
512 * this function is invoked only for the debugfs register access
03f2abb0 513 */
f7ee1874 514void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
8ed49dd1
VL
515 uint32_t reg, uint32_t v,
516 uint32_t xcc_id)
2e0cc4d4 517{
56b53c0b 518 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
519 return;
520
2e0cc4d4 521 if (amdgpu_sriov_fullaccess(adev) &&
f7ee1874
HZ
522 adev->gfx.rlc.funcs &&
523 adev->gfx.rlc.funcs->is_rlcg_access_range) {
2e0cc4d4 524 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
8ed49dd1 525 return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
4cc9f86f
TSD
526 } else if ((reg * 4) >= adev->rmmio_size) {
527 adev->pcie_wreg(adev, reg * 4, v);
f7ee1874
HZ
528 } else {
529 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
47ed4e1c 530 }
d38ceaf9
AD
531}
532
1bba3683
HZ
533/**
534 * amdgpu_device_indirect_rreg - read an indirect register
535 *
536 * @adev: amdgpu_device pointer
22f453fb 537 * @reg_addr: indirect register address to read from
1bba3683
HZ
538 *
539 * Returns the value of indirect register @reg_addr
540 */
541u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
1bba3683
HZ
542 u32 reg_addr)
543{
65ba96e9 544 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
545 void __iomem *pcie_index_offset;
546 void __iomem *pcie_data_offset;
65ba96e9
HZ
547 u32 r;
548
549 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
550 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1bba3683
HZ
551
552 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
553 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
554 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
555
556 writel(reg_addr, pcie_index_offset);
557 readl(pcie_index_offset);
558 r = readl(pcie_data_offset);
559 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
560
561 return r;
562}
563
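/**
 * amdgpu_device_indirect_rreg_ext - read an indirect register with a 64-bit address
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register address to read from
 *
 * Like amdgpu_device_indirect_rreg(), but uses the high index offset provided
 * by the NBIO block when the address has bits above 32.
 *
 * Returns the value of indirect register @reg_addr
 */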
564u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
565 u64 reg_addr)
566{
567 unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
568 u32 r;
569 void __iomem *pcie_index_offset;
570 void __iomem *pcie_index_hi_offset;
571 void __iomem *pcie_data_offset;
572
573 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
574 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
d57e24aa 575 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
0c552ed3
LM
576 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
577 else
578 pcie_index_hi = 0;
579
580 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
581 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
582 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
583 if (pcie_index_hi != 0)
584 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
585 pcie_index_hi * 4;
586
587 writel(reg_addr, pcie_index_offset);
588 readl(pcie_index_offset);
589 if (pcie_index_hi != 0) {
590 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
591 readl(pcie_index_hi_offset);
592 }
593 r = readl(pcie_data_offset);
594
595 /* clear the high bits */
596 if (pcie_index_hi != 0) {
597 writel(0, pcie_index_hi_offset);
598 readl(pcie_index_hi_offset);
599 }
600
601 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
602
603 return r;
604}
605
1bba3683
HZ
606/**
607 * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
608 *
609 * @adev: amdgpu_device pointer
22f453fb 610 * @reg_addr: indirect register address to read from
1bba3683
HZ
611 *
612 * Returns the value of indirect register @reg_addr
613 */
614u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
1bba3683
HZ
615 u32 reg_addr)
616{
65ba96e9 617 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
618 void __iomem *pcie_index_offset;
619 void __iomem *pcie_data_offset;
65ba96e9
HZ
620 u64 r;
621
622 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
623 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1bba3683
HZ
624
625 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
626 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
627 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
628
629 /* read low 32 bits */
630 writel(reg_addr, pcie_index_offset);
631 readl(pcie_index_offset);
632 r = readl(pcie_data_offset);
633 /* read high 32 bits */
634 writel(reg_addr + 4, pcie_index_offset);
635 readl(pcie_index_offset);
636 r |= ((u64)readl(pcie_data_offset) << 32);
637 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
638
639 return r;
640}
641
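/**
 * amdgpu_device_indirect_rreg64_ext - read a 64bits indirect register
 * with a 64-bit register address
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register address to read from
 *
 * Returns the value of indirect register @reg_addr
 */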
642u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
643 u64 reg_addr)
644{
645 unsigned long flags, pcie_index, pcie_data;
646 unsigned long pcie_index_hi = 0;
647 void __iomem *pcie_index_offset;
648 void __iomem *pcie_index_hi_offset;
649 void __iomem *pcie_data_offset;
650 u64 r;
651
652 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
653 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
654 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
655 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
656
657 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
658 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
659 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
660 if (pcie_index_hi != 0)
661 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
662 pcie_index_hi * 4;
663
664 /* read low 32 bits */
665 writel(reg_addr, pcie_index_offset);
666 readl(pcie_index_offset);
667 if (pcie_index_hi != 0) {
668 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
669 readl(pcie_index_hi_offset);
670 }
671 r = readl(pcie_data_offset);
672 /* read high 32 bits */
673 writel(reg_addr + 4, pcie_index_offset);
674 readl(pcie_index_offset);
675 if (pcie_index_hi != 0) {
676 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
677 readl(pcie_index_hi_offset);
678 }
679 r |= ((u64)readl(pcie_data_offset) << 32);
680
681 /* clear the high bits */
682 if (pcie_index_hi != 0) {
683 writel(0, pcie_index_hi_offset);
684 readl(pcie_index_hi_offset);
685 }
686
687 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
688
689 return r;
690}
691
1bba3683
HZ
692/**
693 * amdgpu_device_indirect_wreg - write an indirect register address
694 *
695 * @adev: amdgpu_device pointer
1bba3683
HZ
696 * @reg_addr: indirect register offset
697 * @reg_data: indirect register data
698 *
699 */
700void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
1bba3683
HZ
701 u32 reg_addr, u32 reg_data)
702{
65ba96e9 703 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
704 void __iomem *pcie_index_offset;
705 void __iomem *pcie_data_offset;
706
65ba96e9
HZ
707 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
708 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
709
1bba3683
HZ
710 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
711 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
712 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
713
714 writel(reg_addr, pcie_index_offset);
715 readl(pcie_index_offset);
716 writel(reg_data, pcie_data_offset);
717 readl(pcie_data_offset);
718 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
719}
720
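/**
 * amdgpu_device_indirect_wreg_ext - write an indirect register with a 64-bit address
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register offset
 * @reg_data: indirect register data
 *
 * Like amdgpu_device_indirect_wreg(), but uses the high index offset provided
 * by the NBIO block when the address has bits above 32.
 */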
721void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
722 u64 reg_addr, u32 reg_data)
723{
724 unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
725 void __iomem *pcie_index_offset;
726 void __iomem *pcie_index_hi_offset;
727 void __iomem *pcie_data_offset;
728
729 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
730 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
d57e24aa 731 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
0c552ed3
LM
732 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
733 else
734 pcie_index_hi = 0;
735
736 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
737 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
738 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
739 if (pcie_index_hi != 0)
740 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
741 pcie_index_hi * 4;
742
743 writel(reg_addr, pcie_index_offset);
744 readl(pcie_index_offset);
745 if (pcie_index_hi != 0) {
746 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
747 readl(pcie_index_hi_offset);
748 }
749 writel(reg_data, pcie_data_offset);
750 readl(pcie_data_offset);
751
752 /* clear the high bits */
753 if (pcie_index_hi != 0) {
754 writel(0, pcie_index_hi_offset);
755 readl(pcie_index_hi_offset);
756 }
757
758 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
759}
760
1bba3683
HZ
761/**
762 * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
763 *
764 * @adev: amdgpu_device pointer
1bba3683
HZ
765 * @reg_addr: indirect register offset
766 * @reg_data: indirect register data
767 *
768 */
769void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
1bba3683
HZ
770 u32 reg_addr, u64 reg_data)
771{
65ba96e9 772 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
773 void __iomem *pcie_index_offset;
774 void __iomem *pcie_data_offset;
775
65ba96e9
HZ
776 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
777 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
778
1bba3683
HZ
779 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
780 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
781 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
782
783 /* write low 32 bits */
784 writel(reg_addr, pcie_index_offset);
785 readl(pcie_index_offset);
786 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
787 readl(pcie_data_offset);
788 /* write high 32 bits */
789 writel(reg_addr + 4, pcie_index_offset);
790 readl(pcie_index_offset);
791 writel((u32)(reg_data >> 32), pcie_data_offset);
792 readl(pcie_data_offset);
793 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
794}
795
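/**
 * amdgpu_device_indirect_wreg64_ext - write a 64bits indirect register
 * with a 64-bit register address
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register offset
 * @reg_data: indirect register data
 */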
796void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
797 u64 reg_addr, u64 reg_data)
798{
799 unsigned long flags, pcie_index, pcie_data;
800 unsigned long pcie_index_hi = 0;
801 void __iomem *pcie_index_offset;
802 void __iomem *pcie_index_hi_offset;
803 void __iomem *pcie_data_offset;
804
805 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
806 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
807 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
808 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
809
810 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
811 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
812 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
813 if (pcie_index_hi != 0)
814 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
815 pcie_index_hi * 4;
816
817 /* write low 32 bits */
818 writel(reg_addr, pcie_index_offset);
819 readl(pcie_index_offset);
820 if (pcie_index_hi != 0) {
821 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
822 readl(pcie_index_hi_offset);
823 }
824 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
825 readl(pcie_data_offset);
826 /* write high 32 bits */
827 writel(reg_addr + 4, pcie_index_offset);
828 readl(pcie_index_offset);
829 if (pcie_index_hi != 0) {
830 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
831 readl(pcie_index_hi_offset);
832 }
833 writel((u32)(reg_data >> 32), pcie_data_offset);
834 readl(pcie_data_offset);
835
836 /* clear the high bits */
837 if (pcie_index_hi != 0) {
838 writel(0, pcie_index_hi_offset);
839 readl(pcie_index_hi_offset);
840 }
841
842 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
843}
844
dabc114e
HZ
845/**
846 * amdgpu_device_get_rev_id - query device rev_id
847 *
848 * @adev: amdgpu_device pointer
849 *
850 * Return device rev_id
851 */
852u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
853{
854 return adev->nbio.funcs->get_rev_id(adev);
855}
856
d38ceaf9
AD
857/**
858 * amdgpu_invalid_rreg - dummy reg read function
859 *
982a820b 860 * @adev: amdgpu_device pointer
d38ceaf9
AD
861 * @reg: offset of register
862 *
863 * Dummy register read function. Used for register blocks
864 * that certain asics don't have (all asics).
865 * Returns the value in the register.
866 */
867static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
868{
869 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
870 BUG();
871 return 0;
872}
873
0c552ed3
LM
874static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
875{
876 DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
877 BUG();
878 return 0;
879}
880
d38ceaf9
AD
881/**
882 * amdgpu_invalid_wreg - dummy reg write function
883 *
982a820b 884 * @adev: amdgpu_device pointer
d38ceaf9
AD
885 * @reg: offset of register
886 * @v: value to write to the register
887 *
888 * Dummy register write function. Used for register blocks
889 * that certain asics don't have (all asics).
890 */
891static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
892{
893 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
894 reg, v);
895 BUG();
896}
897
0c552ed3
LM
898static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
899{
900 DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
901 reg, v);
902 BUG();
903}
904
4fa1c6a6
TZ
905/**
906 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
907 *
982a820b 908 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
909 * @reg: offset of register
910 *
911 * Dummy register read function. Used for register blocks
912 * that certain asics don't have (all asics).
913 * Returns the value in the register.
914 */
915static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
916{
917 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
918 BUG();
919 return 0;
920}
921
a76b2870
CL
922static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg)
923{
924 DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
925 BUG();
926 return 0;
927}
928
4fa1c6a6
TZ
929/**
930 * amdgpu_invalid_wreg64 - dummy reg write function
931 *
982a820b 932 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
933 * @reg: offset of register
934 * @v: value to write to the register
935 *
936 * Dummy register read function. Used for register blocks
937 * that certain asics don't have (all asics).
938 */
939static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
940{
941 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
942 reg, v);
943 BUG();
944}
945
a76b2870
CL
946static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v)
947{
948 DRM_ERROR("Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n",
949 reg, v);
950 BUG();
951}
952
d38ceaf9
AD
953/**
954 * amdgpu_block_invalid_rreg - dummy reg read function
955 *
982a820b 956 * @adev: amdgpu_device pointer
d38ceaf9
AD
957 * @block: offset of instance
958 * @reg: offset of register
959 *
960 * Dummy register read function. Used for register blocks
961 * that certain asics don't have (all asics).
962 * Returns the value in the register.
963 */
964static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
965 uint32_t block, uint32_t reg)
966{
967 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
968 reg, block);
969 BUG();
970 return 0;
971}
972
973/**
974 * amdgpu_block_invalid_wreg - dummy reg write function
975 *
982a820b 976 * @adev: amdgpu_device pointer
d38ceaf9
AD
977 * @block: offset of instance
978 * @reg: offset of register
979 * @v: value to write to the register
980 *
981 * Dummy register write function. Used for register blocks
982 * that certain asics don't have (all asics).
983 */
984static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
985 uint32_t block,
986 uint32_t reg, uint32_t v)
987{
988 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
989 reg, block, v);
990 BUG();
991}
992
4d2997ab
AD
993/**
994 * amdgpu_device_asic_init - Wrapper for atom asic_init
995 *
982a820b 996 * @adev: amdgpu_device pointer
4d2997ab
AD
997 *
998 * Does any asic specific work and then calls atom asic init.
999 */
1000static int amdgpu_device_asic_init(struct amdgpu_device *adev)
1001{
15c5c5f5
LL
1002 int ret;
1003
4d2997ab
AD
1004 amdgpu_asic_pre_asic_init(adev);
1005
4e8303cf
LL
1006 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
1007 amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
15c5c5f5
LL
1008 amdgpu_psp_wait_for_bootloader(adev);
1009 ret = amdgpu_atomfirmware_asic_init(adev, true);
1010 return ret;
1011 } else {
85d1bcc6 1012 return amdgpu_atom_asic_init(adev->mode_info.atom_context);
15c5c5f5
LL
1013 }
1014
1015 return 0;
4d2997ab
AD
1016}
1017
e3ecdffa 1018/**
7ccfd79f 1019 * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
e3ecdffa 1020 *
982a820b 1021 * @adev: amdgpu_device pointer
e3ecdffa
AD
1022 *
1023 * Allocates a scratch page of VRAM for use by various things in the
1024 * driver.
1025 */
7ccfd79f 1026static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
d38ceaf9 1027{
7ccfd79f
CK
1028 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
1029 AMDGPU_GEM_DOMAIN_VRAM |
1030 AMDGPU_GEM_DOMAIN_GTT,
1031 &adev->mem_scratch.robj,
1032 &adev->mem_scratch.gpu_addr,
1033 (void **)&adev->mem_scratch.ptr);
d38ceaf9
AD
1034}
1035
e3ecdffa 1036/**
7ccfd79f 1037 * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
e3ecdffa 1038 *
982a820b 1039 * @adev: amdgpu_device pointer
e3ecdffa
AD
1040 *
1041 * Frees the VRAM scratch page.
1042 */
7ccfd79f 1043static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 1044{
7ccfd79f 1045 amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
d38ceaf9
AD
1046}
1047
1048/**
9c3f2b54 1049 * amdgpu_device_program_register_sequence - program an array of registers.
d38ceaf9
AD
1050 *
1051 * @adev: amdgpu_device pointer
1052 * @registers: pointer to the register array
1053 * @array_size: size of the register array
1054 *
1055 * Programs an array of registers with AND and OR masks.
d38ceaf9
AD
1056 * This is a helper for setting golden registers.
1057 */
9c3f2b54
AD
1058void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
1059 const u32 *registers,
1060 const u32 array_size)
d38ceaf9
AD
1061{
1062 u32 tmp, reg, and_mask, or_mask;
1063 int i;
1064
1065 if (array_size % 3)
1066 return;
1067
47fc644f 1068 for (i = 0; i < array_size; i += 3) {
d38ceaf9
AD
1069 reg = registers[i + 0];
1070 and_mask = registers[i + 1];
1071 or_mask = registers[i + 2];
1072
1073 if (and_mask == 0xffffffff) {
1074 tmp = or_mask;
1075 } else {
1076 tmp = RREG32(reg);
1077 tmp &= ~and_mask;
e0d07657
HZ
1078 if (adev->family >= AMDGPU_FAMILY_AI)
1079 tmp |= (or_mask & and_mask);
1080 else
1081 tmp |= or_mask;
d38ceaf9
AD
1082 }
1083 WREG32(reg, tmp);
1084 }
1085}
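/*
 * Illustrative use (register names are hypothetical): the array is a flat list
 * of {reg, and_mask, or_mask} triplets, which is why array_size must be a
 * multiple of 3.
 *
 *   static const u32 example_golden_regs[] = {
 *           mmFOO_REG, 0xffffff0f, 0x00000020,
 *           mmBAR_REG, 0xffffffff, 0x12345678,
 *   };
 *   amdgpu_device_program_register_sequence(adev, example_golden_regs,
 *                                           ARRAY_SIZE(example_golden_regs));
 */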
1086
e3ecdffa
AD
1087/**
1088 * amdgpu_device_pci_config_reset - reset the GPU
1089 *
1090 * @adev: amdgpu_device pointer
1091 *
1092 * Resets the GPU using the pci config reset sequence.
1093 * Only applicable to asics prior to vega10.
1094 */
8111c387 1095void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
1096{
1097 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
1098}
1099
af484df8
AD
1100/**
1101 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1102 *
1103 * @adev: amdgpu_device pointer
1104 *
1105 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1106 */
1107int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1108{
1109 return pci_reset_function(adev->pdev);
1110}
1111
d38ceaf9 1112/*
06ec9070 1113 * amdgpu_device_wb_*()
455a7bc2 1114 * Writeback is the method by which the GPU updates special pages in memory
ea81a173 1115 * with the status of certain GPU events (fences, ring pointers,etc.).
d38ceaf9
AD
1116 */
1117
1118/**
06ec9070 1119 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
1120 *
1121 * @adev: amdgpu_device pointer
1122 *
1123 * Disables Writeback and frees the Writeback memory (all asics).
1124 * Used at driver shutdown.
1125 */
06ec9070 1126static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1127{
1128 if (adev->wb.wb_obj) {
a76ed485
AD
1129 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1130 &adev->wb.gpu_addr,
1131 (void **)&adev->wb.wb);
d38ceaf9
AD
1132 adev->wb.wb_obj = NULL;
1133 }
1134}
1135
1136/**
03f2abb0 1137 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
d38ceaf9
AD
1138 *
1139 * @adev: amdgpu_device pointer
1140 *
455a7bc2 1141 * Initializes writeback and allocates writeback memory (all asics).
d38ceaf9
AD
1142 * Used at driver startup.
1143 * Returns 0 on success or a negative error code on failure.
1144 */
06ec9070 1145static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
1146{
1147 int r;
1148
1149 if (adev->wb.wb_obj == NULL) {
97407b63
AD
1150 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1151 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
1152 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1153 &adev->wb.wb_obj, &adev->wb.gpu_addr,
1154 (void **)&adev->wb.wb);
d38ceaf9
AD
1155 if (r) {
1156 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1157 return r;
1158 }
d38ceaf9
AD
1159
1160 adev->wb.num_wb = AMDGPU_MAX_WB;
1161 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1162
1163 /* clear wb memory */
73469585 1164 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
1165 }
1166
1167 return 0;
1168}
1169
1170/**
131b4b36 1171 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
1172 *
1173 * @adev: amdgpu_device pointer
1174 * @wb: wb index
1175 *
1176 * Allocate a wb slot for use by the driver (all asics).
1177 * Returns 0 on success or -EINVAL on failure.
1178 */
131b4b36 1179int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9
AD
1180{
1181 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 1182
97407b63 1183 if (offset < adev->wb.num_wb) {
7014285a 1184 __set_bit(offset, adev->wb.used);
63ae07ca 1185 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
1186 return 0;
1187 } else {
1188 return -EINVAL;
1189 }
1190}
1191
d38ceaf9 1192/**
131b4b36 1193 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
1194 *
1195 * @adev: amdgpu_device pointer
1196 * @wb: wb index
1197 *
1198 * Free a wb slot allocated for use by the driver (all asics)
1199 */
131b4b36 1200void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 1201{
73469585 1202 wb >>= 3;
d38ceaf9 1203 if (wb < adev->wb.num_wb)
73469585 1204 __clear_bit(wb, adev->wb.used);
d38ceaf9
AD
1205}
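/*
 * Illustrative usage by an IP block (error handling trimmed): the returned
 * index is in dwords, so the CPU and GPU views of the slot are derived as
 * below.
 *
 *   u32 wb;
 *
 *   if (!amdgpu_device_wb_get(adev, &wb)) {
 *           volatile uint32_t *cpu_ptr = &adev->wb.wb[wb];
 *           u64 gpu_addr = adev->wb.gpu_addr + (wb * 4);
 *           ...
 *           amdgpu_device_wb_free(adev, wb);
 *   }
 */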
1206
d6895ad3
CK
1207/**
1208 * amdgpu_device_resize_fb_bar - try to resize FB BAR
1209 *
1210 * @adev: amdgpu_device pointer
1211 *
1212 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1213 * to fail, but if any of the BARs is not accessible after the resize we abort
1214 * driver loading by returning -ENODEV.
1215 */
1216int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1217{
453f617a 1218 int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
31b8adab
CK
1219 struct pci_bus *root;
1220 struct resource *res;
b8920e1e 1221 unsigned int i;
d6895ad3
CK
1222 u16 cmd;
1223 int r;
1224
822130b5
AB
1225 if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1226 return 0;
1227
0c03b912 1228 /* Bypass for VF */
1229 if (amdgpu_sriov_vf(adev))
1230 return 0;
1231
b7221f2b
AD
1232 /* skip if the bios has already enabled large BAR */
1233 if (adev->gmc.real_vram_size &&
1234 (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1235 return 0;
1236
31b8adab
CK
1237 /* Check if the root BUS has 64bit memory resources */
1238 root = adev->pdev->bus;
1239 while (root->parent)
1240 root = root->parent;
1241
1242 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 1243 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
1244 res->start > 0x100000000ull)
1245 break;
1246 }
1247
1248 /* Trying to resize is pointless without a root hub window above 4GB */
1249 if (!res)
1250 return 0;
1251
453f617a
ND
1252 /* Limit the BAR size to what is available */
1253 rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1254 rbar_size);
1255
d6895ad3
CK
1256 /* Disable memory decoding while we change the BAR addresses and size */
1257 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1258 pci_write_config_word(adev->pdev, PCI_COMMAND,
1259 cmd & ~PCI_COMMAND_MEMORY);
1260
1261 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
43c064db 1262 amdgpu_doorbell_fini(adev);
d6895ad3
CK
1263 if (adev->asic_type >= CHIP_BONAIRE)
1264 pci_release_resource(adev->pdev, 2);
1265
1266 pci_release_resource(adev->pdev, 0);
1267
1268 r = pci_resize_resource(adev->pdev, 0, rbar_size);
1269 if (r == -ENOSPC)
1270 DRM_INFO("Not enough PCI address space for a large BAR.");
1271 else if (r && r != -ENOTSUPP)
1272 DRM_ERROR("Problem resizing BAR0 (%d).", r);
1273
1274 pci_assign_unassigned_bus_resources(adev->pdev->bus);
1275
1276 /* When the doorbell or fb BAR isn't available we have no chance of
1277 * using the device.
1278 */
43c064db 1279 r = amdgpu_doorbell_init(adev);
d6895ad3
CK
1280 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1281 return -ENODEV;
1282
1283 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1284
1285 return 0;
1286}
a05502e5 1287
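/*
 * Whether the ASIC has a readable VBIOS: APUs that expose an AID mask have no
 * discrete VBIOS image to read, everything else does.
 */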
1288static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
1289{
b8920e1e 1290 if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
9535a86a 1291 return false;
9535a86a
SZ
1292
1293 return true;
1294}
1295
d38ceaf9
AD
1296/*
1297 * GPU helpers function.
1298 */
1299/**
39c640c0 1300 * amdgpu_device_need_post - check if the hw need post or not
d38ceaf9
AD
1301 *
1302 * @adev: amdgpu_device pointer
1303 *
c836fec5
JQ
1304 * Check if the asic has been initialized (all asics) at driver startup
1305 * or post is needed if hw reset is performed.
1306 * Returns true if need or false if not.
d38ceaf9 1307 */
39c640c0 1308bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
1309{
1310 uint32_t reg;
1311
bec86378
ML
1312 if (amdgpu_sriov_vf(adev))
1313 return false;
1314
9535a86a
SZ
1315 if (!amdgpu_device_read_bios(adev))
1316 return false;
1317
bec86378 1318 if (amdgpu_passthrough(adev)) {
		/* for FIJI: In the whole-GPU pass-through virtualization case, after a VM
		 * reboot some old SMC firmware still needs the driver to do a vPost,
		 * otherwise the GPU hangs. SMC firmware versions above 22.15 don't have
		 * this flaw, so we force vPost to be executed for SMC versions below 22.15.
		 */
1324 if (adev->asic_type == CHIP_FIJI) {
1325 int err;
1326 uint32_t fw_ver;
b8920e1e 1327
bec86378
ML
1328 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1329 /* force vPost if an error occurred */
1330 if (err)
1331 return true;
1332
1333 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1da2c326
ML
1334 if (fw_ver < 0x00160e00)
1335 return true;
bec86378 1336 }
bec86378 1337 }
91fe77eb 1338
e3c1b071 1339 /* Don't post if we need to reset whole hive on init */
1340 if (adev->gmc.xgmi.pending_reset)
1341 return false;
1342
91fe77eb 1343 if (adev->has_hw_reset) {
1344 adev->has_hw_reset = false;
1345 return true;
1346 }
1347
1348 /* bios scratch used on CIK+ */
1349 if (adev->asic_type >= CHIP_BONAIRE)
1350 return amdgpu_atombios_scratch_need_asic_init(adev);
1351
1352 /* check MEM_SIZE for older asics */
1353 reg = amdgpu_asic_get_config_memsize(adev);
1354
1355 if ((reg != 0) && (reg != 0xffffffff))
1356 return false;
1357
1358 return true;
70e64c4d
ML
1359}
1360
bb0f8429
ML
1361/*
1362 * Check whether seamless boot is supported.
1363 *
7f4ce7b5
ML
1364 * So far we only support seamless boot on DCE 3.0 or later.
1365 * If users report that it works on older ASICS as well, we may
1366 * loosen this.
bb0f8429
ML
1367 */
1368bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
1369{
5dc270d3
ML
1370 switch (amdgpu_seamless) {
1371 case -1:
1372 break;
1373 case 1:
1374 return true;
1375 case 0:
1376 return false;
1377 default:
1378 DRM_ERROR("Invalid value for amdgpu.seamless: %d\n",
1379 amdgpu_seamless);
1380 return false;
1381 }
1382
1383 if (adev->mman.keep_stolen_vga_memory)
1384 return false;
1385
7f4ce7b5 1386 return adev->ip_versions[DCE_HWIP][0] >= IP_VERSION(3, 0, 0);
bb0f8429
ML
1387}
1388
5d1eb4c4
ML
1389/*
1390 * Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic
1391 * speed switching. Until we have confirmation from Intel that a specific host
1392 * supports it, it's safer that we keep it disabled for all.
1393 *
1394 * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1395 * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1396 */
1397bool amdgpu_device_pcie_dynamic_switching_supported(void)
1398{
1399#if IS_ENABLED(CONFIG_X86)
1400 struct cpuinfo_x86 *c = &cpu_data(0);
1401
1402 if (c->x86_vendor == X86_VENDOR_INTEL)
1403 return false;
1404#endif
1405 return true;
1406}
1407
0ab5d711
ML
1408/**
1409 * amdgpu_device_should_use_aspm - check if the device should program ASPM
1410 *
1411 * @adev: amdgpu_device pointer
1412 *
1413 * Confirm whether the module parameter and pcie bridge agree that ASPM should
1414 * be set for this device.
1415 *
1416 * Returns true if it should be used or false if not.
1417 */
1418bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1419{
1420 switch (amdgpu_aspm) {
1421 case -1:
1422 break;
1423 case 0:
1424 return false;
1425 case 1:
1426 return true;
1427 default:
1428 return false;
1429 }
1430 return pcie_aspm_enabled(adev->pdev);
1431}
1432
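/*
 * ASPM in combination with some Intel Alder Lake hosts has been reported to be
 * problematic, so don't advertise ASPM support there; every other host (and
 * non-x86 builds) is treated as supporting it.
 */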
1433bool amdgpu_device_aspm_support_quirk(void)
1434{
1435#if IS_ENABLED(CONFIG_X86)
1436 struct cpuinfo_x86 *c = &cpu_data(0);
1437
1438 return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
1439#else
1440 return true;
1441#endif
1442}
1443
d38ceaf9
AD
1444/* if we get transitioned to only one device, take VGA back */
1445/**
06ec9070 1446 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9 1447 *
bf44e8ce 1448 * @pdev: PCI device pointer
d38ceaf9
AD
1449 * @state: enable/disable vga decode
1450 *
1451 * Enable/disable vga decode (all asics).
1452 * Returns VGA resource flags.
1453 */
bf44e8ce
CH
1454static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1455 bool state)
d38ceaf9 1456{
bf44e8ce 1457 struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
b8920e1e 1458
d38ceaf9
AD
1459 amdgpu_asic_set_vga_state(adev, state);
1460 if (state)
1461 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1462 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1463 else
1464 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1465}
1466
e3ecdffa
AD
1467/**
1468 * amdgpu_device_check_block_size - validate the vm block size
1469 *
1470 * @adev: amdgpu_device pointer
1471 *
1472 * Validates the vm block size specified via module parameter.
1473 * The vm block size defines number of bits in page table versus page directory,
1474 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1475 * page table and the remaining bits are in the page directory.
1476 */
06ec9070 1477static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
1478{
1479 /* defines number of bits in page table versus page directory,
1480 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
b8920e1e
SS
1481 * page table and the remaining bits are in the page directory
1482 */
bab4fee7
JZ
1483 if (amdgpu_vm_block_size == -1)
1484 return;
a1adf8be 1485
bab4fee7 1486 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
1487 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1488 amdgpu_vm_block_size);
97489129 1489 amdgpu_vm_block_size = -1;
a1adf8be 1490 }
a1adf8be
CZ
1491}
1492
e3ecdffa
AD
1493/**
1494 * amdgpu_device_check_vm_size - validate the vm size
1495 *
1496 * @adev: amdgpu_device pointer
1497 *
1498 * Validates the vm size in GB specified via module parameter.
1499 * The VM size is the size of the GPU virtual memory space in GB.
1500 */
06ec9070 1501static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 1502{
64dab074
AD
1503 /* no need to check the default value */
1504 if (amdgpu_vm_size == -1)
1505 return;
1506
83ca145d
ZJ
1507 if (amdgpu_vm_size < 1) {
1508 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1509 amdgpu_vm_size);
f3368128 1510 amdgpu_vm_size = -1;
83ca145d 1511 }
83ca145d
ZJ
1512}
1513
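/*
 * Validate the amdgpu_smu_memory_pool_size module parameter against the
 * available system memory and set adev->pm.smu_prv_buffer_size accordingly
 * (0 disables the pool).
 */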
1514static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1515{
1516 struct sysinfo si;
a9d4fe2f 1517 bool is_os_64 = (sizeof(void *) == 8);
7951e376
RZ
1518 uint64_t total_memory;
1519 uint64_t dram_size_seven_GB = 0x1B8000000;
1520 uint64_t dram_size_three_GB = 0xB8000000;
1521
1522 if (amdgpu_smu_memory_pool_size == 0)
1523 return;
1524
1525 if (!is_os_64) {
1526 DRM_WARN("Not 64-bit OS, feature not supported\n");
1527 goto def_value;
1528 }
1529 si_meminfo(&si);
1530 total_memory = (uint64_t)si.totalram * si.mem_unit;
1531
1532 if ((amdgpu_smu_memory_pool_size == 1) ||
1533 (amdgpu_smu_memory_pool_size == 2)) {
1534 if (total_memory < dram_size_three_GB)
1535 goto def_value1;
1536 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1537 (amdgpu_smu_memory_pool_size == 8)) {
1538 if (total_memory < dram_size_seven_GB)
1539 goto def_value1;
1540 } else {
1541 DRM_WARN("Smu memory pool size not supported\n");
1542 goto def_value;
1543 }
1544 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1545
1546 return;
1547
1548def_value1:
1549 DRM_WARN("Not enough system memory\n");
1550def_value:
1551 adev->pm.smu_prv_buffer_size = 0;
1552}
1553
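/*
 * Set the AMD_APU_IS_* flags in adev->apu_flags based on the APU asic type and
 * PCI device ID; no-op for dGPUs and for APUs older than Raven.
 */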
1554static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1555{
1556 if (!(adev->flags & AMD_IS_APU) ||
1557 adev->asic_type < CHIP_RAVEN)
1558 return 0;
1559
1560 switch (adev->asic_type) {
1561 case CHIP_RAVEN:
1562 if (adev->pdev->device == 0x15dd)
1563 adev->apu_flags |= AMD_APU_IS_RAVEN;
1564 if (adev->pdev->device == 0x15d8)
1565 adev->apu_flags |= AMD_APU_IS_PICASSO;
1566 break;
1567 case CHIP_RENOIR:
1568 if ((adev->pdev->device == 0x1636) ||
1569 (adev->pdev->device == 0x164c))
1570 adev->apu_flags |= AMD_APU_IS_RENOIR;
1571 else
1572 adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1573 break;
1574 case CHIP_VANGOGH:
1575 adev->apu_flags |= AMD_APU_IS_VANGOGH;
1576 break;
1577 case CHIP_YELLOW_CARP:
1578 break;
d0f56dc2 1579 case CHIP_CYAN_SKILLFISH:
dfcc3e8c
AD
1580 if ((adev->pdev->device == 0x13FE) ||
1581 (adev->pdev->device == 0x143F))
d0f56dc2
TZ
1582 adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1583 break;
9f6a7857 1584 default:
4eaf21b7 1585 break;
9f6a7857
HR
1586 }
1587
1588 return 0;
1589}
1590
d38ceaf9 1591/**
06ec9070 1592 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
1593 *
1594 * @adev: amdgpu_device pointer
1595 *
1596 * Validates certain module parameters and updates
1597 * the associated values used by the driver (all asics).
1598 */
912dfc84 1599static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1600{
5b011235
CZ
1601 if (amdgpu_sched_jobs < 4) {
1602 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1603 amdgpu_sched_jobs);
1604 amdgpu_sched_jobs = 4;
47fc644f 1605 } else if (!is_power_of_2(amdgpu_sched_jobs)) {
5b011235
CZ
1606 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1607 amdgpu_sched_jobs);
1608 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1609 }
d38ceaf9 1610
83e74db6 1611 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
1612 /* gart size must be greater or equal to 32M */
1613 dev_warn(adev->dev, "gart size (%d) too small\n",
1614 amdgpu_gart_size);
83e74db6 1615 amdgpu_gart_size = -1;
d38ceaf9
AD
1616 }
1617
36d38372 1618 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1619 /* gtt size must be greater or equal to 32M */
36d38372
CK
1620 dev_warn(adev->dev, "gtt size (%d) too small\n",
1621 amdgpu_gtt_size);
1622 amdgpu_gtt_size = -1;
d38ceaf9
AD
1623 }
1624
d07f14be
RH
1625 /* valid range is between 4 and 9 inclusive */
1626 if (amdgpu_vm_fragment_size != -1 &&
1627 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1628 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1629 amdgpu_vm_fragment_size = -1;
1630 }
1631
5d5bd5e3
KW
1632 if (amdgpu_sched_hw_submission < 2) {
1633 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1634 amdgpu_sched_hw_submission);
1635 amdgpu_sched_hw_submission = 2;
1636 } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1637 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1638 amdgpu_sched_hw_submission);
1639 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1640 }
1641
2656fd23
AG
1642 if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1643 dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1644 amdgpu_reset_method = -1;
1645 }
1646
7951e376
RZ
1647 amdgpu_device_check_smu_prv_buffer_size(adev);
1648
06ec9070 1649 amdgpu_device_check_vm_size(adev);
d38ceaf9 1650
06ec9070 1651 amdgpu_device_check_block_size(adev);
6a7f76e7 1652
19aede77 1653 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84 1654
e3c00faa 1655 return 0;
d38ceaf9
AD
1656}
1657
1658/**
1659 * amdgpu_switcheroo_set_state - set switcheroo state
1660 *
1661 * @pdev: pci dev pointer
1694467b 1662 * @state: vga_switcheroo state
d38ceaf9 1663 *
12024b17 1664 * Callback for the switcheroo driver. Suspends or resumes
d38ceaf9
AD
1665 * the asics before or after it is powered up using ACPI methods.
1666 */
8aba21b7
LT
1667static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1668 enum vga_switcheroo_state state)
d38ceaf9
AD
1669{
1670 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 1671 int r;
d38ceaf9 1672
b98c6299 1673 if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
1674 return;
1675
1676 if (state == VGA_SWITCHEROO_ON) {
dd4fa6c1 1677 pr_info("switched on\n");
d38ceaf9
AD
1678 /* don't suspend or resume card normally */
1679 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1680
8f66090b
TZ
1681 pci_set_power_state(pdev, PCI_D0);
1682 amdgpu_device_load_pci_state(pdev);
1683 r = pci_enable_device(pdev);
de185019
AD
1684 if (r)
1685 DRM_WARN("pci_enable_device failed (%d)\n", r);
1686 amdgpu_device_resume(dev, true);
d38ceaf9 1687
d38ceaf9 1688 dev->switch_power_state = DRM_SWITCH_POWER_ON;
d38ceaf9 1689 } else {
dd4fa6c1 1690 pr_info("switched off\n");
d38ceaf9 1691 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
de185019 1692 amdgpu_device_suspend(dev, true);
8f66090b 1693 amdgpu_device_cache_pci_state(pdev);
de185019 1694 /* Shut down the device */
8f66090b
TZ
1695 pci_disable_device(pdev);
1696 pci_set_power_state(pdev, PCI_D3cold);
d38ceaf9
AD
1697 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1698 }
1699}
1700
1701/**
1702 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1703 *
1704 * @pdev: pci dev pointer
1705 *
1706 * Callback for the switcheroo driver. Checks whether the switcheroo
1707 * state can be changed.
1708 * Returns true if the state can be changed, false if not.
1709 */
1710static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1711{
1712 struct drm_device *dev = pci_get_drvdata(pdev);
1713
b8920e1e 1714 /*
d38ceaf9
AD
1715 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1716 * locking inversion with the driver load path. And the access here is
1717 * completely racy anyway. So don't bother with locking for now.
1718 */
7e13ad89 1719 return atomic_read(&dev->open_count) == 0;
d38ceaf9
AD
1720}
1721
1722static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1723 .set_gpu_state = amdgpu_switcheroo_set_state,
1724 .reprobe = NULL,
1725 .can_switch = amdgpu_switcheroo_can_switch,
1726};
1727
e3ecdffa
AD
1728/**
1729 * amdgpu_device_ip_set_clockgating_state - set the CG state
1730 *
87e3f136 1731 * @dev: amdgpu_device pointer
e3ecdffa
AD
1732 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1733 * @state: clockgating state (gate or ungate)
1734 *
1735 * Sets the requested clockgating state for all instances of
1736 * the hardware IP specified.
1737 * Returns the error code from the last instance.
1738 */
43fa561f 1739int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1740 enum amd_ip_block_type block_type,
1741 enum amd_clockgating_state state)
d38ceaf9 1742{
43fa561f 1743 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1744 int i, r = 0;
1745
1746 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1747 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1748 continue;
c722865a
RZ
1749 if (adev->ip_blocks[i].version->type != block_type)
1750 continue;
1751 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1752 continue;
1753 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1754 (void *)adev, state);
1755 if (r)
1756 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1757 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1758 }
1759 return r;
1760}
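/*
 * Illustrative sketch (not part of the original file): an IP driver can gate
 * GFX clocks through this helper, assuming a valid adev:
 *
 *	r = amdgpu_device_ip_set_clockgating_state(adev,
 *						    AMD_IP_BLOCK_TYPE_GFX,
 *						    AMD_CG_STATE_GATE);
 *	if (r)
 *		DRM_ERROR("failed to gate GFX clocks (%d)\n", r);
 */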
1761
e3ecdffa
AD
1762/**
1763 * amdgpu_device_ip_set_powergating_state - set the PG state
1764 *
87e3f136 1765 * @dev: amdgpu_device pointer
e3ecdffa
AD
1766 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1767 * @state: powergating state (gate or ungate)
1768 *
1769 * Sets the requested powergating state for all instances of
1770 * the hardware IP specified.
1771 * Returns the error code from the last instance.
1772 */
43fa561f 1773int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1774 enum amd_ip_block_type block_type,
1775 enum amd_powergating_state state)
d38ceaf9 1776{
43fa561f 1777 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1778 int i, r = 0;
1779
1780 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1781 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1782 continue;
c722865a
RZ
1783 if (adev->ip_blocks[i].version->type != block_type)
1784 continue;
1785 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1786 continue;
1787 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1788 (void *)adev, state);
1789 if (r)
1790 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1791 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1792 }
1793 return r;
1794}
1795
e3ecdffa
AD
1796/**
1797 * amdgpu_device_ip_get_clockgating_state - get the CG state
1798 *
1799 * @adev: amdgpu_device pointer
1800 * @flags: clockgating feature flags
1801 *
1802 * Walks the list of IPs on the device and updates the clockgating
1803 * flags for each IP.
1804 * Updates @flags with the feature flags for each hardware IP where
1805 * clockgating is enabled.
1806 */
2990a1fc 1807void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
25faeddc 1808 u64 *flags)
6cb2d4e4
HR
1809{
1810 int i;
1811
1812 for (i = 0; i < adev->num_ip_blocks; i++) {
1813 if (!adev->ip_blocks[i].status.valid)
1814 continue;
1815 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1816 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1817 }
1818}
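/*
 * Illustrative sketch (assumption, not in the original file): a caller such
 * as a clockgating debugfs dump collects the aggregate flags like this:
 *
 *	u64 flags = 0;
 *
 *	amdgpu_device_ip_get_clockgating_state(adev, &flags);
 *	(flags now holds the clockgating feature bits reported by each IP)
 */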
1819
e3ecdffa
AD
1820/**
1821 * amdgpu_device_ip_wait_for_idle - wait for idle
1822 *
1823 * @adev: amdgpu_device pointer
1824 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1825 *
 1826 * Waits for the requested hardware IP to be idle.
1827 * Returns 0 for success or a negative error code on failure.
1828 */
2990a1fc
AD
1829int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1830 enum amd_ip_block_type block_type)
5dbbb60b
AD
1831{
1832 int i, r;
1833
1834 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1835 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1836 continue;
a1255107
AD
1837 if (adev->ip_blocks[i].version->type == block_type) {
1838 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1839 if (r)
1840 return r;
1841 break;
1842 }
1843 }
1844 return 0;
1845
1846}
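/*
 * Illustrative sketch (assumption): quiescing a single block before
 * reprogramming it:
 *
 *	r = amdgpu_device_ip_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GFX);
 *	if (r)
 *		return r;
 */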
1847
e3ecdffa
AD
1848/**
1849 * amdgpu_device_ip_is_idle - is the hardware IP idle
1850 *
1851 * @adev: amdgpu_device pointer
1852 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1853 *
1854 * Check if the hardware IP is idle or not.
 1855 * Returns true if the IP is idle, false if not.
1856 */
2990a1fc
AD
1857bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1858 enum amd_ip_block_type block_type)
5dbbb60b
AD
1859{
1860 int i;
1861
1862 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1863 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1864 continue;
a1255107
AD
1865 if (adev->ip_blocks[i].version->type == block_type)
1866 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1867 }
1868 return true;
1869
1870}
1871
e3ecdffa
AD
1872/**
1873 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1874 *
1875 * @adev: amdgpu_device pointer
87e3f136 1876 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1877 *
1878 * Returns a pointer to the hardware IP block structure
1879 * if it exists for the asic, otherwise NULL.
1880 */
2990a1fc
AD
1881struct amdgpu_ip_block *
1882amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1883 enum amd_ip_block_type type)
d38ceaf9
AD
1884{
1885 int i;
1886
1887 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1888 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1889 return &adev->ip_blocks[i];
1890
1891 return NULL;
1892}
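/*
 * Illustrative sketch (assumption): looking up a block before touching its
 * callbacks; the helper returns NULL when the asic has no such IP:
 *
 *	struct amdgpu_ip_block *ip_block =
 *		amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP);
 *
 *	if (!ip_block || !ip_block->version->funcs->hw_init)
 *		return -ENOENT;
 */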
1893
1894/**
2990a1fc 1895 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1896 *
1897 * @adev: amdgpu_device pointer
5fc3aeeb 1898 * @type: enum amd_ip_block_type
d38ceaf9
AD
1899 * @major: major version
1900 * @minor: minor version
1901 *
1902 * return 0 if equal or greater
1903 * return 1 if smaller or the ip_block doesn't exist
1904 */
2990a1fc
AD
1905int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1906 enum amd_ip_block_type type,
1907 u32 major, u32 minor)
d38ceaf9 1908{
2990a1fc 1909 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1910
a1255107
AD
1911 if (ip_block && ((ip_block->version->major > major) ||
1912 ((ip_block->version->major == major) &&
1913 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1914 return 0;
1915
1916 return 1;
1917}
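/*
 * Illustrative sketch (assumption): gating a code path on a minimum IP block
 * version; a return of 0 means the installed block is at least the requested
 * version:
 *
 *	if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GMC,
 *						9, 0)) {
 *		... take the GMC 9.0+ path ...
 *	}
 */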
1918
a1255107 1919/**
2990a1fc 1920 * amdgpu_device_ip_block_add
a1255107
AD
1921 *
1922 * @adev: amdgpu_device pointer
1923 * @ip_block_version: pointer to the IP to add
1924 *
1925 * Adds the IP block driver information to the collection of IPs
1926 * on the asic.
1927 */
2990a1fc
AD
1928int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1929 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1930{
1931 if (!ip_block_version)
1932 return -EINVAL;
1933
7bd939d0
LG
1934 switch (ip_block_version->type) {
1935 case AMD_IP_BLOCK_TYPE_VCN:
1936 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
1937 return 0;
1938 break;
1939 case AMD_IP_BLOCK_TYPE_JPEG:
1940 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
1941 return 0;
1942 break;
1943 default:
1944 break;
1945 }
1946
e966a725 1947 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1948 ip_block_version->funcs->name);
1949
a1255107
AD
1950 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1951
1952 return 0;
1953}
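/*
 * Illustrative sketch (the ip block symbol below is a placeholder, not a real
 * symbol from this file): asic-specific *_set_ip_blocks() code registers its
 * IPs in initialization order:
 *
 *	r = amdgpu_device_ip_block_add(adev, &example_common_ip_block);
 *	if (r)
 *		return r;
 */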
1954
e3ecdffa
AD
1955/**
1956 * amdgpu_device_enable_virtual_display - enable virtual display feature
1957 *
1958 * @adev: amdgpu_device pointer
1959 *
 1960 * Enables the virtual display feature if the user has enabled it via
 1961 * the module parameter virtual_display. This feature provides virtual
 1962 * display hardware on headless boards or in virtualized environments.
 1963 * This function parses and validates the configuration string specified by
 1964 * the user and configures the virtual display configuration (number of
1965 * virtual connectors, crtcs, etc.) specified.
1966 */
483ef985 1967static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1968{
1969 adev->enable_virtual_display = false;
1970
1971 if (amdgpu_virtual_display) {
8f66090b 1972 const char *pci_address_name = pci_name(adev->pdev);
0f66356d 1973 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1974
1975 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1976 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1977 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1978 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1979 if (!strcmp("all", pciaddname)
1980 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1981 long num_crtc;
1982 int res = -1;
1983
9accf2fd 1984 adev->enable_virtual_display = true;
0f66356d
ED
1985
1986 if (pciaddname_tmp)
1987 res = kstrtol(pciaddname_tmp, 10,
1988 &num_crtc);
1989
1990 if (!res) {
1991 if (num_crtc < 1)
1992 num_crtc = 1;
1993 if (num_crtc > 6)
1994 num_crtc = 6;
1995 adev->mode_info.num_crtc = num_crtc;
1996 } else {
1997 adev->mode_info.num_crtc = 1;
1998 }
9accf2fd
ED
1999 break;
2000 }
2001 }
2002
0f66356d
ED
2003 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
2004 amdgpu_virtual_display, pci_address_name,
2005 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
2006
2007 kfree(pciaddstr);
2008 }
2009}
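/*
 * Illustrative note (the bus addresses below are placeholders): per the
 * parsing above, the module parameter takes "<pci address>,<num_crtc>"
 * entries separated by ';', or the keyword "all", e.g.
 *
 *	amdgpu.virtual_display=0000:03:00.0,2
 *	amdgpu.virtual_display=all,1
 *
 * num_crtc is clamped to the range 1..6 and defaults to 1 when omitted or
 * malformed.
 */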
2010
25263da3
AD
2011void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
2012{
2013 if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
2014 adev->mode_info.num_crtc = 1;
2015 adev->enable_virtual_display = true;
2016 DRM_INFO("virtual_display:%d, num_crtc:%d\n",
2017 adev->enable_virtual_display, adev->mode_info.num_crtc);
2018 }
2019}
2020
e3ecdffa
AD
2021/**
2022 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
2023 *
2024 * @adev: amdgpu_device pointer
2025 *
2026 * Parses the asic configuration parameters specified in the gpu info
 2027 * firmware and makes them available to the driver for use in configuring
2028 * the asic.
2029 * Returns 0 on success, -EINVAL on failure.
2030 */
e2a75f88
AD
2031static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
2032{
e2a75f88 2033 const char *chip_name;
c0a43457 2034 char fw_name[40];
e2a75f88
AD
2035 int err;
2036 const struct gpu_info_firmware_header_v1_0 *hdr;
2037
ab4fe3e1
HR
2038 adev->firmware.gpu_info_fw = NULL;
2039
72de33f8 2040 if (adev->mman.discovery_bin) {
cc375d8c
TY
2041 /*
2042 * FIXME: The bounding box is still needed by Navi12, so
e24d0e91 2043 * temporarily read it from gpu_info firmware. Should be dropped
cc375d8c
TY
2044 * when DAL no longer needs it.
2045 */
2046 if (adev->asic_type != CHIP_NAVI12)
2047 return 0;
258620d0
AD
2048 }
2049
e2a75f88 2050 switch (adev->asic_type) {
e2a75f88
AD
2051 default:
2052 return 0;
2053 case CHIP_VEGA10:
2054 chip_name = "vega10";
2055 break;
3f76dced
AD
2056 case CHIP_VEGA12:
2057 chip_name = "vega12";
2058 break;
2d2e5e7e 2059 case CHIP_RAVEN:
54f78a76 2060 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
54c4d17e 2061 chip_name = "raven2";
54f78a76 2062 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
741deade 2063 chip_name = "picasso";
54c4d17e
FX
2064 else
2065 chip_name = "raven";
2d2e5e7e 2066 break;
65e60f6e
LM
2067 case CHIP_ARCTURUS:
2068 chip_name = "arcturus";
2069 break;
42b325e5
XY
2070 case CHIP_NAVI12:
2071 chip_name = "navi12";
2072 break;
e2a75f88
AD
2073 }
2074
2075 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
b31d3063 2076 err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
e2a75f88
AD
2077 if (err) {
2078 dev_err(adev->dev,
b31d3063 2079 "Failed to get gpu_info firmware \"%s\"\n",
e2a75f88
AD
2080 fw_name);
2081 goto out;
2082 }
2083
ab4fe3e1 2084 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
2085 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
2086
2087 switch (hdr->version_major) {
2088 case 1:
2089 {
2090 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 2091 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
2092 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2093
cc375d8c
TY
2094 /*
 2095 * Should be dropped when DAL no longer needs it.
2096 */
2097 if (adev->asic_type == CHIP_NAVI12)
ec51d3fa
XY
2098 goto parse_soc_bounding_box;
2099
b5ab16bf
AD
2100 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2101 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2102 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2103 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 2104 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
2105 le32_to_cpu(gpu_info_fw->gc_num_tccs);
2106 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2107 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2108 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2109 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 2110 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
2111 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2112 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
2113 adev->gfx.cu_info.max_waves_per_simd =
2114 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2115 adev->gfx.cu_info.max_scratch_slots_per_cu =
2116 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2117 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 2118 if (hdr->version_minor >= 1) {
35c2e910
HZ
2119 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2120 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2121 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2122 adev->gfx.config.num_sc_per_sh =
2123 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2124 adev->gfx.config.num_packer_per_sc =
2125 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2126 }
ec51d3fa
XY
2127
2128parse_soc_bounding_box:
ec51d3fa
XY
2129 /*
 2130 * soc bounding box info is not integrated in the discovery table,
258620d0 2131 * so we always need to parse it from the gpu info firmware when needed.
ec51d3fa 2132 */
48321c3d
HW
2133 if (hdr->version_minor == 2) {
2134 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2135 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2136 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2137 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2138 }
e2a75f88
AD
2139 break;
2140 }
2141 default:
2142 dev_err(adev->dev,
2143 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2144 err = -EINVAL;
2145 goto out;
2146 }
2147out:
e2a75f88
AD
2148 return err;
2149}
2150
e3ecdffa
AD
2151/**
2152 * amdgpu_device_ip_early_init - run early init for hardware IPs
2153 *
2154 * @adev: amdgpu_device pointer
2155 *
2156 * Early initialization pass for hardware IPs. The hardware IPs that make
 2157 * up each asic are discovered and each IP's early_init callback is run. This
2158 * is the first stage in initializing the asic.
2159 * Returns 0 on success, negative error code on failure.
2160 */
06ec9070 2161static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 2162{
901e2be2
AD
2163 struct drm_device *dev = adev_to_drm(adev);
2164 struct pci_dev *parent;
aaa36a97 2165 int i, r;
ced69502 2166 bool total;
d38ceaf9 2167
483ef985 2168 amdgpu_device_enable_virtual_display(adev);
a6be7570 2169
00a979f3 2170 if (amdgpu_sriov_vf(adev)) {
00a979f3 2171 r = amdgpu_virt_request_full_gpu(adev, true);
aaa36a97
AD
2172 if (r)
2173 return r;
00a979f3
WS
2174 }
2175
d38ceaf9 2176 switch (adev->asic_type) {
33f34802
KW
2177#ifdef CONFIG_DRM_AMDGPU_SI
2178 case CHIP_VERDE:
2179 case CHIP_TAHITI:
2180 case CHIP_PITCAIRN:
2181 case CHIP_OLAND:
2182 case CHIP_HAINAN:
295d0daf 2183 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
2184 r = si_set_ip_blocks(adev);
2185 if (r)
2186 return r;
2187 break;
2188#endif
a2e73f56
AD
2189#ifdef CONFIG_DRM_AMDGPU_CIK
2190 case CHIP_BONAIRE:
2191 case CHIP_HAWAII:
2192 case CHIP_KAVERI:
2193 case CHIP_KABINI:
2194 case CHIP_MULLINS:
e1ad2d53 2195 if (adev->flags & AMD_IS_APU)
a2e73f56 2196 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
2197 else
2198 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
2199
2200 r = cik_set_ip_blocks(adev);
2201 if (r)
2202 return r;
2203 break;
2204#endif
da87c30b
AD
2205 case CHIP_TOPAZ:
2206 case CHIP_TONGA:
2207 case CHIP_FIJI:
2208 case CHIP_POLARIS10:
2209 case CHIP_POLARIS11:
2210 case CHIP_POLARIS12:
2211 case CHIP_VEGAM:
2212 case CHIP_CARRIZO:
2213 case CHIP_STONEY:
2214 if (adev->flags & AMD_IS_APU)
2215 adev->family = AMDGPU_FAMILY_CZ;
2216 else
2217 adev->family = AMDGPU_FAMILY_VI;
2218
2219 r = vi_set_ip_blocks(adev);
2220 if (r)
2221 return r;
2222 break;
d38ceaf9 2223 default:
63352b7f
AD
2224 r = amdgpu_discovery_set_ip_blocks(adev);
2225 if (r)
2226 return r;
2227 break;
d38ceaf9
AD
2228 }
2229
901e2be2
AD
2230 if (amdgpu_has_atpx() &&
2231 (amdgpu_is_atpx_hybrid() ||
2232 amdgpu_has_atpx_dgpu_power_cntl()) &&
2233 ((adev->flags & AMD_IS_APU) == 0) &&
2234 !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
2235 adev->flags |= AMD_IS_PX;
2236
85ac2021
AD
2237 if (!(adev->flags & AMD_IS_APU)) {
2238 parent = pci_upstream_bridge(adev->pdev);
2239 adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2240 }
901e2be2 2241
1884734a 2242
3b94fb10 2243 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 2244 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 2245 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
4215a119
HC
2246 if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2247 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
00f54b97 2248
ced69502 2249 total = true;
d38ceaf9
AD
2250 for (i = 0; i < adev->num_ip_blocks; i++) {
2251 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
0c451baf 2252 DRM_WARN("disabled ip block: %d <%s>\n",
ed8cf00c 2253 i, adev->ip_blocks[i].version->funcs->name);
a1255107 2254 adev->ip_blocks[i].status.valid = false;
d38ceaf9 2255 } else {
a1255107
AD
2256 if (adev->ip_blocks[i].version->funcs->early_init) {
2257 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 2258 if (r == -ENOENT) {
a1255107 2259 adev->ip_blocks[i].status.valid = false;
2c1a2784 2260 } else if (r) {
a1255107
AD
2261 DRM_ERROR("early_init of IP block <%s> failed %d\n",
2262 adev->ip_blocks[i].version->funcs->name, r);
ced69502 2263 total = false;
2c1a2784 2264 } else {
a1255107 2265 adev->ip_blocks[i].status.valid = true;
2c1a2784 2266 }
974e6b64 2267 } else {
a1255107 2268 adev->ip_blocks[i].status.valid = true;
d38ceaf9 2269 }
d38ceaf9 2270 }
21a249ca
AD
2271 /* get the vbios after the asic_funcs are set up */
2272 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
2273 r = amdgpu_device_parse_gpu_info_fw(adev);
2274 if (r)
2275 return r;
2276
21a249ca 2277 /* Read BIOS */
9535a86a
SZ
2278 if (amdgpu_device_read_bios(adev)) {
2279 if (!amdgpu_get_bios(adev))
2280 return -EINVAL;
21a249ca 2281
9535a86a
SZ
2282 r = amdgpu_atombios_init(adev);
2283 if (r) {
2284 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2285 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2286 return r;
2287 }
21a249ca 2288 }
77eabc6f
PJZ
2289
2290 /*get pf2vf msg info at it's earliest time*/
2291 if (amdgpu_sriov_vf(adev))
2292 amdgpu_virt_init_data_exchange(adev);
2293
21a249ca 2294 }
d38ceaf9 2295 }
ced69502
ML
2296 if (!total)
2297 return -ENODEV;
d38ceaf9 2298
00fa4035 2299 amdgpu_amdkfd_device_probe(adev);
395d1fb9
NH
2300 adev->cg_flags &= amdgpu_cg_mask;
2301 adev->pg_flags &= amdgpu_pg_mask;
2302
d38ceaf9
AD
2303 return 0;
2304}
2305
0a4f2520
RZ
2306static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2307{
2308 int i, r;
2309
2310 for (i = 0; i < adev->num_ip_blocks; i++) {
2311 if (!adev->ip_blocks[i].status.sw)
2312 continue;
2313 if (adev->ip_blocks[i].status.hw)
2314 continue;
2315 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 2316 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
2317 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2318 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2319 if (r) {
2320 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2321 adev->ip_blocks[i].version->funcs->name, r);
2322 return r;
2323 }
2324 adev->ip_blocks[i].status.hw = true;
2325 }
2326 }
2327
2328 return 0;
2329}
2330
2331static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2332{
2333 int i, r;
2334
2335 for (i = 0; i < adev->num_ip_blocks; i++) {
2336 if (!adev->ip_blocks[i].status.sw)
2337 continue;
2338 if (adev->ip_blocks[i].status.hw)
2339 continue;
2340 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2341 if (r) {
2342 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2343 adev->ip_blocks[i].version->funcs->name, r);
2344 return r;
2345 }
2346 adev->ip_blocks[i].status.hw = true;
2347 }
2348
2349 return 0;
2350}
2351
7a3e0bb2
RZ
2352static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2353{
2354 int r = 0;
2355 int i;
80f41f84 2356 uint32_t smu_version;
7a3e0bb2
RZ
2357
2358 if (adev->asic_type >= CHIP_VEGA10) {
2359 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
2360 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2361 continue;
2362
e3c1b071 2363 if (!adev->ip_blocks[i].status.sw)
2364 continue;
2365
482f0e53
ML
2366 /* no need to do the fw loading again if already done*/
2367 if (adev->ip_blocks[i].status.hw == true)
2368 break;
2369
53b3f8f4 2370 if (amdgpu_in_reset(adev) || adev->in_suspend) {
482f0e53
ML
2371 r = adev->ip_blocks[i].version->funcs->resume(adev);
2372 if (r) {
2373 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 2374 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
2375 return r;
2376 }
2377 } else {
2378 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2379 if (r) {
2380 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2381 adev->ip_blocks[i].version->funcs->name, r);
2382 return r;
7a3e0bb2 2383 }
7a3e0bb2 2384 }
482f0e53
ML
2385
2386 adev->ip_blocks[i].status.hw = true;
2387 break;
7a3e0bb2
RZ
2388 }
2389 }
482f0e53 2390
8973d9ec
ED
2391 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2392 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 2393
80f41f84 2394 return r;
7a3e0bb2
RZ
2395}
2396
5fd8518d
AG
2397static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2398{
2399 long timeout;
2400 int r, i;
2401
2402 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2403 struct amdgpu_ring *ring = adev->rings[i];
2404
2405 /* No need to setup the GPU scheduler for rings that don't need it */
2406 if (!ring || ring->no_scheduler)
2407 continue;
2408
2409 switch (ring->funcs->type) {
2410 case AMDGPU_RING_TYPE_GFX:
2411 timeout = adev->gfx_timeout;
2412 break;
2413 case AMDGPU_RING_TYPE_COMPUTE:
2414 timeout = adev->compute_timeout;
2415 break;
2416 case AMDGPU_RING_TYPE_SDMA:
2417 timeout = adev->sdma_timeout;
2418 break;
2419 default:
2420 timeout = adev->video_timeout;
2421 break;
2422 }
2423
2424 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
11f25c84 2425 ring->num_hw_submission, 0,
8ab62eda
JG
2426 timeout, adev->reset_domain->wq,
2427 ring->sched_score, ring->name,
2428 adev->dev);
5fd8518d
AG
2429 if (r) {
2430 DRM_ERROR("Failed to create scheduler on ring %s.\n",
2431 ring->name);
2432 return r;
2433 }
2434 }
2435
d425c6f4
JZ
2436 amdgpu_xcp_update_partition_sched_list(adev);
2437
5fd8518d
AG
2438 return 0;
2439}
2440
2441
e3ecdffa
AD
2442/**
2443 * amdgpu_device_ip_init - run init for hardware IPs
2444 *
2445 * @adev: amdgpu_device pointer
2446 *
2447 * Main initialization pass for hardware IPs. The list of all the hardware
2448 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2449 * are run. sw_init initializes the software state associated with each IP
2450 * and hw_init initializes the hardware associated with each IP.
2451 * Returns 0 on success, negative error code on failure.
2452 */
06ec9070 2453static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
2454{
2455 int i, r;
2456
c030f2e4 2457 r = amdgpu_ras_init(adev);
2458 if (r)
2459 return r;
2460
d38ceaf9 2461 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2462 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2463 continue;
a1255107 2464 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 2465 if (r) {
a1255107
AD
2466 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2467 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 2468 goto init_failed;
2c1a2784 2469 }
a1255107 2470 adev->ip_blocks[i].status.sw = true;
bfca0289 2471
c1c39032
AD
2472 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2473 /* need to do common hw init early so everything is set up for gmc */
2474 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2475 if (r) {
2476 DRM_ERROR("hw_init %d failed %d\n", i, r);
2477 goto init_failed;
2478 }
2479 adev->ip_blocks[i].status.hw = true;
2480 } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2481 /* need to do gmc hw init early so we can allocate gpu mem */
892deb48
VS
2482 /* Try to reserve bad pages early */
2483 if (amdgpu_sriov_vf(adev))
2484 amdgpu_virt_exchange_data(adev);
2485
7ccfd79f 2486 r = amdgpu_device_mem_scratch_init(adev);
2c1a2784 2487 if (r) {
7ccfd79f 2488 DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
72d3f592 2489 goto init_failed;
2c1a2784 2490 }
a1255107 2491 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
2492 if (r) {
2493 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 2494 goto init_failed;
2c1a2784 2495 }
06ec9070 2496 r = amdgpu_device_wb_init(adev);
2c1a2784 2497 if (r) {
06ec9070 2498 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 2499 goto init_failed;
2c1a2784 2500 }
a1255107 2501 adev->ip_blocks[i].status.hw = true;
2493664f
ML
2502
2503 /* right after GMC hw init, we create CSA */
02ff519e 2504 if (adev->gfx.mcbp) {
1e256e27 2505 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
58ab2c08
CK
2506 AMDGPU_GEM_DOMAIN_VRAM |
2507 AMDGPU_GEM_DOMAIN_GTT,
2508 AMDGPU_CSA_SIZE);
2493664f
ML
2509 if (r) {
2510 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 2511 goto init_failed;
2493664f
ML
2512 }
2513 }
d38ceaf9
AD
2514 }
2515 }
2516
c9ffa427 2517 if (amdgpu_sriov_vf(adev))
22c16d25 2518 amdgpu_virt_init_data_exchange(adev);
c9ffa427 2519
533aed27
AG
2520 r = amdgpu_ib_pool_init(adev);
2521 if (r) {
2522 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2523 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2524 goto init_failed;
2525 }
2526
c8963ea4
RZ
2527 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2528 if (r)
72d3f592 2529 goto init_failed;
0a4f2520
RZ
2530
2531 r = amdgpu_device_ip_hw_init_phase1(adev);
2532 if (r)
72d3f592 2533 goto init_failed;
0a4f2520 2534
7a3e0bb2
RZ
2535 r = amdgpu_device_fw_loading(adev);
2536 if (r)
72d3f592 2537 goto init_failed;
7a3e0bb2 2538
0a4f2520
RZ
2539 r = amdgpu_device_ip_hw_init_phase2(adev);
2540 if (r)
72d3f592 2541 goto init_failed;
d38ceaf9 2542
121a2bc6
AG
2543 /*
2544 * retired pages will be loaded from eeprom and reserved here,
2545 * it should be called after amdgpu_device_ip_hw_init_phase2 since
2546 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2547 * for I2C communication which only true at this point.
b82e65a9
GC
2548 *
2549 * amdgpu_ras_recovery_init may fail, but the upper only cares the
2550 * failure from bad gpu situation and stop amdgpu init process
2551 * accordingly. For other failed cases, it will still release all
2552 * the resource and print error message, rather than returning one
2553 * negative value to upper level.
121a2bc6
AG
2554 *
2555 * Note: theoretically, this should be called before all vram allocations
 2556 * to protect retired pages from being reused
2557 */
b82e65a9
GC
2558 r = amdgpu_ras_recovery_init(adev);
2559 if (r)
2560 goto init_failed;
121a2bc6 2561
cfbb6b00
AG
2562 /**
 2563 * In case of XGMI, grab an extra reference on the reset domain for this device
2564 */
a4c63caf 2565 if (adev->gmc.xgmi.num_physical_nodes > 1) {
cfbb6b00 2566 if (amdgpu_xgmi_add_device(adev) == 0) {
46c67660 2567 if (!amdgpu_sriov_vf(adev)) {
2efc30f0
VC
2568 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2569
dfd0287b
LH
2570 if (WARN_ON(!hive)) {
2571 r = -ENOENT;
2572 goto init_failed;
2573 }
2574
46c67660 2575 if (!hive->reset_domain ||
2576 !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2577 r = -ENOENT;
2578 amdgpu_put_xgmi_hive(hive);
2579 goto init_failed;
2580 }
2581
2582 /* Drop the early temporary reset domain we created for device */
2583 amdgpu_reset_put_reset_domain(adev->reset_domain);
2584 adev->reset_domain = hive->reset_domain;
9dfa4860 2585 amdgpu_put_xgmi_hive(hive);
cfbb6b00 2586 }
a4c63caf
AG
2587 }
2588 }
2589
5fd8518d
AG
2590 r = amdgpu_device_init_schedulers(adev);
2591 if (r)
2592 goto init_failed;
e3c1b071 2593
 2594 /* Don't init kfd if the whole hive needs to be reset during init */
84b4dd3f
PY
2595 if (!adev->gmc.xgmi.pending_reset) {
2596 kgd2kfd_init_zone_device(adev);
e3c1b071 2597 amdgpu_amdkfd_device_init(adev);
84b4dd3f 2598 }
c6332b97 2599
bd607166
KR
2600 amdgpu_fru_get_product_info(adev);
2601
72d3f592 2602init_failed:
c6332b97 2603
72d3f592 2604 return r;
d38ceaf9
AD
2605}
2606
e3ecdffa
AD
2607/**
2608 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2609 *
2610 * @adev: amdgpu_device pointer
2611 *
2612 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2613 * this function before a GPU reset. If the value is retained after a
 2614 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2615 */
06ec9070 2616static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2617{
2618 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2619}
2620
e3ecdffa
AD
2621/**
2622 * amdgpu_device_check_vram_lost - check if vram is valid
2623 *
2624 * @adev: amdgpu_device pointer
2625 *
2626 * Checks the reset magic value written to the gart pointer in VRAM.
2627 * The driver calls this after a GPU reset to see if the contents of
 2628 * VRAM have been lost or not.
 2629 * Returns true if vram is lost, false if not.
2630 */
06ec9070 2631static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2632{
dadce777
EQ
2633 if (memcmp(adev->gart.ptr, adev->reset_magic,
2634 AMDGPU_RESET_MAGIC_NUM))
2635 return true;
2636
53b3f8f4 2637 if (!amdgpu_in_reset(adev))
dadce777
EQ
2638 return false;
2639
2640 /*
2641 * For all ASICs with baco/mode1 reset, the VRAM is
2642 * always assumed to be lost.
2643 */
2644 switch (amdgpu_asic_reset_method(adev)) {
2645 case AMD_RESET_METHOD_BACO:
2646 case AMD_RESET_METHOD_MODE1:
2647 return true;
2648 default:
2649 return false;
2650 }
0c49e0b8
CZ
2651}
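/*
 * Illustrative sketch (assumption): a typical recovery flow pairs the two
 * helpers above -- write the magic before the reset, check it afterwards:
 *
 *	amdgpu_device_fill_reset_magic(adev);
 *	... perform the ASIC reset ...
 *	if (amdgpu_device_check_vram_lost(adev))
 *		... restore VRAM contents (firmware, GART, buffer backups) ...
 */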
2652
e3ecdffa 2653/**
1112a46b 2654 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2655 *
2656 * @adev: amdgpu_device pointer
b8b72130 2657 * @state: clockgating state (gate or ungate)
e3ecdffa 2658 *
e3ecdffa 2659 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2660 * set_clockgating_state callbacks are run.
 2661 * During late init this is used to enable clockgating for the hardware IPs;
 2662 * during fini or suspend it is used to disable clockgating.
e3ecdffa
AD
2663 * Returns 0 on success, negative error code on failure.
2664 */
fdd34271 2665
5d89bb2d
LL
2666int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2667 enum amd_clockgating_state state)
d38ceaf9 2668{
1112a46b 2669 int i, j, r;
d38ceaf9 2670
4a2ba394
SL
2671 if (amdgpu_emu_mode == 1)
2672 return 0;
2673
1112a46b
RZ
2674 for (j = 0; j < adev->num_ip_blocks; j++) {
2675 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2676 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2677 continue;
47198eb7 2678 /* skip CG for GFX, SDMA on S0ix */
5d70a549 2679 if (adev->in_s0ix &&
47198eb7
AD
2680 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2681 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2682 continue;
4a446d55 2683 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2684 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2685 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2686 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2687 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2688 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2689 /* enable clockgating to save power */
a1255107 2690 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2691 state);
4a446d55
AD
2692 if (r) {
2693 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2694 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2695 return r;
2696 }
b0b00ff1 2697 }
d38ceaf9 2698 }
06b18f61 2699
c9f96fd5
RZ
2700 return 0;
2701}
2702
5d89bb2d
LL
2703int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2704 enum amd_powergating_state state)
c9f96fd5 2705{
1112a46b 2706 int i, j, r;
06b18f61 2707
c9f96fd5
RZ
2708 if (amdgpu_emu_mode == 1)
2709 return 0;
2710
1112a46b
RZ
2711 for (j = 0; j < adev->num_ip_blocks; j++) {
2712 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2713 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5 2714 continue;
47198eb7 2715 /* skip PG for GFX, SDMA on S0ix */
5d70a549 2716 if (adev->in_s0ix &&
47198eb7
AD
2717 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2718 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2719 continue;
c9f96fd5
RZ
 2720 /* skip PG for VCE/UVD, it's handled specially */
2721 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2722 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2723 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2724 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2725 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2726 /* enable powergating to save power */
2727 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2728 state);
c9f96fd5
RZ
2729 if (r) {
2730 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2731 adev->ip_blocks[i].version->funcs->name, r);
2732 return r;
2733 }
2734 }
2735 }
2dc80b00
S
2736 return 0;
2737}
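/*
 * Illustrative sketch (assumption): the two helpers above are used as a
 * pair -- gate on the way up (late init) and ungate on the way down
 * (fini/suspend), walking the IP list in opposite directions:
 *
 *	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
 *	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
 *	...
 *	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
 *	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
 */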
2738
beff74bc
AD
2739static int amdgpu_device_enable_mgpu_fan_boost(void)
2740{
2741 struct amdgpu_gpu_instance *gpu_ins;
2742 struct amdgpu_device *adev;
2743 int i, ret = 0;
2744
2745 mutex_lock(&mgpu_info.mutex);
2746
2747 /*
2748 * MGPU fan boost feature should be enabled
2749 * only when there are two or more dGPUs in
2750 * the system
2751 */
2752 if (mgpu_info.num_dgpu < 2)
2753 goto out;
2754
2755 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2756 gpu_ins = &(mgpu_info.gpu_ins[i]);
2757 adev = gpu_ins->adev;
2758 if (!(adev->flags & AMD_IS_APU) &&
f10bb940 2759 !gpu_ins->mgpu_fan_enabled) {
beff74bc
AD
2760 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2761 if (ret)
2762 break;
2763
2764 gpu_ins->mgpu_fan_enabled = 1;
2765 }
2766 }
2767
2768out:
2769 mutex_unlock(&mgpu_info.mutex);
2770
2771 return ret;
2772}
2773
e3ecdffa
AD
2774/**
2775 * amdgpu_device_ip_late_init - run late init for hardware IPs
2776 *
2777 * @adev: amdgpu_device pointer
2778 *
2779 * Late initialization pass for hardware IPs. The list of all the hardware
2780 * IPs that make up the asic is walked and the late_init callbacks are run.
2781 * late_init covers any special initialization that an IP requires
 2782 * after all of the IPs have been initialized or something that needs to happen
2783 * late in the init process.
2784 * Returns 0 on success, negative error code on failure.
2785 */
06ec9070 2786static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2787{
60599a03 2788 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2789 int i = 0, r;
2790
2791 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2792 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2793 continue;
2794 if (adev->ip_blocks[i].version->funcs->late_init) {
2795 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2796 if (r) {
2797 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2798 adev->ip_blocks[i].version->funcs->name, r);
2799 return r;
2800 }
2dc80b00 2801 }
73f847db 2802 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2803 }
2804
867e24ca 2805 r = amdgpu_ras_late_init(adev);
2806 if (r) {
2807 DRM_ERROR("amdgpu_ras_late_init failed %d", r);
2808 return r;
2809 }
2810
a891d239
DL
2811 amdgpu_ras_set_error_query_ready(adev, true);
2812
1112a46b
RZ
2813 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2814 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2815
06ec9070 2816 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2817
beff74bc
AD
2818 r = amdgpu_device_enable_mgpu_fan_boost();
2819 if (r)
2820 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2821
4da8b639 2822 /* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */
47fc644f
SS
2823 if (amdgpu_passthrough(adev) &&
2824 ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
2825 adev->asic_type == CHIP_ALDEBARAN))
bc143d8b 2826 amdgpu_dpm_handle_passthrough_sbr(adev, true);
60599a03
EQ
2827
2828 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2829 mutex_lock(&mgpu_info.mutex);
2830
2831 /*
2832 * Reset device p-state to low as this was booted with high.
2833 *
2834 * This should be performed only after all devices from the same
2835 * hive get initialized.
2836 *
2837 * However, it's unknown how many device in the hive in advance.
2838 * As this is counted one by one during devices initializations.
2839 *
2840 * So, we wait for all XGMI interlinked devices initialized.
2841 * This may bring some delays as those devices may come from
2842 * different hives. But that should be OK.
2843 */
2844 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2845 for (i = 0; i < mgpu_info.num_gpu; i++) {
2846 gpu_instance = &(mgpu_info.gpu_ins[i]);
2847 if (gpu_instance->adev->flags & AMD_IS_APU)
2848 continue;
2849
d84a430d
JK
2850 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2851 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
2852 if (r) {
2853 DRM_ERROR("pstate setting failed (%d).\n", r);
2854 break;
2855 }
2856 }
2857 }
2858
2859 mutex_unlock(&mgpu_info.mutex);
2860 }
2861
d38ceaf9
AD
2862 return 0;
2863}
2864
613aa3ea
LY
2865/**
2866 * amdgpu_device_smu_fini_early - smu hw_fini wrapper
2867 *
2868 * @adev: amdgpu_device pointer
2869 *
 2870 * For ASICs that need to disable the SMC first
2871 */
2872static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
2873{
2874 int i, r;
2875
4e8303cf 2876 if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
613aa3ea
LY
2877 return;
2878
2879 for (i = 0; i < adev->num_ip_blocks; i++) {
2880 if (!adev->ip_blocks[i].status.hw)
2881 continue;
2882 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2883 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2884 /* XXX handle errors */
2885 if (r) {
2886 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2887 adev->ip_blocks[i].version->funcs->name, r);
2888 }
2889 adev->ip_blocks[i].status.hw = false;
2890 break;
2891 }
2892 }
2893}
2894
e9669fb7 2895static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
d38ceaf9
AD
2896{
2897 int i, r;
2898
e9669fb7
AG
2899 for (i = 0; i < adev->num_ip_blocks; i++) {
2900 if (!adev->ip_blocks[i].version->funcs->early_fini)
2901 continue;
5278a159 2902
e9669fb7
AG
2903 r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
2904 if (r) {
2905 DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
2906 adev->ip_blocks[i].version->funcs->name, r);
2907 }
2908 }
c030f2e4 2909
05df1f01 2910 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2911 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2912
7270e895
TY
2913 amdgpu_amdkfd_suspend(adev, false);
2914
613aa3ea
LY
 2915 /* Workaround for ASICs that need to disable the SMC first */
2916 amdgpu_device_smu_fini_early(adev);
3e96dbfd 2917
d38ceaf9 2918 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2919 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2920 continue;
8201a67a 2921
a1255107 2922 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2923 /* XXX handle errors */
2c1a2784 2924 if (r) {
a1255107
AD
2925 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2926 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2927 }
8201a67a 2928
a1255107 2929 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2930 }
2931
6effad8a
GC
2932 if (amdgpu_sriov_vf(adev)) {
2933 if (amdgpu_virt_release_full_gpu(adev, false))
2934 DRM_ERROR("failed to release exclusive mode on fini\n");
2935 }
2936
e9669fb7
AG
2937 return 0;
2938}
2939
2940/**
2941 * amdgpu_device_ip_fini - run fini for hardware IPs
2942 *
2943 * @adev: amdgpu_device pointer
2944 *
2945 * Main teardown pass for hardware IPs. The list of all the hardware
2946 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2947 * are run. hw_fini tears down the hardware associated with each IP
2948 * and sw_fini tears down any software state associated with each IP.
2949 * Returns 0 on success, negative error code on failure.
2950 */
2951static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2952{
2953 int i, r;
2954
2955 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2956 amdgpu_virt_release_ras_err_handler_data(adev);
2957
e9669fb7
AG
2958 if (adev->gmc.xgmi.num_physical_nodes > 1)
2959 amdgpu_xgmi_remove_device(adev);
2960
c004d44e 2961 amdgpu_amdkfd_device_fini_sw(adev);
9950cda2 2962
d38ceaf9 2963 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2964 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2965 continue;
c12aba3a
ML
2966
2967 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2968 amdgpu_ucode_free_bo(adev);
1e256e27 2969 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a 2970 amdgpu_device_wb_fini(adev);
7ccfd79f 2971 amdgpu_device_mem_scratch_fini(adev);
533aed27 2972 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2973 }
2974
a1255107 2975 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2976 /* XXX handle errors */
2c1a2784 2977 if (r) {
a1255107
AD
2978 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2979 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2980 }
a1255107
AD
2981 adev->ip_blocks[i].status.sw = false;
2982 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2983 }
2984
a6dcfd9c 2985 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2986 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2987 continue;
a1255107
AD
2988 if (adev->ip_blocks[i].version->funcs->late_fini)
2989 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2990 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2991 }
2992
c030f2e4 2993 amdgpu_ras_fini(adev);
2994
d38ceaf9
AD
2995 return 0;
2996}
2997
e3ecdffa 2998/**
beff74bc 2999 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 3000 *
1112a46b 3001 * @work: work_struct.
e3ecdffa 3002 */
beff74bc 3003static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
3004{
3005 struct amdgpu_device *adev =
beff74bc 3006 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
3007 int r;
3008
3009 r = amdgpu_ib_ring_tests(adev);
3010 if (r)
3011 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
3012}
3013
1e317b99
RZ
3014static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
3015{
3016 struct amdgpu_device *adev =
3017 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
3018
90a92662
MD
3019 WARN_ON_ONCE(adev->gfx.gfx_off_state);
3020 WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
3021
3022 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
3023 adev->gfx.gfx_off_state = true;
1e317b99
RZ
3024}
3025
e3ecdffa 3026/**
e7854a03 3027 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
3028 *
3029 * @adev: amdgpu_device pointer
3030 *
3031 * Main suspend function for hardware IPs. The list of all the hardware
3032 * IPs that make up the asic is walked, clockgating is disabled and the
3033 * suspend callbacks are run. suspend puts the hardware and software state
3034 * in each IP into a state suitable for suspend.
3035 * Returns 0 on success, negative error code on failure.
3036 */
e7854a03
AD
3037static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
3038{
3039 int i, r;
3040
50ec83f0
AD
3041 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3042 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 3043
b31d6ada
EQ
3044 /*
3045 * Per PMFW team's suggestion, driver needs to handle gfxoff
3046 * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
3047 * scenario. Add the missing df cstate disablement here.
3048 */
3049 if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
3050 dev_warn(adev->dev, "Failed to disallow df cstate");
3051
e7854a03
AD
3052 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3053 if (!adev->ip_blocks[i].status.valid)
3054 continue;
2b9f7848 3055
e7854a03 3056 /* displays are handled separately */
2b9f7848
ND
3057 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
3058 continue;
3059
3060 /* XXX handle errors */
3061 r = adev->ip_blocks[i].version->funcs->suspend(adev);
3062 /* XXX handle errors */
3063 if (r) {
3064 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3065 adev->ip_blocks[i].version->funcs->name, r);
3066 return r;
e7854a03 3067 }
2b9f7848
ND
3068
3069 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
3070 }
3071
e7854a03
AD
3072 return 0;
3073}
3074
3075/**
3076 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3077 *
3078 * @adev: amdgpu_device pointer
3079 *
3080 * Main suspend function for hardware IPs. The list of all the hardware
3081 * IPs that make up the asic is walked, clockgating is disabled and the
3082 * suspend callbacks are run. suspend puts the hardware and software state
3083 * in each IP into a state suitable for suspend.
3084 * Returns 0 on success, negative error code on failure.
3085 */
3086static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3087{
3088 int i, r;
3089
557f42a2 3090 if (adev->in_s0ix)
bc143d8b 3091 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
34416931 3092
d38ceaf9 3093 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3094 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 3095 continue;
e7854a03
AD
3096 /* displays are handled in phase1 */
3097 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3098 continue;
bff77e86
LM
3099 /* PSP lost connection when err_event_athub occurs */
3100 if (amdgpu_ras_intr_triggered() &&
3101 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3102 adev->ip_blocks[i].status.hw = false;
3103 continue;
3104 }
e3c1b071 3105
3106 /* skip unnecessary suspend if we do not initialize them yet */
3107 if (adev->gmc.xgmi.pending_reset &&
3108 !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3109 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
3110 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3111 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
3112 adev->ip_blocks[i].status.hw = false;
3113 continue;
3114 }
557f42a2 3115
afa6646b 3116 /* skip suspend of gfx/mes and psp for S0ix
32ff160d
AD
3117 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3118 * like at runtime. PSP is also part of the always on hardware
3119 * so no need to suspend it.
3120 */
557f42a2 3121 if (adev->in_s0ix &&
32ff160d 3122 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
afa6646b
AD
3123 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3124 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
557f42a2
AD
3125 continue;
3126
2a7798ea
AD
3127 /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3128 if (adev->in_s0ix &&
4e8303cf
LL
3129 (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
3130 IP_VERSION(5, 0, 0)) &&
3131 (adev->ip_blocks[i].version->type ==
3132 AMD_IP_BLOCK_TYPE_SDMA))
2a7798ea
AD
3133 continue;
3134
e11c7750
TH
 3135 /* swPSP provides the IMU and RLC FW binaries to TOS once during cold-boot.
3136 * These are in TMR, hence are expected to be reused by PSP-TOS to reload
3137 * from this location and RLC Autoload automatically also gets loaded
3138 * from here based on PMFW -> PSP message during re-init sequence.
 3140 * Therefore, the psp suspend & resume should be skipped to avoid destroying
 3141 * the TMR and reloading FWs again for IMU enabled APU ASICs.
3141 */
3142 if (amdgpu_in_reset(adev) &&
3143 (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3144 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3145 continue;
3146
d38ceaf9 3147 /* XXX handle errors */
a1255107 3148 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 3149 /* XXX handle errors */
2c1a2784 3150 if (r) {
a1255107
AD
3151 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3152 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 3153 }
876923fb 3154 adev->ip_blocks[i].status.hw = false;
a3a09142 3155 /* handle putting the SMC in the appropriate state */
47fc644f 3156 if (!amdgpu_sriov_vf(adev)) {
86b93fd6
JZ
3157 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3158 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3159 if (r) {
3160 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3161 adev->mp1_state, r);
3162 return r;
3163 }
a3a09142
AD
3164 }
3165 }
d38ceaf9
AD
3166 }
3167
3168 return 0;
3169}
3170
e7854a03
AD
3171/**
3172 * amdgpu_device_ip_suspend - run suspend for hardware IPs
3173 *
3174 * @adev: amdgpu_device pointer
3175 *
3176 * Main suspend function for hardware IPs. The list of all the hardware
3177 * IPs that make up the asic is walked, clockgating is disabled and the
3178 * suspend callbacks are run. suspend puts the hardware and software state
3179 * in each IP into a state suitable for suspend.
3180 * Returns 0 on success, negative error code on failure.
3181 */
3182int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3183{
3184 int r;
3185
3c73683c
JC
3186 if (amdgpu_sriov_vf(adev)) {
3187 amdgpu_virt_fini_data_exchange(adev);
e7819644 3188 amdgpu_virt_request_full_gpu(adev, false);
3c73683c 3189 }
e7819644 3190
e7854a03
AD
3191 r = amdgpu_device_ip_suspend_phase1(adev);
3192 if (r)
3193 return r;
3194 r = amdgpu_device_ip_suspend_phase2(adev);
3195
e7819644
YT
3196 if (amdgpu_sriov_vf(adev))
3197 amdgpu_virt_release_full_gpu(adev, false);
3198
e7854a03
AD
3199 return r;
3200}
3201
06ec9070 3202static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3203{
3204 int i, r;
3205
2cb681b6 3206 static enum amd_ip_block_type ip_order[] = {
2cb681b6 3207 AMD_IP_BLOCK_TYPE_COMMON,
c1c39032 3208 AMD_IP_BLOCK_TYPE_GMC,
39186aef 3209 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
3210 AMD_IP_BLOCK_TYPE_IH,
3211 };
a90ad3c2 3212
95ea3dbc 3213 for (i = 0; i < adev->num_ip_blocks; i++) {
2cb681b6
ML
3214 int j;
3215 struct amdgpu_ip_block *block;
a90ad3c2 3216
4cd2a96d
J
3217 block = &adev->ip_blocks[i];
3218 block->status.hw = false;
2cb681b6 3219
4cd2a96d 3220 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
2cb681b6 3221
4cd2a96d 3222 if (block->version->type != ip_order[j] ||
2cb681b6
ML
3223 !block->status.valid)
3224 continue;
3225
3226 r = block->version->funcs->hw_init(adev);
0aaeefcc 3227 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3228 if (r)
3229 return r;
482f0e53 3230 block->status.hw = true;
a90ad3c2
ML
3231 }
3232 }
3233
3234 return 0;
3235}
3236
06ec9070 3237static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3238{
3239 int i, r;
3240
2cb681b6
ML
3241 static enum amd_ip_block_type ip_order[] = {
3242 AMD_IP_BLOCK_TYPE_SMC,
3243 AMD_IP_BLOCK_TYPE_DCE,
3244 AMD_IP_BLOCK_TYPE_GFX,
3245 AMD_IP_BLOCK_TYPE_SDMA,
ec64350d 3246 AMD_IP_BLOCK_TYPE_MES,
257deb8c 3247 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07 3248 AMD_IP_BLOCK_TYPE_VCE,
d2cdc014
YZ
3249 AMD_IP_BLOCK_TYPE_VCN,
3250 AMD_IP_BLOCK_TYPE_JPEG
2cb681b6 3251 };
a90ad3c2 3252
2cb681b6
ML
3253 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3254 int j;
3255 struct amdgpu_ip_block *block;
a90ad3c2 3256
2cb681b6
ML
3257 for (j = 0; j < adev->num_ip_blocks; j++) {
3258 block = &adev->ip_blocks[j];
3259
3260 if (block->version->type != ip_order[i] ||
482f0e53
ML
3261 !block->status.valid ||
3262 block->status.hw)
2cb681b6
ML
3263 continue;
3264
895bd048
JZ
3265 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3266 r = block->version->funcs->resume(adev);
3267 else
3268 r = block->version->funcs->hw_init(adev);
3269
0aaeefcc 3270 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3271 if (r)
3272 return r;
482f0e53 3273 block->status.hw = true;
a90ad3c2
ML
3274 }
3275 }
3276
3277 return 0;
3278}
3279
e3ecdffa
AD
3280/**
3281 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3282 *
3283 * @adev: amdgpu_device pointer
3284 *
3285 * First resume function for hardware IPs. The list of all the hardware
3286 * IPs that make up the asic is walked and the resume callbacks are run for
3287 * COMMON, GMC, and IH. resume puts the hardware into a functional state
3288 * after a suspend and updates the software state as necessary. This
3289 * function is also used for restoring the GPU after a GPU reset.
3290 * Returns 0 on success, negative error code on failure.
3291 */
06ec9070 3292static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
3293{
3294 int i, r;
3295
a90ad3c2 3296 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3297 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 3298 continue;
a90ad3c2 3299 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3300 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
d7274ec7
BZ
3301 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3302 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
482f0e53 3303
fcf0649f
CZ
3304 r = adev->ip_blocks[i].version->funcs->resume(adev);
3305 if (r) {
3306 DRM_ERROR("resume of IP block <%s> failed %d\n",
3307 adev->ip_blocks[i].version->funcs->name, r);
3308 return r;
3309 }
482f0e53 3310 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
3311 }
3312 }
3313
3314 return 0;
3315}
3316
e3ecdffa
AD
3317/**
3318 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3319 *
3320 * @adev: amdgpu_device pointer
3321 *
 3322 * Second resume function for hardware IPs. The list of all the hardware
3323 * IPs that make up the asic is walked and the resume callbacks are run for
3324 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
3325 * functional state after a suspend and updates the software state as
3326 * necessary. This function is also used for restoring the GPU after a GPU
3327 * reset.
3328 * Returns 0 on success, negative error code on failure.
3329 */
06ec9070 3330static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3331{
3332 int i, r;
3333
3334 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3335 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 3336 continue;
fcf0649f 3337 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3338 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
3339 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3340 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 3341 continue;
a1255107 3342 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 3343 if (r) {
a1255107
AD
3344 DRM_ERROR("resume of IP block <%s> failed %d\n",
3345 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 3346 return r;
2c1a2784 3347 }
482f0e53 3348 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
3349 }
3350
3351 return 0;
3352}
3353
e3ecdffa
AD
3354/**
3355 * amdgpu_device_ip_resume - run resume for hardware IPs
3356 *
3357 * @adev: amdgpu_device pointer
3358 *
3359 * Main resume function for hardware IPs. The hardware IPs
3360 * are split into two resume functions because they are
b8920e1e 3361 * also used in recovering from a GPU reset and some additional
e3ecdffa
AD
 3362 * steps need to be taken between them. In this case (S3/S4) they are
3363 * run sequentially.
3364 * Returns 0 on success, negative error code on failure.
3365 */
06ec9070 3366static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
3367{
3368 int r;
3369
06ec9070 3370 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
3371 if (r)
3372 return r;
7a3e0bb2
RZ
3373
3374 r = amdgpu_device_fw_loading(adev);
3375 if (r)
3376 return r;
3377
06ec9070 3378 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
3379
3380 return r;
3381}
3382
e3ecdffa
AD
3383/**
3384 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3385 *
3386 * @adev: amdgpu_device pointer
3387 *
3388 * Query the VBIOS data tables to determine if the board supports SR-IOV.
3389 */
4e99a44e 3390static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 3391{
6867e1b5
ML
3392 if (amdgpu_sriov_vf(adev)) {
3393 if (adev->is_atom_fw) {
58ff791a 3394 if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
6867e1b5
ML
3395 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3396 } else {
3397 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3398 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3399 }
3400
3401 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3402 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 3403 }
048765ad
AR
3404}
3405
e3ecdffa
AD
3406/**
3407 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3408 *
3409 * @asic_type: AMD asic type
3410 *
 3411 * Check if there is DC (new modesetting infrastructure) support for an asic.
3412 * returns true if DC has support, false if not.
3413 */
4562236b
HW
3414bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3415{
3416 switch (asic_type) {
0637d417
AD
3417#ifdef CONFIG_DRM_AMDGPU_SI
3418 case CHIP_HAINAN:
3419#endif
3420 case CHIP_TOPAZ:
3421 /* chips with no display hardware */
3422 return false;
4562236b 3423#if defined(CONFIG_DRM_AMD_DC)
64200c46
MR
3424 case CHIP_TAHITI:
3425 case CHIP_PITCAIRN:
3426 case CHIP_VERDE:
3427 case CHIP_OLAND:
2d32ffd6
AD
3428 /*
3429 * We have systems in the wild with these ASICs that require
3430 * LVDS and VGA support which is not supported with DC.
3431 *
3432 * Fallback to the non-DC driver here by default so as not to
3433 * cause regressions.
3434 */
3435#if defined(CONFIG_DRM_AMD_DC_SI)
3436 return amdgpu_dc > 0;
3437#else
3438 return false;
64200c46 3439#endif
4562236b 3440 case CHIP_BONAIRE:
0d6fbccb 3441 case CHIP_KAVERI:
367e6687
AD
3442 case CHIP_KABINI:
3443 case CHIP_MULLINS:
d9fda248
HW
3444 /*
3445 * We have systems in the wild with these ASICs that require
b5a0168e 3446 * VGA support which is not supported with DC.
d9fda248
HW
3447 *
3448 * Fallback to the non-DC driver here by default so as not to
3449 * cause regressions.
3450 */
3451 return amdgpu_dc > 0;
f7f12b25 3452 default:
fd187853 3453 return amdgpu_dc != 0;
f7f12b25 3454#else
4562236b 3455 default:
93b09a9a 3456 if (amdgpu_dc > 0)
b8920e1e 3457 DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
4562236b 3458 return false;
f7f12b25 3459#endif
4562236b
HW
3460 }
3461}
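/*
 * Illustrative, standalone sketch (not part of this file): how the
 * tri-state amdgpu_dc module parameter (-1 = auto, 0 = force off,
 * 1 = force on) interacts with the two checks used above.
 * "amdgpu_dc > 0" enables DC only on explicit request (the legacy ASIC
 * cases), while "amdgpu_dc != 0" enables DC unless explicitly disabled.
 */
#include <stdbool.h>
#include <stdio.h>

static bool dc_opt_in_only(int amdgpu_dc) { return amdgpu_dc > 0; }  /* legacy ASIC branches */
static bool dc_default_on(int amdgpu_dc)  { return amdgpu_dc != 0; } /* default branch */

int main(void)
{
	const int values[] = { -1, 0, 1 };
	int i;

	for (i = 0; i < 3; i++)
		printf("amdgpu_dc=%2d -> opt-in only: %d, default on: %d\n",
		       values[i], dc_opt_in_only(values[i]), dc_default_on(values[i]));
	return 0;
}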
3462
3463/**
3464 * amdgpu_device_has_dc_support - check if dc is supported
3465 *
982a820b 3466 * @adev: amdgpu_device pointer
4562236b
HW
3467 *
3468 * Returns true for supported, false for not supported
3469 */
3470bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3471{
25263da3 3472 if (adev->enable_virtual_display ||
abaf210c 3473 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
2555039d
XY
3474 return false;
3475
4562236b
HW
3476 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3477}
3478
d4535e2c
AG
3479static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3480{
3481 struct amdgpu_device *adev =
3482 container_of(__work, struct amdgpu_device, xgmi_reset_work);
d95e8e97 3483 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
d4535e2c 3484
c6a6e2db
AG
3485 /* It's a bug to not have a hive within this function */
3486 if (WARN_ON(!hive))
3487 return;
3488
3489 /*
3490 * Use task barrier to synchronize all xgmi reset works across the
3491 * hive. task_barrier_enter and task_barrier_exit will block
3492 * until all the threads running the xgmi reset works reach
3493 * those points. task_barrier_full will do both blocks.
3494 */
3495 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3496
3497 task_barrier_enter(&hive->tb);
4a580877 3498 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
c6a6e2db
AG
3499
3500 if (adev->asic_reset_res)
3501 goto fail;
3502
3503 task_barrier_exit(&hive->tb);
4a580877 3504 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
c6a6e2db
AG
3505
3506 if (adev->asic_reset_res)
3507 goto fail;
43c4d576 3508
5e67bba3 3509 if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
3510 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
3511 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
c6a6e2db
AG
3512 } else {
3513
3514 task_barrier_full(&hive->tb);
3515 adev->asic_reset_res = amdgpu_asic_reset(adev);
3516 }
ce316fa5 3517
c6a6e2db 3518fail:
d4535e2c 3519 if (adev->asic_reset_res)
fed184e9 3520 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
4a580877 3521 adev->asic_reset_res, adev_to_drm(adev)->unique);
d95e8e97 3522 amdgpu_put_xgmi_hive(hive);
d4535e2c
AG
3523}
3524
71f98027
AD
3525static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3526{
3527 char *input = amdgpu_lockup_timeout;
3528 char *timeout_setting = NULL;
3529 int index = 0;
3530 long timeout;
3531 int ret = 0;
3532
3533 /*
67387dfe
AD
 3534 * By default the timeout for non-compute jobs is 10000 ms
 3535 * and 60000 ms for compute jobs.
71f98027 3536 * In SR-IOV the default compute timeout is 60000 ms only in
b7b2a316 3537 * one-VF mode, otherwise it is 10000 ms.
71f98027
AD
3538 */
3539 adev->gfx_timeout = msecs_to_jiffies(10000);
3540 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
9882e278
ED
3541 if (amdgpu_sriov_vf(adev))
3542 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3543 msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
71f98027 3544 else
67387dfe 3545 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027 3546
f440ff44 3547 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 3548 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 3549 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
3550 ret = kstrtol(timeout_setting, 0, &timeout);
3551 if (ret)
3552 return ret;
3553
3554 if (timeout == 0) {
3555 index++;
3556 continue;
3557 } else if (timeout < 0) {
3558 timeout = MAX_SCHEDULE_TIMEOUT;
127aedf9
CK
3559 dev_warn(adev->dev, "lockup timeout disabled");
3560 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
71f98027
AD
3561 } else {
3562 timeout = msecs_to_jiffies(timeout);
3563 }
3564
3565 switch (index++) {
3566 case 0:
3567 adev->gfx_timeout = timeout;
3568 break;
3569 case 1:
3570 adev->compute_timeout = timeout;
3571 break;
3572 case 2:
3573 adev->sdma_timeout = timeout;
3574 break;
3575 case 3:
3576 adev->video_timeout = timeout;
3577 break;
3578 default:
3579 break;
3580 }
3581 }
3582 /*
3583 * There is only one value specified and
3584 * it should apply to all non-compute jobs.
3585 */
bcccee89 3586 if (index == 1) {
71f98027 3587 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
3588 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3589 adev->compute_timeout = adev->gfx_timeout;
3590 }
71f98027
AD
3591 }
3592
3593 return ret;
3594}
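/*
 * Illustrative, standalone sketch (not part of this file): how a
 * comma-separated lockup-timeout string such as "10000,60000,-1,0" is
 * interpreted, in the same positional order the driver uses above:
 * gfx, compute, sdma, video.  0 keeps the default and a negative value
 * disables the timeout.  (When only a single value is given, the driver
 * above applies it to all non-compute queues.)
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	const char *names[] = { "gfx", "compute", "sdma", "video" };
	char input[] = "10000,60000,-1,0";	/* hypothetical module argument */
	char *tok;
	int index = 0;

	for (tok = strtok(input, ","); tok && index < 4; tok = strtok(NULL, ","), index++) {
		long timeout = strtol(tok, NULL, 0);

		if (timeout == 0)
			printf("%s: keep default\n", names[index]);
		else if (timeout < 0)
			printf("%s: timeout disabled\n", names[index]);
		else
			printf("%s: %ld ms\n", names[index], timeout);
	}
	return 0;
}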
d4535e2c 3595
4a74c38c
PY
3596/**
3597 * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3598 *
3599 * @adev: amdgpu_device pointer
3600 *
 3601 * RAM is direct mapped to the GPU if the IOMMU is not enabled or is in passthrough mode
3602 */
3603static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3604{
3605 struct iommu_domain *domain;
3606
3607 domain = iommu_get_domain_for_dev(adev->dev);
3608 if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3609 adev->ram_is_direct_mapped = true;
3610}
3611
77f3a5cd 3612static const struct attribute *amdgpu_dev_attributes[] = {
77f3a5cd
ND
3613 &dev_attr_pcie_replay_count.attr,
3614 NULL
3615};
3616
02ff519e
AD
3617static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
3618{
3619 if (amdgpu_mcbp == 1)
3620 adev->gfx.mcbp = true;
1e9e15dc
JZ
3621 else if (amdgpu_mcbp == 0)
3622 adev->gfx.mcbp = false;
4e8303cf
LL
3623 else if ((amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 0, 0)) &&
3624 (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 0, 0)) &&
1e9e15dc 3625 adev->gfx.num_gfx_rings)
50a7c876
AD
3626 adev->gfx.mcbp = true;
3627
02ff519e
AD
3628 if (amdgpu_sriov_vf(adev))
3629 adev->gfx.mcbp = true;
3630
3631 if (adev->gfx.mcbp)
3632 DRM_INFO("MCBP is enabled\n");
3633}
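/*
 * Illustrative, standalone sketch (not part of this file) of the MCBP
 * decision above: amdgpu_mcbp=1 forces it on, 0 forces it off, any other
 * value (the -1 default) is auto, which enables it on GC 9.x parts that
 * have gfx rings; SR-IOV enables it unconditionally.
 */
#include <stdbool.h>
#include <stdio.h>

static bool mcbp_enabled(int param, bool gc_is_9x, bool has_gfx_rings, bool sriov)
{
	bool mcbp;

	if (param == 1)
		mcbp = true;
	else if (param == 0)
		mcbp = false;
	else
		mcbp = gc_is_9x && has_gfx_rings;	/* auto mode */

	return mcbp || sriov;	/* SR-IOV always needs MCBP */
}

int main(void)
{
	printf("auto, GC 9.x, gfx rings, bare metal -> %d\n",
	       mcbp_enabled(-1, true, true, false));
	printf("forced off, but SR-IOV              -> %d\n",
	       mcbp_enabled(0, false, false, true));
	return 0;
}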
3634
d38ceaf9
AD
3635/**
3636 * amdgpu_device_init - initialize the driver
3637 *
3638 * @adev: amdgpu_device pointer
d38ceaf9
AD
3639 * @flags: driver flags
3640 *
3641 * Initializes the driver info and hw (all asics).
3642 * Returns 0 for success or an error on failure.
3643 * Called at driver startup.
3644 */
3645int amdgpu_device_init(struct amdgpu_device *adev,
d38ceaf9
AD
3646 uint32_t flags)
3647{
8aba21b7
LT
3648 struct drm_device *ddev = adev_to_drm(adev);
3649 struct pci_dev *pdev = adev->pdev;
d38ceaf9 3650 int r, i;
b98c6299 3651 bool px = false;
95844d20 3652 u32 max_MBps;
59e9fff1 3653 int tmp;
d38ceaf9
AD
3654
3655 adev->shutdown = false;
d38ceaf9 3656 adev->flags = flags;
4e66d7d2
YZ
3657
3658 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3659 adev->asic_type = amdgpu_force_asic_type;
3660 else
3661 adev->asic_type = flags & AMD_ASIC_MASK;
3662
d38ceaf9 3663 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 3664 if (amdgpu_emu_mode == 1)
8bdab6bb 3665 adev->usec_timeout *= 10;
770d13b1 3666 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
3667 adev->accel_working = false;
3668 adev->num_rings = 0;
68ce8b24 3669 RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
d38ceaf9
AD
3670 adev->mman.buffer_funcs = NULL;
3671 adev->mman.buffer_funcs_ring = NULL;
3672 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 3673 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 3674 adev->gmc.gmc_funcs = NULL;
7bd939d0 3675 adev->harvest_ip_mask = 0x0;
f54d1867 3676 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 3677 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
3678
3679 adev->smc_rreg = &amdgpu_invalid_rreg;
3680 adev->smc_wreg = &amdgpu_invalid_wreg;
3681 adev->pcie_rreg = &amdgpu_invalid_rreg;
3682 adev->pcie_wreg = &amdgpu_invalid_wreg;
0c552ed3
LM
3683 adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
3684 adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
36b9a952
HR
3685 adev->pciep_rreg = &amdgpu_invalid_rreg;
3686 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
3687 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3688 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
a76b2870
CL
3689 adev->pcie_rreg64_ext = &amdgpu_invalid_rreg64_ext;
3690 adev->pcie_wreg64_ext = &amdgpu_invalid_wreg64_ext;
d38ceaf9
AD
3691 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3692 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3693 adev->didt_rreg = &amdgpu_invalid_rreg;
3694 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
3695 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3696 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
3697 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3698 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3699
3e39ab90
AD
3700 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3701 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3702 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
3703
 3704 /* mutex initialization is all done here so we
b8920e1e
SS
 3705 * can call functions again without locking issues
3706 */
0e5ca0d1 3707 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
3708 mutex_init(&adev->pm.mutex);
3709 mutex_init(&adev->gfx.gpu_clock_mutex);
3710 mutex_init(&adev->srbm_mutex);
b8866c26 3711 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3712 mutex_init(&adev->gfx.gfx_off_mutex);
98a54e88 3713 mutex_init(&adev->gfx.partition_mutex);
d38ceaf9 3714 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3715 mutex_init(&adev->mn_lock);
e23b74aa 3716 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3717 hash_init(adev->mn_hash);
32eaeae0 3718 mutex_init(&adev->psp.mutex);
bd052211 3719 mutex_init(&adev->notifier_lock);
8cda7a4f 3720 mutex_init(&adev->pm.stable_pstate_ctx_lock);
f113cc32 3721 mutex_init(&adev->benchmark_mutex);
d38ceaf9 3722
ab3b9de6 3723 amdgpu_device_init_apu_flags(adev);
9f6a7857 3724
912dfc84
EQ
3725 r = amdgpu_device_check_arguments(adev);
3726 if (r)
3727 return r;
d38ceaf9 3728
d38ceaf9
AD
3729 spin_lock_init(&adev->mmio_idx_lock);
3730 spin_lock_init(&adev->smc_idx_lock);
3731 spin_lock_init(&adev->pcie_idx_lock);
3732 spin_lock_init(&adev->uvd_ctx_idx_lock);
3733 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3734 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3735 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3736 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3737 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3738
0c4e7fa5
CZ
3739 INIT_LIST_HEAD(&adev->shadow_list);
3740 mutex_init(&adev->shadow_list_lock);
3741
655ce9cb 3742 INIT_LIST_HEAD(&adev->reset_list);
3743
6492e1b0 3744 INIT_LIST_HEAD(&adev->ras_list);
3745
3e38b634
EQ
3746 INIT_LIST_HEAD(&adev->pm.od_kobj_list);
3747
beff74bc
AD
3748 INIT_DELAYED_WORK(&adev->delayed_init_work,
3749 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3750 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3751 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3752
d4535e2c
AG
3753 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3754
d23ee13f 3755 adev->gfx.gfx_off_req_count = 1;
0ad7347a
AA
3756 adev->gfx.gfx_off_residency = 0;
3757 adev->gfx.gfx_off_entrycount = 0;
b6e79d9a 3758 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3759
b265bdbd
EQ
3760 atomic_set(&adev->throttling_logging_enabled, 1);
3761 /*
3762 * If throttling continues, logging will be performed every minute
3763 * to avoid log flooding. "-1" is subtracted since the thermal
3764 * throttling interrupt comes every second. Thus, the total logging
 3765 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
3766 * for throttling interrupt) = 60 seconds.
3767 */
3768 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3769 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3770
0fa49558
AX
3771 /* Registers mapping */
3772 /* TODO: block userspace mapping of io register */
da69c161
KW
3773 if (adev->asic_type >= CHIP_BONAIRE) {
3774 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3775 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3776 } else {
3777 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3778 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3779 }
d38ceaf9 3780
6c08e0ef
EQ
3781 for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3782 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3783
d38ceaf9 3784 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
b8920e1e 3785 if (!adev->rmmio)
d38ceaf9 3786 return -ENOMEM;
b8920e1e 3787
d38ceaf9 3788 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
b8920e1e 3789 DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size);
d38ceaf9 3790
436afdfa
PY
3791 /*
 3792 * The reset domain needs to be present early, before the XGMI hive is
 3793 * discovered (if any) and initialized, so that the reset sem and in_gpu_reset
 3794 * flag can be used early during init and before any call to RREG32.
3795 */
3796 adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3797 if (!adev->reset_domain)
3798 return -ENOMEM;
3799
3aa0115d
ML
3800 /* detect hw virtualization here */
3801 amdgpu_detect_virtualization(adev);
3802
04e85958
TL
3803 amdgpu_device_get_pcie_info(adev);
3804
dffa11b4
ML
3805 r = amdgpu_device_get_job_timeout_settings(adev);
3806 if (r) {
3807 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4ef87d8f 3808 return r;
a190d1c7
XY
3809 }
3810
d38ceaf9 3811 /* early init functions */
06ec9070 3812 r = amdgpu_device_ip_early_init(adev);
d38ceaf9 3813 if (r)
4ef87d8f 3814 return r;
d38ceaf9 3815
02ff519e
AD
3816 amdgpu_device_set_mcbp(adev);
3817
b7cdb41e
ML
3818 /* Get rid of things like offb */
3819 r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
3820 if (r)
3821 return r;
3822
4d33e704
SK
3823 /* Enable TMZ based on IP_VERSION */
3824 amdgpu_gmc_tmz_set(adev);
3825
957b0787 3826 amdgpu_gmc_noretry_set(adev);
4a0165f0
VS
 3827 /* Need to get xgmi info early to decide the reset behavior */
3828 if (adev->gmc.xgmi.supported) {
3829 r = adev->gfxhub.funcs->get_xgmi_info(adev);
3830 if (r)
3831 return r;
3832 }
3833
8e6d0b69 3834 /* enable PCIE atomic ops */
b4520bfd
GW
3835 if (amdgpu_sriov_vf(adev)) {
3836 if (adev->virt.fw_reserve.p_pf2vf)
3837 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
3838 adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
3839 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
0e768043
YZ
 3840 /* APUs with gfx9 onwards don't rely on PCIe atomics; their
 3841 * internal path natively supports atomics, so set have_atomics_support to true.
3842 */
b4520bfd 3843 } else if ((adev->flags & AMD_IS_APU) &&
4e8303cf
LL
3844 (amdgpu_ip_version(adev, GC_HWIP, 0) >
3845 IP_VERSION(9, 0, 0))) {
0e768043 3846 adev->have_atomics_support = true;
b4520bfd 3847 } else {
8e6d0b69 3848 adev->have_atomics_support =
3849 !pci_enable_atomic_ops_to_root(adev->pdev,
3850 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3851 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
b4520bfd
GW
3852 }
3853
8e6d0b69 3854 if (!adev->have_atomics_support)
3855 dev_info(adev->dev, "PCIE atomic ops is not supported\n");
3856
6585661d 3857 /* doorbell bar mapping and doorbell index init*/
43c064db 3858 amdgpu_doorbell_init(adev);
6585661d 3859
9475a943
SL
3860 if (amdgpu_emu_mode == 1) {
3861 /* post the asic on emulation mode */
3862 emu_soc_asic_init(adev);
bfca0289 3863 goto fence_driver_init;
9475a943 3864 }
bfca0289 3865
04442bf7
LL
3866 amdgpu_reset_init(adev);
3867
4e99a44e 3868 /* detect if we have an SR-IOV vBIOS */
b4520bfd
GW
3869 if (adev->bios)
3870 amdgpu_device_detect_sriov_bios(adev);
048765ad 3871
95e8e59e
AD
3872 /* check if we need to reset the asic
3873 * E.g., driver was not cleanly unloaded previously, etc.
3874 */
f14899fd 3875 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
e3c1b071 3876 if (adev->gmc.xgmi.num_physical_nodes) {
3877 dev_info(adev->dev, "Pending hive reset.\n");
3878 adev->gmc.xgmi.pending_reset = true;
3879 /* Only need to init necessary block for SMU to handle the reset */
3880 for (i = 0; i < adev->num_ip_blocks; i++) {
3881 if (!adev->ip_blocks[i].status.valid)
3882 continue;
3883 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3884 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3885 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3886 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
751f43e7 3887 DRM_DEBUG("IP %s disabled for hw_init.\n",
e3c1b071 3888 adev->ip_blocks[i].version->funcs->name);
3889 adev->ip_blocks[i].status.hw = true;
3890 }
3891 }
3892 } else {
59e9fff1 3893 tmp = amdgpu_reset_method;
3894 /* It should do a default reset when loading or reloading the driver,
3895 * regardless of the module parameter reset_method.
3896 */
3897 amdgpu_reset_method = AMD_RESET_METHOD_NONE;
e3c1b071 3898 r = amdgpu_asic_reset(adev);
59e9fff1 3899 amdgpu_reset_method = tmp;
e3c1b071 3900 if (r) {
3901 dev_err(adev->dev, "asic reset on init failed\n");
3902 goto failed;
3903 }
95e8e59e
AD
3904 }
3905 }
3906
d38ceaf9 3907 /* Post card if necessary */
39c640c0 3908 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3909 if (!adev->bios) {
bec86378 3910 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3911 r = -EINVAL;
3912 goto failed;
d38ceaf9 3913 }
bec86378 3914 DRM_INFO("GPU posting now...\n");
4d2997ab 3915 r = amdgpu_device_asic_init(adev);
4e99a44e
ML
3916 if (r) {
3917 dev_err(adev->dev, "gpu post error!\n");
3918 goto failed;
3919 }
d38ceaf9
AD
3920 }
3921
9535a86a
SZ
3922 if (adev->bios) {
3923 if (adev->is_atom_fw) {
3924 /* Initialize clocks */
3925 r = amdgpu_atomfirmware_get_clock_info(adev);
3926 if (r) {
3927 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
3928 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3929 goto failed;
3930 }
3931 } else {
3932 /* Initialize clocks */
3933 r = amdgpu_atombios_get_clock_info(adev);
3934 if (r) {
3935 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
3936 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3937 goto failed;
3938 }
3939 /* init i2c buses */
3940 if (!amdgpu_device_has_dc_support(adev))
3941 amdgpu_atombios_i2c_init(adev);
a5bde2f9 3942 }
2c1a2784 3943 }
d38ceaf9 3944
bfca0289 3945fence_driver_init:
d38ceaf9 3946 /* Fence driver */
067f44c8 3947 r = amdgpu_fence_driver_sw_init(adev);
2c1a2784 3948 if (r) {
067f44c8 3949 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
e23b74aa 3950 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3951 goto failed;
2c1a2784 3952 }
d38ceaf9
AD
3953
3954 /* init the mode config */
4a580877 3955 drm_mode_config_init(adev_to_drm(adev));
d38ceaf9 3956
06ec9070 3957 r = amdgpu_device_ip_init(adev);
d38ceaf9 3958 if (r) {
06ec9070 3959 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3960 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
970fd197 3961 goto release_ras_con;
d38ceaf9
AD
3962 }
3963
8d35a259
LG
3964 amdgpu_fence_driver_hw_init(adev);
3965
d69b8971
YZ
3966 dev_info(adev->dev,
3967 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
3968 adev->gfx.config.max_shader_engines,
3969 adev->gfx.config.max_sh_per_se,
3970 adev->gfx.config.max_cu_per_sh,
3971 adev->gfx.cu_info.number);
3972
d38ceaf9
AD
3973 adev->accel_working = true;
3974
e59c0205
AX
3975 amdgpu_vm_check_compute_bug(adev);
3976
95844d20
MO
3977 /* Initialize the buffer migration limit. */
3978 if (amdgpu_moverate >= 0)
3979 max_MBps = amdgpu_moverate;
3980 else
3981 max_MBps = 8; /* Allow 8 MB/s. */
3982 /* Get a log2 for easy divisions. */
3983 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
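	/*
	 * Worked example (illustration only, not driver logic): with the
	 * default max_MBps = 8, log2_max_MBps = ilog2(8) = 3.  Since 1 MB/s
	 * is one byte per microsecond, a byte budget over a time slice can
	 * then be computed with a shift rather than a division, roughly
	 * bytes_allowed = time_in_us << 3.
	 */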
3984
b0adca4d
EQ
3985 /*
3986 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
 3987 * Otherwise the mgpu fan boost feature will be skipped because the
 3988 * gpu instance count comes up short.
3989 */
3990 amdgpu_register_gpu_instance(adev);
3991
d38ceaf9
AD
3992 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3993 * explicit gating rather than handling it automatically.
3994 */
e3c1b071 3995 if (!adev->gmc.xgmi.pending_reset) {
3996 r = amdgpu_device_ip_late_init(adev);
3997 if (r) {
3998 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
3999 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
970fd197 4000 goto release_ras_con;
e3c1b071 4001 }
4002 /* must succeed. */
4003 amdgpu_ras_resume(adev);
4004 queue_delayed_work(system_wq, &adev->delayed_init_work,
4005 msecs_to_jiffies(AMDGPU_RESUME_MS));
2c1a2784 4006 }
d38ceaf9 4007
38eecbe0
CL
4008 if (amdgpu_sriov_vf(adev)) {
4009 amdgpu_virt_release_full_gpu(adev, true);
2c738637 4010 flush_delayed_work(&adev->delayed_init_work);
38eecbe0 4011 }
2c738637 4012
90bcb9b5
EQ
4013 /*
 4014 * Place this sysfs registration after `late_init`, as some of the
 4015 * operations performed in `late_init` might affect the creation of
 4016 * the sysfs interfaces.
4017 */
4018 r = amdgpu_atombios_sysfs_init(adev);
4019 if (r)
4020 drm_err(&adev->ddev,
4021 "registering atombios sysfs failed (%d).\n", r);
4022
4023 r = amdgpu_pm_sysfs_init(adev);
4024 if (r)
4025 DRM_ERROR("registering pm sysfs failed (%d).\n", r);
4026
4027 r = amdgpu_ucode_sysfs_init(adev);
4028 if (r) {
4029 adev->ucode_sysfs_en = false;
4030 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
4031 } else
4032 adev->ucode_sysfs_en = true;
4033
77f3a5cd 4034 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
5aea5327 4035 if (r)
77f3a5cd 4036 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166 4037
7957ec80
LL
4038 amdgpu_fru_sysfs_init(adev);
4039
d155bef0
AB
4040 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4041 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
4042 if (r)
4043 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
4044
c1dd4aa6
AG
4045 /* Have stored pci confspace at hand for restore in sudden PCI error */
4046 if (amdgpu_device_cache_pci_state(adev->pdev))
4047 pci_restore_state(pdev);
4048
8c3dd61c
KHF
4049 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
4050 /* this will fail for cards that aren't VGA class devices, just
b8920e1e
SS
4051 * ignore it
4052 */
8c3dd61c 4053 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
bf44e8ce 4054 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
8c3dd61c 4055
d37a3929
OC
4056 px = amdgpu_device_supports_px(ddev);
4057
4058 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
4059 apple_gmux_detect(NULL, NULL)))
8c3dd61c
KHF
4060 vga_switcheroo_register_client(adev->pdev,
4061 &amdgpu_switcheroo_ops, px);
d37a3929
OC
4062
4063 if (px)
8c3dd61c 4064 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
8c3dd61c 4065
e3c1b071 4066 if (adev->gmc.xgmi.pending_reset)
4067 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
4068 msecs_to_jiffies(AMDGPU_RESUME_MS));
4069
4a74c38c
PY
4070 amdgpu_device_check_iommu_direct_map(adev);
4071
d38ceaf9 4072 return 0;
83ba126a 4073
970fd197 4074release_ras_con:
38eecbe0
CL
4075 if (amdgpu_sriov_vf(adev))
4076 amdgpu_virt_release_full_gpu(adev, true);
4077
4078 /* failed in exclusive mode due to timeout */
4079 if (amdgpu_sriov_vf(adev) &&
4080 !amdgpu_sriov_runtime(adev) &&
4081 amdgpu_virt_mmio_blocked(adev) &&
4082 !amdgpu_virt_wait_reset(adev)) {
4083 dev_err(adev->dev, "VF exclusive mode timeout\n");
4084 /* Don't send request since VF is inactive. */
4085 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4086 adev->virt.ops = NULL;
4087 r = -EAGAIN;
4088 }
970fd197
SY
4089 amdgpu_release_ras_context(adev);
4090
83ba126a 4091failed:
89041940 4092 amdgpu_vf_error_trans_all(adev);
8840a387 4093
83ba126a 4094 return r;
d38ceaf9
AD
4095}
4096
07775fc1
AG
4097static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
4098{
62d5f9f7 4099
07775fc1
AG
4100 /* Clear all CPU mappings pointing to this device */
4101 unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
4102
4103 /* Unmap all mapped bars - Doorbell, registers and VRAM */
43c064db 4104 amdgpu_doorbell_fini(adev);
07775fc1
AG
4105
4106 iounmap(adev->rmmio);
4107 adev->rmmio = NULL;
4108 if (adev->mman.aper_base_kaddr)
4109 iounmap(adev->mman.aper_base_kaddr);
4110 adev->mman.aper_base_kaddr = NULL;
4111
4112 /* Memory manager related */
a0ba1279 4113 if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
07775fc1
AG
4114 arch_phys_wc_del(adev->gmc.vram_mtrr);
4115 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4116 }
4117}
4118
d38ceaf9 4119/**
bbe04dec 4120 * amdgpu_device_fini_hw - tear down the driver
d38ceaf9
AD
4121 *
4122 * @adev: amdgpu_device pointer
4123 *
4124 * Tear down the driver info (all asics).
4125 * Called at driver shutdown.
4126 */
72c8c97b 4127void amdgpu_device_fini_hw(struct amdgpu_device *adev)
d38ceaf9 4128{
aac89168 4129 dev_info(adev->dev, "amdgpu: finishing device.\n");
9f875167 4130 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 4131 adev->shutdown = true;
9f875167 4132
752c683d
ML
 4133 /* make sure IB tests have finished before entering exclusive mode
 4134 * to avoid preempting the IB tests
b8920e1e 4135 */
519b8b76 4136 if (amdgpu_sriov_vf(adev)) {
752c683d 4137 amdgpu_virt_request_full_gpu(adev, false);
519b8b76
BZ
4138 amdgpu_virt_fini_data_exchange(adev);
4139 }
752c683d 4140
e5b03032
ML
4141 /* disable all interrupts */
4142 amdgpu_irq_disable_all(adev);
47fc644f 4143 if (adev->mode_info.mode_config_initialized) {
1053b9c9 4144 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4a580877 4145 drm_helper_force_disable_all(adev_to_drm(adev));
ff97cba8 4146 else
4a580877 4147 drm_atomic_helper_shutdown(adev_to_drm(adev));
ff97cba8 4148 }
8d35a259 4149 amdgpu_fence_driver_hw_fini(adev);
72c8c97b 4150
cd3a8a59 4151 if (adev->mman.initialized)
9bff18d1 4152 drain_workqueue(adev->mman.bdev.wq);
98f56188 4153
53e9d836 4154 if (adev->pm.sysfs_initialized)
7c868b59 4155 amdgpu_pm_sysfs_fini(adev);
72c8c97b
AG
4156 if (adev->ucode_sysfs_en)
4157 amdgpu_ucode_sysfs_fini(adev);
4158 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
7957ec80 4159 amdgpu_fru_sysfs_fini(adev);
72c8c97b 4160
232d1d43
SY
4161 /* disable ras feature must before hw fini */
4162 amdgpu_ras_pre_fini(adev);
4163
e9669fb7 4164 amdgpu_device_ip_fini_early(adev);
d10d0daa 4165
a3848df6
YW
4166 amdgpu_irq_fini_hw(adev);
4167
b6fd6e0f
SK
4168 if (adev->mman.initialized)
4169 ttm_device_clear_dma_mappings(&adev->mman.bdev);
894c6890 4170
d10d0daa 4171 amdgpu_gart_dummy_page_fini(adev);
07775fc1 4172
39934d3e
VP
4173 if (drm_dev_is_unplugged(adev_to_drm(adev)))
4174 amdgpu_device_unmap_mmio(adev);
87172e89 4175
72c8c97b
AG
4176}
4177
4178void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4179{
62d5f9f7 4180 int idx;
d37a3929 4181 bool px;
62d5f9f7 4182
8d35a259 4183 amdgpu_fence_driver_sw_fini(adev);
a5c5d8d5 4184 amdgpu_device_ip_fini(adev);
b31d3063 4185 amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
d38ceaf9 4186 adev->accel_working = false;
68ce8b24 4187 dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
04442bf7
LL
4188
4189 amdgpu_reset_fini(adev);
4190
d38ceaf9 4191 /* free i2c buses */
4562236b
HW
4192 if (!amdgpu_device_has_dc_support(adev))
4193 amdgpu_i2c_fini(adev);
bfca0289
SL
4194
4195 if (amdgpu_emu_mode != 1)
4196 amdgpu_atombios_fini(adev);
4197
d38ceaf9
AD
4198 kfree(adev->bios);
4199 adev->bios = NULL;
d37a3929
OC
4200
4201 px = amdgpu_device_supports_px(adev_to_drm(adev));
4202
4203 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
4204 apple_gmux_detect(NULL, NULL)))
84c8b22e 4205 vga_switcheroo_unregister_client(adev->pdev);
d37a3929
OC
4206
4207 if (px)
83ba126a 4208 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d37a3929 4209
38d6be81 4210 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
b8779475 4211 vga_client_unregister(adev->pdev);
e9bc1bf7 4212
62d5f9f7
LS
4213 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4214
4215 iounmap(adev->rmmio);
4216 adev->rmmio = NULL;
43c064db 4217 amdgpu_doorbell_fini(adev);
62d5f9f7
LS
4218 drm_dev_exit(idx);
4219 }
4220
d155bef0
AB
4221 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4222 amdgpu_pmu_fini(adev);
72de33f8 4223 if (adev->mman.discovery_bin)
a190d1c7 4224 amdgpu_discovery_fini(adev);
72c8c97b 4225
cfbb6b00
AG
4226 amdgpu_reset_put_reset_domain(adev->reset_domain);
4227 adev->reset_domain = NULL;
4228
72c8c97b
AG
4229 kfree(adev->pci_state);
4230
d38ceaf9
AD
4231}
4232
58144d28
ND
4233/**
4234 * amdgpu_device_evict_resources - evict device resources
4235 * @adev: amdgpu device object
4236 *
 4237 * Evicts all ttm device resources (vram BOs, gart table) from the lru list
4238 * of the vram memory type. Mainly used for evicting device resources
4239 * at suspend time.
4240 *
4241 */
7863c155 4242static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
58144d28 4243{
7863c155
ML
4244 int ret;
4245
e53d9665
ML
4246 /* No need to evict vram on APUs for suspend to ram or s2idle */
4247 if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
7863c155 4248 return 0;
58144d28 4249
7863c155
ML
4250 ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4251 if (ret)
58144d28 4252 DRM_WARN("evicting device resources failed\n");
7863c155 4253 return ret;
58144d28 4254}
d38ceaf9
AD
4255
4256/*
4257 * Suspend & resume.
4258 */
4259/**
810ddc3a 4260 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 4261 *
87e3f136 4262 * @dev: drm dev pointer
87e3f136 4263 * @fbcon : notify the fbdev of suspend
d38ceaf9
AD
4264 *
4265 * Puts the hw in the suspend state (all asics).
4266 * Returns 0 for success or an error on failure.
4267 * Called at driver suspend.
4268 */
de185019 4269int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9 4270{
a2e15b0e 4271 struct amdgpu_device *adev = drm_to_adev(dev);
d7274ec7 4272 int r = 0;
d38ceaf9 4273
d38ceaf9
AD
4274 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4275 return 0;
4276
44779b43 4277 adev->in_suspend = true;
3fa8f89d 4278
47ea2076
SF
4279 /* Evict the majority of BOs before grabbing the full access */
4280 r = amdgpu_device_evict_resources(adev);
4281 if (r)
4282 return r;
4283
d7274ec7
BZ
4284 if (amdgpu_sriov_vf(adev)) {
4285 amdgpu_virt_fini_data_exchange(adev);
4286 r = amdgpu_virt_request_full_gpu(adev, false);
4287 if (r)
4288 return r;
4289 }
4290
3fa8f89d
S
4291 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4292 DRM_WARN("smart shift update failed\n");
4293
5f818173 4294 if (fbcon)
087451f3 4295 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
5f818173 4296
beff74bc 4297 cancel_delayed_work_sync(&adev->delayed_init_work);
0dee7263 4298 flush_delayed_work(&adev->gfx.gfx_off_delay_work);
a5459475 4299
5e6932fe 4300 amdgpu_ras_suspend(adev);
4301
2196927b 4302 amdgpu_device_ip_suspend_phase1(adev);
fe1053b7 4303
c004d44e 4304 if (!adev->in_s0ix)
5d3a2d95 4305 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
94fa5660 4306
7863c155
ML
4307 r = amdgpu_device_evict_resources(adev);
4308 if (r)
4309 return r;
d38ceaf9 4310
8d35a259 4311 amdgpu_fence_driver_hw_fini(adev);
d38ceaf9 4312
2196927b 4313 amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 4314
d7274ec7
BZ
4315 if (amdgpu_sriov_vf(adev))
4316 amdgpu_virt_release_full_gpu(adev, false);
4317
d38ceaf9
AD
4318 return 0;
4319}
4320
4321/**
810ddc3a 4322 * amdgpu_device_resume - initiate device resume
d38ceaf9 4323 *
87e3f136 4324 * @dev: drm dev pointer
87e3f136 4325 * @fbcon : notify the fbdev of resume
d38ceaf9
AD
4326 *
4327 * Bring the hw back to operating state (all asics).
4328 * Returns 0 for success or an error on failure.
4329 * Called at driver resume.
4330 */
de185019 4331int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9 4332{
1348969a 4333 struct amdgpu_device *adev = drm_to_adev(dev);
03161a6e 4334 int r = 0;
d38ceaf9 4335
d7274ec7
BZ
4336 if (amdgpu_sriov_vf(adev)) {
4337 r = amdgpu_virt_request_full_gpu(adev, true);
4338 if (r)
4339 return r;
4340 }
4341
d38ceaf9
AD
4342 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4343 return 0;
4344
62498733 4345 if (adev->in_s0ix)
bc143d8b 4346 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
628c36d7 4347
d38ceaf9 4348 /* post card */
39c640c0 4349 if (amdgpu_device_need_post(adev)) {
4d2997ab 4350 r = amdgpu_device_asic_init(adev);
74b0b157 4351 if (r)
aac89168 4352 dev_err(adev->dev, "amdgpu asic init failed\n");
74b0b157 4353 }
d38ceaf9 4354
06ec9070 4355 r = amdgpu_device_ip_resume(adev);
d7274ec7 4356
e6707218 4357 if (r) {
aac89168 4358 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
3c22c1ea 4359 goto exit;
e6707218 4360 }
8d35a259 4361 amdgpu_fence_driver_hw_init(adev);
5ceb54c6 4362
06ec9070 4363 r = amdgpu_device_ip_late_init(adev);
03161a6e 4364 if (r)
3c22c1ea 4365 goto exit;
d38ceaf9 4366
beff74bc
AD
4367 queue_delayed_work(system_wq, &adev->delayed_init_work,
4368 msecs_to_jiffies(AMDGPU_RESUME_MS));
4369
c004d44e 4370 if (!adev->in_s0ix) {
5d3a2d95
AD
4371 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4372 if (r)
3c22c1ea 4373 goto exit;
5d3a2d95 4374 }
756e6880 4375
3c22c1ea
SF
4376exit:
4377 if (amdgpu_sriov_vf(adev)) {
4378 amdgpu_virt_init_data_exchange(adev);
4379 amdgpu_virt_release_full_gpu(adev, true);
4380 }
4381
4382 if (r)
4383 return r;
4384
96a5d8d4 4385 /* Make sure IB tests flushed */
beff74bc 4386 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 4387
a2e15b0e 4388 if (fbcon)
087451f3 4389 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
d38ceaf9 4390
5e6932fe 4391 amdgpu_ras_resume(adev);
4392
d09ef243
AD
4393 if (adev->mode_info.num_crtc) {
4394 /*
4395 * Most of the connector probing functions try to acquire runtime pm
4396 * refs to ensure that the GPU is powered on when connector polling is
4397 * performed. Since we're calling this from a runtime PM callback,
4398 * trying to acquire rpm refs will cause us to deadlock.
4399 *
4400 * Since we're guaranteed to be holding the rpm lock, it's safe to
4401 * temporarily disable the rpm helpers so this doesn't deadlock us.
4402 */
23a1a9e5 4403#ifdef CONFIG_PM
d09ef243 4404 dev->dev->power.disable_depth++;
23a1a9e5 4405#endif
d09ef243
AD
4406 if (!adev->dc_enabled)
4407 drm_helper_hpd_irq_event(dev);
4408 else
4409 drm_kms_helper_hotplug_event(dev);
23a1a9e5 4410#ifdef CONFIG_PM
d09ef243 4411 dev->dev->power.disable_depth--;
23a1a9e5 4412#endif
d09ef243 4413 }
44779b43
RZ
4414 adev->in_suspend = false;
4415
dc907c9d
JX
4416 if (adev->enable_mes)
4417 amdgpu_mes_self_test(adev);
4418
3fa8f89d
S
4419 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4420 DRM_WARN("smart shift update failed\n");
4421
4d3b9ae5 4422 return 0;
d38ceaf9
AD
4423}
4424
e3ecdffa
AD
4425/**
4426 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4427 *
4428 * @adev: amdgpu_device pointer
4429 *
4430 * The list of all the hardware IPs that make up the asic is walked and
4431 * the check_soft_reset callbacks are run. check_soft_reset determines
4432 * if the asic is still hung or not.
4433 * Returns true if any of the IPs are still in a hung state, false if not.
4434 */
06ec9070 4435static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
4436{
4437 int i;
4438 bool asic_hang = false;
4439
f993d628
ML
4440 if (amdgpu_sriov_vf(adev))
4441 return true;
4442
8bc04c29
AD
4443 if (amdgpu_asic_need_full_reset(adev))
4444 return true;
4445
63fbf42f 4446 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4447 if (!adev->ip_blocks[i].status.valid)
63fbf42f 4448 continue;
a1255107
AD
4449 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4450 adev->ip_blocks[i].status.hang =
4451 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4452 if (adev->ip_blocks[i].status.hang) {
aac89168 4453 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
4454 asic_hang = true;
4455 }
4456 }
4457 return asic_hang;
4458}
4459
e3ecdffa
AD
4460/**
4461 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4462 *
4463 * @adev: amdgpu_device pointer
4464 *
4465 * The list of all the hardware IPs that make up the asic is walked and the
4466 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
4467 * handles any IP specific hardware or software state changes that are
4468 * necessary for a soft reset to succeed.
4469 * Returns 0 on success, negative error code on failure.
4470 */
06ec9070 4471static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
4472{
4473 int i, r = 0;
4474
4475 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4476 if (!adev->ip_blocks[i].status.valid)
d31a501e 4477 continue;
a1255107
AD
4478 if (adev->ip_blocks[i].status.hang &&
4479 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4480 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
4481 if (r)
4482 return r;
4483 }
4484 }
4485
4486 return 0;
4487}
4488
e3ecdffa
AD
4489/**
4490 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4491 *
4492 * @adev: amdgpu_device pointer
4493 *
4494 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
4495 * reset is necessary to recover.
4496 * Returns true if a full asic reset is required, false if not.
4497 */
06ec9070 4498static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 4499{
da146d3b
AD
4500 int i;
4501
8bc04c29
AD
4502 if (amdgpu_asic_need_full_reset(adev))
4503 return true;
4504
da146d3b 4505 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4506 if (!adev->ip_blocks[i].status.valid)
da146d3b 4507 continue;
a1255107
AD
4508 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4509 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4510 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
4511 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4512 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 4513 if (adev->ip_blocks[i].status.hang) {
aac89168 4514 dev_info(adev->dev, "Some block need full reset!\n");
da146d3b
AD
4515 return true;
4516 }
4517 }
35d782fe
CZ
4518 }
4519 return false;
4520}
4521
e3ecdffa
AD
4522/**
4523 * amdgpu_device_ip_soft_reset - do a soft reset
4524 *
4525 * @adev: amdgpu_device pointer
4526 *
4527 * The list of all the hardware IPs that make up the asic is walked and the
4528 * soft_reset callbacks are run if the block is hung. soft_reset handles any
4529 * IP specific hardware or software state changes that are necessary to soft
4530 * reset the IP.
4531 * Returns 0 on success, negative error code on failure.
4532 */
06ec9070 4533static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4534{
4535 int i, r = 0;
4536
4537 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4538 if (!adev->ip_blocks[i].status.valid)
35d782fe 4539 continue;
a1255107
AD
4540 if (adev->ip_blocks[i].status.hang &&
4541 adev->ip_blocks[i].version->funcs->soft_reset) {
4542 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
4543 if (r)
4544 return r;
4545 }
4546 }
4547
4548 return 0;
4549}
4550
e3ecdffa
AD
4551/**
4552 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4553 *
4554 * @adev: amdgpu_device pointer
4555 *
4556 * The list of all the hardware IPs that make up the asic is walked and the
4557 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
4558 * handles any IP specific hardware or software state changes that are
4559 * necessary after the IP has been soft reset.
4560 * Returns 0 on success, negative error code on failure.
4561 */
06ec9070 4562static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4563{
4564 int i, r = 0;
4565
4566 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4567 if (!adev->ip_blocks[i].status.valid)
35d782fe 4568 continue;
a1255107
AD
4569 if (adev->ip_blocks[i].status.hang &&
4570 adev->ip_blocks[i].version->funcs->post_soft_reset)
4571 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
4572 if (r)
4573 return r;
4574 }
4575
4576 return 0;
4577}
4578
e3ecdffa 4579/**
c33adbc7 4580 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
4581 *
4582 * @adev: amdgpu_device pointer
4583 *
4584 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
4585 * restore things like GPUVM page tables after a GPU reset where
4586 * the contents of VRAM might be lost.
403009bf
CK
4587 *
4588 * Returns:
4589 * 0 on success, negative error code on failure.
e3ecdffa 4590 */
c33adbc7 4591static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 4592{
c41d1cf6 4593 struct dma_fence *fence = NULL, *next = NULL;
403009bf 4594 struct amdgpu_bo *shadow;
e18aaea7 4595 struct amdgpu_bo_vm *vmbo;
403009bf 4596 long r = 1, tmo;
c41d1cf6
ML
4597
4598 if (amdgpu_sriov_runtime(adev))
b045d3af 4599 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
4600 else
4601 tmo = msecs_to_jiffies(100);
4602
aac89168 4603 dev_info(adev->dev, "recover vram bo from shadow start\n");
c41d1cf6 4604 mutex_lock(&adev->shadow_list_lock);
e18aaea7 4605 list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4994d1f0
LC
4606 /* If vm is compute context or adev is APU, shadow will be NULL */
4607 if (!vmbo->shadow)
4608 continue;
4609 shadow = vmbo->shadow;
4610
403009bf 4611 /* No need to recover an evicted BO */
d3116756
CK
4612 if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4613 shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4614 shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
403009bf
CK
4615 continue;
4616
4617 r = amdgpu_bo_restore_shadow(shadow, &next);
4618 if (r)
4619 break;
4620
c41d1cf6 4621 if (fence) {
1712fb1a 4622 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
4623 dma_fence_put(fence);
4624 fence = next;
1712fb1a 4625 if (tmo == 0) {
4626 r = -ETIMEDOUT;
c41d1cf6 4627 break;
1712fb1a 4628 } else if (tmo < 0) {
4629 r = tmo;
4630 break;
4631 }
403009bf
CK
4632 } else {
4633 fence = next;
c41d1cf6 4634 }
c41d1cf6
ML
4635 }
4636 mutex_unlock(&adev->shadow_list_lock);
4637
403009bf
CK
4638 if (fence)
4639 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
4640 dma_fence_put(fence);
4641
1712fb1a 4642 if (r < 0 || tmo <= 0) {
aac89168 4643 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
4644 return -EIO;
4645 }
c41d1cf6 4646
aac89168 4647 dev_info(adev->dev, "recover vram bo from shadow done\n");
403009bf 4648 return 0;
c41d1cf6
ML
4649}
4650
a90ad3c2 4651
e3ecdffa 4652/**
06ec9070 4653 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e 4654 *
982a820b 4655 * @adev: amdgpu_device pointer
87e3f136 4656 * @from_hypervisor: request from hypervisor
5740682e
ML
4657 *
 4658 * Do a VF FLR and reinitialize the ASIC.
3f48c681 4659 * Returns 0 on success, negative error code on failure.
e3ecdffa
AD
4660 */
4661static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4662 bool from_hypervisor)
5740682e
ML
4663{
4664 int r;
a5f67c93 4665 struct amdgpu_hive_info *hive = NULL;
7258fa31 4666 int retry_limit = 0;
5740682e 4667
7258fa31 4668retry:
c004d44e 4669 amdgpu_amdkfd_pre_reset(adev);
428890a3 4670
5740682e
ML
4671 if (from_hypervisor)
4672 r = amdgpu_virt_request_full_gpu(adev, true);
4673 else
4674 r = amdgpu_virt_reset_gpu(adev);
4675 if (r)
4676 return r;
f734b213 4677 amdgpu_irq_gpu_reset_resume_helper(adev);
a90ad3c2 4678
83f24a8f
HC
4679 /* some sw clean up VF needs to do before recover */
4680 amdgpu_virt_post_reset(adev);
4681
a90ad3c2 4682 /* Resume IP prior to SMC */
06ec9070 4683 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
4684 if (r)
4685 goto error;
a90ad3c2 4686
c9ffa427 4687 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 4688
7a3e0bb2
RZ
4689 r = amdgpu_device_fw_loading(adev);
4690 if (r)
4691 return r;
4692
a90ad3c2 4693 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 4694 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
4695 if (r)
4696 goto error;
a90ad3c2 4697
a5f67c93
ZL
4698 hive = amdgpu_get_xgmi_hive(adev);
4699 /* Update PSP FW topology after reset */
4700 if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4701 r = amdgpu_xgmi_update_topology(hive, adev);
4702
4703 if (hive)
4704 amdgpu_put_xgmi_hive(hive);
4705
4706 if (!r) {
a5f67c93 4707 r = amdgpu_ib_ring_tests(adev);
9c12f5cd 4708
c004d44e 4709 amdgpu_amdkfd_post_reset(adev);
a5f67c93 4710 }
a90ad3c2 4711
abc34253 4712error:
c41d1cf6 4713 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 4714 amdgpu_inc_vram_lost(adev);
c33adbc7 4715 r = amdgpu_device_recover_vram(adev);
a90ad3c2 4716 }
437f3e0b 4717 amdgpu_virt_release_full_gpu(adev, true);
a90ad3c2 4718
7258fa31
SK
4719 if (AMDGPU_RETRY_SRIOV_RESET(r)) {
4720 if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
4721 retry_limit++;
4722 goto retry;
4723 } else
4724 DRM_ERROR("GPU reset retry is beyond the retry limit\n");
4725 }
4726
a90ad3c2
ML
4727 return r;
4728}
4729
9a1cddd6 4730/**
 4731 * amdgpu_device_has_job_running - check if there is any job in the pending list
4732 *
982a820b 4733 * @adev: amdgpu_device pointer
9a1cddd6 4734 *
 4735 * check if there is any job in the pending list
4736 */
4737bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4738{
4739 int i;
4740 struct drm_sched_job *job;
4741
4742 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4743 struct amdgpu_ring *ring = adev->rings[i];
4744
4745 if (!ring || !ring->sched.thread)
4746 continue;
4747
4748 spin_lock(&ring->sched.job_list_lock);
6efa4b46
LT
4749 job = list_first_entry_or_null(&ring->sched.pending_list,
4750 struct drm_sched_job, list);
9a1cddd6 4751 spin_unlock(&ring->sched.job_list_lock);
4752 if (job)
4753 return true;
4754 }
4755 return false;
4756}
4757
12938fad
CK
4758/**
4759 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4760 *
982a820b 4761 * @adev: amdgpu_device pointer
12938fad
CK
4762 *
4763 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4764 * a hung GPU.
4765 */
4766bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4767{
12938fad 4768
3ba7b418
AG
4769 if (amdgpu_gpu_recovery == 0)
4770 goto disabled;
4771
1a11a65d
YC
4772 /* Skip soft reset check in fatal error mode */
4773 if (!amdgpu_ras_is_poison_mode_supported(adev))
4774 return true;
4775
3ba7b418
AG
4776 if (amdgpu_sriov_vf(adev))
4777 return true;
4778
4779 if (amdgpu_gpu_recovery == -1) {
4780 switch (adev->asic_type) {
b3523c45
AD
4781#ifdef CONFIG_DRM_AMDGPU_SI
4782 case CHIP_VERDE:
4783 case CHIP_TAHITI:
4784 case CHIP_PITCAIRN:
4785 case CHIP_OLAND:
4786 case CHIP_HAINAN:
4787#endif
4788#ifdef CONFIG_DRM_AMDGPU_CIK
4789 case CHIP_KAVERI:
4790 case CHIP_KABINI:
4791 case CHIP_MULLINS:
4792#endif
4793 case CHIP_CARRIZO:
4794 case CHIP_STONEY:
4795 case CHIP_CYAN_SKILLFISH:
3ba7b418 4796 goto disabled;
b3523c45
AD
4797 default:
4798 break;
3ba7b418 4799 }
12938fad
CK
4800 }
4801
4802 return true;
3ba7b418
AG
4803
4804disabled:
aac89168 4805 dev_info(adev->dev, "GPU recovery disabled.\n");
3ba7b418 4806 return false;
12938fad
CK
4807}
4808
5c03e584
FX
4809int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
4810{
47fc644f
SS
4811 u32 i;
4812 int ret = 0;
5c03e584 4813
47fc644f 4814 amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5c03e584 4815
47fc644f 4816 dev_info(adev->dev, "GPU mode1 reset\n");
5c03e584 4817
47fc644f
SS
4818 /* disable BM */
4819 pci_clear_master(adev->pdev);
5c03e584 4820
47fc644f 4821 amdgpu_device_cache_pci_state(adev->pdev);
5c03e584 4822
47fc644f
SS
4823 if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
4824 dev_info(adev->dev, "GPU smu mode1 reset\n");
4825 ret = amdgpu_dpm_mode1_reset(adev);
4826 } else {
4827 dev_info(adev->dev, "GPU psp mode1 reset\n");
4828 ret = psp_gpu_reset(adev);
4829 }
5c03e584 4830
47fc644f 4831 if (ret)
2c0f880a 4832 goto mode1_reset_failed;
5c03e584 4833
47fc644f 4834 amdgpu_device_load_pci_state(adev->pdev);
15c5c5f5
LL
4835 ret = amdgpu_psp_wait_for_bootloader(adev);
4836 if (ret)
2c0f880a 4837 goto mode1_reset_failed;
5c03e584 4838
47fc644f
SS
4839 /* wait for asic to come out of reset */
4840 for (i = 0; i < adev->usec_timeout; i++) {
4841 u32 memsize = adev->nbio.funcs->get_memsize(adev);
5c03e584 4842
47fc644f
SS
4843 if (memsize != 0xffffffff)
4844 break;
4845 udelay(1);
4846 }
5c03e584 4847
2c0f880a
HZ
4848 if (i >= adev->usec_timeout) {
4849 ret = -ETIMEDOUT;
4850 goto mode1_reset_failed;
4851 }
4852
47fc644f 4853 amdgpu_atombios_scratch_regs_engine_hung(adev, false);
15c5c5f5 4854
2c0f880a
HZ
4855 return 0;
4856
4857mode1_reset_failed:
4858 dev_err(adev->dev, "GPU mode1 reset failed\n");
47fc644f 4859 return ret;
5c03e584 4860}
5c6dd71e 4861
e3c1b071 4862int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
04442bf7 4863 struct amdgpu_reset_context *reset_context)
26bc5340 4864{
5c1e6fa4 4865 int i, r = 0;
04442bf7
LL
4866 struct amdgpu_job *job = NULL;
4867 bool need_full_reset =
4868 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4869
4870 if (reset_context->reset_req_dev == adev)
4871 job = reset_context->job;
71182665 4872
b602ca5f
TZ
4873 if (amdgpu_sriov_vf(adev)) {
4874 /* stop the data exchange thread */
4875 amdgpu_virt_fini_data_exchange(adev);
4876 }
4877
9e225fb9
AG
4878 amdgpu_fence_driver_isr_toggle(adev, true);
4879
71182665 4880 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
4881 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4882 struct amdgpu_ring *ring = adev->rings[i];
4883
51687759 4884 if (!ring || !ring->sched.thread)
0875dc9e 4885 continue;
5740682e 4886
b8920e1e
SS
 4887 /* Clear job fences from fence drv to avoid force_completion
 4888 * leaving NULL and vm flush fences in fence drv
4889 */
5c1e6fa4 4890 amdgpu_fence_driver_clear_job_fences(ring);
c530b02f 4891
2f9d4084
ML
4892 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
4893 amdgpu_fence_driver_force_completion(ring);
0875dc9e 4894 }
d38ceaf9 4895
9e225fb9
AG
4896 amdgpu_fence_driver_isr_toggle(adev, false);
4897
ff99849b 4898 if (job && job->vm)
222b5f04
AG
4899 drm_sched_increase_karma(&job->base);
4900
04442bf7 4901 r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
404b277b 4902 /* If reset handler not implemented, continue; otherwise return */
b8920e1e 4903 if (r == -EOPNOTSUPP)
404b277b
LL
4904 r = 0;
4905 else
04442bf7
LL
4906 return r;
4907
1d721ed6 4908 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
4909 if (!amdgpu_sriov_vf(adev)) {
4910
4911 if (!need_full_reset)
4912 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4913
360cd081
LG
4914 if (!need_full_reset && amdgpu_gpu_recovery &&
4915 amdgpu_device_ip_check_soft_reset(adev)) {
26bc5340
AG
4916 amdgpu_device_ip_pre_soft_reset(adev);
4917 r = amdgpu_device_ip_soft_reset(adev);
4918 amdgpu_device_ip_post_soft_reset(adev);
4919 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 4920 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
26bc5340
AG
4921 need_full_reset = true;
4922 }
4923 }
4924
4925 if (need_full_reset)
4926 r = amdgpu_device_ip_suspend(adev);
04442bf7
LL
4927 if (need_full_reset)
4928 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4929 else
4930 clear_bit(AMDGPU_NEED_FULL_RESET,
4931 &reset_context->flags);
26bc5340
AG
4932 }
4933
4934 return r;
4935}
4936
15fd09a0
SA
4937static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
4938{
15fd09a0
SA
4939 int i;
4940
38a15ad9 4941 lockdep_assert_held(&adev->reset_domain->sem);
15fd09a0
SA
4942
4943 for (i = 0; i < adev->num_regs; i++) {
651d7ee6
SA
4944 adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]);
4945 trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i],
4946 adev->reset_dump_reg_value[i]);
15fd09a0
SA
4947 }
4948
4949 return 0;
4950}
4951
a7691785
AA
4952#ifndef CONFIG_DEV_COREDUMP
4953static void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost,
4954 struct amdgpu_reset_context *reset_context)
4955{
4956}
4957#else
3d8785f6
SA
4958static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
4959 size_t count, void *data, size_t datalen)
4960{
4961 struct drm_printer p;
a7691785 4962 struct amdgpu_coredump_info *coredump = data;
3d8785f6
SA
4963 struct drm_print_iterator iter;
4964 int i;
4965
4966 iter.data = buffer;
4967 iter.offset = 0;
4968 iter.start = offset;
4969 iter.remain = count;
4970
4971 p = drm_coredump_printer(&iter);
4972
4973 drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
4974 drm_printf(&p, "kernel: " UTS_RELEASE "\n");
4975 drm_printf(&p, "module: " KBUILD_MODNAME "\n");
a7691785
AA
4976 drm_printf(&p, "time: %lld.%09ld\n", coredump->reset_time.tv_sec, coredump->reset_time.tv_nsec);
4977 if (coredump->reset_task_info.pid)
3d8785f6 4978 drm_printf(&p, "process_name: %s PID: %d\n",
a7691785
AA
4979 coredump->reset_task_info.process_name,
4980 coredump->reset_task_info.pid);
3d8785f6 4981
a7691785 4982 if (coredump->reset_vram_lost)
3d8785f6 4983 drm_printf(&p, "VRAM is lost due to GPU reset!\n");
a7691785 4984 if (coredump->adev->num_regs) {
3d8785f6
SA
4985 drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n");
4986
a7691785 4987 for (i = 0; i < coredump->adev->num_regs; i++)
3d8785f6 4988 drm_printf(&p, "0x%08x: 0x%08x\n",
a7691785
AA
4989 coredump->adev->reset_dump_reg_list[i],
4990 coredump->adev->reset_dump_reg_value[i]);
3d8785f6
SA
4991 }
4992
4993 return count - iter.remain;
4994}
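
/*
 * Illustrative sketch, not a definitive implementation: additional fields
 * could be appended to the dump with further drm_printf() calls from
 * amdgpu_devcoredump_read() above. The helper name below is hypothetical;
 * it only shows the pattern using the existing gpu_reset_counter.
 */
static void __maybe_unused
amdgpu_devcoredump_print_reset_count(struct drm_printer *p,
				     struct amdgpu_device *adev)
{
	drm_printf(p, "gpu_reset_count: %d\n",
		   atomic_read(&adev->gpu_reset_counter));
}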
4995
4996static void amdgpu_devcoredump_free(void *data)
4997{
a7691785 4998 kfree(data);
3d8785f6
SA
4999}
5000
a7691785
AA
5001static void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost,
5002 struct amdgpu_reset_context *reset_context)
3d8785f6 5003{
a7691785 5004 struct amdgpu_coredump_info *coredump;
3d8785f6
SA
5005 struct drm_device *dev = adev_to_drm(adev);
5006
a7691785
AA
5007 coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT);
5008
5009 if (!coredump) {
5010 DRM_ERROR("%s: failed to allocate memory for coredump\n", __func__);
5011 return;
5012 }
5013
5014 coredump->reset_vram_lost = vram_lost;
5015
5016 if (reset_context->job && reset_context->job->vm)
5017 coredump->reset_task_info = reset_context->job->vm->task_info;
5018
5019 coredump->adev = adev;
5020
5021 ktime_get_ts64(&coredump->reset_time);
5022
5023 dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT,
3d8785f6
SA
5024 amdgpu_devcoredump_read, amdgpu_devcoredump_free);
5025}
5026#endif
5027
04442bf7
LL
5028int amdgpu_do_asic_reset(struct list_head *device_list_handle,
5029 struct amdgpu_reset_context *reset_context)
26bc5340
AG
5030{
5031 struct amdgpu_device *tmp_adev = NULL;
04442bf7 5032 bool need_full_reset, skip_hw_reset, vram_lost = false;
26bc5340 5033 int r = 0;
f5c7e779 5034 bool gpu_reset_for_dev_remove = 0;
26bc5340 5035
04442bf7
LL
5036 /* Try reset handler method first */
5037 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5038 reset_list);
15fd09a0 5039 amdgpu_reset_reg_dumps(tmp_adev);
0a83bb35
LL
5040
5041 reset_context->reset_device_list = device_list_handle;
04442bf7 5042 r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
404b277b 5043 /* If reset handler not implemented, continue; otherwise return */
b8920e1e 5044 if (r == -EOPNOTSUPP)
404b277b
LL
5045 r = 0;
5046 else
04442bf7
LL
5047 return r;
5048
5049 /* Reset handler not implemented, use the default method */
5050 need_full_reset =
5051 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5052 skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
5053
f5c7e779
YC
5054 gpu_reset_for_dev_remove =
5055 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5056 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5057
26bc5340 5058 /*
655ce9cb 5059 * ASIC reset has to be done on all XGMI hive nodes ASAP
26bc5340
AG
5060 * to allow proper link negotiation in FW (within 1 sec)
5061 */
7ac71382 5062 if (!skip_hw_reset && need_full_reset) {
655ce9cb 5063 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
041a62bc 5064 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 5065 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
e3c1b071 5066 tmp_adev->gmc.xgmi.pending_reset = false;
c96cf282 5067 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
5068 r = -EALREADY;
5069 } else
5070 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 5071
041a62bc 5072 if (r) {
aac89168 5073 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4a580877 5074 r, adev_to_drm(tmp_adev)->unique);
041a62bc 5075 break;
ce316fa5
LM
5076 }
5077 }
5078
041a62bc
AG
5079 /* For XGMI wait for all resets to complete before proceeding */
5080 if (!r) {
655ce9cb 5081 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
ce316fa5
LM
5082 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5083 flush_work(&tmp_adev->xgmi_reset_work);
5084 r = tmp_adev->asic_reset_res;
5085 if (r)
5086 break;
ce316fa5
LM
5087 }
5088 }
5089 }
ce316fa5 5090 }
26bc5340 5091
43c4d576 5092 if (!r && amdgpu_ras_intr_triggered()) {
655ce9cb 5093 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5e67bba3 5094 if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops &&
5095 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
5096 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev);
43c4d576
JC
5097 }
5098
00eaa571 5099 amdgpu_ras_intr_cleared();
43c4d576 5100 }
00eaa571 5101
f5c7e779
YC
5102 /* Since the mode1 reset affects base ip blocks, the
5103 * phase1 ip blocks need to be resumed. Otherwise there
5104 * will be a BIOS signature error and the psp bootloader
5105 * can't load kdb on the next amdgpu install.
5106 */
5107 if (gpu_reset_for_dev_remove) {
5108 list_for_each_entry(tmp_adev, device_list_handle, reset_list)
5109 amdgpu_device_ip_resume_phase1(tmp_adev);
5110
5111 goto end;
5112 }
5113
655ce9cb 5114 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
26bc5340
AG
5115 if (need_full_reset) {
5116 /* post card */
e3c1b071 5117 r = amdgpu_device_asic_init(tmp_adev);
5118 if (r) {
aac89168 5119 dev_warn(tmp_adev->dev, "asic atom init failed!");
e3c1b071 5120 } else {
26bc5340 5121 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
9cec53c1 5122
26bc5340
AG
5123 r = amdgpu_device_ip_resume_phase1(tmp_adev);
5124 if (r)
5125 goto out;
5126
5127 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
a7691785
AA
5128
5129 amdgpu_coredump(tmp_adev, vram_lost, reset_context);
5130
26bc5340 5131 if (vram_lost) {
77e7f829 5132 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 5133 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
5134 }
5135
26bc5340
AG
5136 r = amdgpu_device_fw_loading(tmp_adev);
5137 if (r)
5138 return r;
5139
c45e38f2
LL
5140 r = amdgpu_xcp_restore_partition_mode(
5141 tmp_adev->xcp_mgr);
5142 if (r)
5143 goto out;
5144
26bc5340
AG
5145 r = amdgpu_device_ip_resume_phase2(tmp_adev);
5146 if (r)
5147 goto out;
5148
5149 if (vram_lost)
5150 amdgpu_device_fill_reset_magic(tmp_adev);
5151
fdafb359
EQ
5152 /*
5153 * Add this ASIC back as tracked since the reset already
5154 * completed successfully.
5155 */
5156 amdgpu_register_gpu_instance(tmp_adev);
5157
04442bf7
LL
5158 if (!reset_context->hive &&
5159 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
e3c1b071 5160 amdgpu_xgmi_add_device(tmp_adev);
5161
7c04ca50 5162 r = amdgpu_device_ip_late_init(tmp_adev);
5163 if (r)
5164 goto out;
5165
087451f3 5166 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
565d1941 5167
e8fbaf03
GC
5168 /*
5169 * The GPU enters a bad state once the faulty pages
5170 * detected by ECC reach the threshold, and RAS
5171 * recovery is scheduled next. So add one check
5172 * here to break recovery if it indeed exceeds the
5173 * bad page threshold, and remind the user to
5174 * retire this GPU or set a bigger
5175 * bad_page_threshold value to fix this when
5176 * probing the driver again.
5177 */
11003c68 5178 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
e8fbaf03
GC
5179 /* must succeed. */
5180 amdgpu_ras_resume(tmp_adev);
5181 } else {
5182 r = -EINVAL;
5183 goto out;
5184 }
e79a04d5 5185
26bc5340 5186 /* Update PSP FW topology after reset */
04442bf7
LL
5187 if (reset_context->hive &&
5188 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5189 r = amdgpu_xgmi_update_topology(
5190 reset_context->hive, tmp_adev);
26bc5340
AG
5191 }
5192 }
5193
26bc5340
AG
5194out:
5195 if (!r) {
5196 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5197 r = amdgpu_ib_ring_tests(tmp_adev);
5198 if (r) {
5199 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
26bc5340
AG
5200 need_full_reset = true;
5201 r = -EAGAIN;
5202 goto end;
5203 }
5204 }
5205
5206 if (!r)
5207 r = amdgpu_device_recover_vram(tmp_adev);
5208 else
5209 tmp_adev->asic_reset_res = r;
5210 }
5211
5212end:
04442bf7
LL
5213 if (need_full_reset)
5214 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5215 else
5216 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340
AG
5217 return r;
5218}
5219
e923be99 5220static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
26bc5340 5221{
5740682e 5222
a3a09142
AD
5223 switch (amdgpu_asic_reset_method(adev)) {
5224 case AMD_RESET_METHOD_MODE1:
5225 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5226 break;
5227 case AMD_RESET_METHOD_MODE2:
5228 adev->mp1_state = PP_MP1_STATE_RESET;
5229 break;
5230 default:
5231 adev->mp1_state = PP_MP1_STATE_NONE;
5232 break;
5233 }
26bc5340 5234}
d38ceaf9 5235
e923be99 5236static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
26bc5340 5237{
89041940 5238 amdgpu_vf_error_trans_all(adev);
a3a09142 5239 adev->mp1_state = PP_MP1_STATE_NONE;
91fb309d
HC
5240}
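
/*
 * Illustrative sketch, not a definitive implementation: the two MP1 state
 * helpers above are used as a pair around a reset-domain critical section,
 * as amdgpu_device_gpu_recover() and amdgpu_pci_error_detected() below do.
 * The function name is hypothetical.
 */
static void __maybe_unused amdgpu_example_mp1_guard(struct amdgpu_device *adev)
{
	amdgpu_device_lock_reset_domain(adev->reset_domain);
	amdgpu_device_set_mp1_state(adev);

	/* ... perform the actual reset/recovery work here ... */

	amdgpu_device_unset_mp1_state(adev);
	amdgpu_device_unlock_reset_domain(adev->reset_domain);
}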
5241
3f12acc8
EQ
5242static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5243{
5244 struct pci_dev *p = NULL;
5245
5246 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5247 adev->pdev->bus->number, 1);
5248 if (p) {
5249 pm_runtime_enable(&(p->dev));
5250 pm_runtime_resume(&(p->dev));
5251 }
b85e285e
YY
5252
5253 pci_dev_put(p);
3f12acc8
EQ
5254}
5255
5256static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5257{
5258 enum amd_reset_method reset_method;
5259 struct pci_dev *p = NULL;
5260 u64 expires;
5261
5262 /*
5263 * For now, only BACO and mode1 reset are confirmed
5264 * to suffer the audio issue if audio is not properly suspended.
5265 */
5266 reset_method = amdgpu_asic_reset_method(adev);
5267 if ((reset_method != AMD_RESET_METHOD_BACO) &&
5268 (reset_method != AMD_RESET_METHOD_MODE1))
5269 return -EINVAL;
5270
5271 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5272 adev->pdev->bus->number, 1);
5273 if (!p)
5274 return -ENODEV;
5275
5276 expires = pm_runtime_autosuspend_expiration(&(p->dev));
5277 if (!expires)
5278 /*
5279 * If we cannot get the audio device autosuspend delay,
5280 * a fixed 4s interval will be used. Since 3s is
5281 * the audio controller's default autosuspend delay setting,
5282 * the 4s used here is guaranteed to cover it.
5283 */
54b7feb9 5284 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
5285
5286 while (!pm_runtime_status_suspended(&(p->dev))) {
5287 if (!pm_runtime_suspend(&(p->dev)))
5288 break;
5289
5290 if (expires < ktime_get_mono_fast_ns()) {
5291 dev_warn(adev->dev, "failed to suspend display audio\n");
b85e285e 5292 pci_dev_put(p);
3f12acc8
EQ
5293 /* TODO: abort the succeeding gpu reset? */
5294 return -ETIMEDOUT;
5295 }
5296 }
5297
5298 pm_runtime_disable(&(p->dev));
5299
b85e285e 5300 pci_dev_put(p);
3f12acc8
EQ
5301 return 0;
5302}
5303
d193b12b 5304static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
247c7b0d
AG
5305{
5306 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5307
5308#if defined(CONFIG_DEBUG_FS)
5309 if (!amdgpu_sriov_vf(adev))
5310 cancel_work(&adev->reset_work);
5311#endif
5312
5313 if (adev->kfd.dev)
5314 cancel_work(&adev->kfd.reset_work);
5315
5316 if (amdgpu_sriov_vf(adev))
5317 cancel_work(&adev->virt.flr_work);
5318
5319 if (con && adev->ras_enabled)
5320 cancel_work(&con->recovery_work);
5321
5322}
5323
26bc5340 5324/**
6e9c65f7 5325 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
26bc5340 5326 *
982a820b 5327 * @adev: amdgpu_device pointer
26bc5340 5328 * @job: which job trigger hang
80bd2de1 5329 * @reset_context: amdgpu reset context pointer
26bc5340
AG
5330 *
5331 * Attempt to reset the GPU if it has hung (all ASICs).
5332 * Attempt to do a soft reset or full reset and reinitialize the ASIC.
5333 * Returns 0 for success or an error on failure.
5334 */
5335
cf727044 5336int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
f1549c09
LG
5337 struct amdgpu_job *job,
5338 struct amdgpu_reset_context *reset_context)
26bc5340 5339{
1d721ed6 5340 struct list_head device_list, *device_list_handle = NULL;
7dd8c205 5341 bool job_signaled = false;
26bc5340 5342 struct amdgpu_hive_info *hive = NULL;
26bc5340 5343 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 5344 int i, r = 0;
bb5c7235 5345 bool need_emergency_restart = false;
3f12acc8 5346 bool audio_suspended = false;
f5c7e779
YC
5347 bool gpu_reset_for_dev_remove = false;
5348
5349 gpu_reset_for_dev_remove =
5350 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5351 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340 5352
6e3cd2a9 5353 /*
bb5c7235
WS
5354 * Special case: RAS triggered and full reset isn't supported
5355 */
5356 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5357
d5ea093e
AG
5358 /*
5359 * Flush RAM to disk so that after reboot
5360 * the user can read the log and see why the system rebooted.
5361 */
bb5c7235 5362 if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
5363 DRM_WARN("Emergency reboot.");
5364
5365 ksys_sync_helper();
5366 emergency_restart();
5367 }
5368
b823821f 5369 dev_info(adev->dev, "GPU %s begin!\n",
bb5c7235 5370 need_emergency_restart ? "jobs stop":"reset");
26bc5340 5371
175ac6ec
ZL
5372 if (!amdgpu_sriov_vf(adev))
5373 hive = amdgpu_get_xgmi_hive(adev);
681260df 5374 if (hive)
53b3f8f4 5375 mutex_lock(&hive->hive_lock);
26bc5340 5376
f1549c09
LG
5377 reset_context->job = job;
5378 reset_context->hive = hive;
9e94d22c
EQ
5379 /*
5380 * Build list of devices to reset.
5381 * In case we are in XGMI hive mode, resort the device list
5382 * to put adev in the 1st position.
5383 */
5384 INIT_LIST_HEAD(&device_list);
175ac6ec 5385 if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
83d29a5f 5386 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
655ce9cb 5387 list_add_tail(&tmp_adev->reset_list, &device_list);
83d29a5f
YC
5388 if (gpu_reset_for_dev_remove && adev->shutdown)
5389 tmp_adev->shutdown = true;
5390 }
655ce9cb 5391 if (!list_is_first(&adev->reset_list, &device_list))
5392 list_rotate_to_front(&adev->reset_list, &device_list);
5393 device_list_handle = &device_list;
26bc5340 5394 } else {
655ce9cb 5395 list_add_tail(&adev->reset_list, &device_list);
26bc5340
AG
5396 device_list_handle = &device_list;
5397 }
5398
e923be99
AG
5399 /* We need to lock reset domain only once both for XGMI and single device */
5400 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5401 reset_list);
3675c2f2 5402 amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
e923be99 5403
1d721ed6 5404 /* block all schedulers and reset given job's ring */
655ce9cb 5405 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f287a3c5 5406
e923be99 5407 amdgpu_device_set_mp1_state(tmp_adev);
f287a3c5 5408
3f12acc8
EQ
5409 /*
5410 * Try to put the audio codec into a suspended state
5411 * before the GPU reset starts.
5412 *
5413 * The power domain of the graphics device is shared
5414 * with the AZ power domain. Without this,
5415 * we may change the audio hardware from behind
5416 * the audio driver's back. That will trigger
5417 * some audio codec errors.
5418 */
5419 if (!amdgpu_device_suspend_display_audio(tmp_adev))
5420 audio_suspended = true;
5421
9e94d22c
EQ
5422 amdgpu_ras_set_error_query_ready(tmp_adev, false);
5423
52fb44cf
EQ
5424 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5425
c004d44e 5426 if (!amdgpu_sriov_vf(tmp_adev))
428890a3 5427 amdgpu_amdkfd_pre_reset(tmp_adev);
9e94d22c 5428
12ffa55d
AG
5429 /*
5430 * Mark these ASICs to be reset as untracked first,
5431 * and add them back after the reset completes
5432 */
5433 amdgpu_unregister_gpu_instance(tmp_adev);
5434
163d4cd2 5435 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
565d1941 5436
f1c1314b 5437 /* disable ras on ALL IPs */
bb5c7235 5438 if (!need_emergency_restart &&
b823821f 5439 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 5440 amdgpu_ras_suspend(tmp_adev);
5441
1d721ed6
AG
5442 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5443 struct amdgpu_ring *ring = tmp_adev->rings[i];
5444
5445 if (!ring || !ring->sched.thread)
5446 continue;
5447
0b2d2c2e 5448 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 5449
bb5c7235 5450 if (need_emergency_restart)
7c6e68c7 5451 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6 5452 }
8f8c80f4 5453 atomic_inc(&tmp_adev->gpu_reset_counter);
1d721ed6
AG
5454 }
5455
bb5c7235 5456 if (need_emergency_restart)
7c6e68c7
AG
5457 goto skip_sched_resume;
5458
1d721ed6
AG
5459 /*
5460 * Must check guilty signal here since after this point all old
5461 * HW fences are force signaled.
5462 *
5463 * job->base holds a reference to parent fence
5464 */
f6a3f660 5465 if (job && dma_fence_is_signaled(&job->hw_fence)) {
1d721ed6 5466 job_signaled = true;
1d721ed6
AG
5467 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5468 goto skip_hw_reset;
5469 }
5470
26bc5340 5471retry: /* Rest of adevs pre asic reset from XGMI hive. */
655ce9cb 5472 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f5c7e779
YC
5473 if (gpu_reset_for_dev_remove) {
5474 /* Workaround for ASICs that need to disable SMC first */
5475 amdgpu_device_smu_fini_early(tmp_adev);
5476 }
f1549c09 5477 r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
26bc5340
AG
5478 /* TODO: Should we stop? */
5479 if (r) {
aac89168 5480 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
4a580877 5481 r, adev_to_drm(tmp_adev)->unique);
26bc5340
AG
5482 tmp_adev->asic_reset_res = r;
5483 }
247c7b0d
AG
5484
5485 /*
5486 * Drop all pending non-scheduler resets. Scheduler resets
5487 * were already dropped during drm_sched_stop
5488 */
d193b12b 5489 amdgpu_device_stop_pending_resets(tmp_adev);
26bc5340
AG
5490 }
5491
5492 /* Actual ASIC resets if needed.*/
4f30d920 5493 /* Host driver will handle XGMI hive reset for SRIOV */
26bc5340
AG
5494 if (amdgpu_sriov_vf(adev)) {
5495 r = amdgpu_device_reset_sriov(adev, job ? false : true);
5496 if (r)
5497 adev->asic_reset_res = r;
950d6425 5498
28606c4e 5499 /* Aldebaran and gfx_11_0_3 support RAS in SRIOV, so need to resume RAS during reset */
4e8303cf
LL
5500 if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
5501 IP_VERSION(9, 4, 2) ||
5502 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
950d6425 5503 amdgpu_ras_resume(adev);
26bc5340 5504 } else {
f1549c09 5505 r = amdgpu_do_asic_reset(device_list_handle, reset_context);
b98a1648 5506 if (r && r == -EAGAIN)
26bc5340 5507 goto retry;
f5c7e779
YC
5508
5509 if (!r && gpu_reset_for_dev_remove)
5510 goto recover_end;
26bc5340
AG
5511 }
5512
1d721ed6
AG
5513skip_hw_reset:
5514
26bc5340 5515 /* Post ASIC reset for all devs. */
655ce9cb 5516 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
7c6e68c7 5517
1d721ed6
AG
5518 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5519 struct amdgpu_ring *ring = tmp_adev->rings[i];
5520
5521 if (!ring || !ring->sched.thread)
5522 continue;
5523
6868a2c4 5524 drm_sched_start(&ring->sched, true);
1d721ed6
AG
5525 }
5526
4e8303cf
LL
5527 if (adev->enable_mes &&
5528 amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(11, 0, 3))
ed67f729
JX
5529 amdgpu_mes_self_test(tmp_adev);
5530
b8920e1e 5531 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
4a580877 5532 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
1d721ed6 5533
7258fa31
SK
5534 if (tmp_adev->asic_reset_res)
5535 r = tmp_adev->asic_reset_res;
5536
1d721ed6 5537 tmp_adev->asic_reset_res = 0;
26bc5340
AG
5538
5539 if (r) {
5540 /* bad news, how to tell it to userspace? */
12ffa55d 5541 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
5542 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5543 } else {
12ffa55d 5544 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
3fa8f89d
S
5545 if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5546 DRM_WARN("smart shift update failed\n");
26bc5340 5547 }
7c6e68c7 5548 }
26bc5340 5549
7c6e68c7 5550skip_sched_resume:
655ce9cb 5551 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
428890a3 5552 /* unlock kfd: SRIOV would do it separately */
c004d44e 5553 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
428890a3 5554 amdgpu_amdkfd_post_reset(tmp_adev);
8e2712e7 5555
5556 /* kfd_post_reset will do nothing if the kfd device is not initialized,
5557 * so bring up kfd here if it wasn't initialized before
5558 */
5559 if (!adev->kfd.init_complete)
5560 amdgpu_amdkfd_device_init(adev);
5561
3f12acc8
EQ
5562 if (audio_suspended)
5563 amdgpu_device_resume_display_audio(tmp_adev);
e923be99
AG
5564
5565 amdgpu_device_unset_mp1_state(tmp_adev);
d293470e
YC
5566
5567 amdgpu_ras_set_error_query_ready(tmp_adev, true);
26bc5340
AG
5568 }
5569
f5c7e779 5570recover_end:
e923be99
AG
5571 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5572 reset_list);
5573 amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5574
9e94d22c 5575 if (hive) {
9e94d22c 5576 mutex_unlock(&hive->hive_lock);
d95e8e97 5577 amdgpu_put_xgmi_hive(hive);
9e94d22c 5578 }
26bc5340 5579
f287a3c5 5580 if (r)
26bc5340 5581 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
ab9a0b1f
AG
5582
5583 atomic_set(&adev->reset_domain->reset_res, r);
d38ceaf9
AD
5584 return r;
5585}
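
/*
 * Illustrative sketch, not a definitive implementation: a minimal caller
 * fills an amdgpu_reset_context and hands it to amdgpu_device_gpu_recover(),
 * much like amdgpu_pci_slot_reset() below does before calling
 * amdgpu_do_asic_reset(). The function name is hypothetical.
 */
static int __maybe_unused
amdgpu_example_trigger_recovery(struct amdgpu_device *adev)
{
	struct amdgpu_reset_context reset_context;

	memset(&reset_context, 0, sizeof(reset_context));
	reset_context.method = AMD_RESET_METHOD_NONE;
	reset_context.reset_req_dev = adev;
	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);

	/* No guilty job in this sketch, hence NULL */
	return amdgpu_device_gpu_recover(adev, NULL, &reset_context);
}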
5586
e3ecdffa
AD
5587/**
5588 * amdgpu_device_get_pcie_info - fetch PCIe info about the PCIe slot
5589 *
5590 * @adev: amdgpu_device pointer
5591 *
5592 * Fetches and stores in the driver the PCIe capabilities (gen speed
5593 * and lanes) of the slot the device is in. Handles APUs and
5594 * virtualized environments where PCIE config space may not be available.
5595 */
5494d864 5596static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 5597{
5d9a6330 5598 struct pci_dev *pdev;
c5313457
HK
5599 enum pci_bus_speed speed_cap, platform_speed_cap;
5600 enum pcie_link_width platform_link_width;
d0dd7f0c 5601
cd474ba0
AD
5602 if (amdgpu_pcie_gen_cap)
5603 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 5604
cd474ba0
AD
5605 if (amdgpu_pcie_lane_cap)
5606 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 5607
cd474ba0 5608 /* covers APUs as well */
04e85958 5609 if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
cd474ba0
AD
5610 if (adev->pm.pcie_gen_mask == 0)
5611 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5612 if (adev->pm.pcie_mlw_mask == 0)
5613 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 5614 return;
cd474ba0 5615 }
d0dd7f0c 5616
c5313457
HK
5617 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5618 return;
5619
dbaa922b
AD
5620 pcie_bandwidth_available(adev->pdev, NULL,
5621 &platform_speed_cap, &platform_link_width);
c5313457 5622
cd474ba0 5623 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
5624 /* asic caps */
5625 pdev = adev->pdev;
5626 speed_cap = pcie_get_speed_cap(pdev);
5627 if (speed_cap == PCI_SPEED_UNKNOWN) {
5628 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
5629 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5630 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 5631 } else {
2b3a1f51
FX
5632 if (speed_cap == PCIE_SPEED_32_0GT)
5633 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5634 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5635 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5636 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5637 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5638 else if (speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5639 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5640 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5641 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5642 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5643 else if (speed_cap == PCIE_SPEED_8_0GT)
5644 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5645 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5646 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5647 else if (speed_cap == PCIE_SPEED_5_0GT)
5648 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5649 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5650 else
5651 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5652 }
5653 /* platform caps */
c5313457 5654 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
5655 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5656 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5657 } else {
2b3a1f51
FX
5658 if (platform_speed_cap == PCIE_SPEED_32_0GT)
5659 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5660 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5661 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5662 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5663 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5664 else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5665 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5666 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5667 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5668 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 5669 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
5670 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5671 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5672 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 5673 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
5674 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5675 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5676 else
5677 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5678
cd474ba0
AD
5679 }
5680 }
5681 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 5682 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
5683 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5684 } else {
c5313457 5685 switch (platform_link_width) {
5d9a6330 5686 case PCIE_LNK_X32:
cd474ba0
AD
5687 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5688 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5689 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5690 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5691 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5692 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5693 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5694 break;
5d9a6330 5695 case PCIE_LNK_X16:
cd474ba0
AD
5696 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5697 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5698 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5699 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5700 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5701 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5702 break;
5d9a6330 5703 case PCIE_LNK_X12:
cd474ba0
AD
5704 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5705 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5706 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5707 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5708 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5709 break;
5d9a6330 5710 case PCIE_LNK_X8:
cd474ba0
AD
5711 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5712 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5713 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5714 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5715 break;
5d9a6330 5716 case PCIE_LNK_X4:
cd474ba0
AD
5717 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5718 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5719 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5720 break;
5d9a6330 5721 case PCIE_LNK_X2:
cd474ba0
AD
5722 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5723 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5724 break;
5d9a6330 5725 case PCIE_LNK_X1:
cd474ba0
AD
5726 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5727 break;
5728 default:
5729 break;
5730 }
d0dd7f0c
AD
5731 }
5732 }
5733}
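
/*
 * Illustrative sketch, not a definitive implementation: once
 * amdgpu_device_get_pcie_info() has populated the cached masks, callers can
 * test individual capability bits, e.g. whether a Gen3 link speed is
 * advertised. The function name is hypothetical.
 */
static bool __maybe_unused
amdgpu_example_platform_supports_gen3(struct amdgpu_device *adev)
{
	return !!(adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
}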
d38ceaf9 5734
08a2fd23
RE
5735/**
5736 * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
5737 *
5738 * @adev: amdgpu_device pointer
5739 * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
5740 *
5741 * Return true if @peer_adev can access (DMA) @adev through the PCIe
5742 * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
5743 * @peer_adev.
5744 */
5745bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
5746 struct amdgpu_device *peer_adev)
5747{
5748#ifdef CONFIG_HSA_AMD_P2P
5749 uint64_t address_mask = peer_adev->dev->dma_mask ?
5750 ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
5751 resource_size_t aper_limit =
5752 adev->gmc.aper_base + adev->gmc.aper_size - 1;
bb66ecbf
LL
5753 bool p2p_access =
5754 !adev->gmc.xgmi.connected_to_cpu &&
5755 !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
08a2fd23
RE
5756
5757 return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
5758 adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
5759 !(adev->gmc.aper_base & address_mask ||
5760 aper_limit & address_mask));
5761#else
5762 return false;
5763#endif
5764}
5765
361dbd01
AD
5766int amdgpu_device_baco_enter(struct drm_device *dev)
5767{
1348969a 5768 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5769 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01 5770
6ab68650 5771 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
5772 return -ENOTSUPP;
5773
8ab0d6f0 5774 if (ras && adev->ras_enabled &&
acdae216 5775 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5776 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
5777
9530273e 5778 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
5779}
5780
5781int amdgpu_device_baco_exit(struct drm_device *dev)
5782{
1348969a 5783 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5784 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 5785 int ret = 0;
361dbd01 5786
6ab68650 5787 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
5788 return -ENOTSUPP;
5789
9530273e
EQ
5790 ret = amdgpu_dpm_baco_exit(adev);
5791 if (ret)
5792 return ret;
7a22677b 5793
8ab0d6f0 5794 if (ras && adev->ras_enabled &&
acdae216 5795 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5796 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
5797
1bece222
CL
5798 if (amdgpu_passthrough(adev) &&
5799 adev->nbio.funcs->clear_doorbell_interrupt)
5800 adev->nbio.funcs->clear_doorbell_interrupt(adev);
5801
7a22677b 5802 return 0;
361dbd01 5803}
c9a6b82f
AG
5804
5805/**
5806 * amdgpu_pci_error_detected - Called when a PCI error is detected.
5807 * @pdev: PCI device struct
5808 * @state: PCI channel state
5809 *
5810 * Description: Called when a PCI error is detected.
5811 *
5812 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
5813 */
5814pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
5815{
5816 struct drm_device *dev = pci_get_drvdata(pdev);
5817 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5818 int i;
c9a6b82f
AG
5819
5820 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
5821
6894305c
AG
5822 if (adev->gmc.xgmi.num_physical_nodes > 1) {
5823 DRM_WARN("No support for XGMI hive yet...");
5824 return PCI_ERS_RESULT_DISCONNECT;
5825 }
5826
e17e27f9
GC
5827 adev->pci_channel_state = state;
5828
c9a6b82f
AG
5829 switch (state) {
5830 case pci_channel_io_normal:
5831 return PCI_ERS_RESULT_CAN_RECOVER;
acd89fca 5832 /* Fatal error, prepare for slot reset */
8a11d283
TZ
5833 case pci_channel_io_frozen:
5834 /*
d0fb18b5 5835 * Locking adev->reset_domain->sem will prevent any external access
acd89fca
AG
5836 * to GPU during PCI error recovery
5837 */
3675c2f2 5838 amdgpu_device_lock_reset_domain(adev->reset_domain);
e923be99 5839 amdgpu_device_set_mp1_state(adev);
acd89fca
AG
5840
5841 /*
5842 * Block any work scheduling as we do for regular GPU reset
5843 * for the duration of the recovery
5844 */
5845 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5846 struct amdgpu_ring *ring = adev->rings[i];
5847
5848 if (!ring || !ring->sched.thread)
5849 continue;
5850
5851 drm_sched_stop(&ring->sched, NULL);
5852 }
8f8c80f4 5853 atomic_inc(&adev->gpu_reset_counter);
c9a6b82f
AG
5854 return PCI_ERS_RESULT_NEED_RESET;
5855 case pci_channel_io_perm_failure:
5856 /* Permanent error, prepare for device removal */
5857 return PCI_ERS_RESULT_DISCONNECT;
5858 }
5859
5860 return PCI_ERS_RESULT_NEED_RESET;
5861}
5862
5863/**
5864 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
5865 * @pdev: pointer to PCI device
5866 */
5867pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
5868{
5869
5870 DRM_INFO("PCI error: mmio enabled callback!!\n");
5871
5872 /* TODO - dump whatever for debugging purposes */
5873
5874 /* This is called only if amdgpu_pci_error_detected returns
5875 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
5876 * works, so there is no need to reset the slot.
5877 */
5878
5879 return PCI_ERS_RESULT_RECOVERED;
5880}
5881
5882/**
5883 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
5884 * @pdev: PCI device struct
5885 *
5886 * Description: This routine is called by the pci error recovery
5887 * code after the PCI slot has been reset, just before we
5888 * should resume normal operations.
5889 */
5890pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
5891{
5892 struct drm_device *dev = pci_get_drvdata(pdev);
5893 struct amdgpu_device *adev = drm_to_adev(dev);
362c7b91 5894 int r, i;
04442bf7 5895 struct amdgpu_reset_context reset_context;
362c7b91 5896 u32 memsize;
7ac71382 5897 struct list_head device_list;
c9a6b82f
AG
5898
5899 DRM_INFO("PCI error: slot reset callback!!\n");
5900
04442bf7
LL
5901 memset(&reset_context, 0, sizeof(reset_context));
5902
7ac71382 5903 INIT_LIST_HEAD(&device_list);
655ce9cb 5904 list_add_tail(&adev->reset_list, &device_list);
7ac71382 5905
362c7b91
AG
5906 /* wait for asic to come out of reset */
5907 msleep(500);
5908
7ac71382 5909 /* Restore PCI confspace */
c1dd4aa6 5910 amdgpu_device_load_pci_state(pdev);
c9a6b82f 5911
362c7b91
AG
5912 /* confirm ASIC came out of reset */
5913 for (i = 0; i < adev->usec_timeout; i++) {
5914 memsize = amdgpu_asic_get_config_memsize(adev);
5915
5916 if (memsize != 0xffffffff)
5917 break;
5918 udelay(1);
5919 }
5920 if (memsize == 0xffffffff) {
5921 r = -ETIME;
5922 goto out;
5923 }
5924
04442bf7
LL
5925 reset_context.method = AMD_RESET_METHOD_NONE;
5926 reset_context.reset_req_dev = adev;
5927 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
5928 set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
5929
7afefb81 5930 adev->no_hw_access = true;
04442bf7 5931 r = amdgpu_device_pre_asic_reset(adev, &reset_context);
7afefb81 5932 adev->no_hw_access = false;
c9a6b82f
AG
5933 if (r)
5934 goto out;
5935
04442bf7 5936 r = amdgpu_do_asic_reset(&device_list, &reset_context);
c9a6b82f
AG
5937
5938out:
c9a6b82f 5939 if (!r) {
c1dd4aa6
AG
5940 if (amdgpu_device_cache_pci_state(adev->pdev))
5941 pci_restore_state(adev->pdev);
5942
c9a6b82f
AG
5943 DRM_INFO("PCIe error recovery succeeded\n");
5944 } else {
5945 DRM_ERROR("PCIe error recovery failed, err:%d", r);
e923be99
AG
5946 amdgpu_device_unset_mp1_state(adev);
5947 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f
AG
5948 }
5949
5950 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
5951}
5952
5953/**
5954 * amdgpu_pci_resume() - resume normal ops after PCI reset
5955 * @pdev: pointer to PCI device
5956 *
5957 * Called when the error recovery driver tells us that its
505199a3 5958 * OK to resume normal operation.
c9a6b82f
AG
5959 */
5960void amdgpu_pci_resume(struct pci_dev *pdev)
5961{
5962 struct drm_device *dev = pci_get_drvdata(pdev);
5963 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5964 int i;
c9a6b82f 5965
c9a6b82f
AG
5966
5967 DRM_INFO("PCI error: resume callback!!\n");
acd89fca 5968
e17e27f9
GC
5969 /* Only continue execution for the case of pci_channel_io_frozen */
5970 if (adev->pci_channel_state != pci_channel_io_frozen)
5971 return;
5972
acd89fca
AG
5973 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5974 struct amdgpu_ring *ring = adev->rings[i];
5975
5976 if (!ring || !ring->sched.thread)
5977 continue;
5978
acd89fca
AG
5979 drm_sched_start(&ring->sched, true);
5980 }
5981
e923be99
AG
5982 amdgpu_device_unset_mp1_state(adev);
5983 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f 5984}
c1dd4aa6
AG
5985
5986bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
5987{
5988 struct drm_device *dev = pci_get_drvdata(pdev);
5989 struct amdgpu_device *adev = drm_to_adev(dev);
5990 int r;
5991
5992 r = pci_save_state(pdev);
5993 if (!r) {
5994 kfree(adev->pci_state);
5995
5996 adev->pci_state = pci_store_saved_state(pdev);
5997
5998 if (!adev->pci_state) {
5999 DRM_ERROR("Failed to store PCI saved state");
6000 return false;
6001 }
6002 } else {
6003 DRM_WARN("Failed to save PCI state, err:%d\n", r);
6004 return false;
6005 }
6006
6007 return true;
6008}
6009
6010bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
6011{
6012 struct drm_device *dev = pci_get_drvdata(pdev);
6013 struct amdgpu_device *adev = drm_to_adev(dev);
6014 int r;
6015
6016 if (!adev->pci_state)
6017 return false;
6018
6019 r = pci_load_saved_state(pdev, adev->pci_state);
6020
6021 if (!r) {
6022 pci_restore_state(pdev);
6023 } else {
6024 DRM_WARN("Failed to load PCI state, err:%d\n", r);
6025 return false;
6026 }
6027
6028 return true;
6029}
6030
810085dd
EH
6031void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
6032 struct amdgpu_ring *ring)
6033{
6034#ifdef CONFIG_X86_64
b818a5d3 6035 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
6036 return;
6037#endif
6038 if (adev->gmc.xgmi.connected_to_cpu)
6039 return;
6040
6041 if (ring && ring->funcs->emit_hdp_flush)
6042 amdgpu_ring_emit_hdp_flush(ring);
6043 else
6044 amdgpu_asic_flush_hdp(adev, ring);
6045}
c1dd4aa6 6046
810085dd
EH
6047void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
6048 struct amdgpu_ring *ring)
6049{
6050#ifdef CONFIG_X86_64
b818a5d3 6051 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
6052 return;
6053#endif
6054 if (adev->gmc.xgmi.connected_to_cpu)
6055 return;
c1dd4aa6 6056
810085dd
EH
6057 amdgpu_asic_invalidate_hdp(adev, ring);
6058}
34f3a4a9 6059
89a7a870
AG
6060int amdgpu_in_reset(struct amdgpu_device *adev)
6061{
6062 return atomic_read(&adev->reset_domain->in_gpu_reset);
53a17b6b
TZ
6063}
6064
34f3a4a9
LY
6065/**
6066 * amdgpu_device_halt() - bring hardware to some kind of halt state
6067 *
6068 * @adev: amdgpu_device pointer
6069 *
6070 * Bring hardware to some kind of halt state so that no one can touch it
6071 * any more. It helps to maintain the error context when an error occurs.
6072 * Compared to a simple hang, the system will stay stable at least for SSH
6073 * access. Then it should be trivial to inspect the hardware state and
6074 * see what's going on. Implemented as follows:
6075 *
6076 * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
6077 * clears all CPU mappings to device, disallows remappings through page faults
6078 * 2. amdgpu_irq_disable_all() disables all interrupts
6079 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
6080 * 4. set adev->no_hw_access to avoid potential crashes after step 5
6081 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
6082 * 6. pci_disable_device() and pci_wait_for_pending_transaction()
6083 * flush any in flight DMA operations
6084 */
6085void amdgpu_device_halt(struct amdgpu_device *adev)
6086{
6087 struct pci_dev *pdev = adev->pdev;
e0f943b4 6088 struct drm_device *ddev = adev_to_drm(adev);
34f3a4a9 6089
2c1c7ba4 6090 amdgpu_xcp_dev_unplug(adev);
34f3a4a9
LY
6091 drm_dev_unplug(ddev);
6092
6093 amdgpu_irq_disable_all(adev);
6094
6095 amdgpu_fence_driver_hw_fini(adev);
6096
6097 adev->no_hw_access = true;
6098
6099 amdgpu_device_unmap_mmio(adev);
6100
6101 pci_disable_device(pdev);
6102 pci_wait_for_pending_transaction(pdev);
6103}
86700a40
XD
6104
6105u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
6106 u32 reg)
6107{
6108 unsigned long flags, address, data;
6109 u32 r;
6110
6111 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6112 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6113
6114 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6115 WREG32(address, reg * 4);
6116 (void)RREG32(address);
6117 r = RREG32(data);
6118 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6119 return r;
6120}
6121
6122void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
6123 u32 reg, u32 v)
6124{
6125 unsigned long flags, address, data;
6126
6127 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6128 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6129
6130 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6131 WREG32(address, reg * 4);
6132 (void)RREG32(address);
6133 WREG32(data, v);
6134 (void)RREG32(data);
6135 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6136}
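
/*
 * Illustrative sketch, not a definitive implementation: the two port
 * accessors above are typically paired for a read-modify-write of an
 * indirect PCIe port register. Note that each accessor takes pcie_idx_lock
 * on its own, so the sequence as a whole is not atomic. The function name,
 * register offset and masks are hypothetical.
 */
static void __maybe_unused
amdgpu_example_pcie_port_rmw(struct amdgpu_device *adev, u32 reg,
			     u32 clear_mask, u32 set_mask)
{
	u32 v = amdgpu_device_pcie_port_rreg(adev, reg);

	v &= ~clear_mask;
	v |= set_mask;
	amdgpu_device_pcie_port_wreg(adev, reg, v);
}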
68ce8b24
CK
6137
6138/**
6139 * amdgpu_device_switch_gang - switch to a new gang
6140 * @adev: amdgpu_device pointer
6141 * @gang: the gang to switch to
6142 *
6143 * Try to switch to a new gang.
6144 * Returns: NULL if we switched to the new gang or a reference to the current
6145 * gang leader.
6146 */
6147struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
6148 struct dma_fence *gang)
6149{
6150 struct dma_fence *old = NULL;
6151
6152 do {
6153 dma_fence_put(old);
6154 rcu_read_lock();
6155 old = dma_fence_get_rcu_safe(&adev->gang_submit);
6156 rcu_read_unlock();
6157
6158 if (old == gang)
6159 break;
6160
6161 if (!dma_fence_is_signaled(old))
6162 return old;
6163
6164 } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
6165 old, gang) != old);
6166
6167 dma_fence_put(old);
6168 return NULL;
6169}
220c8cc8
AD
6170
6171bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6172{
6173 switch (adev->asic_type) {
6174#ifdef CONFIG_DRM_AMDGPU_SI
6175 case CHIP_HAINAN:
6176#endif
6177 case CHIP_TOPAZ:
6178 /* chips with no display hardware */
6179 return false;
6180#ifdef CONFIG_DRM_AMDGPU_SI
6181 case CHIP_TAHITI:
6182 case CHIP_PITCAIRN:
6183 case CHIP_VERDE:
6184 case CHIP_OLAND:
6185#endif
6186#ifdef CONFIG_DRM_AMDGPU_CIK
6187 case CHIP_BONAIRE:
6188 case CHIP_HAWAII:
6189 case CHIP_KAVERI:
6190 case CHIP_KABINI:
6191 case CHIP_MULLINS:
6192#endif
6193 case CHIP_TONGA:
6194 case CHIP_FIJI:
6195 case CHIP_POLARIS10:
6196 case CHIP_POLARIS11:
6197 case CHIP_POLARIS12:
6198 case CHIP_VEGAM:
6199 case CHIP_CARRIZO:
6200 case CHIP_STONEY:
6201 /* chips with display hardware */
6202 return true;
6203 default:
6204 /* IP discovery */
4e8303cf 6205 if (!amdgpu_ip_version(adev, DCE_HWIP, 0) ||
220c8cc8
AD
6206 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6207 return false;
6208 return true;
6209 }
6210}
81283fee
JZ
6211
6212uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
6213 uint32_t inst, uint32_t reg_addr, char reg_name[],
6214 uint32_t expected_value, uint32_t mask)
6215{
6216 uint32_t ret = 0;
6217 uint32_t old_ = 0;
6218 uint32_t tmp_ = RREG32(reg_addr);
6219 uint32_t loop = adev->usec_timeout;
6220
6221 while ((tmp_ & (mask)) != (expected_value)) {
6222 if (old_ != tmp_) {
6223 loop = adev->usec_timeout;
6224 old_ = tmp_;
6225 } else
6226 udelay(1);
6227 tmp_ = RREG32(reg_addr);
6228 loop--;
6229 if (!loop) {
6230 DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08x\n",
6231 inst, reg_name, (uint32_t)expected_value,
6232 (uint32_t)(tmp_ & (mask)));
6233 ret = -ETIMEDOUT;
6234 break;
6235 }
6236 }
6237 return ret;
6238}
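
/*
 * Illustrative sketch, not a definitive implementation: polling a status
 * register until a READY bit is set, using the helper above. The register
 * offset, bit and names are hypothetical placeholders.
 */
static int __maybe_unused
amdgpu_example_wait_ready(struct amdgpu_device *adev, uint32_t status_reg)
{
	const uint32_t ready_bit = 0x1;

	return (int)amdgpu_device_wait_on_rreg(adev, 0, status_reg, "STATUS",
					       ready_bit, ready_bit);
}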