drm/amdgpu: add remap_hdp_registers callback for nbio 7.11
[linux-2.6-block.git] drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
d38ceaf9
AD
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
b1ddf548 28#include <linux/power_supply.h>
0875dc9e 29#include <linux/kthread.h>
fdf2f6c5 30#include <linux/module.h>
d38ceaf9
AD
31#include <linux/console.h>
32#include <linux/slab.h>
4a74c38c 33#include <linux/iommu.h>
901e2be2 34#include <linux/pci.h>
3d8785f6
SA
35#include <linux/devcoredump.h>
36#include <generated/utsrelease.h>
08a2fd23 37#include <linux/pci-p2pdma.h>
d37a3929 38#include <linux/apple-gmux.h>
fdf2f6c5 39
b7cdb41e 40#include <drm/drm_aperture.h>
4562236b 41#include <drm/drm_atomic_helper.h>
973ad627 42#include <drm/drm_crtc_helper.h>
45b64fd9 43#include <drm/drm_fb_helper.h>
fcd70cd3 44#include <drm/drm_probe_helper.h>
d38ceaf9
AD
45#include <drm/amdgpu_drm.h>
46#include <linux/vgaarb.h>
47#include <linux/vga_switcheroo.h>
48#include <linux/efi.h>
49#include "amdgpu.h"
f4b373f4 50#include "amdgpu_trace.h"
d38ceaf9
AD
51#include "amdgpu_i2c.h"
52#include "atom.h"
53#include "amdgpu_atombios.h"
a5bde2f9 54#include "amdgpu_atomfirmware.h"
d0dd7f0c 55#include "amd_pcie.h"
33f34802
KW
56#ifdef CONFIG_DRM_AMDGPU_SI
57#include "si.h"
58#endif
a2e73f56
AD
59#ifdef CONFIG_DRM_AMDGPU_CIK
60#include "cik.h"
61#endif
aaa36a97 62#include "vi.h"
460826e6 63#include "soc15.h"
0a5b8c7b 64#include "nv.h"
d38ceaf9 65#include "bif/bif_4_1_d.h"
bec86378 66#include <linux/firmware.h>
89041940 67#include "amdgpu_vf_error.h"
d38ceaf9 68
ba997709 69#include "amdgpu_amdkfd.h"
d2f52ac8 70#include "amdgpu_pm.h"
d38ceaf9 71
5183411b 72#include "amdgpu_xgmi.h"
c030f2e4 73#include "amdgpu_ras.h"
9c7c85f7 74#include "amdgpu_pmu.h"
bd607166 75#include "amdgpu_fru_eeprom.h"
04442bf7 76#include "amdgpu_reset.h"
5183411b 77
d5ea093e 78#include <linux/suspend.h>
c6a6e2db 79#include <drm/task_barrier.h>
3f12acc8 80#include <linux/pm_runtime.h>
d5ea093e 81
f89f8c6b
AG
82#include <drm/drm_drv.h>
83
3ad5dcfe
KHF
84#if IS_ENABLED(CONFIG_X86)
85#include <asm/intel-family.h>
86#endif
87
e2a75f88 88MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
3f76dced 89MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
2d2e5e7e 90MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
ad5a67a7 91MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
54c4d17e 92MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
65e60f6e 93MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
42b325e5 94MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
e2a75f88 95
2dc80b00 96#define AMDGPU_RESUME_MS 2000
7258fa31
SK
97#define AMDGPU_MAX_RETRY_LIMIT 2
98#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
2dc80b00 99
b7cdb41e
ML
100static const struct drm_driver amdgpu_kms_driver;
101
050091ab 102const char *amdgpu_asic_name[] = {
da69c161
KW
103 "TAHITI",
104 "PITCAIRN",
105 "VERDE",
106 "OLAND",
107 "HAINAN",
d38ceaf9
AD
108 "BONAIRE",
109 "KAVERI",
110 "KABINI",
111 "HAWAII",
112 "MULLINS",
113 "TOPAZ",
114 "TONGA",
48299f95 115 "FIJI",
d38ceaf9 116 "CARRIZO",
139f4917 117 "STONEY",
2cc0c0b5
FC
118 "POLARIS10",
119 "POLARIS11",
c4642a47 120 "POLARIS12",
48ff108d 121 "VEGAM",
d4196f01 122 "VEGA10",
8fab806a 123 "VEGA12",
956fcddc 124 "VEGA20",
2ca8a5d2 125 "RAVEN",
d6c3b24e 126 "ARCTURUS",
1eee4228 127 "RENOIR",
d46b417a 128 "ALDEBARAN",
852a6626 129 "NAVI10",
d0f56dc2 130 "CYAN_SKILLFISH",
87dbad02 131 "NAVI14",
9802f5d7 132 "NAVI12",
ccaf72d3 133 "SIENNA_CICHLID",
ddd8fbe7 134 "NAVY_FLOUNDER",
4f1e9a76 135 "VANGOGH",
a2468e04 136 "DIMGREY_CAVEFISH",
6f169591 137 "BEIGE_GOBY",
ee9236b7 138 "YELLOW_CARP",
3ae695d6 139 "IP DISCOVERY",
d38ceaf9
AD
140 "LAST",
141};
142
dcea6e65
KR
143/**
144 * DOC: pcie_replay_count
145 *
146 * The amdgpu driver provides a sysfs API for reporting the total number
147 * of PCIe replays (NAKs).
148 * The file pcie_replay_count is used for this and returns the total
149 * number of replays as a sum of the NAKs generated and NAKs received.
150 */
151
152static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
153 struct device_attribute *attr, char *buf)
154{
155 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 156 struct amdgpu_device *adev = drm_to_adev(ddev);
dcea6e65
KR
157 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
158
36000c7a 159 return sysfs_emit(buf, "%llu\n", cnt);
dcea6e65
KR
160}
161
b8920e1e 162static DEVICE_ATTR(pcie_replay_count, 0444,
dcea6e65
KR
163 amdgpu_device_get_pcie_replay_count, NULL);
164
5494d864
AD
165static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
166
bd607166 167
fd496ca8 168/**
b98c6299 169 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
fd496ca8
AD
170 *
171 * @dev: drm_device pointer
172 *
b98c6299 173 * Returns true if the device is a dGPU with ATPX power control,
fd496ca8
AD
174 * otherwise returns false.
175 */
b98c6299 176bool amdgpu_device_supports_px(struct drm_device *dev)
fd496ca8
AD
177{
178 struct amdgpu_device *adev = drm_to_adev(dev);
179
b98c6299 180 if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
fd496ca8
AD
181 return true;
182 return false;
183}
184
e3ecdffa 185/**
0330b848 186 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
e3ecdffa
AD
187 *
188 * @dev: drm_device pointer
189 *
b98c6299 190 * Returns true if the device is a dGPU with ACPI power control,
e3ecdffa
AD
191 * otherwise returns false.
192 */
31af062a 193bool amdgpu_device_supports_boco(struct drm_device *dev)
d38ceaf9 194{
1348969a 195 struct amdgpu_device *adev = drm_to_adev(dev);
d38ceaf9 196
b98c6299
AD
197 if (adev->has_pr3 ||
198 ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
d38ceaf9
AD
199 return true;
200 return false;
201}
202
a69cba42
AD
203/**
204 * amdgpu_device_supports_baco - Does the device support BACO
205 *
206 * @dev: drm_device pointer
207 *
208 * Returns true if the device supports BACO,
209 * otherwise returns false.
210 */
211bool amdgpu_device_supports_baco(struct drm_device *dev)
212{
1348969a 213 struct amdgpu_device *adev = drm_to_adev(dev);
a69cba42
AD
214
215 return amdgpu_asic_supports_baco(adev);
216}
217
3fa8f89d
S
218/**
219 * amdgpu_device_supports_smart_shift - Is the device dGPU with
220 * smart shift support
221 *
222 * @dev: drm_device pointer
223 *
224 * Returns true if the device is a dGPU with Smart Shift support,
225 * otherwise returns false.
226 */
227bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
228{
229 return (amdgpu_device_supports_boco(dev) &&
230 amdgpu_acpi_is_power_shift_control_supported());
231}
232
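/*
 * Illustrative sketch (added, not part of the original file): how driver-side
 * runtime power management policy might combine the helpers above to pick a
 * power-off mode.  The RUNPM_MODE_* names and the precedence shown here are
 * hypothetical; only the helper calls come from this file.
 *
 *	if (amdgpu_device_supports_px(dev))
 *		mode = RUNPM_MODE_PX;	// legacy ATPX dGPU power control
 *	else if (amdgpu_device_supports_boco(dev))
 *		mode = RUNPM_MODE_BOCO;	// ACPI power resources (_PR3)
 *	else if (amdgpu_device_supports_baco(dev))
 *		mode = RUNPM_MODE_BACO;	// bus active, chip off
 *	else
 *		mode = RUNPM_MODE_NONE;
 */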
6e3cd2a9
MCC
233/*
234 * VRAM access helper functions
235 */
236
e35e2b11 237/**
048af66b 238 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
e35e2b11
TY
239 *
240 * @adev: amdgpu_device pointer
241 * @pos: offset of the buffer in vram
242 * @buf: virtual address of the buffer in system memory
243 * @size: read/write size, sizeof(@buf) must be >= @size
244 * @write: true - write to vram, otherwise - read from vram
245 */
048af66b
KW
246void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
247 void *buf, size_t size, bool write)
e35e2b11 248{
e35e2b11 249 unsigned long flags;
048af66b
KW
250 uint32_t hi = ~0, tmp = 0;
251 uint32_t *data = buf;
ce05ac56 252 uint64_t last;
f89f8c6b 253 int idx;
ce05ac56 254
c58a863b 255 if (!drm_dev_enter(adev_to_drm(adev), &idx))
f89f8c6b 256 return;
9d11eb0d 257
048af66b
KW
258 BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
259
260 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
261 for (last = pos + size; pos < last; pos += 4) {
262 tmp = pos >> 31;
263
264 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
265 if (tmp != hi) {
266 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
267 hi = tmp;
268 }
269 if (write)
270 WREG32_NO_KIQ(mmMM_DATA, *data++);
271 else
272 *data++ = RREG32_NO_KIQ(mmMM_DATA);
273 }
274
275 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
276 drm_dev_exit(idx);
277}
278
279/**
bbe04dec 280 * amdgpu_device_aper_access - access vram by vram aperture
048af66b
KW
281 *
282 * @adev: amdgpu_device pointer
283 * @pos: offset of the buffer in vram
284 * @buf: virtual address of the buffer in system memory
285 * @size: read/write size, sizeof(@buf) must be >= @size
286 * @write: true - write to vram, otherwise - read from vram
287 *
288 * The return value is the number of bytes actually transferred.
289 */
290size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
291 void *buf, size_t size, bool write)
292{
9d11eb0d 293#ifdef CONFIG_64BIT
048af66b
KW
294 void __iomem *addr;
295 size_t count = 0;
296 uint64_t last;
297
298 if (!adev->mman.aper_base_kaddr)
299 return 0;
300
9d11eb0d
CK
301 last = min(pos + size, adev->gmc.visible_vram_size);
302 if (last > pos) {
048af66b
KW
303 addr = adev->mman.aper_base_kaddr + pos;
304 count = last - pos;
9d11eb0d
CK
305
306 if (write) {
307 memcpy_toio(addr, buf, count);
4c452b5c
SS
308 /* Make sure HDP write cache flush happens without any reordering
309 * after the system memory contents are sent over PCIe device
310 */
9d11eb0d 311 mb();
810085dd 312 amdgpu_device_flush_hdp(adev, NULL);
9d11eb0d 313 } else {
810085dd 314 amdgpu_device_invalidate_hdp(adev, NULL);
4c452b5c
SS
315 /* Make sure HDP read cache is invalidated before issuing a read
316 * to the PCIe device
317 */
9d11eb0d
CK
318 mb();
319 memcpy_fromio(buf, addr, count);
320 }
321
9d11eb0d 322 }
048af66b
KW
323
324 return count;
325#else
326 return 0;
9d11eb0d 327#endif
048af66b 328}
9d11eb0d 329
048af66b
KW
330/**
331 * amdgpu_device_vram_access - read/write a buffer in vram
332 *
333 * @adev: amdgpu_device pointer
334 * @pos: offset of the buffer in vram
335 * @buf: virtual address of the buffer in system memory
336 * @size: read/write size, sizeof(@buf) must be >= @size
337 * @write: true - write to vram, otherwise - read from vram
338 */
339void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
340 void *buf, size_t size, bool write)
341{
342 size_t count;
e35e2b11 343
048af66b
KW
344 /* try using the vram aperture to access vram first */
345 count = amdgpu_device_aper_access(adev, pos, buf, size, write);
346 size -= count;
347 if (size) {
348 /* use MM to access the rest of vram */
349 pos += count;
350 buf += count;
351 amdgpu_device_mm_access(adev, pos, buf, size, write);
e35e2b11
TY
352 }
353}
354
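/*
 * Illustrative usage sketch (added, not part of the original file): writing a
 * small buffer into VRAM and reading it back through the helper above.  The
 * offset and data are made up for the example; offsets and sizes must stay
 * dword aligned and inside the VRAM size.
 *
 *	u32 pattern[4] = { 0xdeadbeef, 0, 1, 2 };
 *	u32 readback[4];
 *
 *	amdgpu_device_vram_access(adev, 0x1000, pattern,
 *				  sizeof(pattern), true);   // write to vram
 *	amdgpu_device_vram_access(adev, 0x1000, readback,
 *				  sizeof(readback), false); // read it back
 */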
d38ceaf9 355/*
f7ee1874 356 * register access helper functions.
d38ceaf9 357 */
56b53c0b
DL
358
359/* Check if hw access should be skipped because of hotplug or device error */
360bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
361{
7afefb81 362 if (adev->no_hw_access)
56b53c0b
DL
363 return true;
364
365#ifdef CONFIG_LOCKDEP
366 /*
367 * This is a bit complicated to understand, so worth a comment. What we assert
368 * here is that the GPU reset is not running on another thread in parallel.
369 *
370 * For this we trylock the read side of the reset semaphore, if that succeeds
371 * we know that the reset is not running in parallel.
372 *
373 * If the trylock fails we assert that we are either already holding the read
374 * side of the lock or are the reset thread itself and hold the write side of
375 * the lock.
376 */
377 if (in_task()) {
d0fb18b5
AG
378 if (down_read_trylock(&adev->reset_domain->sem))
379 up_read(&adev->reset_domain->sem);
56b53c0b 380 else
d0fb18b5 381 lockdep_assert_held(&adev->reset_domain->sem);
56b53c0b
DL
382 }
383#endif
384 return false;
385}
386
e3ecdffa 387/**
f7ee1874 388 * amdgpu_device_rreg - read a memory mapped IO or indirect register
e3ecdffa
AD
389 *
390 * @adev: amdgpu_device pointer
391 * @reg: dword aligned register offset
392 * @acc_flags: access flags which require special behavior
393 *
394 * Returns the 32 bit value from the offset specified.
395 */
f7ee1874
HZ
396uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
397 uint32_t reg, uint32_t acc_flags)
d38ceaf9 398{
f4b373f4
TSD
399 uint32_t ret;
400
56b53c0b 401 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
402 return 0;
403
f7ee1874
HZ
404 if ((reg * 4) < adev->rmmio_size) {
405 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
406 amdgpu_sriov_runtime(adev) &&
d0fb18b5 407 down_read_trylock(&adev->reset_domain->sem)) {
f7ee1874 408 ret = amdgpu_kiq_rreg(adev, reg);
d0fb18b5 409 up_read(&adev->reset_domain->sem);
f7ee1874
HZ
410 } else {
411 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
412 }
413 } else {
414 ret = adev->pcie_rreg(adev, reg * 4);
81202807 415 }
bc992ba5 416
f7ee1874 417 trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
e78b579d 418
f4b373f4 419 return ret;
d38ceaf9
AD
420}
421
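/*
 * Note (added, not part of the original file): most callers do not use
 * amdgpu_device_rreg()/amdgpu_device_wreg() directly but go through the
 * RREG32()/WREG32() style macros from amdgpu.h, which essentially expand to
 * these helpers, roughly:
 *
 *	#define RREG32(reg)	amdgpu_device_rreg(adev, (reg), 0)
 *	#define WREG32(reg, v)	amdgpu_device_wreg(adev, (reg), (v), 0)
 *
 * so a typical read-modify-write of a register looks like:
 *
 *	tmp = RREG32(reg);
 *	tmp &= ~mask;
 *	tmp |= val;
 *	WREG32(reg, tmp);
 */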
421a2a30
ML
422/*
423 * MMIO register read with bytes helper functions
424 * @offset: byte offset from MMIO start
b8920e1e 425 */
421a2a30 426
e3ecdffa
AD
427/**
428 * amdgpu_mm_rreg8 - read a memory mapped IO register
429 *
430 * @adev: amdgpu_device pointer
431 * @offset: byte aligned register offset
432 *
433 * Returns the 8 bit value from the offset specified.
434 */
7cbbc745
AG
435uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
436{
56b53c0b 437 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
438 return 0;
439
421a2a30
ML
440 if (offset < adev->rmmio_size)
441 return (readb(adev->rmmio + offset));
442 BUG();
443}
444
445/*
446 * MMIO register write with bytes helper functions
447 * @offset: byte offset from MMIO start
448 * @value: the value to be written to the register
b8920e1e
SS
449 */
450
e3ecdffa
AD
451/**
452 * amdgpu_mm_wreg8 - write a memory mapped IO register
453 *
454 * @adev: amdgpu_device pointer
455 * @offset: byte aligned register offset
456 * @value: 8 bit value to write
457 *
458 * Writes the value specified to the offset specified.
459 */
7cbbc745
AG
460void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
461{
56b53c0b 462 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
463 return;
464
421a2a30
ML
465 if (offset < adev->rmmio_size)
466 writeb(value, adev->rmmio + offset);
467 else
468 BUG();
469}
470
e3ecdffa 471/**
f7ee1874 472 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
e3ecdffa
AD
473 *
474 * @adev: amdgpu_device pointer
475 * @reg: dword aligned register offset
476 * @v: 32 bit value to write to the register
477 * @acc_flags: access flags which require special behavior
478 *
479 * Writes the value specified to the offset specified.
480 */
f7ee1874
HZ
481void amdgpu_device_wreg(struct amdgpu_device *adev,
482 uint32_t reg, uint32_t v,
483 uint32_t acc_flags)
d38ceaf9 484{
56b53c0b 485 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
486 return;
487
f7ee1874
HZ
488 if ((reg * 4) < adev->rmmio_size) {
489 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
490 amdgpu_sriov_runtime(adev) &&
d0fb18b5 491 down_read_trylock(&adev->reset_domain->sem)) {
f7ee1874 492 amdgpu_kiq_wreg(adev, reg, v);
d0fb18b5 493 up_read(&adev->reset_domain->sem);
f7ee1874
HZ
494 } else {
495 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
496 }
497 } else {
498 adev->pcie_wreg(adev, reg * 4, v);
81202807 499 }
bc992ba5 500
f7ee1874 501 trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
2e0cc4d4 502}
d38ceaf9 503
03f2abb0 504/**
4cc9f86f 505 * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
2e0cc4d4 506 *
71579346
RB
507 * @adev: amdgpu_device pointer
508 * @reg: mmio/rlc register
509 * @v: value to write
8057a9d6 510 * @xcc_id: xcc accelerated compute core id
71579346
RB
511 *
512 * This function is invoked only for debugfs register access.
03f2abb0 513 */
f7ee1874 514void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
8ed49dd1
VL
515 uint32_t reg, uint32_t v,
516 uint32_t xcc_id)
2e0cc4d4 517{
56b53c0b 518 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
519 return;
520
2e0cc4d4 521 if (amdgpu_sriov_fullaccess(adev) &&
f7ee1874
HZ
522 adev->gfx.rlc.funcs &&
523 adev->gfx.rlc.funcs->is_rlcg_access_range) {
2e0cc4d4 524 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
8ed49dd1 525 return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
4cc9f86f
TSD
526 } else if ((reg * 4) >= adev->rmmio_size) {
527 adev->pcie_wreg(adev, reg * 4, v);
f7ee1874
HZ
528 } else {
529 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
47ed4e1c 530 }
d38ceaf9
AD
531}
532
1bba3683
HZ
533/**
534 * amdgpu_device_indirect_rreg - read an indirect register
535 *
536 * @adev: amdgpu_device pointer
22f453fb 537 * @reg_addr: indirect register address to read from
1bba3683
HZ
538 *
539 * Returns the value of indirect register @reg_addr
540 */
541u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
1bba3683
HZ
542 u32 reg_addr)
543{
65ba96e9 544 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
545 void __iomem *pcie_index_offset;
546 void __iomem *pcie_data_offset;
65ba96e9
HZ
547 u32 r;
548
549 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
550 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1bba3683
HZ
551
552 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
553 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
554 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
555
556 writel(reg_addr, pcie_index_offset);
557 readl(pcie_index_offset);
558 r = readl(pcie_data_offset);
559 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
560
561 return r;
562}
563
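/*
 * Illustrative sketch (added, not part of the original file): the helper above
 * implements the classic index/data window pattern - the target address is
 * written to the PCIE index register, then the value is read back through the
 * PCIE data register, all under pcie_idx_lock.  ASIC code typically points
 * its pcie register callbacks at these helpers, roughly like the hypothetical
 * wiring below:
 *
 *	static u32 soc_pcie_rreg(struct amdgpu_device *adev, u32 reg)
 *	{
 *		return amdgpu_device_indirect_rreg(adev, reg);
 *	}
 *	...
 *	adev->pcie_rreg = soc_pcie_rreg;
 *
 * after which RREG32_PCIE()-style accesses funnel through this path.
 */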
0c552ed3
LM
564u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
565 u64 reg_addr)
566{
567 unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
568 u32 r;
569 void __iomem *pcie_index_offset;
570 void __iomem *pcie_index_hi_offset;
571 void __iomem *pcie_data_offset;
572
573 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
574 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
d57e24aa 575 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
0c552ed3
LM
576 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
577 else
578 pcie_index_hi = 0;
579
580 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
581 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
582 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
583 if (pcie_index_hi != 0)
584 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
585 pcie_index_hi * 4;
586
587 writel(reg_addr, pcie_index_offset);
588 readl(pcie_index_offset);
589 if (pcie_index_hi != 0) {
590 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
591 readl(pcie_index_hi_offset);
592 }
593 r = readl(pcie_data_offset);
594
595 /* clear the high bits */
596 if (pcie_index_hi != 0) {
597 writel(0, pcie_index_hi_offset);
598 readl(pcie_index_hi_offset);
599 }
600
601 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
602
603 return r;
604}
605
1bba3683
HZ
606/**
607 * amdgpu_device_indirect_rreg64 - read a 64 bit indirect register
608 *
609 * @adev: amdgpu_device pointer
22f453fb 610 * @reg_addr: indirect register address to read from
1bba3683
HZ
611 *
612 * Returns the value of indirect register @reg_addr
613 */
614u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
1bba3683
HZ
615 u32 reg_addr)
616{
65ba96e9 617 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
618 void __iomem *pcie_index_offset;
619 void __iomem *pcie_data_offset;
65ba96e9
HZ
620 u64 r;
621
622 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
623 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1bba3683
HZ
624
625 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
626 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
627 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
628
629 /* read low 32 bits */
630 writel(reg_addr, pcie_index_offset);
631 readl(pcie_index_offset);
632 r = readl(pcie_data_offset);
633 /* read high 32 bits */
634 writel(reg_addr + 4, pcie_index_offset);
635 readl(pcie_index_offset);
636 r |= ((u64)readl(pcie_data_offset) << 32);
637 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
638
639 return r;
640}
641
a76b2870
CL
642u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
643 u64 reg_addr)
644{
645 unsigned long flags, pcie_index, pcie_data;
646 unsigned long pcie_index_hi = 0;
647 void __iomem *pcie_index_offset;
648 void __iomem *pcie_index_hi_offset;
649 void __iomem *pcie_data_offset;
650 u64 r;
651
652 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
653 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
654 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
655 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
656
657 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
658 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
659 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
660 if (pcie_index_hi != 0)
661 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
662 pcie_index_hi * 4;
663
664 /* read low 32 bits */
665 writel(reg_addr, pcie_index_offset);
666 readl(pcie_index_offset);
667 if (pcie_index_hi != 0) {
668 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
669 readl(pcie_index_hi_offset);
670 }
671 r = readl(pcie_data_offset);
672 /* read high 32 bits */
673 writel(reg_addr + 4, pcie_index_offset);
674 readl(pcie_index_offset);
675 if (pcie_index_hi != 0) {
676 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
677 readl(pcie_index_hi_offset);
678 }
679 r |= ((u64)readl(pcie_data_offset) << 32);
680
681 /* clear the high bits */
682 if (pcie_index_hi != 0) {
683 writel(0, pcie_index_hi_offset);
684 readl(pcie_index_hi_offset);
685 }
686
687 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
688
689 return r;
690}
691
1bba3683
HZ
692/**
693 * amdgpu_device_indirect_wreg - write an indirect register address
694 *
695 * @adev: amdgpu_device pointer
1bba3683
HZ
696 * @reg_addr: indirect register offset
697 * @reg_data: indirect register data
698 *
699 */
700void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
1bba3683
HZ
701 u32 reg_addr, u32 reg_data)
702{
65ba96e9 703 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
704 void __iomem *pcie_index_offset;
705 void __iomem *pcie_data_offset;
706
65ba96e9
HZ
707 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
708 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
709
1bba3683
HZ
710 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
711 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
712 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
713
714 writel(reg_addr, pcie_index_offset);
715 readl(pcie_index_offset);
716 writel(reg_data, pcie_data_offset);
717 readl(pcie_data_offset);
718 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
719}
720
0c552ed3
LM
721void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
722 u64 reg_addr, u32 reg_data)
723{
724 unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
725 void __iomem *pcie_index_offset;
726 void __iomem *pcie_index_hi_offset;
727 void __iomem *pcie_data_offset;
728
729 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
730 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
d57e24aa 731 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
0c552ed3
LM
732 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
733 else
734 pcie_index_hi = 0;
735
736 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
737 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
738 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
739 if (pcie_index_hi != 0)
740 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
741 pcie_index_hi * 4;
742
743 writel(reg_addr, pcie_index_offset);
744 readl(pcie_index_offset);
745 if (pcie_index_hi != 0) {
746 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
747 readl(pcie_index_hi_offset);
748 }
749 writel(reg_data, pcie_data_offset);
750 readl(pcie_data_offset);
751
752 /* clear the high bits */
753 if (pcie_index_hi != 0) {
754 writel(0, pcie_index_hi_offset);
755 readl(pcie_index_hi_offset);
756 }
757
758 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
759}
760
1bba3683
HZ
761/**
762 * amdgpu_device_indirect_wreg64 - write a 64 bit indirect register address
763 *
764 * @adev: amdgpu_device pointer
1bba3683
HZ
765 * @reg_addr: indirect register offset
766 * @reg_data: indirect register data
767 *
768 */
769void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
1bba3683
HZ
770 u32 reg_addr, u64 reg_data)
771{
65ba96e9 772 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
773 void __iomem *pcie_index_offset;
774 void __iomem *pcie_data_offset;
775
65ba96e9
HZ
776 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
777 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
778
1bba3683
HZ
779 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
780 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
781 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
782
783 /* write low 32 bits */
784 writel(reg_addr, pcie_index_offset);
785 readl(pcie_index_offset);
786 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
787 readl(pcie_data_offset);
788 /* write high 32 bits */
789 writel(reg_addr + 4, pcie_index_offset);
790 readl(pcie_index_offset);
791 writel((u32)(reg_data >> 32), pcie_data_offset);
792 readl(pcie_data_offset);
793 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
794}
795
a76b2870
CL
796void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
797 u64 reg_addr, u64 reg_data)
798{
799 unsigned long flags, pcie_index, pcie_data;
800 unsigned long pcie_index_hi = 0;
801 void __iomem *pcie_index_offset;
802 void __iomem *pcie_index_hi_offset;
803 void __iomem *pcie_data_offset;
804
805 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
806 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
807 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
808 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
809
810 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
811 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
812 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
813 if (pcie_index_hi != 0)
814 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
815 pcie_index_hi * 4;
816
817 /* write low 32 bits */
818 writel(reg_addr, pcie_index_offset);
819 readl(pcie_index_offset);
820 if (pcie_index_hi != 0) {
821 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
822 readl(pcie_index_hi_offset);
823 }
824 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
825 readl(pcie_data_offset);
826 /* write high 32 bits */
827 writel(reg_addr + 4, pcie_index_offset);
828 readl(pcie_index_offset);
829 if (pcie_index_hi != 0) {
830 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
831 readl(pcie_index_hi_offset);
832 }
833 writel((u32)(reg_data >> 32), pcie_data_offset);
834 readl(pcie_data_offset);
835
836 /* clear the high bits */
837 if (pcie_index_hi != 0) {
838 writel(0, pcie_index_hi_offset);
839 readl(pcie_index_hi_offset);
840 }
841
842 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
843}
844
dabc114e
HZ
845/**
846 * amdgpu_device_get_rev_id - query device rev_id
847 *
848 * @adev: amdgpu_device pointer
849 *
850 * Return device rev_id
851 */
852u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
853{
854 return adev->nbio.funcs->get_rev_id(adev);
855}
856
d38ceaf9
AD
857/**
858 * amdgpu_invalid_rreg - dummy reg read function
859 *
982a820b 860 * @adev: amdgpu_device pointer
d38ceaf9
AD
861 * @reg: offset of register
862 *
863 * Dummy register read function. Used for register blocks
864 * that certain asics don't have (all asics).
865 * Returns the value in the register.
866 */
867static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
868{
869 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
870 BUG();
871 return 0;
872}
873
0c552ed3
LM
874static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
875{
876 DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
877 BUG();
878 return 0;
879}
880
d38ceaf9
AD
881/**
882 * amdgpu_invalid_wreg - dummy reg write function
883 *
982a820b 884 * @adev: amdgpu_device pointer
d38ceaf9
AD
885 * @reg: offset of register
886 * @v: value to write to the register
887 *
888 * Dummy register write function. Used for register blocks
889 * that certain asics don't have (all asics).
890 */
891static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
892{
893 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
894 reg, v);
895 BUG();
896}
897
0c552ed3
LM
898static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
899{
900 DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
901 reg, v);
902 BUG();
903}
904
4fa1c6a6
TZ
905/**
906 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
907 *
982a820b 908 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
909 * @reg: offset of register
910 *
911 * Dummy register read function. Used for register blocks
912 * that certain asics don't have (all asics).
913 * Returns the value in the register.
914 */
915static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
916{
917 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
918 BUG();
919 return 0;
920}
921
a76b2870
CL
922static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg)
923{
924 DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
925 BUG();
926 return 0;
927}
928
4fa1c6a6
TZ
929/**
930 * amdgpu_invalid_wreg64 - dummy reg write function
931 *
982a820b 932 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
933 * @reg: offset of register
934 * @v: value to write to the register
935 *
936 * Dummy register read function. Used for register blocks
937 * that certain asics don't have (all asics).
938 */
939static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
940{
941 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
942 reg, v);
943 BUG();
944}
945
a76b2870
CL
946static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v)
947{
948 DRM_ERROR("Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n",
949 reg, v);
950 BUG();
951}
952
d38ceaf9
AD
953/**
954 * amdgpu_block_invalid_rreg - dummy reg read function
955 *
982a820b 956 * @adev: amdgpu_device pointer
d38ceaf9
AD
957 * @block: offset of instance
958 * @reg: offset of register
959 *
960 * Dummy register read function. Used for register blocks
961 * that certain asics don't have (all asics).
962 * Returns the value in the register.
963 */
964static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
965 uint32_t block, uint32_t reg)
966{
967 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
968 reg, block);
969 BUG();
970 return 0;
971}
972
973/**
974 * amdgpu_block_invalid_wreg - dummy reg write function
975 *
982a820b 976 * @adev: amdgpu_device pointer
d38ceaf9
AD
977 * @block: offset of instance
978 * @reg: offset of register
979 * @v: value to write to the register
980 *
981 * Dummy register write function. Used for register blocks
982 * that certain asics don't have (all asics).
983 */
984static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
985 uint32_t block,
986 uint32_t reg, uint32_t v)
987{
988 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
989 reg, block, v);
990 BUG();
991}
992
4d2997ab
AD
993/**
994 * amdgpu_device_asic_init - Wrapper for atom asic_init
995 *
982a820b 996 * @adev: amdgpu_device pointer
4d2997ab
AD
997 *
998 * Does any asic specific work and then calls atom asic init.
999 */
1000static int amdgpu_device_asic_init(struct amdgpu_device *adev)
1001{
15c5c5f5
LL
1002 int ret;
1003
4d2997ab
AD
1004 amdgpu_asic_pre_asic_init(adev);
1005
5db392a0 1006 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) ||
15c5c5f5
LL
1007 adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) {
1008 amdgpu_psp_wait_for_bootloader(adev);
1009 ret = amdgpu_atomfirmware_asic_init(adev, true);
1010 return ret;
1011 } else {
85d1bcc6 1012 return amdgpu_atom_asic_init(adev->mode_info.atom_context);
15c5c5f5
LL
1013 }
1014
1015 return 0;
4d2997ab
AD
1016}
1017
e3ecdffa 1018/**
7ccfd79f 1019 * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
e3ecdffa 1020 *
982a820b 1021 * @adev: amdgpu_device pointer
e3ecdffa
AD
1022 *
1023 * Allocates a scratch page of VRAM for use by various things in the
1024 * driver.
1025 */
7ccfd79f 1026static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
d38ceaf9 1027{
7ccfd79f
CK
1028 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
1029 AMDGPU_GEM_DOMAIN_VRAM |
1030 AMDGPU_GEM_DOMAIN_GTT,
1031 &adev->mem_scratch.robj,
1032 &adev->mem_scratch.gpu_addr,
1033 (void **)&adev->mem_scratch.ptr);
d38ceaf9
AD
1034}
1035
e3ecdffa 1036/**
7ccfd79f 1037 * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
e3ecdffa 1038 *
982a820b 1039 * @adev: amdgpu_device pointer
e3ecdffa
AD
1040 *
1041 * Frees the VRAM scratch page.
1042 */
7ccfd79f 1043static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 1044{
7ccfd79f 1045 amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
d38ceaf9
AD
1046}
1047
1048/**
9c3f2b54 1049 * amdgpu_device_program_register_sequence - program an array of registers.
d38ceaf9
AD
1050 *
1051 * @adev: amdgpu_device pointer
1052 * @registers: pointer to the register array
1053 * @array_size: size of the register array
1054 *
b8920e1e 1055 * Programs an array of registers with and/or masks.
d38ceaf9
AD
1056 * This is a helper for setting golden registers.
1057 */
9c3f2b54
AD
1058void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
1059 const u32 *registers,
1060 const u32 array_size)
d38ceaf9
AD
1061{
1062 u32 tmp, reg, and_mask, or_mask;
1063 int i;
1064
1065 if (array_size % 3)
1066 return;
1067
47fc644f 1068 for (i = 0; i < array_size; i += 3) {
d38ceaf9
AD
1069 reg = registers[i + 0];
1070 and_mask = registers[i + 1];
1071 or_mask = registers[i + 2];
1072
1073 if (and_mask == 0xffffffff) {
1074 tmp = or_mask;
1075 } else {
1076 tmp = RREG32(reg);
1077 tmp &= ~and_mask;
e0d07657
HZ
1078 if (adev->family >= AMDGPU_FAMILY_AI)
1079 tmp |= (or_mask & and_mask);
1080 else
1081 tmp |= or_mask;
d38ceaf9
AD
1082 }
1083 WREG32(reg, tmp);
1084 }
1085}
1086
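/*
 * Illustrative sketch (added, not part of the original file): the register
 * array consumed above is a flat list of {offset, and_mask, or_mask}
 * triplets, which is why array_size must be a multiple of 3.  A made-up
 * golden register list (the offsets below are invented for the example)
 * would look like:
 *
 *	static const u32 example_golden_regs[] = {
 *		// offset      and_mask     or_mask
 *		0x00002040, 0xffffffff, 0x00000100,  // replace whole register
 *		0x000020c0, 0x0000ff00, 0x00003200,  // RMW: clear field, set bits
 *	};
 *
 *	amdgpu_device_program_register_sequence(adev, example_golden_regs,
 *						ARRAY_SIZE(example_golden_regs));
 */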
e3ecdffa
AD
1087/**
1088 * amdgpu_device_pci_config_reset - reset the GPU
1089 *
1090 * @adev: amdgpu_device pointer
1091 *
1092 * Resets the GPU using the pci config reset sequence.
1093 * Only applicable to asics prior to vega10.
1094 */
8111c387 1095void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
1096{
1097 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
1098}
1099
af484df8
AD
1100/**
1101 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1102 *
1103 * @adev: amdgpu_device pointer
1104 *
1105 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1106 */
1107int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1108{
1109 return pci_reset_function(adev->pdev);
1110}
1111
d38ceaf9 1112/*
06ec9070 1113 * amdgpu_device_wb_*()
455a7bc2 1114 * Writeback is the method by which the GPU updates special pages in memory
ea81a173 1115 * with the status of certain GPU events (fences, ring pointers,etc.).
d38ceaf9
AD
1116 */
1117
1118/**
06ec9070 1119 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
1120 *
1121 * @adev: amdgpu_device pointer
1122 *
1123 * Disables Writeback and frees the Writeback memory (all asics).
1124 * Used at driver shutdown.
1125 */
06ec9070 1126static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1127{
1128 if (adev->wb.wb_obj) {
a76ed485
AD
1129 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1130 &adev->wb.gpu_addr,
1131 (void **)&adev->wb.wb);
d38ceaf9
AD
1132 adev->wb.wb_obj = NULL;
1133 }
1134}
1135
1136/**
03f2abb0 1137 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
d38ceaf9
AD
1138 *
1139 * @adev: amdgpu_device pointer
1140 *
455a7bc2 1141 * Initializes writeback and allocates writeback memory (all asics).
d38ceaf9
AD
1142 * Used at driver startup.
1143 * Returns 0 on success or an -error on failure.
1144 */
06ec9070 1145static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
1146{
1147 int r;
1148
1149 if (adev->wb.wb_obj == NULL) {
97407b63
AD
1150 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1151 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
1152 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1153 &adev->wb.wb_obj, &adev->wb.gpu_addr,
1154 (void **)&adev->wb.wb);
d38ceaf9
AD
1155 if (r) {
1156 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1157 return r;
1158 }
d38ceaf9
AD
1159
1160 adev->wb.num_wb = AMDGPU_MAX_WB;
1161 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1162
1163 /* clear wb memory */
73469585 1164 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
1165 }
1166
1167 return 0;
1168}
1169
1170/**
131b4b36 1171 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
1172 *
1173 * @adev: amdgpu_device pointer
1174 * @wb: wb index
1175 *
1176 * Allocate a wb slot for use by the driver (all asics).
1177 * Returns 0 on success or -EINVAL on failure.
1178 */
131b4b36 1179int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9
AD
1180{
1181 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 1182
97407b63 1183 if (offset < adev->wb.num_wb) {
7014285a 1184 __set_bit(offset, adev->wb.used);
63ae07ca 1185 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
1186 return 0;
1187 } else {
1188 return -EINVAL;
1189 }
1190}
1191
d38ceaf9 1192/**
131b4b36 1193 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
1194 *
1195 * @adev: amdgpu_device pointer
1196 * @wb: wb index
1197 *
1198 * Free a wb slot allocated for use by the driver (all asics)
1199 */
131b4b36 1200void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 1201{
73469585 1202 wb >>= 3;
d38ceaf9 1203 if (wb < adev->wb.num_wb)
73469585 1204 __clear_bit(wb, adev->wb.used);
d38ceaf9
AD
1205}
1206
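/*
 * Illustrative usage sketch (added, not part of the original file): a ring or
 * IP block that needs a writeback slot allocates one, converts the returned
 * dword offset into CPU and GPU addresses, and frees it on teardown.  Error
 * handling is trimmed for brevity.
 *
 *	u32 wb;
 *	u32 *cpu_ptr;
 *	u64 gpu_addr;
 *
 *	if (amdgpu_device_wb_get(adev, &wb))
 *		return -EINVAL;
 *	cpu_ptr  = &adev->wb.wb[wb];		// CPU view of the slot
 *	gpu_addr = adev->wb.gpu_addr + wb * 4;	// GPU address of the slot
 *	...
 *	amdgpu_device_wb_free(adev, wb);
 */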
d6895ad3
CK
1207/**
1208 * amdgpu_device_resize_fb_bar - try to resize FB BAR
1209 *
1210 * @adev: amdgpu_device pointer
1211 *
1212 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1213 * to fail, but if any of the BARs is not accessible after the resize we abort
1214 * driver loading by returning -ENODEV.
1215 */
1216int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1217{
453f617a 1218 int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
31b8adab
CK
1219 struct pci_bus *root;
1220 struct resource *res;
b8920e1e 1221 unsigned int i;
d6895ad3
CK
1222 u16 cmd;
1223 int r;
1224
822130b5
AB
1225 if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1226 return 0;
1227
0c03b912 1228 /* Bypass for VF */
1229 if (amdgpu_sriov_vf(adev))
1230 return 0;
1231
b7221f2b
AD
1232 /* skip if the bios has already enabled large BAR */
1233 if (adev->gmc.real_vram_size &&
1234 (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1235 return 0;
1236
31b8adab
CK
1237 /* Check if the root BUS has 64bit memory resources */
1238 root = adev->pdev->bus;
1239 while (root->parent)
1240 root = root->parent;
1241
1242 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 1243 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
1244 res->start > 0x100000000ull)
1245 break;
1246 }
1247
1248 /* Trying to resize is pointless without a root hub window above 4GB */
1249 if (!res)
1250 return 0;
1251
453f617a
ND
1252 /* Limit the BAR size to what is available */
1253 rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1254 rbar_size);
1255
d6895ad3
CK
1256 /* Disable memory decoding while we change the BAR addresses and size */
1257 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1258 pci_write_config_word(adev->pdev, PCI_COMMAND,
1259 cmd & ~PCI_COMMAND_MEMORY);
1260
1261 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
43c064db 1262 amdgpu_doorbell_fini(adev);
d6895ad3
CK
1263 if (adev->asic_type >= CHIP_BONAIRE)
1264 pci_release_resource(adev->pdev, 2);
1265
1266 pci_release_resource(adev->pdev, 0);
1267
1268 r = pci_resize_resource(adev->pdev, 0, rbar_size);
1269 if (r == -ENOSPC)
1270 DRM_INFO("Not enough PCI address space for a large BAR.");
1271 else if (r && r != -ENOTSUPP)
1272 DRM_ERROR("Problem resizing BAR0 (%d).", r);
1273
1274 pci_assign_unassigned_bus_resources(adev->pdev->bus);
1275
1276 /* When the doorbell or fb BAR isn't available we have no chance of
1277 * using the device.
1278 */
43c064db 1279 r = amdgpu_doorbell_init(adev);
d6895ad3
CK
1280 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1281 return -ENODEV;
1282
1283 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1284
1285 return 0;
1286}
a05502e5 1287
9535a86a
SZ
1288static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
1289{
b8920e1e 1290 if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
9535a86a 1291 return false;
9535a86a
SZ
1292
1293 return true;
1294}
1295
d38ceaf9
AD
1296/*
1297 * GPU helpers function.
1298 */
1299/**
39c640c0 1300 * amdgpu_device_need_post - check if the hw need post or not
d38ceaf9
AD
1301 *
1302 * @adev: amdgpu_device pointer
1303 *
c836fec5
JQ
1304 * Check if the asic has been initialized (all asics) at driver startup
1305 * or post is needed if hw reset is performed.
1306 * Returns true if need or false if not.
d38ceaf9 1307 */
39c640c0 1308bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
1309{
1310 uint32_t reg;
1311
bec86378
ML
1312 if (amdgpu_sriov_vf(adev))
1313 return false;
1314
9535a86a
SZ
1315 if (!amdgpu_device_read_bios(adev))
1316 return false;
1317
bec86378 1318 if (amdgpu_passthrough(adev)) {
1da2c326
ML
1319 /* for FIJI: in the whole-GPU pass-through virtualization case, after a VM
1320 * reboot some old SMC firmware still needs the driver to do a vPost or the
1321 * GPU hangs. SMC firmware versions above 22.15 don't have this flaw, so
1322 * force vPost for SMC versions below 22.15
bec86378
ML
1323 */
1324 if (adev->asic_type == CHIP_FIJI) {
1325 int err;
1326 uint32_t fw_ver;
b8920e1e 1327
bec86378
ML
1328 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1329 /* force vPost if an error occurred */
1330 if (err)
1331 return true;
1332
1333 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1da2c326
ML
1334 if (fw_ver < 0x00160e00)
1335 return true;
bec86378 1336 }
bec86378 1337 }
91fe77eb 1338
e3c1b071 1339 /* Don't post if we need to reset whole hive on init */
1340 if (adev->gmc.xgmi.pending_reset)
1341 return false;
1342
91fe77eb 1343 if (adev->has_hw_reset) {
1344 adev->has_hw_reset = false;
1345 return true;
1346 }
1347
1348 /* bios scratch used on CIK+ */
1349 if (adev->asic_type >= CHIP_BONAIRE)
1350 return amdgpu_atombios_scratch_need_asic_init(adev);
1351
1352 /* check MEM_SIZE for older asics */
1353 reg = amdgpu_asic_get_config_memsize(adev);
1354
1355 if ((reg != 0) && (reg != 0xffffffff))
1356 return false;
1357
1358 return true;
70e64c4d
ML
1359}
1360
5d1eb4c4
ML
1361/*
1362 * Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic
1363 * speed switching. Until we have confirmation from Intel that a specific host
1364 * supports it, it's safer that we keep it disabled for all.
1365 *
1366 * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1367 * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1368 */
1369bool amdgpu_device_pcie_dynamic_switching_supported(void)
1370{
1371#if IS_ENABLED(CONFIG_X86)
1372 struct cpuinfo_x86 *c = &cpu_data(0);
1373
1374 if (c->x86_vendor == X86_VENDOR_INTEL)
1375 return false;
1376#endif
1377 return true;
1378}
1379
0ab5d711
ML
1380/**
1381 * amdgpu_device_should_use_aspm - check if the device should program ASPM
1382 *
1383 * @adev: amdgpu_device pointer
1384 *
1385 * Confirm whether the module parameter and pcie bridge agree that ASPM should
1386 * be set for this device.
1387 *
1388 * Returns true if it should be used or false if not.
1389 */
1390bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1391{
1392 switch (amdgpu_aspm) {
1393 case -1:
1394 break;
1395 case 0:
1396 return false;
1397 case 1:
1398 return true;
1399 default:
1400 return false;
1401 }
1402 return pcie_aspm_enabled(adev->pdev);
1403}
1404
3ad5dcfe
KHF
1405bool amdgpu_device_aspm_support_quirk(void)
1406{
1407#if IS_ENABLED(CONFIG_X86)
1408 struct cpuinfo_x86 *c = &cpu_data(0);
1409
1410 return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
1411#else
1412 return true;
1413#endif
1414}
1415
d38ceaf9
AD
1416/* if we get transitioned to only one device, take VGA back */
1417/**
06ec9070 1418 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9 1419 *
bf44e8ce 1420 * @pdev: PCI device pointer
d38ceaf9
AD
1421 * @state: enable/disable vga decode
1422 *
1423 * Enable/disable vga decode (all asics).
1424 * Returns VGA resource flags.
1425 */
bf44e8ce
CH
1426static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1427 bool state)
d38ceaf9 1428{
bf44e8ce 1429 struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
b8920e1e 1430
d38ceaf9
AD
1431 amdgpu_asic_set_vga_state(adev, state);
1432 if (state)
1433 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1434 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1435 else
1436 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1437}
1438
e3ecdffa
AD
1439/**
1440 * amdgpu_device_check_block_size - validate the vm block size
1441 *
1442 * @adev: amdgpu_device pointer
1443 *
1444 * Validates the vm block size specified via module parameter.
1445 * The vm block size defines number of bits in page table versus page directory,
1446 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1447 * page table and the remaining bits are in the page directory.
1448 */
06ec9070 1449static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
1450{
1451 /* defines number of bits in page table versus page directory,
1452 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
b8920e1e
SS
1453 * page table and the remaining bits are in the page directory
1454 */
bab4fee7
JZ
1455 if (amdgpu_vm_block_size == -1)
1456 return;
a1adf8be 1457
bab4fee7 1458 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
1459 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1460 amdgpu_vm_block_size);
97489129 1461 amdgpu_vm_block_size = -1;
a1adf8be 1462 }
a1adf8be
CZ
1463}
1464
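/*
 * Worked example (added, not part of the original file): with 4KB pages there
 * is a 12 bit in-page offset, so a page table level covering
 * amdgpu_vm_block_size bits maps 2^(12 + block_size) bytes.  For the minimum
 * accepted value of 9 that is 2^21 bytes = 2MB per page table, which is why
 * values below 9 are rejected above.
 */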
e3ecdffa
AD
1465/**
1466 * amdgpu_device_check_vm_size - validate the vm size
1467 *
1468 * @adev: amdgpu_device pointer
1469 *
1470 * Validates the vm size in GB specified via module parameter.
1471 * The VM size is the size of the GPU virtual memory space in GB.
1472 */
06ec9070 1473static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 1474{
64dab074
AD
1475 /* no need to check the default value */
1476 if (amdgpu_vm_size == -1)
1477 return;
1478
83ca145d
ZJ
1479 if (amdgpu_vm_size < 1) {
1480 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1481 amdgpu_vm_size);
f3368128 1482 amdgpu_vm_size = -1;
83ca145d 1483 }
83ca145d
ZJ
1484}
1485
7951e376
RZ
1486static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1487{
1488 struct sysinfo si;
a9d4fe2f 1489 bool is_os_64 = (sizeof(void *) == 8);
7951e376
RZ
1490 uint64_t total_memory;
1491 uint64_t dram_size_seven_GB = 0x1B8000000;
1492 uint64_t dram_size_three_GB = 0xB8000000;
1493
1494 if (amdgpu_smu_memory_pool_size == 0)
1495 return;
1496
1497 if (!is_os_64) {
1498 DRM_WARN("Not 64-bit OS, feature not supported\n");
1499 goto def_value;
1500 }
1501 si_meminfo(&si);
1502 total_memory = (uint64_t)si.totalram * si.mem_unit;
1503
1504 if ((amdgpu_smu_memory_pool_size == 1) ||
1505 (amdgpu_smu_memory_pool_size == 2)) {
1506 if (total_memory < dram_size_three_GB)
1507 goto def_value1;
1508 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1509 (amdgpu_smu_memory_pool_size == 8)) {
1510 if (total_memory < dram_size_seven_GB)
1511 goto def_value1;
1512 } else {
1513 DRM_WARN("Smu memory pool size not supported\n");
1514 goto def_value;
1515 }
1516 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1517
1518 return;
1519
1520def_value1:
1521 DRM_WARN("No enough system memory\n");
1522def_value:
1523 adev->pm.smu_prv_buffer_size = 0;
1524}
1525
9f6a7857
HR
1526static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1527{
1528 if (!(adev->flags & AMD_IS_APU) ||
1529 adev->asic_type < CHIP_RAVEN)
1530 return 0;
1531
1532 switch (adev->asic_type) {
1533 case CHIP_RAVEN:
1534 if (adev->pdev->device == 0x15dd)
1535 adev->apu_flags |= AMD_APU_IS_RAVEN;
1536 if (adev->pdev->device == 0x15d8)
1537 adev->apu_flags |= AMD_APU_IS_PICASSO;
1538 break;
1539 case CHIP_RENOIR:
1540 if ((adev->pdev->device == 0x1636) ||
1541 (adev->pdev->device == 0x164c))
1542 adev->apu_flags |= AMD_APU_IS_RENOIR;
1543 else
1544 adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1545 break;
1546 case CHIP_VANGOGH:
1547 adev->apu_flags |= AMD_APU_IS_VANGOGH;
1548 break;
1549 case CHIP_YELLOW_CARP:
1550 break;
d0f56dc2 1551 case CHIP_CYAN_SKILLFISH:
dfcc3e8c
AD
1552 if ((adev->pdev->device == 0x13FE) ||
1553 (adev->pdev->device == 0x143F))
d0f56dc2
TZ
1554 adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1555 break;
9f6a7857 1556 default:
4eaf21b7 1557 break;
9f6a7857
HR
1558 }
1559
1560 return 0;
1561}
1562
d38ceaf9 1563/**
06ec9070 1564 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
1565 *
1566 * @adev: amdgpu_device pointer
1567 *
1568 * Validates certain module parameters and updates
1569 * the associated values used by the driver (all asics).
1570 */
912dfc84 1571static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1572{
5b011235
CZ
1573 if (amdgpu_sched_jobs < 4) {
1574 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1575 amdgpu_sched_jobs);
1576 amdgpu_sched_jobs = 4;
47fc644f 1577 } else if (!is_power_of_2(amdgpu_sched_jobs)) {
5b011235
CZ
1578 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1579 amdgpu_sched_jobs);
1580 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1581 }
d38ceaf9 1582
83e74db6 1583 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
1584 /* gart size must be greater or equal to 32M */
1585 dev_warn(adev->dev, "gart size (%d) too small\n",
1586 amdgpu_gart_size);
83e74db6 1587 amdgpu_gart_size = -1;
d38ceaf9
AD
1588 }
1589
36d38372 1590 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1591 /* gtt size must be greater or equal to 32M */
36d38372
CK
1592 dev_warn(adev->dev, "gtt size (%d) too small\n",
1593 amdgpu_gtt_size);
1594 amdgpu_gtt_size = -1;
d38ceaf9
AD
1595 }
1596
d07f14be
RH
1597 /* valid range is between 4 and 9 inclusive */
1598 if (amdgpu_vm_fragment_size != -1 &&
1599 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1600 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1601 amdgpu_vm_fragment_size = -1;
1602 }
1603
5d5bd5e3
KW
1604 if (amdgpu_sched_hw_submission < 2) {
1605 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1606 amdgpu_sched_hw_submission);
1607 amdgpu_sched_hw_submission = 2;
1608 } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1609 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1610 amdgpu_sched_hw_submission);
1611 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1612 }
1613
2656fd23
AG
1614 if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1615 dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1616 amdgpu_reset_method = -1;
1617 }
1618
7951e376
RZ
1619 amdgpu_device_check_smu_prv_buffer_size(adev);
1620
06ec9070 1621 amdgpu_device_check_vm_size(adev);
d38ceaf9 1622
06ec9070 1623 amdgpu_device_check_block_size(adev);
6a7f76e7 1624
19aede77 1625 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84 1626
e3c00faa 1627 return 0;
d38ceaf9
AD
1628}
1629
1630/**
1631 * amdgpu_switcheroo_set_state - set switcheroo state
1632 *
1633 * @pdev: pci dev pointer
1694467b 1634 * @state: vga_switcheroo state
d38ceaf9 1635 *
12024b17 1636 * Callback for the switcheroo driver. Suspends or resumes
d38ceaf9
AD
1637 * the asics before or after it is powered up using ACPI methods.
1638 */
8aba21b7
LT
1639static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1640 enum vga_switcheroo_state state)
d38ceaf9
AD
1641{
1642 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 1643 int r;
d38ceaf9 1644
b98c6299 1645 if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
1646 return;
1647
1648 if (state == VGA_SWITCHEROO_ON) {
dd4fa6c1 1649 pr_info("switched on\n");
d38ceaf9
AD
1650 /* don't suspend or resume card normally */
1651 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1652
8f66090b
TZ
1653 pci_set_power_state(pdev, PCI_D0);
1654 amdgpu_device_load_pci_state(pdev);
1655 r = pci_enable_device(pdev);
de185019
AD
1656 if (r)
1657 DRM_WARN("pci_enable_device failed (%d)\n", r);
1658 amdgpu_device_resume(dev, true);
d38ceaf9 1659
d38ceaf9 1660 dev->switch_power_state = DRM_SWITCH_POWER_ON;
d38ceaf9 1661 } else {
dd4fa6c1 1662 pr_info("switched off\n");
d38ceaf9 1663 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
de185019 1664 amdgpu_device_suspend(dev, true);
8f66090b 1665 amdgpu_device_cache_pci_state(pdev);
de185019 1666 /* Shut down the device */
8f66090b
TZ
1667 pci_disable_device(pdev);
1668 pci_set_power_state(pdev, PCI_D3cold);
d38ceaf9
AD
1669 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1670 }
1671}
1672
1673/**
1674 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1675 *
1676 * @pdev: pci dev pointer
1677 *
1678 * Callback for the switcheroo driver. Check if the switcheroo
1679 * state can be changed.
1680 * Returns true if the state can be changed, false if not.
1681 */
1682static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1683{
1684 struct drm_device *dev = pci_get_drvdata(pdev);
1685
b8920e1e 1686 /*
d38ceaf9
AD
1687 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1688 * locking inversion with the driver load path. And the access here is
1689 * completely racy anyway. So don't bother with locking for now.
1690 */
7e13ad89 1691 return atomic_read(&dev->open_count) == 0;
d38ceaf9
AD
1692}
1693
1694static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1695 .set_gpu_state = amdgpu_switcheroo_set_state,
1696 .reprobe = NULL,
1697 .can_switch = amdgpu_switcheroo_can_switch,
1698};
1699
e3ecdffa
AD
1700/**
1701 * amdgpu_device_ip_set_clockgating_state - set the CG state
1702 *
87e3f136 1703 * @dev: amdgpu_device pointer
e3ecdffa
AD
1704 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1705 * @state: clockgating state (gate or ungate)
1706 *
1707 * Sets the requested clockgating state for all instances of
1708 * the hardware IP specified.
1709 * Returns the error code from the last instance.
1710 */
43fa561f 1711int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1712 enum amd_ip_block_type block_type,
1713 enum amd_clockgating_state state)
d38ceaf9 1714{
43fa561f 1715 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1716 int i, r = 0;
1717
1718 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1719 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1720 continue;
c722865a
RZ
1721 if (adev->ip_blocks[i].version->type != block_type)
1722 continue;
1723 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1724 continue;
1725 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1726 (void *)adev, state);
1727 if (r)
1728 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1729 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1730 }
1731 return r;
1732}
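/*
 * Illustrative usage sketch (hypothetical caller, not taken from this
 * file): gate clockgating on every GFX instance and warn on failure.
 *
 *	int r = amdgpu_device_ip_set_clockgating_state(adev,
 *			AMD_IP_BLOCK_TYPE_GFX, AMD_CG_STATE_GATE);
 *	if (r)
 *		dev_warn(adev->dev, "GFX clockgating failed (%d)\n", r);
 */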
1733
e3ecdffa
AD
1734/**
1735 * amdgpu_device_ip_set_powergating_state - set the PG state
1736 *
87e3f136 1737 * @dev: amdgpu_device pointer
e3ecdffa
AD
1738 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1739 * @state: powergating state (gate or ungate)
1740 *
1741 * Sets the requested powergating state for all instances of
1742 * the hardware IP specified.
1743 * Returns the error code from the last instance.
1744 */
43fa561f 1745int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1746 enum amd_ip_block_type block_type,
1747 enum amd_powergating_state state)
d38ceaf9 1748{
43fa561f 1749 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1750 int i, r = 0;
1751
1752 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1753 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1754 continue;
c722865a
RZ
1755 if (adev->ip_blocks[i].version->type != block_type)
1756 continue;
1757 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1758 continue;
1759 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1760 (void *)adev, state);
1761 if (r)
1762 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1763 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1764 }
1765 return r;
1766}
1767
e3ecdffa
AD
1768/**
1769 * amdgpu_device_ip_get_clockgating_state - get the CG state
1770 *
1771 * @adev: amdgpu_device pointer
1772 * @flags: clockgating feature flags
1773 *
1774 * Walks the list of IPs on the device and updates the clockgating
1775 * flags for each IP.
1776 * Updates @flags with the feature flags for each hardware IP where
1777 * clockgating is enabled.
1778 */
2990a1fc 1779void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
25faeddc 1780 u64 *flags)
6cb2d4e4
HR
1781{
1782 int i;
1783
1784 for (i = 0; i < adev->num_ip_blocks; i++) {
1785 if (!adev->ip_blocks[i].status.valid)
1786 continue;
1787 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1788 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1789 }
1790}
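/*
 * Illustrative sketch (hypothetical debugfs-style caller): collect the
 * aggregate clockgating feature flags and print them.
 *
 *	u64 flags = 0;
 *
 *	amdgpu_device_ip_get_clockgating_state(adev, &flags);
 *	dev_info(adev->dev, "clock gating flags: 0x%llx\n", flags);
 */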
1791
e3ecdffa
AD
1792/**
1793 * amdgpu_device_ip_wait_for_idle - wait for idle
1794 *
1795 * @adev: amdgpu_device pointer
1796 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1797 *
1798 * Waits for the requested hardware IP to be idle.
1799 * Returns 0 for success or a negative error code on failure.
1800 */
2990a1fc
AD
1801int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1802 enum amd_ip_block_type block_type)
5dbbb60b
AD
1803{
1804 int i, r;
1805
1806 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1807 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1808 continue;
a1255107
AD
1809 if (adev->ip_blocks[i].version->type == block_type) {
1810 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1811 if (r)
1812 return r;
1813 break;
1814 }
1815 }
1816 return 0;
1817
1818}
1819
e3ecdffa
AD
1820/**
1821 * amdgpu_device_ip_is_idle - is the hardware IP idle
1822 *
1823 * @adev: amdgpu_device pointer
1824 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1825 *
1826 * Check if the hardware IP is idle or not.
1827 * Returns true if the IP is idle, false if not.
1828 */
2990a1fc
AD
1829bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1830 enum amd_ip_block_type block_type)
5dbbb60b
AD
1831{
1832 int i;
1833
1834 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1835 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1836 continue;
a1255107
AD
1837 if (adev->ip_blocks[i].version->type == block_type)
1838 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1839 }
1840 return true;
1841
1842}
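/*
 * Illustrative sketch (hypothetical caller): block until the GMC reports
 * idle, then double-check with the non-blocking query above.
 *
 *	if (amdgpu_device_ip_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GMC))
 *		dev_warn(adev->dev, "GMC did not become idle\n");
 *	WARN_ON(!amdgpu_device_ip_is_idle(adev, AMD_IP_BLOCK_TYPE_GMC));
 */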
1843
e3ecdffa
AD
1844/**
1845 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1846 *
1847 * @adev: amdgpu_device pointer
87e3f136 1848 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1849 *
1850 * Returns a pointer to the hardware IP block structure
1851 * if it exists for the asic, otherwise NULL.
1852 */
2990a1fc
AD
1853struct amdgpu_ip_block *
1854amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1855 enum amd_ip_block_type type)
d38ceaf9
AD
1856{
1857 int i;
1858
1859 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1860 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1861 return &adev->ip_blocks[i];
1862
1863 return NULL;
1864}
1865
1866/**
2990a1fc 1867 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1868 *
1869 * @adev: amdgpu_device pointer
5fc3aeeb 1870 * @type: enum amd_ip_block_type
d38ceaf9
AD
1871 * @major: major version
1872 * @minor: minor version
1873 *
1874 * return 0 if equal or greater
1875 * return 1 if smaller or the ip_block doesn't exist
1876 */
2990a1fc
AD
1877int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1878 enum amd_ip_block_type type,
1879 u32 major, u32 minor)
d38ceaf9 1880{
2990a1fc 1881 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1882
a1255107
AD
1883 if (ip_block && ((ip_block->version->major > major) ||
1884 ((ip_block->version->major == major) &&
1885 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1886 return 0;
1887
1888 return 1;
1889}
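/*
 * Illustrative usage sketch: a return of 0 means "this IP is at least the
 * requested version". The variable below is purely for illustration.
 *
 *	bool gfx_8_1_or_newer =
 *		amdgpu_device_ip_block_version_cmp(adev,
 *				AMD_IP_BLOCK_TYPE_GFX, 8, 1) == 0;
 */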
1890
a1255107 1891/**
2990a1fc 1892 * amdgpu_device_ip_block_add
a1255107
AD
1893 *
1894 * @adev: amdgpu_device pointer
1895 * @ip_block_version: pointer to the IP to add
1896 *
1897 * Adds the IP block driver information to the collection of IPs
1898 * on the asic.
1899 */
2990a1fc
AD
1900int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1901 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1902{
1903 if (!ip_block_version)
1904 return -EINVAL;
1905
7bd939d0
LG
1906 switch (ip_block_version->type) {
1907 case AMD_IP_BLOCK_TYPE_VCN:
1908 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
1909 return 0;
1910 break;
1911 case AMD_IP_BLOCK_TYPE_JPEG:
1912 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
1913 return 0;
1914 break;
1915 default:
1916 break;
1917 }
1918
e966a725 1919 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1920 ip_block_version->funcs->name);
1921
a1255107
AD
1922 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1923
1924 return 0;
1925}
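/*
 * Illustrative sketch: the per-ASIC set_ip_blocks() paths call this once
 * per IP on the chip, roughly like below (block names shown only as
 * representative examples).
 *
 *	amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
 *	amdgpu_device_ip_block_add(adev, &gmc_v8_0_ip_block);
 *	amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block);
 */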
1926
e3ecdffa
AD
1927/**
1928 * amdgpu_device_enable_virtual_display - enable virtual display feature
1929 *
1930 * @adev: amdgpu_device pointer
1931 *
1932 * Enables the virtual display feature if the user has enabled it via
1933 * the module parameter virtual_display. This feature provides virtual
1934 * display hardware on headless boards or in virtualized environments.
1935 * This function parses and validates the configuration string specified by
1936 * the user and configures the virtual display configuration (number of
1937 * virtual connectors, crtcs, etc.) specified.
1938 */
483ef985 1939static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1940{
1941 adev->enable_virtual_display = false;
1942
1943 if (amdgpu_virtual_display) {
8f66090b 1944 const char *pci_address_name = pci_name(adev->pdev);
0f66356d 1945 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1946
1947 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1948 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1949 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1950 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1951 if (!strcmp("all", pciaddname)
1952 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1953 long num_crtc;
1954 int res = -1;
1955
9accf2fd 1956 adev->enable_virtual_display = true;
0f66356d
ED
1957
1958 if (pciaddname_tmp)
1959 res = kstrtol(pciaddname_tmp, 10,
1960 &num_crtc);
1961
1962 if (!res) {
1963 if (num_crtc < 1)
1964 num_crtc = 1;
1965 if (num_crtc > 6)
1966 num_crtc = 6;
1967 adev->mode_info.num_crtc = num_crtc;
1968 } else {
1969 adev->mode_info.num_crtc = 1;
1970 }
9accf2fd
ED
1971 break;
1972 }
1973 }
1974
0f66356d
ED
1975 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1976 amdgpu_virtual_display, pci_address_name,
1977 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
1978
1979 kfree(pciaddstr);
1980 }
1981}
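/*
 * Illustrative examples of the module-parameter format parsed above
 * (semicolon-separated entries, each "<pci address or all>[,<num crtcs>]"):
 *
 *	modprobe amdgpu virtual_display=0000:01:00.0,2
 *	modprobe amdgpu virtual_display=all,1
 */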
1982
25263da3
AD
1983void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
1984{
1985 if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
1986 adev->mode_info.num_crtc = 1;
1987 adev->enable_virtual_display = true;
1988 DRM_INFO("virtual_display:%d, num_crtc:%d\n",
1989 adev->enable_virtual_display, adev->mode_info.num_crtc);
1990 }
1991}
1992
e3ecdffa
AD
1993/**
1994 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1995 *
1996 * @adev: amdgpu_device pointer
1997 *
1998 * Parses the asic configuration parameters specified in the gpu info
1999 * firmware and makes them available to the driver for use in configuring
2000 * the asic.
2001 * Returns 0 on success, -EINVAL on failure.
2002 */
e2a75f88
AD
2003static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
2004{
e2a75f88 2005 const char *chip_name;
c0a43457 2006 char fw_name[40];
e2a75f88
AD
2007 int err;
2008 const struct gpu_info_firmware_header_v1_0 *hdr;
2009
ab4fe3e1
HR
2010 adev->firmware.gpu_info_fw = NULL;
2011
72de33f8 2012 if (adev->mman.discovery_bin) {
cc375d8c
TY
2013 /*
2014 * FIXME: The bounding box is still needed by Navi12, so
e24d0e91 2015 * temporarily read it from gpu_info firmware. Should be dropped
cc375d8c
TY
2016 * when DAL no longer needs it.
2017 */
2018 if (adev->asic_type != CHIP_NAVI12)
2019 return 0;
258620d0
AD
2020 }
2021
e2a75f88 2022 switch (adev->asic_type) {
e2a75f88
AD
2023 default:
2024 return 0;
2025 case CHIP_VEGA10:
2026 chip_name = "vega10";
2027 break;
3f76dced
AD
2028 case CHIP_VEGA12:
2029 chip_name = "vega12";
2030 break;
2d2e5e7e 2031 case CHIP_RAVEN:
54f78a76 2032 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
54c4d17e 2033 chip_name = "raven2";
54f78a76 2034 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
741deade 2035 chip_name = "picasso";
54c4d17e
FX
2036 else
2037 chip_name = "raven";
2d2e5e7e 2038 break;
65e60f6e
LM
2039 case CHIP_ARCTURUS:
2040 chip_name = "arcturus";
2041 break;
42b325e5
XY
2042 case CHIP_NAVI12:
2043 chip_name = "navi12";
2044 break;
e2a75f88
AD
2045 }
2046
2047 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
b31d3063 2048 err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
e2a75f88
AD
2049 if (err) {
2050 dev_err(adev->dev,
b31d3063 2051 "Failed to get gpu_info firmware \"%s\"\n",
e2a75f88
AD
2052 fw_name);
2053 goto out;
2054 }
2055
ab4fe3e1 2056 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
2057 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
2058
2059 switch (hdr->version_major) {
2060 case 1:
2061 {
2062 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 2063 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
2064 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2065
cc375d8c
TY
2066 /*
2067 * Should be dropped when DAL no longer needs it.
2068 */
2069 if (adev->asic_type == CHIP_NAVI12)
ec51d3fa
XY
2070 goto parse_soc_bounding_box;
2071
b5ab16bf
AD
2072 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2073 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2074 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2075 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 2076 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
2077 le32_to_cpu(gpu_info_fw->gc_num_tccs);
2078 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2079 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2080 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2081 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 2082 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
2083 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2084 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
2085 adev->gfx.cu_info.max_waves_per_simd =
2086 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2087 adev->gfx.cu_info.max_scratch_slots_per_cu =
2088 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2089 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 2090 if (hdr->version_minor >= 1) {
35c2e910
HZ
2091 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2092 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2093 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2094 adev->gfx.config.num_sc_per_sh =
2095 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2096 adev->gfx.config.num_packer_per_sc =
2097 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2098 }
ec51d3fa
XY
2099
2100parse_soc_bounding_box:
ec51d3fa
XY
2101 /*
2102 * soc bounding box info is not integrated into the discovery table,
258620d0 2103 * so we always need to parse it from the gpu info firmware when needed.
ec51d3fa 2104 */
48321c3d
HW
2105 if (hdr->version_minor == 2) {
2106 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2107 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2108 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2109 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2110 }
e2a75f88
AD
2111 break;
2112 }
2113 default:
2114 dev_err(adev->dev,
2115 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2116 err = -EINVAL;
2117 goto out;
2118 }
2119out:
e2a75f88
AD
2120 return err;
2121}
2122
e3ecdffa
AD
2123/**
2124 * amdgpu_device_ip_early_init - run early init for hardware IPs
2125 *
2126 * @adev: amdgpu_device pointer
2127 *
2128 * Early initialization pass for hardware IPs. The hardware IPs that make
2129 * up each asic are discovered and each IP's early_init callback is run. This
2130 * is the first stage in initializing the asic.
2131 * Returns 0 on success, negative error code on failure.
2132 */
06ec9070 2133static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 2134{
901e2be2
AD
2135 struct drm_device *dev = adev_to_drm(adev);
2136 struct pci_dev *parent;
aaa36a97 2137 int i, r;
ced69502 2138 bool total;
d38ceaf9 2139
483ef985 2140 amdgpu_device_enable_virtual_display(adev);
a6be7570 2141
00a979f3 2142 if (amdgpu_sriov_vf(adev)) {
00a979f3 2143 r = amdgpu_virt_request_full_gpu(adev, true);
aaa36a97
AD
2144 if (r)
2145 return r;
00a979f3
WS
2146 }
2147
d38ceaf9 2148 switch (adev->asic_type) {
33f34802
KW
2149#ifdef CONFIG_DRM_AMDGPU_SI
2150 case CHIP_VERDE:
2151 case CHIP_TAHITI:
2152 case CHIP_PITCAIRN:
2153 case CHIP_OLAND:
2154 case CHIP_HAINAN:
295d0daf 2155 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
2156 r = si_set_ip_blocks(adev);
2157 if (r)
2158 return r;
2159 break;
2160#endif
a2e73f56
AD
2161#ifdef CONFIG_DRM_AMDGPU_CIK
2162 case CHIP_BONAIRE:
2163 case CHIP_HAWAII:
2164 case CHIP_KAVERI:
2165 case CHIP_KABINI:
2166 case CHIP_MULLINS:
e1ad2d53 2167 if (adev->flags & AMD_IS_APU)
a2e73f56 2168 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
2169 else
2170 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
2171
2172 r = cik_set_ip_blocks(adev);
2173 if (r)
2174 return r;
2175 break;
2176#endif
da87c30b
AD
2177 case CHIP_TOPAZ:
2178 case CHIP_TONGA:
2179 case CHIP_FIJI:
2180 case CHIP_POLARIS10:
2181 case CHIP_POLARIS11:
2182 case CHIP_POLARIS12:
2183 case CHIP_VEGAM:
2184 case CHIP_CARRIZO:
2185 case CHIP_STONEY:
2186 if (adev->flags & AMD_IS_APU)
2187 adev->family = AMDGPU_FAMILY_CZ;
2188 else
2189 adev->family = AMDGPU_FAMILY_VI;
2190
2191 r = vi_set_ip_blocks(adev);
2192 if (r)
2193 return r;
2194 break;
d38ceaf9 2195 default:
63352b7f
AD
2196 r = amdgpu_discovery_set_ip_blocks(adev);
2197 if (r)
2198 return r;
2199 break;
d38ceaf9
AD
2200 }
2201
901e2be2
AD
2202 if (amdgpu_has_atpx() &&
2203 (amdgpu_is_atpx_hybrid() ||
2204 amdgpu_has_atpx_dgpu_power_cntl()) &&
2205 ((adev->flags & AMD_IS_APU) == 0) &&
2206 !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
2207 adev->flags |= AMD_IS_PX;
2208
85ac2021
AD
2209 if (!(adev->flags & AMD_IS_APU)) {
2210 parent = pci_upstream_bridge(adev->pdev);
2211 adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2212 }
901e2be2 2213
1884734a 2214
3b94fb10 2215 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 2216 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 2217 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
4215a119
HC
2218 if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2219 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
00f54b97 2220
ced69502 2221 total = true;
d38ceaf9
AD
2222 for (i = 0; i < adev->num_ip_blocks; i++) {
2223 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
0c451baf 2224 DRM_WARN("disabled ip block: %d <%s>\n",
ed8cf00c 2225 i, adev->ip_blocks[i].version->funcs->name);
a1255107 2226 adev->ip_blocks[i].status.valid = false;
d38ceaf9 2227 } else {
a1255107
AD
2228 if (adev->ip_blocks[i].version->funcs->early_init) {
2229 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 2230 if (r == -ENOENT) {
a1255107 2231 adev->ip_blocks[i].status.valid = false;
2c1a2784 2232 } else if (r) {
a1255107
AD
2233 DRM_ERROR("early_init of IP block <%s> failed %d\n",
2234 adev->ip_blocks[i].version->funcs->name, r);
ced69502 2235 total = false;
2c1a2784 2236 } else {
a1255107 2237 adev->ip_blocks[i].status.valid = true;
2c1a2784 2238 }
974e6b64 2239 } else {
a1255107 2240 adev->ip_blocks[i].status.valid = true;
d38ceaf9 2241 }
d38ceaf9 2242 }
21a249ca
AD
2243 /* get the vbios after the asic_funcs are set up */
2244 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
2245 r = amdgpu_device_parse_gpu_info_fw(adev);
2246 if (r)
2247 return r;
2248
21a249ca 2249 /* Read BIOS */
9535a86a
SZ
2250 if (amdgpu_device_read_bios(adev)) {
2251 if (!amdgpu_get_bios(adev))
2252 return -EINVAL;
21a249ca 2253
9535a86a
SZ
2254 r = amdgpu_atombios_init(adev);
2255 if (r) {
2256 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2257 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2258 return r;
2259 }
21a249ca 2260 }
77eabc6f
PJZ
2261
2262 /* get pf2vf msg info at its earliest time */
2263 if (amdgpu_sriov_vf(adev))
2264 amdgpu_virt_init_data_exchange(adev);
2265
21a249ca 2266 }
d38ceaf9 2267 }
ced69502
ML
2268 if (!total)
2269 return -ENODEV;
d38ceaf9 2270
00fa4035 2271 amdgpu_amdkfd_device_probe(adev);
395d1fb9
NH
2272 adev->cg_flags &= amdgpu_cg_mask;
2273 adev->pg_flags &= amdgpu_pg_mask;
2274
d38ceaf9
AD
2275 return 0;
2276}
2277
0a4f2520
RZ
2278static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2279{
2280 int i, r;
2281
2282 for (i = 0; i < adev->num_ip_blocks; i++) {
2283 if (!adev->ip_blocks[i].status.sw)
2284 continue;
2285 if (adev->ip_blocks[i].status.hw)
2286 continue;
2287 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 2288 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
2289 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2290 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2291 if (r) {
2292 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2293 adev->ip_blocks[i].version->funcs->name, r);
2294 return r;
2295 }
2296 adev->ip_blocks[i].status.hw = true;
2297 }
2298 }
2299
2300 return 0;
2301}
2302
2303static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2304{
2305 int i, r;
2306
2307 for (i = 0; i < adev->num_ip_blocks; i++) {
2308 if (!adev->ip_blocks[i].status.sw)
2309 continue;
2310 if (adev->ip_blocks[i].status.hw)
2311 continue;
2312 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2313 if (r) {
2314 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2315 adev->ip_blocks[i].version->funcs->name, r);
2316 return r;
2317 }
2318 adev->ip_blocks[i].status.hw = true;
2319 }
2320
2321 return 0;
2322}
2323
7a3e0bb2
RZ
2324static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2325{
2326 int r = 0;
2327 int i;
80f41f84 2328 uint32_t smu_version;
7a3e0bb2
RZ
2329
2330 if (adev->asic_type >= CHIP_VEGA10) {
2331 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
2332 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2333 continue;
2334
e3c1b071 2335 if (!adev->ip_blocks[i].status.sw)
2336 continue;
2337
482f0e53
ML
2338 /* no need to do the fw loading again if already done*/
2339 if (adev->ip_blocks[i].status.hw == true)
2340 break;
2341
53b3f8f4 2342 if (amdgpu_in_reset(adev) || adev->in_suspend) {
482f0e53
ML
2343 r = adev->ip_blocks[i].version->funcs->resume(adev);
2344 if (r) {
2345 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 2346 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
2347 return r;
2348 }
2349 } else {
2350 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2351 if (r) {
2352 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2353 adev->ip_blocks[i].version->funcs->name, r);
2354 return r;
7a3e0bb2 2355 }
7a3e0bb2 2356 }
482f0e53
ML
2357
2358 adev->ip_blocks[i].status.hw = true;
2359 break;
7a3e0bb2
RZ
2360 }
2361 }
482f0e53 2362
8973d9ec
ED
2363 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2364 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 2365
80f41f84 2366 return r;
7a3e0bb2
RZ
2367}
2368
5fd8518d
AG
2369static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2370{
2371 long timeout;
2372 int r, i;
2373
2374 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2375 struct amdgpu_ring *ring = adev->rings[i];
2376
2377 /* No need to set up the GPU scheduler for rings that don't need it */
2378 if (!ring || ring->no_scheduler)
2379 continue;
2380
2381 switch (ring->funcs->type) {
2382 case AMDGPU_RING_TYPE_GFX:
2383 timeout = adev->gfx_timeout;
2384 break;
2385 case AMDGPU_RING_TYPE_COMPUTE:
2386 timeout = adev->compute_timeout;
2387 break;
2388 case AMDGPU_RING_TYPE_SDMA:
2389 timeout = adev->sdma_timeout;
2390 break;
2391 default:
2392 timeout = adev->video_timeout;
2393 break;
2394 }
2395
2396 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
11f25c84 2397 ring->num_hw_submission, 0,
8ab62eda
JG
2398 timeout, adev->reset_domain->wq,
2399 ring->sched_score, ring->name,
2400 adev->dev);
5fd8518d
AG
2401 if (r) {
2402 DRM_ERROR("Failed to create scheduler on ring %s.\n",
2403 ring->name);
2404 return r;
2405 }
2406 }
2407
d425c6f4
JZ
2408 amdgpu_xcp_update_partition_sched_list(adev);
2409
5fd8518d
AG
2410 return 0;
2411}
2412
2413
e3ecdffa
AD
2414/**
2415 * amdgpu_device_ip_init - run init for hardware IPs
2416 *
2417 * @adev: amdgpu_device pointer
2418 *
2419 * Main initialization pass for hardware IPs. The list of all the hardware
2420 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2421 * are run. sw_init initializes the software state associated with each IP
2422 * and hw_init initializes the hardware associated with each IP.
2423 * Returns 0 on success, negative error code on failure.
2424 */
06ec9070 2425static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
2426{
2427 int i, r;
2428
c030f2e4 2429 r = amdgpu_ras_init(adev);
2430 if (r)
2431 return r;
2432
d38ceaf9 2433 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2434 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2435 continue;
a1255107 2436 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 2437 if (r) {
a1255107
AD
2438 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2439 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 2440 goto init_failed;
2c1a2784 2441 }
a1255107 2442 adev->ip_blocks[i].status.sw = true;
bfca0289 2443
c1c39032
AD
2444 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2445 /* need to do common hw init early so everything is set up for gmc */
2446 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2447 if (r) {
2448 DRM_ERROR("hw_init %d failed %d\n", i, r);
2449 goto init_failed;
2450 }
2451 adev->ip_blocks[i].status.hw = true;
2452 } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2453 /* need to do gmc hw init early so we can allocate gpu mem */
892deb48
VS
2454 /* Try to reserve bad pages early */
2455 if (amdgpu_sriov_vf(adev))
2456 amdgpu_virt_exchange_data(adev);
2457
7ccfd79f 2458 r = amdgpu_device_mem_scratch_init(adev);
2c1a2784 2459 if (r) {
7ccfd79f 2460 DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
72d3f592 2461 goto init_failed;
2c1a2784 2462 }
a1255107 2463 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
2464 if (r) {
2465 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 2466 goto init_failed;
2c1a2784 2467 }
06ec9070 2468 r = amdgpu_device_wb_init(adev);
2c1a2784 2469 if (r) {
06ec9070 2470 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 2471 goto init_failed;
2c1a2784 2472 }
a1255107 2473 adev->ip_blocks[i].status.hw = true;
2493664f
ML
2474
2475 /* right after GMC hw init, we create CSA */
02ff519e 2476 if (adev->gfx.mcbp) {
1e256e27 2477 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
58ab2c08
CK
2478 AMDGPU_GEM_DOMAIN_VRAM |
2479 AMDGPU_GEM_DOMAIN_GTT,
2480 AMDGPU_CSA_SIZE);
2493664f
ML
2481 if (r) {
2482 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 2483 goto init_failed;
2493664f
ML
2484 }
2485 }
d38ceaf9
AD
2486 }
2487 }
2488
c9ffa427 2489 if (amdgpu_sriov_vf(adev))
22c16d25 2490 amdgpu_virt_init_data_exchange(adev);
c9ffa427 2491
533aed27
AG
2492 r = amdgpu_ib_pool_init(adev);
2493 if (r) {
2494 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2495 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2496 goto init_failed;
2497 }
2498
c8963ea4
RZ
2499 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2500 if (r)
72d3f592 2501 goto init_failed;
0a4f2520
RZ
2502
2503 r = amdgpu_device_ip_hw_init_phase1(adev);
2504 if (r)
72d3f592 2505 goto init_failed;
0a4f2520 2506
7a3e0bb2
RZ
2507 r = amdgpu_device_fw_loading(adev);
2508 if (r)
72d3f592 2509 goto init_failed;
7a3e0bb2 2510
0a4f2520
RZ
2511 r = amdgpu_device_ip_hw_init_phase2(adev);
2512 if (r)
72d3f592 2513 goto init_failed;
d38ceaf9 2514
121a2bc6
AG
2515 /*
2516 * retired pages will be loaded from eeprom and reserved here,
2517 * it should be called after amdgpu_device_ip_hw_init_phase2 since
2518 * for some ASICs the RAS EEPROM code relies on the SMU being fully
2519 * functional for I2C communication, which is only true at this point.
b82e65a9
GC
2520 *
2521 * amdgpu_ras_recovery_init may fail, but the upper level only cares about
2522 * the failure from a bad gpu situation and stops the amdgpu init process
2523 * accordingly. For other failure cases, it will still release all
2524 * the resources and print an error message, rather than returning a
2525 * negative value to the upper level.
121a2bc6
AG
2526 *
2527 * Note: theoretically, this should be called before all vram allocations
2528 * to protect retired pages from being abused
2529 */
b82e65a9
GC
2530 r = amdgpu_ras_recovery_init(adev);
2531 if (r)
2532 goto init_failed;
121a2bc6 2533
cfbb6b00
AG
2534 /**
2535 * In case of XGMI grab extra reference for reset domain for this device
2536 */
a4c63caf 2537 if (adev->gmc.xgmi.num_physical_nodes > 1) {
cfbb6b00 2538 if (amdgpu_xgmi_add_device(adev) == 0) {
46c67660 2539 if (!amdgpu_sriov_vf(adev)) {
2efc30f0
VC
2540 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2541
dfd0287b
LH
2542 if (WARN_ON(!hive)) {
2543 r = -ENOENT;
2544 goto init_failed;
2545 }
2546
46c67660 2547 if (!hive->reset_domain ||
2548 !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2549 r = -ENOENT;
2550 amdgpu_put_xgmi_hive(hive);
2551 goto init_failed;
2552 }
2553
2554 /* Drop the early temporary reset domain we created for device */
2555 amdgpu_reset_put_reset_domain(adev->reset_domain);
2556 adev->reset_domain = hive->reset_domain;
9dfa4860 2557 amdgpu_put_xgmi_hive(hive);
cfbb6b00 2558 }
a4c63caf
AG
2559 }
2560 }
2561
5fd8518d
AG
2562 r = amdgpu_device_init_schedulers(adev);
2563 if (r)
2564 goto init_failed;
e3c1b071 2565
2566 /* Don't init kfd if the whole hive needs to be reset during init */
84b4dd3f
PY
2567 if (!adev->gmc.xgmi.pending_reset) {
2568 kgd2kfd_init_zone_device(adev);
e3c1b071 2569 amdgpu_amdkfd_device_init(adev);
84b4dd3f 2570 }
c6332b97 2571
bd607166
KR
2572 amdgpu_fru_get_product_info(adev);
2573
72d3f592 2574init_failed:
c6332b97 2575
72d3f592 2576 return r;
d38ceaf9
AD
2577}
2578
e3ecdffa
AD
2579/**
2580 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2581 *
2582 * @adev: amdgpu_device pointer
2583 *
2584 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2585 * this function before a GPU reset. If the value is retained after a
2586 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2587 */
06ec9070 2588static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2589{
2590 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2591}
2592
e3ecdffa
AD
2593/**
2594 * amdgpu_device_check_vram_lost - check if vram is valid
2595 *
2596 * @adev: amdgpu_device pointer
2597 *
2598 * Checks the reset magic value written to the gart pointer in VRAM.
2599 * The driver calls this after a GPU reset to see if the contents of
2600 * VRAM have been lost or not.
2601 * Returns true if vram is lost, false if not.
2602 */
06ec9070 2603static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2604{
dadce777
EQ
2605 if (memcmp(adev->gart.ptr, adev->reset_magic,
2606 AMDGPU_RESET_MAGIC_NUM))
2607 return true;
2608
53b3f8f4 2609 if (!amdgpu_in_reset(adev))
dadce777
EQ
2610 return false;
2611
2612 /*
2613 * For all ASICs with baco/mode1 reset, the VRAM is
2614 * always assumed to be lost.
2615 */
2616 switch (amdgpu_asic_reset_method(adev)) {
2617 case AMD_RESET_METHOD_BACO:
2618 case AMD_RESET_METHOD_MODE1:
2619 return true;
2620 default:
2621 return false;
2622 }
0c49e0b8
CZ
2623}
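/*
 * Illustrative sketch of how the fill/check pair above brackets a reset
 * (simplified, hypothetical flow; the real recovery path lives elsewhere
 * in this file):
 *
 *	amdgpu_device_fill_reset_magic(adev);
 *	r = amdgpu_asic_reset(adev);
 *	if (!r && amdgpu_device_check_vram_lost(adev))
 *		dev_info(adev->dev, "VRAM is lost due to GPU reset!\n");
 */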
2624
e3ecdffa 2625/**
1112a46b 2626 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2627 *
2628 * @adev: amdgpu_device pointer
b8b72130 2629 * @state: clockgating state (gate or ungate)
e3ecdffa 2630 *
e3ecdffa 2631 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2632 * set_clockgating_state callbacks are run.
2633 * During the late init pass this enables clockgating for hardware IPs;
2634 * during fini or suspend it disables clockgating for hardware IPs.
e3ecdffa
AD
2635 * Returns 0 on success, negative error code on failure.
2636 */
fdd34271 2637
5d89bb2d
LL
2638int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2639 enum amd_clockgating_state state)
d38ceaf9 2640{
1112a46b 2641 int i, j, r;
d38ceaf9 2642
4a2ba394
SL
2643 if (amdgpu_emu_mode == 1)
2644 return 0;
2645
1112a46b
RZ
2646 for (j = 0; j < adev->num_ip_blocks; j++) {
2647 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2648 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2649 continue;
47198eb7 2650 /* skip CG for GFX, SDMA on S0ix */
5d70a549 2651 if (adev->in_s0ix &&
47198eb7
AD
2652 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2653 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2654 continue;
4a446d55 2655 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2656 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2657 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2658 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2659 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2660 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2661 /* enable clockgating to save power */
a1255107 2662 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2663 state);
4a446d55
AD
2664 if (r) {
2665 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2666 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2667 return r;
2668 }
b0b00ff1 2669 }
d38ceaf9 2670 }
06b18f61 2671
c9f96fd5
RZ
2672 return 0;
2673}
2674
5d89bb2d
LL
2675int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2676 enum amd_powergating_state state)
c9f96fd5 2677{
1112a46b 2678 int i, j, r;
06b18f61 2679
c9f96fd5
RZ
2680 if (amdgpu_emu_mode == 1)
2681 return 0;
2682
1112a46b
RZ
2683 for (j = 0; j < adev->num_ip_blocks; j++) {
2684 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2685 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5 2686 continue;
47198eb7 2687 /* skip PG for GFX, SDMA on S0ix */
5d70a549 2688 if (adev->in_s0ix &&
47198eb7
AD
2689 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2690 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2691 continue;
c9f96fd5
RZ
2692 /* skip PG for VCE/UVD, it's handled specially */
2693 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2694 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2695 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2696 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2697 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2698 /* enable powergating to save power */
2699 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2700 state);
c9f96fd5
RZ
2701 if (r) {
2702 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2703 adev->ip_blocks[i].version->funcs->name, r);
2704 return r;
2705 }
2706 }
2707 }
2dc80b00
S
2708 return 0;
2709}
2710
beff74bc
AD
2711static int amdgpu_device_enable_mgpu_fan_boost(void)
2712{
2713 struct amdgpu_gpu_instance *gpu_ins;
2714 struct amdgpu_device *adev;
2715 int i, ret = 0;
2716
2717 mutex_lock(&mgpu_info.mutex);
2718
2719 /*
2720 * MGPU fan boost feature should be enabled
2721 * only when there are two or more dGPUs in
2722 * the system
2723 */
2724 if (mgpu_info.num_dgpu < 2)
2725 goto out;
2726
2727 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2728 gpu_ins = &(mgpu_info.gpu_ins[i]);
2729 adev = gpu_ins->adev;
2730 if (!(adev->flags & AMD_IS_APU) &&
f10bb940 2731 !gpu_ins->mgpu_fan_enabled) {
beff74bc
AD
2732 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2733 if (ret)
2734 break;
2735
2736 gpu_ins->mgpu_fan_enabled = 1;
2737 }
2738 }
2739
2740out:
2741 mutex_unlock(&mgpu_info.mutex);
2742
2743 return ret;
2744}
2745
e3ecdffa
AD
2746/**
2747 * amdgpu_device_ip_late_init - run late init for hardware IPs
2748 *
2749 * @adev: amdgpu_device pointer
2750 *
2751 * Late initialization pass for hardware IPs. The list of all the hardware
2752 * IPs that make up the asic is walked and the late_init callbacks are run.
2753 * late_init covers any special initialization that an IP requires
2754 * after all of the IPs have been initialized or something that needs to happen
2755 * late in the init process.
2756 * Returns 0 on success, negative error code on failure.
2757 */
06ec9070 2758static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2759{
60599a03 2760 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2761 int i = 0, r;
2762
2763 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2764 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2765 continue;
2766 if (adev->ip_blocks[i].version->funcs->late_init) {
2767 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2768 if (r) {
2769 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2770 adev->ip_blocks[i].version->funcs->name, r);
2771 return r;
2772 }
2dc80b00 2773 }
73f847db 2774 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2775 }
2776
867e24ca 2777 r = amdgpu_ras_late_init(adev);
2778 if (r) {
2779 DRM_ERROR("amdgpu_ras_late_init failed %d", r);
2780 return r;
2781 }
2782
a891d239
DL
2783 amdgpu_ras_set_error_query_ready(adev, true);
2784
1112a46b
RZ
2785 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2786 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2787
06ec9070 2788 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2789
beff74bc
AD
2790 r = amdgpu_device_enable_mgpu_fan_boost();
2791 if (r)
2792 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2793
4da8b639 2794 /* For passthrough configuration on arcturus and aldebaran, enable special handling of SBR */
47fc644f
SS
2795 if (amdgpu_passthrough(adev) &&
2796 ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
2797 adev->asic_type == CHIP_ALDEBARAN))
bc143d8b 2798 amdgpu_dpm_handle_passthrough_sbr(adev, true);
60599a03
EQ
2799
2800 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2801 mutex_lock(&mgpu_info.mutex);
2802
2803 /*
2804 * Reset device p-state to low as this was booted with high.
2805 *
2806 * This should be performed only after all devices from the same
2807 * hive get initialized.
2808 *
2809 * However, the number of devices in a hive is not known in advance;
2810 * it is counted one by one as the devices are initialized.
2811 *
2812 * So, we wait for all XGMI interlinked devices to be initialized.
2813 * This may bring some delays as those devices may come from
2814 * different hives. But that should be OK.
2815 */
2816 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2817 for (i = 0; i < mgpu_info.num_gpu; i++) {
2818 gpu_instance = &(mgpu_info.gpu_ins[i]);
2819 if (gpu_instance->adev->flags & AMD_IS_APU)
2820 continue;
2821
d84a430d
JK
2822 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2823 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
2824 if (r) {
2825 DRM_ERROR("pstate setting failed (%d).\n", r);
2826 break;
2827 }
2828 }
2829 }
2830
2831 mutex_unlock(&mgpu_info.mutex);
2832 }
2833
d38ceaf9
AD
2834 return 0;
2835}
2836
613aa3ea
LY
2837/**
2838 * amdgpu_device_smu_fini_early - smu hw_fini wrapper
2839 *
2840 * @adev: amdgpu_device pointer
2841 *
2842 * For ASICs that need to disable the SMC first
2843 */
2844static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
2845{
2846 int i, r;
2847
2848 if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))
2849 return;
2850
2851 for (i = 0; i < adev->num_ip_blocks; i++) {
2852 if (!adev->ip_blocks[i].status.hw)
2853 continue;
2854 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2855 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2856 /* XXX handle errors */
2857 if (r) {
2858 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2859 adev->ip_blocks[i].version->funcs->name, r);
2860 }
2861 adev->ip_blocks[i].status.hw = false;
2862 break;
2863 }
2864 }
2865}
2866
e9669fb7 2867static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
d38ceaf9
AD
2868{
2869 int i, r;
2870
e9669fb7
AG
2871 for (i = 0; i < adev->num_ip_blocks; i++) {
2872 if (!adev->ip_blocks[i].version->funcs->early_fini)
2873 continue;
5278a159 2874
e9669fb7
AG
2875 r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
2876 if (r) {
2877 DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
2878 adev->ip_blocks[i].version->funcs->name, r);
2879 }
2880 }
c030f2e4 2881
05df1f01 2882 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2883 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2884
7270e895
TY
2885 amdgpu_amdkfd_suspend(adev, false);
2886
613aa3ea
LY
2887 /* Workaround for ASICs that need to disable SMC first */
2888 amdgpu_device_smu_fini_early(adev);
3e96dbfd 2889
d38ceaf9 2890 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2891 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2892 continue;
8201a67a 2893
a1255107 2894 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2895 /* XXX handle errors */
2c1a2784 2896 if (r) {
a1255107
AD
2897 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2898 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2899 }
8201a67a 2900
a1255107 2901 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2902 }
2903
6effad8a
GC
2904 if (amdgpu_sriov_vf(adev)) {
2905 if (amdgpu_virt_release_full_gpu(adev, false))
2906 DRM_ERROR("failed to release exclusive mode on fini\n");
2907 }
2908
e9669fb7
AG
2909 return 0;
2910}
2911
2912/**
2913 * amdgpu_device_ip_fini - run fini for hardware IPs
2914 *
2915 * @adev: amdgpu_device pointer
2916 *
2917 * Main teardown pass for hardware IPs. The list of all the hardware
2918 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2919 * are run. hw_fini tears down the hardware associated with each IP
2920 * and sw_fini tears down any software state associated with each IP.
2921 * Returns 0 on success, negative error code on failure.
2922 */
2923static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2924{
2925 int i, r;
2926
2927 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2928 amdgpu_virt_release_ras_err_handler_data(adev);
2929
e9669fb7
AG
2930 if (adev->gmc.xgmi.num_physical_nodes > 1)
2931 amdgpu_xgmi_remove_device(adev);
2932
c004d44e 2933 amdgpu_amdkfd_device_fini_sw(adev);
9950cda2 2934
d38ceaf9 2935 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2936 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2937 continue;
c12aba3a
ML
2938
2939 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2940 amdgpu_ucode_free_bo(adev);
1e256e27 2941 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a 2942 amdgpu_device_wb_fini(adev);
7ccfd79f 2943 amdgpu_device_mem_scratch_fini(adev);
533aed27 2944 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2945 }
2946
a1255107 2947 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2948 /* XXX handle errors */
2c1a2784 2949 if (r) {
a1255107
AD
2950 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2951 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2952 }
a1255107
AD
2953 adev->ip_blocks[i].status.sw = false;
2954 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2955 }
2956
a6dcfd9c 2957 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2958 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2959 continue;
a1255107
AD
2960 if (adev->ip_blocks[i].version->funcs->late_fini)
2961 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2962 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2963 }
2964
c030f2e4 2965 amdgpu_ras_fini(adev);
2966
d38ceaf9
AD
2967 return 0;
2968}
2969
e3ecdffa 2970/**
beff74bc 2971 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2972 *
1112a46b 2973 * @work: work_struct.
e3ecdffa 2974 */
beff74bc 2975static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2976{
2977 struct amdgpu_device *adev =
beff74bc 2978 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2979 int r;
2980
2981 r = amdgpu_ib_ring_tests(adev);
2982 if (r)
2983 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2984}
2985
1e317b99
RZ
2986static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2987{
2988 struct amdgpu_device *adev =
2989 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2990
90a92662
MD
2991 WARN_ON_ONCE(adev->gfx.gfx_off_state);
2992 WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
2993
2994 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2995 adev->gfx.gfx_off_state = true;
1e317b99
RZ
2996}
2997
e3ecdffa 2998/**
e7854a03 2999 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
3000 *
3001 * @adev: amdgpu_device pointer
3002 *
3003 * Main suspend function for hardware IPs. The list of all the hardware
3004 * IPs that make up the asic is walked, clockgating is disabled and the
3005 * suspend callbacks are run. suspend puts the hardware and software state
3006 * in each IP into a state suitable for suspend.
3007 * Returns 0 on success, negative error code on failure.
3008 */
e7854a03
AD
3009static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
3010{
3011 int i, r;
3012
50ec83f0
AD
3013 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3014 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 3015
b31d6ada
EQ
3016 /*
3017 * Per PMFW team's suggestion, driver needs to handle gfxoff
3018 * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
3019 * scenario. Add the missing df cstate disablement here.
3020 */
3021 if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
3022 dev_warn(adev->dev, "Failed to disallow df cstate");
3023
e7854a03
AD
3024 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3025 if (!adev->ip_blocks[i].status.valid)
3026 continue;
2b9f7848 3027
e7854a03 3028 /* displays are handled separately */
2b9f7848
ND
3029 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
3030 continue;
3031
3032 /* XXX handle errors */
3033 r = adev->ip_blocks[i].version->funcs->suspend(adev);
3034 /* XXX handle errors */
3035 if (r) {
3036 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3037 adev->ip_blocks[i].version->funcs->name, r);
3038 return r;
e7854a03 3039 }
2b9f7848
ND
3040
3041 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
3042 }
3043
e7854a03
AD
3044 return 0;
3045}
3046
3047/**
3048 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3049 *
3050 * @adev: amdgpu_device pointer
3051 *
3052 * Main suspend function for hardware IPs. The list of all the hardware
3053 * IPs that make up the asic is walked, clockgating is disabled and the
3054 * suspend callbacks are run. suspend puts the hardware and software state
3055 * in each IP into a state suitable for suspend.
3056 * Returns 0 on success, negative error code on failure.
3057 */
3058static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3059{
3060 int i, r;
3061
557f42a2 3062 if (adev->in_s0ix)
bc143d8b 3063 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
34416931 3064
d38ceaf9 3065 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3066 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 3067 continue;
e7854a03
AD
3068 /* displays are handled in phase1 */
3069 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3070 continue;
bff77e86
LM
3071 /* PSP lost connection when err_event_athub occurs */
3072 if (amdgpu_ras_intr_triggered() &&
3073 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3074 adev->ip_blocks[i].status.hw = false;
3075 continue;
3076 }
e3c1b071 3077
3078 /* skip unnecessary suspend if we do not initialize them yet */
3079 if (adev->gmc.xgmi.pending_reset &&
3080 !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3081 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
3082 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3083 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
3084 adev->ip_blocks[i].status.hw = false;
3085 continue;
3086 }
557f42a2 3087
afa6646b 3088 /* skip suspend of gfx/mes and psp for S0ix
32ff160d
AD
3089 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3090 * like at runtime. PSP is also part of the always on hardware
3091 * so no need to suspend it.
3092 */
557f42a2 3093 if (adev->in_s0ix &&
32ff160d 3094 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
afa6646b
AD
3095 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3096 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
557f42a2
AD
3097 continue;
3098
2a7798ea
AD
3099 /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3100 if (adev->in_s0ix &&
3101 (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 0, 0)) &&
3102 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3103 continue;
3104
e11c7750
TH
3105 /* Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot.
3106 * These are in TMR, hence are expected to be reused by PSP-TOS to reload
3107 * from this location and RLC Autoload automatically also gets loaded
3108 * from here based on PMFW -> PSP message during re-init sequence.
3109 * Therefore, the psp suspend & resume should be skipped to avoid destroying
3110 * the TMR and reloading FWs again for IMU enabled APU ASICs.
3111 */
3112 if (amdgpu_in_reset(adev) &&
3113 (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3114 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3115 continue;
3116
d38ceaf9 3117 /* XXX handle errors */
a1255107 3118 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 3119 /* XXX handle errors */
2c1a2784 3120 if (r) {
a1255107
AD
3121 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3122 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 3123 }
876923fb 3124 adev->ip_blocks[i].status.hw = false;
a3a09142 3125 /* handle putting the SMC in the appropriate state */
47fc644f 3126 if (!amdgpu_sriov_vf(adev)) {
86b93fd6
JZ
3127 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3128 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3129 if (r) {
3130 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3131 adev->mp1_state, r);
3132 return r;
3133 }
a3a09142
AD
3134 }
3135 }
d38ceaf9
AD
3136 }
3137
3138 return 0;
3139}
3140
e7854a03
AD
3141/**
3142 * amdgpu_device_ip_suspend - run suspend for hardware IPs
3143 *
3144 * @adev: amdgpu_device pointer
3145 *
3146 * Main suspend function for hardware IPs. The list of all the hardware
3147 * IPs that make up the asic is walked, clockgating is disabled and the
3148 * suspend callbacks are run. suspend puts the hardware and software state
3149 * in each IP into a state suitable for suspend.
3150 * Returns 0 on success, negative error code on failure.
3151 */
3152int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3153{
3154 int r;
3155
3c73683c
JC
3156 if (amdgpu_sriov_vf(adev)) {
3157 amdgpu_virt_fini_data_exchange(adev);
e7819644 3158 amdgpu_virt_request_full_gpu(adev, false);
3c73683c 3159 }
e7819644 3160
e7854a03
AD
3161 r = amdgpu_device_ip_suspend_phase1(adev);
3162 if (r)
3163 return r;
3164 r = amdgpu_device_ip_suspend_phase2(adev);
3165
e7819644
YT
3166 if (amdgpu_sriov_vf(adev))
3167 amdgpu_virt_release_full_gpu(adev, false);
3168
e7854a03
AD
3169 return r;
3170}
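/*
 * Illustrative sketch (simplified, hypothetical pre-reset flow): the
 * suspend and resume helpers bracket an ASIC reset.
 *
 *	amdgpu_device_ip_suspend(adev);
 *	r = amdgpu_asic_reset(adev);
 *	if (!r)
 *		r = amdgpu_device_ip_resume(adev);
 */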
3171
06ec9070 3172static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3173{
3174 int i, r;
3175
2cb681b6 3176 static enum amd_ip_block_type ip_order[] = {
2cb681b6 3177 AMD_IP_BLOCK_TYPE_COMMON,
c1c39032 3178 AMD_IP_BLOCK_TYPE_GMC,
39186aef 3179 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
3180 AMD_IP_BLOCK_TYPE_IH,
3181 };
a90ad3c2 3182
95ea3dbc 3183 for (i = 0; i < adev->num_ip_blocks; i++) {
2cb681b6
ML
3184 int j;
3185 struct amdgpu_ip_block *block;
a90ad3c2 3186
4cd2a96d
J
3187 block = &adev->ip_blocks[i];
3188 block->status.hw = false;
2cb681b6 3189
4cd2a96d 3190 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
2cb681b6 3191
4cd2a96d 3192 if (block->version->type != ip_order[j] ||
2cb681b6
ML
3193 !block->status.valid)
3194 continue;
3195
3196 r = block->version->funcs->hw_init(adev);
0aaeefcc 3197 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3198 if (r)
3199 return r;
482f0e53 3200 block->status.hw = true;
a90ad3c2
ML
3201 }
3202 }
3203
3204 return 0;
3205}
3206
06ec9070 3207static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3208{
3209 int i, r;
3210
2cb681b6
ML
3211 static enum amd_ip_block_type ip_order[] = {
3212 AMD_IP_BLOCK_TYPE_SMC,
3213 AMD_IP_BLOCK_TYPE_DCE,
3214 AMD_IP_BLOCK_TYPE_GFX,
3215 AMD_IP_BLOCK_TYPE_SDMA,
ec64350d 3216 AMD_IP_BLOCK_TYPE_MES,
257deb8c 3217 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07 3218 AMD_IP_BLOCK_TYPE_VCE,
d2cdc014
YZ
3219 AMD_IP_BLOCK_TYPE_VCN,
3220 AMD_IP_BLOCK_TYPE_JPEG
2cb681b6 3221 };
a90ad3c2 3222
2cb681b6
ML
3223 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3224 int j;
3225 struct amdgpu_ip_block *block;
a90ad3c2 3226
2cb681b6
ML
3227 for (j = 0; j < adev->num_ip_blocks; j++) {
3228 block = &adev->ip_blocks[j];
3229
3230 if (block->version->type != ip_order[i] ||
482f0e53
ML
3231 !block->status.valid ||
3232 block->status.hw)
2cb681b6
ML
3233 continue;
3234
895bd048
JZ
3235 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3236 r = block->version->funcs->resume(adev);
3237 else
3238 r = block->version->funcs->hw_init(adev);
3239
0aaeefcc 3240 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3241 if (r)
3242 return r;
482f0e53 3243 block->status.hw = true;
a90ad3c2
ML
3244 }
3245 }
3246
3247 return 0;
3248}
3249
e3ecdffa
AD
3250/**
3251 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3252 *
3253 * @adev: amdgpu_device pointer
3254 *
3255 * First resume function for hardware IPs. The list of all the hardware
3256 * IPs that make up the asic is walked and the resume callbacks are run for
3257 * COMMON, GMC, and IH. resume puts the hardware into a functional state
3258 * after a suspend and updates the software state as necessary. This
3259 * function is also used for restoring the GPU after a GPU reset.
3260 * Returns 0 on success, negative error code on failure.
3261 */
06ec9070 3262static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
3263{
3264 int i, r;
3265
a90ad3c2 3266 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3267 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 3268 continue;
a90ad3c2 3269 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3270 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
d7274ec7
BZ
3271 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3272 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
482f0e53 3273
fcf0649f
CZ
3274 r = adev->ip_blocks[i].version->funcs->resume(adev);
3275 if (r) {
3276 DRM_ERROR("resume of IP block <%s> failed %d\n",
3277 adev->ip_blocks[i].version->funcs->name, r);
3278 return r;
3279 }
482f0e53 3280 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
3281 }
3282 }
3283
3284 return 0;
3285}
3286
e3ecdffa
AD
3287/**
3288 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3289 *
3290 * @adev: amdgpu_device pointer
3291 *
3292 * Second resume function for hardware IPs. The list of all the hardware
3293 * IPs that make up the asic is walked and the resume callbacks are run for
3294 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
3295 * functional state after a suspend and updates the software state as
3296 * necessary. This function is also used for restoring the GPU after a GPU
3297 * reset.
3298 * Returns 0 on success, negative error code on failure.
3299 */
06ec9070 3300static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3301{
3302 int i, r;
3303
3304 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3305 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 3306 continue;
fcf0649f 3307 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3308 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
3309 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3310 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 3311 continue;
a1255107 3312 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 3313 if (r) {
a1255107
AD
3314 DRM_ERROR("resume of IP block <%s> failed %d\n",
3315 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 3316 return r;
2c1a2784 3317 }
482f0e53 3318 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
3319 }
3320
3321 return 0;
3322}
3323
e3ecdffa
AD
3324/**
3325 * amdgpu_device_ip_resume - run resume for hardware IPs
3326 *
3327 * @adev: amdgpu_device pointer
3328 *
3329 * Main resume function for hardware IPs. The hardware IPs
3330 * are split into two resume functions because they are
b8920e1e 3331 * also used in recovering from a GPU reset and some additional
e3ecdffa
AD
3332 * steps need to be taken between them. In this case (S3/S4) they are
3333 * run sequentially.
3334 * Returns 0 on success, negative error code on failure.
3335 */
06ec9070 3336static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
3337{
3338 int r;
3339
06ec9070 3340 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
3341 if (r)
3342 return r;
7a3e0bb2
RZ
3343
3344 r = amdgpu_device_fw_loading(adev);
3345 if (r)
3346 return r;
3347
06ec9070 3348 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
3349
3350 return r;
3351}
3352
e3ecdffa
AD
3353/**
3354 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3355 *
3356 * @adev: amdgpu_device pointer
3357 *
3358 * Query the VBIOS data tables to determine if the board supports SR-IOV.
3359 */
4e99a44e 3360static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 3361{
6867e1b5
ML
3362 if (amdgpu_sriov_vf(adev)) {
3363 if (adev->is_atom_fw) {
58ff791a 3364 if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
6867e1b5
ML
3365 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3366 } else {
3367 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3368 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3369 }
3370
3371 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3372 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 3373 }
048765ad
AR
3374}
3375
e3ecdffa
AD
3376/**
3377 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3378 *
3379 * @asic_type: AMD asic type
3380 *
3381 * Check if there is DC (new modesetting infrastructure) support for an asic.
3382 * returns true if DC has support, false if not.
3383 */
4562236b
HW
3384bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3385{
3386 switch (asic_type) {
0637d417
AD
3387#ifdef CONFIG_DRM_AMDGPU_SI
3388 case CHIP_HAINAN:
3389#endif
3390 case CHIP_TOPAZ:
3391 /* chips with no display hardware */
3392 return false;
4562236b 3393#if defined(CONFIG_DRM_AMD_DC)
64200c46
MR
3394 case CHIP_TAHITI:
3395 case CHIP_PITCAIRN:
3396 case CHIP_VERDE:
3397 case CHIP_OLAND:
2d32ffd6
AD
3398 /*
3399 * We have systems in the wild with these ASICs that require
3400 * LVDS and VGA support which is not supported with DC.
3401 *
3402 * Fallback to the non-DC driver here by default so as not to
3403 * cause regressions.
3404 */
3405#if defined(CONFIG_DRM_AMD_DC_SI)
3406 return amdgpu_dc > 0;
3407#else
3408 return false;
64200c46 3409#endif
4562236b 3410 case CHIP_BONAIRE:
0d6fbccb 3411 case CHIP_KAVERI:
367e6687
AD
3412 case CHIP_KABINI:
3413 case CHIP_MULLINS:
d9fda248
HW
3414 /*
3415 * We have systems in the wild with these ASICs that require
b5a0168e 3416 * VGA support which is not supported with DC.
d9fda248
HW
3417 *
3418 * Fallback to the non-DC driver here by default so as not to
3419 * cause regressions.
3420 */
3421 return amdgpu_dc > 0;
f7f12b25 3422 default:
fd187853 3423 return amdgpu_dc != 0;
f7f12b25 3424#else
4562236b 3425 default:
93b09a9a 3426 if (amdgpu_dc > 0)
b8920e1e 3427 DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
4562236b 3428 return false;
f7f12b25 3429#endif
4562236b
HW
3430 }
3431}
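/*
 * Note on the amdgpu_dc module parameter as interpreted above: a value
 * greater than 0 force-enables DC on the SI/CIK parts that default to the
 * non-DC path, 0 disables DC, and the default (auto) keeps the per-ASIC
 * choice above. E.g. booting with amdgpu.dc=1 opts CHIP_BONAIRE into DC.
 */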
3432
3433/**
3434 * amdgpu_device_has_dc_support - check if dc is supported
3435 *
982a820b 3436 * @adev: amdgpu_device pointer
4562236b
HW
3437 *
3438 * Returns true for supported, false for not supported
3439 */
3440bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3441{
25263da3 3442 if (adev->enable_virtual_display ||
abaf210c 3443 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
2555039d
XY
3444 return false;
3445
4562236b
HW
3446 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3447}
3448
d4535e2c
AG
3449static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3450{
3451 struct amdgpu_device *adev =
3452 container_of(__work, struct amdgpu_device, xgmi_reset_work);
d95e8e97 3453 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
d4535e2c 3454
c6a6e2db
AG
3455 /* It's a bug to not have a hive within this function */
3456 if (WARN_ON(!hive))
3457 return;
3458
3459 /*
3460 * Use task barrier to synchronize all xgmi reset works across the
3461 * hive. task_barrier_enter and task_barrier_exit will block
3462 * until all the threads running the xgmi reset works reach
3463 * those points. task_barrier_full will do both blocks.
3464 */
3465 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3466
3467 task_barrier_enter(&hive->tb);
4a580877 3468 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
c6a6e2db
AG
3469
3470 if (adev->asic_reset_res)
3471 goto fail;
3472
3473 task_barrier_exit(&hive->tb);
4a580877 3474 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
c6a6e2db
AG
3475
3476 if (adev->asic_reset_res)
3477 goto fail;
43c4d576 3478
5e67bba3 3479 if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
3480 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
3481 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
c6a6e2db
AG
3482 } else {
3483
3484 task_barrier_full(&hive->tb);
3485 adev->asic_reset_res = amdgpu_asic_reset(adev);
3486 }
ce316fa5 3487
c6a6e2db 3488fail:
d4535e2c 3489 if (adev->asic_reset_res)
fed184e9 3490 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
4a580877 3491 adev->asic_reset_res, adev_to_drm(adev)->unique);
d95e8e97 3492 amdgpu_put_xgmi_hive(hive);
d4535e2c
AG
3493}
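/*
 * Note: with BACO the enter and exit steps above are each synchronized
 * separately across the hive via task_barrier_enter()/task_barrier_exit(),
 * whereas a full asic reset only needs the single task_barrier_full()
 * rendezvous.
 */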
3494
71f98027
AD
3495static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3496{
3497 char *input = amdgpu_lockup_timeout;
3498 char *timeout_setting = NULL;
3499 int index = 0;
3500 long timeout;
3501 int ret = 0;
3502
3503 /*
67387dfe
AD
3504 * By default the timeout for non-compute jobs is 10000 ms
3505 * and 60000 ms for compute jobs.
71f98027 3506 * In SR-IOV or passthrough mode, the timeout for compute
b7b2a316 3507 * jobs is 60000 ms by default.
71f98027
AD
3508 */
3509 adev->gfx_timeout = msecs_to_jiffies(10000);
3510 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
9882e278
ED
3511 if (amdgpu_sriov_vf(adev))
3512 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3513 msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
71f98027 3514 else
67387dfe 3515 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027 3516
f440ff44 3517 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 3518 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 3519 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
3520 ret = kstrtol(timeout_setting, 0, &timeout);
3521 if (ret)
3522 return ret;
3523
3524 if (timeout == 0) {
3525 index++;
3526 continue;
3527 } else if (timeout < 0) {
3528 timeout = MAX_SCHEDULE_TIMEOUT;
127aedf9
CK
3529 dev_warn(adev->dev, "lockup timeout disabled");
3530 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
71f98027
AD
3531 } else {
3532 timeout = msecs_to_jiffies(timeout);
3533 }
3534
3535 switch (index++) {
3536 case 0:
3537 adev->gfx_timeout = timeout;
3538 break;
3539 case 1:
3540 adev->compute_timeout = timeout;
3541 break;
3542 case 2:
3543 adev->sdma_timeout = timeout;
3544 break;
3545 case 3:
3546 adev->video_timeout = timeout;
3547 break;
3548 default:
3549 break;
3550 }
3551 }
3552 /*
3553 * There is only one value specified and
3554 * it should apply to all non-compute jobs.
3555 */
bcccee89 3556 if (index == 1) {
71f98027 3557 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
3558 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3559 adev->compute_timeout = adev->gfx_timeout;
3560 }
71f98027
AD
3561 }
3562
3563 return ret;
3564}
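/*
 * Example of the lockup_timeout parsing above (illustrative values, in ms):
 * amdgpu.lockup_timeout=10000,60000,10000,10000 sets the gfx, compute, sdma
 * and video timeouts in that order. A single value applies to all non-compute
 * jobs (and to compute as well under SR-IOV/passthrough), 0 keeps the default
 * for that slot, and a negative value disables the timeout.
 */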
d4535e2c 3565
4a74c38c
PY
3566/**
3567 * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3568 *
3569 * @adev: amdgpu_device pointer
3570 *
3571 * RAM is directly mapped to the GPU if the IOMMU is not enabled or is in pass-through mode
3572 */
3573static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3574{
3575 struct iommu_domain *domain;
3576
3577 domain = iommu_get_domain_for_dev(adev->dev);
3578 if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3579 adev->ram_is_direct_mapped = true;
3580}
3581
77f3a5cd 3582static const struct attribute *amdgpu_dev_attributes[] = {
77f3a5cd
ND
3583 &dev_attr_pcie_replay_count.attr,
3584 NULL
3585};
3586
02ff519e
AD
3587static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
3588{
3589 if (amdgpu_mcbp == 1)
3590 adev->gfx.mcbp = true;
1e9e15dc
JZ
3591 else if (amdgpu_mcbp == 0)
3592 adev->gfx.mcbp = false;
3593 else if ((adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 0, 0)) &&
3594 (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 0, 0)) &&
3595 adev->gfx.num_gfx_rings)
50a7c876
AD
3596 adev->gfx.mcbp = true;
3597
02ff519e
AD
3598 if (amdgpu_sriov_vf(adev))
3599 adev->gfx.mcbp = true;
3600
3601 if (adev->gfx.mcbp)
3602 DRM_INFO("MCBP is enabled\n");
3603}
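/*
 * Note: MCBP (mid-command-buffer preemption) selection above: amdgpu_mcbp=1
 * forces it on, 0 forces it off, and in auto mode it is enabled on gfx9
 * parts that have gfx rings; SR-IOV always enables it.
 */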
3604
d38ceaf9
AD
3605/**
3606 * amdgpu_device_init - initialize the driver
3607 *
3608 * @adev: amdgpu_device pointer
d38ceaf9
AD
3609 * @flags: driver flags
3610 *
3611 * Initializes the driver info and hw (all asics).
3612 * Returns 0 for success or an error on failure.
3613 * Called at driver startup.
3614 */
3615int amdgpu_device_init(struct amdgpu_device *adev,
d38ceaf9
AD
3616 uint32_t flags)
3617{
8aba21b7
LT
3618 struct drm_device *ddev = adev_to_drm(adev);
3619 struct pci_dev *pdev = adev->pdev;
d38ceaf9 3620 int r, i;
b98c6299 3621 bool px = false;
95844d20 3622 u32 max_MBps;
59e9fff1 3623 int tmp;
d38ceaf9
AD
3624
3625 adev->shutdown = false;
d38ceaf9 3626 adev->flags = flags;
4e66d7d2
YZ
3627
3628 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3629 adev->asic_type = amdgpu_force_asic_type;
3630 else
3631 adev->asic_type = flags & AMD_ASIC_MASK;
3632
d38ceaf9 3633 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 3634 if (amdgpu_emu_mode == 1)
8bdab6bb 3635 adev->usec_timeout *= 10;
770d13b1 3636 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
3637 adev->accel_working = false;
3638 adev->num_rings = 0;
68ce8b24 3639 RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
d38ceaf9
AD
3640 adev->mman.buffer_funcs = NULL;
3641 adev->mman.buffer_funcs_ring = NULL;
3642 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 3643 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 3644 adev->gmc.gmc_funcs = NULL;
7bd939d0 3645 adev->harvest_ip_mask = 0x0;
f54d1867 3646 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 3647 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
3648
3649 adev->smc_rreg = &amdgpu_invalid_rreg;
3650 adev->smc_wreg = &amdgpu_invalid_wreg;
3651 adev->pcie_rreg = &amdgpu_invalid_rreg;
3652 adev->pcie_wreg = &amdgpu_invalid_wreg;
0c552ed3
LM
3653 adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
3654 adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
36b9a952
HR
3655 adev->pciep_rreg = &amdgpu_invalid_rreg;
3656 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
3657 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3658 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
a76b2870
CL
3659 adev->pcie_rreg64_ext = &amdgpu_invalid_rreg64_ext;
3660 adev->pcie_wreg64_ext = &amdgpu_invalid_wreg64_ext;
d38ceaf9
AD
3661 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3662 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3663 adev->didt_rreg = &amdgpu_invalid_rreg;
3664 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
3665 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3666 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
3667 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3668 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3669
3e39ab90
AD
3670 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3671 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3672 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
3673
3674 /* mutex initialization are all done here so we
b8920e1e
SS
3675 * can recall function without having locking issues
3676 */
0e5ca0d1 3677 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
3678 mutex_init(&adev->pm.mutex);
3679 mutex_init(&adev->gfx.gpu_clock_mutex);
3680 mutex_init(&adev->srbm_mutex);
b8866c26 3681 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3682 mutex_init(&adev->gfx.gfx_off_mutex);
98a54e88 3683 mutex_init(&adev->gfx.partition_mutex);
d38ceaf9 3684 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3685 mutex_init(&adev->mn_lock);
e23b74aa 3686 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3687 hash_init(adev->mn_hash);
32eaeae0 3688 mutex_init(&adev->psp.mutex);
bd052211 3689 mutex_init(&adev->notifier_lock);
8cda7a4f 3690 mutex_init(&adev->pm.stable_pstate_ctx_lock);
f113cc32 3691 mutex_init(&adev->benchmark_mutex);
d38ceaf9 3692
ab3b9de6 3693 amdgpu_device_init_apu_flags(adev);
9f6a7857 3694
912dfc84
EQ
3695 r = amdgpu_device_check_arguments(adev);
3696 if (r)
3697 return r;
d38ceaf9 3698
d38ceaf9
AD
3699 spin_lock_init(&adev->mmio_idx_lock);
3700 spin_lock_init(&adev->smc_idx_lock);
3701 spin_lock_init(&adev->pcie_idx_lock);
3702 spin_lock_init(&adev->uvd_ctx_idx_lock);
3703 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3704 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3705 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3706 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3707 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3708
0c4e7fa5
CZ
3709 INIT_LIST_HEAD(&adev->shadow_list);
3710 mutex_init(&adev->shadow_list_lock);
3711
655ce9cb 3712 INIT_LIST_HEAD(&adev->reset_list);
3713
6492e1b0 3714 INIT_LIST_HEAD(&adev->ras_list);
3715
3e38b634
EQ
3716 INIT_LIST_HEAD(&adev->pm.od_kobj_list);
3717
beff74bc
AD
3718 INIT_DELAYED_WORK(&adev->delayed_init_work,
3719 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3720 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3721 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3722
d4535e2c
AG
3723 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3724
d23ee13f 3725 adev->gfx.gfx_off_req_count = 1;
0ad7347a
AA
3726 adev->gfx.gfx_off_residency = 0;
3727 adev->gfx.gfx_off_entrycount = 0;
b6e79d9a 3728 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3729
b265bdbd
EQ
3730 atomic_set(&adev->throttling_logging_enabled, 1);
3731 /*
3732 * If throttling continues, logging will be performed every minute
3733 * to avoid log flooding. "-1" is subtracted since the thermal
3734 * throttling interrupt comes every second. Thus, the total logging
3735 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
3736 * for throttling interrupt) = 60 seconds.
3737 */
3738 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3739 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3740
0fa49558
AX
3741 /* Registers mapping */
3742 /* TODO: block userspace mapping of io register */
da69c161
KW
3743 if (adev->asic_type >= CHIP_BONAIRE) {
3744 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3745 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3746 } else {
3747 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3748 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3749 }
d38ceaf9 3750
6c08e0ef
EQ
3751 for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3752 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3753
d38ceaf9 3754 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
b8920e1e 3755 if (!adev->rmmio)
d38ceaf9 3756 return -ENOMEM;
b8920e1e 3757
d38ceaf9 3758 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
b8920e1e 3759 DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size);
d38ceaf9 3760
436afdfa
PY
3761 /*
3762 * Reset domain needs to be present early, before the XGMI hive is
3763 * discovered (if any) and initialized, so the reset sem and in_gpu reset
3764 * flag can be used early during init and before calling RREG32.
3765 */
3766 adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3767 if (!adev->reset_domain)
3768 return -ENOMEM;
3769
3aa0115d
ML
3770 /* detect hw virtualization here */
3771 amdgpu_detect_virtualization(adev);
3772
04e85958
TL
3773 amdgpu_device_get_pcie_info(adev);
3774
dffa11b4
ML
3775 r = amdgpu_device_get_job_timeout_settings(adev);
3776 if (r) {
3777 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4ef87d8f 3778 return r;
a190d1c7
XY
3779 }
3780
d38ceaf9 3781 /* early init functions */
06ec9070 3782 r = amdgpu_device_ip_early_init(adev);
d38ceaf9 3783 if (r)
4ef87d8f 3784 return r;
d38ceaf9 3785
02ff519e
AD
3786 amdgpu_device_set_mcbp(adev);
3787
b7cdb41e
ML
3788 /* Get rid of things like offb */
3789 r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
3790 if (r)
3791 return r;
3792
4d33e704
SK
3793 /* Enable TMZ based on IP_VERSION */
3794 amdgpu_gmc_tmz_set(adev);
3795
957b0787 3796 amdgpu_gmc_noretry_set(adev);
4a0165f0
VS
3797 /* Need to get xgmi info early to decide the reset behavior*/
3798 if (adev->gmc.xgmi.supported) {
3799 r = adev->gfxhub.funcs->get_xgmi_info(adev);
3800 if (r)
3801 return r;
3802 }
3803
8e6d0b69 3804 /* enable PCIE atomic ops */
b4520bfd
GW
3805 if (amdgpu_sriov_vf(adev)) {
3806 if (adev->virt.fw_reserve.p_pf2vf)
3807 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
3808 adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
3809 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
0e768043
YZ
3810 /* APUs with gfx9 onwards don't rely on PCIe atomics; their
3811 * internal path natively supports atomics, so set have_atomics_support to true.
3812 */
b4520bfd
GW
3813 } else if ((adev->flags & AMD_IS_APU) &&
3814 (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))) {
0e768043 3815 adev->have_atomics_support = true;
b4520bfd 3816 } else {
8e6d0b69 3817 adev->have_atomics_support =
3818 !pci_enable_atomic_ops_to_root(adev->pdev,
3819 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3820 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
b4520bfd
GW
3821 }
3822
8e6d0b69 3823 if (!adev->have_atomics_support)
3824 dev_info(adev->dev, "PCIE atomic ops is not supported\n");
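	/*
	 * Summary of the atomics detection above: SR-IOV VFs read the
	 * capability from the PF2VF info, gfx9+ APUs support atomics over
	 * their internal path, and bare-metal dGPUs ask the root port via
	 * pci_enable_atomic_ops_to_root().
	 */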
3825
6585661d 3826 /* doorbell bar mapping and doorbell index init*/
43c064db 3827 amdgpu_doorbell_init(adev);
6585661d 3828
9475a943
SL
3829 if (amdgpu_emu_mode == 1) {
3830 /* post the asic on emulation mode */
3831 emu_soc_asic_init(adev);
bfca0289 3832 goto fence_driver_init;
9475a943 3833 }
bfca0289 3834
04442bf7
LL
3835 amdgpu_reset_init(adev);
3836
4e99a44e 3837 /* detect if we are with an SRIOV vbios */
b4520bfd
GW
3838 if (adev->bios)
3839 amdgpu_device_detect_sriov_bios(adev);
048765ad 3840
95e8e59e
AD
3841 /* check if we need to reset the asic
3842 * E.g., driver was not cleanly unloaded previously, etc.
3843 */
f14899fd 3844 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
e3c1b071 3845 if (adev->gmc.xgmi.num_physical_nodes) {
3846 dev_info(adev->dev, "Pending hive reset.\n");
3847 adev->gmc.xgmi.pending_reset = true;
3848 /* Only need to init necessary block for SMU to handle the reset */
3849 for (i = 0; i < adev->num_ip_blocks; i++) {
3850 if (!adev->ip_blocks[i].status.valid)
3851 continue;
3852 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3853 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3854 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3855 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
751f43e7 3856 DRM_DEBUG("IP %s disabled for hw_init.\n",
e3c1b071 3857 adev->ip_blocks[i].version->funcs->name);
3858 adev->ip_blocks[i].status.hw = true;
3859 }
3860 }
3861 } else {
59e9fff1 3862 tmp = amdgpu_reset_method;
3863 /* It should do a default reset when loading or reloading the driver,
3864 * regardless of the module parameter reset_method.
3865 */
3866 amdgpu_reset_method = AMD_RESET_METHOD_NONE;
e3c1b071 3867 r = amdgpu_asic_reset(adev);
59e9fff1 3868 amdgpu_reset_method = tmp;
e3c1b071 3869 if (r) {
3870 dev_err(adev->dev, "asic reset on init failed\n");
3871 goto failed;
3872 }
95e8e59e
AD
3873 }
3874 }
3875
d38ceaf9 3876 /* Post card if necessary */
39c640c0 3877 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3878 if (!adev->bios) {
bec86378 3879 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3880 r = -EINVAL;
3881 goto failed;
d38ceaf9 3882 }
bec86378 3883 DRM_INFO("GPU posting now...\n");
4d2997ab 3884 r = amdgpu_device_asic_init(adev);
4e99a44e
ML
3885 if (r) {
3886 dev_err(adev->dev, "gpu post error!\n");
3887 goto failed;
3888 }
d38ceaf9
AD
3889 }
3890
9535a86a
SZ
3891 if (adev->bios) {
3892 if (adev->is_atom_fw) {
3893 /* Initialize clocks */
3894 r = amdgpu_atomfirmware_get_clock_info(adev);
3895 if (r) {
3896 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
3897 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3898 goto failed;
3899 }
3900 } else {
3901 /* Initialize clocks */
3902 r = amdgpu_atombios_get_clock_info(adev);
3903 if (r) {
3904 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
3905 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3906 goto failed;
3907 }
3908 /* init i2c buses */
3909 if (!amdgpu_device_has_dc_support(adev))
3910 amdgpu_atombios_i2c_init(adev);
a5bde2f9 3911 }
2c1a2784 3912 }
d38ceaf9 3913
bfca0289 3914fence_driver_init:
d38ceaf9 3915 /* Fence driver */
067f44c8 3916 r = amdgpu_fence_driver_sw_init(adev);
2c1a2784 3917 if (r) {
067f44c8 3918 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
e23b74aa 3919 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3920 goto failed;
2c1a2784 3921 }
d38ceaf9
AD
3922
3923 /* init the mode config */
4a580877 3924 drm_mode_config_init(adev_to_drm(adev));
d38ceaf9 3925
06ec9070 3926 r = amdgpu_device_ip_init(adev);
d38ceaf9 3927 if (r) {
06ec9070 3928 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3929 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
970fd197 3930 goto release_ras_con;
d38ceaf9
AD
3931 }
3932
8d35a259
LG
3933 amdgpu_fence_driver_hw_init(adev);
3934
d69b8971
YZ
3935 dev_info(adev->dev,
3936 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
3937 adev->gfx.config.max_shader_engines,
3938 adev->gfx.config.max_sh_per_se,
3939 adev->gfx.config.max_cu_per_sh,
3940 adev->gfx.cu_info.number);
3941
d38ceaf9
AD
3942 adev->accel_working = true;
3943
e59c0205
AX
3944 amdgpu_vm_check_compute_bug(adev);
3945
95844d20
MO
3946 /* Initialize the buffer migration limit. */
3947 if (amdgpu_moverate >= 0)
3948 max_MBps = amdgpu_moverate;
3949 else
3950 max_MBps = 8; /* Allow 8 MB/s. */
3951 /* Get a log2 for easy divisions. */
3952 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
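	/*
	 * E.g. the default of 8 MB/s gives log2_max_MBps = 3, so the later
	 * buffer-migration accounting can divide by the rate with a shift.
	 */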
3953
b0adca4d
EQ
3954 /*
3955 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3956 * Otherwise the mgpu fan boost feature will be skipped because the
3957 * gpu instance count would be too low.
3958 */
3959 amdgpu_register_gpu_instance(adev);
3960
d38ceaf9
AD
3961 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3962 * explicit gating rather than handling it automatically.
3963 */
e3c1b071 3964 if (!adev->gmc.xgmi.pending_reset) {
3965 r = amdgpu_device_ip_late_init(adev);
3966 if (r) {
3967 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
3968 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
970fd197 3969 goto release_ras_con;
e3c1b071 3970 }
3971 /* must succeed. */
3972 amdgpu_ras_resume(adev);
3973 queue_delayed_work(system_wq, &adev->delayed_init_work,
3974 msecs_to_jiffies(AMDGPU_RESUME_MS));
2c1a2784 3975 }
d38ceaf9 3976
38eecbe0
CL
3977 if (amdgpu_sriov_vf(adev)) {
3978 amdgpu_virt_release_full_gpu(adev, true);
2c738637 3979 flush_delayed_work(&adev->delayed_init_work);
38eecbe0 3980 }
2c738637 3981
90bcb9b5
EQ
3982 /*
3983 * Place the sysfs registration after `late_init`, as some of the
3984 * operations performed in `late_init` might affect the sysfs
3985 * interface creation.
3986 */
3987 r = amdgpu_atombios_sysfs_init(adev);
3988 if (r)
3989 drm_err(&adev->ddev,
3990 "registering atombios sysfs failed (%d).\n", r);
3991
3992 r = amdgpu_pm_sysfs_init(adev);
3993 if (r)
3994 DRM_ERROR("registering pm sysfs failed (%d).\n", r);
3995
3996 r = amdgpu_ucode_sysfs_init(adev);
3997 if (r) {
3998 adev->ucode_sysfs_en = false;
3999 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
4000 } else
4001 adev->ucode_sysfs_en = true;
4002
77f3a5cd 4003 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
5aea5327 4004 if (r)
77f3a5cd 4005 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166 4006
7957ec80
LL
4007 amdgpu_fru_sysfs_init(adev);
4008
d155bef0
AB
4009 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4010 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
4011 if (r)
4012 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
4013
c1dd4aa6
AG
4014 /* Have stored pci confspace at hand for restore in sudden PCI error */
4015 if (amdgpu_device_cache_pci_state(adev->pdev))
4016 pci_restore_state(pdev);
4017
8c3dd61c
KHF
4018 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
4019 /* this will fail for cards that aren't VGA class devices, just
b8920e1e
SS
4020 * ignore it
4021 */
8c3dd61c 4022 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
bf44e8ce 4023 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
8c3dd61c 4024
d37a3929
OC
4025 px = amdgpu_device_supports_px(ddev);
4026
4027 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
4028 apple_gmux_detect(NULL, NULL)))
8c3dd61c
KHF
4029 vga_switcheroo_register_client(adev->pdev,
4030 &amdgpu_switcheroo_ops, px);
d37a3929
OC
4031
4032 if (px)
8c3dd61c 4033 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
8c3dd61c 4034
e3c1b071 4035 if (adev->gmc.xgmi.pending_reset)
4036 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
4037 msecs_to_jiffies(AMDGPU_RESUME_MS));
4038
4a74c38c
PY
4039 amdgpu_device_check_iommu_direct_map(adev);
4040
d38ceaf9 4041 return 0;
83ba126a 4042
970fd197 4043release_ras_con:
38eecbe0
CL
4044 if (amdgpu_sriov_vf(adev))
4045 amdgpu_virt_release_full_gpu(adev, true);
4046
4047 /* failed in exclusive mode due to timeout */
4048 if (amdgpu_sriov_vf(adev) &&
4049 !amdgpu_sriov_runtime(adev) &&
4050 amdgpu_virt_mmio_blocked(adev) &&
4051 !amdgpu_virt_wait_reset(adev)) {
4052 dev_err(adev->dev, "VF exclusive mode timeout\n");
4053 /* Don't send request since VF is inactive. */
4054 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4055 adev->virt.ops = NULL;
4056 r = -EAGAIN;
4057 }
970fd197
SY
4058 amdgpu_release_ras_context(adev);
4059
83ba126a 4060failed:
89041940 4061 amdgpu_vf_error_trans_all(adev);
8840a387 4062
83ba126a 4063 return r;
d38ceaf9
AD
4064}
4065
07775fc1
AG
4066static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
4067{
62d5f9f7 4068
07775fc1
AG
4069 /* Clear all CPU mappings pointing to this device */
4070 unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
4071
4072 /* Unmap all mapped bars - Doorbell, registers and VRAM */
43c064db 4073 amdgpu_doorbell_fini(adev);
07775fc1
AG
4074
4075 iounmap(adev->rmmio);
4076 adev->rmmio = NULL;
4077 if (adev->mman.aper_base_kaddr)
4078 iounmap(adev->mman.aper_base_kaddr);
4079 adev->mman.aper_base_kaddr = NULL;
4080
4081 /* Memory manager related */
a0ba1279 4082 if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
07775fc1
AG
4083 arch_phys_wc_del(adev->gmc.vram_mtrr);
4084 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4085 }
4086}
4087
d38ceaf9 4088/**
bbe04dec 4089 * amdgpu_device_fini_hw - tear down the driver
d38ceaf9
AD
4090 *
4091 * @adev: amdgpu_device pointer
4092 *
4093 * Tear down the driver info (all asics).
4094 * Called at driver shutdown.
4095 */
72c8c97b 4096void amdgpu_device_fini_hw(struct amdgpu_device *adev)
d38ceaf9 4097{
aac89168 4098 dev_info(adev->dev, "amdgpu: finishing device.\n");
9f875167 4099 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 4100 adev->shutdown = true;
9f875167 4101
752c683d
ML
4102 /* make sure IB test finished before entering exclusive mode
4103 * to avoid preemption on IB test
b8920e1e 4104 */
519b8b76 4105 if (amdgpu_sriov_vf(adev)) {
752c683d 4106 amdgpu_virt_request_full_gpu(adev, false);
519b8b76
BZ
4107 amdgpu_virt_fini_data_exchange(adev);
4108 }
752c683d 4109
e5b03032
ML
4110 /* disable all interrupts */
4111 amdgpu_irq_disable_all(adev);
47fc644f 4112 if (adev->mode_info.mode_config_initialized) {
1053b9c9 4113 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4a580877 4114 drm_helper_force_disable_all(adev_to_drm(adev));
ff97cba8 4115 else
4a580877 4116 drm_atomic_helper_shutdown(adev_to_drm(adev));
ff97cba8 4117 }
8d35a259 4118 amdgpu_fence_driver_hw_fini(adev);
72c8c97b 4119
cd3a8a59 4120 if (adev->mman.initialized)
9bff18d1 4121 drain_workqueue(adev->mman.bdev.wq);
98f56188 4122
53e9d836 4123 if (adev->pm.sysfs_initialized)
7c868b59 4124 amdgpu_pm_sysfs_fini(adev);
72c8c97b
AG
4125 if (adev->ucode_sysfs_en)
4126 amdgpu_ucode_sysfs_fini(adev);
4127 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
7957ec80 4128 amdgpu_fru_sysfs_fini(adev);
72c8c97b 4129
232d1d43
SY
4130 /* disable ras feature must before hw fini */
4131 amdgpu_ras_pre_fini(adev);
4132
e9669fb7 4133 amdgpu_device_ip_fini_early(adev);
d10d0daa 4134
a3848df6
YW
4135 amdgpu_irq_fini_hw(adev);
4136
b6fd6e0f
SK
4137 if (adev->mman.initialized)
4138 ttm_device_clear_dma_mappings(&adev->mman.bdev);
894c6890 4139
d10d0daa 4140 amdgpu_gart_dummy_page_fini(adev);
07775fc1 4141
39934d3e
VP
4142 if (drm_dev_is_unplugged(adev_to_drm(adev)))
4143 amdgpu_device_unmap_mmio(adev);
87172e89 4144
72c8c97b
AG
4145}
4146
4147void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4148{
62d5f9f7 4149 int idx;
d37a3929 4150 bool px;
62d5f9f7 4151
8d35a259 4152 amdgpu_fence_driver_sw_fini(adev);
a5c5d8d5 4153 amdgpu_device_ip_fini(adev);
b31d3063 4154 amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
d38ceaf9 4155 adev->accel_working = false;
68ce8b24 4156 dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
04442bf7
LL
4157
4158 amdgpu_reset_fini(adev);
4159
d38ceaf9 4160 /* free i2c buses */
4562236b
HW
4161 if (!amdgpu_device_has_dc_support(adev))
4162 amdgpu_i2c_fini(adev);
bfca0289
SL
4163
4164 if (amdgpu_emu_mode != 1)
4165 amdgpu_atombios_fini(adev);
4166
d38ceaf9
AD
4167 kfree(adev->bios);
4168 adev->bios = NULL;
d37a3929
OC
4169
4170 px = amdgpu_device_supports_px(adev_to_drm(adev));
4171
4172 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
4173 apple_gmux_detect(NULL, NULL)))
84c8b22e 4174 vga_switcheroo_unregister_client(adev->pdev);
d37a3929
OC
4175
4176 if (px)
83ba126a 4177 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d37a3929 4178
38d6be81 4179 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
b8779475 4180 vga_client_unregister(adev->pdev);
e9bc1bf7 4181
62d5f9f7
LS
4182 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4183
4184 iounmap(adev->rmmio);
4185 adev->rmmio = NULL;
43c064db 4186 amdgpu_doorbell_fini(adev);
62d5f9f7
LS
4187 drm_dev_exit(idx);
4188 }
4189
d155bef0
AB
4190 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4191 amdgpu_pmu_fini(adev);
72de33f8 4192 if (adev->mman.discovery_bin)
a190d1c7 4193 amdgpu_discovery_fini(adev);
72c8c97b 4194
cfbb6b00
AG
4195 amdgpu_reset_put_reset_domain(adev->reset_domain);
4196 adev->reset_domain = NULL;
4197
72c8c97b
AG
4198 kfree(adev->pci_state);
4199
d38ceaf9
AD
4200}
4201
58144d28
ND
4202/**
4203 * amdgpu_device_evict_resources - evict device resources
4204 * @adev: amdgpu device object
4205 *
4206 * Evicts all ttm device resources (vram BOs, gart table) from the lru list
4207 * of the vram memory type. Mainly used for evicting device resources
4208 * at suspend time.
4209 *
4210 */
7863c155 4211static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
58144d28 4212{
7863c155
ML
4213 int ret;
4214
e53d9665
ML
4215 /* No need to evict vram on APUs for suspend to ram or s2idle */
4216 if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
7863c155 4217 return 0;
58144d28 4218
7863c155
ML
4219 ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4220 if (ret)
58144d28 4221 DRM_WARN("evicting device resources failed\n");
7863c155 4222 return ret;
58144d28 4223}
d38ceaf9
AD
4224
4225/*
4226 * Suspend & resume.
4227 */
4228/**
810ddc3a 4229 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 4230 *
87e3f136 4231 * @dev: drm dev pointer
87e3f136 4232 * @fbcon : notify the fbdev of suspend
d38ceaf9
AD
4233 *
4234 * Puts the hw in the suspend state (all asics).
4235 * Returns 0 for success or an error on failure.
4236 * Called at driver suspend.
4237 */
de185019 4238int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9 4239{
a2e15b0e 4240 struct amdgpu_device *adev = drm_to_adev(dev);
d7274ec7 4241 int r = 0;
d38ceaf9 4242
d38ceaf9
AD
4243 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4244 return 0;
4245
44779b43 4246 adev->in_suspend = true;
3fa8f89d 4247
47ea2076
SF
4248 /* Evict the majority of BOs before grabbing the full access */
4249 r = amdgpu_device_evict_resources(adev);
4250 if (r)
4251 return r;
4252
d7274ec7
BZ
4253 if (amdgpu_sriov_vf(adev)) {
4254 amdgpu_virt_fini_data_exchange(adev);
4255 r = amdgpu_virt_request_full_gpu(adev, false);
4256 if (r)
4257 return r;
4258 }
4259
3fa8f89d
S
4260 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4261 DRM_WARN("smart shift update failed\n");
4262
5f818173 4263 if (fbcon)
087451f3 4264 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
5f818173 4265
beff74bc 4266 cancel_delayed_work_sync(&adev->delayed_init_work);
0dee7263 4267 flush_delayed_work(&adev->gfx.gfx_off_delay_work);
a5459475 4268
5e6932fe 4269 amdgpu_ras_suspend(adev);
4270
2196927b 4271 amdgpu_device_ip_suspend_phase1(adev);
fe1053b7 4272
c004d44e 4273 if (!adev->in_s0ix)
5d3a2d95 4274 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
94fa5660 4275
7863c155
ML
4276 r = amdgpu_device_evict_resources(adev);
4277 if (r)
4278 return r;
d38ceaf9 4279
8d35a259 4280 amdgpu_fence_driver_hw_fini(adev);
d38ceaf9 4281
2196927b 4282 amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 4283
d7274ec7
BZ
4284 if (amdgpu_sriov_vf(adev))
4285 amdgpu_virt_release_full_gpu(adev, false);
4286
d38ceaf9
AD
4287 return 0;
4288}
4289
4290/**
810ddc3a 4291 * amdgpu_device_resume - initiate device resume
d38ceaf9 4292 *
87e3f136 4293 * @dev: drm dev pointer
87e3f136 4294 * @fbcon : notify the fbdev of resume
d38ceaf9
AD
4295 *
4296 * Bring the hw back to operating state (all asics).
4297 * Returns 0 for success or an error on failure.
4298 * Called at driver resume.
4299 */
de185019 4300int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9 4301{
1348969a 4302 struct amdgpu_device *adev = drm_to_adev(dev);
03161a6e 4303 int r = 0;
d38ceaf9 4304
d7274ec7
BZ
4305 if (amdgpu_sriov_vf(adev)) {
4306 r = amdgpu_virt_request_full_gpu(adev, true);
4307 if (r)
4308 return r;
4309 }
4310
d38ceaf9
AD
4311 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4312 return 0;
4313
62498733 4314 if (adev->in_s0ix)
bc143d8b 4315 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
628c36d7 4316
d38ceaf9 4317 /* post card */
39c640c0 4318 if (amdgpu_device_need_post(adev)) {
4d2997ab 4319 r = amdgpu_device_asic_init(adev);
74b0b157 4320 if (r)
aac89168 4321 dev_err(adev->dev, "amdgpu asic init failed\n");
74b0b157 4322 }
d38ceaf9 4323
06ec9070 4324 r = amdgpu_device_ip_resume(adev);
d7274ec7 4325
e6707218 4326 if (r) {
aac89168 4327 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
3c22c1ea 4328 goto exit;
e6707218 4329 }
8d35a259 4330 amdgpu_fence_driver_hw_init(adev);
5ceb54c6 4331
06ec9070 4332 r = amdgpu_device_ip_late_init(adev);
03161a6e 4333 if (r)
3c22c1ea 4334 goto exit;
d38ceaf9 4335
beff74bc
AD
4336 queue_delayed_work(system_wq, &adev->delayed_init_work,
4337 msecs_to_jiffies(AMDGPU_RESUME_MS));
4338
c004d44e 4339 if (!adev->in_s0ix) {
5d3a2d95
AD
4340 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4341 if (r)
3c22c1ea 4342 goto exit;
5d3a2d95 4343 }
756e6880 4344
3c22c1ea
SF
4345exit:
4346 if (amdgpu_sriov_vf(adev)) {
4347 amdgpu_virt_init_data_exchange(adev);
4348 amdgpu_virt_release_full_gpu(adev, true);
4349 }
4350
4351 if (r)
4352 return r;
4353
96a5d8d4 4354 /* Make sure IB tests flushed */
beff74bc 4355 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 4356
a2e15b0e 4357 if (fbcon)
087451f3 4358 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
d38ceaf9 4359
5e6932fe 4360 amdgpu_ras_resume(adev);
4361
d09ef243
AD
4362 if (adev->mode_info.num_crtc) {
4363 /*
4364 * Most of the connector probing functions try to acquire runtime pm
4365 * refs to ensure that the GPU is powered on when connector polling is
4366 * performed. Since we're calling this from a runtime PM callback,
4367 * trying to acquire rpm refs will cause us to deadlock.
4368 *
4369 * Since we're guaranteed to be holding the rpm lock, it's safe to
4370 * temporarily disable the rpm helpers so this doesn't deadlock us.
4371 */
23a1a9e5 4372#ifdef CONFIG_PM
d09ef243 4373 dev->dev->power.disable_depth++;
23a1a9e5 4374#endif
d09ef243
AD
4375 if (!adev->dc_enabled)
4376 drm_helper_hpd_irq_event(dev);
4377 else
4378 drm_kms_helper_hotplug_event(dev);
23a1a9e5 4379#ifdef CONFIG_PM
d09ef243 4380 dev->dev->power.disable_depth--;
23a1a9e5 4381#endif
d09ef243 4382 }
44779b43
RZ
4383 adev->in_suspend = false;
4384
dc907c9d
JX
4385 if (adev->enable_mes)
4386 amdgpu_mes_self_test(adev);
4387
3fa8f89d
S
4388 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4389 DRM_WARN("smart shift update failed\n");
4390
4d3b9ae5 4391 return 0;
d38ceaf9
AD
4392}
4393
e3ecdffa
AD
4394/**
4395 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4396 *
4397 * @adev: amdgpu_device pointer
4398 *
4399 * The list of all the hardware IPs that make up the asic is walked and
4400 * the check_soft_reset callbacks are run. check_soft_reset determines
4401 * if the asic is still hung or not.
4402 * Returns true if any of the IPs are still in a hung state, false if not.
4403 */
06ec9070 4404static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
4405{
4406 int i;
4407 bool asic_hang = false;
4408
f993d628
ML
4409 if (amdgpu_sriov_vf(adev))
4410 return true;
4411
8bc04c29
AD
4412 if (amdgpu_asic_need_full_reset(adev))
4413 return true;
4414
63fbf42f 4415 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4416 if (!adev->ip_blocks[i].status.valid)
63fbf42f 4417 continue;
a1255107
AD
4418 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4419 adev->ip_blocks[i].status.hang =
4420 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4421 if (adev->ip_blocks[i].status.hang) {
aac89168 4422 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
4423 asic_hang = true;
4424 }
4425 }
4426 return asic_hang;
4427}
4428
e3ecdffa
AD
4429/**
4430 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4431 *
4432 * @adev: amdgpu_device pointer
4433 *
4434 * The list of all the hardware IPs that make up the asic is walked and the
4435 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
4436 * handles any IP specific hardware or software state changes that are
4437 * necessary for a soft reset to succeed.
4438 * Returns 0 on success, negative error code on failure.
4439 */
06ec9070 4440static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
4441{
4442 int i, r = 0;
4443
4444 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4445 if (!adev->ip_blocks[i].status.valid)
d31a501e 4446 continue;
a1255107
AD
4447 if (adev->ip_blocks[i].status.hang &&
4448 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4449 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
4450 if (r)
4451 return r;
4452 }
4453 }
4454
4455 return 0;
4456}
4457
e3ecdffa
AD
4458/**
4459 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4460 *
4461 * @adev: amdgpu_device pointer
4462 *
4463 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
4464 * reset is necessary to recover.
4465 * Returns true if a full asic reset is required, false if not.
4466 */
06ec9070 4467static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 4468{
da146d3b
AD
4469 int i;
4470
8bc04c29
AD
4471 if (amdgpu_asic_need_full_reset(adev))
4472 return true;
4473
da146d3b 4474 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4475 if (!adev->ip_blocks[i].status.valid)
da146d3b 4476 continue;
a1255107
AD
4477 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4478 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4479 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
4480 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4481 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 4482 if (adev->ip_blocks[i].status.hang) {
aac89168 4483 dev_info(adev->dev, "Some block need full reset!\n");
da146d3b
AD
4484 return true;
4485 }
4486 }
35d782fe
CZ
4487 }
4488 return false;
4489}
4490
e3ecdffa
AD
4491/**
4492 * amdgpu_device_ip_soft_reset - do a soft reset
4493 *
4494 * @adev: amdgpu_device pointer
4495 *
4496 * The list of all the hardware IPs that make up the asic is walked and the
4497 * soft_reset callbacks are run if the block is hung. soft_reset handles any
4498 * IP specific hardware or software state changes that are necessary to soft
4499 * reset the IP.
4500 * Returns 0 on success, negative error code on failure.
4501 */
06ec9070 4502static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4503{
4504 int i, r = 0;
4505
4506 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4507 if (!adev->ip_blocks[i].status.valid)
35d782fe 4508 continue;
a1255107
AD
4509 if (adev->ip_blocks[i].status.hang &&
4510 adev->ip_blocks[i].version->funcs->soft_reset) {
4511 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
4512 if (r)
4513 return r;
4514 }
4515 }
4516
4517 return 0;
4518}
4519
e3ecdffa
AD
4520/**
4521 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4522 *
4523 * @adev: amdgpu_device pointer
4524 *
4525 * The list of all the hardware IPs that make up the asic is walked and the
4526 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
4527 * handles any IP specific hardware or software state changes that are
4528 * necessary after the IP has been soft reset.
4529 * Returns 0 on success, negative error code on failure.
4530 */
06ec9070 4531static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4532{
4533 int i, r = 0;
4534
4535 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4536 if (!adev->ip_blocks[i].status.valid)
35d782fe 4537 continue;
a1255107
AD
4538 if (adev->ip_blocks[i].status.hang &&
4539 adev->ip_blocks[i].version->funcs->post_soft_reset)
4540 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
4541 if (r)
4542 return r;
4543 }
4544
4545 return 0;
4546}
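/*
 * The helpers above form the soft-reset path used by
 * amdgpu_device_pre_asic_reset(): check_soft_reset() marks hung blocks,
 * pre_soft_reset()/soft_reset()/post_soft_reset() are run on them, and if a
 * block is still hung afterwards the code falls back to a full reset.
 */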
4547
e3ecdffa 4548/**
c33adbc7 4549 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
4550 *
4551 * @adev: amdgpu_device pointer
4552 *
4553 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
4554 * restore things like GPUVM page tables after a GPU reset where
4555 * the contents of VRAM might be lost.
403009bf
CK
4556 *
4557 * Returns:
4558 * 0 on success, negative error code on failure.
e3ecdffa 4559 */
c33adbc7 4560static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 4561{
c41d1cf6 4562 struct dma_fence *fence = NULL, *next = NULL;
403009bf 4563 struct amdgpu_bo *shadow;
e18aaea7 4564 struct amdgpu_bo_vm *vmbo;
403009bf 4565 long r = 1, tmo;
c41d1cf6
ML
4566
4567 if (amdgpu_sriov_runtime(adev))
b045d3af 4568 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
4569 else
4570 tmo = msecs_to_jiffies(100);
4571
aac89168 4572 dev_info(adev->dev, "recover vram bo from shadow start\n");
c41d1cf6 4573 mutex_lock(&adev->shadow_list_lock);
e18aaea7 4574 list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4994d1f0
LC
4575 /* If vm is compute context or adev is APU, shadow will be NULL */
4576 if (!vmbo->shadow)
4577 continue;
4578 shadow = vmbo->shadow;
4579
403009bf 4580 /* No need to recover an evicted BO */
d3116756
CK
4581 if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4582 shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4583 shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
403009bf
CK
4584 continue;
4585
4586 r = amdgpu_bo_restore_shadow(shadow, &next);
4587 if (r)
4588 break;
4589
c41d1cf6 4590 if (fence) {
1712fb1a 4591 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
4592 dma_fence_put(fence);
4593 fence = next;
1712fb1a 4594 if (tmo == 0) {
4595 r = -ETIMEDOUT;
c41d1cf6 4596 break;
1712fb1a 4597 } else if (tmo < 0) {
4598 r = tmo;
4599 break;
4600 }
403009bf
CK
4601 } else {
4602 fence = next;
c41d1cf6 4603 }
c41d1cf6
ML
4604 }
4605 mutex_unlock(&adev->shadow_list_lock);
4606
403009bf
CK
4607 if (fence)
4608 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
4609 dma_fence_put(fence);
4610
1712fb1a 4611 if (r < 0 || tmo <= 0) {
aac89168 4612 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
4613 return -EIO;
4614 }
c41d1cf6 4615
aac89168 4616 dev_info(adev->dev, "recover vram bo from shadow done\n");
403009bf 4617 return 0;
c41d1cf6
ML
4618}
4619
a90ad3c2 4620
e3ecdffa 4621/**
06ec9070 4622 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e 4623 *
982a820b 4624 * @adev: amdgpu_device pointer
87e3f136 4625 * @from_hypervisor: request from hypervisor
5740682e
ML
4626 *
4627 * Do VF FLR and reinitialize the ASIC.
3f48c681 4628 * Returns 0 on success, negative error code on failure.
e3ecdffa
AD
4629 */
4630static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4631 bool from_hypervisor)
5740682e
ML
4632{
4633 int r;
a5f67c93 4634 struct amdgpu_hive_info *hive = NULL;
7258fa31 4635 int retry_limit = 0;
5740682e 4636
7258fa31 4637retry:
c004d44e 4638 amdgpu_amdkfd_pre_reset(adev);
428890a3 4639
5740682e
ML
4640 if (from_hypervisor)
4641 r = amdgpu_virt_request_full_gpu(adev, true);
4642 else
4643 r = amdgpu_virt_reset_gpu(adev);
4644 if (r)
4645 return r;
f734b213 4646 amdgpu_irq_gpu_reset_resume_helper(adev);
a90ad3c2 4647
83f24a8f
HC
4648 /* some sw clean up VF needs to do before recover */
4649 amdgpu_virt_post_reset(adev);
4650
a90ad3c2 4651 /* Resume IP prior to SMC */
06ec9070 4652 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
4653 if (r)
4654 goto error;
a90ad3c2 4655
c9ffa427 4656 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 4657
7a3e0bb2
RZ
4658 r = amdgpu_device_fw_loading(adev);
4659 if (r)
4660 return r;
4661
a90ad3c2 4662 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 4663 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
4664 if (r)
4665 goto error;
a90ad3c2 4666
a5f67c93
ZL
4667 hive = amdgpu_get_xgmi_hive(adev);
4668 /* Update PSP FW topology after reset */
4669 if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4670 r = amdgpu_xgmi_update_topology(hive, adev);
4671
4672 if (hive)
4673 amdgpu_put_xgmi_hive(hive);
4674
4675 if (!r) {
a5f67c93 4676 r = amdgpu_ib_ring_tests(adev);
9c12f5cd 4677
c004d44e 4678 amdgpu_amdkfd_post_reset(adev);
a5f67c93 4679 }
a90ad3c2 4680
abc34253 4681error:
c41d1cf6 4682 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 4683 amdgpu_inc_vram_lost(adev);
c33adbc7 4684 r = amdgpu_device_recover_vram(adev);
a90ad3c2 4685 }
437f3e0b 4686 amdgpu_virt_release_full_gpu(adev, true);
a90ad3c2 4687
7258fa31
SK
4688 if (AMDGPU_RETRY_SRIOV_RESET(r)) {
4689 if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
4690 retry_limit++;
4691 goto retry;
4692 } else
4693 DRM_ERROR("GPU reset retry is beyond the retry limit\n");
4694 }
4695
a90ad3c2
ML
4696 return r;
4697}
4698
9a1cddd6 4699/**
4700 * amdgpu_device_has_job_running - check if there is any job in mirror list
4701 *
982a820b 4702 * @adev: amdgpu_device pointer
9a1cddd6 4703 *
4704 * check if there is any job in mirror list
4705 */
4706bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4707{
4708 int i;
4709 struct drm_sched_job *job;
4710
4711 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4712 struct amdgpu_ring *ring = adev->rings[i];
4713
4714 if (!ring || !ring->sched.thread)
4715 continue;
4716
4717 spin_lock(&ring->sched.job_list_lock);
6efa4b46
LT
4718 job = list_first_entry_or_null(&ring->sched.pending_list,
4719 struct drm_sched_job, list);
9a1cddd6 4720 spin_unlock(&ring->sched.job_list_lock);
4721 if (job)
4722 return true;
4723 }
4724 return false;
4725}
4726
12938fad
CK
4727/**
4728 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4729 *
982a820b 4730 * @adev: amdgpu_device pointer
12938fad
CK
4731 *
4732 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4733 * a hung GPU.
4734 */
4735bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4736{
12938fad 4737
3ba7b418
AG
4738 if (amdgpu_gpu_recovery == 0)
4739 goto disabled;
4740
1a11a65d
YC
4741 /* Skip soft reset check in fatal error mode */
4742 if (!amdgpu_ras_is_poison_mode_supported(adev))
4743 return true;
4744
3ba7b418
AG
4745 if (amdgpu_sriov_vf(adev))
4746 return true;
4747
4748 if (amdgpu_gpu_recovery == -1) {
4749 switch (adev->asic_type) {
b3523c45
AD
4750#ifdef CONFIG_DRM_AMDGPU_SI
4751 case CHIP_VERDE:
4752 case CHIP_TAHITI:
4753 case CHIP_PITCAIRN:
4754 case CHIP_OLAND:
4755 case CHIP_HAINAN:
4756#endif
4757#ifdef CONFIG_DRM_AMDGPU_CIK
4758 case CHIP_KAVERI:
4759 case CHIP_KABINI:
4760 case CHIP_MULLINS:
4761#endif
4762 case CHIP_CARRIZO:
4763 case CHIP_STONEY:
4764 case CHIP_CYAN_SKILLFISH:
3ba7b418 4765 goto disabled;
b3523c45
AD
4766 default:
4767 break;
3ba7b418 4768 }
12938fad
CK
4769 }
4770
4771 return true;
3ba7b418
AG
4772
4773disabled:
aac89168 4774 dev_info(adev->dev, "GPU recovery disabled.\n");
3ba7b418 4775 return false;
12938fad
CK
4776}
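/*
 * Note on amdgpu_gpu_recovery as interpreted above: 0 disables recovery
 * entirely, -1 (auto) enables it except on the legacy SI/CIK/APU parts
 * listed above, and SR-IOV VFs as well as the RAS fatal-error path (poison
 * mode not supported) opt in whenever recovery is not explicitly disabled.
 */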
4777
5c03e584
FX
4778int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
4779{
47fc644f
SS
4780 u32 i;
4781 int ret = 0;
5c03e584 4782
47fc644f 4783 amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5c03e584 4784
47fc644f 4785 dev_info(adev->dev, "GPU mode1 reset\n");
5c03e584 4786
47fc644f
SS
4787 /* disable BM */
4788 pci_clear_master(adev->pdev);
5c03e584 4789
47fc644f 4790 amdgpu_device_cache_pci_state(adev->pdev);
5c03e584 4791
47fc644f
SS
4792 if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
4793 dev_info(adev->dev, "GPU smu mode1 reset\n");
4794 ret = amdgpu_dpm_mode1_reset(adev);
4795 } else {
4796 dev_info(adev->dev, "GPU psp mode1 reset\n");
4797 ret = psp_gpu_reset(adev);
4798 }
5c03e584 4799
47fc644f 4800 if (ret)
2c0f880a 4801 goto mode1_reset_failed;
5c03e584 4802
47fc644f 4803 amdgpu_device_load_pci_state(adev->pdev);
15c5c5f5
LL
4804 ret = amdgpu_psp_wait_for_bootloader(adev);
4805 if (ret)
2c0f880a 4806 goto mode1_reset_failed;
5c03e584 4807
47fc644f
SS
4808 /* wait for asic to come out of reset */
4809 for (i = 0; i < adev->usec_timeout; i++) {
4810 u32 memsize = adev->nbio.funcs->get_memsize(adev);
5c03e584 4811
47fc644f
SS
4812 if (memsize != 0xffffffff)
4813 break;
4814 udelay(1);
4815 }
5c03e584 4816
2c0f880a
HZ
4817 if (i >= adev->usec_timeout) {
4818 ret = -ETIMEDOUT;
4819 goto mode1_reset_failed;
4820 }
4821
47fc644f 4822 amdgpu_atombios_scratch_regs_engine_hung(adev, false);
15c5c5f5 4823
2c0f880a
HZ
4824 return 0;
4825
4826mode1_reset_failed:
4827 dev_err(adev->dev, "GPU mode1 reset failed\n");
47fc644f 4828 return ret;
5c03e584 4829}
5c6dd71e 4830
e3c1b071 4831int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
04442bf7 4832 struct amdgpu_reset_context *reset_context)
26bc5340 4833{
5c1e6fa4 4834 int i, r = 0;
04442bf7
LL
4835 struct amdgpu_job *job = NULL;
4836 bool need_full_reset =
4837 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4838
4839 if (reset_context->reset_req_dev == adev)
4840 job = reset_context->job;
71182665 4841
b602ca5f
TZ
4842 if (amdgpu_sriov_vf(adev)) {
4843 /* stop the data exchange thread */
4844 amdgpu_virt_fini_data_exchange(adev);
4845 }
4846
9e225fb9
AG
4847 amdgpu_fence_driver_isr_toggle(adev, true);
4848
71182665 4849 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
4850 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4851 struct amdgpu_ring *ring = adev->rings[i];
4852
51687759 4853 if (!ring || !ring->sched.thread)
0875dc9e 4854 continue;
5740682e 4855
b8920e1e
SS
4856 /* Clear job fences from fence drv to avoid force_completion
4857 * leaving NULL and vm flush fences in fence drv
4858 */
5c1e6fa4 4859 amdgpu_fence_driver_clear_job_fences(ring);
c530b02f 4860
2f9d4084
ML
4861 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
4862 amdgpu_fence_driver_force_completion(ring);
0875dc9e 4863 }
d38ceaf9 4864
9e225fb9
AG
4865 amdgpu_fence_driver_isr_toggle(adev, false);
4866
ff99849b 4867 if (job && job->vm)
222b5f04
AG
4868 drm_sched_increase_karma(&job->base);
4869
04442bf7 4870 r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
404b277b 4871 /* If reset handler not implemented, continue; otherwise return */
b8920e1e 4872 if (r == -EOPNOTSUPP)
404b277b
LL
4873 r = 0;
4874 else
04442bf7
LL
4875 return r;
4876
1d721ed6 4877 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
4878 if (!amdgpu_sriov_vf(adev)) {
4879
4880 if (!need_full_reset)
4881 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4882
360cd081
LG
4883 if (!need_full_reset && amdgpu_gpu_recovery &&
4884 amdgpu_device_ip_check_soft_reset(adev)) {
26bc5340
AG
4885 amdgpu_device_ip_pre_soft_reset(adev);
4886 r = amdgpu_device_ip_soft_reset(adev);
4887 amdgpu_device_ip_post_soft_reset(adev);
4888 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 4889 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
26bc5340
AG
4890 need_full_reset = true;
4891 }
4892 }
4893
4894 if (need_full_reset)
4895 r = amdgpu_device_ip_suspend(adev);
04442bf7
LL
4896 if (need_full_reset)
4897 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4898 else
4899 clear_bit(AMDGPU_NEED_FULL_RESET,
4900 &reset_context->flags);
26bc5340
AG
4901 }
4902
4903 return r;
4904}
4905
15fd09a0
SA
4906static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
4907{
15fd09a0
SA
4908 int i;
4909
38a15ad9 4910 lockdep_assert_held(&adev->reset_domain->sem);
15fd09a0
SA
4911
4912 for (i = 0; i < adev->num_regs; i++) {
651d7ee6
SA
4913 adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]);
4914 trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i],
4915 adev->reset_dump_reg_value[i]);
15fd09a0
SA
4916 }
4917
4918 return 0;
4919}
4920
3d8785f6
SA
4921#ifdef CONFIG_DEV_COREDUMP
4922static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
4923 size_t count, void *data, size_t datalen)
4924{
4925 struct drm_printer p;
4926 struct amdgpu_device *adev = data;
4927 struct drm_print_iterator iter;
4928 int i;
4929
4930 iter.data = buffer;
4931 iter.offset = 0;
4932 iter.start = offset;
4933 iter.remain = count;
4934
4935 p = drm_coredump_printer(&iter);
4936
4937 drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
4938 drm_printf(&p, "kernel: " UTS_RELEASE "\n");
4939 drm_printf(&p, "module: " KBUILD_MODNAME "\n");
4940 drm_printf(&p, "time: %lld.%09ld\n", adev->reset_time.tv_sec, adev->reset_time.tv_nsec);
4941 if (adev->reset_task_info.pid)
4942 drm_printf(&p, "process_name: %s PID: %d\n",
4943 adev->reset_task_info.process_name,
4944 adev->reset_task_info.pid);
4945
4946 if (adev->reset_vram_lost)
4947 drm_printf(&p, "VRAM is lost due to GPU reset!\n");
4948 if (adev->num_regs) {
4949 drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n");
4950
4951 for (i = 0; i < adev->num_regs; i++)
4952 drm_printf(&p, "0x%08x: 0x%08x\n",
4953 adev->reset_dump_reg_list[i],
4954 adev->reset_dump_reg_value[i]);
4955 }
4956
4957 return count - iter.remain;
4958}
4959
4960static void amdgpu_devcoredump_free(void *data)
4961{
4962}
4963
4964static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev)
4965{
4966 struct drm_device *dev = adev_to_drm(adev);
4967
4968 ktime_get_ts64(&adev->reset_time);
d68ccdb2 4969 dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_NOWAIT,
3d8785f6
SA
4970 amdgpu_devcoredump_read, amdgpu_devcoredump_free);
4971}
4972#endif
4973
04442bf7
LL
4974int amdgpu_do_asic_reset(struct list_head *device_list_handle,
4975 struct amdgpu_reset_context *reset_context)
26bc5340
AG
4976{
4977 struct amdgpu_device *tmp_adev = NULL;
04442bf7 4978 bool need_full_reset, skip_hw_reset, vram_lost = false;
26bc5340 4979 int r = 0;
f5c7e779 4980 bool gpu_reset_for_dev_remove = 0;
26bc5340 4981
04442bf7
LL
4982 /* Try reset handler method first */
4983 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
4984 reset_list);
15fd09a0 4985 amdgpu_reset_reg_dumps(tmp_adev);
0a83bb35
LL
4986
4987 reset_context->reset_device_list = device_list_handle;
04442bf7 4988 r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
404b277b 4989 /* If reset handler not implemented, continue; otherwise return */
b8920e1e 4990 if (r == -EOPNOTSUPP)
404b277b
LL
4991 r = 0;
4992 else
04442bf7
LL
4993 return r;
4994
4995 /* Reset handler not implemented, use the default method */
4996 need_full_reset =
4997 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4998 skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
4999
f5c7e779
YC
5000 gpu_reset_for_dev_remove =
5001 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5002 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5003
26bc5340 5004 /*
655ce9cb 5005 * ASIC reset has to be done on all XGMI hive nodes ASAP
26bc5340
AG
5006 * to allow proper link negotiation in FW (within 1 sec)
5007 */
7ac71382 5008 if (!skip_hw_reset && need_full_reset) {
655ce9cb 5009 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
041a62bc 5010 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 5011 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
e3c1b071 5012 tmp_adev->gmc.xgmi.pending_reset = false;
c96cf282 5013 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
5014 r = -EALREADY;
5015 } else
5016 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 5017
041a62bc 5018 if (r) {
aac89168 5019 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4a580877 5020 r, adev_to_drm(tmp_adev)->unique);
041a62bc 5021 break;
ce316fa5
LM
5022 }
5023 }
5024
041a62bc
AG
5025 /* For XGMI wait for all resets to complete before proceeding */
5026 if (!r) {
655ce9cb 5027 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
ce316fa5
LM
5028 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5029 flush_work(&tmp_adev->xgmi_reset_work);
5030 r = tmp_adev->asic_reset_res;
5031 if (r)
5032 break;
ce316fa5
LM
5033 }
5034 }
5035 }
ce316fa5 5036 }
26bc5340 5037
43c4d576 5038 if (!r && amdgpu_ras_intr_triggered()) {
655ce9cb 5039 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5e67bba3 5040 if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops &&
5041 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
5042 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev);
43c4d576
JC
5043 }
5044
00eaa571 5045 amdgpu_ras_intr_cleared();
43c4d576 5046 }
00eaa571 5047
f5c7e779
YC
5048 /* Since the mode1 reset affects base ip blocks, the
5049 * phase1 ip blocks need to be resumed. Otherwise there
5050 * will be a BIOS signature error and the psp bootloader
5051 * can't load kdb on the next amdgpu install.
5052 */
5053 if (gpu_reset_for_dev_remove) {
5054 list_for_each_entry(tmp_adev, device_list_handle, reset_list)
5055 amdgpu_device_ip_resume_phase1(tmp_adev);
5056
5057 goto end;
5058 }
5059
655ce9cb 5060 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
26bc5340
AG
5061 if (need_full_reset) {
5062 /* post card */
e3c1b071 5063 r = amdgpu_device_asic_init(tmp_adev);
5064 if (r) {
aac89168 5065 dev_warn(tmp_adev->dev, "asic atom init failed!");
e3c1b071 5066 } else {
26bc5340 5067 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
9cec53c1 5068
26bc5340
AG
5069 r = amdgpu_device_ip_resume_phase1(tmp_adev);
5070 if (r)
5071 goto out;
5072
5073 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
3d8785f6
SA
5074#ifdef CONFIG_DEV_COREDUMP
5075 tmp_adev->reset_vram_lost = vram_lost;
5076 memset(&tmp_adev->reset_task_info, 0,
5077 sizeof(tmp_adev->reset_task_info));
5078 if (reset_context->job && reset_context->job->vm)
5079 tmp_adev->reset_task_info =
5080 reset_context->job->vm->task_info;
5081 amdgpu_reset_capture_coredumpm(tmp_adev);
5082#endif
26bc5340 5083 if (vram_lost) {
77e7f829 5084 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 5085 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
5086 }
5087
26bc5340
AG
5088 r = amdgpu_device_fw_loading(tmp_adev);
5089 if (r)
5090 return r;
5091
5092 r = amdgpu_device_ip_resume_phase2(tmp_adev);
5093 if (r)
5094 goto out;
5095
5096 if (vram_lost)
5097 amdgpu_device_fill_reset_magic(tmp_adev);
5098
fdafb359
EQ
5099 /*
5100 * Add this ASIC back as tracked since the reset
5101 * already completed successfully.
5102 */
5103 amdgpu_register_gpu_instance(tmp_adev);
5104
04442bf7
LL
5105 if (!reset_context->hive &&
5106 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
e3c1b071 5107 amdgpu_xgmi_add_device(tmp_adev);
5108
7c04ca50 5109 r = amdgpu_device_ip_late_init(tmp_adev);
5110 if (r)
5111 goto out;
5112
087451f3 5113 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
565d1941 5114
e8fbaf03
GC
5115 /*
5116 * The GPU enters a bad state once the number of
5117 * faulty pages detected by ECC reaches the threshold,
5118 * and RAS recovery is scheduled next. So add a check
5119 * here to break recovery if the bad page threshold
5120 * has indeed been exceeded, and remind the user to
5121 * retire this GPU or set a bigger bad_page_threshold
5122 * value to fix this the next time the driver is
5123 * probed.
5124 */
11003c68 5125 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
e8fbaf03
GC
5126 /* must succeed. */
5127 amdgpu_ras_resume(tmp_adev);
5128 } else {
5129 r = -EINVAL;
5130 goto out;
5131 }
e79a04d5 5132
26bc5340 5133 /* Update PSP FW topology after reset */
04442bf7
LL
5134 if (reset_context->hive &&
5135 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5136 r = amdgpu_xgmi_update_topology(
5137 reset_context->hive, tmp_adev);
26bc5340
AG
5138 }
5139 }
5140
26bc5340
AG
5141out:
5142 if (!r) {
5143 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5144 r = amdgpu_ib_ring_tests(tmp_adev);
5145 if (r) {
5146 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
26bc5340
AG
5147 need_full_reset = true;
5148 r = -EAGAIN;
5149 goto end;
5150 }
5151 }
5152
5153 if (!r)
5154 r = amdgpu_device_recover_vram(tmp_adev);
5155 else
5156 tmp_adev->asic_reset_res = r;
5157 }
5158
5159end:
04442bf7
LL
5160 if (need_full_reset)
5161 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5162 else
5163 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340
AG
5164 return r;
5165}
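/*
 * Illustrative sketch, not driver code: callers of amdgpu_do_asic_reset()
 * build the device list and reset context themselves; amdgpu_pci_slot_reset()
 * further below is the in-tree example. A minimal single-device caller,
 * ignoring the locking, MP1 state and scheduler handling that
 * amdgpu_device_gpu_recover() performs, might look like this
 * (example_full_reset_one_device() is a made-up name).
 */
static int example_full_reset_one_device(struct amdgpu_device *adev)
{
	struct amdgpu_reset_context reset_context;
	struct list_head device_list;

	memset(&reset_context, 0, sizeof(reset_context));
	INIT_LIST_HEAD(&device_list);
	list_add_tail(&adev->reset_list, &device_list);

	reset_context.method = AMD_RESET_METHOD_NONE;
	reset_context.reset_req_dev = adev;
	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);

	return amdgpu_do_asic_reset(&device_list, &reset_context);
}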
5166
e923be99 5167static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
26bc5340 5168{
5740682e 5169
a3a09142
AD
5170 switch (amdgpu_asic_reset_method(adev)) {
5171 case AMD_RESET_METHOD_MODE1:
5172 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5173 break;
5174 case AMD_RESET_METHOD_MODE2:
5175 adev->mp1_state = PP_MP1_STATE_RESET;
5176 break;
5177 default:
5178 adev->mp1_state = PP_MP1_STATE_NONE;
5179 break;
5180 }
26bc5340 5181}
d38ceaf9 5182
e923be99 5183static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
26bc5340 5184{
89041940 5185 amdgpu_vf_error_trans_all(adev);
a3a09142 5186 adev->mp1_state = PP_MP1_STATE_NONE;
91fb309d
HC
5187}
5188
3f12acc8
EQ
5189static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5190{
5191 struct pci_dev *p = NULL;
5192
5193 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5194 adev->pdev->bus->number, 1);
5195 if (p) {
5196 pm_runtime_enable(&(p->dev));
5197 pm_runtime_resume(&(p->dev));
5198 }
b85e285e
YY
5199
5200 pci_dev_put(p);
3f12acc8
EQ
5201}
5202
5203static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5204{
5205 enum amd_reset_method reset_method;
5206 struct pci_dev *p = NULL;
5207 u64 expires;
5208
5209 /*
5210 * For now, only BACO and mode1 reset are confirmed
5211 * to suffer the audio issue when not properly suspended.
5212 */
5213 reset_method = amdgpu_asic_reset_method(adev);
5214 if ((reset_method != AMD_RESET_METHOD_BACO) &&
5215 (reset_method != AMD_RESET_METHOD_MODE1))
5216 return -EINVAL;
5217
5218 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5219 adev->pdev->bus->number, 1);
5220 if (!p)
5221 return -ENODEV;
5222
5223 expires = pm_runtime_autosuspend_expiration(&(p->dev));
5224 if (!expires)
5225 /*
5226 * If we cannot get the audio device autosuspend delay,
5227 * a fixed 4s interval is used. Since 3s is the audio
5228 * controller's default autosuspend delay, the 4s used
5229 * here is guaranteed to cover it.
5230 */
54b7feb9 5231 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
5232
5233 while (!pm_runtime_status_suspended(&(p->dev))) {
5234 if (!pm_runtime_suspend(&(p->dev)))
5235 break;
5236
5237 if (expires < ktime_get_mono_fast_ns()) {
5238 dev_warn(adev->dev, "failed to suspend display audio\n");
b85e285e 5239 pci_dev_put(p);
3f12acc8
EQ
5240 /* TODO: abort the succeeding gpu reset? */
5241 return -ETIMEDOUT;
5242 }
5243 }
5244
5245 pm_runtime_disable(&(p->dev));
5246
b85e285e 5247 pci_dev_put(p);
3f12acc8
EQ
5248 return 0;
5249}
5250
d193b12b 5251static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
247c7b0d
AG
5252{
5253 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5254
5255#if defined(CONFIG_DEBUG_FS)
5256 if (!amdgpu_sriov_vf(adev))
5257 cancel_work(&adev->reset_work);
5258#endif
5259
5260 if (adev->kfd.dev)
5261 cancel_work(&adev->kfd.reset_work);
5262
5263 if (amdgpu_sriov_vf(adev))
5264 cancel_work(&adev->virt.flr_work);
5265
5266 if (con && adev->ras_enabled)
5267 cancel_work(&con->recovery_work);
5268
5269}
5270
26bc5340 5271/**
6e9c65f7 5272 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
26bc5340 5273 *
982a820b 5274 * @adev: amdgpu_device pointer
26bc5340 5275 * @job: which job trigger hang
80bd2de1 5276 * @reset_context: amdgpu reset context pointer
26bc5340
AG
5277 *
5278 * Attempt to reset the GPU if it has hung (all asics).
5279 * Attempt to do soft-reset or full-reset and reinitialize the ASIC.
5280 * Returns 0 for success or an error on failure.
5281 */
5282
cf727044 5283int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
f1549c09
LG
5284 struct amdgpu_job *job,
5285 struct amdgpu_reset_context *reset_context)
26bc5340 5286{
1d721ed6 5287 struct list_head device_list, *device_list_handle = NULL;
7dd8c205 5288 bool job_signaled = false;
26bc5340 5289 struct amdgpu_hive_info *hive = NULL;
26bc5340 5290 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 5291 int i, r = 0;
bb5c7235 5292 bool need_emergency_restart = false;
3f12acc8 5293 bool audio_suspended = false;
f5c7e779
YC
5294 bool gpu_reset_for_dev_remove = false;
5295
5296 gpu_reset_for_dev_remove =
5297 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5298 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340 5299
6e3cd2a9 5300 /*
bb5c7235
WS
5301 * Special case: RAS triggered and full reset isn't supported
5302 */
5303 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5304
d5ea093e
AG
5305 /*
5306 * Flush RAM to disk so that after reboot
5307 * the user can read log and see why the system rebooted.
5308 */
bb5c7235 5309 if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
5310 DRM_WARN("Emergency reboot.");
5311
5312 ksys_sync_helper();
5313 emergency_restart();
5314 }
5315
b823821f 5316 dev_info(adev->dev, "GPU %s begin!\n",
bb5c7235 5317 need_emergency_restart ? "jobs stop":"reset");
26bc5340 5318
175ac6ec
ZL
5319 if (!amdgpu_sriov_vf(adev))
5320 hive = amdgpu_get_xgmi_hive(adev);
681260df 5321 if (hive)
53b3f8f4 5322 mutex_lock(&hive->hive_lock);
26bc5340 5323
f1549c09
LG
5324 reset_context->job = job;
5325 reset_context->hive = hive;
9e94d22c
EQ
5326 /*
5327 * Build list of devices to reset.
5328 * In case we are in XGMI hive mode, resort the device list
5329 * to put adev in the 1st position.
5330 */
5331 INIT_LIST_HEAD(&device_list);
175ac6ec 5332 if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
83d29a5f 5333 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
655ce9cb 5334 list_add_tail(&tmp_adev->reset_list, &device_list);
83d29a5f
YC
5335 if (gpu_reset_for_dev_remove && adev->shutdown)
5336 tmp_adev->shutdown = true;
5337 }
655ce9cb 5338 if (!list_is_first(&adev->reset_list, &device_list))
5339 list_rotate_to_front(&adev->reset_list, &device_list);
5340 device_list_handle = &device_list;
26bc5340 5341 } else {
655ce9cb 5342 list_add_tail(&adev->reset_list, &device_list);
26bc5340
AG
5343 device_list_handle = &device_list;
5344 }
5345
e923be99
AG
5346 /* We need to lock reset domain only once both for XGMI and single device */
5347 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5348 reset_list);
3675c2f2 5349 amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
e923be99 5350
1d721ed6 5351 /* block all schedulers and reset given job's ring */
655ce9cb 5352 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f287a3c5 5353
e923be99 5354 amdgpu_device_set_mp1_state(tmp_adev);
f287a3c5 5355
3f12acc8
EQ
5356 /*
5357 * Try to put the audio codec into suspend state
5358 * before the gpu reset starts.
5359 *
5360 * The power domain of the graphics device is shared
5361 * with the AZ power domain. Without this, we may
5362 * change the audio hardware state behind the audio
5363 * driver's back, which will trigger audio codec
5364 * errors.
5365 */
5366 if (!amdgpu_device_suspend_display_audio(tmp_adev))
5367 audio_suspended = true;
5368
9e94d22c
EQ
5369 amdgpu_ras_set_error_query_ready(tmp_adev, false);
5370
52fb44cf
EQ
5371 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5372
c004d44e 5373 if (!amdgpu_sriov_vf(tmp_adev))
428890a3 5374 amdgpu_amdkfd_pre_reset(tmp_adev);
9e94d22c 5375
12ffa55d
AG
5376 /*
5377 * Mark these ASICs to be reset as untracked first,
5378 * and add them back after the reset completes.
5379 */
5380 amdgpu_unregister_gpu_instance(tmp_adev);
5381
163d4cd2 5382 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
565d1941 5383
f1c1314b 5384 /* disable ras on ALL IPs */
bb5c7235 5385 if (!need_emergency_restart &&
b823821f 5386 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 5387 amdgpu_ras_suspend(tmp_adev);
5388
1d721ed6
AG
5389 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5390 struct amdgpu_ring *ring = tmp_adev->rings[i];
5391
5392 if (!ring || !ring->sched.thread)
5393 continue;
5394
0b2d2c2e 5395 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 5396
bb5c7235 5397 if (need_emergency_restart)
7c6e68c7 5398 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6 5399 }
8f8c80f4 5400 atomic_inc(&tmp_adev->gpu_reset_counter);
1d721ed6
AG
5401 }
5402
bb5c7235 5403 if (need_emergency_restart)
7c6e68c7
AG
5404 goto skip_sched_resume;
5405
1d721ed6
AG
5406 /*
5407 * Must check guilty signal here since after this point all old
5408 * HW fences are force signaled.
5409 *
5410 * job->base holds a reference to parent fence
5411 */
f6a3f660 5412 if (job && dma_fence_is_signaled(&job->hw_fence)) {
1d721ed6 5413 job_signaled = true;
1d721ed6
AG
5414 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5415 goto skip_hw_reset;
5416 }
5417
26bc5340 5418retry: /* Rest of adevs pre asic reset from XGMI hive. */
655ce9cb 5419 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f5c7e779
YC
5420 if (gpu_reset_for_dev_remove) {
5421 /* Workaround for ASICs that need to disable SMC first */
5422 amdgpu_device_smu_fini_early(tmp_adev);
5423 }
f1549c09 5424 r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
26bc5340
AG
5425 /*TODO Should we stop ?*/
5426 if (r) {
aac89168 5427 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
4a580877 5428 r, adev_to_drm(tmp_adev)->unique);
26bc5340
AG
5429 tmp_adev->asic_reset_res = r;
5430 }
247c7b0d
AG
5431
5432 /*
5433 * Drop all pending non-scheduler resets. Scheduler resets
5434 * were already dropped during drm_sched_stop
5435 */
d193b12b 5436 amdgpu_device_stop_pending_resets(tmp_adev);
26bc5340
AG
5437 }
5438
5439 /* Actual ASIC resets if needed.*/
4f30d920 5440 /* Host driver will handle XGMI hive reset for SRIOV */
26bc5340
AG
5441 if (amdgpu_sriov_vf(adev)) {
5442 r = amdgpu_device_reset_sriov(adev, job ? false : true);
5443 if (r)
5444 adev->asic_reset_res = r;
950d6425 5445
28606c4e
YC
5446 /* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */
5447 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
5448 adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3))
950d6425 5449 amdgpu_ras_resume(adev);
26bc5340 5450 } else {
f1549c09 5451 r = amdgpu_do_asic_reset(device_list_handle, reset_context);
b98a1648 5452 if (r && r == -EAGAIN)
26bc5340 5453 goto retry;
f5c7e779
YC
5454
5455 if (!r && gpu_reset_for_dev_remove)
5456 goto recover_end;
26bc5340
AG
5457 }
5458
1d721ed6
AG
5459skip_hw_reset:
5460
26bc5340 5461 /* Post ASIC reset for all devs .*/
655ce9cb 5462 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
7c6e68c7 5463
1d721ed6
AG
5464 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5465 struct amdgpu_ring *ring = tmp_adev->rings[i];
5466
5467 if (!ring || !ring->sched.thread)
5468 continue;
5469
6868a2c4 5470 drm_sched_start(&ring->sched, true);
1d721ed6
AG
5471 }
5472
693073a0 5473 if (adev->enable_mes && adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3))
ed67f729
JX
5474 amdgpu_mes_self_test(tmp_adev);
5475
b8920e1e 5476 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
4a580877 5477 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
1d721ed6 5478
7258fa31
SK
5479 if (tmp_adev->asic_reset_res)
5480 r = tmp_adev->asic_reset_res;
5481
1d721ed6 5482 tmp_adev->asic_reset_res = 0;
26bc5340
AG
5483
5484 if (r) {
5485 /* bad news, how to tell it to userspace ? */
12ffa55d 5486 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
5487 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5488 } else {
12ffa55d 5489 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
3fa8f89d
S
5490 if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5491 DRM_WARN("smart shift update failed\n");
26bc5340 5492 }
7c6e68c7 5493 }
26bc5340 5494
7c6e68c7 5495skip_sched_resume:
655ce9cb 5496 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
428890a3 5497 /* unlock kfd: SRIOV would do it separately */
c004d44e 5498 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
428890a3 5499 amdgpu_amdkfd_post_reset(tmp_adev);
8e2712e7 5500
5501 /* kfd_post_reset will do nothing if the kfd device is not initialized,
5502 * so bring up kfd here if it wasn't initialized before
5503 */
5504 if (!adev->kfd.init_complete)
5505 amdgpu_amdkfd_device_init(adev);
5506
3f12acc8
EQ
5507 if (audio_suspended)
5508 amdgpu_device_resume_display_audio(tmp_adev);
e923be99
AG
5509
5510 amdgpu_device_unset_mp1_state(tmp_adev);
d293470e
YC
5511
5512 amdgpu_ras_set_error_query_ready(tmp_adev, true);
26bc5340
AG
5513 }
5514
f5c7e779 5515recover_end:
e923be99
AG
5516 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5517 reset_list);
5518 amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5519
9e94d22c 5520 if (hive) {
9e94d22c 5521 mutex_unlock(&hive->hive_lock);
d95e8e97 5522 amdgpu_put_xgmi_hive(hive);
9e94d22c 5523 }
26bc5340 5524
f287a3c5 5525 if (r)
26bc5340 5526 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
ab9a0b1f
AG
5527
5528 atomic_set(&adev->reset_domain->reset_res, r);
d38ceaf9
AD
5529 return r;
5530}
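/*
 * Illustrative sketch, simplified and not driver code: the usual entry into
 * amdgpu_device_gpu_recover() is the scheduler's job-timeout path, which
 * builds a reset context and passes the hanging job. example_timedout() is a
 * made-up stand-in for that caller; the real one lives outside this file.
 */
static void example_timedout(struct amdgpu_ring *ring, struct amdgpu_job *job)
{
	struct amdgpu_reset_context reset_context;

	memset(&reset_context, 0, sizeof(reset_context));
	reset_context.method = AMD_RESET_METHOD_NONE;
	reset_context.reset_req_dev = ring->adev;
	/* flags left clear: let the pre-reset path decide on a full reset */

	amdgpu_device_gpu_recover(ring->adev, job, &reset_context);
}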
5531
e3ecdffa
AD
5532/**
5533 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
5534 *
5535 * @adev: amdgpu_device pointer
5536 *
5537 * Fetches and stores in the driver the PCIE capabilities (gen speed
5538 * and lanes) of the slot the device is in. Handles APUs and
5539 * virtualized environments where PCIE config space may not be available.
5540 */
5494d864 5541static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 5542{
5d9a6330 5543 struct pci_dev *pdev;
c5313457
HK
5544 enum pci_bus_speed speed_cap, platform_speed_cap;
5545 enum pcie_link_width platform_link_width;
d0dd7f0c 5546
cd474ba0
AD
5547 if (amdgpu_pcie_gen_cap)
5548 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 5549
cd474ba0
AD
5550 if (amdgpu_pcie_lane_cap)
5551 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 5552
cd474ba0 5553 /* covers APUs as well */
04e85958 5554 if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
cd474ba0
AD
5555 if (adev->pm.pcie_gen_mask == 0)
5556 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5557 if (adev->pm.pcie_mlw_mask == 0)
5558 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 5559 return;
cd474ba0 5560 }
d0dd7f0c 5561
c5313457
HK
5562 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5563 return;
5564
dbaa922b
AD
5565 pcie_bandwidth_available(adev->pdev, NULL,
5566 &platform_speed_cap, &platform_link_width);
c5313457 5567
cd474ba0 5568 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
5569 /* asic caps */
5570 pdev = adev->pdev;
5571 speed_cap = pcie_get_speed_cap(pdev);
5572 if (speed_cap == PCI_SPEED_UNKNOWN) {
5573 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
5574 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5575 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 5576 } else {
2b3a1f51
FX
5577 if (speed_cap == PCIE_SPEED_32_0GT)
5578 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5579 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5580 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5581 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5582 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5583 else if (speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5584 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5585 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5586 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5587 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5588 else if (speed_cap == PCIE_SPEED_8_0GT)
5589 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5590 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5591 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5592 else if (speed_cap == PCIE_SPEED_5_0GT)
5593 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5594 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5595 else
5596 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5597 }
5598 /* platform caps */
c5313457 5599 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
5600 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5601 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5602 } else {
2b3a1f51
FX
5603 if (platform_speed_cap == PCIE_SPEED_32_0GT)
5604 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5605 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5606 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5607 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5608 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5609 else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5610 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5611 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5612 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5613 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 5614 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
5615 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5616 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5617 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 5618 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
5619 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5620 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5621 else
5622 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5623
cd474ba0
AD
5624 }
5625 }
5626 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 5627 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
5628 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5629 } else {
c5313457 5630 switch (platform_link_width) {
5d9a6330 5631 case PCIE_LNK_X32:
cd474ba0
AD
5632 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5633 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5634 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5635 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5636 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5637 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5638 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5639 break;
5d9a6330 5640 case PCIE_LNK_X16:
cd474ba0
AD
5641 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5642 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5643 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5644 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5645 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5646 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5647 break;
5d9a6330 5648 case PCIE_LNK_X12:
cd474ba0
AD
5649 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5650 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5651 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5652 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5653 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5654 break;
5d9a6330 5655 case PCIE_LNK_X8:
cd474ba0
AD
5656 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5657 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5658 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5659 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5660 break;
5d9a6330 5661 case PCIE_LNK_X4:
cd474ba0
AD
5662 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5663 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5664 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5665 break;
5d9a6330 5666 case PCIE_LNK_X2:
cd474ba0
AD
5667 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5668 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5669 break;
5d9a6330 5670 case PCIE_LNK_X1:
cd474ba0
AD
5671 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5672 break;
5673 default:
5674 break;
5675 }
d0dd7f0c
AD
5676 }
5677 }
5678}
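/*
 * Illustrative sketch, not driver code: pcie_gen_mask mixes CAIL_ASIC_* bits
 * (what the ASIC supports) with CAIL_PCIE_* bits (what the platform supports),
 * so a consumer normally checks both halves. example_supports_gen3() is a
 * made-up helper showing that.
 */
static bool example_supports_gen3(struct amdgpu_device *adev)
{
	return (adev->pm.pcie_gen_mask & CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3) &&
	       (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
}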
d38ceaf9 5679
08a2fd23
RE
5680/**
5681 * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
5682 *
5683 * @adev: amdgpu_device pointer
5684 * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
5685 *
5686 * Return true if @peer_adev can access (DMA) @adev through the PCIe
5687 * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
5688 * @peer_adev.
5689 */
5690bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
5691 struct amdgpu_device *peer_adev)
5692{
5693#ifdef CONFIG_HSA_AMD_P2P
5694 uint64_t address_mask = peer_adev->dev->dma_mask ?
5695 ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
5696 resource_size_t aper_limit =
5697 adev->gmc.aper_base + adev->gmc.aper_size - 1;
bb66ecbf
LL
5698 bool p2p_access =
5699 !adev->gmc.xgmi.connected_to_cpu &&
5700 !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
08a2fd23
RE
5701
5702 return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
5703 adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
5704 !(adev->gmc.aper_base & address_mask ||
5705 aper_limit & address_mask));
5706#else
5707 return false;
5708#endif
5709}
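/*
 * Worked example with illustrative numbers: if the peer advertises a 40-bit
 * DMA mask, address_mask is ~0xff_ffff_ffff. A 16 GiB BAR at 0x80_0000_0000
 * ends at aper_limit 0x83_ffff_ffff; neither value has bits above bit 39 set,
 * so the "BAR reachable through the peer's DMA mask" part of the check passes
 * (the BAR must additionally expose all of VRAM, i.e. visible == real size).
 */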
5710
361dbd01
AD
5711int amdgpu_device_baco_enter(struct drm_device *dev)
5712{
1348969a 5713 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5714 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01 5715
6ab68650 5716 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
5717 return -ENOTSUPP;
5718
8ab0d6f0 5719 if (ras && adev->ras_enabled &&
acdae216 5720 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5721 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
5722
9530273e 5723 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
5724}
5725
5726int amdgpu_device_baco_exit(struct drm_device *dev)
5727{
1348969a 5728 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5729 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 5730 int ret = 0;
361dbd01 5731
6ab68650 5732 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
5733 return -ENOTSUPP;
5734
9530273e
EQ
5735 ret = amdgpu_dpm_baco_exit(adev);
5736 if (ret)
5737 return ret;
7a22677b 5738
8ab0d6f0 5739 if (ras && adev->ras_enabled &&
acdae216 5740 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5741 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
5742
1bece222
CL
5743 if (amdgpu_passthrough(adev) &&
5744 adev->nbio.funcs->clear_doorbell_interrupt)
5745 adev->nbio.funcs->clear_doorbell_interrupt(adev);
5746
7a22677b 5747 return 0;
361dbd01 5748}
c9a6b82f
AG
5749
5750/**
5751 * amdgpu_pci_error_detected - Called when a PCI error is detected.
5752 * @pdev: PCI device struct
5753 * @state: PCI channel state
5754 *
5755 * Description: Called when a PCI error is detected.
5756 *
5757 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
5758 */
5759pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
5760{
5761 struct drm_device *dev = pci_get_drvdata(pdev);
5762 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5763 int i;
c9a6b82f
AG
5764
5765 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
5766
6894305c
AG
5767 if (adev->gmc.xgmi.num_physical_nodes > 1) {
5768 DRM_WARN("No support for XGMI hive yet...");
5769 return PCI_ERS_RESULT_DISCONNECT;
5770 }
5771
e17e27f9
GC
5772 adev->pci_channel_state = state;
5773
c9a6b82f
AG
5774 switch (state) {
5775 case pci_channel_io_normal:
5776 return PCI_ERS_RESULT_CAN_RECOVER;
acd89fca 5777 /* Fatal error, prepare for slot reset */
8a11d283
TZ
5778 case pci_channel_io_frozen:
5779 /*
d0fb18b5 5780 * Locking adev->reset_domain->sem will prevent any external access
acd89fca
AG
5781 * to GPU during PCI error recovery
5782 */
3675c2f2 5783 amdgpu_device_lock_reset_domain(adev->reset_domain);
e923be99 5784 amdgpu_device_set_mp1_state(adev);
acd89fca
AG
5785
5786 /*
5787 * Block any work scheduling as we do for regular GPU reset
5788 * for the duration of the recovery
5789 */
5790 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5791 struct amdgpu_ring *ring = adev->rings[i];
5792
5793 if (!ring || !ring->sched.thread)
5794 continue;
5795
5796 drm_sched_stop(&ring->sched, NULL);
5797 }
8f8c80f4 5798 atomic_inc(&adev->gpu_reset_counter);
c9a6b82f
AG
5799 return PCI_ERS_RESULT_NEED_RESET;
5800 case pci_channel_io_perm_failure:
5801 /* Permanent error, prepare for device removal */
5802 return PCI_ERS_RESULT_DISCONNECT;
5803 }
5804
5805 return PCI_ERS_RESULT_NEED_RESET;
5806}
5807
5808/**
5809 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
5810 * @pdev: pointer to PCI device
5811 */
5812pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
5813{
5814
5815 DRM_INFO("PCI error: mmio enabled callback!!\n");
5816
5817 /* TODO - dump whatever for debugging purposes */
5818
5819 /* This is called only if amdgpu_pci_error_detected returns
5820 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
5821 * works, no need to reset slot.
5822 */
5823
5824 return PCI_ERS_RESULT_RECOVERED;
5825}
5826
5827/**
5828 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
5829 * @pdev: PCI device struct
5830 *
5831 * Description: This routine is called by the pci error recovery
5832 * code after the PCI slot has been reset, just before we
5833 * should resume normal operations.
5834 */
5835pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
5836{
5837 struct drm_device *dev = pci_get_drvdata(pdev);
5838 struct amdgpu_device *adev = drm_to_adev(dev);
362c7b91 5839 int r, i;
04442bf7 5840 struct amdgpu_reset_context reset_context;
362c7b91 5841 u32 memsize;
7ac71382 5842 struct list_head device_list;
c9a6b82f
AG
5843
5844 DRM_INFO("PCI error: slot reset callback!!\n");
5845
04442bf7
LL
5846 memset(&reset_context, 0, sizeof(reset_context));
5847
7ac71382 5848 INIT_LIST_HEAD(&device_list);
655ce9cb 5849 list_add_tail(&adev->reset_list, &device_list);
7ac71382 5850
362c7b91
AG
5851 /* wait for asic to come out of reset */
5852 msleep(500);
5853
7ac71382 5854 /* Restore PCI confspace */
c1dd4aa6 5855 amdgpu_device_load_pci_state(pdev);
c9a6b82f 5856
362c7b91
AG
5857 /* confirm ASIC came out of reset */
5858 for (i = 0; i < adev->usec_timeout; i++) {
5859 memsize = amdgpu_asic_get_config_memsize(adev);
5860
5861 if (memsize != 0xffffffff)
5862 break;
5863 udelay(1);
5864 }
5865 if (memsize == 0xffffffff) {
5866 r = -ETIME;
5867 goto out;
5868 }
5869
04442bf7
LL
5870 reset_context.method = AMD_RESET_METHOD_NONE;
5871 reset_context.reset_req_dev = adev;
5872 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
5873 set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
5874
7afefb81 5875 adev->no_hw_access = true;
04442bf7 5876 r = amdgpu_device_pre_asic_reset(adev, &reset_context);
7afefb81 5877 adev->no_hw_access = false;
c9a6b82f
AG
5878 if (r)
5879 goto out;
5880
04442bf7 5881 r = amdgpu_do_asic_reset(&device_list, &reset_context);
c9a6b82f
AG
5882
5883out:
c9a6b82f 5884 if (!r) {
c1dd4aa6
AG
5885 if (amdgpu_device_cache_pci_state(adev->pdev))
5886 pci_restore_state(adev->pdev);
5887
c9a6b82f
AG
5888 DRM_INFO("PCIe error recovery succeeded\n");
5889 } else {
5890 DRM_ERROR("PCIe error recovery failed, err:%d", r);
e923be99
AG
5891 amdgpu_device_unset_mp1_state(adev);
5892 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f
AG
5893 }
5894
5895 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
5896}
5897
5898/**
5899 * amdgpu_pci_resume() - resume normal ops after PCI reset
5900 * @pdev: pointer to PCI device
5901 *
5902 * Called when the error recovery driver tells us that it's
505199a3 5903 * OK to resume normal operation.
c9a6b82f
AG
5904 */
5905void amdgpu_pci_resume(struct pci_dev *pdev)
5906{
5907 struct drm_device *dev = pci_get_drvdata(pdev);
5908 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5909 int i;
c9a6b82f 5910
c9a6b82f
AG
5911
5912 DRM_INFO("PCI error: resume callback!!\n");
acd89fca 5913
e17e27f9
GC
5914 /* Only continue execution for the case of pci_channel_io_frozen */
5915 if (adev->pci_channel_state != pci_channel_io_frozen)
5916 return;
5917
acd89fca
AG
5918 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5919 struct amdgpu_ring *ring = adev->rings[i];
5920
5921 if (!ring || !ring->sched.thread)
5922 continue;
5923
acd89fca
AG
5924 drm_sched_start(&ring->sched, true);
5925 }
5926
e923be99
AG
5927 amdgpu_device_unset_mp1_state(adev);
5928 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f 5929}
c1dd4aa6
AG
5930
5931bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
5932{
5933 struct drm_device *dev = pci_get_drvdata(pdev);
5934 struct amdgpu_device *adev = drm_to_adev(dev);
5935 int r;
5936
5937 r = pci_save_state(pdev);
5938 if (!r) {
5939 kfree(adev->pci_state);
5940
5941 adev->pci_state = pci_store_saved_state(pdev);
5942
5943 if (!adev->pci_state) {
5944 DRM_ERROR("Failed to store PCI saved state");
5945 return false;
5946 }
5947 } else {
5948 DRM_WARN("Failed to save PCI state, err:%d\n", r);
5949 return false;
5950 }
5951
5952 return true;
5953}
5954
5955bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
5956{
5957 struct drm_device *dev = pci_get_drvdata(pdev);
5958 struct amdgpu_device *adev = drm_to_adev(dev);
5959 int r;
5960
5961 if (!adev->pci_state)
5962 return false;
5963
5964 r = pci_load_saved_state(pdev, adev->pci_state);
5965
5966 if (!r) {
5967 pci_restore_state(pdev);
5968 } else {
5969 DRM_WARN("Failed to load PCI state, err:%d\n", r);
5970 return false;
5971 }
5972
5973 return true;
5974}
5975
810085dd
EH
5976void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
5977 struct amdgpu_ring *ring)
5978{
5979#ifdef CONFIG_X86_64
b818a5d3 5980 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
5981 return;
5982#endif
5983 if (adev->gmc.xgmi.connected_to_cpu)
5984 return;
5985
5986 if (ring && ring->funcs->emit_hdp_flush)
5987 amdgpu_ring_emit_hdp_flush(ring);
5988 else
5989 amdgpu_asic_flush_hdp(adev, ring);
5990}
c1dd4aa6 5991
810085dd
EH
5992void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
5993 struct amdgpu_ring *ring)
5994{
5995#ifdef CONFIG_X86_64
b818a5d3 5996 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
5997 return;
5998#endif
5999 if (adev->gmc.xgmi.connected_to_cpu)
6000 return;
c1dd4aa6 6001
810085dd
EH
6002 amdgpu_asic_invalidate_hdp(adev, ring);
6003}
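/*
 * Illustrative sketch, not driver code: a CPU write through an HDP-backed
 * mapping is typically followed by an HDP flush before the GPU consumes the
 * data; the helper above picks between a ring packet and a direct register
 * write. example_publish_cpu_write() and its arguments are made up.
 */
static void example_publish_cpu_write(struct amdgpu_device *adev,
				      u32 *cpu_ptr, u32 val)
{
	*cpu_ptr = val;		/* CPU write through the HDP-backed mapping */
	mb();			/* order the write before the flush */
	amdgpu_device_flush_hdp(adev, NULL);
}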
34f3a4a9 6004
89a7a870
AG
6005int amdgpu_in_reset(struct amdgpu_device *adev)
6006{
6007 return atomic_read(&adev->reset_domain->in_gpu_reset);
53a17b6b
TZ
6008}
6009
34f3a4a9
LY
6010/**
6011 * amdgpu_device_halt() - bring hardware to some kind of halt state
6012 *
6013 * @adev: amdgpu_device pointer
6014 *
6015 * Bring hardware to some kind of halt state so that no one can touch it
6016 * any more. It helps to maintain error context when an error occurs.
6017 * Compared to a simple hang, the system stays stable at least for SSH
6018 * access. Then it should be trivial to inspect the hardware state and
6019 * see what's going on. Implemented as follows:
6020 *
6021 * 1. drm_dev_unplug() makes device inaccessible to user space (IOCTLs, etc),
6022 * clears all CPU mappings to device, disallows remappings through page faults
6023 * 2. amdgpu_irq_disable_all() disables all interrupts
6024 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
6025 * 4. set adev->no_hw_access to avoid potential crashes after step 5
6026 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
6027 * 6. pci_disable_device() and pci_wait_for_pending_transaction()
6028 * flush any in flight DMA operations
6029 */
6030void amdgpu_device_halt(struct amdgpu_device *adev)
6031{
6032 struct pci_dev *pdev = adev->pdev;
e0f943b4 6033 struct drm_device *ddev = adev_to_drm(adev);
34f3a4a9 6034
2c1c7ba4 6035 amdgpu_xcp_dev_unplug(adev);
34f3a4a9
LY
6036 drm_dev_unplug(ddev);
6037
6038 amdgpu_irq_disable_all(adev);
6039
6040 amdgpu_fence_driver_hw_fini(adev);
6041
6042 adev->no_hw_access = true;
6043
6044 amdgpu_device_unmap_mmio(adev);
6045
6046 pci_disable_device(pdev);
6047 pci_wait_for_pending_transaction(pdev);
6048}
86700a40
XD
6049
6050u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
6051 u32 reg)
6052{
6053 unsigned long flags, address, data;
6054 u32 r;
6055
6056 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6057 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6058
6059 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6060 WREG32(address, reg * 4);
6061 (void)RREG32(address);
6062 r = RREG32(data);
6063 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6064 return r;
6065}
6066
6067void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
6068 u32 reg, u32 v)
6069{
6070 unsigned long flags, address, data;
6071
6072 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6073 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6074
6075 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6076 WREG32(address, reg * 4);
6077 (void)RREG32(address);
6078 WREG32(data, v);
6079 (void)RREG32(data);
6080 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6081}
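/*
 * Illustrative sketch, not driver code: the two helpers above implement the
 * classic index/data indirect-access pattern, so a read-modify-write of a
 * PCIe port register composes naturally. example_pcie_port_rmw() is a
 * made-up helper.
 */
static void example_pcie_port_rmw(struct amdgpu_device *adev,
				  u32 reg, u32 clr, u32 set)
{
	u32 v = amdgpu_device_pcie_port_rreg(adev, reg);

	v &= ~clr;
	v |= set;
	amdgpu_device_pcie_port_wreg(adev, reg, v);
}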
68ce8b24
CK
6082
6083/**
6084 * amdgpu_device_switch_gang - switch to a new gang
6085 * @adev: amdgpu_device pointer
6086 * @gang: the gang to switch to
6087 *
6088 * Try to switch to a new gang.
6089 * Returns: NULL if we switched to the new gang or a reference to the current
6090 * gang leader.
6091 */
6092struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
6093 struct dma_fence *gang)
6094{
6095 struct dma_fence *old = NULL;
6096
6097 do {
6098 dma_fence_put(old);
6099 rcu_read_lock();
6100 old = dma_fence_get_rcu_safe(&adev->gang_submit);
6101 rcu_read_unlock();
6102
6103 if (old == gang)
6104 break;
6105
6106 if (!dma_fence_is_signaled(old))
6107 return old;
6108
6109 } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
6110 old, gang) != old);
6111
6112 dma_fence_put(old);
6113 return NULL;
6114}
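/*
 * Illustrative sketch, not driver code: a submission that starts a new gang
 * installs its leader fence with amdgpu_device_switch_gang() and, if a
 * different unsignaled gang is still running, has to wait for it first.
 * example_submit_gang() is a made-up caller showing the pattern.
 */
static int example_submit_gang(struct amdgpu_device *adev,
			       struct dma_fence *leader)
{
	struct dma_fence *prev = amdgpu_device_switch_gang(adev, leader);

	if (prev) {
		/* previous gang leader still active: wait, drop ref, retry later */
		dma_fence_wait(prev, false);
		dma_fence_put(prev);
		return -EAGAIN;
	}
	return 0;
}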
220c8cc8
AD
6115
6116bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6117{
6118 switch (adev->asic_type) {
6119#ifdef CONFIG_DRM_AMDGPU_SI
6120 case CHIP_HAINAN:
6121#endif
6122 case CHIP_TOPAZ:
6123 /* chips with no display hardware */
6124 return false;
6125#ifdef CONFIG_DRM_AMDGPU_SI
6126 case CHIP_TAHITI:
6127 case CHIP_PITCAIRN:
6128 case CHIP_VERDE:
6129 case CHIP_OLAND:
6130#endif
6131#ifdef CONFIG_DRM_AMDGPU_CIK
6132 case CHIP_BONAIRE:
6133 case CHIP_HAWAII:
6134 case CHIP_KAVERI:
6135 case CHIP_KABINI:
6136 case CHIP_MULLINS:
6137#endif
6138 case CHIP_TONGA:
6139 case CHIP_FIJI:
6140 case CHIP_POLARIS10:
6141 case CHIP_POLARIS11:
6142 case CHIP_POLARIS12:
6143 case CHIP_VEGAM:
6144 case CHIP_CARRIZO:
6145 case CHIP_STONEY:
6146 /* chips with display hardware */
6147 return true;
6148 default:
6149 /* IP discovery */
6150 if (!adev->ip_versions[DCE_HWIP][0] ||
6151 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6152 return false;
6153 return true;
6154 }
6155}
81283fee
JZ
6156
6157uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
6158 uint32_t inst, uint32_t reg_addr, char reg_name[],
6159 uint32_t expected_value, uint32_t mask)
6160{
6161 uint32_t ret = 0;
6162 uint32_t old_ = 0;
6163 uint32_t tmp_ = RREG32(reg_addr);
6164 uint32_t loop = adev->usec_timeout;
6165
6166 while ((tmp_ & (mask)) != (expected_value)) {
6167 if (old_ != tmp_) {
6168 loop = adev->usec_timeout;
6169 old_ = tmp_;
6170 } else
6171 udelay(1);
6172 tmp_ = RREG32(reg_addr);
6173 loop--;
6174 if (!loop) {
6175 DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08xn",
6176 inst, reg_name, (uint32_t)expected_value,
6177 (uint32_t)(tmp_ & (mask)));
6178 ret = -ETIMEDOUT;
6179 break;
6180 }
6181 }
6182 return ret;
6183}
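/*
 * Illustrative sketch, not driver code: typical use of the poll helper above
 * is waiting for a ready/status bit within the usec_timeout bound. The
 * register offset, name string and bit below are made up.
 */
static int example_wait_ready(struct amdgpu_device *adev, uint32_t status_reg)
{
	/* wait until bit 0 of the (hypothetical) status register reads back 1 */
	return amdgpu_device_wait_on_rreg(adev, 0, status_reg,
					  "EXAMPLE_STATUS", 0x1, 0x1);
}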