drm/amd: Add HDP flush during jpeg init
[linux-2.6-block.git] / drivers / gpu / drm / amd / amdgpu / amdgpu_device.c
CommitLineData
d38ceaf9
AD
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
b1ddf548 28#include <linux/power_supply.h>
0875dc9e 29#include <linux/kthread.h>
fdf2f6c5 30#include <linux/module.h>
d38ceaf9
AD
31#include <linux/console.h>
32#include <linux/slab.h>
4a74c38c 33#include <linux/iommu.h>
901e2be2 34#include <linux/pci.h>
3d8785f6
SA
35#include <linux/devcoredump.h>
36#include <generated/utsrelease.h>
08a2fd23 37#include <linux/pci-p2pdma.h>
d37a3929 38#include <linux/apple-gmux.h>
fdf2f6c5 39
b7cdb41e 40#include <drm/drm_aperture.h>
4562236b 41#include <drm/drm_atomic_helper.h>
973ad627 42#include <drm/drm_crtc_helper.h>
45b64fd9 43#include <drm/drm_fb_helper.h>
fcd70cd3 44#include <drm/drm_probe_helper.h>
d38ceaf9
AD
45#include <drm/amdgpu_drm.h>
46#include <linux/vgaarb.h>
47#include <linux/vga_switcheroo.h>
48#include <linux/efi.h>
49#include "amdgpu.h"
f4b373f4 50#include "amdgpu_trace.h"
d38ceaf9
AD
51#include "amdgpu_i2c.h"
52#include "atom.h"
53#include "amdgpu_atombios.h"
a5bde2f9 54#include "amdgpu_atomfirmware.h"
d0dd7f0c 55#include "amd_pcie.h"
33f34802
KW
56#ifdef CONFIG_DRM_AMDGPU_SI
57#include "si.h"
58#endif
a2e73f56
AD
59#ifdef CONFIG_DRM_AMDGPU_CIK
60#include "cik.h"
61#endif
aaa36a97 62#include "vi.h"
460826e6 63#include "soc15.h"
0a5b8c7b 64#include "nv.h"
d38ceaf9 65#include "bif/bif_4_1_d.h"
bec86378 66#include <linux/firmware.h>
89041940 67#include "amdgpu_vf_error.h"
d38ceaf9 68
ba997709 69#include "amdgpu_amdkfd.h"
d2f52ac8 70#include "amdgpu_pm.h"
d38ceaf9 71
5183411b 72#include "amdgpu_xgmi.h"
c030f2e4 73#include "amdgpu_ras.h"
9c7c85f7 74#include "amdgpu_pmu.h"
bd607166 75#include "amdgpu_fru_eeprom.h"
04442bf7 76#include "amdgpu_reset.h"
5183411b 77
d5ea093e 78#include <linux/suspend.h>
c6a6e2db 79#include <drm/task_barrier.h>
3f12acc8 80#include <linux/pm_runtime.h>
d5ea093e 81
f89f8c6b
AG
82#include <drm/drm_drv.h>
83
3ad5dcfe
KHF
84#if IS_ENABLED(CONFIG_X86)
85#include <asm/intel-family.h>
86#endif
87
e2a75f88 88MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
3f76dced 89MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
2d2e5e7e 90MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
ad5a67a7 91MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
54c4d17e 92MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
65e60f6e 93MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
42b325e5 94MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
e2a75f88 95
2dc80b00 96#define AMDGPU_RESUME_MS 2000
7258fa31
SK
97#define AMDGPU_MAX_RETRY_LIMIT 2
98#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
2dc80b00 99
b7cdb41e
ML
100static const struct drm_driver amdgpu_kms_driver;
101
050091ab 102const char *amdgpu_asic_name[] = {
da69c161
KW
103 "TAHITI",
104 "PITCAIRN",
105 "VERDE",
106 "OLAND",
107 "HAINAN",
d38ceaf9
AD
108 "BONAIRE",
109 "KAVERI",
110 "KABINI",
111 "HAWAII",
112 "MULLINS",
113 "TOPAZ",
114 "TONGA",
48299f95 115 "FIJI",
d38ceaf9 116 "CARRIZO",
139f4917 117 "STONEY",
2cc0c0b5
FC
118 "POLARIS10",
119 "POLARIS11",
c4642a47 120 "POLARIS12",
48ff108d 121 "VEGAM",
d4196f01 122 "VEGA10",
8fab806a 123 "VEGA12",
956fcddc 124 "VEGA20",
2ca8a5d2 125 "RAVEN",
d6c3b24e 126 "ARCTURUS",
1eee4228 127 "RENOIR",
d46b417a 128 "ALDEBARAN",
852a6626 129 "NAVI10",
d0f56dc2 130 "CYAN_SKILLFISH",
87dbad02 131 "NAVI14",
9802f5d7 132 "NAVI12",
ccaf72d3 133 "SIENNA_CICHLID",
ddd8fbe7 134 "NAVY_FLOUNDER",
4f1e9a76 135 "VANGOGH",
a2468e04 136 "DIMGREY_CAVEFISH",
6f169591 137 "BEIGE_GOBY",
ee9236b7 138 "YELLOW_CARP",
3ae695d6 139 "IP DISCOVERY",
d38ceaf9
AD
140 "LAST",
141};
142
dcea6e65
KR
143/**
144 * DOC: pcie_replay_count
145 *
146 * The amdgpu driver provides a sysfs API for reporting the total number
147 * of PCIe replays (NAKs)
148 * The file pcie_replay_count is used for this and returns the total
149 * number of replays as a sum of the NAKs generated and NAKs received
150 */
151
152static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
153 struct device_attribute *attr, char *buf)
154{
155 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 156 struct amdgpu_device *adev = drm_to_adev(ddev);
dcea6e65
KR
157 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
158
36000c7a 159 return sysfs_emit(buf, "%llu\n", cnt);
dcea6e65
KR
160}
161
b8920e1e 162static DEVICE_ATTR(pcie_replay_count, 0444,
dcea6e65
KR
163 amdgpu_device_get_pcie_replay_count, NULL);
164
5494d864
AD
165static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
166
bd607166 167
fd496ca8 168/**
b98c6299 169 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
fd496ca8
AD
170 *
171 * @dev: drm_device pointer
172 *
b98c6299 173 * Returns true if the device is a dGPU with ATPX power control,
fd496ca8
AD
174 * otherwise return false.
175 */
b98c6299 176bool amdgpu_device_supports_px(struct drm_device *dev)
fd496ca8
AD
177{
178 struct amdgpu_device *adev = drm_to_adev(dev);
179
b98c6299 180 if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
fd496ca8
AD
181 return true;
182 return false;
183}
184
e3ecdffa 185/**
0330b848 186 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
e3ecdffa
AD
187 *
188 * @dev: drm_device pointer
189 *
b98c6299 190 * Returns true if the device is a dGPU with ACPI power control,
e3ecdffa
AD
191 * otherwise return false.
192 */
31af062a 193bool amdgpu_device_supports_boco(struct drm_device *dev)
d38ceaf9 194{
1348969a 195 struct amdgpu_device *adev = drm_to_adev(dev);
d38ceaf9 196
b98c6299
AD
197 if (adev->has_pr3 ||
198 ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
d38ceaf9
AD
199 return true;
200 return false;
201}
202
a69cba42
AD
203/**
204 * amdgpu_device_supports_baco - Does the device support BACO
205 *
206 * @dev: drm_device pointer
207 *
208 * Returns true if the device supporte BACO,
209 * otherwise return false.
210 */
211bool amdgpu_device_supports_baco(struct drm_device *dev)
212{
1348969a 213 struct amdgpu_device *adev = drm_to_adev(dev);
a69cba42
AD
214
215 return amdgpu_asic_supports_baco(adev);
216}
217
3fa8f89d
S
218/**
219 * amdgpu_device_supports_smart_shift - Is the device dGPU with
220 * smart shift support
221 *
222 * @dev: drm_device pointer
223 *
224 * Returns true if the device is a dGPU with Smart Shift support,
225 * otherwise returns false.
226 */
227bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
228{
229 return (amdgpu_device_supports_boco(dev) &&
230 amdgpu_acpi_is_power_shift_control_supported());
231}
232
6e3cd2a9
MCC
233/*
234 * VRAM access helper functions
235 */
236
e35e2b11 237/**
048af66b 238 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
e35e2b11
TY
239 *
240 * @adev: amdgpu_device pointer
241 * @pos: offset of the buffer in vram
242 * @buf: virtual address of the buffer in system memory
243 * @size: read/write size, sizeof(@buf) must > @size
244 * @write: true - write to vram, otherwise - read from vram
245 */
048af66b
KW
246void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
247 void *buf, size_t size, bool write)
e35e2b11 248{
e35e2b11 249 unsigned long flags;
048af66b
KW
250 uint32_t hi = ~0, tmp = 0;
251 uint32_t *data = buf;
ce05ac56 252 uint64_t last;
f89f8c6b 253 int idx;
ce05ac56 254
c58a863b 255 if (!drm_dev_enter(adev_to_drm(adev), &idx))
f89f8c6b 256 return;
9d11eb0d 257
048af66b
KW
258 BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
259
260 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
261 for (last = pos + size; pos < last; pos += 4) {
262 tmp = pos >> 31;
263
264 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
265 if (tmp != hi) {
266 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
267 hi = tmp;
268 }
269 if (write)
270 WREG32_NO_KIQ(mmMM_DATA, *data++);
271 else
272 *data++ = RREG32_NO_KIQ(mmMM_DATA);
273 }
274
275 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
276 drm_dev_exit(idx);
277}
278
279/**
bbe04dec 280 * amdgpu_device_aper_access - access vram by vram aperature
048af66b
KW
281 *
282 * @adev: amdgpu_device pointer
283 * @pos: offset of the buffer in vram
284 * @buf: virtual address of the buffer in system memory
285 * @size: read/write size, sizeof(@buf) must > @size
286 * @write: true - write to vram, otherwise - read from vram
287 *
288 * The return value means how many bytes have been transferred.
289 */
290size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
291 void *buf, size_t size, bool write)
292{
9d11eb0d 293#ifdef CONFIG_64BIT
048af66b
KW
294 void __iomem *addr;
295 size_t count = 0;
296 uint64_t last;
297
298 if (!adev->mman.aper_base_kaddr)
299 return 0;
300
9d11eb0d
CK
301 last = min(pos + size, adev->gmc.visible_vram_size);
302 if (last > pos) {
048af66b
KW
303 addr = adev->mman.aper_base_kaddr + pos;
304 count = last - pos;
9d11eb0d
CK
305
306 if (write) {
307 memcpy_toio(addr, buf, count);
4c452b5c
SS
308 /* Make sure HDP write cache flush happens without any reordering
309 * after the system memory contents are sent over PCIe device
310 */
9d11eb0d 311 mb();
810085dd 312 amdgpu_device_flush_hdp(adev, NULL);
9d11eb0d 313 } else {
810085dd 314 amdgpu_device_invalidate_hdp(adev, NULL);
4c452b5c
SS
315 /* Make sure HDP read cache is invalidated before issuing a read
316 * to the PCIe device
317 */
9d11eb0d
CK
318 mb();
319 memcpy_fromio(buf, addr, count);
320 }
321
9d11eb0d 322 }
048af66b
KW
323
324 return count;
325#else
326 return 0;
9d11eb0d 327#endif
048af66b 328}
9d11eb0d 329
048af66b
KW
330/**
331 * amdgpu_device_vram_access - read/write a buffer in vram
332 *
333 * @adev: amdgpu_device pointer
334 * @pos: offset of the buffer in vram
335 * @buf: virtual address of the buffer in system memory
336 * @size: read/write size, sizeof(@buf) must > @size
337 * @write: true - write to vram, otherwise - read from vram
338 */
339void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
340 void *buf, size_t size, bool write)
341{
342 size_t count;
e35e2b11 343
048af66b
KW
344 /* try to using vram apreature to access vram first */
345 count = amdgpu_device_aper_access(adev, pos, buf, size, write);
346 size -= count;
347 if (size) {
348 /* using MM to access rest vram */
349 pos += count;
350 buf += count;
351 amdgpu_device_mm_access(adev, pos, buf, size, write);
e35e2b11
TY
352 }
353}
354
d38ceaf9 355/*
f7ee1874 356 * register access helper functions.
d38ceaf9 357 */
56b53c0b
DL
358
359/* Check if hw access should be skipped because of hotplug or device error */
360bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
361{
7afefb81 362 if (adev->no_hw_access)
56b53c0b
DL
363 return true;
364
365#ifdef CONFIG_LOCKDEP
366 /*
367 * This is a bit complicated to understand, so worth a comment. What we assert
368 * here is that the GPU reset is not running on another thread in parallel.
369 *
370 * For this we trylock the read side of the reset semaphore, if that succeeds
371 * we know that the reset is not running in paralell.
372 *
373 * If the trylock fails we assert that we are either already holding the read
374 * side of the lock or are the reset thread itself and hold the write side of
375 * the lock.
376 */
377 if (in_task()) {
d0fb18b5
AG
378 if (down_read_trylock(&adev->reset_domain->sem))
379 up_read(&adev->reset_domain->sem);
56b53c0b 380 else
d0fb18b5 381 lockdep_assert_held(&adev->reset_domain->sem);
56b53c0b
DL
382 }
383#endif
384 return false;
385}
386
e3ecdffa 387/**
f7ee1874 388 * amdgpu_device_rreg - read a memory mapped IO or indirect register
e3ecdffa
AD
389 *
390 * @adev: amdgpu_device pointer
391 * @reg: dword aligned register offset
392 * @acc_flags: access flags which require special behavior
393 *
394 * Returns the 32 bit value from the offset specified.
395 */
f7ee1874
HZ
396uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
397 uint32_t reg, uint32_t acc_flags)
d38ceaf9 398{
f4b373f4
TSD
399 uint32_t ret;
400
56b53c0b 401 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
402 return 0;
403
f7ee1874
HZ
404 if ((reg * 4) < adev->rmmio_size) {
405 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
406 amdgpu_sriov_runtime(adev) &&
d0fb18b5 407 down_read_trylock(&adev->reset_domain->sem)) {
f7ee1874 408 ret = amdgpu_kiq_rreg(adev, reg);
d0fb18b5 409 up_read(&adev->reset_domain->sem);
f7ee1874
HZ
410 } else {
411 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
412 }
413 } else {
414 ret = adev->pcie_rreg(adev, reg * 4);
81202807 415 }
bc992ba5 416
f7ee1874 417 trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
e78b579d 418
f4b373f4 419 return ret;
d38ceaf9
AD
420}
421
421a2a30
ML
422/*
423 * MMIO register read with bytes helper functions
424 * @offset:bytes offset from MMIO start
b8920e1e 425 */
421a2a30 426
e3ecdffa
AD
427/**
428 * amdgpu_mm_rreg8 - read a memory mapped IO register
429 *
430 * @adev: amdgpu_device pointer
431 * @offset: byte aligned register offset
432 *
433 * Returns the 8 bit value from the offset specified.
434 */
7cbbc745
AG
435uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
436{
56b53c0b 437 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
438 return 0;
439
421a2a30
ML
440 if (offset < adev->rmmio_size)
441 return (readb(adev->rmmio + offset));
442 BUG();
443}
444
445/*
446 * MMIO register write with bytes helper functions
447 * @offset:bytes offset from MMIO start
448 * @value: the value want to be written to the register
b8920e1e
SS
449 */
450
e3ecdffa
AD
451/**
452 * amdgpu_mm_wreg8 - read a memory mapped IO register
453 *
454 * @adev: amdgpu_device pointer
455 * @offset: byte aligned register offset
456 * @value: 8 bit value to write
457 *
458 * Writes the value specified to the offset specified.
459 */
7cbbc745
AG
460void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
461{
56b53c0b 462 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
463 return;
464
421a2a30
ML
465 if (offset < adev->rmmio_size)
466 writeb(value, adev->rmmio + offset);
467 else
468 BUG();
469}
470
e3ecdffa 471/**
f7ee1874 472 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
e3ecdffa
AD
473 *
474 * @adev: amdgpu_device pointer
475 * @reg: dword aligned register offset
476 * @v: 32 bit value to write to the register
477 * @acc_flags: access flags which require special behavior
478 *
479 * Writes the value specified to the offset specified.
480 */
f7ee1874
HZ
481void amdgpu_device_wreg(struct amdgpu_device *adev,
482 uint32_t reg, uint32_t v,
483 uint32_t acc_flags)
d38ceaf9 484{
56b53c0b 485 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
486 return;
487
f7ee1874
HZ
488 if ((reg * 4) < adev->rmmio_size) {
489 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
490 amdgpu_sriov_runtime(adev) &&
d0fb18b5 491 down_read_trylock(&adev->reset_domain->sem)) {
f7ee1874 492 amdgpu_kiq_wreg(adev, reg, v);
d0fb18b5 493 up_read(&adev->reset_domain->sem);
f7ee1874
HZ
494 } else {
495 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
496 }
497 } else {
498 adev->pcie_wreg(adev, reg * 4, v);
81202807 499 }
bc992ba5 500
f7ee1874 501 trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
2e0cc4d4 502}
d38ceaf9 503
03f2abb0 504/**
4cc9f86f 505 * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
2e0cc4d4 506 *
71579346
RB
507 * @adev: amdgpu_device pointer
508 * @reg: mmio/rlc register
509 * @v: value to write
8057a9d6 510 * @xcc_id: xcc accelerated compute core id
71579346
RB
511 *
512 * this function is invoked only for the debugfs register access
03f2abb0 513 */
f7ee1874 514void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
8ed49dd1
VL
515 uint32_t reg, uint32_t v,
516 uint32_t xcc_id)
2e0cc4d4 517{
56b53c0b 518 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
519 return;
520
2e0cc4d4 521 if (amdgpu_sriov_fullaccess(adev) &&
f7ee1874
HZ
522 adev->gfx.rlc.funcs &&
523 adev->gfx.rlc.funcs->is_rlcg_access_range) {
2e0cc4d4 524 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
8ed49dd1 525 return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
4cc9f86f
TSD
526 } else if ((reg * 4) >= adev->rmmio_size) {
527 adev->pcie_wreg(adev, reg * 4, v);
f7ee1874
HZ
528 } else {
529 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
47ed4e1c 530 }
d38ceaf9
AD
531}
532
1bba3683
HZ
533/**
534 * amdgpu_device_indirect_rreg - read an indirect register
535 *
536 * @adev: amdgpu_device pointer
22f453fb 537 * @reg_addr: indirect register address to read from
1bba3683
HZ
538 *
539 * Returns the value of indirect register @reg_addr
540 */
541u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
1bba3683
HZ
542 u32 reg_addr)
543{
65ba96e9 544 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
545 void __iomem *pcie_index_offset;
546 void __iomem *pcie_data_offset;
65ba96e9
HZ
547 u32 r;
548
549 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
550 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1bba3683
HZ
551
552 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
553 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
554 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
555
556 writel(reg_addr, pcie_index_offset);
557 readl(pcie_index_offset);
558 r = readl(pcie_data_offset);
559 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
560
561 return r;
562}
563
0c552ed3
LM
564u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
565 u64 reg_addr)
566{
567 unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
568 u32 r;
569 void __iomem *pcie_index_offset;
570 void __iomem *pcie_index_hi_offset;
571 void __iomem *pcie_data_offset;
572
573 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
574 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
d57e24aa 575 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
0c552ed3
LM
576 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
577 else
578 pcie_index_hi = 0;
579
580 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
581 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
582 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
583 if (pcie_index_hi != 0)
584 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
585 pcie_index_hi * 4;
586
587 writel(reg_addr, pcie_index_offset);
588 readl(pcie_index_offset);
589 if (pcie_index_hi != 0) {
590 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
591 readl(pcie_index_hi_offset);
592 }
593 r = readl(pcie_data_offset);
594
595 /* clear the high bits */
596 if (pcie_index_hi != 0) {
597 writel(0, pcie_index_hi_offset);
598 readl(pcie_index_hi_offset);
599 }
600
601 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
602
603 return r;
604}
605
1bba3683
HZ
606/**
607 * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
608 *
609 * @adev: amdgpu_device pointer
22f453fb 610 * @reg_addr: indirect register address to read from
1bba3683
HZ
611 *
612 * Returns the value of indirect register @reg_addr
613 */
614u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
1bba3683
HZ
615 u32 reg_addr)
616{
65ba96e9 617 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
618 void __iomem *pcie_index_offset;
619 void __iomem *pcie_data_offset;
65ba96e9
HZ
620 u64 r;
621
622 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
623 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1bba3683
HZ
624
625 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
626 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
627 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
628
629 /* read low 32 bits */
630 writel(reg_addr, pcie_index_offset);
631 readl(pcie_index_offset);
632 r = readl(pcie_data_offset);
633 /* read high 32 bits */
634 writel(reg_addr + 4, pcie_index_offset);
635 readl(pcie_index_offset);
636 r |= ((u64)readl(pcie_data_offset) << 32);
637 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
638
639 return r;
640}
641
a76b2870
CL
642u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
643 u64 reg_addr)
644{
645 unsigned long flags, pcie_index, pcie_data;
646 unsigned long pcie_index_hi = 0;
647 void __iomem *pcie_index_offset;
648 void __iomem *pcie_index_hi_offset;
649 void __iomem *pcie_data_offset;
650 u64 r;
651
652 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
653 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
654 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
655 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
656
657 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
658 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
659 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
660 if (pcie_index_hi != 0)
661 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
662 pcie_index_hi * 4;
663
664 /* read low 32 bits */
665 writel(reg_addr, pcie_index_offset);
666 readl(pcie_index_offset);
667 if (pcie_index_hi != 0) {
668 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
669 readl(pcie_index_hi_offset);
670 }
671 r = readl(pcie_data_offset);
672 /* read high 32 bits */
673 writel(reg_addr + 4, pcie_index_offset);
674 readl(pcie_index_offset);
675 if (pcie_index_hi != 0) {
676 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
677 readl(pcie_index_hi_offset);
678 }
679 r |= ((u64)readl(pcie_data_offset) << 32);
680
681 /* clear the high bits */
682 if (pcie_index_hi != 0) {
683 writel(0, pcie_index_hi_offset);
684 readl(pcie_index_hi_offset);
685 }
686
687 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
688
689 return r;
690}
691
1bba3683
HZ
692/**
693 * amdgpu_device_indirect_wreg - write an indirect register address
694 *
695 * @adev: amdgpu_device pointer
1bba3683
HZ
696 * @reg_addr: indirect register offset
697 * @reg_data: indirect register data
698 *
699 */
700void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
1bba3683
HZ
701 u32 reg_addr, u32 reg_data)
702{
65ba96e9 703 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
704 void __iomem *pcie_index_offset;
705 void __iomem *pcie_data_offset;
706
65ba96e9
HZ
707 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
708 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
709
1bba3683
HZ
710 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
711 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
712 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
713
714 writel(reg_addr, pcie_index_offset);
715 readl(pcie_index_offset);
716 writel(reg_data, pcie_data_offset);
717 readl(pcie_data_offset);
718 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
719}
720
0c552ed3
LM
721void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
722 u64 reg_addr, u32 reg_data)
723{
724 unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
725 void __iomem *pcie_index_offset;
726 void __iomem *pcie_index_hi_offset;
727 void __iomem *pcie_data_offset;
728
729 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
730 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
d57e24aa 731 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
0c552ed3
LM
732 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
733 else
734 pcie_index_hi = 0;
735
736 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
737 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
738 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
739 if (pcie_index_hi != 0)
740 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
741 pcie_index_hi * 4;
742
743 writel(reg_addr, pcie_index_offset);
744 readl(pcie_index_offset);
745 if (pcie_index_hi != 0) {
746 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
747 readl(pcie_index_hi_offset);
748 }
749 writel(reg_data, pcie_data_offset);
750 readl(pcie_data_offset);
751
752 /* clear the high bits */
753 if (pcie_index_hi != 0) {
754 writel(0, pcie_index_hi_offset);
755 readl(pcie_index_hi_offset);
756 }
757
758 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
759}
760
1bba3683
HZ
761/**
762 * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
763 *
764 * @adev: amdgpu_device pointer
1bba3683
HZ
765 * @reg_addr: indirect register offset
766 * @reg_data: indirect register data
767 *
768 */
769void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
1bba3683
HZ
770 u32 reg_addr, u64 reg_data)
771{
65ba96e9 772 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
773 void __iomem *pcie_index_offset;
774 void __iomem *pcie_data_offset;
775
65ba96e9
HZ
776 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
777 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
778
1bba3683
HZ
779 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
780 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
781 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
782
783 /* write low 32 bits */
784 writel(reg_addr, pcie_index_offset);
785 readl(pcie_index_offset);
786 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
787 readl(pcie_data_offset);
788 /* write high 32 bits */
789 writel(reg_addr + 4, pcie_index_offset);
790 readl(pcie_index_offset);
791 writel((u32)(reg_data >> 32), pcie_data_offset);
792 readl(pcie_data_offset);
793 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
794}
795
a76b2870
CL
796void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
797 u64 reg_addr, u64 reg_data)
798{
799 unsigned long flags, pcie_index, pcie_data;
800 unsigned long pcie_index_hi = 0;
801 void __iomem *pcie_index_offset;
802 void __iomem *pcie_index_hi_offset;
803 void __iomem *pcie_data_offset;
804
805 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
806 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
807 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
808 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
809
810 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
811 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
812 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
813 if (pcie_index_hi != 0)
814 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
815 pcie_index_hi * 4;
816
817 /* write low 32 bits */
818 writel(reg_addr, pcie_index_offset);
819 readl(pcie_index_offset);
820 if (pcie_index_hi != 0) {
821 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
822 readl(pcie_index_hi_offset);
823 }
824 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
825 readl(pcie_data_offset);
826 /* write high 32 bits */
827 writel(reg_addr + 4, pcie_index_offset);
828 readl(pcie_index_offset);
829 if (pcie_index_hi != 0) {
830 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
831 readl(pcie_index_hi_offset);
832 }
833 writel((u32)(reg_data >> 32), pcie_data_offset);
834 readl(pcie_data_offset);
835
836 /* clear the high bits */
837 if (pcie_index_hi != 0) {
838 writel(0, pcie_index_hi_offset);
839 readl(pcie_index_hi_offset);
840 }
841
842 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
843}
844
dabc114e
HZ
845/**
846 * amdgpu_device_get_rev_id - query device rev_id
847 *
848 * @adev: amdgpu_device pointer
849 *
850 * Return device rev_id
851 */
852u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
853{
854 return adev->nbio.funcs->get_rev_id(adev);
855}
856
d38ceaf9
AD
857/**
858 * amdgpu_invalid_rreg - dummy reg read function
859 *
982a820b 860 * @adev: amdgpu_device pointer
d38ceaf9
AD
861 * @reg: offset of register
862 *
863 * Dummy register read function. Used for register blocks
864 * that certain asics don't have (all asics).
865 * Returns the value in the register.
866 */
867static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
868{
869 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
870 BUG();
871 return 0;
872}
873
0c552ed3
LM
874static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
875{
876 DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
877 BUG();
878 return 0;
879}
880
d38ceaf9
AD
881/**
882 * amdgpu_invalid_wreg - dummy reg write function
883 *
982a820b 884 * @adev: amdgpu_device pointer
d38ceaf9
AD
885 * @reg: offset of register
886 * @v: value to write to the register
887 *
888 * Dummy register read function. Used for register blocks
889 * that certain asics don't have (all asics).
890 */
891static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
892{
893 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
894 reg, v);
895 BUG();
896}
897
0c552ed3
LM
898static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
899{
900 DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
901 reg, v);
902 BUG();
903}
904
4fa1c6a6
TZ
905/**
906 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
907 *
982a820b 908 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
909 * @reg: offset of register
910 *
911 * Dummy register read function. Used for register blocks
912 * that certain asics don't have (all asics).
913 * Returns the value in the register.
914 */
915static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
916{
917 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
918 BUG();
919 return 0;
920}
921
a76b2870
CL
922static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg)
923{
924 DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
925 BUG();
926 return 0;
927}
928
4fa1c6a6
TZ
929/**
930 * amdgpu_invalid_wreg64 - dummy reg write function
931 *
982a820b 932 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
933 * @reg: offset of register
934 * @v: value to write to the register
935 *
936 * Dummy register read function. Used for register blocks
937 * that certain asics don't have (all asics).
938 */
939static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
940{
941 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
942 reg, v);
943 BUG();
944}
945
a76b2870
CL
946static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v)
947{
948 DRM_ERROR("Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n",
949 reg, v);
950 BUG();
951}
952
d38ceaf9
AD
953/**
954 * amdgpu_block_invalid_rreg - dummy reg read function
955 *
982a820b 956 * @adev: amdgpu_device pointer
d38ceaf9
AD
957 * @block: offset of instance
958 * @reg: offset of register
959 *
960 * Dummy register read function. Used for register blocks
961 * that certain asics don't have (all asics).
962 * Returns the value in the register.
963 */
964static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
965 uint32_t block, uint32_t reg)
966{
967 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
968 reg, block);
969 BUG();
970 return 0;
971}
972
973/**
974 * amdgpu_block_invalid_wreg - dummy reg write function
975 *
982a820b 976 * @adev: amdgpu_device pointer
d38ceaf9
AD
977 * @block: offset of instance
978 * @reg: offset of register
979 * @v: value to write to the register
980 *
981 * Dummy register read function. Used for register blocks
982 * that certain asics don't have (all asics).
983 */
984static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
985 uint32_t block,
986 uint32_t reg, uint32_t v)
987{
988 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
989 reg, block, v);
990 BUG();
991}
992
4d2997ab
AD
993/**
994 * amdgpu_device_asic_init - Wrapper for atom asic_init
995 *
982a820b 996 * @adev: amdgpu_device pointer
4d2997ab
AD
997 *
998 * Does any asic specific work and then calls atom asic init.
999 */
1000static int amdgpu_device_asic_init(struct amdgpu_device *adev)
1001{
15c5c5f5
LL
1002 int ret;
1003
4d2997ab
AD
1004 amdgpu_asic_pre_asic_init(adev);
1005
4e8303cf
LL
1006 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
1007 amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
15c5c5f5
LL
1008 amdgpu_psp_wait_for_bootloader(adev);
1009 ret = amdgpu_atomfirmware_asic_init(adev, true);
1010 return ret;
1011 } else {
85d1bcc6 1012 return amdgpu_atom_asic_init(adev->mode_info.atom_context);
15c5c5f5
LL
1013 }
1014
1015 return 0;
4d2997ab
AD
1016}
1017
e3ecdffa 1018/**
7ccfd79f 1019 * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
e3ecdffa 1020 *
982a820b 1021 * @adev: amdgpu_device pointer
e3ecdffa
AD
1022 *
1023 * Allocates a scratch page of VRAM for use by various things in the
1024 * driver.
1025 */
7ccfd79f 1026static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
d38ceaf9 1027{
7ccfd79f
CK
1028 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
1029 AMDGPU_GEM_DOMAIN_VRAM |
1030 AMDGPU_GEM_DOMAIN_GTT,
1031 &adev->mem_scratch.robj,
1032 &adev->mem_scratch.gpu_addr,
1033 (void **)&adev->mem_scratch.ptr);
d38ceaf9
AD
1034}
1035
e3ecdffa 1036/**
7ccfd79f 1037 * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
e3ecdffa 1038 *
982a820b 1039 * @adev: amdgpu_device pointer
e3ecdffa
AD
1040 *
1041 * Frees the VRAM scratch page.
1042 */
7ccfd79f 1043static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 1044{
7ccfd79f 1045 amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
d38ceaf9
AD
1046}
1047
1048/**
9c3f2b54 1049 * amdgpu_device_program_register_sequence - program an array of registers.
d38ceaf9
AD
1050 *
1051 * @adev: amdgpu_device pointer
1052 * @registers: pointer to the register array
1053 * @array_size: size of the register array
1054 *
b8920e1e 1055 * Programs an array or registers with and or masks.
d38ceaf9
AD
1056 * This is a helper for setting golden registers.
1057 */
9c3f2b54
AD
1058void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
1059 const u32 *registers,
1060 const u32 array_size)
d38ceaf9
AD
1061{
1062 u32 tmp, reg, and_mask, or_mask;
1063 int i;
1064
1065 if (array_size % 3)
1066 return;
1067
47fc644f 1068 for (i = 0; i < array_size; i += 3) {
d38ceaf9
AD
1069 reg = registers[i + 0];
1070 and_mask = registers[i + 1];
1071 or_mask = registers[i + 2];
1072
1073 if (and_mask == 0xffffffff) {
1074 tmp = or_mask;
1075 } else {
1076 tmp = RREG32(reg);
1077 tmp &= ~and_mask;
e0d07657
HZ
1078 if (adev->family >= AMDGPU_FAMILY_AI)
1079 tmp |= (or_mask & and_mask);
1080 else
1081 tmp |= or_mask;
d38ceaf9
AD
1082 }
1083 WREG32(reg, tmp);
1084 }
1085}
1086
e3ecdffa
AD
1087/**
1088 * amdgpu_device_pci_config_reset - reset the GPU
1089 *
1090 * @adev: amdgpu_device pointer
1091 *
1092 * Resets the GPU using the pci config reset sequence.
1093 * Only applicable to asics prior to vega10.
1094 */
8111c387 1095void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
1096{
1097 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
1098}
1099
af484df8
AD
1100/**
1101 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1102 *
1103 * @adev: amdgpu_device pointer
1104 *
1105 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1106 */
1107int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1108{
1109 return pci_reset_function(adev->pdev);
1110}
1111
d38ceaf9 1112/*
06ec9070 1113 * amdgpu_device_wb_*()
455a7bc2 1114 * Writeback is the method by which the GPU updates special pages in memory
ea81a173 1115 * with the status of certain GPU events (fences, ring pointers,etc.).
d38ceaf9
AD
1116 */
1117
1118/**
06ec9070 1119 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
1120 *
1121 * @adev: amdgpu_device pointer
1122 *
1123 * Disables Writeback and frees the Writeback memory (all asics).
1124 * Used at driver shutdown.
1125 */
06ec9070 1126static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1127{
1128 if (adev->wb.wb_obj) {
a76ed485
AD
1129 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1130 &adev->wb.gpu_addr,
1131 (void **)&adev->wb.wb);
d38ceaf9
AD
1132 adev->wb.wb_obj = NULL;
1133 }
1134}
1135
1136/**
03f2abb0 1137 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
d38ceaf9
AD
1138 *
1139 * @adev: amdgpu_device pointer
1140 *
455a7bc2 1141 * Initializes writeback and allocates writeback memory (all asics).
d38ceaf9
AD
1142 * Used at driver startup.
1143 * Returns 0 on success or an -error on failure.
1144 */
06ec9070 1145static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
1146{
1147 int r;
1148
1149 if (adev->wb.wb_obj == NULL) {
97407b63
AD
1150 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1151 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
1152 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1153 &adev->wb.wb_obj, &adev->wb.gpu_addr,
1154 (void **)&adev->wb.wb);
d38ceaf9
AD
1155 if (r) {
1156 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1157 return r;
1158 }
d38ceaf9
AD
1159
1160 adev->wb.num_wb = AMDGPU_MAX_WB;
1161 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1162
1163 /* clear wb memory */
73469585 1164 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
1165 }
1166
1167 return 0;
1168}
1169
1170/**
131b4b36 1171 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
1172 *
1173 * @adev: amdgpu_device pointer
1174 * @wb: wb index
1175 *
1176 * Allocate a wb slot for use by the driver (all asics).
1177 * Returns 0 on success or -EINVAL on failure.
1178 */
131b4b36 1179int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9
AD
1180{
1181 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 1182
97407b63 1183 if (offset < adev->wb.num_wb) {
7014285a 1184 __set_bit(offset, adev->wb.used);
63ae07ca 1185 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
1186 return 0;
1187 } else {
1188 return -EINVAL;
1189 }
1190}
1191
d38ceaf9 1192/**
131b4b36 1193 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
1194 *
1195 * @adev: amdgpu_device pointer
1196 * @wb: wb index
1197 *
1198 * Free a wb slot allocated for use by the driver (all asics)
1199 */
131b4b36 1200void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 1201{
73469585 1202 wb >>= 3;
d38ceaf9 1203 if (wb < adev->wb.num_wb)
73469585 1204 __clear_bit(wb, adev->wb.used);
d38ceaf9
AD
1205}
1206
d6895ad3
CK
1207/**
1208 * amdgpu_device_resize_fb_bar - try to resize FB BAR
1209 *
1210 * @adev: amdgpu_device pointer
1211 *
1212 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1213 * to fail, but if any of the BARs is not accessible after the size we abort
1214 * driver loading by returning -ENODEV.
1215 */
1216int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1217{
453f617a 1218 int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
31b8adab
CK
1219 struct pci_bus *root;
1220 struct resource *res;
b8920e1e 1221 unsigned int i;
d6895ad3
CK
1222 u16 cmd;
1223 int r;
1224
822130b5
AB
1225 if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1226 return 0;
1227
0c03b912 1228 /* Bypass for VF */
1229 if (amdgpu_sriov_vf(adev))
1230 return 0;
1231
b7221f2b
AD
1232 /* skip if the bios has already enabled large BAR */
1233 if (adev->gmc.real_vram_size &&
1234 (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1235 return 0;
1236
31b8adab
CK
1237 /* Check if the root BUS has 64bit memory resources */
1238 root = adev->pdev->bus;
1239 while (root->parent)
1240 root = root->parent;
1241
1242 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 1243 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
1244 res->start > 0x100000000ull)
1245 break;
1246 }
1247
1248 /* Trying to resize is pointless without a root hub window above 4GB */
1249 if (!res)
1250 return 0;
1251
453f617a
ND
1252 /* Limit the BAR size to what is available */
1253 rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1254 rbar_size);
1255
d6895ad3
CK
1256 /* Disable memory decoding while we change the BAR addresses and size */
1257 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1258 pci_write_config_word(adev->pdev, PCI_COMMAND,
1259 cmd & ~PCI_COMMAND_MEMORY);
1260
1261 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
43c064db 1262 amdgpu_doorbell_fini(adev);
d6895ad3
CK
1263 if (adev->asic_type >= CHIP_BONAIRE)
1264 pci_release_resource(adev->pdev, 2);
1265
1266 pci_release_resource(adev->pdev, 0);
1267
1268 r = pci_resize_resource(adev->pdev, 0, rbar_size);
1269 if (r == -ENOSPC)
1270 DRM_INFO("Not enough PCI address space for a large BAR.");
1271 else if (r && r != -ENOTSUPP)
1272 DRM_ERROR("Problem resizing BAR0 (%d).", r);
1273
1274 pci_assign_unassigned_bus_resources(adev->pdev->bus);
1275
1276 /* When the doorbell or fb BAR isn't available we have no chance of
1277 * using the device.
1278 */
43c064db 1279 r = amdgpu_doorbell_init(adev);
d6895ad3
CK
1280 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1281 return -ENODEV;
1282
1283 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1284
1285 return 0;
1286}
a05502e5 1287
9535a86a
SZ
1288static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
1289{
b8920e1e 1290 if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
9535a86a 1291 return false;
9535a86a
SZ
1292
1293 return true;
1294}
1295
d38ceaf9
AD
1296/*
1297 * GPU helpers function.
1298 */
1299/**
39c640c0 1300 * amdgpu_device_need_post - check if the hw need post or not
d38ceaf9
AD
1301 *
1302 * @adev: amdgpu_device pointer
1303 *
c836fec5
JQ
1304 * Check if the asic has been initialized (all asics) at driver startup
1305 * or post is needed if hw reset is performed.
1306 * Returns true if need or false if not.
d38ceaf9 1307 */
39c640c0 1308bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
1309{
1310 uint32_t reg;
1311
bec86378
ML
1312 if (amdgpu_sriov_vf(adev))
1313 return false;
1314
9535a86a
SZ
1315 if (!amdgpu_device_read_bios(adev))
1316 return false;
1317
bec86378 1318 if (amdgpu_passthrough(adev)) {
1da2c326
ML
1319 /* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
1320 * some old smc fw still need driver do vPost otherwise gpu hang, while
1321 * those smc fw version above 22.15 doesn't have this flaw, so we force
1322 * vpost executed for smc version below 22.15
bec86378
ML
1323 */
1324 if (adev->asic_type == CHIP_FIJI) {
1325 int err;
1326 uint32_t fw_ver;
b8920e1e 1327
bec86378
ML
1328 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1329 /* force vPost if error occured */
1330 if (err)
1331 return true;
1332
1333 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1da2c326
ML
1334 if (fw_ver < 0x00160e00)
1335 return true;
bec86378 1336 }
bec86378 1337 }
91fe77eb 1338
e3c1b071 1339 /* Don't post if we need to reset whole hive on init */
1340 if (adev->gmc.xgmi.pending_reset)
1341 return false;
1342
91fe77eb 1343 if (adev->has_hw_reset) {
1344 adev->has_hw_reset = false;
1345 return true;
1346 }
1347
1348 /* bios scratch used on CIK+ */
1349 if (adev->asic_type >= CHIP_BONAIRE)
1350 return amdgpu_atombios_scratch_need_asic_init(adev);
1351
1352 /* check MEM_SIZE for older asics */
1353 reg = amdgpu_asic_get_config_memsize(adev);
1354
1355 if ((reg != 0) && (reg != 0xffffffff))
1356 return false;
1357
1358 return true;
70e64c4d
ML
1359}
1360
bb0f8429
ML
1361/*
1362 * Check whether seamless boot is supported.
1363 *
1364 * So far we only support seamless boot on select ASICs.
1365 * If everything goes well, we may consider expanding
1366 * seamless boot to other ASICs.
1367 */
1368bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
1369{
1370 switch (adev->ip_versions[DCE_HWIP][0]) {
1371 case IP_VERSION(3, 0, 1):
1372 if (!adev->mman.keep_stolen_vga_memory)
1373 return true;
1374 break;
1375 default:
1376 break;
1377 }
1378
1379 return false;
1380}
1381
5d1eb4c4
ML
1382/*
1383 * Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic
1384 * speed switching. Until we have confirmation from Intel that a specific host
1385 * supports it, it's safer that we keep it disabled for all.
1386 *
1387 * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1388 * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1389 */
1390bool amdgpu_device_pcie_dynamic_switching_supported(void)
1391{
1392#if IS_ENABLED(CONFIG_X86)
1393 struct cpuinfo_x86 *c = &cpu_data(0);
1394
1395 if (c->x86_vendor == X86_VENDOR_INTEL)
1396 return false;
1397#endif
1398 return true;
1399}
1400
0ab5d711
ML
1401/**
1402 * amdgpu_device_should_use_aspm - check if the device should program ASPM
1403 *
1404 * @adev: amdgpu_device pointer
1405 *
1406 * Confirm whether the module parameter and pcie bridge agree that ASPM should
1407 * be set for this device.
1408 *
1409 * Returns true if it should be used or false if not.
1410 */
1411bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1412{
1413 switch (amdgpu_aspm) {
1414 case -1:
1415 break;
1416 case 0:
1417 return false;
1418 case 1:
1419 return true;
1420 default:
1421 return false;
1422 }
1423 return pcie_aspm_enabled(adev->pdev);
1424}
1425
3ad5dcfe
KHF
1426bool amdgpu_device_aspm_support_quirk(void)
1427{
1428#if IS_ENABLED(CONFIG_X86)
1429 struct cpuinfo_x86 *c = &cpu_data(0);
1430
1431 return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
1432#else
1433 return true;
1434#endif
1435}
1436
d38ceaf9
AD
1437/* if we get transitioned to only one device, take VGA back */
1438/**
06ec9070 1439 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9 1440 *
bf44e8ce 1441 * @pdev: PCI device pointer
d38ceaf9
AD
1442 * @state: enable/disable vga decode
1443 *
1444 * Enable/disable vga decode (all asics).
1445 * Returns VGA resource flags.
1446 */
bf44e8ce
CH
1447static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1448 bool state)
d38ceaf9 1449{
bf44e8ce 1450 struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
b8920e1e 1451
d38ceaf9
AD
1452 amdgpu_asic_set_vga_state(adev, state);
1453 if (state)
1454 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1455 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1456 else
1457 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1458}
1459
e3ecdffa
AD
1460/**
1461 * amdgpu_device_check_block_size - validate the vm block size
1462 *
1463 * @adev: amdgpu_device pointer
1464 *
1465 * Validates the vm block size specified via module parameter.
1466 * The vm block size defines number of bits in page table versus page directory,
1467 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1468 * page table and the remaining bits are in the page directory.
1469 */
06ec9070 1470static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
1471{
1472 /* defines number of bits in page table versus page directory,
1473 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
b8920e1e
SS
1474 * page table and the remaining bits are in the page directory
1475 */
bab4fee7
JZ
1476 if (amdgpu_vm_block_size == -1)
1477 return;
a1adf8be 1478
bab4fee7 1479 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
1480 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1481 amdgpu_vm_block_size);
97489129 1482 amdgpu_vm_block_size = -1;
a1adf8be 1483 }
a1adf8be
CZ
1484}
1485
e3ecdffa
AD
1486/**
1487 * amdgpu_device_check_vm_size - validate the vm size
1488 *
1489 * @adev: amdgpu_device pointer
1490 *
1491 * Validates the vm size in GB specified via module parameter.
1492 * The VM size is the size of the GPU virtual memory space in GB.
1493 */
06ec9070 1494static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 1495{
64dab074
AD
1496 /* no need to check the default value */
1497 if (amdgpu_vm_size == -1)
1498 return;
1499
83ca145d
ZJ
1500 if (amdgpu_vm_size < 1) {
1501 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1502 amdgpu_vm_size);
f3368128 1503 amdgpu_vm_size = -1;
83ca145d 1504 }
83ca145d
ZJ
1505}
1506
7951e376
RZ
1507static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1508{
1509 struct sysinfo si;
a9d4fe2f 1510 bool is_os_64 = (sizeof(void *) == 8);
7951e376
RZ
1511 uint64_t total_memory;
1512 uint64_t dram_size_seven_GB = 0x1B8000000;
1513 uint64_t dram_size_three_GB = 0xB8000000;
1514
1515 if (amdgpu_smu_memory_pool_size == 0)
1516 return;
1517
1518 if (!is_os_64) {
1519 DRM_WARN("Not 64-bit OS, feature not supported\n");
1520 goto def_value;
1521 }
1522 si_meminfo(&si);
1523 total_memory = (uint64_t)si.totalram * si.mem_unit;
1524
1525 if ((amdgpu_smu_memory_pool_size == 1) ||
1526 (amdgpu_smu_memory_pool_size == 2)) {
1527 if (total_memory < dram_size_three_GB)
1528 goto def_value1;
1529 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1530 (amdgpu_smu_memory_pool_size == 8)) {
1531 if (total_memory < dram_size_seven_GB)
1532 goto def_value1;
1533 } else {
1534 DRM_WARN("Smu memory pool size not supported\n");
1535 goto def_value;
1536 }
1537 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1538
1539 return;
1540
1541def_value1:
1542 DRM_WARN("No enough system memory\n");
1543def_value:
1544 adev->pm.smu_prv_buffer_size = 0;
1545}
1546
9f6a7857
HR
1547static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1548{
1549 if (!(adev->flags & AMD_IS_APU) ||
1550 adev->asic_type < CHIP_RAVEN)
1551 return 0;
1552
1553 switch (adev->asic_type) {
1554 case CHIP_RAVEN:
1555 if (adev->pdev->device == 0x15dd)
1556 adev->apu_flags |= AMD_APU_IS_RAVEN;
1557 if (adev->pdev->device == 0x15d8)
1558 adev->apu_flags |= AMD_APU_IS_PICASSO;
1559 break;
1560 case CHIP_RENOIR:
1561 if ((adev->pdev->device == 0x1636) ||
1562 (adev->pdev->device == 0x164c))
1563 adev->apu_flags |= AMD_APU_IS_RENOIR;
1564 else
1565 adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1566 break;
1567 case CHIP_VANGOGH:
1568 adev->apu_flags |= AMD_APU_IS_VANGOGH;
1569 break;
1570 case CHIP_YELLOW_CARP:
1571 break;
d0f56dc2 1572 case CHIP_CYAN_SKILLFISH:
dfcc3e8c
AD
1573 if ((adev->pdev->device == 0x13FE) ||
1574 (adev->pdev->device == 0x143F))
d0f56dc2
TZ
1575 adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1576 break;
9f6a7857 1577 default:
4eaf21b7 1578 break;
9f6a7857
HR
1579 }
1580
1581 return 0;
1582}
1583
d38ceaf9 1584/**
06ec9070 1585 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
1586 *
1587 * @adev: amdgpu_device pointer
1588 *
1589 * Validates certain module parameters and updates
1590 * the associated values used by the driver (all asics).
1591 */
912dfc84 1592static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1593{
5b011235
CZ
1594 if (amdgpu_sched_jobs < 4) {
1595 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1596 amdgpu_sched_jobs);
1597 amdgpu_sched_jobs = 4;
47fc644f 1598 } else if (!is_power_of_2(amdgpu_sched_jobs)) {
5b011235
CZ
1599 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1600 amdgpu_sched_jobs);
1601 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1602 }
d38ceaf9 1603
83e74db6 1604 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
1605 /* gart size must be greater or equal to 32M */
1606 dev_warn(adev->dev, "gart size (%d) too small\n",
1607 amdgpu_gart_size);
83e74db6 1608 amdgpu_gart_size = -1;
d38ceaf9
AD
1609 }
1610
36d38372 1611 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1612 /* gtt size must be greater or equal to 32M */
36d38372
CK
1613 dev_warn(adev->dev, "gtt size (%d) too small\n",
1614 amdgpu_gtt_size);
1615 amdgpu_gtt_size = -1;
d38ceaf9
AD
1616 }
1617
d07f14be
RH
1618 /* valid range is between 4 and 9 inclusive */
1619 if (amdgpu_vm_fragment_size != -1 &&
1620 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1621 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1622 amdgpu_vm_fragment_size = -1;
1623 }
1624
5d5bd5e3
KW
1625 if (amdgpu_sched_hw_submission < 2) {
1626 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1627 amdgpu_sched_hw_submission);
1628 amdgpu_sched_hw_submission = 2;
1629 } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1630 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1631 amdgpu_sched_hw_submission);
1632 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1633 }
1634
2656fd23
AG
1635 if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1636 dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1637 amdgpu_reset_method = -1;
1638 }
1639
7951e376
RZ
1640 amdgpu_device_check_smu_prv_buffer_size(adev);
1641
06ec9070 1642 amdgpu_device_check_vm_size(adev);
d38ceaf9 1643
06ec9070 1644 amdgpu_device_check_block_size(adev);
6a7f76e7 1645
19aede77 1646 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84 1647
e3c00faa 1648 return 0;
d38ceaf9
AD
1649}
1650
1651/**
1652 * amdgpu_switcheroo_set_state - set switcheroo state
1653 *
1654 * @pdev: pci dev pointer
1694467b 1655 * @state: vga_switcheroo state
d38ceaf9 1656 *
12024b17 1657 * Callback for the switcheroo driver. Suspends or resumes
d38ceaf9
AD
1658 * the asics before or after it is powered up using ACPI methods.
1659 */
8aba21b7
LT
1660static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1661 enum vga_switcheroo_state state)
d38ceaf9
AD
1662{
1663 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 1664 int r;
d38ceaf9 1665
b98c6299 1666 if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
1667 return;
1668
1669 if (state == VGA_SWITCHEROO_ON) {
dd4fa6c1 1670 pr_info("switched on\n");
d38ceaf9
AD
1671 /* don't suspend or resume card normally */
1672 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1673
8f66090b
TZ
1674 pci_set_power_state(pdev, PCI_D0);
1675 amdgpu_device_load_pci_state(pdev);
1676 r = pci_enable_device(pdev);
de185019
AD
1677 if (r)
1678 DRM_WARN("pci_enable_device failed (%d)\n", r);
1679 amdgpu_device_resume(dev, true);
d38ceaf9 1680
d38ceaf9 1681 dev->switch_power_state = DRM_SWITCH_POWER_ON;
d38ceaf9 1682 } else {
dd4fa6c1 1683 pr_info("switched off\n");
d38ceaf9 1684 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
de185019 1685 amdgpu_device_suspend(dev, true);
8f66090b 1686 amdgpu_device_cache_pci_state(pdev);
de185019 1687 /* Shut down the device */
8f66090b
TZ
1688 pci_disable_device(pdev);
1689 pci_set_power_state(pdev, PCI_D3cold);
d38ceaf9
AD
1690 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1691 }
1692}
1693
1694/**
1695 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1696 *
1697 * @pdev: pci dev pointer
1698 *
1699 * Callback for the switcheroo driver. Check of the switcheroo
1700 * state can be changed.
1701 * Returns true if the state can be changed, false if not.
1702 */
1703static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1704{
1705 struct drm_device *dev = pci_get_drvdata(pdev);
1706
b8920e1e 1707 /*
d38ceaf9
AD
1708 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1709 * locking inversion with the driver load path. And the access here is
1710 * completely racy anyway. So don't bother with locking for now.
1711 */
7e13ad89 1712 return atomic_read(&dev->open_count) == 0;
d38ceaf9
AD
1713}
1714
1715static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1716 .set_gpu_state = amdgpu_switcheroo_set_state,
1717 .reprobe = NULL,
1718 .can_switch = amdgpu_switcheroo_can_switch,
1719};
1720
e3ecdffa
AD
1721/**
1722 * amdgpu_device_ip_set_clockgating_state - set the CG state
1723 *
87e3f136 1724 * @dev: amdgpu_device pointer
e3ecdffa
AD
1725 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1726 * @state: clockgating state (gate or ungate)
1727 *
1728 * Sets the requested clockgating state for all instances of
1729 * the hardware IP specified.
1730 * Returns the error code from the last instance.
1731 */
43fa561f 1732int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1733 enum amd_ip_block_type block_type,
1734 enum amd_clockgating_state state)
d38ceaf9 1735{
43fa561f 1736 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1737 int i, r = 0;
1738
1739 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1740 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1741 continue;
c722865a
RZ
1742 if (adev->ip_blocks[i].version->type != block_type)
1743 continue;
1744 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1745 continue;
1746 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1747 (void *)adev, state);
1748 if (r)
1749 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1750 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1751 }
1752 return r;
1753}
1754
e3ecdffa
AD
1755/**
1756 * amdgpu_device_ip_set_powergating_state - set the PG state
1757 *
87e3f136 1758 * @dev: amdgpu_device pointer
e3ecdffa
AD
1759 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1760 * @state: powergating state (gate or ungate)
1761 *
1762 * Sets the requested powergating state for all instances of
1763 * the hardware IP specified.
1764 * Returns the error code from the last instance.
1765 */
43fa561f 1766int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1767 enum amd_ip_block_type block_type,
1768 enum amd_powergating_state state)
d38ceaf9 1769{
43fa561f 1770 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1771 int i, r = 0;
1772
1773 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1774 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1775 continue;
c722865a
RZ
1776 if (adev->ip_blocks[i].version->type != block_type)
1777 continue;
1778 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1779 continue;
1780 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1781 (void *)adev, state);
1782 if (r)
1783 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1784 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1785 }
1786 return r;
1787}
1788
e3ecdffa
AD
1789/**
1790 * amdgpu_device_ip_get_clockgating_state - get the CG state
1791 *
1792 * @adev: amdgpu_device pointer
1793 * @flags: clockgating feature flags
1794 *
1795 * Walks the list of IPs on the device and updates the clockgating
1796 * flags for each IP.
1797 * Updates @flags with the feature flags for each hardware IP where
1798 * clockgating is enabled.
1799 */
2990a1fc 1800void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
25faeddc 1801 u64 *flags)
6cb2d4e4
HR
1802{
1803 int i;
1804
1805 for (i = 0; i < adev->num_ip_blocks; i++) {
1806 if (!adev->ip_blocks[i].status.valid)
1807 continue;
1808 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1809 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1810 }
1811}
1812
e3ecdffa
AD
1813/**
1814 * amdgpu_device_ip_wait_for_idle - wait for idle
1815 *
1816 * @adev: amdgpu_device pointer
1817 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1818 *
1819 * Waits for the request hardware IP to be idle.
1820 * Returns 0 for success or a negative error code on failure.
1821 */
2990a1fc
AD
1822int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1823 enum amd_ip_block_type block_type)
5dbbb60b
AD
1824{
1825 int i, r;
1826
1827 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1828 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1829 continue;
a1255107
AD
1830 if (adev->ip_blocks[i].version->type == block_type) {
1831 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1832 if (r)
1833 return r;
1834 break;
1835 }
1836 }
1837 return 0;
1838
1839}
1840
e3ecdffa
AD
1841/**
1842 * amdgpu_device_ip_is_idle - is the hardware IP idle
1843 *
1844 * @adev: amdgpu_device pointer
1845 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1846 *
1847 * Check if the hardware IP is idle or not.
1848 * Returns true if it the IP is idle, false if not.
1849 */
2990a1fc
AD
1850bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1851 enum amd_ip_block_type block_type)
5dbbb60b
AD
1852{
1853 int i;
1854
1855 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1856 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1857 continue;
a1255107
AD
1858 if (adev->ip_blocks[i].version->type == block_type)
1859 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1860 }
1861 return true;
1862
1863}
1864
e3ecdffa
AD
1865/**
1866 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1867 *
1868 * @adev: amdgpu_device pointer
87e3f136 1869 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1870 *
1871 * Returns a pointer to the hardware IP block structure
1872 * if it exists for the asic, otherwise NULL.
1873 */
2990a1fc
AD
1874struct amdgpu_ip_block *
1875amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1876 enum amd_ip_block_type type)
d38ceaf9
AD
1877{
1878 int i;
1879
1880 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1881 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1882 return &adev->ip_blocks[i];
1883
1884 return NULL;
1885}
1886
1887/**
2990a1fc 1888 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1889 *
1890 * @adev: amdgpu_device pointer
5fc3aeeb 1891 * @type: enum amd_ip_block_type
d38ceaf9
AD
1892 * @major: major version
1893 * @minor: minor version
1894 *
1895 * return 0 if equal or greater
1896 * return 1 if smaller or the ip_block doesn't exist
1897 */
2990a1fc
AD
1898int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1899 enum amd_ip_block_type type,
1900 u32 major, u32 minor)
d38ceaf9 1901{
2990a1fc 1902 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1903
a1255107
AD
1904 if (ip_block && ((ip_block->version->major > major) ||
1905 ((ip_block->version->major == major) &&
1906 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1907 return 0;
1908
1909 return 1;
1910}
1911
a1255107 1912/**
2990a1fc 1913 * amdgpu_device_ip_block_add
a1255107
AD
1914 *
1915 * @adev: amdgpu_device pointer
1916 * @ip_block_version: pointer to the IP to add
1917 *
1918 * Adds the IP block driver information to the collection of IPs
1919 * on the asic.
1920 */
2990a1fc
AD
1921int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1922 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1923{
1924 if (!ip_block_version)
1925 return -EINVAL;
1926
7bd939d0
LG
1927 switch (ip_block_version->type) {
1928 case AMD_IP_BLOCK_TYPE_VCN:
1929 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
1930 return 0;
1931 break;
1932 case AMD_IP_BLOCK_TYPE_JPEG:
1933 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
1934 return 0;
1935 break;
1936 default:
1937 break;
1938 }
1939
e966a725 1940 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1941 ip_block_version->funcs->name);
1942
a1255107
AD
1943 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1944
1945 return 0;
1946}
1947
e3ecdffa
AD
1948/**
1949 * amdgpu_device_enable_virtual_display - enable virtual display feature
1950 *
1951 * @adev: amdgpu_device pointer
1952 *
1953 * Enabled the virtual display feature if the user has enabled it via
1954 * the module parameter virtual_display. This feature provides a virtual
1955 * display hardware on headless boards or in virtualized environments.
1956 * This function parses and validates the configuration string specified by
1957 * the user and configues the virtual display configuration (number of
1958 * virtual connectors, crtcs, etc.) specified.
1959 */
483ef985 1960static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1961{
1962 adev->enable_virtual_display = false;
1963
1964 if (amdgpu_virtual_display) {
8f66090b 1965 const char *pci_address_name = pci_name(adev->pdev);
0f66356d 1966 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1967
1968 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1969 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1970 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1971 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1972 if (!strcmp("all", pciaddname)
1973 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1974 long num_crtc;
1975 int res = -1;
1976
9accf2fd 1977 adev->enable_virtual_display = true;
0f66356d
ED
1978
1979 if (pciaddname_tmp)
1980 res = kstrtol(pciaddname_tmp, 10,
1981 &num_crtc);
1982
1983 if (!res) {
1984 if (num_crtc < 1)
1985 num_crtc = 1;
1986 if (num_crtc > 6)
1987 num_crtc = 6;
1988 adev->mode_info.num_crtc = num_crtc;
1989 } else {
1990 adev->mode_info.num_crtc = 1;
1991 }
9accf2fd
ED
1992 break;
1993 }
1994 }
1995
0f66356d
ED
1996 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1997 amdgpu_virtual_display, pci_address_name,
1998 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
1999
2000 kfree(pciaddstr);
2001 }
2002}
2003
25263da3
AD
2004void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
2005{
2006 if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
2007 adev->mode_info.num_crtc = 1;
2008 adev->enable_virtual_display = true;
2009 DRM_INFO("virtual_display:%d, num_crtc:%d\n",
2010 adev->enable_virtual_display, adev->mode_info.num_crtc);
2011 }
2012}
2013
e3ecdffa
AD
2014/**
2015 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
2016 *
2017 * @adev: amdgpu_device pointer
2018 *
2019 * Parses the asic configuration parameters specified in the gpu info
2020 * firmware and makes them availale to the driver for use in configuring
2021 * the asic.
2022 * Returns 0 on success, -EINVAL on failure.
2023 */
e2a75f88
AD
2024static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
2025{
e2a75f88 2026 const char *chip_name;
c0a43457 2027 char fw_name[40];
e2a75f88
AD
2028 int err;
2029 const struct gpu_info_firmware_header_v1_0 *hdr;
2030
ab4fe3e1
HR
2031 adev->firmware.gpu_info_fw = NULL;
2032
72de33f8 2033 if (adev->mman.discovery_bin) {
cc375d8c
TY
2034 /*
2035 * FIXME: The bounding box is still needed by Navi12, so
e24d0e91 2036 * temporarily read it from gpu_info firmware. Should be dropped
cc375d8c
TY
2037 * when DAL no longer needs it.
2038 */
2039 if (adev->asic_type != CHIP_NAVI12)
2040 return 0;
258620d0
AD
2041 }
2042
e2a75f88 2043 switch (adev->asic_type) {
e2a75f88
AD
2044 default:
2045 return 0;
2046 case CHIP_VEGA10:
2047 chip_name = "vega10";
2048 break;
3f76dced
AD
2049 case CHIP_VEGA12:
2050 chip_name = "vega12";
2051 break;
2d2e5e7e 2052 case CHIP_RAVEN:
54f78a76 2053 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
54c4d17e 2054 chip_name = "raven2";
54f78a76 2055 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
741deade 2056 chip_name = "picasso";
54c4d17e
FX
2057 else
2058 chip_name = "raven";
2d2e5e7e 2059 break;
65e60f6e
LM
2060 case CHIP_ARCTURUS:
2061 chip_name = "arcturus";
2062 break;
42b325e5
XY
2063 case CHIP_NAVI12:
2064 chip_name = "navi12";
2065 break;
e2a75f88
AD
2066 }
2067
2068 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
b31d3063 2069 err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
e2a75f88
AD
2070 if (err) {
2071 dev_err(adev->dev,
b31d3063 2072 "Failed to get gpu_info firmware \"%s\"\n",
e2a75f88
AD
2073 fw_name);
2074 goto out;
2075 }
2076
ab4fe3e1 2077 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
2078 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
2079
2080 switch (hdr->version_major) {
2081 case 1:
2082 {
2083 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 2084 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
2085 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2086
cc375d8c
TY
2087 /*
2088 * Should be droped when DAL no longer needs it.
2089 */
2090 if (adev->asic_type == CHIP_NAVI12)
ec51d3fa
XY
2091 goto parse_soc_bounding_box;
2092
b5ab16bf
AD
2093 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2094 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2095 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2096 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 2097 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
2098 le32_to_cpu(gpu_info_fw->gc_num_tccs);
2099 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2100 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2101 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2102 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 2103 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
2104 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2105 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
2106 adev->gfx.cu_info.max_waves_per_simd =
2107 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2108 adev->gfx.cu_info.max_scratch_slots_per_cu =
2109 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2110 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 2111 if (hdr->version_minor >= 1) {
35c2e910
HZ
2112 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2113 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2114 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2115 adev->gfx.config.num_sc_per_sh =
2116 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2117 adev->gfx.config.num_packer_per_sc =
2118 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2119 }
ec51d3fa
XY
2120
2121parse_soc_bounding_box:
ec51d3fa
XY
2122 /*
2123 * soc bounding box info is not integrated in disocovery table,
258620d0 2124 * we always need to parse it from gpu info firmware if needed.
ec51d3fa 2125 */
48321c3d
HW
2126 if (hdr->version_minor == 2) {
2127 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2128 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2129 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2130 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2131 }
e2a75f88
AD
2132 break;
2133 }
2134 default:
2135 dev_err(adev->dev,
2136 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2137 err = -EINVAL;
2138 goto out;
2139 }
2140out:
e2a75f88
AD
2141 return err;
2142}
2143
e3ecdffa
AD
2144/**
2145 * amdgpu_device_ip_early_init - run early init for hardware IPs
2146 *
2147 * @adev: amdgpu_device pointer
2148 *
2149 * Early initialization pass for hardware IPs. The hardware IPs that make
2150 * up each asic are discovered each IP's early_init callback is run. This
2151 * is the first stage in initializing the asic.
2152 * Returns 0 on success, negative error code on failure.
2153 */
06ec9070 2154static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 2155{
901e2be2
AD
2156 struct drm_device *dev = adev_to_drm(adev);
2157 struct pci_dev *parent;
aaa36a97 2158 int i, r;
ced69502 2159 bool total;
d38ceaf9 2160
483ef985 2161 amdgpu_device_enable_virtual_display(adev);
a6be7570 2162
00a979f3 2163 if (amdgpu_sriov_vf(adev)) {
00a979f3 2164 r = amdgpu_virt_request_full_gpu(adev, true);
aaa36a97
AD
2165 if (r)
2166 return r;
00a979f3
WS
2167 }
2168
d38ceaf9 2169 switch (adev->asic_type) {
33f34802
KW
2170#ifdef CONFIG_DRM_AMDGPU_SI
2171 case CHIP_VERDE:
2172 case CHIP_TAHITI:
2173 case CHIP_PITCAIRN:
2174 case CHIP_OLAND:
2175 case CHIP_HAINAN:
295d0daf 2176 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
2177 r = si_set_ip_blocks(adev);
2178 if (r)
2179 return r;
2180 break;
2181#endif
a2e73f56
AD
2182#ifdef CONFIG_DRM_AMDGPU_CIK
2183 case CHIP_BONAIRE:
2184 case CHIP_HAWAII:
2185 case CHIP_KAVERI:
2186 case CHIP_KABINI:
2187 case CHIP_MULLINS:
e1ad2d53 2188 if (adev->flags & AMD_IS_APU)
a2e73f56 2189 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
2190 else
2191 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
2192
2193 r = cik_set_ip_blocks(adev);
2194 if (r)
2195 return r;
2196 break;
2197#endif
da87c30b
AD
2198 case CHIP_TOPAZ:
2199 case CHIP_TONGA:
2200 case CHIP_FIJI:
2201 case CHIP_POLARIS10:
2202 case CHIP_POLARIS11:
2203 case CHIP_POLARIS12:
2204 case CHIP_VEGAM:
2205 case CHIP_CARRIZO:
2206 case CHIP_STONEY:
2207 if (adev->flags & AMD_IS_APU)
2208 adev->family = AMDGPU_FAMILY_CZ;
2209 else
2210 adev->family = AMDGPU_FAMILY_VI;
2211
2212 r = vi_set_ip_blocks(adev);
2213 if (r)
2214 return r;
2215 break;
d38ceaf9 2216 default:
63352b7f
AD
2217 r = amdgpu_discovery_set_ip_blocks(adev);
2218 if (r)
2219 return r;
2220 break;
d38ceaf9
AD
2221 }
2222
901e2be2
AD
2223 if (amdgpu_has_atpx() &&
2224 (amdgpu_is_atpx_hybrid() ||
2225 amdgpu_has_atpx_dgpu_power_cntl()) &&
2226 ((adev->flags & AMD_IS_APU) == 0) &&
2227 !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
2228 adev->flags |= AMD_IS_PX;
2229
85ac2021
AD
2230 if (!(adev->flags & AMD_IS_APU)) {
2231 parent = pci_upstream_bridge(adev->pdev);
2232 adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2233 }
901e2be2 2234
1884734a 2235
3b94fb10 2236 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 2237 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 2238 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
4215a119
HC
2239 if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2240 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
00f54b97 2241
ced69502 2242 total = true;
d38ceaf9
AD
2243 for (i = 0; i < adev->num_ip_blocks; i++) {
2244 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
0c451baf 2245 DRM_WARN("disabled ip block: %d <%s>\n",
ed8cf00c 2246 i, adev->ip_blocks[i].version->funcs->name);
a1255107 2247 adev->ip_blocks[i].status.valid = false;
d38ceaf9 2248 } else {
a1255107
AD
2249 if (adev->ip_blocks[i].version->funcs->early_init) {
2250 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 2251 if (r == -ENOENT) {
a1255107 2252 adev->ip_blocks[i].status.valid = false;
2c1a2784 2253 } else if (r) {
a1255107
AD
2254 DRM_ERROR("early_init of IP block <%s> failed %d\n",
2255 adev->ip_blocks[i].version->funcs->name, r);
ced69502 2256 total = false;
2c1a2784 2257 } else {
a1255107 2258 adev->ip_blocks[i].status.valid = true;
2c1a2784 2259 }
974e6b64 2260 } else {
a1255107 2261 adev->ip_blocks[i].status.valid = true;
d38ceaf9 2262 }
d38ceaf9 2263 }
21a249ca
AD
2264 /* get the vbios after the asic_funcs are set up */
2265 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
2266 r = amdgpu_device_parse_gpu_info_fw(adev);
2267 if (r)
2268 return r;
2269
21a249ca 2270 /* Read BIOS */
9535a86a
SZ
2271 if (amdgpu_device_read_bios(adev)) {
2272 if (!amdgpu_get_bios(adev))
2273 return -EINVAL;
21a249ca 2274
9535a86a
SZ
2275 r = amdgpu_atombios_init(adev);
2276 if (r) {
2277 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2278 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2279 return r;
2280 }
21a249ca 2281 }
77eabc6f
PJZ
2282
2283 /*get pf2vf msg info at it's earliest time*/
2284 if (amdgpu_sriov_vf(adev))
2285 amdgpu_virt_init_data_exchange(adev);
2286
21a249ca 2287 }
d38ceaf9 2288 }
ced69502
ML
2289 if (!total)
2290 return -ENODEV;
d38ceaf9 2291
00fa4035 2292 amdgpu_amdkfd_device_probe(adev);
395d1fb9
NH
2293 adev->cg_flags &= amdgpu_cg_mask;
2294 adev->pg_flags &= amdgpu_pg_mask;
2295
d38ceaf9
AD
2296 return 0;
2297}
2298
0a4f2520
RZ
2299static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2300{
2301 int i, r;
2302
2303 for (i = 0; i < adev->num_ip_blocks; i++) {
2304 if (!adev->ip_blocks[i].status.sw)
2305 continue;
2306 if (adev->ip_blocks[i].status.hw)
2307 continue;
2308 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 2309 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
2310 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2311 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2312 if (r) {
2313 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2314 adev->ip_blocks[i].version->funcs->name, r);
2315 return r;
2316 }
2317 adev->ip_blocks[i].status.hw = true;
2318 }
2319 }
2320
2321 return 0;
2322}
2323
2324static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2325{
2326 int i, r;
2327
2328 for (i = 0; i < adev->num_ip_blocks; i++) {
2329 if (!adev->ip_blocks[i].status.sw)
2330 continue;
2331 if (adev->ip_blocks[i].status.hw)
2332 continue;
2333 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2334 if (r) {
2335 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2336 adev->ip_blocks[i].version->funcs->name, r);
2337 return r;
2338 }
2339 adev->ip_blocks[i].status.hw = true;
2340 }
2341
2342 return 0;
2343}
2344
7a3e0bb2
RZ
2345static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2346{
2347 int r = 0;
2348 int i;
80f41f84 2349 uint32_t smu_version;
7a3e0bb2
RZ
2350
2351 if (adev->asic_type >= CHIP_VEGA10) {
2352 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
2353 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2354 continue;
2355
e3c1b071 2356 if (!adev->ip_blocks[i].status.sw)
2357 continue;
2358
482f0e53
ML
2359 /* no need to do the fw loading again if already done*/
2360 if (adev->ip_blocks[i].status.hw == true)
2361 break;
2362
53b3f8f4 2363 if (amdgpu_in_reset(adev) || adev->in_suspend) {
482f0e53
ML
2364 r = adev->ip_blocks[i].version->funcs->resume(adev);
2365 if (r) {
2366 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 2367 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
2368 return r;
2369 }
2370 } else {
2371 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2372 if (r) {
2373 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2374 adev->ip_blocks[i].version->funcs->name, r);
2375 return r;
7a3e0bb2 2376 }
7a3e0bb2 2377 }
482f0e53
ML
2378
2379 adev->ip_blocks[i].status.hw = true;
2380 break;
7a3e0bb2
RZ
2381 }
2382 }
482f0e53 2383
8973d9ec
ED
2384 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2385 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 2386
80f41f84 2387 return r;
7a3e0bb2
RZ
2388}
2389
5fd8518d
AG
2390static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2391{
2392 long timeout;
2393 int r, i;
2394
2395 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2396 struct amdgpu_ring *ring = adev->rings[i];
2397
2398 /* No need to setup the GPU scheduler for rings that don't need it */
2399 if (!ring || ring->no_scheduler)
2400 continue;
2401
2402 switch (ring->funcs->type) {
2403 case AMDGPU_RING_TYPE_GFX:
2404 timeout = adev->gfx_timeout;
2405 break;
2406 case AMDGPU_RING_TYPE_COMPUTE:
2407 timeout = adev->compute_timeout;
2408 break;
2409 case AMDGPU_RING_TYPE_SDMA:
2410 timeout = adev->sdma_timeout;
2411 break;
2412 default:
2413 timeout = adev->video_timeout;
2414 break;
2415 }
2416
2417 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
11f25c84 2418 ring->num_hw_submission, 0,
8ab62eda
JG
2419 timeout, adev->reset_domain->wq,
2420 ring->sched_score, ring->name,
2421 adev->dev);
5fd8518d
AG
2422 if (r) {
2423 DRM_ERROR("Failed to create scheduler on ring %s.\n",
2424 ring->name);
2425 return r;
2426 }
2427 }
2428
d425c6f4
JZ
2429 amdgpu_xcp_update_partition_sched_list(adev);
2430
5fd8518d
AG
2431 return 0;
2432}
2433
2434
e3ecdffa
AD
2435/**
2436 * amdgpu_device_ip_init - run init for hardware IPs
2437 *
2438 * @adev: amdgpu_device pointer
2439 *
2440 * Main initialization pass for hardware IPs. The list of all the hardware
2441 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2442 * are run. sw_init initializes the software state associated with each IP
2443 * and hw_init initializes the hardware associated with each IP.
2444 * Returns 0 on success, negative error code on failure.
2445 */
06ec9070 2446static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
2447{
2448 int i, r;
2449
c030f2e4 2450 r = amdgpu_ras_init(adev);
2451 if (r)
2452 return r;
2453
d38ceaf9 2454 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2455 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2456 continue;
a1255107 2457 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 2458 if (r) {
a1255107
AD
2459 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2460 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 2461 goto init_failed;
2c1a2784 2462 }
a1255107 2463 adev->ip_blocks[i].status.sw = true;
bfca0289 2464
c1c39032
AD
2465 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2466 /* need to do common hw init early so everything is set up for gmc */
2467 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2468 if (r) {
2469 DRM_ERROR("hw_init %d failed %d\n", i, r);
2470 goto init_failed;
2471 }
2472 adev->ip_blocks[i].status.hw = true;
2473 } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2474 /* need to do gmc hw init early so we can allocate gpu mem */
892deb48
VS
2475 /* Try to reserve bad pages early */
2476 if (amdgpu_sriov_vf(adev))
2477 amdgpu_virt_exchange_data(adev);
2478
7ccfd79f 2479 r = amdgpu_device_mem_scratch_init(adev);
2c1a2784 2480 if (r) {
7ccfd79f 2481 DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
72d3f592 2482 goto init_failed;
2c1a2784 2483 }
a1255107 2484 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
2485 if (r) {
2486 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 2487 goto init_failed;
2c1a2784 2488 }
06ec9070 2489 r = amdgpu_device_wb_init(adev);
2c1a2784 2490 if (r) {
06ec9070 2491 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 2492 goto init_failed;
2c1a2784 2493 }
a1255107 2494 adev->ip_blocks[i].status.hw = true;
2493664f
ML
2495
2496 /* right after GMC hw init, we create CSA */
02ff519e 2497 if (adev->gfx.mcbp) {
1e256e27 2498 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
58ab2c08
CK
2499 AMDGPU_GEM_DOMAIN_VRAM |
2500 AMDGPU_GEM_DOMAIN_GTT,
2501 AMDGPU_CSA_SIZE);
2493664f
ML
2502 if (r) {
2503 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 2504 goto init_failed;
2493664f
ML
2505 }
2506 }
d38ceaf9
AD
2507 }
2508 }
2509
c9ffa427 2510 if (amdgpu_sriov_vf(adev))
22c16d25 2511 amdgpu_virt_init_data_exchange(adev);
c9ffa427 2512
533aed27
AG
2513 r = amdgpu_ib_pool_init(adev);
2514 if (r) {
2515 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2516 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2517 goto init_failed;
2518 }
2519
c8963ea4
RZ
2520 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2521 if (r)
72d3f592 2522 goto init_failed;
0a4f2520
RZ
2523
2524 r = amdgpu_device_ip_hw_init_phase1(adev);
2525 if (r)
72d3f592 2526 goto init_failed;
0a4f2520 2527
7a3e0bb2
RZ
2528 r = amdgpu_device_fw_loading(adev);
2529 if (r)
72d3f592 2530 goto init_failed;
7a3e0bb2 2531
0a4f2520
RZ
2532 r = amdgpu_device_ip_hw_init_phase2(adev);
2533 if (r)
72d3f592 2534 goto init_failed;
d38ceaf9 2535
121a2bc6
AG
2536 /*
2537 * retired pages will be loaded from eeprom and reserved here,
2538 * it should be called after amdgpu_device_ip_hw_init_phase2 since
2539 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2540 * for I2C communication which only true at this point.
b82e65a9
GC
2541 *
2542 * amdgpu_ras_recovery_init may fail, but the upper only cares the
2543 * failure from bad gpu situation and stop amdgpu init process
2544 * accordingly. For other failed cases, it will still release all
2545 * the resource and print error message, rather than returning one
2546 * negative value to upper level.
121a2bc6
AG
2547 *
2548 * Note: theoretically, this should be called before all vram allocations
2549 * to protect retired page from abusing
2550 */
b82e65a9
GC
2551 r = amdgpu_ras_recovery_init(adev);
2552 if (r)
2553 goto init_failed;
121a2bc6 2554
cfbb6b00
AG
2555 /**
2556 * In case of XGMI grab extra reference for reset domain for this device
2557 */
a4c63caf 2558 if (adev->gmc.xgmi.num_physical_nodes > 1) {
cfbb6b00 2559 if (amdgpu_xgmi_add_device(adev) == 0) {
46c67660 2560 if (!amdgpu_sriov_vf(adev)) {
2efc30f0
VC
2561 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2562
dfd0287b
LH
2563 if (WARN_ON(!hive)) {
2564 r = -ENOENT;
2565 goto init_failed;
2566 }
2567
46c67660 2568 if (!hive->reset_domain ||
2569 !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2570 r = -ENOENT;
2571 amdgpu_put_xgmi_hive(hive);
2572 goto init_failed;
2573 }
2574
2575 /* Drop the early temporary reset domain we created for device */
2576 amdgpu_reset_put_reset_domain(adev->reset_domain);
2577 adev->reset_domain = hive->reset_domain;
9dfa4860 2578 amdgpu_put_xgmi_hive(hive);
cfbb6b00 2579 }
a4c63caf
AG
2580 }
2581 }
2582
5fd8518d
AG
2583 r = amdgpu_device_init_schedulers(adev);
2584 if (r)
2585 goto init_failed;
e3c1b071 2586
2587 /* Don't init kfd if whole hive need to be reset during init */
84b4dd3f
PY
2588 if (!adev->gmc.xgmi.pending_reset) {
2589 kgd2kfd_init_zone_device(adev);
e3c1b071 2590 amdgpu_amdkfd_device_init(adev);
84b4dd3f 2591 }
c6332b97 2592
bd607166
KR
2593 amdgpu_fru_get_product_info(adev);
2594
72d3f592 2595init_failed:
c6332b97 2596
72d3f592 2597 return r;
d38ceaf9
AD
2598}
2599
e3ecdffa
AD
2600/**
2601 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2602 *
2603 * @adev: amdgpu_device pointer
2604 *
2605 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2606 * this function before a GPU reset. If the value is retained after a
2607 * GPU reset, VRAM has not been lost. Some GPU resets may destry VRAM contents.
2608 */
06ec9070 2609static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2610{
2611 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2612}
2613
e3ecdffa
AD
2614/**
2615 * amdgpu_device_check_vram_lost - check if vram is valid
2616 *
2617 * @adev: amdgpu_device pointer
2618 *
2619 * Checks the reset magic value written to the gart pointer in VRAM.
2620 * The driver calls this after a GPU reset to see if the contents of
2621 * VRAM is lost or now.
2622 * returns true if vram is lost, false if not.
2623 */
06ec9070 2624static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2625{
dadce777
EQ
2626 if (memcmp(adev->gart.ptr, adev->reset_magic,
2627 AMDGPU_RESET_MAGIC_NUM))
2628 return true;
2629
53b3f8f4 2630 if (!amdgpu_in_reset(adev))
dadce777
EQ
2631 return false;
2632
2633 /*
2634 * For all ASICs with baco/mode1 reset, the VRAM is
2635 * always assumed to be lost.
2636 */
2637 switch (amdgpu_asic_reset_method(adev)) {
2638 case AMD_RESET_METHOD_BACO:
2639 case AMD_RESET_METHOD_MODE1:
2640 return true;
2641 default:
2642 return false;
2643 }
0c49e0b8
CZ
2644}
2645
e3ecdffa 2646/**
1112a46b 2647 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2648 *
2649 * @adev: amdgpu_device pointer
b8b72130 2650 * @state: clockgating state (gate or ungate)
e3ecdffa 2651 *
e3ecdffa 2652 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2653 * set_clockgating_state callbacks are run.
2654 * Late initialization pass enabling clockgating for hardware IPs.
2655 * Fini or suspend, pass disabling clockgating for hardware IPs.
e3ecdffa
AD
2656 * Returns 0 on success, negative error code on failure.
2657 */
fdd34271 2658
5d89bb2d
LL
2659int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2660 enum amd_clockgating_state state)
d38ceaf9 2661{
1112a46b 2662 int i, j, r;
d38ceaf9 2663
4a2ba394
SL
2664 if (amdgpu_emu_mode == 1)
2665 return 0;
2666
1112a46b
RZ
2667 for (j = 0; j < adev->num_ip_blocks; j++) {
2668 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2669 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2670 continue;
47198eb7 2671 /* skip CG for GFX, SDMA on S0ix */
5d70a549 2672 if (adev->in_s0ix &&
47198eb7
AD
2673 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2674 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2675 continue;
4a446d55 2676 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2677 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2678 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2679 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2680 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2681 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2682 /* enable clockgating to save power */
a1255107 2683 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2684 state);
4a446d55
AD
2685 if (r) {
2686 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2687 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2688 return r;
2689 }
b0b00ff1 2690 }
d38ceaf9 2691 }
06b18f61 2692
c9f96fd5
RZ
2693 return 0;
2694}
2695
5d89bb2d
LL
2696int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2697 enum amd_powergating_state state)
c9f96fd5 2698{
1112a46b 2699 int i, j, r;
06b18f61 2700
c9f96fd5
RZ
2701 if (amdgpu_emu_mode == 1)
2702 return 0;
2703
1112a46b
RZ
2704 for (j = 0; j < adev->num_ip_blocks; j++) {
2705 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2706 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5 2707 continue;
47198eb7 2708 /* skip PG for GFX, SDMA on S0ix */
5d70a549 2709 if (adev->in_s0ix &&
47198eb7
AD
2710 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2711 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2712 continue;
c9f96fd5
RZ
2713 /* skip CG for VCE/UVD, it's handled specially */
2714 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2715 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2716 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2717 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2718 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2719 /* enable powergating to save power */
2720 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2721 state);
c9f96fd5
RZ
2722 if (r) {
2723 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2724 adev->ip_blocks[i].version->funcs->name, r);
2725 return r;
2726 }
2727 }
2728 }
2dc80b00
S
2729 return 0;
2730}
2731
beff74bc
AD
2732static int amdgpu_device_enable_mgpu_fan_boost(void)
2733{
2734 struct amdgpu_gpu_instance *gpu_ins;
2735 struct amdgpu_device *adev;
2736 int i, ret = 0;
2737
2738 mutex_lock(&mgpu_info.mutex);
2739
2740 /*
2741 * MGPU fan boost feature should be enabled
2742 * only when there are two or more dGPUs in
2743 * the system
2744 */
2745 if (mgpu_info.num_dgpu < 2)
2746 goto out;
2747
2748 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2749 gpu_ins = &(mgpu_info.gpu_ins[i]);
2750 adev = gpu_ins->adev;
2751 if (!(adev->flags & AMD_IS_APU) &&
f10bb940 2752 !gpu_ins->mgpu_fan_enabled) {
beff74bc
AD
2753 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2754 if (ret)
2755 break;
2756
2757 gpu_ins->mgpu_fan_enabled = 1;
2758 }
2759 }
2760
2761out:
2762 mutex_unlock(&mgpu_info.mutex);
2763
2764 return ret;
2765}
2766
e3ecdffa
AD
2767/**
2768 * amdgpu_device_ip_late_init - run late init for hardware IPs
2769 *
2770 * @adev: amdgpu_device pointer
2771 *
2772 * Late initialization pass for hardware IPs. The list of all the hardware
2773 * IPs that make up the asic is walked and the late_init callbacks are run.
2774 * late_init covers any special initialization that an IP requires
2775 * after all of the have been initialized or something that needs to happen
2776 * late in the init process.
2777 * Returns 0 on success, negative error code on failure.
2778 */
06ec9070 2779static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2780{
60599a03 2781 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2782 int i = 0, r;
2783
2784 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2785 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2786 continue;
2787 if (adev->ip_blocks[i].version->funcs->late_init) {
2788 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2789 if (r) {
2790 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2791 adev->ip_blocks[i].version->funcs->name, r);
2792 return r;
2793 }
2dc80b00 2794 }
73f847db 2795 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2796 }
2797
867e24ca 2798 r = amdgpu_ras_late_init(adev);
2799 if (r) {
2800 DRM_ERROR("amdgpu_ras_late_init failed %d", r);
2801 return r;
2802 }
2803
a891d239
DL
2804 amdgpu_ras_set_error_query_ready(adev, true);
2805
1112a46b
RZ
2806 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2807 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2808
06ec9070 2809 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2810
beff74bc
AD
2811 r = amdgpu_device_enable_mgpu_fan_boost();
2812 if (r)
2813 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2814
4da8b639 2815 /* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */
47fc644f
SS
2816 if (amdgpu_passthrough(adev) &&
2817 ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
2818 adev->asic_type == CHIP_ALDEBARAN))
bc143d8b 2819 amdgpu_dpm_handle_passthrough_sbr(adev, true);
60599a03
EQ
2820
2821 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2822 mutex_lock(&mgpu_info.mutex);
2823
2824 /*
2825 * Reset device p-state to low as this was booted with high.
2826 *
2827 * This should be performed only after all devices from the same
2828 * hive get initialized.
2829 *
2830 * However, it's unknown how many device in the hive in advance.
2831 * As this is counted one by one during devices initializations.
2832 *
2833 * So, we wait for all XGMI interlinked devices initialized.
2834 * This may bring some delays as those devices may come from
2835 * different hives. But that should be OK.
2836 */
2837 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2838 for (i = 0; i < mgpu_info.num_gpu; i++) {
2839 gpu_instance = &(mgpu_info.gpu_ins[i]);
2840 if (gpu_instance->adev->flags & AMD_IS_APU)
2841 continue;
2842
d84a430d
JK
2843 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2844 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
2845 if (r) {
2846 DRM_ERROR("pstate setting failed (%d).\n", r);
2847 break;
2848 }
2849 }
2850 }
2851
2852 mutex_unlock(&mgpu_info.mutex);
2853 }
2854
d38ceaf9
AD
2855 return 0;
2856}
2857
613aa3ea
LY
2858/**
2859 * amdgpu_device_smu_fini_early - smu hw_fini wrapper
2860 *
2861 * @adev: amdgpu_device pointer
2862 *
2863 * For ASICs need to disable SMC first
2864 */
2865static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
2866{
2867 int i, r;
2868
4e8303cf 2869 if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
613aa3ea
LY
2870 return;
2871
2872 for (i = 0; i < adev->num_ip_blocks; i++) {
2873 if (!adev->ip_blocks[i].status.hw)
2874 continue;
2875 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2876 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2877 /* XXX handle errors */
2878 if (r) {
2879 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2880 adev->ip_blocks[i].version->funcs->name, r);
2881 }
2882 adev->ip_blocks[i].status.hw = false;
2883 break;
2884 }
2885 }
2886}
2887
e9669fb7 2888static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
d38ceaf9
AD
2889{
2890 int i, r;
2891
e9669fb7
AG
2892 for (i = 0; i < adev->num_ip_blocks; i++) {
2893 if (!adev->ip_blocks[i].version->funcs->early_fini)
2894 continue;
5278a159 2895
e9669fb7
AG
2896 r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
2897 if (r) {
2898 DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
2899 adev->ip_blocks[i].version->funcs->name, r);
2900 }
2901 }
c030f2e4 2902
05df1f01 2903 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2904 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2905
7270e895
TY
2906 amdgpu_amdkfd_suspend(adev, false);
2907
613aa3ea
LY
2908 /* Workaroud for ASICs need to disable SMC first */
2909 amdgpu_device_smu_fini_early(adev);
3e96dbfd 2910
d38ceaf9 2911 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2912 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2913 continue;
8201a67a 2914
a1255107 2915 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2916 /* XXX handle errors */
2c1a2784 2917 if (r) {
a1255107
AD
2918 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2919 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2920 }
8201a67a 2921
a1255107 2922 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2923 }
2924
6effad8a
GC
2925 if (amdgpu_sriov_vf(adev)) {
2926 if (amdgpu_virt_release_full_gpu(adev, false))
2927 DRM_ERROR("failed to release exclusive mode on fini\n");
2928 }
2929
e9669fb7
AG
2930 return 0;
2931}
2932
2933/**
2934 * amdgpu_device_ip_fini - run fini for hardware IPs
2935 *
2936 * @adev: amdgpu_device pointer
2937 *
2938 * Main teardown pass for hardware IPs. The list of all the hardware
2939 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2940 * are run. hw_fini tears down the hardware associated with each IP
2941 * and sw_fini tears down any software state associated with each IP.
2942 * Returns 0 on success, negative error code on failure.
2943 */
2944static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2945{
2946 int i, r;
2947
2948 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2949 amdgpu_virt_release_ras_err_handler_data(adev);
2950
e9669fb7
AG
2951 if (adev->gmc.xgmi.num_physical_nodes > 1)
2952 amdgpu_xgmi_remove_device(adev);
2953
c004d44e 2954 amdgpu_amdkfd_device_fini_sw(adev);
9950cda2 2955
d38ceaf9 2956 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2957 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2958 continue;
c12aba3a
ML
2959
2960 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2961 amdgpu_ucode_free_bo(adev);
1e256e27 2962 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a 2963 amdgpu_device_wb_fini(adev);
7ccfd79f 2964 amdgpu_device_mem_scratch_fini(adev);
533aed27 2965 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2966 }
2967
a1255107 2968 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2969 /* XXX handle errors */
2c1a2784 2970 if (r) {
a1255107
AD
2971 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2972 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2973 }
a1255107
AD
2974 adev->ip_blocks[i].status.sw = false;
2975 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2976 }
2977
a6dcfd9c 2978 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2979 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2980 continue;
a1255107
AD
2981 if (adev->ip_blocks[i].version->funcs->late_fini)
2982 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2983 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2984 }
2985
c030f2e4 2986 amdgpu_ras_fini(adev);
2987
d38ceaf9
AD
2988 return 0;
2989}
2990
e3ecdffa 2991/**
beff74bc 2992 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2993 *
1112a46b 2994 * @work: work_struct.
e3ecdffa 2995 */
beff74bc 2996static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2997{
2998 struct amdgpu_device *adev =
beff74bc 2999 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
3000 int r;
3001
3002 r = amdgpu_ib_ring_tests(adev);
3003 if (r)
3004 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
3005}
3006
1e317b99
RZ
3007static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
3008{
3009 struct amdgpu_device *adev =
3010 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
3011
90a92662
MD
3012 WARN_ON_ONCE(adev->gfx.gfx_off_state);
3013 WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
3014
3015 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
3016 adev->gfx.gfx_off_state = true;
1e317b99
RZ
3017}
3018
e3ecdffa 3019/**
e7854a03 3020 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
3021 *
3022 * @adev: amdgpu_device pointer
3023 *
3024 * Main suspend function for hardware IPs. The list of all the hardware
3025 * IPs that make up the asic is walked, clockgating is disabled and the
3026 * suspend callbacks are run. suspend puts the hardware and software state
3027 * in each IP into a state suitable for suspend.
3028 * Returns 0 on success, negative error code on failure.
3029 */
e7854a03
AD
3030static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
3031{
3032 int i, r;
3033
50ec83f0
AD
3034 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3035 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 3036
b31d6ada
EQ
3037 /*
3038 * Per PMFW team's suggestion, driver needs to handle gfxoff
3039 * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
3040 * scenario. Add the missing df cstate disablement here.
3041 */
3042 if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
3043 dev_warn(adev->dev, "Failed to disallow df cstate");
3044
e7854a03
AD
3045 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3046 if (!adev->ip_blocks[i].status.valid)
3047 continue;
2b9f7848 3048
e7854a03 3049 /* displays are handled separately */
2b9f7848
ND
3050 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
3051 continue;
3052
3053 /* XXX handle errors */
3054 r = adev->ip_blocks[i].version->funcs->suspend(adev);
3055 /* XXX handle errors */
3056 if (r) {
3057 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3058 adev->ip_blocks[i].version->funcs->name, r);
3059 return r;
e7854a03 3060 }
2b9f7848
ND
3061
3062 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
3063 }
3064
e7854a03
AD
3065 return 0;
3066}
3067
3068/**
3069 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3070 *
3071 * @adev: amdgpu_device pointer
3072 *
3073 * Main suspend function for hardware IPs. The list of all the hardware
3074 * IPs that make up the asic is walked, clockgating is disabled and the
3075 * suspend callbacks are run. suspend puts the hardware and software state
3076 * in each IP into a state suitable for suspend.
3077 * Returns 0 on success, negative error code on failure.
3078 */
3079static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3080{
3081 int i, r;
3082
557f42a2 3083 if (adev->in_s0ix)
bc143d8b 3084 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
34416931 3085
d38ceaf9 3086 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3087 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 3088 continue;
e7854a03
AD
3089 /* displays are handled in phase1 */
3090 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3091 continue;
bff77e86
LM
3092 /* PSP lost connection when err_event_athub occurs */
3093 if (amdgpu_ras_intr_triggered() &&
3094 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3095 adev->ip_blocks[i].status.hw = false;
3096 continue;
3097 }
e3c1b071 3098
3099 /* skip unnecessary suspend if we do not initialize them yet */
3100 if (adev->gmc.xgmi.pending_reset &&
3101 !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3102 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
3103 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3104 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
3105 adev->ip_blocks[i].status.hw = false;
3106 continue;
3107 }
557f42a2 3108
afa6646b 3109 /* skip suspend of gfx/mes and psp for S0ix
32ff160d
AD
3110 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3111 * like at runtime. PSP is also part of the always on hardware
3112 * so no need to suspend it.
3113 */
557f42a2 3114 if (adev->in_s0ix &&
32ff160d 3115 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
afa6646b
AD
3116 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3117 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
557f42a2
AD
3118 continue;
3119
2a7798ea
AD
3120 /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3121 if (adev->in_s0ix &&
4e8303cf
LL
3122 (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
3123 IP_VERSION(5, 0, 0)) &&
3124 (adev->ip_blocks[i].version->type ==
3125 AMD_IP_BLOCK_TYPE_SDMA))
2a7798ea
AD
3126 continue;
3127
e11c7750
TH
3128 /* Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot.
3129 * These are in TMR, hence are expected to be reused by PSP-TOS to reload
3130 * from this location and RLC Autoload automatically also gets loaded
3131 * from here based on PMFW -> PSP message during re-init sequence.
3132 * Therefore, the psp suspend & resume should be skipped to avoid destroy
3133 * the TMR and reload FWs again for IMU enabled APU ASICs.
3134 */
3135 if (amdgpu_in_reset(adev) &&
3136 (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3137 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3138 continue;
3139
d38ceaf9 3140 /* XXX handle errors */
a1255107 3141 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 3142 /* XXX handle errors */
2c1a2784 3143 if (r) {
a1255107
AD
3144 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3145 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 3146 }
876923fb 3147 adev->ip_blocks[i].status.hw = false;
a3a09142 3148 /* handle putting the SMC in the appropriate state */
47fc644f 3149 if (!amdgpu_sriov_vf(adev)) {
86b93fd6
JZ
3150 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3151 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3152 if (r) {
3153 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3154 adev->mp1_state, r);
3155 return r;
3156 }
a3a09142
AD
3157 }
3158 }
d38ceaf9
AD
3159 }
3160
3161 return 0;
3162}
3163
e7854a03
AD
3164/**
3165 * amdgpu_device_ip_suspend - run suspend for hardware IPs
3166 *
3167 * @adev: amdgpu_device pointer
3168 *
3169 * Main suspend function for hardware IPs. The list of all the hardware
3170 * IPs that make up the asic is walked, clockgating is disabled and the
3171 * suspend callbacks are run. suspend puts the hardware and software state
3172 * in each IP into a state suitable for suspend.
3173 * Returns 0 on success, negative error code on failure.
3174 */
3175int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3176{
3177 int r;
3178
3c73683c
JC
3179 if (amdgpu_sriov_vf(adev)) {
3180 amdgpu_virt_fini_data_exchange(adev);
e7819644 3181 amdgpu_virt_request_full_gpu(adev, false);
3c73683c 3182 }
e7819644 3183
e7854a03
AD
3184 r = amdgpu_device_ip_suspend_phase1(adev);
3185 if (r)
3186 return r;
3187 r = amdgpu_device_ip_suspend_phase2(adev);
3188
e7819644
YT
3189 if (amdgpu_sriov_vf(adev))
3190 amdgpu_virt_release_full_gpu(adev, false);
3191
e7854a03
AD
3192 return r;
3193}
3194
06ec9070 3195static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3196{
3197 int i, r;
3198
2cb681b6 3199 static enum amd_ip_block_type ip_order[] = {
2cb681b6 3200 AMD_IP_BLOCK_TYPE_COMMON,
c1c39032 3201 AMD_IP_BLOCK_TYPE_GMC,
39186aef 3202 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
3203 AMD_IP_BLOCK_TYPE_IH,
3204 };
a90ad3c2 3205
95ea3dbc 3206 for (i = 0; i < adev->num_ip_blocks; i++) {
2cb681b6
ML
3207 int j;
3208 struct amdgpu_ip_block *block;
a90ad3c2 3209
4cd2a96d
J
3210 block = &adev->ip_blocks[i];
3211 block->status.hw = false;
2cb681b6 3212
4cd2a96d 3213 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
2cb681b6 3214
4cd2a96d 3215 if (block->version->type != ip_order[j] ||
2cb681b6
ML
3216 !block->status.valid)
3217 continue;
3218
3219 r = block->version->funcs->hw_init(adev);
0aaeefcc 3220 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3221 if (r)
3222 return r;
482f0e53 3223 block->status.hw = true;
a90ad3c2
ML
3224 }
3225 }
3226
3227 return 0;
3228}
3229
06ec9070 3230static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3231{
3232 int i, r;
3233
2cb681b6
ML
3234 static enum amd_ip_block_type ip_order[] = {
3235 AMD_IP_BLOCK_TYPE_SMC,
3236 AMD_IP_BLOCK_TYPE_DCE,
3237 AMD_IP_BLOCK_TYPE_GFX,
3238 AMD_IP_BLOCK_TYPE_SDMA,
ec64350d 3239 AMD_IP_BLOCK_TYPE_MES,
257deb8c 3240 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07 3241 AMD_IP_BLOCK_TYPE_VCE,
d2cdc014
YZ
3242 AMD_IP_BLOCK_TYPE_VCN,
3243 AMD_IP_BLOCK_TYPE_JPEG
2cb681b6 3244 };
a90ad3c2 3245
2cb681b6
ML
3246 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3247 int j;
3248 struct amdgpu_ip_block *block;
a90ad3c2 3249
2cb681b6
ML
3250 for (j = 0; j < adev->num_ip_blocks; j++) {
3251 block = &adev->ip_blocks[j];
3252
3253 if (block->version->type != ip_order[i] ||
482f0e53
ML
3254 !block->status.valid ||
3255 block->status.hw)
2cb681b6
ML
3256 continue;
3257
895bd048
JZ
3258 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3259 r = block->version->funcs->resume(adev);
3260 else
3261 r = block->version->funcs->hw_init(adev);
3262
0aaeefcc 3263 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3264 if (r)
3265 return r;
482f0e53 3266 block->status.hw = true;
a90ad3c2
ML
3267 }
3268 }
3269
3270 return 0;
3271}
3272
e3ecdffa
AD
3273/**
3274 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3275 *
3276 * @adev: amdgpu_device pointer
3277 *
3278 * First resume function for hardware IPs. The list of all the hardware
3279 * IPs that make up the asic is walked and the resume callbacks are run for
3280 * COMMON, GMC, and IH. resume puts the hardware into a functional state
3281 * after a suspend and updates the software state as necessary. This
3282 * function is also used for restoring the GPU after a GPU reset.
3283 * Returns 0 on success, negative error code on failure.
3284 */
06ec9070 3285static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
3286{
3287 int i, r;
3288
a90ad3c2 3289 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3290 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 3291 continue;
a90ad3c2 3292 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3293 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
d7274ec7
BZ
3294 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3295 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
482f0e53 3296
fcf0649f
CZ
3297 r = adev->ip_blocks[i].version->funcs->resume(adev);
3298 if (r) {
3299 DRM_ERROR("resume of IP block <%s> failed %d\n",
3300 adev->ip_blocks[i].version->funcs->name, r);
3301 return r;
3302 }
482f0e53 3303 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
3304 }
3305 }
3306
3307 return 0;
3308}
3309
e3ecdffa
AD
3310/**
3311 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3312 *
3313 * @adev: amdgpu_device pointer
3314 *
3315 * First resume function for hardware IPs. The list of all the hardware
3316 * IPs that make up the asic is walked and the resume callbacks are run for
3317 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
3318 * functional state after a suspend and updates the software state as
3319 * necessary. This function is also used for restoring the GPU after a GPU
3320 * reset.
3321 * Returns 0 on success, negative error code on failure.
3322 */
06ec9070 3323static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3324{
3325 int i, r;
3326
3327 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3328 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 3329 continue;
fcf0649f 3330 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3331 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
3332 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3333 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 3334 continue;
a1255107 3335 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 3336 if (r) {
a1255107
AD
3337 DRM_ERROR("resume of IP block <%s> failed %d\n",
3338 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 3339 return r;
2c1a2784 3340 }
482f0e53 3341 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
3342 }
3343
3344 return 0;
3345}
3346
e3ecdffa
AD
3347/**
3348 * amdgpu_device_ip_resume - run resume for hardware IPs
3349 *
3350 * @adev: amdgpu_device pointer
3351 *
3352 * Main resume function for hardware IPs. The hardware IPs
3353 * are split into two resume functions because they are
b8920e1e 3354 * also used in recovering from a GPU reset and some additional
e3ecdffa
AD
3355 * steps need to be take between them. In this case (S3/S4) they are
3356 * run sequentially.
3357 * Returns 0 on success, negative error code on failure.
3358 */
06ec9070 3359static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
3360{
3361 int r;
3362
06ec9070 3363 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
3364 if (r)
3365 return r;
7a3e0bb2
RZ
3366
3367 r = amdgpu_device_fw_loading(adev);
3368 if (r)
3369 return r;
3370
06ec9070 3371 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
3372
3373 return r;
3374}
3375
e3ecdffa
AD
3376/**
3377 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3378 *
3379 * @adev: amdgpu_device pointer
3380 *
3381 * Query the VBIOS data tables to determine if the board supports SR-IOV.
3382 */
4e99a44e 3383static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 3384{
6867e1b5
ML
3385 if (amdgpu_sriov_vf(adev)) {
3386 if (adev->is_atom_fw) {
58ff791a 3387 if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
6867e1b5
ML
3388 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3389 } else {
3390 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3391 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3392 }
3393
3394 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3395 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 3396 }
048765ad
AR
3397}
3398
e3ecdffa
AD
3399/**
3400 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3401 *
3402 * @asic_type: AMD asic type
3403 *
3404 * Check if there is DC (new modesetting infrastructre) support for an asic.
3405 * returns true if DC has support, false if not.
3406 */
4562236b
HW
3407bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3408{
3409 switch (asic_type) {
0637d417
AD
3410#ifdef CONFIG_DRM_AMDGPU_SI
3411 case CHIP_HAINAN:
3412#endif
3413 case CHIP_TOPAZ:
3414 /* chips with no display hardware */
3415 return false;
4562236b 3416#if defined(CONFIG_DRM_AMD_DC)
64200c46
MR
3417 case CHIP_TAHITI:
3418 case CHIP_PITCAIRN:
3419 case CHIP_VERDE:
3420 case CHIP_OLAND:
2d32ffd6
AD
3421 /*
3422 * We have systems in the wild with these ASICs that require
3423 * LVDS and VGA support which is not supported with DC.
3424 *
3425 * Fallback to the non-DC driver here by default so as not to
3426 * cause regressions.
3427 */
3428#if defined(CONFIG_DRM_AMD_DC_SI)
3429 return amdgpu_dc > 0;
3430#else
3431 return false;
64200c46 3432#endif
4562236b 3433 case CHIP_BONAIRE:
0d6fbccb 3434 case CHIP_KAVERI:
367e6687
AD
3435 case CHIP_KABINI:
3436 case CHIP_MULLINS:
d9fda248
HW
3437 /*
3438 * We have systems in the wild with these ASICs that require
b5a0168e 3439 * VGA support which is not supported with DC.
d9fda248
HW
3440 *
3441 * Fallback to the non-DC driver here by default so as not to
3442 * cause regressions.
3443 */
3444 return amdgpu_dc > 0;
f7f12b25 3445 default:
fd187853 3446 return amdgpu_dc != 0;
f7f12b25 3447#else
4562236b 3448 default:
93b09a9a 3449 if (amdgpu_dc > 0)
b8920e1e 3450 DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
4562236b 3451 return false;
f7f12b25 3452#endif
4562236b
HW
3453 }
3454}
3455
3456/**
3457 * amdgpu_device_has_dc_support - check if dc is supported
3458 *
982a820b 3459 * @adev: amdgpu_device pointer
4562236b
HW
3460 *
3461 * Returns true for supported, false for not supported
3462 */
3463bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3464{
25263da3 3465 if (adev->enable_virtual_display ||
abaf210c 3466 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
2555039d
XY
3467 return false;
3468
4562236b
HW
3469 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3470}
3471
d4535e2c
AG
3472static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3473{
3474 struct amdgpu_device *adev =
3475 container_of(__work, struct amdgpu_device, xgmi_reset_work);
d95e8e97 3476 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
d4535e2c 3477
c6a6e2db
AG
3478 /* It's a bug to not have a hive within this function */
3479 if (WARN_ON(!hive))
3480 return;
3481
3482 /*
3483 * Use task barrier to synchronize all xgmi reset works across the
3484 * hive. task_barrier_enter and task_barrier_exit will block
3485 * until all the threads running the xgmi reset works reach
3486 * those points. task_barrier_full will do both blocks.
3487 */
3488 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3489
3490 task_barrier_enter(&hive->tb);
4a580877 3491 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
c6a6e2db
AG
3492
3493 if (adev->asic_reset_res)
3494 goto fail;
3495
3496 task_barrier_exit(&hive->tb);
4a580877 3497 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
c6a6e2db
AG
3498
3499 if (adev->asic_reset_res)
3500 goto fail;
43c4d576 3501
5e67bba3 3502 if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
3503 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
3504 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
c6a6e2db
AG
3505 } else {
3506
3507 task_barrier_full(&hive->tb);
3508 adev->asic_reset_res = amdgpu_asic_reset(adev);
3509 }
ce316fa5 3510
c6a6e2db 3511fail:
d4535e2c 3512 if (adev->asic_reset_res)
fed184e9 3513 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
4a580877 3514 adev->asic_reset_res, adev_to_drm(adev)->unique);
d95e8e97 3515 amdgpu_put_xgmi_hive(hive);
d4535e2c
AG
3516}
3517
71f98027
AD
3518static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3519{
3520 char *input = amdgpu_lockup_timeout;
3521 char *timeout_setting = NULL;
3522 int index = 0;
3523 long timeout;
3524 int ret = 0;
3525
3526 /*
67387dfe
AD
3527 * By default timeout for non compute jobs is 10000
3528 * and 60000 for compute jobs.
71f98027 3529 * In SR-IOV or passthrough mode, timeout for compute
b7b2a316 3530 * jobs are 60000 by default.
71f98027
AD
3531 */
3532 adev->gfx_timeout = msecs_to_jiffies(10000);
3533 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
9882e278
ED
3534 if (amdgpu_sriov_vf(adev))
3535 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3536 msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
71f98027 3537 else
67387dfe 3538 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027 3539
f440ff44 3540 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 3541 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 3542 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
3543 ret = kstrtol(timeout_setting, 0, &timeout);
3544 if (ret)
3545 return ret;
3546
3547 if (timeout == 0) {
3548 index++;
3549 continue;
3550 } else if (timeout < 0) {
3551 timeout = MAX_SCHEDULE_TIMEOUT;
127aedf9
CK
3552 dev_warn(adev->dev, "lockup timeout disabled");
3553 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
71f98027
AD
3554 } else {
3555 timeout = msecs_to_jiffies(timeout);
3556 }
3557
3558 switch (index++) {
3559 case 0:
3560 adev->gfx_timeout = timeout;
3561 break;
3562 case 1:
3563 adev->compute_timeout = timeout;
3564 break;
3565 case 2:
3566 adev->sdma_timeout = timeout;
3567 break;
3568 case 3:
3569 adev->video_timeout = timeout;
3570 break;
3571 default:
3572 break;
3573 }
3574 }
3575 /*
3576 * There is only one value specified and
3577 * it should apply to all non-compute jobs.
3578 */
bcccee89 3579 if (index == 1) {
71f98027 3580 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
3581 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3582 adev->compute_timeout = adev->gfx_timeout;
3583 }
71f98027
AD
3584 }
3585
3586 return ret;
3587}
d4535e2c 3588
4a74c38c
PY
3589/**
3590 * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3591 *
3592 * @adev: amdgpu_device pointer
3593 *
3594 * RAM direct mapped to GPU if IOMMU is not enabled or is pass through mode
3595 */
3596static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3597{
3598 struct iommu_domain *domain;
3599
3600 domain = iommu_get_domain_for_dev(adev->dev);
3601 if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3602 adev->ram_is_direct_mapped = true;
3603}
3604
77f3a5cd 3605static const struct attribute *amdgpu_dev_attributes[] = {
77f3a5cd
ND
3606 &dev_attr_pcie_replay_count.attr,
3607 NULL
3608};
3609
02ff519e
AD
3610static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
3611{
3612 if (amdgpu_mcbp == 1)
3613 adev->gfx.mcbp = true;
1e9e15dc
JZ
3614 else if (amdgpu_mcbp == 0)
3615 adev->gfx.mcbp = false;
4e8303cf
LL
3616 else if ((amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 0, 0)) &&
3617 (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 0, 0)) &&
1e9e15dc 3618 adev->gfx.num_gfx_rings)
50a7c876
AD
3619 adev->gfx.mcbp = true;
3620
02ff519e
AD
3621 if (amdgpu_sriov_vf(adev))
3622 adev->gfx.mcbp = true;
3623
3624 if (adev->gfx.mcbp)
3625 DRM_INFO("MCBP is enabled\n");
3626}
3627
d38ceaf9
AD
3628/**
3629 * amdgpu_device_init - initialize the driver
3630 *
3631 * @adev: amdgpu_device pointer
d38ceaf9
AD
3632 * @flags: driver flags
3633 *
3634 * Initializes the driver info and hw (all asics).
3635 * Returns 0 for success or an error on failure.
3636 * Called at driver startup.
3637 */
3638int amdgpu_device_init(struct amdgpu_device *adev,
d38ceaf9
AD
3639 uint32_t flags)
3640{
8aba21b7
LT
3641 struct drm_device *ddev = adev_to_drm(adev);
3642 struct pci_dev *pdev = adev->pdev;
d38ceaf9 3643 int r, i;
b98c6299 3644 bool px = false;
95844d20 3645 u32 max_MBps;
59e9fff1 3646 int tmp;
d38ceaf9
AD
3647
3648 adev->shutdown = false;
d38ceaf9 3649 adev->flags = flags;
4e66d7d2
YZ
3650
3651 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3652 adev->asic_type = amdgpu_force_asic_type;
3653 else
3654 adev->asic_type = flags & AMD_ASIC_MASK;
3655
d38ceaf9 3656 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 3657 if (amdgpu_emu_mode == 1)
8bdab6bb 3658 adev->usec_timeout *= 10;
770d13b1 3659 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
3660 adev->accel_working = false;
3661 adev->num_rings = 0;
68ce8b24 3662 RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
d38ceaf9
AD
3663 adev->mman.buffer_funcs = NULL;
3664 adev->mman.buffer_funcs_ring = NULL;
3665 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 3666 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 3667 adev->gmc.gmc_funcs = NULL;
7bd939d0 3668 adev->harvest_ip_mask = 0x0;
f54d1867 3669 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 3670 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
3671
3672 adev->smc_rreg = &amdgpu_invalid_rreg;
3673 adev->smc_wreg = &amdgpu_invalid_wreg;
3674 adev->pcie_rreg = &amdgpu_invalid_rreg;
3675 adev->pcie_wreg = &amdgpu_invalid_wreg;
0c552ed3
LM
3676 adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
3677 adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
36b9a952
HR
3678 adev->pciep_rreg = &amdgpu_invalid_rreg;
3679 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
3680 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3681 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
a76b2870
CL
3682 adev->pcie_rreg64_ext = &amdgpu_invalid_rreg64_ext;
3683 adev->pcie_wreg64_ext = &amdgpu_invalid_wreg64_ext;
d38ceaf9
AD
3684 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3685 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3686 adev->didt_rreg = &amdgpu_invalid_rreg;
3687 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
3688 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3689 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
3690 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3691 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3692
3e39ab90
AD
3693 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3694 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3695 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
3696
3697 /* mutex initialization are all done here so we
b8920e1e
SS
3698 * can recall function without having locking issues
3699 */
0e5ca0d1 3700 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
3701 mutex_init(&adev->pm.mutex);
3702 mutex_init(&adev->gfx.gpu_clock_mutex);
3703 mutex_init(&adev->srbm_mutex);
b8866c26 3704 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3705 mutex_init(&adev->gfx.gfx_off_mutex);
98a54e88 3706 mutex_init(&adev->gfx.partition_mutex);
d38ceaf9 3707 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3708 mutex_init(&adev->mn_lock);
e23b74aa 3709 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3710 hash_init(adev->mn_hash);
32eaeae0 3711 mutex_init(&adev->psp.mutex);
bd052211 3712 mutex_init(&adev->notifier_lock);
8cda7a4f 3713 mutex_init(&adev->pm.stable_pstate_ctx_lock);
f113cc32 3714 mutex_init(&adev->benchmark_mutex);
d38ceaf9 3715
ab3b9de6 3716 amdgpu_device_init_apu_flags(adev);
9f6a7857 3717
912dfc84
EQ
3718 r = amdgpu_device_check_arguments(adev);
3719 if (r)
3720 return r;
d38ceaf9 3721
d38ceaf9
AD
3722 spin_lock_init(&adev->mmio_idx_lock);
3723 spin_lock_init(&adev->smc_idx_lock);
3724 spin_lock_init(&adev->pcie_idx_lock);
3725 spin_lock_init(&adev->uvd_ctx_idx_lock);
3726 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3727 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3728 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3729 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3730 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3731
0c4e7fa5
CZ
3732 INIT_LIST_HEAD(&adev->shadow_list);
3733 mutex_init(&adev->shadow_list_lock);
3734
655ce9cb 3735 INIT_LIST_HEAD(&adev->reset_list);
3736
6492e1b0 3737 INIT_LIST_HEAD(&adev->ras_list);
3738
3e38b634
EQ
3739 INIT_LIST_HEAD(&adev->pm.od_kobj_list);
3740
beff74bc
AD
3741 INIT_DELAYED_WORK(&adev->delayed_init_work,
3742 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3743 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3744 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3745
d4535e2c
AG
3746 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3747
d23ee13f 3748 adev->gfx.gfx_off_req_count = 1;
0ad7347a
AA
3749 adev->gfx.gfx_off_residency = 0;
3750 adev->gfx.gfx_off_entrycount = 0;
b6e79d9a 3751 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3752
b265bdbd
EQ
3753 atomic_set(&adev->throttling_logging_enabled, 1);
3754 /*
3755 * If throttling continues, logging will be performed every minute
3756 * to avoid log flooding. "-1" is subtracted since the thermal
3757 * throttling interrupt comes every second. Thus, the total logging
3758 * interval is 59 seconds(retelimited printk interval) + 1(waiting
3759 * for throttling interrupt) = 60 seconds.
3760 */
3761 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3762 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3763
0fa49558
AX
3764 /* Registers mapping */
3765 /* TODO: block userspace mapping of io register */
da69c161
KW
3766 if (adev->asic_type >= CHIP_BONAIRE) {
3767 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3768 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3769 } else {
3770 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3771 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3772 }
d38ceaf9 3773
6c08e0ef
EQ
3774 for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3775 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3776
d38ceaf9 3777 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
b8920e1e 3778 if (!adev->rmmio)
d38ceaf9 3779 return -ENOMEM;
b8920e1e 3780
d38ceaf9 3781 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
b8920e1e 3782 DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size);
d38ceaf9 3783
436afdfa
PY
3784 /*
3785 * Reset domain needs to be present early, before XGMI hive discovered
3786 * (if any) and intitialized to use reset sem and in_gpu reset flag
3787 * early on during init and before calling to RREG32.
3788 */
3789 adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3790 if (!adev->reset_domain)
3791 return -ENOMEM;
3792
3aa0115d
ML
3793 /* detect hw virtualization here */
3794 amdgpu_detect_virtualization(adev);
3795
04e85958
TL
3796 amdgpu_device_get_pcie_info(adev);
3797
dffa11b4
ML
3798 r = amdgpu_device_get_job_timeout_settings(adev);
3799 if (r) {
3800 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4ef87d8f 3801 return r;
a190d1c7
XY
3802 }
3803
d38ceaf9 3804 /* early init functions */
06ec9070 3805 r = amdgpu_device_ip_early_init(adev);
d38ceaf9 3806 if (r)
4ef87d8f 3807 return r;
d38ceaf9 3808
02ff519e
AD
3809 amdgpu_device_set_mcbp(adev);
3810
b7cdb41e
ML
3811 /* Get rid of things like offb */
3812 r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
3813 if (r)
3814 return r;
3815
4d33e704
SK
3816 /* Enable TMZ based on IP_VERSION */
3817 amdgpu_gmc_tmz_set(adev);
3818
957b0787 3819 amdgpu_gmc_noretry_set(adev);
4a0165f0
VS
3820 /* Need to get xgmi info early to decide the reset behavior*/
3821 if (adev->gmc.xgmi.supported) {
3822 r = adev->gfxhub.funcs->get_xgmi_info(adev);
3823 if (r)
3824 return r;
3825 }
3826
8e6d0b69 3827 /* enable PCIE atomic ops */
b4520bfd
GW
3828 if (amdgpu_sriov_vf(adev)) {
3829 if (adev->virt.fw_reserve.p_pf2vf)
3830 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
3831 adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
3832 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
0e768043
YZ
3833 /* APUs w/ gfx9 onwards doesn't reply on PCIe atomics, rather it is a
3834 * internal path natively support atomics, set have_atomics_support to true.
3835 */
b4520bfd 3836 } else if ((adev->flags & AMD_IS_APU) &&
4e8303cf
LL
3837 (amdgpu_ip_version(adev, GC_HWIP, 0) >
3838 IP_VERSION(9, 0, 0))) {
0e768043 3839 adev->have_atomics_support = true;
b4520bfd 3840 } else {
8e6d0b69 3841 adev->have_atomics_support =
3842 !pci_enable_atomic_ops_to_root(adev->pdev,
3843 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3844 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
b4520bfd
GW
3845 }
3846
8e6d0b69 3847 if (!adev->have_atomics_support)
3848 dev_info(adev->dev, "PCIE atomic ops is not supported\n");
3849
6585661d 3850 /* doorbell bar mapping and doorbell index init*/
43c064db 3851 amdgpu_doorbell_init(adev);
6585661d 3852
9475a943
SL
3853 if (amdgpu_emu_mode == 1) {
3854 /* post the asic on emulation mode */
3855 emu_soc_asic_init(adev);
bfca0289 3856 goto fence_driver_init;
9475a943 3857 }
bfca0289 3858
04442bf7
LL
3859 amdgpu_reset_init(adev);
3860
4e99a44e 3861 /* detect if we are with an SRIOV vbios */
b4520bfd
GW
3862 if (adev->bios)
3863 amdgpu_device_detect_sriov_bios(adev);
048765ad 3864
95e8e59e
AD
3865 /* check if we need to reset the asic
3866 * E.g., driver was not cleanly unloaded previously, etc.
3867 */
f14899fd 3868 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
e3c1b071 3869 if (adev->gmc.xgmi.num_physical_nodes) {
3870 dev_info(adev->dev, "Pending hive reset.\n");
3871 adev->gmc.xgmi.pending_reset = true;
3872 /* Only need to init necessary block for SMU to handle the reset */
3873 for (i = 0; i < adev->num_ip_blocks; i++) {
3874 if (!adev->ip_blocks[i].status.valid)
3875 continue;
3876 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3877 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3878 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3879 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
751f43e7 3880 DRM_DEBUG("IP %s disabled for hw_init.\n",
e3c1b071 3881 adev->ip_blocks[i].version->funcs->name);
3882 adev->ip_blocks[i].status.hw = true;
3883 }
3884 }
3885 } else {
59e9fff1 3886 tmp = amdgpu_reset_method;
3887 /* It should do a default reset when loading or reloading the driver,
3888 * regardless of the module parameter reset_method.
3889 */
3890 amdgpu_reset_method = AMD_RESET_METHOD_NONE;
e3c1b071 3891 r = amdgpu_asic_reset(adev);
59e9fff1 3892 amdgpu_reset_method = tmp;
e3c1b071 3893 if (r) {
3894 dev_err(adev->dev, "asic reset on init failed\n");
3895 goto failed;
3896 }
95e8e59e
AD
3897 }
3898 }
3899
d38ceaf9 3900 /* Post card if necessary */
39c640c0 3901 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3902 if (!adev->bios) {
bec86378 3903 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3904 r = -EINVAL;
3905 goto failed;
d38ceaf9 3906 }
bec86378 3907 DRM_INFO("GPU posting now...\n");
4d2997ab 3908 r = amdgpu_device_asic_init(adev);
4e99a44e
ML
3909 if (r) {
3910 dev_err(adev->dev, "gpu post error!\n");
3911 goto failed;
3912 }
d38ceaf9
AD
3913 }
3914
9535a86a
SZ
3915 if (adev->bios) {
3916 if (adev->is_atom_fw) {
3917 /* Initialize clocks */
3918 r = amdgpu_atomfirmware_get_clock_info(adev);
3919 if (r) {
3920 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
3921 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3922 goto failed;
3923 }
3924 } else {
3925 /* Initialize clocks */
3926 r = amdgpu_atombios_get_clock_info(adev);
3927 if (r) {
3928 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
3929 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3930 goto failed;
3931 }
3932 /* init i2c buses */
3933 if (!amdgpu_device_has_dc_support(adev))
3934 amdgpu_atombios_i2c_init(adev);
a5bde2f9 3935 }
2c1a2784 3936 }
d38ceaf9 3937
bfca0289 3938fence_driver_init:
d38ceaf9 3939 /* Fence driver */
067f44c8 3940 r = amdgpu_fence_driver_sw_init(adev);
2c1a2784 3941 if (r) {
067f44c8 3942 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
e23b74aa 3943 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3944 goto failed;
2c1a2784 3945 }
d38ceaf9
AD
3946
3947 /* init the mode config */
4a580877 3948 drm_mode_config_init(adev_to_drm(adev));
d38ceaf9 3949
06ec9070 3950 r = amdgpu_device_ip_init(adev);
d38ceaf9 3951 if (r) {
06ec9070 3952 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3953 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
970fd197 3954 goto release_ras_con;
d38ceaf9
AD
3955 }
3956
8d35a259
LG
3957 amdgpu_fence_driver_hw_init(adev);
3958
d69b8971
YZ
3959 dev_info(adev->dev,
3960 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
3961 adev->gfx.config.max_shader_engines,
3962 adev->gfx.config.max_sh_per_se,
3963 adev->gfx.config.max_cu_per_sh,
3964 adev->gfx.cu_info.number);
3965
d38ceaf9
AD
3966 adev->accel_working = true;
3967
e59c0205
AX
3968 amdgpu_vm_check_compute_bug(adev);
3969
95844d20
MO
3970 /* Initialize the buffer migration limit. */
3971 if (amdgpu_moverate >= 0)
3972 max_MBps = amdgpu_moverate;
3973 else
3974 max_MBps = 8; /* Allow 8 MB/s. */
3975 /* Get a log2 for easy divisions. */
3976 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3977
b0adca4d
EQ
3978 /*
3979 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3980 * Otherwise the mgpu fan boost feature will be skipped due to the
3981 * gpu instance is counted less.
3982 */
3983 amdgpu_register_gpu_instance(adev);
3984
d38ceaf9
AD
3985 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3986 * explicit gating rather than handling it automatically.
3987 */
e3c1b071 3988 if (!adev->gmc.xgmi.pending_reset) {
3989 r = amdgpu_device_ip_late_init(adev);
3990 if (r) {
3991 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
3992 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
970fd197 3993 goto release_ras_con;
e3c1b071 3994 }
3995 /* must succeed. */
3996 amdgpu_ras_resume(adev);
3997 queue_delayed_work(system_wq, &adev->delayed_init_work,
3998 msecs_to_jiffies(AMDGPU_RESUME_MS));
2c1a2784 3999 }
d38ceaf9 4000
38eecbe0
CL
4001 if (amdgpu_sriov_vf(adev)) {
4002 amdgpu_virt_release_full_gpu(adev, true);
2c738637 4003 flush_delayed_work(&adev->delayed_init_work);
38eecbe0 4004 }
2c738637 4005
90bcb9b5
EQ
4006 /*
4007 * Place those sysfs registering after `late_init`. As some of those
4008 * operations performed in `late_init` might affect the sysfs
4009 * interfaces creating.
4010 */
4011 r = amdgpu_atombios_sysfs_init(adev);
4012 if (r)
4013 drm_err(&adev->ddev,
4014 "registering atombios sysfs failed (%d).\n", r);
4015
4016 r = amdgpu_pm_sysfs_init(adev);
4017 if (r)
4018 DRM_ERROR("registering pm sysfs failed (%d).\n", r);
4019
4020 r = amdgpu_ucode_sysfs_init(adev);
4021 if (r) {
4022 adev->ucode_sysfs_en = false;
4023 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
4024 } else
4025 adev->ucode_sysfs_en = true;
4026
77f3a5cd 4027 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
5aea5327 4028 if (r)
77f3a5cd 4029 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166 4030
7957ec80
LL
4031 amdgpu_fru_sysfs_init(adev);
4032
d155bef0
AB
4033 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4034 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
4035 if (r)
4036 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
4037
c1dd4aa6
AG
4038 /* Have stored pci confspace at hand for restore in sudden PCI error */
4039 if (amdgpu_device_cache_pci_state(adev->pdev))
4040 pci_restore_state(pdev);
4041
8c3dd61c
KHF
4042 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
4043 /* this will fail for cards that aren't VGA class devices, just
b8920e1e
SS
4044 * ignore it
4045 */
8c3dd61c 4046 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
bf44e8ce 4047 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
8c3dd61c 4048
d37a3929
OC
4049 px = amdgpu_device_supports_px(ddev);
4050
4051 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
4052 apple_gmux_detect(NULL, NULL)))
8c3dd61c
KHF
4053 vga_switcheroo_register_client(adev->pdev,
4054 &amdgpu_switcheroo_ops, px);
d37a3929
OC
4055
4056 if (px)
8c3dd61c 4057 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
8c3dd61c 4058
e3c1b071 4059 if (adev->gmc.xgmi.pending_reset)
4060 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
4061 msecs_to_jiffies(AMDGPU_RESUME_MS));
4062
4a74c38c
PY
4063 amdgpu_device_check_iommu_direct_map(adev);
4064
d38ceaf9 4065 return 0;
83ba126a 4066
970fd197 4067release_ras_con:
38eecbe0
CL
4068 if (amdgpu_sriov_vf(adev))
4069 amdgpu_virt_release_full_gpu(adev, true);
4070
4071 /* failed in exclusive mode due to timeout */
4072 if (amdgpu_sriov_vf(adev) &&
4073 !amdgpu_sriov_runtime(adev) &&
4074 amdgpu_virt_mmio_blocked(adev) &&
4075 !amdgpu_virt_wait_reset(adev)) {
4076 dev_err(adev->dev, "VF exclusive mode timeout\n");
4077 /* Don't send request since VF is inactive. */
4078 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4079 adev->virt.ops = NULL;
4080 r = -EAGAIN;
4081 }
970fd197
SY
4082 amdgpu_release_ras_context(adev);
4083
83ba126a 4084failed:
89041940 4085 amdgpu_vf_error_trans_all(adev);
8840a387 4086
83ba126a 4087 return r;
d38ceaf9
AD
4088}
4089
07775fc1
AG
4090static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
4091{
62d5f9f7 4092
07775fc1
AG
4093 /* Clear all CPU mappings pointing to this device */
4094 unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
4095
4096 /* Unmap all mapped bars - Doorbell, registers and VRAM */
43c064db 4097 amdgpu_doorbell_fini(adev);
07775fc1
AG
4098
4099 iounmap(adev->rmmio);
4100 adev->rmmio = NULL;
4101 if (adev->mman.aper_base_kaddr)
4102 iounmap(adev->mman.aper_base_kaddr);
4103 adev->mman.aper_base_kaddr = NULL;
4104
4105 /* Memory manager related */
a0ba1279 4106 if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
07775fc1
AG
4107 arch_phys_wc_del(adev->gmc.vram_mtrr);
4108 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4109 }
4110}
4111
d38ceaf9 4112/**
bbe04dec 4113 * amdgpu_device_fini_hw - tear down the driver
d38ceaf9
AD
4114 *
4115 * @adev: amdgpu_device pointer
4116 *
4117 * Tear down the driver info (all asics).
4118 * Called at driver shutdown.
4119 */
72c8c97b 4120void amdgpu_device_fini_hw(struct amdgpu_device *adev)
d38ceaf9 4121{
aac89168 4122 dev_info(adev->dev, "amdgpu: finishing device.\n");
9f875167 4123 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 4124 adev->shutdown = true;
9f875167 4125
752c683d
ML
4126 /* make sure IB test finished before entering exclusive mode
4127 * to avoid preemption on IB test
b8920e1e 4128 */
519b8b76 4129 if (amdgpu_sriov_vf(adev)) {
752c683d 4130 amdgpu_virt_request_full_gpu(adev, false);
519b8b76
BZ
4131 amdgpu_virt_fini_data_exchange(adev);
4132 }
752c683d 4133
e5b03032
ML
4134 /* disable all interrupts */
4135 amdgpu_irq_disable_all(adev);
47fc644f 4136 if (adev->mode_info.mode_config_initialized) {
1053b9c9 4137 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4a580877 4138 drm_helper_force_disable_all(adev_to_drm(adev));
ff97cba8 4139 else
4a580877 4140 drm_atomic_helper_shutdown(adev_to_drm(adev));
ff97cba8 4141 }
8d35a259 4142 amdgpu_fence_driver_hw_fini(adev);
72c8c97b 4143
cd3a8a59 4144 if (adev->mman.initialized)
9bff18d1 4145 drain_workqueue(adev->mman.bdev.wq);
98f56188 4146
53e9d836 4147 if (adev->pm.sysfs_initialized)
7c868b59 4148 amdgpu_pm_sysfs_fini(adev);
72c8c97b
AG
4149 if (adev->ucode_sysfs_en)
4150 amdgpu_ucode_sysfs_fini(adev);
4151 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
7957ec80 4152 amdgpu_fru_sysfs_fini(adev);
72c8c97b 4153
232d1d43
SY
4154 /* disable ras feature must before hw fini */
4155 amdgpu_ras_pre_fini(adev);
4156
e9669fb7 4157 amdgpu_device_ip_fini_early(adev);
d10d0daa 4158
a3848df6
YW
4159 amdgpu_irq_fini_hw(adev);
4160
b6fd6e0f
SK
4161 if (adev->mman.initialized)
4162 ttm_device_clear_dma_mappings(&adev->mman.bdev);
894c6890 4163
d10d0daa 4164 amdgpu_gart_dummy_page_fini(adev);
07775fc1 4165
39934d3e
VP
4166 if (drm_dev_is_unplugged(adev_to_drm(adev)))
4167 amdgpu_device_unmap_mmio(adev);
87172e89 4168
72c8c97b
AG
4169}
4170
4171void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4172{
62d5f9f7 4173 int idx;
d37a3929 4174 bool px;
62d5f9f7 4175
8d35a259 4176 amdgpu_fence_driver_sw_fini(adev);
a5c5d8d5 4177 amdgpu_device_ip_fini(adev);
b31d3063 4178 amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
d38ceaf9 4179 adev->accel_working = false;
68ce8b24 4180 dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
04442bf7
LL
4181
4182 amdgpu_reset_fini(adev);
4183
d38ceaf9 4184 /* free i2c buses */
4562236b
HW
4185 if (!amdgpu_device_has_dc_support(adev))
4186 amdgpu_i2c_fini(adev);
bfca0289
SL
4187
4188 if (amdgpu_emu_mode != 1)
4189 amdgpu_atombios_fini(adev);
4190
d38ceaf9
AD
4191 kfree(adev->bios);
4192 adev->bios = NULL;
d37a3929
OC
4193
4194 px = amdgpu_device_supports_px(adev_to_drm(adev));
4195
4196 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
4197 apple_gmux_detect(NULL, NULL)))
84c8b22e 4198 vga_switcheroo_unregister_client(adev->pdev);
d37a3929
OC
4199
4200 if (px)
83ba126a 4201 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d37a3929 4202
38d6be81 4203 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
b8779475 4204 vga_client_unregister(adev->pdev);
e9bc1bf7 4205
62d5f9f7
LS
4206 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4207
4208 iounmap(adev->rmmio);
4209 adev->rmmio = NULL;
43c064db 4210 amdgpu_doorbell_fini(adev);
62d5f9f7
LS
4211 drm_dev_exit(idx);
4212 }
4213
d155bef0
AB
4214 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4215 amdgpu_pmu_fini(adev);
72de33f8 4216 if (adev->mman.discovery_bin)
a190d1c7 4217 amdgpu_discovery_fini(adev);
72c8c97b 4218
cfbb6b00
AG
4219 amdgpu_reset_put_reset_domain(adev->reset_domain);
4220 adev->reset_domain = NULL;
4221
72c8c97b
AG
4222 kfree(adev->pci_state);
4223
d38ceaf9
AD
4224}
4225
58144d28
ND
4226/**
4227 * amdgpu_device_evict_resources - evict device resources
4228 * @adev: amdgpu device object
4229 *
4230 * Evicts all ttm device resources(vram BOs, gart table) from the lru list
4231 * of the vram memory type. Mainly used for evicting device resources
4232 * at suspend time.
4233 *
4234 */
7863c155 4235static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
58144d28 4236{
7863c155
ML
4237 int ret;
4238
e53d9665
ML
4239 /* No need to evict vram on APUs for suspend to ram or s2idle */
4240 if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
7863c155 4241 return 0;
58144d28 4242
7863c155
ML
4243 ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4244 if (ret)
58144d28 4245 DRM_WARN("evicting device resources failed\n");
7863c155 4246 return ret;
58144d28 4247}
d38ceaf9
AD
4248
4249/*
4250 * Suspend & resume.
4251 */
4252/**
810ddc3a 4253 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 4254 *
87e3f136 4255 * @dev: drm dev pointer
87e3f136 4256 * @fbcon : notify the fbdev of suspend
d38ceaf9
AD
4257 *
4258 * Puts the hw in the suspend state (all asics).
4259 * Returns 0 for success or an error on failure.
4260 * Called at driver suspend.
4261 */
de185019 4262int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9 4263{
a2e15b0e 4264 struct amdgpu_device *adev = drm_to_adev(dev);
d7274ec7 4265 int r = 0;
d38ceaf9 4266
d38ceaf9
AD
4267 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4268 return 0;
4269
44779b43 4270 adev->in_suspend = true;
3fa8f89d 4271
47ea2076
SF
4272 /* Evict the majority of BOs before grabbing the full access */
4273 r = amdgpu_device_evict_resources(adev);
4274 if (r)
4275 return r;
4276
d7274ec7
BZ
4277 if (amdgpu_sriov_vf(adev)) {
4278 amdgpu_virt_fini_data_exchange(adev);
4279 r = amdgpu_virt_request_full_gpu(adev, false);
4280 if (r)
4281 return r;
4282 }
4283
3fa8f89d
S
4284 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4285 DRM_WARN("smart shift update failed\n");
4286
5f818173 4287 if (fbcon)
087451f3 4288 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
5f818173 4289
beff74bc 4290 cancel_delayed_work_sync(&adev->delayed_init_work);
0dee7263 4291 flush_delayed_work(&adev->gfx.gfx_off_delay_work);
a5459475 4292
5e6932fe 4293 amdgpu_ras_suspend(adev);
4294
2196927b 4295 amdgpu_device_ip_suspend_phase1(adev);
fe1053b7 4296
c004d44e 4297 if (!adev->in_s0ix)
5d3a2d95 4298 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
94fa5660 4299
7863c155
ML
4300 r = amdgpu_device_evict_resources(adev);
4301 if (r)
4302 return r;
d38ceaf9 4303
8d35a259 4304 amdgpu_fence_driver_hw_fini(adev);
d38ceaf9 4305
2196927b 4306 amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 4307
d7274ec7
BZ
4308 if (amdgpu_sriov_vf(adev))
4309 amdgpu_virt_release_full_gpu(adev, false);
4310
d38ceaf9
AD
4311 return 0;
4312}
4313
4314/**
810ddc3a 4315 * amdgpu_device_resume - initiate device resume
d38ceaf9 4316 *
87e3f136 4317 * @dev: drm dev pointer
87e3f136 4318 * @fbcon : notify the fbdev of resume
d38ceaf9
AD
4319 *
4320 * Bring the hw back to operating state (all asics).
4321 * Returns 0 for success or an error on failure.
4322 * Called at driver resume.
4323 */
de185019 4324int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9 4325{
1348969a 4326 struct amdgpu_device *adev = drm_to_adev(dev);
03161a6e 4327 int r = 0;
d38ceaf9 4328
d7274ec7
BZ
4329 if (amdgpu_sriov_vf(adev)) {
4330 r = amdgpu_virt_request_full_gpu(adev, true);
4331 if (r)
4332 return r;
4333 }
4334
d38ceaf9
AD
4335 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4336 return 0;
4337
62498733 4338 if (adev->in_s0ix)
bc143d8b 4339 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
628c36d7 4340
d38ceaf9 4341 /* post card */
39c640c0 4342 if (amdgpu_device_need_post(adev)) {
4d2997ab 4343 r = amdgpu_device_asic_init(adev);
74b0b157 4344 if (r)
aac89168 4345 dev_err(adev->dev, "amdgpu asic init failed\n");
74b0b157 4346 }
d38ceaf9 4347
06ec9070 4348 r = amdgpu_device_ip_resume(adev);
d7274ec7 4349
e6707218 4350 if (r) {
aac89168 4351 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
3c22c1ea 4352 goto exit;
e6707218 4353 }
8d35a259 4354 amdgpu_fence_driver_hw_init(adev);
5ceb54c6 4355
06ec9070 4356 r = amdgpu_device_ip_late_init(adev);
03161a6e 4357 if (r)
3c22c1ea 4358 goto exit;
d38ceaf9 4359
beff74bc
AD
4360 queue_delayed_work(system_wq, &adev->delayed_init_work,
4361 msecs_to_jiffies(AMDGPU_RESUME_MS));
4362
c004d44e 4363 if (!adev->in_s0ix) {
5d3a2d95
AD
4364 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4365 if (r)
3c22c1ea 4366 goto exit;
5d3a2d95 4367 }
756e6880 4368
3c22c1ea
SF
4369exit:
4370 if (amdgpu_sriov_vf(adev)) {
4371 amdgpu_virt_init_data_exchange(adev);
4372 amdgpu_virt_release_full_gpu(adev, true);
4373 }
4374
4375 if (r)
4376 return r;
4377
96a5d8d4 4378 /* Make sure IB tests flushed */
beff74bc 4379 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 4380
a2e15b0e 4381 if (fbcon)
087451f3 4382 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
d38ceaf9 4383
5e6932fe 4384 amdgpu_ras_resume(adev);
4385
d09ef243
AD
4386 if (adev->mode_info.num_crtc) {
4387 /*
4388 * Most of the connector probing functions try to acquire runtime pm
4389 * refs to ensure that the GPU is powered on when connector polling is
4390 * performed. Since we're calling this from a runtime PM callback,
4391 * trying to acquire rpm refs will cause us to deadlock.
4392 *
4393 * Since we're guaranteed to be holding the rpm lock, it's safe to
4394 * temporarily disable the rpm helpers so this doesn't deadlock us.
4395 */
23a1a9e5 4396#ifdef CONFIG_PM
d09ef243 4397 dev->dev->power.disable_depth++;
23a1a9e5 4398#endif
d09ef243
AD
4399 if (!adev->dc_enabled)
4400 drm_helper_hpd_irq_event(dev);
4401 else
4402 drm_kms_helper_hotplug_event(dev);
23a1a9e5 4403#ifdef CONFIG_PM
d09ef243 4404 dev->dev->power.disable_depth--;
23a1a9e5 4405#endif
d09ef243 4406 }
44779b43
RZ
4407 adev->in_suspend = false;
4408
dc907c9d
JX
4409 if (adev->enable_mes)
4410 amdgpu_mes_self_test(adev);
4411
3fa8f89d
S
4412 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4413 DRM_WARN("smart shift update failed\n");
4414
4d3b9ae5 4415 return 0;
d38ceaf9
AD
4416}
4417
e3ecdffa
AD
4418/**
4419 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4420 *
4421 * @adev: amdgpu_device pointer
4422 *
4423 * The list of all the hardware IPs that make up the asic is walked and
4424 * the check_soft_reset callbacks are run. check_soft_reset determines
4425 * if the asic is still hung or not.
4426 * Returns true if any of the IPs are still in a hung state, false if not.
4427 */
06ec9070 4428static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
4429{
4430 int i;
4431 bool asic_hang = false;
4432
f993d628
ML
4433 if (amdgpu_sriov_vf(adev))
4434 return true;
4435
8bc04c29
AD
4436 if (amdgpu_asic_need_full_reset(adev))
4437 return true;
4438
63fbf42f 4439 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4440 if (!adev->ip_blocks[i].status.valid)
63fbf42f 4441 continue;
a1255107
AD
4442 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4443 adev->ip_blocks[i].status.hang =
4444 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4445 if (adev->ip_blocks[i].status.hang) {
aac89168 4446 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
4447 asic_hang = true;
4448 }
4449 }
4450 return asic_hang;
4451}
4452
e3ecdffa
AD
4453/**
4454 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4455 *
4456 * @adev: amdgpu_device pointer
4457 *
4458 * The list of all the hardware IPs that make up the asic is walked and the
4459 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
4460 * handles any IP specific hardware or software state changes that are
4461 * necessary for a soft reset to succeed.
4462 * Returns 0 on success, negative error code on failure.
4463 */
06ec9070 4464static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
4465{
4466 int i, r = 0;
4467
4468 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4469 if (!adev->ip_blocks[i].status.valid)
d31a501e 4470 continue;
a1255107
AD
4471 if (adev->ip_blocks[i].status.hang &&
4472 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4473 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
4474 if (r)
4475 return r;
4476 }
4477 }
4478
4479 return 0;
4480}
4481
e3ecdffa
AD
4482/**
4483 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4484 *
4485 * @adev: amdgpu_device pointer
4486 *
4487 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
4488 * reset is necessary to recover.
4489 * Returns true if a full asic reset is required, false if not.
4490 */
06ec9070 4491static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 4492{
da146d3b
AD
4493 int i;
4494
8bc04c29
AD
4495 if (amdgpu_asic_need_full_reset(adev))
4496 return true;
4497
da146d3b 4498 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4499 if (!adev->ip_blocks[i].status.valid)
da146d3b 4500 continue;
a1255107
AD
4501 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4502 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4503 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
4504 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4505 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 4506 if (adev->ip_blocks[i].status.hang) {
aac89168 4507 dev_info(adev->dev, "Some block need full reset!\n");
da146d3b
AD
4508 return true;
4509 }
4510 }
35d782fe
CZ
4511 }
4512 return false;
4513}
4514
e3ecdffa
AD
4515/**
4516 * amdgpu_device_ip_soft_reset - do a soft reset
4517 *
4518 * @adev: amdgpu_device pointer
4519 *
4520 * The list of all the hardware IPs that make up the asic is walked and the
4521 * soft_reset callbacks are run if the block is hung. soft_reset handles any
4522 * IP specific hardware or software state changes that are necessary to soft
4523 * reset the IP.
4524 * Returns 0 on success, negative error code on failure.
4525 */
06ec9070 4526static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4527{
4528 int i, r = 0;
4529
4530 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4531 if (!adev->ip_blocks[i].status.valid)
35d782fe 4532 continue;
a1255107
AD
4533 if (adev->ip_blocks[i].status.hang &&
4534 adev->ip_blocks[i].version->funcs->soft_reset) {
4535 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
4536 if (r)
4537 return r;
4538 }
4539 }
4540
4541 return 0;
4542}
4543
e3ecdffa
AD
4544/**
4545 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4546 *
4547 * @adev: amdgpu_device pointer
4548 *
4549 * The list of all the hardware IPs that make up the asic is walked and the
4550 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
4551 * handles any IP specific hardware or software state changes that are
4552 * necessary after the IP has been soft reset.
4553 * Returns 0 on success, negative error code on failure.
4554 */
06ec9070 4555static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4556{
4557 int i, r = 0;
4558
4559 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4560 if (!adev->ip_blocks[i].status.valid)
35d782fe 4561 continue;
a1255107
AD
4562 if (adev->ip_blocks[i].status.hang &&
4563 adev->ip_blocks[i].version->funcs->post_soft_reset)
4564 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
4565 if (r)
4566 return r;
4567 }
4568
4569 return 0;
4570}
4571
e3ecdffa 4572/**
c33adbc7 4573 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
4574 *
4575 * @adev: amdgpu_device pointer
4576 *
4577 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
4578 * restore things like GPUVM page tables after a GPU reset where
4579 * the contents of VRAM might be lost.
403009bf
CK
4580 *
4581 * Returns:
4582 * 0 on success, negative error code on failure.
e3ecdffa 4583 */
c33adbc7 4584static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 4585{
c41d1cf6 4586 struct dma_fence *fence = NULL, *next = NULL;
403009bf 4587 struct amdgpu_bo *shadow;
e18aaea7 4588 struct amdgpu_bo_vm *vmbo;
403009bf 4589 long r = 1, tmo;
c41d1cf6
ML
4590
4591 if (amdgpu_sriov_runtime(adev))
b045d3af 4592 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
4593 else
4594 tmo = msecs_to_jiffies(100);
4595
aac89168 4596 dev_info(adev->dev, "recover vram bo from shadow start\n");
c41d1cf6 4597 mutex_lock(&adev->shadow_list_lock);
e18aaea7 4598 list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4994d1f0
LC
4599 /* If vm is compute context or adev is APU, shadow will be NULL */
4600 if (!vmbo->shadow)
4601 continue;
4602 shadow = vmbo->shadow;
4603
403009bf 4604 /* No need to recover an evicted BO */
d3116756
CK
4605 if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4606 shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4607 shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
403009bf
CK
4608 continue;
4609
4610 r = amdgpu_bo_restore_shadow(shadow, &next);
4611 if (r)
4612 break;
4613
c41d1cf6 4614 if (fence) {
1712fb1a 4615 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
4616 dma_fence_put(fence);
4617 fence = next;
1712fb1a 4618 if (tmo == 0) {
4619 r = -ETIMEDOUT;
c41d1cf6 4620 break;
1712fb1a 4621 } else if (tmo < 0) {
4622 r = tmo;
4623 break;
4624 }
403009bf
CK
4625 } else {
4626 fence = next;
c41d1cf6 4627 }
c41d1cf6
ML
4628 }
4629 mutex_unlock(&adev->shadow_list_lock);
4630
403009bf
CK
4631 if (fence)
4632 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
4633 dma_fence_put(fence);
4634
1712fb1a 4635 if (r < 0 || tmo <= 0) {
aac89168 4636 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
4637 return -EIO;
4638 }
c41d1cf6 4639
aac89168 4640 dev_info(adev->dev, "recover vram bo from shadow done\n");
403009bf 4641 return 0;
c41d1cf6
ML
4642}
4643
a90ad3c2 4644
e3ecdffa 4645/**
06ec9070 4646 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e 4647 *
982a820b 4648 * @adev: amdgpu_device pointer
87e3f136 4649 * @from_hypervisor: request from hypervisor
5740682e
ML
4650 *
4651 * do VF FLR and reinitialize Asic
3f48c681 4652 * return 0 means succeeded otherwise failed
e3ecdffa
AD
4653 */
4654static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4655 bool from_hypervisor)
5740682e
ML
4656{
4657 int r;
a5f67c93 4658 struct amdgpu_hive_info *hive = NULL;
7258fa31 4659 int retry_limit = 0;
5740682e 4660
7258fa31 4661retry:
c004d44e 4662 amdgpu_amdkfd_pre_reset(adev);
428890a3 4663
5740682e
ML
4664 if (from_hypervisor)
4665 r = amdgpu_virt_request_full_gpu(adev, true);
4666 else
4667 r = amdgpu_virt_reset_gpu(adev);
4668 if (r)
4669 return r;
f734b213 4670 amdgpu_irq_gpu_reset_resume_helper(adev);
a90ad3c2 4671
83f24a8f
HC
4672 /* some sw clean up VF needs to do before recover */
4673 amdgpu_virt_post_reset(adev);
4674
a90ad3c2 4675 /* Resume IP prior to SMC */
06ec9070 4676 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
4677 if (r)
4678 goto error;
a90ad3c2 4679
c9ffa427 4680 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 4681
7a3e0bb2
RZ
4682 r = amdgpu_device_fw_loading(adev);
4683 if (r)
4684 return r;
4685
a90ad3c2 4686 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 4687 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
4688 if (r)
4689 goto error;
a90ad3c2 4690
a5f67c93
ZL
4691 hive = amdgpu_get_xgmi_hive(adev);
4692 /* Update PSP FW topology after reset */
4693 if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4694 r = amdgpu_xgmi_update_topology(hive, adev);
4695
4696 if (hive)
4697 amdgpu_put_xgmi_hive(hive);
4698
4699 if (!r) {
a5f67c93 4700 r = amdgpu_ib_ring_tests(adev);
9c12f5cd 4701
c004d44e 4702 amdgpu_amdkfd_post_reset(adev);
a5f67c93 4703 }
a90ad3c2 4704
abc34253 4705error:
c41d1cf6 4706 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 4707 amdgpu_inc_vram_lost(adev);
c33adbc7 4708 r = amdgpu_device_recover_vram(adev);
a90ad3c2 4709 }
437f3e0b 4710 amdgpu_virt_release_full_gpu(adev, true);
a90ad3c2 4711
7258fa31
SK
4712 if (AMDGPU_RETRY_SRIOV_RESET(r)) {
4713 if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
4714 retry_limit++;
4715 goto retry;
4716 } else
4717 DRM_ERROR("GPU reset retry is beyond the retry limit\n");
4718 }
4719
a90ad3c2
ML
4720 return r;
4721}
4722
9a1cddd6 4723/**
4724 * amdgpu_device_has_job_running - check if there is any job in mirror list
4725 *
982a820b 4726 * @adev: amdgpu_device pointer
9a1cddd6 4727 *
4728 * check if there is any job in mirror list
4729 */
4730bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4731{
4732 int i;
4733 struct drm_sched_job *job;
4734
4735 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4736 struct amdgpu_ring *ring = adev->rings[i];
4737
4738 if (!ring || !ring->sched.thread)
4739 continue;
4740
4741 spin_lock(&ring->sched.job_list_lock);
6efa4b46
LT
4742 job = list_first_entry_or_null(&ring->sched.pending_list,
4743 struct drm_sched_job, list);
9a1cddd6 4744 spin_unlock(&ring->sched.job_list_lock);
4745 if (job)
4746 return true;
4747 }
4748 return false;
4749}
4750
12938fad
CK
4751/**
4752 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4753 *
982a820b 4754 * @adev: amdgpu_device pointer
12938fad
CK
4755 *
4756 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4757 * a hung GPU.
4758 */
4759bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4760{
12938fad 4761
3ba7b418
AG
4762 if (amdgpu_gpu_recovery == 0)
4763 goto disabled;
4764
1a11a65d
YC
4765 /* Skip soft reset check in fatal error mode */
4766 if (!amdgpu_ras_is_poison_mode_supported(adev))
4767 return true;
4768
3ba7b418
AG
4769 if (amdgpu_sriov_vf(adev))
4770 return true;
4771
4772 if (amdgpu_gpu_recovery == -1) {
4773 switch (adev->asic_type) {
b3523c45
AD
4774#ifdef CONFIG_DRM_AMDGPU_SI
4775 case CHIP_VERDE:
4776 case CHIP_TAHITI:
4777 case CHIP_PITCAIRN:
4778 case CHIP_OLAND:
4779 case CHIP_HAINAN:
4780#endif
4781#ifdef CONFIG_DRM_AMDGPU_CIK
4782 case CHIP_KAVERI:
4783 case CHIP_KABINI:
4784 case CHIP_MULLINS:
4785#endif
4786 case CHIP_CARRIZO:
4787 case CHIP_STONEY:
4788 case CHIP_CYAN_SKILLFISH:
3ba7b418 4789 goto disabled;
b3523c45
AD
4790 default:
4791 break;
3ba7b418 4792 }
12938fad
CK
4793 }
4794
4795 return true;
3ba7b418
AG
4796
4797disabled:
aac89168 4798 dev_info(adev->dev, "GPU recovery disabled.\n");
3ba7b418 4799 return false;
12938fad
CK
4800}
4801
5c03e584
FX
4802int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
4803{
47fc644f
SS
4804 u32 i;
4805 int ret = 0;
5c03e584 4806
47fc644f 4807 amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5c03e584 4808
47fc644f 4809 dev_info(adev->dev, "GPU mode1 reset\n");
5c03e584 4810
47fc644f
SS
4811 /* disable BM */
4812 pci_clear_master(adev->pdev);
5c03e584 4813
47fc644f 4814 amdgpu_device_cache_pci_state(adev->pdev);
5c03e584 4815
47fc644f
SS
4816 if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
4817 dev_info(adev->dev, "GPU smu mode1 reset\n");
4818 ret = amdgpu_dpm_mode1_reset(adev);
4819 } else {
4820 dev_info(adev->dev, "GPU psp mode1 reset\n");
4821 ret = psp_gpu_reset(adev);
4822 }
5c03e584 4823
47fc644f 4824 if (ret)
2c0f880a 4825 goto mode1_reset_failed;
5c03e584 4826
47fc644f 4827 amdgpu_device_load_pci_state(adev->pdev);
15c5c5f5
LL
4828 ret = amdgpu_psp_wait_for_bootloader(adev);
4829 if (ret)
2c0f880a 4830 goto mode1_reset_failed;
5c03e584 4831
47fc644f
SS
4832 /* wait for asic to come out of reset */
4833 for (i = 0; i < adev->usec_timeout; i++) {
4834 u32 memsize = adev->nbio.funcs->get_memsize(adev);
5c03e584 4835
47fc644f
SS
4836 if (memsize != 0xffffffff)
4837 break;
4838 udelay(1);
4839 }
5c03e584 4840
2c0f880a
HZ
4841 if (i >= adev->usec_timeout) {
4842 ret = -ETIMEDOUT;
4843 goto mode1_reset_failed;
4844 }
4845
47fc644f 4846 amdgpu_atombios_scratch_regs_engine_hung(adev, false);
15c5c5f5 4847
2c0f880a
HZ
4848 return 0;
4849
4850mode1_reset_failed:
4851 dev_err(adev->dev, "GPU mode1 reset failed\n");
47fc644f 4852 return ret;
5c03e584 4853}
5c6dd71e 4854
e3c1b071 4855int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
04442bf7 4856 struct amdgpu_reset_context *reset_context)
26bc5340 4857{
5c1e6fa4 4858 int i, r = 0;
04442bf7
LL
4859 struct amdgpu_job *job = NULL;
4860 bool need_full_reset =
4861 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4862
4863 if (reset_context->reset_req_dev == adev)
4864 job = reset_context->job;
71182665 4865
b602ca5f
TZ
4866 if (amdgpu_sriov_vf(adev)) {
4867 /* stop the data exchange thread */
4868 amdgpu_virt_fini_data_exchange(adev);
4869 }
4870
9e225fb9
AG
4871 amdgpu_fence_driver_isr_toggle(adev, true);
4872
71182665 4873 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
4874 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4875 struct amdgpu_ring *ring = adev->rings[i];
4876
51687759 4877 if (!ring || !ring->sched.thread)
0875dc9e 4878 continue;
5740682e 4879
b8920e1e
SS
4880 /* Clear job fence from fence drv to avoid force_completion
4881 * leave NULL and vm flush fence in fence drv
4882 */
5c1e6fa4 4883 amdgpu_fence_driver_clear_job_fences(ring);
c530b02f 4884
2f9d4084
ML
4885 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
4886 amdgpu_fence_driver_force_completion(ring);
0875dc9e 4887 }
d38ceaf9 4888
9e225fb9
AG
4889 amdgpu_fence_driver_isr_toggle(adev, false);
4890
ff99849b 4891 if (job && job->vm)
222b5f04
AG
4892 drm_sched_increase_karma(&job->base);
4893
04442bf7 4894 r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
404b277b 4895 /* If reset handler not implemented, continue; otherwise return */
b8920e1e 4896 if (r == -EOPNOTSUPP)
404b277b
LL
4897 r = 0;
4898 else
04442bf7
LL
4899 return r;
4900
1d721ed6 4901 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
4902 if (!amdgpu_sriov_vf(adev)) {
4903
4904 if (!need_full_reset)
4905 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4906
360cd081
LG
4907 if (!need_full_reset && amdgpu_gpu_recovery &&
4908 amdgpu_device_ip_check_soft_reset(adev)) {
26bc5340
AG
4909 amdgpu_device_ip_pre_soft_reset(adev);
4910 r = amdgpu_device_ip_soft_reset(adev);
4911 amdgpu_device_ip_post_soft_reset(adev);
4912 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 4913 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
26bc5340
AG
4914 need_full_reset = true;
4915 }
4916 }
4917
4918 if (need_full_reset)
4919 r = amdgpu_device_ip_suspend(adev);
04442bf7
LL
4920 if (need_full_reset)
4921 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4922 else
4923 clear_bit(AMDGPU_NEED_FULL_RESET,
4924 &reset_context->flags);
26bc5340
AG
4925 }
4926
4927 return r;
4928}
4929
15fd09a0
SA
4930static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
4931{
15fd09a0
SA
4932 int i;
4933
38a15ad9 4934 lockdep_assert_held(&adev->reset_domain->sem);
15fd09a0
SA
4935
4936 for (i = 0; i < adev->num_regs; i++) {
651d7ee6
SA
4937 adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]);
4938 trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i],
4939 adev->reset_dump_reg_value[i]);
15fd09a0
SA
4940 }
4941
4942 return 0;
4943}
4944
3d8785f6
SA
4945#ifdef CONFIG_DEV_COREDUMP
4946static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
4947 size_t count, void *data, size_t datalen)
4948{
4949 struct drm_printer p;
4950 struct amdgpu_device *adev = data;
4951 struct drm_print_iterator iter;
4952 int i;
4953
4954 iter.data = buffer;
4955 iter.offset = 0;
4956 iter.start = offset;
4957 iter.remain = count;
4958
4959 p = drm_coredump_printer(&iter);
4960
4961 drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
4962 drm_printf(&p, "kernel: " UTS_RELEASE "\n");
4963 drm_printf(&p, "module: " KBUILD_MODNAME "\n");
4964 drm_printf(&p, "time: %lld.%09ld\n", adev->reset_time.tv_sec, adev->reset_time.tv_nsec);
4965 if (adev->reset_task_info.pid)
4966 drm_printf(&p, "process_name: %s PID: %d\n",
4967 adev->reset_task_info.process_name,
4968 adev->reset_task_info.pid);
4969
4970 if (adev->reset_vram_lost)
4971 drm_printf(&p, "VRAM is lost due to GPU reset!\n");
4972 if (adev->num_regs) {
4973 drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n");
4974
4975 for (i = 0; i < adev->num_regs; i++)
4976 drm_printf(&p, "0x%08x: 0x%08x\n",
4977 adev->reset_dump_reg_list[i],
4978 adev->reset_dump_reg_value[i]);
4979 }
4980
4981 return count - iter.remain;
4982}
4983
4984static void amdgpu_devcoredump_free(void *data)
4985{
4986}
4987
4988static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev)
4989{
4990 struct drm_device *dev = adev_to_drm(adev);
4991
4992 ktime_get_ts64(&adev->reset_time);
d68ccdb2 4993 dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_NOWAIT,
3d8785f6
SA
4994 amdgpu_devcoredump_read, amdgpu_devcoredump_free);
4995}
4996#endif
4997
04442bf7
LL
4998int amdgpu_do_asic_reset(struct list_head *device_list_handle,
4999 struct amdgpu_reset_context *reset_context)
26bc5340
AG
5000{
5001 struct amdgpu_device *tmp_adev = NULL;
04442bf7 5002 bool need_full_reset, skip_hw_reset, vram_lost = false;
26bc5340 5003 int r = 0;
f5c7e779 5004 bool gpu_reset_for_dev_remove = 0;
26bc5340 5005
04442bf7
LL
5006 /* Try reset handler method first */
5007 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5008 reset_list);
15fd09a0 5009 amdgpu_reset_reg_dumps(tmp_adev);
0a83bb35
LL
5010
5011 reset_context->reset_device_list = device_list_handle;
04442bf7 5012 r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
404b277b 5013 /* If reset handler not implemented, continue; otherwise return */
b8920e1e 5014 if (r == -EOPNOTSUPP)
404b277b
LL
5015 r = 0;
5016 else
04442bf7
LL
5017 return r;
5018
5019 /* Reset handler not implemented, use the default method */
5020 need_full_reset =
5021 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5022 skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
5023
f5c7e779
YC
5024 gpu_reset_for_dev_remove =
5025 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5026 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5027
26bc5340 5028 /*
655ce9cb 5029 * ASIC reset has to be done on all XGMI hive nodes ASAP
26bc5340
AG
5030 * to allow proper links negotiation in FW (within 1 sec)
5031 */
7ac71382 5032 if (!skip_hw_reset && need_full_reset) {
655ce9cb 5033 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
041a62bc 5034 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 5035 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
e3c1b071 5036 tmp_adev->gmc.xgmi.pending_reset = false;
c96cf282 5037 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
5038 r = -EALREADY;
5039 } else
5040 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 5041
041a62bc 5042 if (r) {
aac89168 5043 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4a580877 5044 r, adev_to_drm(tmp_adev)->unique);
041a62bc 5045 break;
ce316fa5
LM
5046 }
5047 }
5048
041a62bc
AG
5049 /* For XGMI wait for all resets to complete before proceed */
5050 if (!r) {
655ce9cb 5051 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
ce316fa5
LM
5052 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5053 flush_work(&tmp_adev->xgmi_reset_work);
5054 r = tmp_adev->asic_reset_res;
5055 if (r)
5056 break;
ce316fa5
LM
5057 }
5058 }
5059 }
ce316fa5 5060 }
26bc5340 5061
43c4d576 5062 if (!r && amdgpu_ras_intr_triggered()) {
655ce9cb 5063 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5e67bba3 5064 if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops &&
5065 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
5066 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev);
43c4d576
JC
5067 }
5068
00eaa571 5069 amdgpu_ras_intr_cleared();
43c4d576 5070 }
00eaa571 5071
f5c7e779
YC
5072 /* Since the mode1 reset affects base ip blocks, the
5073 * phase1 ip blocks need to be resumed. Otherwise there
5074 * will be a BIOS signature error and the psp bootloader
5075 * can't load kdb on the next amdgpu install.
5076 */
5077 if (gpu_reset_for_dev_remove) {
5078 list_for_each_entry(tmp_adev, device_list_handle, reset_list)
5079 amdgpu_device_ip_resume_phase1(tmp_adev);
5080
5081 goto end;
5082 }
5083
655ce9cb 5084 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
26bc5340
AG
5085 if (need_full_reset) {
5086 /* post card */
e3c1b071 5087 r = amdgpu_device_asic_init(tmp_adev);
5088 if (r) {
aac89168 5089 dev_warn(tmp_adev->dev, "asic atom init failed!");
e3c1b071 5090 } else {
26bc5340 5091 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
9cec53c1 5092
26bc5340
AG
5093 r = amdgpu_device_ip_resume_phase1(tmp_adev);
5094 if (r)
5095 goto out;
5096
5097 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
3d8785f6
SA
5098#ifdef CONFIG_DEV_COREDUMP
5099 tmp_adev->reset_vram_lost = vram_lost;
5100 memset(&tmp_adev->reset_task_info, 0,
5101 sizeof(tmp_adev->reset_task_info));
5102 if (reset_context->job && reset_context->job->vm)
5103 tmp_adev->reset_task_info =
5104 reset_context->job->vm->task_info;
5105 amdgpu_reset_capture_coredumpm(tmp_adev);
5106#endif
26bc5340 5107 if (vram_lost) {
77e7f829 5108 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 5109 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
5110 }
5111
26bc5340
AG
5112 r = amdgpu_device_fw_loading(tmp_adev);
5113 if (r)
5114 return r;
5115
5116 r = amdgpu_device_ip_resume_phase2(tmp_adev);
5117 if (r)
5118 goto out;
5119
5120 if (vram_lost)
5121 amdgpu_device_fill_reset_magic(tmp_adev);
5122
fdafb359
EQ
5123 /*
5124 * Add this ASIC as tracked as reset was already
5125 * complete successfully.
5126 */
5127 amdgpu_register_gpu_instance(tmp_adev);
5128
04442bf7
LL
5129 if (!reset_context->hive &&
5130 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
e3c1b071 5131 amdgpu_xgmi_add_device(tmp_adev);
5132
7c04ca50 5133 r = amdgpu_device_ip_late_init(tmp_adev);
5134 if (r)
5135 goto out;
5136
087451f3 5137 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
565d1941 5138
e8fbaf03
GC
5139 /*
5140 * The GPU enters bad state once faulty pages
5141 * by ECC has reached the threshold, and ras
5142 * recovery is scheduled next. So add one check
5143 * here to break recovery if it indeed exceeds
5144 * bad page threshold, and remind user to
5145 * retire this GPU or setting one bigger
5146 * bad_page_threshold value to fix this once
5147 * probing driver again.
5148 */
11003c68 5149 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
e8fbaf03
GC
5150 /* must succeed. */
5151 amdgpu_ras_resume(tmp_adev);
5152 } else {
5153 r = -EINVAL;
5154 goto out;
5155 }
e79a04d5 5156
26bc5340 5157 /* Update PSP FW topology after reset */
04442bf7
LL
5158 if (reset_context->hive &&
5159 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5160 r = amdgpu_xgmi_update_topology(
5161 reset_context->hive, tmp_adev);
26bc5340
AG
5162 }
5163 }
5164
26bc5340
AG
5165out:
5166 if (!r) {
5167 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5168 r = amdgpu_ib_ring_tests(tmp_adev);
5169 if (r) {
5170 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
26bc5340
AG
5171 need_full_reset = true;
5172 r = -EAGAIN;
5173 goto end;
5174 }
5175 }
5176
5177 if (!r)
5178 r = amdgpu_device_recover_vram(tmp_adev);
5179 else
5180 tmp_adev->asic_reset_res = r;
5181 }
5182
5183end:
04442bf7
LL
5184 if (need_full_reset)
5185 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5186 else
5187 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340
AG
5188 return r;
5189}
5190
e923be99 5191static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
26bc5340 5192{
5740682e 5193
a3a09142
AD
5194 switch (amdgpu_asic_reset_method(adev)) {
5195 case AMD_RESET_METHOD_MODE1:
5196 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5197 break;
5198 case AMD_RESET_METHOD_MODE2:
5199 adev->mp1_state = PP_MP1_STATE_RESET;
5200 break;
5201 default:
5202 adev->mp1_state = PP_MP1_STATE_NONE;
5203 break;
5204 }
26bc5340 5205}
d38ceaf9 5206
e923be99 5207static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
26bc5340 5208{
89041940 5209 amdgpu_vf_error_trans_all(adev);
a3a09142 5210 adev->mp1_state = PP_MP1_STATE_NONE;
91fb309d
HC
5211}
5212
3f12acc8
EQ
5213static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5214{
5215 struct pci_dev *p = NULL;
5216
5217 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5218 adev->pdev->bus->number, 1);
5219 if (p) {
5220 pm_runtime_enable(&(p->dev));
5221 pm_runtime_resume(&(p->dev));
5222 }
b85e285e
YY
5223
5224 pci_dev_put(p);
3f12acc8
EQ
5225}
5226
5227static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5228{
5229 enum amd_reset_method reset_method;
5230 struct pci_dev *p = NULL;
5231 u64 expires;
5232
5233 /*
5234 * For now, only BACO and mode1 reset are confirmed
5235 * to suffer the audio issue without proper suspended.
5236 */
5237 reset_method = amdgpu_asic_reset_method(adev);
5238 if ((reset_method != AMD_RESET_METHOD_BACO) &&
5239 (reset_method != AMD_RESET_METHOD_MODE1))
5240 return -EINVAL;
5241
5242 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5243 adev->pdev->bus->number, 1);
5244 if (!p)
5245 return -ENODEV;
5246
5247 expires = pm_runtime_autosuspend_expiration(&(p->dev));
5248 if (!expires)
5249 /*
5250 * If we cannot get the audio device autosuspend delay,
5251 * a fixed 4S interval will be used. Considering 3S is
5252 * the audio controller default autosuspend delay setting.
5253 * 4S used here is guaranteed to cover that.
5254 */
54b7feb9 5255 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
5256
5257 while (!pm_runtime_status_suspended(&(p->dev))) {
5258 if (!pm_runtime_suspend(&(p->dev)))
5259 break;
5260
5261 if (expires < ktime_get_mono_fast_ns()) {
5262 dev_warn(adev->dev, "failed to suspend display audio\n");
b85e285e 5263 pci_dev_put(p);
3f12acc8
EQ
5264 /* TODO: abort the succeeding gpu reset? */
5265 return -ETIMEDOUT;
5266 }
5267 }
5268
5269 pm_runtime_disable(&(p->dev));
5270
b85e285e 5271 pci_dev_put(p);
3f12acc8
EQ
5272 return 0;
5273}
5274
d193b12b 5275static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
247c7b0d
AG
5276{
5277 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5278
5279#if defined(CONFIG_DEBUG_FS)
5280 if (!amdgpu_sriov_vf(adev))
5281 cancel_work(&adev->reset_work);
5282#endif
5283
5284 if (adev->kfd.dev)
5285 cancel_work(&adev->kfd.reset_work);
5286
5287 if (amdgpu_sriov_vf(adev))
5288 cancel_work(&adev->virt.flr_work);
5289
5290 if (con && adev->ras_enabled)
5291 cancel_work(&con->recovery_work);
5292
5293}
5294
26bc5340 5295/**
6e9c65f7 5296 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
26bc5340 5297 *
982a820b 5298 * @adev: amdgpu_device pointer
26bc5340 5299 * @job: which job trigger hang
80bd2de1 5300 * @reset_context: amdgpu reset context pointer
26bc5340
AG
5301 *
5302 * Attempt to reset the GPU if it has hung (all asics).
5303 * Attempt to do soft-reset or full-reset and reinitialize Asic
5304 * Returns 0 for success or an error on failure.
5305 */
5306
cf727044 5307int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
f1549c09
LG
5308 struct amdgpu_job *job,
5309 struct amdgpu_reset_context *reset_context)
26bc5340 5310{
1d721ed6 5311 struct list_head device_list, *device_list_handle = NULL;
7dd8c205 5312 bool job_signaled = false;
26bc5340 5313 struct amdgpu_hive_info *hive = NULL;
26bc5340 5314 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 5315 int i, r = 0;
bb5c7235 5316 bool need_emergency_restart = false;
3f12acc8 5317 bool audio_suspended = false;
f5c7e779
YC
5318 bool gpu_reset_for_dev_remove = false;
5319
5320 gpu_reset_for_dev_remove =
5321 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5322 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340 5323
6e3cd2a9 5324 /*
bb5c7235
WS
5325 * Special case: RAS triggered and full reset isn't supported
5326 */
5327 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5328
d5ea093e
AG
5329 /*
5330 * Flush RAM to disk so that after reboot
5331 * the user can read log and see why the system rebooted.
5332 */
bb5c7235 5333 if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
5334 DRM_WARN("Emergency reboot.");
5335
5336 ksys_sync_helper();
5337 emergency_restart();
5338 }
5339
b823821f 5340 dev_info(adev->dev, "GPU %s begin!\n",
bb5c7235 5341 need_emergency_restart ? "jobs stop":"reset");
26bc5340 5342
175ac6ec
ZL
5343 if (!amdgpu_sriov_vf(adev))
5344 hive = amdgpu_get_xgmi_hive(adev);
681260df 5345 if (hive)
53b3f8f4 5346 mutex_lock(&hive->hive_lock);
26bc5340 5347
f1549c09
LG
5348 reset_context->job = job;
5349 reset_context->hive = hive;
9e94d22c
EQ
5350 /*
5351 * Build list of devices to reset.
5352 * In case we are in XGMI hive mode, resort the device list
5353 * to put adev in the 1st position.
5354 */
5355 INIT_LIST_HEAD(&device_list);
175ac6ec 5356 if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
83d29a5f 5357 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
655ce9cb 5358 list_add_tail(&tmp_adev->reset_list, &device_list);
83d29a5f
YC
5359 if (gpu_reset_for_dev_remove && adev->shutdown)
5360 tmp_adev->shutdown = true;
5361 }
655ce9cb 5362 if (!list_is_first(&adev->reset_list, &device_list))
5363 list_rotate_to_front(&adev->reset_list, &device_list);
5364 device_list_handle = &device_list;
26bc5340 5365 } else {
655ce9cb 5366 list_add_tail(&adev->reset_list, &device_list);
26bc5340
AG
5367 device_list_handle = &device_list;
5368 }
5369
e923be99
AG
5370 /* We need to lock reset domain only once both for XGMI and single device */
5371 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5372 reset_list);
3675c2f2 5373 amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
e923be99 5374
1d721ed6 5375 /* block all schedulers and reset given job's ring */
655ce9cb 5376 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f287a3c5 5377
e923be99 5378 amdgpu_device_set_mp1_state(tmp_adev);
f287a3c5 5379
3f12acc8
EQ
5380 /*
5381 * Try to put the audio codec into suspend state
5382 * before gpu reset started.
5383 *
5384 * Due to the power domain of the graphics device
5385 * is shared with AZ power domain. Without this,
5386 * we may change the audio hardware from behind
5387 * the audio driver's back. That will trigger
5388 * some audio codec errors.
5389 */
5390 if (!amdgpu_device_suspend_display_audio(tmp_adev))
5391 audio_suspended = true;
5392
9e94d22c
EQ
5393 amdgpu_ras_set_error_query_ready(tmp_adev, false);
5394
52fb44cf
EQ
5395 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5396
c004d44e 5397 if (!amdgpu_sriov_vf(tmp_adev))
428890a3 5398 amdgpu_amdkfd_pre_reset(tmp_adev);
9e94d22c 5399
12ffa55d
AG
5400 /*
5401 * Mark these ASICs to be reseted as untracked first
5402 * And add them back after reset completed
5403 */
5404 amdgpu_unregister_gpu_instance(tmp_adev);
5405
163d4cd2 5406 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
565d1941 5407
f1c1314b 5408 /* disable ras on ALL IPs */
bb5c7235 5409 if (!need_emergency_restart &&
b823821f 5410 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 5411 amdgpu_ras_suspend(tmp_adev);
5412
1d721ed6
AG
5413 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5414 struct amdgpu_ring *ring = tmp_adev->rings[i];
5415
5416 if (!ring || !ring->sched.thread)
5417 continue;
5418
0b2d2c2e 5419 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 5420
bb5c7235 5421 if (need_emergency_restart)
7c6e68c7 5422 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6 5423 }
8f8c80f4 5424 atomic_inc(&tmp_adev->gpu_reset_counter);
1d721ed6
AG
5425 }
5426
bb5c7235 5427 if (need_emergency_restart)
7c6e68c7
AG
5428 goto skip_sched_resume;
5429
1d721ed6
AG
5430 /*
5431 * Must check guilty signal here since after this point all old
5432 * HW fences are force signaled.
5433 *
5434 * job->base holds a reference to parent fence
5435 */
f6a3f660 5436 if (job && dma_fence_is_signaled(&job->hw_fence)) {
1d721ed6 5437 job_signaled = true;
1d721ed6
AG
5438 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5439 goto skip_hw_reset;
5440 }
5441
26bc5340 5442retry: /* Rest of adevs pre asic reset from XGMI hive. */
655ce9cb 5443 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f5c7e779
YC
5444 if (gpu_reset_for_dev_remove) {
5445 /* Workaroud for ASICs need to disable SMC first */
5446 amdgpu_device_smu_fini_early(tmp_adev);
5447 }
f1549c09 5448 r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
26bc5340
AG
5449 /*TODO Should we stop ?*/
5450 if (r) {
aac89168 5451 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
4a580877 5452 r, adev_to_drm(tmp_adev)->unique);
26bc5340
AG
5453 tmp_adev->asic_reset_res = r;
5454 }
247c7b0d
AG
5455
5456 /*
5457 * Drop all pending non scheduler resets. Scheduler resets
5458 * were already dropped during drm_sched_stop
5459 */
d193b12b 5460 amdgpu_device_stop_pending_resets(tmp_adev);
26bc5340
AG
5461 }
5462
5463 /* Actual ASIC resets if needed.*/
4f30d920 5464 /* Host driver will handle XGMI hive reset for SRIOV */
26bc5340
AG
5465 if (amdgpu_sriov_vf(adev)) {
5466 r = amdgpu_device_reset_sriov(adev, job ? false : true);
5467 if (r)
5468 adev->asic_reset_res = r;
950d6425 5469
28606c4e 5470 /* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */
4e8303cf
LL
5471 if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
5472 IP_VERSION(9, 4, 2) ||
5473 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
950d6425 5474 amdgpu_ras_resume(adev);
26bc5340 5475 } else {
f1549c09 5476 r = amdgpu_do_asic_reset(device_list_handle, reset_context);
b98a1648 5477 if (r && r == -EAGAIN)
26bc5340 5478 goto retry;
f5c7e779
YC
5479
5480 if (!r && gpu_reset_for_dev_remove)
5481 goto recover_end;
26bc5340
AG
5482 }
5483
1d721ed6
AG
5484skip_hw_reset:
5485
26bc5340 5486 /* Post ASIC reset for all devs .*/
655ce9cb 5487 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
7c6e68c7 5488
1d721ed6
AG
5489 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5490 struct amdgpu_ring *ring = tmp_adev->rings[i];
5491
5492 if (!ring || !ring->sched.thread)
5493 continue;
5494
6868a2c4 5495 drm_sched_start(&ring->sched, true);
1d721ed6
AG
5496 }
5497
4e8303cf
LL
5498 if (adev->enable_mes &&
5499 amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(11, 0, 3))
ed67f729
JX
5500 amdgpu_mes_self_test(tmp_adev);
5501
b8920e1e 5502 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
4a580877 5503 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
1d721ed6 5504
7258fa31
SK
5505 if (tmp_adev->asic_reset_res)
5506 r = tmp_adev->asic_reset_res;
5507
1d721ed6 5508 tmp_adev->asic_reset_res = 0;
26bc5340
AG
5509
5510 if (r) {
5511 /* bad news, how to tell it to userspace ? */
12ffa55d 5512 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
5513 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5514 } else {
12ffa55d 5515 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
3fa8f89d
S
5516 if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5517 DRM_WARN("smart shift update failed\n");
26bc5340 5518 }
7c6e68c7 5519 }
26bc5340 5520
7c6e68c7 5521skip_sched_resume:
655ce9cb 5522 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
428890a3 5523 /* unlock kfd: SRIOV would do it separately */
c004d44e 5524 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
428890a3 5525 amdgpu_amdkfd_post_reset(tmp_adev);
8e2712e7 5526
5527 /* kfd_post_reset will do nothing if kfd device is not initialized,
5528 * need to bring up kfd here if it's not be initialized before
5529 */
5530 if (!adev->kfd.init_complete)
5531 amdgpu_amdkfd_device_init(adev);
5532
3f12acc8
EQ
5533 if (audio_suspended)
5534 amdgpu_device_resume_display_audio(tmp_adev);
e923be99
AG
5535
5536 amdgpu_device_unset_mp1_state(tmp_adev);
d293470e
YC
5537
5538 amdgpu_ras_set_error_query_ready(tmp_adev, true);
26bc5340
AG
5539 }
5540
f5c7e779 5541recover_end:
e923be99
AG
5542 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5543 reset_list);
5544 amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5545
9e94d22c 5546 if (hive) {
9e94d22c 5547 mutex_unlock(&hive->hive_lock);
d95e8e97 5548 amdgpu_put_xgmi_hive(hive);
9e94d22c 5549 }
26bc5340 5550
f287a3c5 5551 if (r)
26bc5340 5552 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
ab9a0b1f
AG
5553
5554 atomic_set(&adev->reset_domain->reset_res, r);
d38ceaf9
AD
5555 return r;
5556}
5557
e3ecdffa
AD
5558/**
5559 * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
5560 *
5561 * @adev: amdgpu_device pointer
5562 *
5563 * Fetchs and stores in the driver the PCIE capabilities (gen speed
5564 * and lanes) of the slot the device is in. Handles APUs and
5565 * virtualized environments where PCIE config space may not be available.
5566 */
5494d864 5567static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 5568{
5d9a6330 5569 struct pci_dev *pdev;
c5313457
HK
5570 enum pci_bus_speed speed_cap, platform_speed_cap;
5571 enum pcie_link_width platform_link_width;
d0dd7f0c 5572
cd474ba0
AD
5573 if (amdgpu_pcie_gen_cap)
5574 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 5575
cd474ba0
AD
5576 if (amdgpu_pcie_lane_cap)
5577 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 5578
cd474ba0 5579 /* covers APUs as well */
04e85958 5580 if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
cd474ba0
AD
5581 if (adev->pm.pcie_gen_mask == 0)
5582 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5583 if (adev->pm.pcie_mlw_mask == 0)
5584 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 5585 return;
cd474ba0 5586 }
d0dd7f0c 5587
c5313457
HK
5588 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5589 return;
5590
dbaa922b
AD
5591 pcie_bandwidth_available(adev->pdev, NULL,
5592 &platform_speed_cap, &platform_link_width);
c5313457 5593
cd474ba0 5594 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
5595 /* asic caps */
5596 pdev = adev->pdev;
5597 speed_cap = pcie_get_speed_cap(pdev);
5598 if (speed_cap == PCI_SPEED_UNKNOWN) {
5599 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
5600 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5601 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 5602 } else {
2b3a1f51
FX
5603 if (speed_cap == PCIE_SPEED_32_0GT)
5604 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5605 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5606 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5607 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5608 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5609 else if (speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5610 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5611 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5612 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5613 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5614 else if (speed_cap == PCIE_SPEED_8_0GT)
5615 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5616 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5617 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5618 else if (speed_cap == PCIE_SPEED_5_0GT)
5619 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5620 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5621 else
5622 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5623 }
5624 /* platform caps */
c5313457 5625 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
5626 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5627 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5628 } else {
2b3a1f51
FX
5629 if (platform_speed_cap == PCIE_SPEED_32_0GT)
5630 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5631 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5632 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5633 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5634 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5635 else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5636 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5637 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5638 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5639 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 5640 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
5641 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5642 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5643 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 5644 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
5645 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5646 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5647 else
5648 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5649
cd474ba0
AD
5650 }
5651 }
5652 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 5653 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
5654 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5655 } else {
c5313457 5656 switch (platform_link_width) {
5d9a6330 5657 case PCIE_LNK_X32:
cd474ba0
AD
5658 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5659 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5660 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5661 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5662 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5663 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5664 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5665 break;
5d9a6330 5666 case PCIE_LNK_X16:
cd474ba0
AD
5667 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5668 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5669 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5670 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5671 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5672 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5673 break;
5d9a6330 5674 case PCIE_LNK_X12:
cd474ba0
AD
5675 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5676 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5677 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5678 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5679 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5680 break;
5d9a6330 5681 case PCIE_LNK_X8:
cd474ba0
AD
5682 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5683 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5684 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5685 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5686 break;
5d9a6330 5687 case PCIE_LNK_X4:
cd474ba0
AD
5688 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5689 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5690 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5691 break;
5d9a6330 5692 case PCIE_LNK_X2:
cd474ba0
AD
5693 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5694 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5695 break;
5d9a6330 5696 case PCIE_LNK_X1:
cd474ba0
AD
5697 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5698 break;
5699 default:
5700 break;
5701 }
d0dd7f0c
AD
5702 }
5703 }
5704}
d38ceaf9 5705
08a2fd23
RE
5706/**
5707 * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
5708 *
5709 * @adev: amdgpu_device pointer
5710 * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
5711 *
5712 * Return true if @peer_adev can access (DMA) @adev through the PCIe
5713 * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
5714 * @peer_adev.
5715 */
5716bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
5717 struct amdgpu_device *peer_adev)
5718{
5719#ifdef CONFIG_HSA_AMD_P2P
5720 uint64_t address_mask = peer_adev->dev->dma_mask ?
5721 ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
5722 resource_size_t aper_limit =
5723 adev->gmc.aper_base + adev->gmc.aper_size - 1;
bb66ecbf
LL
5724 bool p2p_access =
5725 !adev->gmc.xgmi.connected_to_cpu &&
5726 !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
08a2fd23
RE
5727
5728 return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
5729 adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
5730 !(adev->gmc.aper_base & address_mask ||
5731 aper_limit & address_mask));
5732#else
5733 return false;
5734#endif
5735}
5736
361dbd01
AD
5737int amdgpu_device_baco_enter(struct drm_device *dev)
5738{
1348969a 5739 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5740 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01 5741
6ab68650 5742 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
5743 return -ENOTSUPP;
5744
8ab0d6f0 5745 if (ras && adev->ras_enabled &&
acdae216 5746 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5747 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
5748
9530273e 5749 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
5750}
5751
5752int amdgpu_device_baco_exit(struct drm_device *dev)
5753{
1348969a 5754 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5755 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 5756 int ret = 0;
361dbd01 5757
6ab68650 5758 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
5759 return -ENOTSUPP;
5760
9530273e
EQ
5761 ret = amdgpu_dpm_baco_exit(adev);
5762 if (ret)
5763 return ret;
7a22677b 5764
8ab0d6f0 5765 if (ras && adev->ras_enabled &&
acdae216 5766 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5767 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
5768
1bece222
CL
5769 if (amdgpu_passthrough(adev) &&
5770 adev->nbio.funcs->clear_doorbell_interrupt)
5771 adev->nbio.funcs->clear_doorbell_interrupt(adev);
5772
7a22677b 5773 return 0;
361dbd01 5774}
c9a6b82f
AG
5775
5776/**
5777 * amdgpu_pci_error_detected - Called when a PCI error is detected.
5778 * @pdev: PCI device struct
5779 * @state: PCI channel state
5780 *
5781 * Description: Called when a PCI error is detected.
5782 *
5783 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
5784 */
5785pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
5786{
5787 struct drm_device *dev = pci_get_drvdata(pdev);
5788 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5789 int i;
c9a6b82f
AG
5790
5791 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
5792
6894305c
AG
5793 if (adev->gmc.xgmi.num_physical_nodes > 1) {
5794 DRM_WARN("No support for XGMI hive yet...");
5795 return PCI_ERS_RESULT_DISCONNECT;
5796 }
5797
e17e27f9
GC
5798 adev->pci_channel_state = state;
5799
c9a6b82f
AG
5800 switch (state) {
5801 case pci_channel_io_normal:
5802 return PCI_ERS_RESULT_CAN_RECOVER;
acd89fca 5803 /* Fatal error, prepare for slot reset */
8a11d283
TZ
5804 case pci_channel_io_frozen:
5805 /*
d0fb18b5 5806 * Locking adev->reset_domain->sem will prevent any external access
acd89fca
AG
5807 * to GPU during PCI error recovery
5808 */
3675c2f2 5809 amdgpu_device_lock_reset_domain(adev->reset_domain);
e923be99 5810 amdgpu_device_set_mp1_state(adev);
acd89fca
AG
5811
5812 /*
5813 * Block any work scheduling as we do for regular GPU reset
5814 * for the duration of the recovery
5815 */
5816 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5817 struct amdgpu_ring *ring = adev->rings[i];
5818
5819 if (!ring || !ring->sched.thread)
5820 continue;
5821
5822 drm_sched_stop(&ring->sched, NULL);
5823 }
8f8c80f4 5824 atomic_inc(&adev->gpu_reset_counter);
c9a6b82f
AG
5825 return PCI_ERS_RESULT_NEED_RESET;
5826 case pci_channel_io_perm_failure:
5827 /* Permanent error, prepare for device removal */
5828 return PCI_ERS_RESULT_DISCONNECT;
5829 }
5830
5831 return PCI_ERS_RESULT_NEED_RESET;
5832}
5833
5834/**
5835 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
5836 * @pdev: pointer to PCI device
5837 */
5838pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
5839{
5840
5841 DRM_INFO("PCI error: mmio enabled callback!!\n");
5842
5843 /* TODO - dump whatever for debugging purposes */
5844
5845 /* This called only if amdgpu_pci_error_detected returns
5846 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
5847 * works, no need to reset slot.
5848 */
5849
5850 return PCI_ERS_RESULT_RECOVERED;
5851}
5852
5853/**
5854 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
5855 * @pdev: PCI device struct
5856 *
5857 * Description: This routine is called by the pci error recovery
5858 * code after the PCI slot has been reset, just before we
5859 * should resume normal operations.
5860 */
5861pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
5862{
5863 struct drm_device *dev = pci_get_drvdata(pdev);
5864 struct amdgpu_device *adev = drm_to_adev(dev);
362c7b91 5865 int r, i;
04442bf7 5866 struct amdgpu_reset_context reset_context;
362c7b91 5867 u32 memsize;
7ac71382 5868 struct list_head device_list;
c9a6b82f
AG
5869
5870 DRM_INFO("PCI error: slot reset callback!!\n");
5871
04442bf7
LL
5872 memset(&reset_context, 0, sizeof(reset_context));
5873
7ac71382 5874 INIT_LIST_HEAD(&device_list);
655ce9cb 5875 list_add_tail(&adev->reset_list, &device_list);
7ac71382 5876
362c7b91
AG
5877 /* wait for asic to come out of reset */
5878 msleep(500);
5879
7ac71382 5880 /* Restore PCI confspace */
c1dd4aa6 5881 amdgpu_device_load_pci_state(pdev);
c9a6b82f 5882
362c7b91
AG
5883 /* confirm ASIC came out of reset */
5884 for (i = 0; i < adev->usec_timeout; i++) {
5885 memsize = amdgpu_asic_get_config_memsize(adev);
5886
5887 if (memsize != 0xffffffff)
5888 break;
5889 udelay(1);
5890 }
5891 if (memsize == 0xffffffff) {
5892 r = -ETIME;
5893 goto out;
5894 }
5895
04442bf7
LL
5896 reset_context.method = AMD_RESET_METHOD_NONE;
5897 reset_context.reset_req_dev = adev;
5898 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
5899 set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
5900
7afefb81 5901 adev->no_hw_access = true;
04442bf7 5902 r = amdgpu_device_pre_asic_reset(adev, &reset_context);
7afefb81 5903 adev->no_hw_access = false;
c9a6b82f
AG
5904 if (r)
5905 goto out;
5906
04442bf7 5907 r = amdgpu_do_asic_reset(&device_list, &reset_context);
c9a6b82f
AG
5908
5909out:
c9a6b82f 5910 if (!r) {
c1dd4aa6
AG
5911 if (amdgpu_device_cache_pci_state(adev->pdev))
5912 pci_restore_state(adev->pdev);
5913
c9a6b82f
AG
5914 DRM_INFO("PCIe error recovery succeeded\n");
5915 } else {
5916 DRM_ERROR("PCIe error recovery failed, err:%d", r);
e923be99
AG
5917 amdgpu_device_unset_mp1_state(adev);
5918 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f
AG
5919 }
5920
5921 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
5922}
5923
5924/**
5925 * amdgpu_pci_resume() - resume normal ops after PCI reset
5926 * @pdev: pointer to PCI device
5927 *
5928 * Called when the error recovery driver tells us that its
505199a3 5929 * OK to resume normal operation.
c9a6b82f
AG
5930 */
5931void amdgpu_pci_resume(struct pci_dev *pdev)
5932{
5933 struct drm_device *dev = pci_get_drvdata(pdev);
5934 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5935 int i;
c9a6b82f 5936
c9a6b82f
AG
5937
5938 DRM_INFO("PCI error: resume callback!!\n");
acd89fca 5939
e17e27f9
GC
5940 /* Only continue execution for the case of pci_channel_io_frozen */
5941 if (adev->pci_channel_state != pci_channel_io_frozen)
5942 return;
5943
acd89fca
AG
5944 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5945 struct amdgpu_ring *ring = adev->rings[i];
5946
5947 if (!ring || !ring->sched.thread)
5948 continue;
5949
acd89fca
AG
5950 drm_sched_start(&ring->sched, true);
5951 }
5952
e923be99
AG
5953 amdgpu_device_unset_mp1_state(adev);
5954 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f 5955}
c1dd4aa6
AG
5956
5957bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
5958{
5959 struct drm_device *dev = pci_get_drvdata(pdev);
5960 struct amdgpu_device *adev = drm_to_adev(dev);
5961 int r;
5962
5963 r = pci_save_state(pdev);
5964 if (!r) {
5965 kfree(adev->pci_state);
5966
5967 adev->pci_state = pci_store_saved_state(pdev);
5968
5969 if (!adev->pci_state) {
5970 DRM_ERROR("Failed to store PCI saved state");
5971 return false;
5972 }
5973 } else {
5974 DRM_WARN("Failed to save PCI state, err:%d\n", r);
5975 return false;
5976 }
5977
5978 return true;
5979}
5980
5981bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
5982{
5983 struct drm_device *dev = pci_get_drvdata(pdev);
5984 struct amdgpu_device *adev = drm_to_adev(dev);
5985 int r;
5986
5987 if (!adev->pci_state)
5988 return false;
5989
5990 r = pci_load_saved_state(pdev, adev->pci_state);
5991
5992 if (!r) {
5993 pci_restore_state(pdev);
5994 } else {
5995 DRM_WARN("Failed to load PCI state, err:%d\n", r);
5996 return false;
5997 }
5998
5999 return true;
6000}
6001
810085dd
EH
6002void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
6003 struct amdgpu_ring *ring)
6004{
6005#ifdef CONFIG_X86_64
b818a5d3 6006 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
6007 return;
6008#endif
6009 if (adev->gmc.xgmi.connected_to_cpu)
6010 return;
6011
6012 if (ring && ring->funcs->emit_hdp_flush)
6013 amdgpu_ring_emit_hdp_flush(ring);
6014 else
6015 amdgpu_asic_flush_hdp(adev, ring);
6016}
c1dd4aa6 6017
810085dd
EH
6018void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
6019 struct amdgpu_ring *ring)
6020{
6021#ifdef CONFIG_X86_64
b818a5d3 6022 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
6023 return;
6024#endif
6025 if (adev->gmc.xgmi.connected_to_cpu)
6026 return;
c1dd4aa6 6027
810085dd
EH
6028 amdgpu_asic_invalidate_hdp(adev, ring);
6029}
34f3a4a9 6030
89a7a870
AG
6031int amdgpu_in_reset(struct amdgpu_device *adev)
6032{
6033 return atomic_read(&adev->reset_domain->in_gpu_reset);
53a17b6b
TZ
6034}
6035
34f3a4a9
LY
6036/**
6037 * amdgpu_device_halt() - bring hardware to some kind of halt state
6038 *
6039 * @adev: amdgpu_device pointer
6040 *
6041 * Bring hardware to some kind of halt state so that no one can touch it
6042 * any more. It will help to maintain error context when error occurred.
6043 * Compare to a simple hang, the system will keep stable at least for SSH
6044 * access. Then it should be trivial to inspect the hardware state and
6045 * see what's going on. Implemented as following:
6046 *
6047 * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
6048 * clears all CPU mappings to device, disallows remappings through page faults
6049 * 2. amdgpu_irq_disable_all() disables all interrupts
6050 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
6051 * 4. set adev->no_hw_access to avoid potential crashes after setp 5
6052 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
6053 * 6. pci_disable_device() and pci_wait_for_pending_transaction()
6054 * flush any in flight DMA operations
6055 */
6056void amdgpu_device_halt(struct amdgpu_device *adev)
6057{
6058 struct pci_dev *pdev = adev->pdev;
e0f943b4 6059 struct drm_device *ddev = adev_to_drm(adev);
34f3a4a9 6060
2c1c7ba4 6061 amdgpu_xcp_dev_unplug(adev);
34f3a4a9
LY
6062 drm_dev_unplug(ddev);
6063
6064 amdgpu_irq_disable_all(adev);
6065
6066 amdgpu_fence_driver_hw_fini(adev);
6067
6068 adev->no_hw_access = true;
6069
6070 amdgpu_device_unmap_mmio(adev);
6071
6072 pci_disable_device(pdev);
6073 pci_wait_for_pending_transaction(pdev);
6074}
86700a40
XD
6075
6076u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
6077 u32 reg)
6078{
6079 unsigned long flags, address, data;
6080 u32 r;
6081
6082 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6083 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6084
6085 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6086 WREG32(address, reg * 4);
6087 (void)RREG32(address);
6088 r = RREG32(data);
6089 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6090 return r;
6091}
6092
6093void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
6094 u32 reg, u32 v)
6095{
6096 unsigned long flags, address, data;
6097
6098 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6099 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6100
6101 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6102 WREG32(address, reg * 4);
6103 (void)RREG32(address);
6104 WREG32(data, v);
6105 (void)RREG32(data);
6106 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6107}
68ce8b24
CK
6108
6109/**
6110 * amdgpu_device_switch_gang - switch to a new gang
6111 * @adev: amdgpu_device pointer
6112 * @gang: the gang to switch to
6113 *
6114 * Try to switch to a new gang.
6115 * Returns: NULL if we switched to the new gang or a reference to the current
6116 * gang leader.
6117 */
6118struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
6119 struct dma_fence *gang)
6120{
6121 struct dma_fence *old = NULL;
6122
6123 do {
6124 dma_fence_put(old);
6125 rcu_read_lock();
6126 old = dma_fence_get_rcu_safe(&adev->gang_submit);
6127 rcu_read_unlock();
6128
6129 if (old == gang)
6130 break;
6131
6132 if (!dma_fence_is_signaled(old))
6133 return old;
6134
6135 } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
6136 old, gang) != old);
6137
6138 dma_fence_put(old);
6139 return NULL;
6140}
220c8cc8
AD
6141
6142bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6143{
6144 switch (adev->asic_type) {
6145#ifdef CONFIG_DRM_AMDGPU_SI
6146 case CHIP_HAINAN:
6147#endif
6148 case CHIP_TOPAZ:
6149 /* chips with no display hardware */
6150 return false;
6151#ifdef CONFIG_DRM_AMDGPU_SI
6152 case CHIP_TAHITI:
6153 case CHIP_PITCAIRN:
6154 case CHIP_VERDE:
6155 case CHIP_OLAND:
6156#endif
6157#ifdef CONFIG_DRM_AMDGPU_CIK
6158 case CHIP_BONAIRE:
6159 case CHIP_HAWAII:
6160 case CHIP_KAVERI:
6161 case CHIP_KABINI:
6162 case CHIP_MULLINS:
6163#endif
6164 case CHIP_TONGA:
6165 case CHIP_FIJI:
6166 case CHIP_POLARIS10:
6167 case CHIP_POLARIS11:
6168 case CHIP_POLARIS12:
6169 case CHIP_VEGAM:
6170 case CHIP_CARRIZO:
6171 case CHIP_STONEY:
6172 /* chips with display hardware */
6173 return true;
6174 default:
6175 /* IP discovery */
4e8303cf 6176 if (!amdgpu_ip_version(adev, DCE_HWIP, 0) ||
220c8cc8
AD
6177 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6178 return false;
6179 return true;
6180 }
6181}
81283fee
JZ
6182
6183uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
6184 uint32_t inst, uint32_t reg_addr, char reg_name[],
6185 uint32_t expected_value, uint32_t mask)
6186{
6187 uint32_t ret = 0;
6188 uint32_t old_ = 0;
6189 uint32_t tmp_ = RREG32(reg_addr);
6190 uint32_t loop = adev->usec_timeout;
6191
6192 while ((tmp_ & (mask)) != (expected_value)) {
6193 if (old_ != tmp_) {
6194 loop = adev->usec_timeout;
6195 old_ = tmp_;
6196 } else
6197 udelay(1);
6198 tmp_ = RREG32(reg_addr);
6199 loop--;
6200 if (!loop) {
6201 DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08xn",
6202 inst, reg_name, (uint32_t)expected_value,
6203 (uint32_t)(tmp_ & (mask)));
6204 ret = -ETIMEDOUT;
6205 break;
6206 }
6207 }
6208 return ret;
6209}