drm/amd: Explicitly check for GFXOFF to be enabled for s0ix
[linux-2.6-block.git] / drivers / gpu / drm / amd / amdgpu / amdgpu_device.c
CommitLineData
d38ceaf9
AD
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
b1ddf548 28#include <linux/power_supply.h>
0875dc9e 29#include <linux/kthread.h>
fdf2f6c5 30#include <linux/module.h>
d38ceaf9
AD
31#include <linux/console.h>
32#include <linux/slab.h>
4a74c38c 33#include <linux/iommu.h>
901e2be2 34#include <linux/pci.h>
08a2fd23 35#include <linux/pci-p2pdma.h>
d37a3929 36#include <linux/apple-gmux.h>
fdf2f6c5 37
b7cdb41e 38#include <drm/drm_aperture.h>
4562236b 39#include <drm/drm_atomic_helper.h>
973ad627 40#include <drm/drm_crtc_helper.h>
45b64fd9 41#include <drm/drm_fb_helper.h>
fcd70cd3 42#include <drm/drm_probe_helper.h>
d38ceaf9 43#include <drm/amdgpu_drm.h>
7b1c6263 44#include <linux/device.h>
d38ceaf9
AD
45#include <linux/vgaarb.h>
46#include <linux/vga_switcheroo.h>
47#include <linux/efi.h>
48#include "amdgpu.h"
f4b373f4 49#include "amdgpu_trace.h"
d38ceaf9
AD
50#include "amdgpu_i2c.h"
51#include "atom.h"
52#include "amdgpu_atombios.h"
a5bde2f9 53#include "amdgpu_atomfirmware.h"
d0dd7f0c 54#include "amd_pcie.h"
33f34802
KW
55#ifdef CONFIG_DRM_AMDGPU_SI
56#include "si.h"
57#endif
a2e73f56
AD
58#ifdef CONFIG_DRM_AMDGPU_CIK
59#include "cik.h"
60#endif
aaa36a97 61#include "vi.h"
460826e6 62#include "soc15.h"
0a5b8c7b 63#include "nv.h"
d38ceaf9 64#include "bif/bif_4_1_d.h"
bec86378 65#include <linux/firmware.h>
89041940 66#include "amdgpu_vf_error.h"
d38ceaf9 67
ba997709 68#include "amdgpu_amdkfd.h"
d2f52ac8 69#include "amdgpu_pm.h"
d38ceaf9 70
5183411b 71#include "amdgpu_xgmi.h"
c030f2e4 72#include "amdgpu_ras.h"
9c7c85f7 73#include "amdgpu_pmu.h"
bd607166 74#include "amdgpu_fru_eeprom.h"
04442bf7 75#include "amdgpu_reset.h"
85150626 76#include "amdgpu_virt.h"
5183411b 77
d5ea093e 78#include <linux/suspend.h>
c6a6e2db 79#include <drm/task_barrier.h>
3f12acc8 80#include <linux/pm_runtime.h>
d5ea093e 81
f89f8c6b
AG
82#include <drm/drm_drv.h>
83
3ad5dcfe
KHF
84#if IS_ENABLED(CONFIG_X86)
85#include <asm/intel-family.h>
86#endif
87
e2a75f88 88MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
3f76dced 89MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
2d2e5e7e 90MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
ad5a67a7 91MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
54c4d17e 92MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
65e60f6e 93MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
42b325e5 94MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
e2a75f88 95
2dc80b00 96#define AMDGPU_RESUME_MS 2000
7258fa31
SK
97#define AMDGPU_MAX_RETRY_LIMIT 2
98#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
2dc80b00 99
b7cdb41e
ML
100static const struct drm_driver amdgpu_kms_driver;
101
050091ab 102const char *amdgpu_asic_name[] = {
da69c161
KW
103 "TAHITI",
104 "PITCAIRN",
105 "VERDE",
106 "OLAND",
107 "HAINAN",
d38ceaf9
AD
108 "BONAIRE",
109 "KAVERI",
110 "KABINI",
111 "HAWAII",
112 "MULLINS",
113 "TOPAZ",
114 "TONGA",
48299f95 115 "FIJI",
d38ceaf9 116 "CARRIZO",
139f4917 117 "STONEY",
2cc0c0b5
FC
118 "POLARIS10",
119 "POLARIS11",
c4642a47 120 "POLARIS12",
48ff108d 121 "VEGAM",
d4196f01 122 "VEGA10",
8fab806a 123 "VEGA12",
956fcddc 124 "VEGA20",
2ca8a5d2 125 "RAVEN",
d6c3b24e 126 "ARCTURUS",
1eee4228 127 "RENOIR",
d46b417a 128 "ALDEBARAN",
852a6626 129 "NAVI10",
d0f56dc2 130 "CYAN_SKILLFISH",
87dbad02 131 "NAVI14",
9802f5d7 132 "NAVI12",
ccaf72d3 133 "SIENNA_CICHLID",
ddd8fbe7 134 "NAVY_FLOUNDER",
4f1e9a76 135 "VANGOGH",
a2468e04 136 "DIMGREY_CAVEFISH",
6f169591 137 "BEIGE_GOBY",
ee9236b7 138 "YELLOW_CARP",
3ae695d6 139 "IP DISCOVERY",
d38ceaf9
AD
140 "LAST",
141};
142
dcea6e65
KR
143/**
144 * DOC: pcie_replay_count
145 *
146 * The amdgpu driver provides a sysfs API for reporting the total number
147 * of PCIe replays (NAKs).
148 * The file pcie_replay_count is used for this and returns the total
149 * number of replays as a sum of the NAKs generated and NAKs received
150 */
151
152static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
153 struct device_attribute *attr, char *buf)
154{
155 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 156 struct amdgpu_device *adev = drm_to_adev(ddev);
dcea6e65
KR
157 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
158
36000c7a 159 return sysfs_emit(buf, "%llu\n", cnt);
dcea6e65
KR
160}
161
b8920e1e 162static DEVICE_ATTR(pcie_replay_count, 0444,
dcea6e65
KR
163 amdgpu_device_get_pcie_replay_count, NULL);
164
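/*
 * Usage sketch (the sysfs path below is an assumption about a typical
 * system and is not taken from this file): the attribute lives on the PCI
 * device, so the count can usually be read from userspace with e.g.
 *
 *   $ cat /sys/class/drm/card0/device/pcie_replay_count
 *   0
 *
 * sysfs_emit() above guarantees a single decimal value followed by a newline.
 */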
4798db85
LL
165/**
166 * DOC: board_info
167 *
168 * The amdgpu driver provides a sysfs API for giving board related information.
169 * It provides the form factor information in the format
170 *
171 * type : form factor
172 *
173 * Possible form factor values
174 *
175 * - "cem" - PCIE CEM card
176 * - "oam" - Open Compute Accelerator Module
177 * - "unknown" - Not known
178 *
179 */
180
76da73f0
LL
181static ssize_t amdgpu_device_get_board_info(struct device *dev,
182 struct device_attribute *attr,
183 char *buf)
184{
185 struct drm_device *ddev = dev_get_drvdata(dev);
186 struct amdgpu_device *adev = drm_to_adev(ddev);
187 enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM;
188 const char *pkg;
189
190 if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type)
191 pkg_type = adev->smuio.funcs->get_pkg_type(adev);
192
193 switch (pkg_type) {
194 case AMDGPU_PKG_TYPE_CEM:
195 pkg = "cem";
196 break;
197 case AMDGPU_PKG_TYPE_OAM:
198 pkg = "oam";
199 break;
200 default:
201 pkg = "unknown";
202 break;
203 }
204
205 return sysfs_emit(buf, "%s : %s\n", "type", pkg);
206}
207
208static DEVICE_ATTR(board_info, 0444, amdgpu_device_get_board_info, NULL);
209
210static struct attribute *amdgpu_board_attrs[] = {
211 &dev_attr_board_info.attr,
212 NULL,
213};
214
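/* Form factor reporting only applies to dGPU boards, so hide board_info on APUs. */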
215static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj,
216 struct attribute *attr, int n)
217{
218 struct device *dev = kobj_to_dev(kobj);
219 struct drm_device *ddev = dev_get_drvdata(dev);
220 struct amdgpu_device *adev = drm_to_adev(ddev);
221
222 if (adev->flags & AMD_IS_APU)
223 return 0;
224
225 return attr->mode;
226}
227
228static const struct attribute_group amdgpu_board_attrs_group = {
229 .attrs = amdgpu_board_attrs,
230 .is_visible = amdgpu_board_attrs_is_visible
231};
232
5494d864
AD
233static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
234
bd607166 235
fd496ca8 236/**
b98c6299 237 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
fd496ca8
AD
238 *
239 * @dev: drm_device pointer
240 *
b98c6299 241 * Returns true if the device is a dGPU with ATPX power control,
fd496ca8
AD
242 * otherwise return false.
243 */
b98c6299 244bool amdgpu_device_supports_px(struct drm_device *dev)
fd496ca8
AD
245{
246 struct amdgpu_device *adev = drm_to_adev(dev);
247
b98c6299 248 if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
fd496ca8
AD
249 return true;
250 return false;
251}
252
e3ecdffa 253/**
0330b848 254 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
e3ecdffa
AD
255 *
256 * @dev: drm_device pointer
257 *
b98c6299 258 * Returns true if the device is a dGPU with ACPI power control,
e3ecdffa
AD
259 * otherwise return false.
260 */
31af062a 261bool amdgpu_device_supports_boco(struct drm_device *dev)
d38ceaf9 262{
1348969a 263 struct amdgpu_device *adev = drm_to_adev(dev);
d38ceaf9 264
b98c6299
AD
265 if (adev->has_pr3 ||
266 ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
d38ceaf9
AD
267 return true;
268 return false;
269}
270
a69cba42
AD
271/**
272 * amdgpu_device_supports_baco - Does the device support BACO
273 *
274 * @dev: drm_device pointer
275 *
276 * Returns true if the device supports BACO,
277 * otherwise return false.
278 */
279bool amdgpu_device_supports_baco(struct drm_device *dev)
280{
1348969a 281 struct amdgpu_device *adev = drm_to_adev(dev);
a69cba42
AD
282
283 return amdgpu_asic_supports_baco(adev);
284}
285
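/*
 * Naming note added for clarity (terms as commonly used in amdgpu): PX is
 * ATPX-based hybrid graphics power control, BOCO is "Bus Off, Chip Off"
 * (the dGPU is powered down through ACPI power resources), and BACO is
 * "Bus Active, Chip Off" (the chip powers down while the PCIe bus stays
 * active).
 */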
3fa8f89d
S
286/**
287 * amdgpu_device_supports_smart_shift - Is the device dGPU with
288 * smart shift support
289 *
290 * @dev: drm_device pointer
291 *
292 * Returns true if the device is a dGPU with Smart Shift support,
293 * otherwise returns false.
294 */
295bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
296{
297 return (amdgpu_device_supports_boco(dev) &&
298 amdgpu_acpi_is_power_shift_control_supported());
299}
300
6e3cd2a9
MCC
301/*
302 * VRAM access helper functions
303 */
304
e35e2b11 305/**
048af66b 306 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
e35e2b11
TY
307 *
308 * @adev: amdgpu_device pointer
309 * @pos: offset of the buffer in vram
310 * @buf: virtual address of the buffer in system memory
311 * @size: read/write size; the size of @buf must be >= @size
312 * @write: true - write to vram, otherwise - read from vram
313 */
048af66b
KW
314void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
315 void *buf, size_t size, bool write)
e35e2b11 316{
e35e2b11 317 unsigned long flags;
048af66b
KW
318 uint32_t hi = ~0, tmp = 0;
319 uint32_t *data = buf;
ce05ac56 320 uint64_t last;
f89f8c6b 321 int idx;
ce05ac56 322
c58a863b 323 if (!drm_dev_enter(adev_to_drm(adev), &idx))
f89f8c6b 324 return;
9d11eb0d 325
048af66b
KW
326 BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
327
328 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
329 for (last = pos + size; pos < last; pos += 4) {
330 tmp = pos >> 31;
331
332 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
333 if (tmp != hi) {
334 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
335 hi = tmp;
336 }
337 if (write)
338 WREG32_NO_KIQ(mmMM_DATA, *data++);
339 else
340 *data++ = RREG32_NO_KIQ(mmMM_DATA);
341 }
342
343 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
344 drm_dev_exit(idx);
345}
346
347/**
bbe04dec 348 * amdgpu_device_aper_access - access vram by the vram aperture
048af66b
KW
349 *
350 * @adev: amdgpu_device pointer
351 * @pos: offset of the buffer in vram
352 * @buf: virtual address of the buffer in system memory
353 * @size: read/write size; the size of @buf must be >= @size
354 * @write: true - write to vram, otherwise - read from vram
355 *
356 * The return value means how many bytes have been transferred.
357 */
358size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
359 void *buf, size_t size, bool write)
360{
9d11eb0d 361#ifdef CONFIG_64BIT
048af66b
KW
362 void __iomem *addr;
363 size_t count = 0;
364 uint64_t last;
365
366 if (!adev->mman.aper_base_kaddr)
367 return 0;
368
9d11eb0d
CK
369 last = min(pos + size, adev->gmc.visible_vram_size);
370 if (last > pos) {
048af66b
KW
371 addr = adev->mman.aper_base_kaddr + pos;
372 count = last - pos;
9d11eb0d
CK
373
374 if (write) {
375 memcpy_toio(addr, buf, count);
4c452b5c
SS
376 /* Make sure HDP write cache flush happens without any reordering
377 * after the system memory contents are sent over PCIe device
378 */
9d11eb0d 379 mb();
810085dd 380 amdgpu_device_flush_hdp(adev, NULL);
9d11eb0d 381 } else {
810085dd 382 amdgpu_device_invalidate_hdp(adev, NULL);
4c452b5c
SS
383 /* Make sure HDP read cache is invalidated before issuing a read
384 * to the PCIe device
385 */
9d11eb0d
CK
386 mb();
387 memcpy_fromio(buf, addr, count);
388 }
389
9d11eb0d 390 }
048af66b
KW
391
392 return count;
393#else
394 return 0;
9d11eb0d 395#endif
048af66b 396}
9d11eb0d 397
048af66b
KW
398/**
399 * amdgpu_device_vram_access - read/write a buffer in vram
400 *
401 * @adev: amdgpu_device pointer
402 * @pos: offset of the buffer in vram
403 * @buf: virtual address of the buffer in system memory
404 * @size: read/write size; the size of @buf must be >= @size
405 * @write: true - write to vram, otherwise - read from vram
406 */
407void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
408 void *buf, size_t size, bool write)
409{
410 size_t count;
e35e2b11 411
048af66b
KW
412 /* try using the vram aperture to access vram first */
413 count = amdgpu_device_aper_access(adev, pos, buf, size, write);
414 size -= count;
415 if (size) {
416 /* use MM to access the rest of vram */
417 pos += count;
418 buf += count;
419 amdgpu_device_mm_access(adev, pos, buf, size, write);
e35e2b11
TY
420 }
421}
422
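/*
 * Minimal usage sketch (the offset and buffer are made-up illustrations):
 * read a few dwords from VRAM through the helper above, which prefers the
 * CPU-visible aperture and falls back to the MM_INDEX/MM_DATA window for
 * whatever remains.
 *
 *   u32 vals[4];
 *
 *   amdgpu_device_vram_access(adev, 0x1000, vals, sizeof(vals), false);
 *
 * The MM fallback BUG()s on unaligned accesses, so @pos and @size are kept
 * 4-byte aligned by callers.
 */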
d38ceaf9 423/*
f7ee1874 424 * register access helper functions.
d38ceaf9 425 */
56b53c0b
DL
426
427/* Check if hw access should be skipped because of hotplug or device error */
428bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
429{
7afefb81 430 if (adev->no_hw_access)
56b53c0b
DL
431 return true;
432
433#ifdef CONFIG_LOCKDEP
434 /*
435 * This is a bit complicated to understand, so worth a comment. What we assert
436 * here is that the GPU reset is not running on another thread in parallel.
437 *
438 * For this we trylock the read side of the reset semaphore, if that succeeds
439 * we know that the reset is not running in parallel.
440 *
441 * If the trylock fails we assert that we are either already holding the read
442 * side of the lock or are the reset thread itself and hold the write side of
443 * the lock.
444 */
445 if (in_task()) {
d0fb18b5
AG
446 if (down_read_trylock(&adev->reset_domain->sem))
447 up_read(&adev->reset_domain->sem);
56b53c0b 448 else
d0fb18b5 449 lockdep_assert_held(&adev->reset_domain->sem);
56b53c0b
DL
450 }
451#endif
452 return false;
453}
454
e3ecdffa 455/**
f7ee1874 456 * amdgpu_device_rreg - read a memory mapped IO or indirect register
e3ecdffa
AD
457 *
458 * @adev: amdgpu_device pointer
459 * @reg: dword aligned register offset
460 * @acc_flags: access flags which require special behavior
461 *
462 * Returns the 32 bit value from the offset specified.
463 */
f7ee1874
HZ
464uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
465 uint32_t reg, uint32_t acc_flags)
d38ceaf9 466{
f4b373f4
TSD
467 uint32_t ret;
468
56b53c0b 469 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
470 return 0;
471
f7ee1874
HZ
472 if ((reg * 4) < adev->rmmio_size) {
473 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
474 amdgpu_sriov_runtime(adev) &&
d0fb18b5 475 down_read_trylock(&adev->reset_domain->sem)) {
85150626 476 ret = amdgpu_kiq_rreg(adev, reg, 0);
d0fb18b5 477 up_read(&adev->reset_domain->sem);
f7ee1874
HZ
478 } else {
479 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
480 }
481 } else {
482 ret = adev->pcie_rreg(adev, reg * 4);
81202807 483 }
bc992ba5 484
f7ee1874 485 trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
e78b579d 486
f4b373f4 487 return ret;
d38ceaf9
AD
488}
489
421a2a30
ML
490/*
491 * MMIO register read with bytes helper functions
492 * @offset: byte offset from MMIO start
b8920e1e 493 */
421a2a30 494
e3ecdffa
AD
495/**
496 * amdgpu_mm_rreg8 - read a memory mapped IO register
497 *
498 * @adev: amdgpu_device pointer
499 * @offset: byte aligned register offset
500 *
501 * Returns the 8 bit value from the offset specified.
502 */
7cbbc745
AG
503uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
504{
56b53c0b 505 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
506 return 0;
507
421a2a30
ML
508 if (offset < adev->rmmio_size)
509 return (readb(adev->rmmio + offset));
510 BUG();
511}
512
85150626
VL
513
514/**
515 * amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register with specific XCC
516 *
517 * @adev: amdgpu_device pointer
518 * @reg: dword aligned register offset
519 * @acc_flags: access flags which require special behavior
520 * @xcc_id: xcc accelerated compute core id
521 *
522 * Returns the 32 bit value from the offset specified.
523 */
524uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
525 uint32_t reg, uint32_t acc_flags,
526 uint32_t xcc_id)
527{
528 uint32_t ret, rlcg_flag;
529
530 if (amdgpu_device_skip_hw_access(adev))
531 return 0;
532
533 if ((reg * 4) < adev->rmmio_size) {
534 if (amdgpu_sriov_vf(adev) &&
535 !amdgpu_sriov_runtime(adev) &&
536 adev->gfx.rlc.rlcg_reg_access_supported &&
537 amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
538 GC_HWIP, false,
539 &rlcg_flag)) {
540 ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, xcc_id);
541 } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
542 amdgpu_sriov_runtime(adev) &&
543 down_read_trylock(&adev->reset_domain->sem)) {
544 ret = amdgpu_kiq_rreg(adev, reg, xcc_id);
545 up_read(&adev->reset_domain->sem);
546 } else {
547 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
548 }
549 } else {
550 ret = adev->pcie_rreg(adev, reg * 4);
551 }
552
553 return ret;
554}
555
421a2a30
ML
556/*
557 * MMIO register write with bytes helper functions
558 * @offset: byte offset from MMIO start
559 * @value: the value to be written to the register
b8920e1e
SS
560 */
561
e3ecdffa
AD
562/**
563 * amdgpu_mm_wreg8 - write a memory mapped IO register
564 *
565 * @adev: amdgpu_device pointer
566 * @offset: byte aligned register offset
567 * @value: 8 bit value to write
568 *
569 * Writes the value specified to the offset specified.
570 */
7cbbc745
AG
571void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
572{
56b53c0b 573 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
574 return;
575
421a2a30
ML
576 if (offset < adev->rmmio_size)
577 writeb(value, adev->rmmio + offset);
578 else
579 BUG();
580}
581
e3ecdffa 582/**
f7ee1874 583 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
e3ecdffa
AD
584 *
585 * @adev: amdgpu_device pointer
586 * @reg: dword aligned register offset
587 * @v: 32 bit value to write to the register
588 * @acc_flags: access flags which require special behavior
589 *
590 * Writes the value specified to the offset specified.
591 */
f7ee1874
HZ
592void amdgpu_device_wreg(struct amdgpu_device *adev,
593 uint32_t reg, uint32_t v,
594 uint32_t acc_flags)
d38ceaf9 595{
56b53c0b 596 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
597 return;
598
f7ee1874
HZ
599 if ((reg * 4) < adev->rmmio_size) {
600 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
601 amdgpu_sriov_runtime(adev) &&
d0fb18b5 602 down_read_trylock(&adev->reset_domain->sem)) {
85150626 603 amdgpu_kiq_wreg(adev, reg, v, 0);
d0fb18b5 604 up_read(&adev->reset_domain->sem);
f7ee1874
HZ
605 } else {
606 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
607 }
608 } else {
609 adev->pcie_wreg(adev, reg * 4, v);
81202807 610 }
bc992ba5 611
f7ee1874 612 trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
2e0cc4d4 613}
d38ceaf9 614
03f2abb0 615/**
4cc9f86f 616 * amdgpu_mm_wreg_mmio_rlc - write a register either with direct/indirect mmio or with the RLC path if in range
2e0cc4d4 617 *
71579346
RB
618 * @adev: amdgpu_device pointer
619 * @reg: mmio/rlc register
620 * @v: value to write
8057a9d6 621 * @xcc_id: xcc accelerated compute core id
71579346
RB
622 *
623 * this function is invoked only for the debugfs register access
03f2abb0 624 */
f7ee1874 625void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
8ed49dd1
VL
626 uint32_t reg, uint32_t v,
627 uint32_t xcc_id)
2e0cc4d4 628{
56b53c0b 629 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
630 return;
631
2e0cc4d4 632 if (amdgpu_sriov_fullaccess(adev) &&
f7ee1874
HZ
633 adev->gfx.rlc.funcs &&
634 adev->gfx.rlc.funcs->is_rlcg_access_range) {
2e0cc4d4 635 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
8ed49dd1 636 return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
4cc9f86f
TSD
637 } else if ((reg * 4) >= adev->rmmio_size) {
638 adev->pcie_wreg(adev, reg * 4, v);
f7ee1874
HZ
639 } else {
640 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
47ed4e1c 641 }
d38ceaf9
AD
642}
643
85150626
VL
644/**
645 * amdgpu_device_xcc_wreg - write to a memory mapped IO or indirect register with specific XCC
646 *
647 * @adev: amdgpu_device pointer
648 * @reg: dword aligned register offset
649 * @v: 32 bit value to write to the register
650 * @acc_flags: access flags which require special behavior
651 * @xcc_id: xcc accelerated compute core id
652 *
653 * Writes the value specified to the offset specified.
654 */
655void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
656 uint32_t reg, uint32_t v,
657 uint32_t acc_flags, uint32_t xcc_id)
658{
659 uint32_t rlcg_flag;
660
661 if (amdgpu_device_skip_hw_access(adev))
662 return;
663
664 if ((reg * 4) < adev->rmmio_size) {
665 if (amdgpu_sriov_vf(adev) &&
666 !amdgpu_sriov_runtime(adev) &&
667 adev->gfx.rlc.rlcg_reg_access_supported &&
668 amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
669 GC_HWIP, true,
670 &rlcg_flag)) {
671 amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, xcc_id);
672 } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
673 amdgpu_sriov_runtime(adev) &&
674 down_read_trylock(&adev->reset_domain->sem)) {
675 amdgpu_kiq_wreg(adev, reg, v, xcc_id);
676 up_read(&adev->reset_domain->sem);
677 } else {
678 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
679 }
680 } else {
681 adev->pcie_wreg(adev, reg * 4, v);
682 }
683}
684
1bba3683
HZ
685/**
686 * amdgpu_device_indirect_rreg - read an indirect register
687 *
688 * @adev: amdgpu_device pointer
22f453fb 689 * @reg_addr: indirect register address to read from
1bba3683
HZ
690 *
691 * Returns the value of indirect register @reg_addr
692 */
693u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
1bba3683
HZ
694 u32 reg_addr)
695{
65ba96e9 696 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
697 void __iomem *pcie_index_offset;
698 void __iomem *pcie_data_offset;
65ba96e9
HZ
699 u32 r;
700
701 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
702 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1bba3683
HZ
703
704 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
705 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
706 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
707
708 writel(reg_addr, pcie_index_offset);
709 readl(pcie_index_offset);
710 r = readl(pcie_data_offset);
711 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
712
713 return r;
714}
715
0c552ed3
LM
716u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
717 u64 reg_addr)
718{
719 unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
720 u32 r;
721 void __iomem *pcie_index_offset;
722 void __iomem *pcie_index_hi_offset;
723 void __iomem *pcie_data_offset;
724
725 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
726 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
d57e24aa 727 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
0c552ed3
LM
728 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
729 else
730 pcie_index_hi = 0;
731
732 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
733 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
734 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
735 if (pcie_index_hi != 0)
736 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
737 pcie_index_hi * 4;
738
739 writel(reg_addr, pcie_index_offset);
740 readl(pcie_index_offset);
741 if (pcie_index_hi != 0) {
742 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
743 readl(pcie_index_hi_offset);
744 }
745 r = readl(pcie_data_offset);
746
747 /* clear the high bits */
748 if (pcie_index_hi != 0) {
749 writel(0, pcie_index_hi_offset);
750 readl(pcie_index_hi_offset);
751 }
752
753 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
754
755 return r;
756}
757
1bba3683
HZ
758/**
759 * amdgpu_device_indirect_rreg64 - read a 64 bit indirect register
760 *
761 * @adev: amdgpu_device pointer
22f453fb 762 * @reg_addr: indirect register address to read from
1bba3683
HZ
763 *
764 * Returns the value of indirect register @reg_addr
765 */
766u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
1bba3683
HZ
767 u32 reg_addr)
768{
65ba96e9 769 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
770 void __iomem *pcie_index_offset;
771 void __iomem *pcie_data_offset;
65ba96e9
HZ
772 u64 r;
773
774 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
775 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1bba3683
HZ
776
777 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
778 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
779 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
780
781 /* read low 32 bits */
782 writel(reg_addr, pcie_index_offset);
783 readl(pcie_index_offset);
784 r = readl(pcie_data_offset);
785 /* read high 32 bits */
786 writel(reg_addr + 4, pcie_index_offset);
787 readl(pcie_index_offset);
788 r |= ((u64)readl(pcie_data_offset) << 32);
789 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
790
791 return r;
792}
793
a76b2870
CL
794u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
795 u64 reg_addr)
796{
797 unsigned long flags, pcie_index, pcie_data;
798 unsigned long pcie_index_hi = 0;
799 void __iomem *pcie_index_offset;
800 void __iomem *pcie_index_hi_offset;
801 void __iomem *pcie_data_offset;
802 u64 r;
803
804 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
805 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
806 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
807 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
808
809 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
810 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
811 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
812 if (pcie_index_hi != 0)
813 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
814 pcie_index_hi * 4;
815
816 /* read low 32 bits */
817 writel(reg_addr, pcie_index_offset);
818 readl(pcie_index_offset);
819 if (pcie_index_hi != 0) {
820 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
821 readl(pcie_index_hi_offset);
822 }
823 r = readl(pcie_data_offset);
824 /* read high 32 bits */
825 writel(reg_addr + 4, pcie_index_offset);
826 readl(pcie_index_offset);
827 if (pcie_index_hi != 0) {
828 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
829 readl(pcie_index_hi_offset);
830 }
831 r |= ((u64)readl(pcie_data_offset) << 32);
832
833 /* clear the high bits */
834 if (pcie_index_hi != 0) {
835 writel(0, pcie_index_hi_offset);
836 readl(pcie_index_hi_offset);
837 }
838
839 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
840
841 return r;
842}
843
1bba3683
HZ
844/**
845 * amdgpu_device_indirect_wreg - write to an indirect register
846 *
847 * @adev: amdgpu_device pointer
1bba3683
HZ
848 * @reg_addr: indirect register offset
849 * @reg_data: indirect register data
850 *
851 */
852void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
1bba3683
HZ
853 u32 reg_addr, u32 reg_data)
854{
65ba96e9 855 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
856 void __iomem *pcie_index_offset;
857 void __iomem *pcie_data_offset;
858
65ba96e9
HZ
859 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
860 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
861
1bba3683
HZ
862 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
863 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
864 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
865
866 writel(reg_addr, pcie_index_offset);
867 readl(pcie_index_offset);
868 writel(reg_data, pcie_data_offset);
869 readl(pcie_data_offset);
870 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
871}
872
0c552ed3
LM
873void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
874 u64 reg_addr, u32 reg_data)
875{
876 unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
877 void __iomem *pcie_index_offset;
878 void __iomem *pcie_index_hi_offset;
879 void __iomem *pcie_data_offset;
880
881 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
882 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
d57e24aa 883 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
0c552ed3
LM
884 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
885 else
886 pcie_index_hi = 0;
887
888 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
889 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
890 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
891 if (pcie_index_hi != 0)
892 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
893 pcie_index_hi * 4;
894
895 writel(reg_addr, pcie_index_offset);
896 readl(pcie_index_offset);
897 if (pcie_index_hi != 0) {
898 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
899 readl(pcie_index_hi_offset);
900 }
901 writel(reg_data, pcie_data_offset);
902 readl(pcie_data_offset);
903
904 /* clear the high bits */
905 if (pcie_index_hi != 0) {
906 writel(0, pcie_index_hi_offset);
907 readl(pcie_index_hi_offset);
908 }
909
910 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
911}
912
1bba3683
HZ
913/**
914 * amdgpu_device_indirect_wreg64 - write to a 64 bit indirect register
915 *
916 * @adev: amdgpu_device pointer
1bba3683
HZ
917 * @reg_addr: indirect register offset
918 * @reg_data: indirect register data
919 *
920 */
921void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
1bba3683
HZ
922 u32 reg_addr, u64 reg_data)
923{
65ba96e9 924 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
925 void __iomem *pcie_index_offset;
926 void __iomem *pcie_data_offset;
927
65ba96e9
HZ
928 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
929 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
930
1bba3683
HZ
931 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
932 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
933 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
934
935 /* write low 32 bits */
936 writel(reg_addr, pcie_index_offset);
937 readl(pcie_index_offset);
938 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
939 readl(pcie_data_offset);
940 /* write high 32 bits */
941 writel(reg_addr + 4, pcie_index_offset);
942 readl(pcie_index_offset);
943 writel((u32)(reg_data >> 32), pcie_data_offset);
944 readl(pcie_data_offset);
945 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
946}
947
a76b2870
CL
948void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
949 u64 reg_addr, u64 reg_data)
950{
951 unsigned long flags, pcie_index, pcie_data;
952 unsigned long pcie_index_hi = 0;
953 void __iomem *pcie_index_offset;
954 void __iomem *pcie_index_hi_offset;
955 void __iomem *pcie_data_offset;
956
957 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
958 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
959 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
960 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
961
962 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
963 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
964 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
965 if (pcie_index_hi != 0)
966 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
967 pcie_index_hi * 4;
968
969 /* write low 32 bits */
970 writel(reg_addr, pcie_index_offset);
971 readl(pcie_index_offset);
972 if (pcie_index_hi != 0) {
973 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
974 readl(pcie_index_hi_offset);
975 }
976 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
977 readl(pcie_data_offset);
978 /* write high 32 bits */
979 writel(reg_addr + 4, pcie_index_offset);
980 readl(pcie_index_offset);
981 if (pcie_index_hi != 0) {
982 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
983 readl(pcie_index_hi_offset);
984 }
985 writel((u32)(reg_data >> 32), pcie_data_offset);
986 readl(pcie_data_offset);
987
988 /* clear the high bits */
989 if (pcie_index_hi != 0) {
990 writel(0, pcie_index_hi_offset);
991 readl(pcie_index_hi_offset);
992 }
993
994 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
995}
996
dabc114e
HZ
997/**
998 * amdgpu_device_get_rev_id - query device rev_id
999 *
1000 * @adev: amdgpu_device pointer
1001 *
1002 * Return device rev_id
1003 */
1004u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
1005{
1006 return adev->nbio.funcs->get_rev_id(adev);
1007}
1008
d38ceaf9
AD
1009/**
1010 * amdgpu_invalid_rreg - dummy reg read function
1011 *
982a820b 1012 * @adev: amdgpu_device pointer
d38ceaf9
AD
1013 * @reg: offset of register
1014 *
1015 * Dummy register read function. Used for register blocks
1016 * that certain asics don't have (all asics).
1017 * Returns the value in the register.
1018 */
1019static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
1020{
1021 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
1022 BUG();
1023 return 0;
1024}
1025
0c552ed3
LM
1026static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
1027{
1028 DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
1029 BUG();
1030 return 0;
1031}
1032
d38ceaf9
AD
1033/**
1034 * amdgpu_invalid_wreg - dummy reg write function
1035 *
982a820b 1036 * @adev: amdgpu_device pointer
d38ceaf9
AD
1037 * @reg: offset of register
1038 * @v: value to write to the register
1039 *
1040 * Dummy register write function. Used for register blocks
1041 * that certain asics don't have (all asics).
1042 */
1043static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
1044{
1045 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
1046 reg, v);
1047 BUG();
1048}
1049
0c552ed3
LM
1050static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
1051{
1052 DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
1053 reg, v);
1054 BUG();
1055}
1056
4fa1c6a6
TZ
1057/**
1058 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
1059 *
982a820b 1060 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
1061 * @reg: offset of register
1062 *
1063 * Dummy register read function. Used for register blocks
1064 * that certain asics don't have (all asics).
1065 * Returns the value in the register.
1066 */
1067static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
1068{
1069 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
1070 BUG();
1071 return 0;
1072}
1073
a76b2870
CL
1074static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg)
1075{
1076 DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
1077 BUG();
1078 return 0;
1079}
1080
4fa1c6a6
TZ
1081/**
1082 * amdgpu_invalid_wreg64 - dummy reg write function
1083 *
982a820b 1084 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
1085 * @reg: offset of register
1086 * @v: value to write to the register
1087 *
1088 * Dummy register write function. Used for register blocks
1089 * that certain asics don't have (all asics).
1090 */
1091static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
1092{
1093 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
1094 reg, v);
1095 BUG();
1096}
1097
a76b2870
CL
1098static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v)
1099{
1100 DRM_ERROR("Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n",
1101 reg, v);
1102 BUG();
1103}
1104
d38ceaf9
AD
1105/**
1106 * amdgpu_block_invalid_rreg - dummy reg read function
1107 *
982a820b 1108 * @adev: amdgpu_device pointer
d38ceaf9
AD
1109 * @block: offset of instance
1110 * @reg: offset of register
1111 *
1112 * Dummy register read function. Used for register blocks
1113 * that certain asics don't have (all asics).
1114 * Returns the value in the register.
1115 */
1116static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
1117 uint32_t block, uint32_t reg)
1118{
1119 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
1120 reg, block);
1121 BUG();
1122 return 0;
1123}
1124
1125/**
1126 * amdgpu_block_invalid_wreg - dummy reg write function
1127 *
982a820b 1128 * @adev: amdgpu_device pointer
d38ceaf9
AD
1129 * @block: offset of instance
1130 * @reg: offset of register
1131 * @v: value to write to the register
1132 *
1133 * Dummy register write function. Used for register blocks
1134 * that certain asics don't have (all asics).
1135 */
1136static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
1137 uint32_t block,
1138 uint32_t reg, uint32_t v)
1139{
1140 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
1141 reg, block, v);
1142 BUG();
1143}
1144
4d2997ab
AD
1145/**
1146 * amdgpu_device_asic_init - Wrapper for atom asic_init
1147 *
982a820b 1148 * @adev: amdgpu_device pointer
4d2997ab
AD
1149 *
1150 * Does any asic specific work and then calls atom asic init.
1151 */
1152static int amdgpu_device_asic_init(struct amdgpu_device *adev)
1153{
15c5c5f5
LL
1154 int ret;
1155
4d2997ab
AD
1156 amdgpu_asic_pre_asic_init(adev);
1157
4e8303cf
LL
1158 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
1159 amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
15c5c5f5
LL
1160 amdgpu_psp_wait_for_bootloader(adev);
1161 ret = amdgpu_atomfirmware_asic_init(adev, true);
23618280
HZ
1162 /* TODO: check the return val and stop device initialization if boot fails */
1163 amdgpu_psp_query_boot_status(adev);
15c5c5f5
LL
1164 return ret;
1165 } else {
85d1bcc6 1166 return amdgpu_atom_asic_init(adev->mode_info.atom_context);
15c5c5f5
LL
1167 }
1168
1169 return 0;
4d2997ab
AD
1170}
1171
e3ecdffa 1172/**
7ccfd79f 1173 * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
e3ecdffa 1174 *
982a820b 1175 * @adev: amdgpu_device pointer
e3ecdffa
AD
1176 *
1177 * Allocates a scratch page of VRAM for use by various things in the
1178 * driver.
1179 */
7ccfd79f 1180static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
d38ceaf9 1181{
7ccfd79f
CK
1182 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
1183 AMDGPU_GEM_DOMAIN_VRAM |
1184 AMDGPU_GEM_DOMAIN_GTT,
1185 &adev->mem_scratch.robj,
1186 &adev->mem_scratch.gpu_addr,
1187 (void **)&adev->mem_scratch.ptr);
d38ceaf9
AD
1188}
1189
e3ecdffa 1190/**
7ccfd79f 1191 * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
e3ecdffa 1192 *
982a820b 1193 * @adev: amdgpu_device pointer
e3ecdffa
AD
1194 *
1195 * Frees the VRAM scratch page.
1196 */
7ccfd79f 1197static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 1198{
7ccfd79f 1199 amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
d38ceaf9
AD
1200}
1201
1202/**
9c3f2b54 1203 * amdgpu_device_program_register_sequence - program an array of registers.
d38ceaf9
AD
1204 *
1205 * @adev: amdgpu_device pointer
1206 * @registers: pointer to the register array
1207 * @array_size: size of the register array
1208 *
b8920e1e 1209 * Programs an array of registers with AND/OR masks.
d38ceaf9
AD
1210 * This is a helper for setting golden registers.
1211 */
9c3f2b54
AD
1212void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
1213 const u32 *registers,
1214 const u32 array_size)
d38ceaf9
AD
1215{
1216 u32 tmp, reg, and_mask, or_mask;
1217 int i;
1218
1219 if (array_size % 3)
1220 return;
1221
47fc644f 1222 for (i = 0; i < array_size; i += 3) {
d38ceaf9
AD
1223 reg = registers[i + 0];
1224 and_mask = registers[i + 1];
1225 or_mask = registers[i + 2];
1226
1227 if (and_mask == 0xffffffff) {
1228 tmp = or_mask;
1229 } else {
1230 tmp = RREG32(reg);
1231 tmp &= ~and_mask;
e0d07657
HZ
1232 if (adev->family >= AMDGPU_FAMILY_AI)
1233 tmp |= (or_mask & and_mask);
1234 else
1235 tmp |= or_mask;
d38ceaf9
AD
1236 }
1237 WREG32(reg, tmp);
1238 }
1239}
1240
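/*
 * Illustrative golden-register table (the offsets and values are
 * hypothetical; only the layout matters): entries are {reg, and_mask,
 * or_mask} triplets, and an and_mask of 0xffffffff means "write or_mask
 * directly" instead of a read-modify-write.
 *
 *   static const u32 example_golden_settings[] = {
 *           0x1234, 0xffffffff, 0x00000001,
 *           0x1238, 0x0000000f, 0x00000002,
 *   };
 *
 *   amdgpu_device_program_register_sequence(adev, example_golden_settings,
 *                                           ARRAY_SIZE(example_golden_settings));
 */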
e3ecdffa
AD
1241/**
1242 * amdgpu_device_pci_config_reset - reset the GPU
1243 *
1244 * @adev: amdgpu_device pointer
1245 *
1246 * Resets the GPU using the pci config reset sequence.
1247 * Only applicable to asics prior to vega10.
1248 */
8111c387 1249void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
1250{
1251 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
1252}
1253
af484df8
AD
1254/**
1255 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1256 *
1257 * @adev: amdgpu_device pointer
1258 *
1259 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1260 */
1261int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1262{
1263 return pci_reset_function(adev->pdev);
1264}
1265
d38ceaf9 1266/*
06ec9070 1267 * amdgpu_device_wb_*()
455a7bc2 1268 * Writeback is the method by which the GPU updates special pages in memory
ea81a173 1269 * with the status of certain GPU events (fences, ring pointers, etc.).
d38ceaf9
AD
1270 */
1271
1272/**
06ec9070 1273 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
1274 *
1275 * @adev: amdgpu_device pointer
1276 *
1277 * Disables Writeback and frees the Writeback memory (all asics).
1278 * Used at driver shutdown.
1279 */
06ec9070 1280static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1281{
1282 if (adev->wb.wb_obj) {
a76ed485
AD
1283 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1284 &adev->wb.gpu_addr,
1285 (void **)&adev->wb.wb);
d38ceaf9
AD
1286 adev->wb.wb_obj = NULL;
1287 }
1288}
1289
1290/**
03f2abb0 1291 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
d38ceaf9
AD
1292 *
1293 * @adev: amdgpu_device pointer
1294 *
455a7bc2 1295 * Initializes writeback and allocates writeback memory (all asics).
d38ceaf9
AD
1296 * Used at driver startup.
1297 * Returns 0 on success or a negative error code on failure.
1298 */
06ec9070 1299static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
1300{
1301 int r;
1302
1303 if (adev->wb.wb_obj == NULL) {
97407b63
AD
1304 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1305 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
1306 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1307 &adev->wb.wb_obj, &adev->wb.gpu_addr,
1308 (void **)&adev->wb.wb);
d38ceaf9
AD
1309 if (r) {
1310 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1311 return r;
1312 }
d38ceaf9
AD
1313
1314 adev->wb.num_wb = AMDGPU_MAX_WB;
1315 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1316
1317 /* clear wb memory */
73469585 1318 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
1319 }
1320
1321 return 0;
1322}
1323
1324/**
131b4b36 1325 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
1326 *
1327 * @adev: amdgpu_device pointer
1328 * @wb: wb index
1329 *
1330 * Allocate a wb slot for use by the driver (all asics).
1331 * Returns 0 on success or -EINVAL on failure.
1332 */
131b4b36 1333int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9
AD
1334{
1335 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 1336
97407b63 1337 if (offset < adev->wb.num_wb) {
7014285a 1338 __set_bit(offset, adev->wb.used);
63ae07ca 1339 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
1340 return 0;
1341 } else {
1342 return -EINVAL;
1343 }
1344}
1345
d38ceaf9 1346/**
131b4b36 1347 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
1348 *
1349 * @adev: amdgpu_device pointer
1350 * @wb: wb index
1351 *
1352 * Free a wb slot allocated for use by the driver (all asics)
1353 */
131b4b36 1354void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 1355{
73469585 1356 wb >>= 3;
d38ceaf9 1357 if (wb < adev->wb.num_wb)
73469585 1358 __clear_bit(wb, adev->wb.used);
d38ceaf9
AD
1359}
1360
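/*
 * Typical pairing (a sketch, not lifted from a real ring implementation):
 * a caller allocates a slot once, uses the returned value as a dword index
 * into adev->wb.wb[] (GPU address adev->wb.gpu_addr + offset * 4), and
 * frees it on teardown.
 *
 *   u32 wptr_offs;
 *   int r;
 *
 *   r = amdgpu_device_wb_get(adev, &wptr_offs);
 *   if (r)
 *           return r;
 *
 *   adev->wb.wb[wptr_offs] = 0;
 *   ...
 *   amdgpu_device_wb_free(adev, wptr_offs);
 */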
d6895ad3
CK
1361/**
1362 * amdgpu_device_resize_fb_bar - try to resize FB BAR
1363 *
1364 * @adev: amdgpu_device pointer
1365 *
1366 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1367 * to fail, but if any of the BARs is not accessible after the resize we abort
1368 * driver loading by returning -ENODEV.
1369 */
1370int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1371{
453f617a 1372 int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
31b8adab
CK
1373 struct pci_bus *root;
1374 struct resource *res;
b8920e1e 1375 unsigned int i;
d6895ad3
CK
1376 u16 cmd;
1377 int r;
1378
822130b5
AB
1379 if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1380 return 0;
1381
0c03b912 1382 /* Bypass for VF */
1383 if (amdgpu_sriov_vf(adev))
1384 return 0;
1385
b7221f2b
AD
1386 /* skip if the bios has already enabled large BAR */
1387 if (adev->gmc.real_vram_size &&
1388 (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1389 return 0;
1390
31b8adab
CK
1391 /* Check if the root BUS has 64bit memory resources */
1392 root = adev->pdev->bus;
1393 while (root->parent)
1394 root = root->parent;
1395
1396 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 1397 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
1398 res->start > 0x100000000ull)
1399 break;
1400 }
1401
1402 /* Trying to resize is pointless without a root hub window above 4GB */
1403 if (!res)
1404 return 0;
1405
453f617a
ND
1406 /* Limit the BAR size to what is available */
1407 rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1408 rbar_size);
1409
d6895ad3
CK
1410 /* Disable memory decoding while we change the BAR addresses and size */
1411 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1412 pci_write_config_word(adev->pdev, PCI_COMMAND,
1413 cmd & ~PCI_COMMAND_MEMORY);
1414
1415 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
43c064db 1416 amdgpu_doorbell_fini(adev);
d6895ad3
CK
1417 if (adev->asic_type >= CHIP_BONAIRE)
1418 pci_release_resource(adev->pdev, 2);
1419
1420 pci_release_resource(adev->pdev, 0);
1421
1422 r = pci_resize_resource(adev->pdev, 0, rbar_size);
1423 if (r == -ENOSPC)
1424 DRM_INFO("Not enough PCI address space for a large BAR.");
1425 else if (r && r != -ENOTSUPP)
1426 DRM_ERROR("Problem resizing BAR0 (%d).", r);
1427
1428 pci_assign_unassigned_bus_resources(adev->pdev->bus);
1429
1430 /* When the doorbell or fb BAR isn't available we have no chance of
1431 * using the device.
1432 */
43c064db 1433 r = amdgpu_doorbell_init(adev);
d6895ad3
CK
1434 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1435 return -ENODEV;
1436
1437 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1438
1439 return 0;
1440}
a05502e5 1441
9535a86a
SZ
1442static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
1443{
b8920e1e 1444 if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
9535a86a 1445 return false;
9535a86a
SZ
1446
1447 return true;
1448}
1449
d38ceaf9
AD
1450/*
1451 * GPU helpers function.
1452 */
1453/**
39c640c0 1454 * amdgpu_device_need_post - check if the hw need post or not
d38ceaf9
AD
1455 *
1456 * @adev: amdgpu_device pointer
1457 *
c836fec5
JQ
1458 * Check if the asic has been initialized (all asics) at driver startup
1459 * or if a post is needed because a hw reset was performed.
1460 * Returns true if a post is needed, false otherwise.
d38ceaf9 1461 */
39c640c0 1462bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
1463{
1464 uint32_t reg;
1465
bec86378
ML
1466 if (amdgpu_sriov_vf(adev))
1467 return false;
1468
9535a86a
SZ
1469 if (!amdgpu_device_read_bios(adev))
1470 return false;
1471
bec86378 1472 if (amdgpu_passthrough(adev)) {
1da2c326
ML
1473 /* for FIJI: in the whole-GPU pass-through virtualization case, after a VM
1474 * reboot some old SMC firmware still needs the driver to do a vPost or the
1475 * GPU hangs; SMC firmware above version 22.15 does not have this flaw, so
1476 * force a vPost for SMC versions below 22.15
bec86378
ML
1477 */
1478 if (adev->asic_type == CHIP_FIJI) {
1479 int err;
1480 uint32_t fw_ver;
b8920e1e 1481
bec86378
ML
1482 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1483 /* force vPost if an error occurred */
1484 if (err)
1485 return true;
1486
1487 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1da2c326
ML
1488 if (fw_ver < 0x00160e00)
1489 return true;
bec86378 1490 }
bec86378 1491 }
91fe77eb 1492
e3c1b071 1493 /* Don't post if we need to reset whole hive on init */
1494 if (adev->gmc.xgmi.pending_reset)
1495 return false;
1496
91fe77eb 1497 if (adev->has_hw_reset) {
1498 adev->has_hw_reset = false;
1499 return true;
1500 }
1501
1502 /* bios scratch used on CIK+ */
1503 if (adev->asic_type >= CHIP_BONAIRE)
1504 return amdgpu_atombios_scratch_need_asic_init(adev);
1505
1506 /* check MEM_SIZE for older asics */
1507 reg = amdgpu_asic_get_config_memsize(adev);
1508
1509 if ((reg != 0) && (reg != 0xffffffff))
1510 return false;
1511
1512 return true;
70e64c4d
ML
1513}
1514
bb0f8429
ML
1515/*
1516 * Check whether seamless boot is supported.
1517 *
7f4ce7b5
ML
1518 * So far we only support seamless boot on DCE 3.0 or later.
1519 * If users report that it works on older ASICs as well, we may
1520 * loosen this.
bb0f8429
ML
1521 */
1522bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
1523{
5dc270d3
ML
1524 switch (amdgpu_seamless) {
1525 case -1:
1526 break;
1527 case 1:
1528 return true;
1529 case 0:
1530 return false;
1531 default:
1532 DRM_ERROR("Invalid value for amdgpu.seamless: %d\n",
1533 amdgpu_seamless);
1534 return false;
1535 }
1536
3657a1d5
ML
1537 if (!(adev->flags & AMD_IS_APU))
1538 return false;
1539
5dc270d3
ML
1540 if (adev->mman.keep_stolen_vga_memory)
1541 return false;
1542
7f4ce7b5 1543 return adev->ip_versions[DCE_HWIP][0] >= IP_VERSION(3, 0, 0);
bb0f8429
ML
1544}
1545
5d1eb4c4 1546/*
2757a848
ML
1547 * Intel hosts such as Rocket Lake, Alder Lake, Raptor Lake and Sapphire Rapids
1548 * don't support dynamic speed switching. Until we have confirmation from Intel
1549 * that a specific host supports it, it's safer that we keep it disabled for all.
5d1eb4c4
ML
1550 *
1551 * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1552 * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1553 */
2757a848 1554static bool amdgpu_device_pcie_dynamic_switching_supported(void)
5d1eb4c4
ML
1555{
1556#if IS_ENABLED(CONFIG_X86)
1557 struct cpuinfo_x86 *c = &cpu_data(0);
1558
1559 if (c->x86_vendor == X86_VENDOR_INTEL)
1560 return false;
1561#endif
1562 return true;
1563}
1564
0ab5d711
ML
1565/**
1566 * amdgpu_device_should_use_aspm - check if the device should program ASPM
1567 *
1568 * @adev: amdgpu_device pointer
1569 *
1570 * Confirm whether the module parameter and pcie bridge agree that ASPM should
1571 * be set for this device.
1572 *
1573 * Returns true if it should be used or false if not.
1574 */
1575bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1576{
1577 switch (amdgpu_aspm) {
1578 case -1:
1579 break;
1580 case 0:
1581 return false;
1582 case 1:
1583 return true;
1584 default:
1585 return false;
1586 }
1a6513de
ML
1587 if (adev->flags & AMD_IS_APU)
1588 return false;
2757a848
ML
1589 if (!(adev->pm.pp_feature & PP_PCIE_DPM_MASK))
1590 return false;
0ab5d711
ML
1591 return pcie_aspm_enabled(adev->pdev);
1592}
1593
d38ceaf9
AD
1594/* if we get transitioned to only one device, take VGA back */
1595/**
06ec9070 1596 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9 1597 *
bf44e8ce 1598 * @pdev: PCI device pointer
d38ceaf9
AD
1599 * @state: enable/disable vga decode
1600 *
1601 * Enable/disable vga decode (all asics).
1602 * Returns VGA resource flags.
1603 */
bf44e8ce
CH
1604static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1605 bool state)
d38ceaf9 1606{
bf44e8ce 1607 struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
b8920e1e 1608
d38ceaf9
AD
1609 amdgpu_asic_set_vga_state(adev, state);
1610 if (state)
1611 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1612 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1613 else
1614 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1615}
1616
e3ecdffa
AD
1617/**
1618 * amdgpu_device_check_block_size - validate the vm block size
1619 *
1620 * @adev: amdgpu_device pointer
1621 *
1622 * Validates the vm block size specified via module parameter.
1623 * The vm block size defines the number of bits in the page table versus the
1624 * page directory; a page is 4KB, so we have a 12 bit offset, a minimum of 9
1625 * bits in the page table, and the remaining bits in the page directory.
1626 */
06ec9070 1627static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
1628{
1629 /* defines number of bits in page table versus page directory,
1630 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
b8920e1e
SS
1631 * page table and the remaining bits are in the page directory
1632 */
bab4fee7
JZ
1633 if (amdgpu_vm_block_size == -1)
1634 return;
a1adf8be 1635
bab4fee7 1636 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
1637 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1638 amdgpu_vm_block_size);
97489129 1639 amdgpu_vm_block_size = -1;
a1adf8be 1640 }
a1adf8be
CZ
1641}
1642
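/*
 * Worked example (numbers follow directly from the comment above): with 4KB
 * pages the low 12 bits are the page offset, and the minimum block size of 9
 * gives 2^9 = 512 page-table entries, i.e. 512 * 4KB = 2MB of address space
 * covered per page-table block; all higher VA bits are resolved through the
 * page directory levels.
 */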
e3ecdffa
AD
1643/**
1644 * amdgpu_device_check_vm_size - validate the vm size
1645 *
1646 * @adev: amdgpu_device pointer
1647 *
1648 * Validates the vm size in GB specified via module parameter.
1649 * The VM size is the size of the GPU virtual memory space in GB.
1650 */
06ec9070 1651static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 1652{
64dab074
AD
1653 /* no need to check the default value */
1654 if (amdgpu_vm_size == -1)
1655 return;
1656
83ca145d
ZJ
1657 if (amdgpu_vm_size < 1) {
1658 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1659 amdgpu_vm_size);
f3368128 1660 amdgpu_vm_size = -1;
83ca145d 1661 }
83ca145d
ZJ
1662}
1663
7951e376
RZ
1664static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1665{
1666 struct sysinfo si;
a9d4fe2f 1667 bool is_os_64 = (sizeof(void *) == 8);
7951e376
RZ
1668 uint64_t total_memory;
1669 uint64_t dram_size_seven_GB = 0x1B8000000;
1670 uint64_t dram_size_three_GB = 0xB8000000;
1671
1672 if (amdgpu_smu_memory_pool_size == 0)
1673 return;
1674
1675 if (!is_os_64) {
1676 DRM_WARN("Not 64-bit OS, feature not supported\n");
1677 goto def_value;
1678 }
1679 si_meminfo(&si);
1680 total_memory = (uint64_t)si.totalram * si.mem_unit;
1681
1682 if ((amdgpu_smu_memory_pool_size == 1) ||
1683 (amdgpu_smu_memory_pool_size == 2)) {
1684 if (total_memory < dram_size_three_GB)
1685 goto def_value1;
1686 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1687 (amdgpu_smu_memory_pool_size == 8)) {
1688 if (total_memory < dram_size_seven_GB)
1689 goto def_value1;
1690 } else {
1691 DRM_WARN("Smu memory pool size not supported\n");
1692 goto def_value;
1693 }
1694 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1695
1696 return;
1697
1698def_value1:
1699 DRM_WARN("No enough system memory\n");
1700def_value:
1701 adev->pm.smu_prv_buffer_size = 0;
1702}
1703
9f6a7857
HR
1704static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1705{
1706 if (!(adev->flags & AMD_IS_APU) ||
1707 adev->asic_type < CHIP_RAVEN)
1708 return 0;
1709
1710 switch (adev->asic_type) {
1711 case CHIP_RAVEN:
1712 if (adev->pdev->device == 0x15dd)
1713 adev->apu_flags |= AMD_APU_IS_RAVEN;
1714 if (adev->pdev->device == 0x15d8)
1715 adev->apu_flags |= AMD_APU_IS_PICASSO;
1716 break;
1717 case CHIP_RENOIR:
1718 if ((adev->pdev->device == 0x1636) ||
1719 (adev->pdev->device == 0x164c))
1720 adev->apu_flags |= AMD_APU_IS_RENOIR;
1721 else
1722 adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1723 break;
1724 case CHIP_VANGOGH:
1725 adev->apu_flags |= AMD_APU_IS_VANGOGH;
1726 break;
1727 case CHIP_YELLOW_CARP:
1728 break;
d0f56dc2 1729 case CHIP_CYAN_SKILLFISH:
dfcc3e8c
AD
1730 if ((adev->pdev->device == 0x13FE) ||
1731 (adev->pdev->device == 0x143F))
d0f56dc2
TZ
1732 adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1733 break;
9f6a7857 1734 default:
4eaf21b7 1735 break;
9f6a7857
HR
1736 }
1737
1738 return 0;
1739}
1740
d38ceaf9 1741/**
06ec9070 1742 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
1743 *
1744 * @adev: amdgpu_device pointer
1745 *
1746 * Validates certain module parameters and updates
1747 * the associated values used by the driver (all asics).
1748 */
912dfc84 1749static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1750{
5b011235
CZ
1751 if (amdgpu_sched_jobs < 4) {
1752 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1753 amdgpu_sched_jobs);
1754 amdgpu_sched_jobs = 4;
47fc644f 1755 } else if (!is_power_of_2(amdgpu_sched_jobs)) {
5b011235
CZ
1756 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1757 amdgpu_sched_jobs);
1758 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1759 }
d38ceaf9 1760
83e74db6 1761 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
1762 /* gart size must be greater or equal to 32M */
1763 dev_warn(adev->dev, "gart size (%d) too small\n",
1764 amdgpu_gart_size);
83e74db6 1765 amdgpu_gart_size = -1;
d38ceaf9
AD
1766 }
1767
36d38372 1768 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1769 /* gtt size must be greater or equal to 32M */
36d38372
CK
1770 dev_warn(adev->dev, "gtt size (%d) too small\n",
1771 amdgpu_gtt_size);
1772 amdgpu_gtt_size = -1;
d38ceaf9
AD
1773 }
1774
d07f14be
RH
1775 /* valid range is between 4 and 9 inclusive */
1776 if (amdgpu_vm_fragment_size != -1 &&
1777 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1778 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1779 amdgpu_vm_fragment_size = -1;
1780 }
1781
5d5bd5e3
KW
1782 if (amdgpu_sched_hw_submission < 2) {
1783 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1784 amdgpu_sched_hw_submission);
1785 amdgpu_sched_hw_submission = 2;
1786 } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1787 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1788 amdgpu_sched_hw_submission);
1789 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1790 }
1791
2656fd23
AG
1792 if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1793 dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1794 amdgpu_reset_method = -1;
1795 }
1796
7951e376
RZ
1797 amdgpu_device_check_smu_prv_buffer_size(adev);
1798
06ec9070 1799 amdgpu_device_check_vm_size(adev);
d38ceaf9 1800
06ec9070 1801 amdgpu_device_check_block_size(adev);
6a7f76e7 1802
19aede77 1803 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84 1804
e3c00faa 1805 return 0;
d38ceaf9
AD
1806}
1807
1808/**
1809 * amdgpu_switcheroo_set_state - set switcheroo state
1810 *
1811 * @pdev: pci dev pointer
1694467b 1812 * @state: vga_switcheroo state
d38ceaf9 1813 *
12024b17 1814 * Callback for the switcheroo driver. Suspends or resumes
d38ceaf9
AD
1815 * the asic before or after it is powered up using ACPI methods.
1816 */
8aba21b7
LT
1817static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1818 enum vga_switcheroo_state state)
d38ceaf9
AD
1819{
1820 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 1821 int r;
d38ceaf9 1822
b98c6299 1823 if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
1824 return;
1825
1826 if (state == VGA_SWITCHEROO_ON) {
dd4fa6c1 1827 pr_info("switched on\n");
d38ceaf9
AD
1828 /* don't suspend or resume card normally */
1829 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1830
8f66090b
TZ
1831 pci_set_power_state(pdev, PCI_D0);
1832 amdgpu_device_load_pci_state(pdev);
1833 r = pci_enable_device(pdev);
de185019
AD
1834 if (r)
1835 DRM_WARN("pci_enable_device failed (%d)\n", r);
1836 amdgpu_device_resume(dev, true);
d38ceaf9 1837
d38ceaf9 1838 dev->switch_power_state = DRM_SWITCH_POWER_ON;
d38ceaf9 1839 } else {
dd4fa6c1 1840 pr_info("switched off\n");
d38ceaf9 1841 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
5095d541 1842 amdgpu_device_prepare(dev);
de185019 1843 amdgpu_device_suspend(dev, true);
8f66090b 1844 amdgpu_device_cache_pci_state(pdev);
de185019 1845 /* Shut down the device */
8f66090b
TZ
1846 pci_disable_device(pdev);
1847 pci_set_power_state(pdev, PCI_D3cold);
d38ceaf9
AD
1848 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1849 }
1850}
1851
1852/**
1853 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1854 *
1855 * @pdev: pci dev pointer
1856 *
1857 * Callback for the switcheroo driver. Checks if the switcheroo
1858 * state can be changed.
1859 * Returns true if the state can be changed, false if not.
1860 */
1861static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1862{
1863 struct drm_device *dev = pci_get_drvdata(pdev);
1864
b8920e1e 1865 /*
d38ceaf9
AD
1866 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1867 * locking inversion with the driver load path. And the access here is
1868 * completely racy anyway. So don't bother with locking for now.
1869 */
7e13ad89 1870 return atomic_read(&dev->open_count) == 0;
d38ceaf9
AD
1871}
1872
1873static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1874 .set_gpu_state = amdgpu_switcheroo_set_state,
1875 .reprobe = NULL,
1876 .can_switch = amdgpu_switcheroo_can_switch,
1877};
1878
e3ecdffa
AD
1879/**
1880 * amdgpu_device_ip_set_clockgating_state - set the CG state
1881 *
87e3f136 1882 * @dev: amdgpu_device pointer
e3ecdffa
AD
1883 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1884 * @state: clockgating state (gate or ungate)
1885 *
1886 * Sets the requested clockgating state for all instances of
1887 * the hardware IP specified.
1888 * Returns the error code from the last instance.
1889 */
43fa561f 1890int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1891 enum amd_ip_block_type block_type,
1892 enum amd_clockgating_state state)
d38ceaf9 1893{
43fa561f 1894 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1895 int i, r = 0;
1896
1897 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1898 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1899 continue;
c722865a
RZ
1900 if (adev->ip_blocks[i].version->type != block_type)
1901 continue;
1902 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1903 continue;
1904 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1905 (void *)adev, state);
1906 if (r)
1907 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1908 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1909 }
1910 return r;
1911}
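
/*
 * Example caller (illustrative only): ungate GFX clockgating before
 * touching GFX registers, then gate it again afterwards:
 *
 *   amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
 *                                          AMD_CG_STATE_UNGATE);
 *   ... program GFX ...
 *   amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
 *                                          AMD_CG_STATE_GATE);
 */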
1912
e3ecdffa
AD
1913/**
1914 * amdgpu_device_ip_set_powergating_state - set the PG state
1915 *
87e3f136 1916 * @dev: amdgpu_device pointer
e3ecdffa
AD
1917 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1918 * @state: powergating state (gate or ungate)
1919 *
1920 * Sets the requested powergating state for all instances of
1921 * the hardware IP specified.
1922 * Returns the error code from the last instance.
1923 */
43fa561f 1924int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1925 enum amd_ip_block_type block_type,
1926 enum amd_powergating_state state)
d38ceaf9 1927{
43fa561f 1928 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1929 int i, r = 0;
1930
1931 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1932 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1933 continue;
c722865a
RZ
1934 if (adev->ip_blocks[i].version->type != block_type)
1935 continue;
1936 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1937 continue;
1938 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1939 (void *)adev, state);
1940 if (r)
1941 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1942 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1943 }
1944 return r;
1945}
1946
e3ecdffa
AD
1947/**
1948 * amdgpu_device_ip_get_clockgating_state - get the CG state
1949 *
1950 * @adev: amdgpu_device pointer
1951 * @flags: clockgating feature flags
1952 *
1953 * Walks the list of IPs on the device and updates the clockgating
1954 * flags for each IP.
1955 * Updates @flags with the feature flags for each hardware IP where
1956 * clockgating is enabled.
1957 */
2990a1fc 1958void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
25faeddc 1959 u64 *flags)
6cb2d4e4
HR
1960{
1961 int i;
1962
1963 for (i = 0; i < adev->num_ip_blocks; i++) {
1964 if (!adev->ip_blocks[i].status.valid)
1965 continue;
1966 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1967 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1968 }
1969}
1970
e3ecdffa
AD
1971/**
1972 * amdgpu_device_ip_wait_for_idle - wait for idle
1973 *
1974 * @adev: amdgpu_device pointer
1975 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1976 *
1977 * Waits for the requested hardware IP to be idle.
1978 * Returns 0 for success or a negative error code on failure.
1979 */
2990a1fc
AD
1980int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1981 enum amd_ip_block_type block_type)
5dbbb60b
AD
1982{
1983 int i, r;
1984
1985 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1986 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1987 continue;
a1255107
AD
1988 if (adev->ip_blocks[i].version->type == block_type) {
1989 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1990 if (r)
1991 return r;
1992 break;
1993 }
1994 }
1995 return 0;
1996
1997}
1998
e3ecdffa
AD
1999/**
2000 * amdgpu_device_ip_is_idle - is the hardware IP idle
2001 *
2002 * @adev: amdgpu_device pointer
2003 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2004 *
2005 * Check if the hardware IP is idle or not.
2006 * Returns true if the IP is idle, false if not.
2007 */
2990a1fc
AD
2008bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
2009 enum amd_ip_block_type block_type)
5dbbb60b
AD
2010{
2011 int i;
2012
2013 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2014 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 2015 continue;
a1255107
AD
2016 if (adev->ip_blocks[i].version->type == block_type)
2017 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
2018 }
2019 return true;
2020
2021}
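
/*
 * Example caller (illustrative only): combine the two helpers above so the
 * wait is only taken when the IP is actually busy:
 *
 *   if (!amdgpu_device_ip_is_idle(adev, AMD_IP_BLOCK_TYPE_GMC)) {
 *           r = amdgpu_device_ip_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GMC);
 *           if (r)
 *                   return r;
 *   }
 */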
2022
e3ecdffa
AD
2023/**
2024 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
2025 *
2026 * @adev: amdgpu_device pointer
87e3f136 2027 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
2028 *
2029 * Returns a pointer to the hardware IP block structure
2030 * if it exists for the asic, otherwise NULL.
2031 */
2990a1fc
AD
2032struct amdgpu_ip_block *
2033amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
2034 enum amd_ip_block_type type)
d38ceaf9
AD
2035{
2036 int i;
2037
2038 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 2039 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
2040 return &adev->ip_blocks[i];
2041
2042 return NULL;
2043}
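
/*
 * Example lookup (illustrative only):
 *
 *   struct amdgpu_ip_block *ip =
 *           amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
 *   if (ip)
 *           DRM_INFO("GFX IP v%u.%u\n", ip->version->major,
 *                    ip->version->minor);
 */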
2044
2045/**
2990a1fc 2046 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
2047 *
2048 * @adev: amdgpu_device pointer
5fc3aeeb 2049 * @type: enum amd_ip_block_type
d38ceaf9
AD
2050 * @major: major version
2051 * @minor: minor version
2052 *
2053 * Return 0 if the IP block's version is equal to or greater than the
2054 * specified version, 1 if it is smaller or the ip_block doesn't exist.
2055 */
2990a1fc
AD
2056int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
2057 enum amd_ip_block_type type,
2058 u32 major, u32 minor)
d38ceaf9 2059{
2990a1fc 2060 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 2061
a1255107
AD
2062 if (ip_block && ((ip_block->version->major > major) ||
2063 ((ip_block->version->major == major) &&
2064 (ip_block->version->minor >= minor))))
d38ceaf9
AD
2065 return 0;
2066
2067 return 1;
2068}
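
/*
 * Example check (illustrative only): run a code path only when the GFX IP
 * block is at least version 9.0:
 *
 *   if (amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GFX,
 *                                          9, 0) == 0) {
 *           ... GFX v9.0 or newer ...
 *   }
 */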
2069
a1255107 2070/**
2990a1fc 2071 * amdgpu_device_ip_block_add
a1255107
AD
2072 *
2073 * @adev: amdgpu_device pointer
2074 * @ip_block_version: pointer to the IP to add
2075 *
2076 * Adds the IP block driver information to the collection of IPs
2077 * on the asic.
2078 */
2990a1fc
AD
2079int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
2080 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
2081{
2082 if (!ip_block_version)
2083 return -EINVAL;
2084
7bd939d0
LG
2085 switch (ip_block_version->type) {
2086 case AMD_IP_BLOCK_TYPE_VCN:
2087 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
2088 return 0;
2089 break;
2090 case AMD_IP_BLOCK_TYPE_JPEG:
2091 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
2092 return 0;
2093 break;
2094 default:
2095 break;
2096 }
2097
e966a725 2098 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
2099 ip_block_version->funcs->name);
2100
a1255107
AD
2101 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
2102
2103 return 0;
2104}
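
/*
 * Example usage (illustrative sketch only): an asic setup routine registers
 * its IP blocks in initialization order, e.g. with block names as used
 * elsewhere in the driver:
 *
 *   r = amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
 *   if (r)
 *           return r;
 *   r = amdgpu_device_ip_block_add(adev, &gmc_v8_0_ip_block);
 *   if (r)
 *           return r;
 */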
2105
e3ecdffa
AD
2106/**
2107 * amdgpu_device_enable_virtual_display - enable virtual display feature
2108 *
2109 * @adev: amdgpu_device pointer
2110 *
2111 * Enables the virtual display feature if the user has enabled it via
2112 * the module parameter virtual_display. This feature provides a virtual
2113 * display hardware on headless boards or in virtualized environments.
2114 * This function parses and validates the configuration string specified by
2115 * the user and configures the virtual display configuration (number of
2116 * virtual connectors, crtcs, etc.) specified.
2117 */
483ef985 2118static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
2119{
2120 adev->enable_virtual_display = false;
2121
2122 if (amdgpu_virtual_display) {
8f66090b 2123 const char *pci_address_name = pci_name(adev->pdev);
0f66356d 2124 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
2125
2126 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
2127 pciaddstr_tmp = pciaddstr;
0f66356d
ED
2128 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
2129 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
2130 if (!strcmp("all", pciaddname)
2131 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
2132 long num_crtc;
2133 int res = -1;
2134
9accf2fd 2135 adev->enable_virtual_display = true;
0f66356d
ED
2136
2137 if (pciaddname_tmp)
2138 res = kstrtol(pciaddname_tmp, 10,
2139 &num_crtc);
2140
2141 if (!res) {
2142 if (num_crtc < 1)
2143 num_crtc = 1;
2144 if (num_crtc > 6)
2145 num_crtc = 6;
2146 adev->mode_info.num_crtc = num_crtc;
2147 } else {
2148 adev->mode_info.num_crtc = 1;
2149 }
9accf2fd
ED
2150 break;
2151 }
2152 }
2153
0f66356d
ED
2154 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
2155 amdgpu_virtual_display, pci_address_name,
2156 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
2157
2158 kfree(pciaddstr);
2159 }
2160}
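
/*
 * Example module parameter (illustrative only): the string is a
 * semicolon-separated list of "<pci address>[,<num crtcs>]" entries, with
 * "all" matching every device, e.g.:
 *
 *   amdgpu.virtual_display=0000:04:00.0,2
 *   amdgpu.virtual_display=all,1
 *
 * The first form enables two virtual crtcs on the (hypothetical) device at
 * 0000:04:00.0; the crtc count is clamped to the 1..6 range by the code above.
 */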
2161
25263da3
AD
2162void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
2163{
2164 if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
2165 adev->mode_info.num_crtc = 1;
2166 adev->enable_virtual_display = true;
2167 DRM_INFO("virtual_display:%d, num_crtc:%d\n",
2168 adev->enable_virtual_display, adev->mode_info.num_crtc);
2169 }
2170}
2171
e3ecdffa
AD
2172/**
2173 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
2174 *
2175 * @adev: amdgpu_device pointer
2176 *
2177 * Parses the asic configuration parameters specified in the gpu info
2178 * firmware and makes them available to the driver for use in configuring
2179 * the asic.
2180 * Returns 0 on success, -EINVAL on failure.
2181 */
e2a75f88
AD
2182static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
2183{
e2a75f88 2184 const char *chip_name;
c0a43457 2185 char fw_name[40];
e2a75f88
AD
2186 int err;
2187 const struct gpu_info_firmware_header_v1_0 *hdr;
2188
ab4fe3e1
HR
2189 adev->firmware.gpu_info_fw = NULL;
2190
72de33f8 2191 if (adev->mman.discovery_bin) {
cc375d8c
TY
2192 /*
2193 * FIXME: The bounding box is still needed by Navi12, so
e24d0e91 2194 * temporarily read it from gpu_info firmware. Should be dropped
cc375d8c
TY
2195 * when DAL no longer needs it.
2196 */
2197 if (adev->asic_type != CHIP_NAVI12)
2198 return 0;
258620d0
AD
2199 }
2200
e2a75f88 2201 switch (adev->asic_type) {
e2a75f88
AD
2202 default:
2203 return 0;
2204 case CHIP_VEGA10:
2205 chip_name = "vega10";
2206 break;
3f76dced
AD
2207 case CHIP_VEGA12:
2208 chip_name = "vega12";
2209 break;
2d2e5e7e 2210 case CHIP_RAVEN:
54f78a76 2211 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
54c4d17e 2212 chip_name = "raven2";
54f78a76 2213 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
741deade 2214 chip_name = "picasso";
54c4d17e
FX
2215 else
2216 chip_name = "raven";
2d2e5e7e 2217 break;
65e60f6e
LM
2218 case CHIP_ARCTURUS:
2219 chip_name = "arcturus";
2220 break;
42b325e5
XY
2221 case CHIP_NAVI12:
2222 chip_name = "navi12";
2223 break;
e2a75f88
AD
2224 }
2225
2226 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
b31d3063 2227 err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
e2a75f88
AD
2228 if (err) {
2229 dev_err(adev->dev,
b31d3063 2230 "Failed to get gpu_info firmware \"%s\"\n",
e2a75f88
AD
2231 fw_name);
2232 goto out;
2233 }
2234
ab4fe3e1 2235 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
2236 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
2237
2238 switch (hdr->version_major) {
2239 case 1:
2240 {
2241 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 2242 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
2243 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2244
cc375d8c
TY
2245 /*
2246 * Should be dropped when DAL no longer needs it.
2247 */
2248 if (adev->asic_type == CHIP_NAVI12)
ec51d3fa
XY
2249 goto parse_soc_bounding_box;
2250
b5ab16bf
AD
2251 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2252 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2253 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2254 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 2255 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
2256 le32_to_cpu(gpu_info_fw->gc_num_tccs);
2257 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2258 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2259 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2260 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 2261 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
2262 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2263 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
2264 adev->gfx.cu_info.max_waves_per_simd =
2265 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2266 adev->gfx.cu_info.max_scratch_slots_per_cu =
2267 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2268 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 2269 if (hdr->version_minor >= 1) {
35c2e910
HZ
2270 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2271 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2272 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2273 adev->gfx.config.num_sc_per_sh =
2274 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2275 adev->gfx.config.num_packer_per_sc =
2276 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2277 }
ec51d3fa
XY
2278
2279parse_soc_bounding_box:
ec51d3fa
XY
2280 /*
2281 * soc bounding box info is not integrated in discovery table,
258620d0 2282 * we always need to parse it from gpu info firmware if needed.
ec51d3fa 2283 */
48321c3d
HW
2284 if (hdr->version_minor == 2) {
2285 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2286 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2287 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2288 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2289 }
e2a75f88
AD
2290 break;
2291 }
2292 default:
2293 dev_err(adev->dev,
2294 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2295 err = -EINVAL;
2296 goto out;
2297 }
2298out:
e2a75f88
AD
2299 return err;
2300}
2301
e3ecdffa
AD
2302/**
2303 * amdgpu_device_ip_early_init - run early init for hardware IPs
2304 *
2305 * @adev: amdgpu_device pointer
2306 *
2307 * Early initialization pass for hardware IPs. The hardware IPs that make
2308 * up each asic are discovered and each IP's early_init callback is run. This
2309 * is the first stage in initializing the asic.
2310 * Returns 0 on success, negative error code on failure.
2311 */
06ec9070 2312static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 2313{
901e2be2 2314 struct pci_dev *parent;
aaa36a97 2315 int i, r;
ced69502 2316 bool total;
d38ceaf9 2317
483ef985 2318 amdgpu_device_enable_virtual_display(adev);
a6be7570 2319
00a979f3 2320 if (amdgpu_sriov_vf(adev)) {
00a979f3 2321 r = amdgpu_virt_request_full_gpu(adev, true);
aaa36a97
AD
2322 if (r)
2323 return r;
00a979f3
WS
2324 }
2325
d38ceaf9 2326 switch (adev->asic_type) {
33f34802
KW
2327#ifdef CONFIG_DRM_AMDGPU_SI
2328 case CHIP_VERDE:
2329 case CHIP_TAHITI:
2330 case CHIP_PITCAIRN:
2331 case CHIP_OLAND:
2332 case CHIP_HAINAN:
295d0daf 2333 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
2334 r = si_set_ip_blocks(adev);
2335 if (r)
2336 return r;
2337 break;
2338#endif
a2e73f56
AD
2339#ifdef CONFIG_DRM_AMDGPU_CIK
2340 case CHIP_BONAIRE:
2341 case CHIP_HAWAII:
2342 case CHIP_KAVERI:
2343 case CHIP_KABINI:
2344 case CHIP_MULLINS:
e1ad2d53 2345 if (adev->flags & AMD_IS_APU)
a2e73f56 2346 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
2347 else
2348 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
2349
2350 r = cik_set_ip_blocks(adev);
2351 if (r)
2352 return r;
2353 break;
2354#endif
da87c30b
AD
2355 case CHIP_TOPAZ:
2356 case CHIP_TONGA:
2357 case CHIP_FIJI:
2358 case CHIP_POLARIS10:
2359 case CHIP_POLARIS11:
2360 case CHIP_POLARIS12:
2361 case CHIP_VEGAM:
2362 case CHIP_CARRIZO:
2363 case CHIP_STONEY:
2364 if (adev->flags & AMD_IS_APU)
2365 adev->family = AMDGPU_FAMILY_CZ;
2366 else
2367 adev->family = AMDGPU_FAMILY_VI;
2368
2369 r = vi_set_ip_blocks(adev);
2370 if (r)
2371 return r;
2372 break;
d38ceaf9 2373 default:
63352b7f
AD
2374 r = amdgpu_discovery_set_ip_blocks(adev);
2375 if (r)
2376 return r;
2377 break;
d38ceaf9
AD
2378 }
2379
901e2be2
AD
2380 if (amdgpu_has_atpx() &&
2381 (amdgpu_is_atpx_hybrid() ||
2382 amdgpu_has_atpx_dgpu_power_cntl()) &&
2383 ((adev->flags & AMD_IS_APU) == 0) &&
7b1c6263 2384 !dev_is_removable(&adev->pdev->dev))
901e2be2
AD
2385 adev->flags |= AMD_IS_PX;
2386
85ac2021 2387 if (!(adev->flags & AMD_IS_APU)) {
c4c8955b 2388 parent = pcie_find_root_port(adev->pdev);
85ac2021
AD
2389 adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2390 }
901e2be2 2391
1884734a 2392
3b94fb10 2393 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 2394 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 2395 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
4215a119
HC
2396 if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2397 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
fbf1035b
ML
2398 if (!amdgpu_device_pcie_dynamic_switching_supported())
2399 adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
00f54b97 2400
ced69502 2401 total = true;
d38ceaf9
AD
2402 for (i = 0; i < adev->num_ip_blocks; i++) {
2403 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
0c451baf 2404 DRM_WARN("disabled ip block: %d <%s>\n",
ed8cf00c 2405 i, adev->ip_blocks[i].version->funcs->name);
a1255107 2406 adev->ip_blocks[i].status.valid = false;
d38ceaf9 2407 } else {
a1255107
AD
2408 if (adev->ip_blocks[i].version->funcs->early_init) {
2409 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 2410 if (r == -ENOENT) {
a1255107 2411 adev->ip_blocks[i].status.valid = false;
2c1a2784 2412 } else if (r) {
a1255107
AD
2413 DRM_ERROR("early_init of IP block <%s> failed %d\n",
2414 adev->ip_blocks[i].version->funcs->name, r);
ced69502 2415 total = false;
2c1a2784 2416 } else {
a1255107 2417 adev->ip_blocks[i].status.valid = true;
2c1a2784 2418 }
974e6b64 2419 } else {
a1255107 2420 adev->ip_blocks[i].status.valid = true;
d38ceaf9 2421 }
d38ceaf9 2422 }
21a249ca
AD
2423 /* get the vbios after the asic_funcs are set up */
2424 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
2425 r = amdgpu_device_parse_gpu_info_fw(adev);
2426 if (r)
2427 return r;
2428
21a249ca 2429 /* Read BIOS */
9535a86a
SZ
2430 if (amdgpu_device_read_bios(adev)) {
2431 if (!amdgpu_get_bios(adev))
2432 return -EINVAL;
21a249ca 2433
9535a86a
SZ
2434 r = amdgpu_atombios_init(adev);
2435 if (r) {
2436 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2437 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2438 return r;
2439 }
21a249ca 2440 }
77eabc6f
PJZ
2441
2442 /*get pf2vf msg info at it's earliest time*/
2443 if (amdgpu_sriov_vf(adev))
2444 amdgpu_virt_init_data_exchange(adev);
2445
21a249ca 2446 }
d38ceaf9 2447 }
ced69502
ML
2448 if (!total)
2449 return -ENODEV;
d38ceaf9 2450
00fa4035 2451 amdgpu_amdkfd_device_probe(adev);
395d1fb9
NH
2452 adev->cg_flags &= amdgpu_cg_mask;
2453 adev->pg_flags &= amdgpu_pg_mask;
2454
d38ceaf9
AD
2455 return 0;
2456}
2457
0a4f2520
RZ
2458static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2459{
2460 int i, r;
2461
2462 for (i = 0; i < adev->num_ip_blocks; i++) {
2463 if (!adev->ip_blocks[i].status.sw)
2464 continue;
2465 if (adev->ip_blocks[i].status.hw)
2466 continue;
2467 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 2468 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
2469 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2470 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2471 if (r) {
2472 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2473 adev->ip_blocks[i].version->funcs->name, r);
2474 return r;
2475 }
2476 adev->ip_blocks[i].status.hw = true;
2477 }
2478 }
2479
2480 return 0;
2481}
2482
2483static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2484{
2485 int i, r;
2486
2487 for (i = 0; i < adev->num_ip_blocks; i++) {
2488 if (!adev->ip_blocks[i].status.sw)
2489 continue;
2490 if (adev->ip_blocks[i].status.hw)
2491 continue;
2492 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2493 if (r) {
2494 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2495 adev->ip_blocks[i].version->funcs->name, r);
2496 return r;
2497 }
2498 adev->ip_blocks[i].status.hw = true;
2499 }
2500
2501 return 0;
2502}
2503
7a3e0bb2
RZ
2504static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2505{
2506 int r = 0;
2507 int i;
80f41f84 2508 uint32_t smu_version;
7a3e0bb2
RZ
2509
2510 if (adev->asic_type >= CHIP_VEGA10) {
2511 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
2512 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2513 continue;
2514
e3c1b071 2515 if (!adev->ip_blocks[i].status.sw)
2516 continue;
2517
482f0e53
ML
2518 /* no need to do the fw loading again if already done*/
2519 if (adev->ip_blocks[i].status.hw == true)
2520 break;
2521
53b3f8f4 2522 if (amdgpu_in_reset(adev) || adev->in_suspend) {
482f0e53
ML
2523 r = adev->ip_blocks[i].version->funcs->resume(adev);
2524 if (r) {
2525 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 2526 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
2527 return r;
2528 }
2529 } else {
2530 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2531 if (r) {
2532 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2533 adev->ip_blocks[i].version->funcs->name, r);
2534 return r;
7a3e0bb2 2535 }
7a3e0bb2 2536 }
482f0e53
ML
2537
2538 adev->ip_blocks[i].status.hw = true;
2539 break;
7a3e0bb2
RZ
2540 }
2541 }
482f0e53 2542
8973d9ec
ED
2543 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2544 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 2545
80f41f84 2546 return r;
7a3e0bb2
RZ
2547}
2548
5fd8518d
AG
2549static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2550{
2551 long timeout;
2552 int r, i;
2553
2554 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2555 struct amdgpu_ring *ring = adev->rings[i];
2556
2557 /* No need to setup the GPU scheduler for rings that don't need it */
2558 if (!ring || ring->no_scheduler)
2559 continue;
2560
2561 switch (ring->funcs->type) {
2562 case AMDGPU_RING_TYPE_GFX:
2563 timeout = adev->gfx_timeout;
2564 break;
2565 case AMDGPU_RING_TYPE_COMPUTE:
2566 timeout = adev->compute_timeout;
2567 break;
2568 case AMDGPU_RING_TYPE_SDMA:
2569 timeout = adev->sdma_timeout;
2570 break;
2571 default:
2572 timeout = adev->video_timeout;
2573 break;
2574 }
2575
2576 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
56e44960 2577 DRM_SCHED_PRIORITY_COUNT,
11f25c84 2578 ring->num_hw_submission, 0,
8ab62eda
JG
2579 timeout, adev->reset_domain->wq,
2580 ring->sched_score, ring->name,
2581 adev->dev);
5fd8518d
AG
2582 if (r) {
2583 DRM_ERROR("Failed to create scheduler on ring %s.\n",
2584 ring->name);
2585 return r;
2586 }
2587 }
2588
d425c6f4
JZ
2589 amdgpu_xcp_update_partition_sched_list(adev);
2590
5fd8518d
AG
2591 return 0;
2592}
2593
2594
e3ecdffa
AD
2595/**
2596 * amdgpu_device_ip_init - run init for hardware IPs
2597 *
2598 * @adev: amdgpu_device pointer
2599 *
2600 * Main initialization pass for hardware IPs. The list of all the hardware
2601 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2602 * are run. sw_init initializes the software state associated with each IP
2603 * and hw_init initializes the hardware associated with each IP.
2604 * Returns 0 on success, negative error code on failure.
2605 */
06ec9070 2606static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
2607{
2608 int i, r;
2609
c030f2e4 2610 r = amdgpu_ras_init(adev);
2611 if (r)
2612 return r;
2613
d38ceaf9 2614 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2615 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2616 continue;
a1255107 2617 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 2618 if (r) {
a1255107
AD
2619 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2620 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 2621 goto init_failed;
2c1a2784 2622 }
a1255107 2623 adev->ip_blocks[i].status.sw = true;
bfca0289 2624
c1c39032
AD
2625 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2626 /* need to do common hw init early so everything is set up for gmc */
2627 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2628 if (r) {
2629 DRM_ERROR("hw_init %d failed %d\n", i, r);
2630 goto init_failed;
2631 }
2632 adev->ip_blocks[i].status.hw = true;
2633 } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2634 /* need to do gmc hw init early so we can allocate gpu mem */
892deb48
VS
2635 /* Try to reserve bad pages early */
2636 if (amdgpu_sriov_vf(adev))
2637 amdgpu_virt_exchange_data(adev);
2638
7ccfd79f 2639 r = amdgpu_device_mem_scratch_init(adev);
2c1a2784 2640 if (r) {
7ccfd79f 2641 DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
72d3f592 2642 goto init_failed;
2c1a2784 2643 }
a1255107 2644 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
2645 if (r) {
2646 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 2647 goto init_failed;
2c1a2784 2648 }
06ec9070 2649 r = amdgpu_device_wb_init(adev);
2c1a2784 2650 if (r) {
06ec9070 2651 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 2652 goto init_failed;
2c1a2784 2653 }
a1255107 2654 adev->ip_blocks[i].status.hw = true;
2493664f
ML
2655
2656 /* right after GMC hw init, we create CSA */
02ff519e 2657 if (adev->gfx.mcbp) {
1e256e27 2658 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
58ab2c08
CK
2659 AMDGPU_GEM_DOMAIN_VRAM |
2660 AMDGPU_GEM_DOMAIN_GTT,
2661 AMDGPU_CSA_SIZE);
2493664f
ML
2662 if (r) {
2663 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 2664 goto init_failed;
2493664f
ML
2665 }
2666 }
d38ceaf9
AD
2667 }
2668 }
2669
c9ffa427 2670 if (amdgpu_sriov_vf(adev))
22c16d25 2671 amdgpu_virt_init_data_exchange(adev);
c9ffa427 2672
533aed27
AG
2673 r = amdgpu_ib_pool_init(adev);
2674 if (r) {
2675 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2676 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2677 goto init_failed;
2678 }
2679
c8963ea4
RZ
2680 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2681 if (r)
72d3f592 2682 goto init_failed;
0a4f2520
RZ
2683
2684 r = amdgpu_device_ip_hw_init_phase1(adev);
2685 if (r)
72d3f592 2686 goto init_failed;
0a4f2520 2687
7a3e0bb2
RZ
2688 r = amdgpu_device_fw_loading(adev);
2689 if (r)
72d3f592 2690 goto init_failed;
7a3e0bb2 2691
0a4f2520
RZ
2692 r = amdgpu_device_ip_hw_init_phase2(adev);
2693 if (r)
72d3f592 2694 goto init_failed;
d38ceaf9 2695
121a2bc6
AG
2696 /*
2697 * retired pages will be loaded from eeprom and reserved here,
2698 * it should be called after amdgpu_device_ip_hw_init_phase2 since
2699 * for some ASICs the RAS EEPROM code relies on the SMU being fully
2700 * functional for I2C communication, which is only true at this point.
b82e65a9
GC
2701 *
2702 * amdgpu_ras_recovery_init may fail, but the upper layers only care
2703 * about failures caused by a bad gpu situation and stop the amdgpu
2704 * init process accordingly. For other failure cases, it will still
2705 * release all the resources and print an error message, rather than
2706 * returning a negative value to the upper level.
121a2bc6
AG
2707 *
2708 * Note: theoretically, this should be called before all vram allocations
2709 * to protect retired page from abusing
2710 */
b82e65a9
GC
2711 r = amdgpu_ras_recovery_init(adev);
2712 if (r)
2713 goto init_failed;
121a2bc6 2714
cfbb6b00
AG
2715 /**
2716 * In case of XGMI grab extra reference for reset domain for this device
2717 */
a4c63caf 2718 if (adev->gmc.xgmi.num_physical_nodes > 1) {
cfbb6b00 2719 if (amdgpu_xgmi_add_device(adev) == 0) {
46c67660 2720 if (!amdgpu_sriov_vf(adev)) {
2efc30f0
VC
2721 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2722
dfd0287b
LH
2723 if (WARN_ON(!hive)) {
2724 r = -ENOENT;
2725 goto init_failed;
2726 }
2727
46c67660 2728 if (!hive->reset_domain ||
2729 !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2730 r = -ENOENT;
2731 amdgpu_put_xgmi_hive(hive);
2732 goto init_failed;
2733 }
2734
2735 /* Drop the early temporary reset domain we created for device */
2736 amdgpu_reset_put_reset_domain(adev->reset_domain);
2737 adev->reset_domain = hive->reset_domain;
9dfa4860 2738 amdgpu_put_xgmi_hive(hive);
cfbb6b00 2739 }
a4c63caf
AG
2740 }
2741 }
2742
5fd8518d
AG
2743 r = amdgpu_device_init_schedulers(adev);
2744 if (r)
2745 goto init_failed;
e3c1b071 2746
b7043800
AD
2747 if (adev->mman.buffer_funcs_ring->sched.ready)
2748 amdgpu_ttm_set_buffer_funcs_status(adev, true);
2749
e3c1b071 2750 	/* Don't init kfd if whole hive needs to be reset during init */
84b4dd3f
PY
2751 if (!adev->gmc.xgmi.pending_reset) {
2752 kgd2kfd_init_zone_device(adev);
e3c1b071 2753 amdgpu_amdkfd_device_init(adev);
84b4dd3f 2754 }
c6332b97 2755
bd607166
KR
2756 amdgpu_fru_get_product_info(adev);
2757
72d3f592 2758init_failed:
c6332b97 2759
72d3f592 2760 return r;
d38ceaf9
AD
2761}
2762
e3ecdffa
AD
2763/**
2764 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2765 *
2766 * @adev: amdgpu_device pointer
2767 *
2768 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2769 * this function before a GPU reset. If the value is retained after a
2770 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2771 */
06ec9070 2772static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2773{
2774 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2775}
2776
e3ecdffa
AD
2777/**
2778 * amdgpu_device_check_vram_lost - check if vram is valid
2779 *
2780 * @adev: amdgpu_device pointer
2781 *
2782 * Checks the reset magic value written to the gart pointer in VRAM.
2783 * The driver calls this after a GPU reset to see if the contents of
2784 * VRAM were lost or not.
2785 * Returns true if vram is lost, false if not.
2786 */
06ec9070 2787static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2788{
dadce777
EQ
2789 if (memcmp(adev->gart.ptr, adev->reset_magic,
2790 AMDGPU_RESET_MAGIC_NUM))
2791 return true;
2792
53b3f8f4 2793 if (!amdgpu_in_reset(adev))
dadce777
EQ
2794 return false;
2795
2796 /*
2797 * For all ASICs with baco/mode1 reset, the VRAM is
2798 * always assumed to be lost.
2799 */
2800 switch (amdgpu_asic_reset_method(adev)) {
2801 case AMD_RESET_METHOD_BACO:
2802 case AMD_RESET_METHOD_MODE1:
2803 return true;
2804 default:
2805 return false;
2806 }
0c49e0b8
CZ
2807}
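
/*
 * Sketch of the reset-magic round trip (illustrative only, this is the
 * pattern the reset path follows with the two helpers above):
 *
 *   amdgpu_device_fill_reset_magic(adev);      // before the reset
 *   ... perform GPU reset ...
 *   if (amdgpu_device_check_vram_lost(adev))
 *           ... treat VRAM contents as lost and restore/evict buffers ...
 */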
2808
e3ecdffa 2809/**
1112a46b 2810 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2811 *
2812 * @adev: amdgpu_device pointer
b8b72130 2813 * @state: clockgating state (gate or ungate)
e3ecdffa 2814 *
e3ecdffa 2815 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2816 * set_clockgating_state callbacks are run.
2817 * During late init this is used to enable clockgating for the hardware IPs;
2818 * during fini or suspend it is used to disable clockgating.
e3ecdffa
AD
2819 * Returns 0 on success, negative error code on failure.
2820 */
fdd34271 2821
5d89bb2d
LL
2822int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2823 enum amd_clockgating_state state)
d38ceaf9 2824{
1112a46b 2825 int i, j, r;
d38ceaf9 2826
4a2ba394
SL
2827 if (amdgpu_emu_mode == 1)
2828 return 0;
2829
1112a46b
RZ
2830 for (j = 0; j < adev->num_ip_blocks; j++) {
2831 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2832 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2833 continue;
47198eb7 2834 /* skip CG for GFX, SDMA on S0ix */
5d70a549 2835 if (adev->in_s0ix &&
47198eb7
AD
2836 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2837 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2838 continue;
4a446d55 2839 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2840 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2841 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2842 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2843 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2844 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2845 /* enable clockgating to save power */
a1255107 2846 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2847 state);
4a446d55
AD
2848 if (r) {
2849 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2850 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2851 return r;
2852 }
b0b00ff1 2853 }
d38ceaf9 2854 }
06b18f61 2855
c9f96fd5
RZ
2856 return 0;
2857}
2858
5d89bb2d
LL
2859int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2860 enum amd_powergating_state state)
c9f96fd5 2861{
1112a46b 2862 int i, j, r;
06b18f61 2863
c9f96fd5
RZ
2864 if (amdgpu_emu_mode == 1)
2865 return 0;
2866
1112a46b
RZ
2867 for (j = 0; j < adev->num_ip_blocks; j++) {
2868 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2869 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5 2870 continue;
47198eb7 2871 /* skip PG for GFX, SDMA on S0ix */
5d70a549 2872 if (adev->in_s0ix &&
47198eb7
AD
2873 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2874 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2875 continue;
c9f96fd5
RZ
2876 /* skip CG for VCE/UVD, it's handled specially */
2877 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2878 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2879 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2880 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2881 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2882 /* enable powergating to save power */
2883 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2884 state);
c9f96fd5
RZ
2885 if (r) {
2886 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2887 adev->ip_blocks[i].version->funcs->name, r);
2888 return r;
2889 }
2890 }
2891 }
2dc80b00
S
2892 return 0;
2893}
2894
beff74bc
AD
2895static int amdgpu_device_enable_mgpu_fan_boost(void)
2896{
2897 struct amdgpu_gpu_instance *gpu_ins;
2898 struct amdgpu_device *adev;
2899 int i, ret = 0;
2900
2901 mutex_lock(&mgpu_info.mutex);
2902
2903 /*
2904 * MGPU fan boost feature should be enabled
2905 * only when there are two or more dGPUs in
2906 * the system
2907 */
2908 if (mgpu_info.num_dgpu < 2)
2909 goto out;
2910
2911 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2912 gpu_ins = &(mgpu_info.gpu_ins[i]);
2913 adev = gpu_ins->adev;
2914 if (!(adev->flags & AMD_IS_APU) &&
f10bb940 2915 !gpu_ins->mgpu_fan_enabled) {
beff74bc
AD
2916 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2917 if (ret)
2918 break;
2919
2920 gpu_ins->mgpu_fan_enabled = 1;
2921 }
2922 }
2923
2924out:
2925 mutex_unlock(&mgpu_info.mutex);
2926
2927 return ret;
2928}
2929
e3ecdffa
AD
2930/**
2931 * amdgpu_device_ip_late_init - run late init for hardware IPs
2932 *
2933 * @adev: amdgpu_device pointer
2934 *
2935 * Late initialization pass for hardware IPs. The list of all the hardware
2936 * IPs that make up the asic is walked and the late_init callbacks are run.
2937 * late_init covers any special initialization that an IP requires
2938 * after all of the IP blocks have been initialized or something that needs to happen
2939 * late in the init process.
2940 * Returns 0 on success, negative error code on failure.
2941 */
06ec9070 2942static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2943{
60599a03 2944 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2945 int i = 0, r;
2946
2947 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2948 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2949 continue;
2950 if (adev->ip_blocks[i].version->funcs->late_init) {
2951 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2952 if (r) {
2953 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2954 adev->ip_blocks[i].version->funcs->name, r);
2955 return r;
2956 }
2dc80b00 2957 }
73f847db 2958 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2959 }
2960
867e24ca 2961 r = amdgpu_ras_late_init(adev);
2962 if (r) {
2963 DRM_ERROR("amdgpu_ras_late_init failed %d", r);
2964 return r;
2965 }
2966
a891d239
DL
2967 amdgpu_ras_set_error_query_ready(adev, true);
2968
1112a46b
RZ
2969 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2970 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2971
06ec9070 2972 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2973
beff74bc
AD
2974 r = amdgpu_device_enable_mgpu_fan_boost();
2975 if (r)
2976 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2977
4da8b639 2978 /* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */
47fc644f
SS
2979 if (amdgpu_passthrough(adev) &&
2980 ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
2981 adev->asic_type == CHIP_ALDEBARAN))
bc143d8b 2982 amdgpu_dpm_handle_passthrough_sbr(adev, true);
60599a03
EQ
2983
2984 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2985 mutex_lock(&mgpu_info.mutex);
2986
2987 /*
2988 * Reset device p-state to low as this was booted with high.
2989 *
2990 * This should be performed only after all devices from the same
2991 * hive get initialized.
2992 *
2993 * However, the number of devices in a hive is not known in advance;
2994 * it is counted one by one as the devices are initialized.
2995 *
2996 * So, we wait until all XGMI interlinked devices are initialized.
2997 * This may introduce some delay, since those devices may come from
2998 * different hives. But that should be OK.
2999 */
3000 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
3001 for (i = 0; i < mgpu_info.num_gpu; i++) {
3002 gpu_instance = &(mgpu_info.gpu_ins[i]);
3003 if (gpu_instance->adev->flags & AMD_IS_APU)
3004 continue;
3005
d84a430d
JK
3006 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
3007 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
3008 if (r) {
3009 DRM_ERROR("pstate setting failed (%d).\n", r);
3010 break;
3011 }
3012 }
3013 }
3014
3015 mutex_unlock(&mgpu_info.mutex);
3016 }
3017
d38ceaf9
AD
3018 return 0;
3019}
3020
613aa3ea
LY
3021/**
3022 * amdgpu_device_smu_fini_early - smu hw_fini wrapper
3023 *
3024 * @adev: amdgpu_device pointer
3025 *
3026 * For ASICs need to disable SMC first
3027 */
3028static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
3029{
3030 int i, r;
3031
4e8303cf 3032 if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
613aa3ea
LY
3033 return;
3034
3035 for (i = 0; i < adev->num_ip_blocks; i++) {
3036 if (!adev->ip_blocks[i].status.hw)
3037 continue;
3038 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3039 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3040 /* XXX handle errors */
3041 if (r) {
3042 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
3043 adev->ip_blocks[i].version->funcs->name, r);
3044 }
3045 adev->ip_blocks[i].status.hw = false;
3046 break;
3047 }
3048 }
3049}
3050
e9669fb7 3051static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
d38ceaf9
AD
3052{
3053 int i, r;
3054
e9669fb7
AG
3055 for (i = 0; i < adev->num_ip_blocks; i++) {
3056 if (!adev->ip_blocks[i].version->funcs->early_fini)
3057 continue;
5278a159 3058
e9669fb7
AG
3059 r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
3060 if (r) {
3061 DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
3062 adev->ip_blocks[i].version->funcs->name, r);
3063 }
3064 }
c030f2e4 3065
05df1f01 3066 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
3067 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
3068
7270e895
TY
3069 amdgpu_amdkfd_suspend(adev, false);
3070
613aa3ea
LY
3071 	/* Workaround for ASICs that need to disable SMC first */
3072 amdgpu_device_smu_fini_early(adev);
3e96dbfd 3073
d38ceaf9 3074 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3075 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 3076 continue;
8201a67a 3077
a1255107 3078 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 3079 /* XXX handle errors */
2c1a2784 3080 if (r) {
a1255107
AD
3081 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
3082 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 3083 }
8201a67a 3084
a1255107 3085 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
3086 }
3087
6effad8a
GC
3088 if (amdgpu_sriov_vf(adev)) {
3089 if (amdgpu_virt_release_full_gpu(adev, false))
3090 DRM_ERROR("failed to release exclusive mode on fini\n");
3091 }
3092
e9669fb7
AG
3093 return 0;
3094}
3095
3096/**
3097 * amdgpu_device_ip_fini - run fini for hardware IPs
3098 *
3099 * @adev: amdgpu_device pointer
3100 *
3101 * Main teardown pass for hardware IPs. The list of all the hardware
3102 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
3103 * are run. hw_fini tears down the hardware associated with each IP
3104 * and sw_fini tears down any software state associated with each IP.
3105 * Returns 0 on success, negative error code on failure.
3106 */
3107static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
3108{
3109 int i, r;
3110
3111 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
3112 amdgpu_virt_release_ras_err_handler_data(adev);
3113
e9669fb7
AG
3114 if (adev->gmc.xgmi.num_physical_nodes > 1)
3115 amdgpu_xgmi_remove_device(adev);
3116
c004d44e 3117 amdgpu_amdkfd_device_fini_sw(adev);
9950cda2 3118
d38ceaf9 3119 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3120 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 3121 continue;
c12aba3a
ML
3122
3123 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 3124 amdgpu_ucode_free_bo(adev);
1e256e27 3125 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a 3126 amdgpu_device_wb_fini(adev);
7ccfd79f 3127 amdgpu_device_mem_scratch_fini(adev);
533aed27 3128 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
3129 }
3130
a1255107 3131 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 3132 /* XXX handle errors */
2c1a2784 3133 if (r) {
a1255107
AD
3134 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
3135 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 3136 }
a1255107
AD
3137 adev->ip_blocks[i].status.sw = false;
3138 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
3139 }
3140
a6dcfd9c 3141 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3142 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 3143 continue;
a1255107
AD
3144 if (adev->ip_blocks[i].version->funcs->late_fini)
3145 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
3146 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
3147 }
3148
c030f2e4 3149 amdgpu_ras_fini(adev);
3150
d38ceaf9
AD
3151 return 0;
3152}
3153
e3ecdffa 3154/**
beff74bc 3155 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 3156 *
1112a46b 3157 * @work: work_struct.
e3ecdffa 3158 */
beff74bc 3159static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
3160{
3161 struct amdgpu_device *adev =
beff74bc 3162 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
3163 int r;
3164
3165 r = amdgpu_ib_ring_tests(adev);
3166 if (r)
3167 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
3168}
3169
1e317b99
RZ
3170static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
3171{
3172 struct amdgpu_device *adev =
3173 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
3174
90a92662
MD
3175 WARN_ON_ONCE(adev->gfx.gfx_off_state);
3176 WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
3177
3178 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
3179 adev->gfx.gfx_off_state = true;
1e317b99
RZ
3180}
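
/*
 * Sketch of how this delayed work is typically armed (the counterpart,
 * amdgpu_gfx_off_ctrl() in amdgpu_gfx.c, is not shown in this file):
 *
 *   amdgpu_gfx_off_ctrl(adev, false);  // bump gfx_off_req_count, block GFXOFF
 *   ... access GFX registers safely ...
 *   amdgpu_gfx_off_ctrl(adev, true);   // drop the count; when it reaches zero
 *                                      // the delayed work above re-enables
 *                                      // GFXOFF after a short grace period
 */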
3181
e3ecdffa 3182/**
e7854a03 3183 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
3184 *
3185 * @adev: amdgpu_device pointer
3186 *
3187 * Main suspend function for hardware IPs. The list of all the hardware
3188 * IPs that make up the asic is walked, clockgating is disabled and the
3189 * suspend callbacks are run. suspend puts the hardware and software state
3190 * in each IP into a state suitable for suspend.
3191 * Returns 0 on success, negative error code on failure.
3192 */
e7854a03
AD
3193static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
3194{
3195 int i, r;
3196
50ec83f0
AD
3197 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3198 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 3199
b31d6ada
EQ
3200 /*
3201 * Per PMFW team's suggestion, driver needs to handle gfxoff
3202 * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
3203 * scenario. Add the missing df cstate disablement here.
3204 */
3205 if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
3206 dev_warn(adev->dev, "Failed to disallow df cstate");
3207
e7854a03
AD
3208 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3209 if (!adev->ip_blocks[i].status.valid)
3210 continue;
2b9f7848 3211
e7854a03 3212 /* displays are handled separately */
2b9f7848
ND
3213 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
3214 continue;
3215
3216 /* XXX handle errors */
3217 r = adev->ip_blocks[i].version->funcs->suspend(adev);
3218 /* XXX handle errors */
3219 if (r) {
3220 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3221 adev->ip_blocks[i].version->funcs->name, r);
3222 return r;
e7854a03 3223 }
2b9f7848
ND
3224
3225 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
3226 }
3227
e7854a03
AD
3228 return 0;
3229}
3230
3231/**
3232 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3233 *
3234 * @adev: amdgpu_device pointer
3235 *
3236 * Main suspend function for hardware IPs. The list of all the hardware
3237 * IPs that make up the asic is walked, clockgating is disabled and the
3238 * suspend callbacks are run. suspend puts the hardware and software state
3239 * in each IP into a state suitable for suspend.
3240 * Returns 0 on success, negative error code on failure.
3241 */
3242static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3243{
3244 int i, r;
3245
557f42a2 3246 if (adev->in_s0ix)
bc143d8b 3247 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
34416931 3248
d38ceaf9 3249 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3250 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 3251 continue;
e7854a03
AD
3252 /* displays are handled in phase1 */
3253 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3254 continue;
bff77e86
LM
3255 /* PSP lost connection when err_event_athub occurs */
3256 if (amdgpu_ras_intr_triggered() &&
3257 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3258 adev->ip_blocks[i].status.hw = false;
3259 continue;
3260 }
e3c1b071 3261
3262 /* skip unnecessary suspend if we do not initialize them yet */
3263 if (adev->gmc.xgmi.pending_reset &&
3264 !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3265 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
3266 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3267 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
3268 adev->ip_blocks[i].status.hw = false;
3269 continue;
3270 }
557f42a2 3271
afa6646b 3272 /* skip suspend of gfx/mes and psp for S0ix
32ff160d
AD
3273 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3274 * like at runtime. PSP is also part of the always on hardware
3275 * so no need to suspend it.
3276 */
557f42a2 3277 if (adev->in_s0ix &&
32ff160d 3278 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
afa6646b
AD
3279 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3280 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
557f42a2
AD
3281 continue;
3282
2a7798ea
AD
3283 /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3284 if (adev->in_s0ix &&
4e8303cf
LL
3285 (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
3286 IP_VERSION(5, 0, 0)) &&
3287 (adev->ip_blocks[i].version->type ==
3288 AMD_IP_BLOCK_TYPE_SDMA))
2a7798ea
AD
3289 continue;
3290
e11c7750
TH
3291 /* Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot.
3292 * These are in TMR, hence are expected to be reused by PSP-TOS to reload
3293 * from this location and RLC Autoload automatically also gets loaded
3294 * from here based on PMFW -> PSP message during re-init sequence.
3295 * Therefore, the psp suspend & resume should be skipped to avoid destroying
3296 * the TMR and reloading FWs again for IMU enabled APU ASICs.
3297 */
3298 if (amdgpu_in_reset(adev) &&
3299 (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3300 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3301 continue;
3302
d38ceaf9 3303 /* XXX handle errors */
a1255107 3304 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 3305 /* XXX handle errors */
2c1a2784 3306 if (r) {
a1255107
AD
3307 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3308 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 3309 }
876923fb 3310 adev->ip_blocks[i].status.hw = false;
a3a09142 3311 /* handle putting the SMC in the appropriate state */
47fc644f 3312 if (!amdgpu_sriov_vf(adev)) {
86b93fd6
JZ
3313 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3314 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3315 if (r) {
3316 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3317 adev->mp1_state, r);
3318 return r;
3319 }
a3a09142
AD
3320 }
3321 }
d38ceaf9
AD
3322 }
3323
3324 return 0;
3325}
3326
e7854a03
AD
3327/**
3328 * amdgpu_device_ip_suspend - run suspend for hardware IPs
3329 *
3330 * @adev: amdgpu_device pointer
3331 *
3332 * Main suspend function for hardware IPs. The list of all the hardware
3333 * IPs that make up the asic is walked, clockgating is disabled and the
3334 * suspend callbacks are run. suspend puts the hardware and software state
3335 * in each IP into a state suitable for suspend.
3336 * Returns 0 on success, negative error code on failure.
3337 */
3338int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3339{
3340 int r;
3341
3c73683c
JC
3342 if (amdgpu_sriov_vf(adev)) {
3343 amdgpu_virt_fini_data_exchange(adev);
e7819644 3344 amdgpu_virt_request_full_gpu(adev, false);
3c73683c 3345 }
e7819644 3346
b7043800
AD
3347 amdgpu_ttm_set_buffer_funcs_status(adev, false);
3348
e7854a03
AD
3349 r = amdgpu_device_ip_suspend_phase1(adev);
3350 if (r)
3351 return r;
3352 r = amdgpu_device_ip_suspend_phase2(adev);
3353
e7819644
YT
3354 if (amdgpu_sriov_vf(adev))
3355 amdgpu_virt_release_full_gpu(adev, false);
3356
e7854a03
AD
3357 return r;
3358}
3359
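/* Illustrative ordering note for amdgpu_device_ip_suspend() above: during a
 * normal suspend (see amdgpu_device_suspend() later in this file) phase1 runs
 * first, then KFD is suspended and device resources are evicted, the fence
 * hardware is torn down, and phase2 finally suspends the remaining IP blocks.
 */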
06ec9070 3360static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3361{
3362 int i, r;
3363
2cb681b6 3364 static enum amd_ip_block_type ip_order[] = {
2cb681b6 3365 AMD_IP_BLOCK_TYPE_COMMON,
c1c39032 3366 AMD_IP_BLOCK_TYPE_GMC,
39186aef 3367 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
3368 AMD_IP_BLOCK_TYPE_IH,
3369 };
a90ad3c2 3370
95ea3dbc 3371 for (i = 0; i < adev->num_ip_blocks; i++) {
2cb681b6
ML
3372 int j;
3373 struct amdgpu_ip_block *block;
a90ad3c2 3374
4cd2a96d
J
3375 block = &adev->ip_blocks[i];
3376 block->status.hw = false;
2cb681b6 3377
4cd2a96d 3378 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
2cb681b6 3379
4cd2a96d 3380 if (block->version->type != ip_order[j] ||
2cb681b6
ML
3381 !block->status.valid)
3382 continue;
3383
3384 r = block->version->funcs->hw_init(adev);
0aaeefcc 3385 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
c41d1cf6
ML
3386 if (r)
3387 return r;
482f0e53 3388 block->status.hw = true;
a90ad3c2
ML
3389 }
3390 }
3391
3392 return 0;
3393}
3394
06ec9070 3395static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3396{
3397 int i, r;
3398
2cb681b6
ML
3399 static enum amd_ip_block_type ip_order[] = {
3400 AMD_IP_BLOCK_TYPE_SMC,
3401 AMD_IP_BLOCK_TYPE_DCE,
3402 AMD_IP_BLOCK_TYPE_GFX,
3403 AMD_IP_BLOCK_TYPE_SDMA,
ec64350d 3404 AMD_IP_BLOCK_TYPE_MES,
257deb8c 3405 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07 3406 AMD_IP_BLOCK_TYPE_VCE,
d2cdc014
YZ
3407 AMD_IP_BLOCK_TYPE_VCN,
3408 AMD_IP_BLOCK_TYPE_JPEG
2cb681b6 3409 };
a90ad3c2 3410
2cb681b6
ML
3411 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3412 int j;
3413 struct amdgpu_ip_block *block;
a90ad3c2 3414
2cb681b6
ML
3415 for (j = 0; j < adev->num_ip_blocks; j++) {
3416 block = &adev->ip_blocks[j];
3417
3418 if (block->version->type != ip_order[i] ||
482f0e53
ML
3419 !block->status.valid ||
3420 block->status.hw)
2cb681b6
ML
3421 continue;
3422
895bd048
JZ
3423 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3424 r = block->version->funcs->resume(adev);
3425 else
3426 r = block->version->funcs->hw_init(adev);
3427
0aaeefcc 3428 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
c41d1cf6
ML
3429 if (r)
3430 return r;
482f0e53 3431 block->status.hw = true;
a90ad3c2
ML
3432 }
3433 }
3434
3435 return 0;
3436}
3437
e3ecdffa
AD
3438/**
3439 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3440 *
3441 * @adev: amdgpu_device pointer
3442 *
3443 * First resume function for hardware IPs. The list of all the hardware
3444 * IPs that make up the asic is walked and the resume callbacks are run for
3445 * COMMON, GMC, and IH. resume puts the hardware into a functional state
3446 * after a suspend and updates the software state as necessary. This
3447 * function is also used for restoring the GPU after a GPU reset.
3448 * Returns 0 on success, negative error code on failure.
3449 */
06ec9070 3450static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
3451{
3452 int i, r;
3453
a90ad3c2 3454 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3455 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 3456 continue;
a90ad3c2 3457 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3458 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
d7274ec7
BZ
3459 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3460 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
482f0e53 3461
fcf0649f
CZ
3462 r = adev->ip_blocks[i].version->funcs->resume(adev);
3463 if (r) {
3464 DRM_ERROR("resume of IP block <%s> failed %d\n",
3465 adev->ip_blocks[i].version->funcs->name, r);
3466 return r;
3467 }
482f0e53 3468 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
3469 }
3470 }
3471
3472 return 0;
3473}
3474
e3ecdffa
AD
3475/**
3476 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3477 *
3478 * @adev: amdgpu_device pointer
3479 *
3480 * Second resume function for hardware IPs. The list of all the hardware
3481 * IPs that make up the asic is walked and the resume callbacks are run for
3482 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
3483 * functional state after a suspend and updates the software state as
3484 * necessary. This function is also used for restoring the GPU after a GPU
3485 * reset.
3486 * Returns 0 on success, negative error code on failure.
3487 */
06ec9070 3488static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3489{
3490 int i, r;
3491
3492 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3493 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 3494 continue;
fcf0649f 3495 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3496 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
3497 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3498 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 3499 continue;
a1255107 3500 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 3501 if (r) {
a1255107
AD
3502 DRM_ERROR("resume of IP block <%s> failed %d\n",
3503 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 3504 return r;
2c1a2784 3505 }
482f0e53 3506 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
3507 }
3508
3509 return 0;
3510}
3511
e3ecdffa
AD
3512/**
3513 * amdgpu_device_ip_resume - run resume for hardware IPs
3514 *
3515 * @adev: amdgpu_device pointer
3516 *
3517 * Main resume function for hardware IPs. The hardware IPs
3518 * are split into two resume functions because they are
b8920e1e 3519 * also used in recovering from a GPU reset and some additional
e3ecdffa
AD
3520 * steps need to be taken between them. In this case (S3/S4) they are
3521 * run sequentially.
3522 * Returns 0 on success, negative error code on failure.
3523 */
06ec9070 3524static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
3525{
3526 int r;
3527
06ec9070 3528 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
3529 if (r)
3530 return r;
7a3e0bb2
RZ
3531
3532 r = amdgpu_device_fw_loading(adev);
3533 if (r)
3534 return r;
3535
06ec9070 3536 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f 3537
b7043800
AD
3538 if (adev->mman.buffer_funcs_ring->sched.ready)
3539 amdgpu_ttm_set_buffer_funcs_status(adev, true);
3540
fcf0649f
CZ
3541 return r;
3542}
3543
e3ecdffa
AD
3544/**
3545 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3546 *
3547 * @adev: amdgpu_device pointer
3548 *
3549 * Query the VBIOS data tables to determine if the board supports SR-IOV.
3550 */
4e99a44e 3551static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 3552{
6867e1b5
ML
3553 if (amdgpu_sriov_vf(adev)) {
3554 if (adev->is_atom_fw) {
58ff791a 3555 if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
6867e1b5
ML
3556 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3557 } else {
3558 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3559 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3560 }
3561
3562 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3563 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 3564 }
048765ad
AR
3565}
3566
e3ecdffa
AD
3567/**
3568 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3569 *
3570 * @asic_type: AMD asic type
3571 *
3572 * Check if there is DC (new modesetting infrastructure) support for an asic.
3573 * Returns true if DC has support, false if not.
3574 */
4562236b
HW
3575bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3576{
3577 switch (asic_type) {
0637d417
AD
3578#ifdef CONFIG_DRM_AMDGPU_SI
3579 case CHIP_HAINAN:
3580#endif
3581 case CHIP_TOPAZ:
3582 /* chips with no display hardware */
3583 return false;
4562236b 3584#if defined(CONFIG_DRM_AMD_DC)
64200c46
MR
3585 case CHIP_TAHITI:
3586 case CHIP_PITCAIRN:
3587 case CHIP_VERDE:
3588 case CHIP_OLAND:
2d32ffd6
AD
3589 /*
3590 * We have systems in the wild with these ASICs that require
3591 * LVDS and VGA support which is not supported with DC.
3592 *
3593 * Fallback to the non-DC driver here by default so as not to
3594 * cause regressions.
3595 */
3596#if defined(CONFIG_DRM_AMD_DC_SI)
3597 return amdgpu_dc > 0;
3598#else
3599 return false;
64200c46 3600#endif
4562236b 3601 case CHIP_BONAIRE:
0d6fbccb 3602 case CHIP_KAVERI:
367e6687
AD
3603 case CHIP_KABINI:
3604 case CHIP_MULLINS:
d9fda248
HW
3605 /*
3606 * We have systems in the wild with these ASICs that require
b5a0168e 3607 * VGA support which is not supported with DC.
d9fda248
HW
3608 *
3609 * Fallback to the non-DC driver here by default so as not to
3610 * cause regressions.
3611 */
3612 return amdgpu_dc > 0;
f7f12b25 3613 default:
fd187853 3614 return amdgpu_dc != 0;
f7f12b25 3615#else
4562236b 3616 default:
93b09a9a 3617 if (amdgpu_dc > 0)
b8920e1e 3618 DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
4562236b 3619 return false;
f7f12b25 3620#endif
4562236b
HW
3621 }
3622}
3623
3624/**
3625 * amdgpu_device_has_dc_support - check if dc is supported
3626 *
982a820b 3627 * @adev: amdgpu_device pointer
4562236b
HW
3628 *
3629 * Returns true for supported, false for not supported
3630 */
3631bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3632{
25263da3 3633 if (adev->enable_virtual_display ||
abaf210c 3634 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
2555039d
XY
3635 return false;
3636
4562236b
HW
3637 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3638}
3639
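/* Illustrative summary of the checks above: on a legacy ASIC such as
 * CHIP_KAVERI the non-DC display path is used unless the user explicitly
 * boots with amdgpu.dc=1, while ASICs falling through to the default case
 * use DC unless amdgpu.dc=0.
 */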
d4535e2c
AG
3640static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3641{
3642 struct amdgpu_device *adev =
3643 container_of(__work, struct amdgpu_device, xgmi_reset_work);
d95e8e97 3644 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
d4535e2c 3645
c6a6e2db
AG
3646 /* It's a bug to not have a hive within this function */
3647 if (WARN_ON(!hive))
3648 return;
3649
3650 /*
3651 * Use task barrier to synchronize all xgmi reset works across the
3652 * hive. task_barrier_enter and task_barrier_exit will block
3653 * until all the threads running the xgmi reset works reach
3654 * those points. task_barrier_full will do both blocks.
3655 */
3656 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3657
3658 task_barrier_enter(&hive->tb);
4a580877 3659 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
c6a6e2db
AG
3660
3661 if (adev->asic_reset_res)
3662 goto fail;
3663
3664 task_barrier_exit(&hive->tb);
4a580877 3665 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
c6a6e2db
AG
3666
3667 if (adev->asic_reset_res)
3668 goto fail;
43c4d576 3669
21226f02 3670 amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB);
c6a6e2db
AG
3671 } else {
3672
3673 task_barrier_full(&hive->tb);
3674 adev->asic_reset_res = amdgpu_asic_reset(adev);
3675 }
ce316fa5 3676
c6a6e2db 3677fail:
d4535e2c 3678 if (adev->asic_reset_res)
fed184e9 3679 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
4a580877 3680 adev->asic_reset_res, adev_to_drm(adev)->unique);
d95e8e97 3681 amdgpu_put_xgmi_hive(hive);
d4535e2c
AG
3682}
3683
71f98027
AD
3684static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3685{
3686 char *input = amdgpu_lockup_timeout;
3687 char *timeout_setting = NULL;
3688 int index = 0;
3689 long timeout;
3690 int ret = 0;
3691
3692 /*
67387dfe
AD
3693 * By default the timeout for non-compute jobs is 10000 ms
3694 * and 60000 ms for compute jobs.
71f98027 3695 * In SR-IOV mode the compute timeout stays at 60000 ms only when one
b7b2a316 3696 * VF owns all resources (pp_one_vf); otherwise it falls back to 10000 ms.
71f98027
AD
3697 */
3698 adev->gfx_timeout = msecs_to_jiffies(10000);
3699 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
9882e278
ED
3700 if (amdgpu_sriov_vf(adev))
3701 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3702 msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
71f98027 3703 else
67387dfe 3704 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027 3705
f440ff44 3706 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 3707 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 3708 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
3709 ret = kstrtol(timeout_setting, 0, &timeout);
3710 if (ret)
3711 return ret;
3712
3713 if (timeout == 0) {
3714 index++;
3715 continue;
3716 } else if (timeout < 0) {
3717 timeout = MAX_SCHEDULE_TIMEOUT;
127aedf9
CK
3718 dev_warn(adev->dev, "lockup timeout disabled");
3719 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
71f98027
AD
3720 } else {
3721 timeout = msecs_to_jiffies(timeout);
3722 }
3723
3724 switch (index++) {
3725 case 0:
3726 adev->gfx_timeout = timeout;
3727 break;
3728 case 1:
3729 adev->compute_timeout = timeout;
3730 break;
3731 case 2:
3732 adev->sdma_timeout = timeout;
3733 break;
3734 case 3:
3735 adev->video_timeout = timeout;
3736 break;
3737 default:
3738 break;
3739 }
3740 }
3741 /*
3742 * There is only one value specified and
3743 * it should apply to all non-compute jobs.
3744 */
bcccee89 3745 if (index == 1) {
71f98027 3746 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
3747 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3748 adev->compute_timeout = adev->gfx_timeout;
3749 }
71f98027
AD
3750 }
3751
3752 return ret;
3753}
d4535e2c 3754
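/* Illustrative usage of the format parsed above (hypothetical values):
 *   amdgpu.lockup_timeout=10000,60000,-1,5000
 * gives gfx = 10 s, compute = 60 s, sdma = disabled (MAX_SCHEDULE_TIMEOUT)
 * and video = 5 s, while a single value such as
 *   amdgpu.lockup_timeout=20000
 * applies to all non-compute queues (and to compute as well under
 * SR-IOV or passthrough).
 */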
4a74c38c
PY
3755/**
3756 * amdgpu_device_check_iommu_direct_map - check if RAM is direct-mapped to the GPU
3757 *
3758 * @adev: amdgpu_device pointer
3759 *
3760 * RAM is direct-mapped to the GPU if the IOMMU is disabled or in passthrough mode.
3761 */
3762static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3763{
3764 struct iommu_domain *domain;
3765
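 /* Illustrative: booting with iommu=pt (or with no IOMMU at all) typically
  * yields an identity/direct-mapped domain here, so ram_is_direct_mapped
  * ends up true; a translating DMA domain leaves it false.
  */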
3766 domain = iommu_get_domain_for_dev(adev->dev);
3767 if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3768 adev->ram_is_direct_mapped = true;
3769}
3770
77f3a5cd 3771static const struct attribute *amdgpu_dev_attributes[] = {
77f3a5cd
ND
3772 &dev_attr_pcie_replay_count.attr,
3773 NULL
3774};
3775
02ff519e
AD
3776static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
3777{
3778 if (amdgpu_mcbp == 1)
3779 adev->gfx.mcbp = true;
1e9e15dc
JZ
3780 else if (amdgpu_mcbp == 0)
3781 adev->gfx.mcbp = false;
4e8303cf
LL
3782 else if ((amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 0, 0)) &&
3783 (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 0, 0)) &&
1e9e15dc 3784 adev->gfx.num_gfx_rings)
50a7c876
AD
3785 adev->gfx.mcbp = true;
3786
02ff519e
AD
3787 if (amdgpu_sriov_vf(adev))
3788 adev->gfx.mcbp = true;
3789
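 /* Illustrative net effect of the checks above: amdgpu.mcbp=1/0 overrides
  * the GC 9.x heuristic, but SR-IOV VFs force MCBP on regardless of the
  * module parameter.
  */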
3790 if (adev->gfx.mcbp)
3791 DRM_INFO("MCBP is enabled\n");
3792}
3793
d38ceaf9
AD
3794/**
3795 * amdgpu_device_init - initialize the driver
3796 *
3797 * @adev: amdgpu_device pointer
d38ceaf9
AD
3798 * @flags: driver flags
3799 *
3800 * Initializes the driver info and hw (all asics).
3801 * Returns 0 for success or an error on failure.
3802 * Called at driver startup.
3803 */
3804int amdgpu_device_init(struct amdgpu_device *adev,
d38ceaf9
AD
3805 uint32_t flags)
3806{
8aba21b7
LT
3807 struct drm_device *ddev = adev_to_drm(adev);
3808 struct pci_dev *pdev = adev->pdev;
d38ceaf9 3809 int r, i;
b98c6299 3810 bool px = false;
95844d20 3811 u32 max_MBps;
59e9fff1 3812 int tmp;
d38ceaf9
AD
3813
3814 adev->shutdown = false;
d38ceaf9 3815 adev->flags = flags;
4e66d7d2
YZ
3816
3817 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3818 adev->asic_type = amdgpu_force_asic_type;
3819 else
3820 adev->asic_type = flags & AMD_ASIC_MASK;
3821
d38ceaf9 3822 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 3823 if (amdgpu_emu_mode == 1)
8bdab6bb 3824 adev->usec_timeout *= 10;
770d13b1 3825 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
3826 adev->accel_working = false;
3827 adev->num_rings = 0;
68ce8b24 3828 RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
d38ceaf9
AD
3829 adev->mman.buffer_funcs = NULL;
3830 adev->mman.buffer_funcs_ring = NULL;
3831 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 3832 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 3833 adev->gmc.gmc_funcs = NULL;
7bd939d0 3834 adev->harvest_ip_mask = 0x0;
f54d1867 3835 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 3836 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
3837
3838 adev->smc_rreg = &amdgpu_invalid_rreg;
3839 adev->smc_wreg = &amdgpu_invalid_wreg;
3840 adev->pcie_rreg = &amdgpu_invalid_rreg;
3841 adev->pcie_wreg = &amdgpu_invalid_wreg;
0c552ed3
LM
3842 adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
3843 adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
36b9a952
HR
3844 adev->pciep_rreg = &amdgpu_invalid_rreg;
3845 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
3846 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3847 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
a76b2870
CL
3848 adev->pcie_rreg64_ext = &amdgpu_invalid_rreg64_ext;
3849 adev->pcie_wreg64_ext = &amdgpu_invalid_wreg64_ext;
d38ceaf9
AD
3850 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3851 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3852 adev->didt_rreg = &amdgpu_invalid_rreg;
3853 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
3854 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3855 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
3856 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3857 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3858
3e39ab90
AD
3859 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3860 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3861 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
3862
3863 /* mutex initializations are all done here so we
b8920e1e
SS
3864 * can recall functions without running into locking issues
3865 */
0e5ca0d1 3866 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
3867 mutex_init(&adev->pm.mutex);
3868 mutex_init(&adev->gfx.gpu_clock_mutex);
3869 mutex_init(&adev->srbm_mutex);
b8866c26 3870 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3871 mutex_init(&adev->gfx.gfx_off_mutex);
98a54e88 3872 mutex_init(&adev->gfx.partition_mutex);
d38ceaf9 3873 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3874 mutex_init(&adev->mn_lock);
e23b74aa 3875 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3876 hash_init(adev->mn_hash);
32eaeae0 3877 mutex_init(&adev->psp.mutex);
bd052211 3878 mutex_init(&adev->notifier_lock);
8cda7a4f 3879 mutex_init(&adev->pm.stable_pstate_ctx_lock);
f113cc32 3880 mutex_init(&adev->benchmark_mutex);
d38ceaf9 3881
ab3b9de6 3882 amdgpu_device_init_apu_flags(adev);
9f6a7857 3883
912dfc84
EQ
3884 r = amdgpu_device_check_arguments(adev);
3885 if (r)
3886 return r;
d38ceaf9 3887
d38ceaf9
AD
3888 spin_lock_init(&adev->mmio_idx_lock);
3889 spin_lock_init(&adev->smc_idx_lock);
3890 spin_lock_init(&adev->pcie_idx_lock);
3891 spin_lock_init(&adev->uvd_ctx_idx_lock);
3892 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3893 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3894 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3895 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3896 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3897
0c4e7fa5
CZ
3898 INIT_LIST_HEAD(&adev->shadow_list);
3899 mutex_init(&adev->shadow_list_lock);
3900
655ce9cb 3901 INIT_LIST_HEAD(&adev->reset_list);
3902
6492e1b0 3903 INIT_LIST_HEAD(&adev->ras_list);
3904
3e38b634
EQ
3905 INIT_LIST_HEAD(&adev->pm.od_kobj_list);
3906
beff74bc
AD
3907 INIT_DELAYED_WORK(&adev->delayed_init_work,
3908 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3909 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3910 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3911
d4535e2c
AG
3912 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3913
d23ee13f 3914 adev->gfx.gfx_off_req_count = 1;
0ad7347a
AA
3915 adev->gfx.gfx_off_residency = 0;
3916 adev->gfx.gfx_off_entrycount = 0;
b6e79d9a 3917 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3918
b265bdbd
EQ
3919 atomic_set(&adev->throttling_logging_enabled, 1);
3920 /*
3921 * If throttling continues, logging will be performed every minute
3922 * to avoid log flooding. "-1" is subtracted since the thermal
3923 * throttling interrupt comes every second. Thus, the total logging
3924 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
3925 * for throttling interrupt) = 60 seconds.
3926 */
3927 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3928 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3929
0fa49558
AX
3930 /* Registers mapping */
3931 /* TODO: block userspace mapping of io register */
da69c161
KW
3932 if (adev->asic_type >= CHIP_BONAIRE) {
3933 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3934 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3935 } else {
3936 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3937 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3938 }
d38ceaf9 3939
6c08e0ef
EQ
3940 for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3941 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3942
d38ceaf9 3943 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
b8920e1e 3944 if (!adev->rmmio)
d38ceaf9 3945 return -ENOMEM;
b8920e1e 3946
d38ceaf9 3947 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
b8920e1e 3948 DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size);
d38ceaf9 3949
436afdfa
PY
3950 /*
3951 * Reset domain needs to be present early, before XGMI hive discovered
3952 * (if any) and intitialized to use reset sem and in_gpu reset flag
3953 * early on during init and before calling to RREG32.
3954 */
3955 adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3956 if (!adev->reset_domain)
3957 return -ENOMEM;
3958
3aa0115d
ML
3959 /* detect hw virtualization here */
3960 amdgpu_detect_virtualization(adev);
3961
04e85958
TL
3962 amdgpu_device_get_pcie_info(adev);
3963
dffa11b4
ML
3964 r = amdgpu_device_get_job_timeout_settings(adev);
3965 if (r) {
3966 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4ef87d8f 3967 return r;
a190d1c7
XY
3968 }
3969
d38ceaf9 3970 /* early init functions */
06ec9070 3971 r = amdgpu_device_ip_early_init(adev);
d38ceaf9 3972 if (r)
4ef87d8f 3973 return r;
d38ceaf9 3974
02ff519e
AD
3975 amdgpu_device_set_mcbp(adev);
3976
b7cdb41e
ML
3977 /* Get rid of things like offb */
3978 r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
3979 if (r)
3980 return r;
3981
4d33e704
SK
3982 /* Enable TMZ based on IP_VERSION */
3983 amdgpu_gmc_tmz_set(adev);
3984
957b0787 3985 amdgpu_gmc_noretry_set(adev);
4a0165f0
VS
3986 /* Need to get xgmi info early to decide the reset behavior */
3987 if (adev->gmc.xgmi.supported) {
3988 r = adev->gfxhub.funcs->get_xgmi_info(adev);
3989 if (r)
3990 return r;
3991 }
3992
8e6d0b69 3993 /* enable PCIE atomic ops */
b4520bfd
GW
3994 if (amdgpu_sriov_vf(adev)) {
3995 if (adev->virt.fw_reserve.p_pf2vf)
3996 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
3997 adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
3998 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
0e768043
YZ
3999 /* APUs with gfx9 onwards don't rely on PCIe atomics; the internal
4000 * path natively supports atomics, so set have_atomics_support to true.
4001 */
b4520bfd 4002 } else if ((adev->flags & AMD_IS_APU) &&
4e8303cf
LL
4003 (amdgpu_ip_version(adev, GC_HWIP, 0) >
4004 IP_VERSION(9, 0, 0))) {
0e768043 4005 adev->have_atomics_support = true;
b4520bfd 4006 } else {
8e6d0b69 4007 adev->have_atomics_support =
4008 !pci_enable_atomic_ops_to_root(adev->pdev,
4009 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
4010 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
b4520bfd
GW
4011 }
4012
8e6d0b69 4013 if (!adev->have_atomics_support)
4014 dev_info(adev->dev, "PCIE atomic ops is not supported\n");
4015
6585661d 4016 /* doorbell bar mapping and doorbell index init*/
43c064db 4017 amdgpu_doorbell_init(adev);
6585661d 4018
9475a943
SL
4019 if (amdgpu_emu_mode == 1) {
4020 /* post the asic on emulation mode */
4021 emu_soc_asic_init(adev);
bfca0289 4022 goto fence_driver_init;
9475a943 4023 }
bfca0289 4024
04442bf7
LL
4025 amdgpu_reset_init(adev);
4026
4e99a44e 4027 /* detect if we are with an SRIOV vbios */
b4520bfd
GW
4028 if (adev->bios)
4029 amdgpu_device_detect_sriov_bios(adev);
048765ad 4030
95e8e59e
AD
4031 /* check if we need to reset the asic
4032 * E.g., driver was not cleanly unloaded previously, etc.
4033 */
f14899fd 4034 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
e3c1b071 4035 if (adev->gmc.xgmi.num_physical_nodes) {
4036 dev_info(adev->dev, "Pending hive reset.\n");
4037 adev->gmc.xgmi.pending_reset = true;
4038 /* Only need to init necessary block for SMU to handle the reset */
4039 for (i = 0; i < adev->num_ip_blocks; i++) {
4040 if (!adev->ip_blocks[i].status.valid)
4041 continue;
4042 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
4043 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
4044 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
4045 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
751f43e7 4046 DRM_DEBUG("IP %s disabled for hw_init.\n",
e3c1b071 4047 adev->ip_blocks[i].version->funcs->name);
4048 adev->ip_blocks[i].status.hw = true;
4049 }
4050 }
4051 } else {
5f38ac54
KF
4052 switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
4053 case IP_VERSION(13, 0, 0):
4054 case IP_VERSION(13, 0, 7):
4055 case IP_VERSION(13, 0, 10):
4056 r = psp_gpu_reset(adev);
4057 break;
4058 default:
4059 tmp = amdgpu_reset_method;
4060 /* It should do a default reset when loading or reloading the driver,
4061 * regardless of the module parameter reset_method.
4062 */
4063 amdgpu_reset_method = AMD_RESET_METHOD_NONE;
4064 r = amdgpu_asic_reset(adev);
4065 amdgpu_reset_method = tmp;
4066 break;
4067 }
4068
e3c1b071 4069 if (r) {
4070 dev_err(adev->dev, "asic reset on init failed\n");
4071 goto failed;
4072 }
95e8e59e
AD
4073 }
4074 }
4075
d38ceaf9 4076 /* Post card if necessary */
39c640c0 4077 if (amdgpu_device_need_post(adev)) {
d38ceaf9 4078 if (!adev->bios) {
bec86378 4079 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
4080 r = -EINVAL;
4081 goto failed;
d38ceaf9 4082 }
bec86378 4083 DRM_INFO("GPU posting now...\n");
4d2997ab 4084 r = amdgpu_device_asic_init(adev);
4e99a44e
ML
4085 if (r) {
4086 dev_err(adev->dev, "gpu post error!\n");
4087 goto failed;
4088 }
d38ceaf9
AD
4089 }
4090
9535a86a
SZ
4091 if (adev->bios) {
4092 if (adev->is_atom_fw) {
4093 /* Initialize clocks */
4094 r = amdgpu_atomfirmware_get_clock_info(adev);
4095 if (r) {
4096 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
4097 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4098 goto failed;
4099 }
4100 } else {
4101 /* Initialize clocks */
4102 r = amdgpu_atombios_get_clock_info(adev);
4103 if (r) {
4104 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
4105 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4106 goto failed;
4107 }
4108 /* init i2c buses */
4109 if (!amdgpu_device_has_dc_support(adev))
4110 amdgpu_atombios_i2c_init(adev);
a5bde2f9 4111 }
2c1a2784 4112 }
d38ceaf9 4113
bfca0289 4114fence_driver_init:
d38ceaf9 4115 /* Fence driver */
067f44c8 4116 r = amdgpu_fence_driver_sw_init(adev);
2c1a2784 4117 if (r) {
067f44c8 4118 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
e23b74aa 4119 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 4120 goto failed;
2c1a2784 4121 }
d38ceaf9
AD
4122
4123 /* init the mode config */
4a580877 4124 drm_mode_config_init(adev_to_drm(adev));
d38ceaf9 4125
06ec9070 4126 r = amdgpu_device_ip_init(adev);
d38ceaf9 4127 if (r) {
06ec9070 4128 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 4129 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
970fd197 4130 goto release_ras_con;
d38ceaf9
AD
4131 }
4132
8d35a259
LG
4133 amdgpu_fence_driver_hw_init(adev);
4134
d69b8971
YZ
4135 dev_info(adev->dev,
4136 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
4137 adev->gfx.config.max_shader_engines,
4138 adev->gfx.config.max_sh_per_se,
4139 adev->gfx.config.max_cu_per_sh,
4140 adev->gfx.cu_info.number);
4141
d38ceaf9
AD
4142 adev->accel_working = true;
4143
e59c0205
AX
4144 amdgpu_vm_check_compute_bug(adev);
4145
95844d20
MO
4146 /* Initialize the buffer migration limit. */
4147 if (amdgpu_moverate >= 0)
4148 max_MBps = amdgpu_moverate;
4149 else
4150 max_MBps = 8; /* Allow 8 MB/s. */
4151 /* Get a log2 for easy divisions. */
4152 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
4153
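 /* Worked example (illustrative): with the default max_MBps = 8,
  * ilog2(8) = 3, so later migration-throttling math can scale by the
  * rate with a simple shift (x << 3) instead of a multiply or divide.
  */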
b0adca4d
EQ
4154 /*
4155 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
4156 * Otherwise the mgpu fan boost feature will be skipped because the
4157 * gpu instance count would be too low.
4158 */
4159 amdgpu_register_gpu_instance(adev);
4160
d38ceaf9
AD
4161 /* enable clockgating, etc. after ib tests, etc. since some blocks require
4162 * explicit gating rather than handling it automatically.
4163 */
e3c1b071 4164 if (!adev->gmc.xgmi.pending_reset) {
4165 r = amdgpu_device_ip_late_init(adev);
4166 if (r) {
4167 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
4168 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
970fd197 4169 goto release_ras_con;
e3c1b071 4170 }
4171 /* must succeed. */
4172 amdgpu_ras_resume(adev);
4173 queue_delayed_work(system_wq, &adev->delayed_init_work,
4174 msecs_to_jiffies(AMDGPU_RESUME_MS));
2c1a2784 4175 }
d38ceaf9 4176
38eecbe0
CL
4177 if (amdgpu_sriov_vf(adev)) {
4178 amdgpu_virt_release_full_gpu(adev, true);
2c738637 4179 flush_delayed_work(&adev->delayed_init_work);
38eecbe0 4180 }
2c738637 4181
90bcb9b5
EQ
4182 /*
4183 * Place the sysfs registration after `late_init`, as some of the
4184 * operations performed in `late_init` might affect the creation
4185 * of the sysfs interfaces.
4186 */
4187 r = amdgpu_atombios_sysfs_init(adev);
4188 if (r)
4189 drm_err(&adev->ddev,
4190 "registering atombios sysfs failed (%d).\n", r);
4191
4192 r = amdgpu_pm_sysfs_init(adev);
4193 if (r)
4194 DRM_ERROR("registering pm sysfs failed (%d).\n", r);
4195
4196 r = amdgpu_ucode_sysfs_init(adev);
4197 if (r) {
4198 adev->ucode_sysfs_en = false;
4199 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
4200 } else
4201 adev->ucode_sysfs_en = true;
4202
77f3a5cd 4203 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
5aea5327 4204 if (r)
77f3a5cd 4205 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166 4206
76da73f0
LL
4207 r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
4208 if (r)
4209 dev_err(adev->dev,
4210 "Could not create amdgpu board attributes\n");
4211
7957ec80
LL
4212 amdgpu_fru_sysfs_init(adev);
4213
d155bef0
AB
4214 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4215 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
4216 if (r)
4217 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
4218
c1dd4aa6
AG
4219 /* Have stored pci confspace at hand for restore in sudden PCI error */
4220 if (amdgpu_device_cache_pci_state(adev->pdev))
4221 pci_restore_state(pdev);
4222
8c3dd61c
KHF
4223 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
4224 /* this will fail for cards that aren't VGA class devices, just
b8920e1e
SS
4225 * ignore it
4226 */
8c3dd61c 4227 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
bf44e8ce 4228 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
8c3dd61c 4229
d37a3929
OC
4230 px = amdgpu_device_supports_px(ddev);
4231
7b1c6263 4232 if (px || (!dev_is_removable(&adev->pdev->dev) &&
d37a3929 4233 apple_gmux_detect(NULL, NULL)))
8c3dd61c
KHF
4234 vga_switcheroo_register_client(adev->pdev,
4235 &amdgpu_switcheroo_ops, px);
d37a3929
OC
4236
4237 if (px)
8c3dd61c 4238 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
8c3dd61c 4239
e3c1b071 4240 if (adev->gmc.xgmi.pending_reset)
4241 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
4242 msecs_to_jiffies(AMDGPU_RESUME_MS));
4243
4a74c38c
PY
4244 amdgpu_device_check_iommu_direct_map(adev);
4245
d38ceaf9 4246 return 0;
83ba126a 4247
970fd197 4248release_ras_con:
38eecbe0
CL
4249 if (amdgpu_sriov_vf(adev))
4250 amdgpu_virt_release_full_gpu(adev, true);
4251
4252 /* failed in exclusive mode due to timeout */
4253 if (amdgpu_sriov_vf(adev) &&
4254 !amdgpu_sriov_runtime(adev) &&
4255 amdgpu_virt_mmio_blocked(adev) &&
4256 !amdgpu_virt_wait_reset(adev)) {
4257 dev_err(adev->dev, "VF exclusive mode timeout\n");
4258 /* Don't send request since VF is inactive. */
4259 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4260 adev->virt.ops = NULL;
4261 r = -EAGAIN;
4262 }
970fd197
SY
4263 amdgpu_release_ras_context(adev);
4264
83ba126a 4265failed:
89041940 4266 amdgpu_vf_error_trans_all(adev);
8840a387 4267
83ba126a 4268 return r;
d38ceaf9
AD
4269}
4270
07775fc1
AG
4271static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
4272{
62d5f9f7 4273
07775fc1
AG
4274 /* Clear all CPU mappings pointing to this device */
4275 unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
4276
4277 /* Unmap all mapped bars - Doorbell, registers and VRAM */
43c064db 4278 amdgpu_doorbell_fini(adev);
07775fc1
AG
4279
4280 iounmap(adev->rmmio);
4281 adev->rmmio = NULL;
4282 if (adev->mman.aper_base_kaddr)
4283 iounmap(adev->mman.aper_base_kaddr);
4284 adev->mman.aper_base_kaddr = NULL;
4285
4286 /* Memory manager related */
a0ba1279 4287 if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
07775fc1
AG
4288 arch_phys_wc_del(adev->gmc.vram_mtrr);
4289 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4290 }
4291}
4292
d38ceaf9 4293/**
bbe04dec 4294 * amdgpu_device_fini_hw - tear down the driver
d38ceaf9
AD
4295 *
4296 * @adev: amdgpu_device pointer
4297 *
4298 * Tear down the driver info (all asics).
4299 * Called at driver shutdown.
4300 */
72c8c97b 4301void amdgpu_device_fini_hw(struct amdgpu_device *adev)
d38ceaf9 4302{
aac89168 4303 dev_info(adev->dev, "amdgpu: finishing device.\n");
9f875167 4304 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 4305 adev->shutdown = true;
9f875167 4306
752c683d
ML
4307 /* make sure IB test finished before entering exclusive mode
4308 * to avoid preemption on IB test
b8920e1e 4309 */
519b8b76 4310 if (amdgpu_sriov_vf(adev)) {
752c683d 4311 amdgpu_virt_request_full_gpu(adev, false);
519b8b76
BZ
4312 amdgpu_virt_fini_data_exchange(adev);
4313 }
752c683d 4314
e5b03032
ML
4315 /* disable all interrupts */
4316 amdgpu_irq_disable_all(adev);
47fc644f 4317 if (adev->mode_info.mode_config_initialized) {
1053b9c9 4318 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4a580877 4319 drm_helper_force_disable_all(adev_to_drm(adev));
ff97cba8 4320 else
4a580877 4321 drm_atomic_helper_shutdown(adev_to_drm(adev));
ff97cba8 4322 }
8d35a259 4323 amdgpu_fence_driver_hw_fini(adev);
72c8c97b 4324
cd3a8a59 4325 if (adev->mman.initialized)
9bff18d1 4326 drain_workqueue(adev->mman.bdev.wq);
98f56188 4327
53e9d836 4328 if (adev->pm.sysfs_initialized)
7c868b59 4329 amdgpu_pm_sysfs_fini(adev);
72c8c97b
AG
4330 if (adev->ucode_sysfs_en)
4331 amdgpu_ucode_sysfs_fini(adev);
4332 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
7957ec80 4333 amdgpu_fru_sysfs_fini(adev);
72c8c97b 4334
232d1d43
SY
4335 /* disable ras feature must before hw fini */
4336 amdgpu_ras_pre_fini(adev);
4337
b7043800
AD
4338 amdgpu_ttm_set_buffer_funcs_status(adev, false);
4339
e9669fb7 4340 amdgpu_device_ip_fini_early(adev);
d10d0daa 4341
a3848df6
YW
4342 amdgpu_irq_fini_hw(adev);
4343
b6fd6e0f
SK
4344 if (adev->mman.initialized)
4345 ttm_device_clear_dma_mappings(&adev->mman.bdev);
894c6890 4346
d10d0daa 4347 amdgpu_gart_dummy_page_fini(adev);
07775fc1 4348
39934d3e
VP
4349 if (drm_dev_is_unplugged(adev_to_drm(adev)))
4350 amdgpu_device_unmap_mmio(adev);
87172e89 4351
72c8c97b
AG
4352}
4353
4354void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4355{
62d5f9f7 4356 int idx;
d37a3929 4357 bool px;
62d5f9f7 4358
8d35a259 4359 amdgpu_fence_driver_sw_fini(adev);
a5c5d8d5 4360 amdgpu_device_ip_fini(adev);
b31d3063 4361 amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
d38ceaf9 4362 adev->accel_working = false;
68ce8b24 4363 dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
04442bf7
LL
4364
4365 amdgpu_reset_fini(adev);
4366
d38ceaf9 4367 /* free i2c buses */
4562236b
HW
4368 if (!amdgpu_device_has_dc_support(adev))
4369 amdgpu_i2c_fini(adev);
bfca0289
SL
4370
4371 if (amdgpu_emu_mode != 1)
4372 amdgpu_atombios_fini(adev);
4373
d38ceaf9
AD
4374 kfree(adev->bios);
4375 adev->bios = NULL;
d37a3929 4376
8a2b5139
LL
4377 kfree(adev->fru_info);
4378 adev->fru_info = NULL;
4379
d37a3929
OC
4380 px = amdgpu_device_supports_px(adev_to_drm(adev));
4381
7b1c6263 4382 if (px || (!dev_is_removable(&adev->pdev->dev) &&
d37a3929 4383 apple_gmux_detect(NULL, NULL)))
84c8b22e 4384 vga_switcheroo_unregister_client(adev->pdev);
d37a3929
OC
4385
4386 if (px)
83ba126a 4387 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d37a3929 4388
38d6be81 4389 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
b8779475 4390 vga_client_unregister(adev->pdev);
e9bc1bf7 4391
62d5f9f7
LS
4392 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4393
4394 iounmap(adev->rmmio);
4395 adev->rmmio = NULL;
43c064db 4396 amdgpu_doorbell_fini(adev);
62d5f9f7
LS
4397 drm_dev_exit(idx);
4398 }
4399
d155bef0
AB
4400 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4401 amdgpu_pmu_fini(adev);
72de33f8 4402 if (adev->mman.discovery_bin)
a190d1c7 4403 amdgpu_discovery_fini(adev);
72c8c97b 4404
cfbb6b00
AG
4405 amdgpu_reset_put_reset_domain(adev->reset_domain);
4406 adev->reset_domain = NULL;
4407
72c8c97b
AG
4408 kfree(adev->pci_state);
4409
d38ceaf9
AD
4410}
4411
58144d28
ND
4412/**
4413 * amdgpu_device_evict_resources - evict device resources
4414 * @adev: amdgpu device object
4415 *
4416 * Evicts all ttm device resources (vram BOs, gart table) from the lru list
4417 * of the vram memory type. Mainly used for evicting device resources
4418 * at suspend time.
4419 *
4420 */
7863c155 4421static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
58144d28 4422{
7863c155
ML
4423 int ret;
4424
e53d9665
ML
4425 /* No need to evict vram on APUs for suspend to ram or s2idle */
4426 if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
7863c155 4427 return 0;
58144d28 4428
7863c155
ML
4429 ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4430 if (ret)
58144d28 4431 DRM_WARN("evicting device resources failed\n");
7863c155 4432 return ret;
58144d28 4433}
d38ceaf9
AD
4434
4435/*
4436 * Suspend & resume.
4437 */
5095d541
ML
4438/**
4439 * amdgpu_device_prepare - prepare for device suspend
4440 *
4441 * @dev: drm dev pointer
4442 *
4443 * Prepare to put the hw in the suspend state (all asics).
4444 * Returns 0 for success or an error on failure.
4445 * Called at driver suspend.
4446 */
4447int amdgpu_device_prepare(struct drm_device *dev)
4448{
4449 struct amdgpu_device *adev = drm_to_adev(dev);
cb11ca32 4450 int i, r;
5095d541
ML
4451
4452 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4453 return 0;
4454
4455 /* Evict the majority of BOs before starting suspend sequence */
4456 r = amdgpu_device_evict_resources(adev);
4457 if (r)
4458 return r;
4459
cb11ca32
ML
4460 for (i = 0; i < adev->num_ip_blocks; i++) {
4461 if (!adev->ip_blocks[i].status.valid)
4462 continue;
4463 if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
4464 continue;
4465 r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev);
4466 if (r)
4467 return r;
4468 }
4469
5095d541
ML
4470 return 0;
4471}
4472
d38ceaf9 4473/**
810ddc3a 4474 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 4475 *
87e3f136 4476 * @dev: drm dev pointer
87e3f136 4477 * @fbcon : notify the fbdev of suspend
d38ceaf9
AD
4478 *
4479 * Puts the hw in the suspend state (all asics).
4480 * Returns 0 for success or an error on failure.
4481 * Called at driver suspend.
4482 */
de185019 4483int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9 4484{
a2e15b0e 4485 struct amdgpu_device *adev = drm_to_adev(dev);
d7274ec7 4486 int r = 0;
d38ceaf9 4487
d38ceaf9
AD
4488 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4489 return 0;
4490
44779b43 4491 adev->in_suspend = true;
3fa8f89d 4492
d7274ec7
BZ
4493 if (amdgpu_sriov_vf(adev)) {
4494 amdgpu_virt_fini_data_exchange(adev);
4495 r = amdgpu_virt_request_full_gpu(adev, false);
4496 if (r)
4497 return r;
4498 }
4499
3fa8f89d
S
4500 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4501 DRM_WARN("smart shift update failed\n");
4502
5f818173 4503 if (fbcon)
087451f3 4504 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
5f818173 4505
beff74bc 4506 cancel_delayed_work_sync(&adev->delayed_init_work);
0dee7263 4507 flush_delayed_work(&adev->gfx.gfx_off_delay_work);
a5459475 4508
5e6932fe 4509 amdgpu_ras_suspend(adev);
4510
b7043800
AD
4511 amdgpu_ttm_set_buffer_funcs_status(adev, false);
4512
2196927b 4513 amdgpu_device_ip_suspend_phase1(adev);
fe1053b7 4514
c004d44e 4515 if (!adev->in_s0ix)
5d3a2d95 4516 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
94fa5660 4517
7863c155
ML
4518 r = amdgpu_device_evict_resources(adev);
4519 if (r)
4520 return r;
d38ceaf9 4521
8d35a259 4522 amdgpu_fence_driver_hw_fini(adev);
d38ceaf9 4523
2196927b 4524 amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 4525
d7274ec7
BZ
4526 if (amdgpu_sriov_vf(adev))
4527 amdgpu_virt_release_full_gpu(adev, false);
4528
d38ceaf9
AD
4529 return 0;
4530}
4531
4532/**
810ddc3a 4533 * amdgpu_device_resume - initiate device resume
d38ceaf9 4534 *
87e3f136 4535 * @dev: drm dev pointer
87e3f136 4536 * @fbcon : notify the fbdev of resume
d38ceaf9
AD
4537 *
4538 * Bring the hw back to operating state (all asics).
4539 * Returns 0 for success or an error on failure.
4540 * Called at driver resume.
4541 */
de185019 4542int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9 4543{
1348969a 4544 struct amdgpu_device *adev = drm_to_adev(dev);
03161a6e 4545 int r = 0;
d38ceaf9 4546
d7274ec7
BZ
4547 if (amdgpu_sriov_vf(adev)) {
4548 r = amdgpu_virt_request_full_gpu(adev, true);
4549 if (r)
4550 return r;
4551 }
4552
d38ceaf9
AD
4553 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4554 return 0;
4555
62498733 4556 if (adev->in_s0ix)
bc143d8b 4557 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
628c36d7 4558
d38ceaf9 4559 /* post card */
39c640c0 4560 if (amdgpu_device_need_post(adev)) {
4d2997ab 4561 r = amdgpu_device_asic_init(adev);
74b0b157 4562 if (r)
aac89168 4563 dev_err(adev->dev, "amdgpu asic init failed\n");
74b0b157 4564 }
d38ceaf9 4565
06ec9070 4566 r = amdgpu_device_ip_resume(adev);
d7274ec7 4567
e6707218 4568 if (r) {
aac89168 4569 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
3c22c1ea 4570 goto exit;
e6707218 4571 }
8d35a259 4572 amdgpu_fence_driver_hw_init(adev);
5ceb54c6 4573
06ec9070 4574 r = amdgpu_device_ip_late_init(adev);
03161a6e 4575 if (r)
3c22c1ea 4576 goto exit;
d38ceaf9 4577
beff74bc
AD
4578 queue_delayed_work(system_wq, &adev->delayed_init_work,
4579 msecs_to_jiffies(AMDGPU_RESUME_MS));
4580
c004d44e 4581 if (!adev->in_s0ix) {
5d3a2d95
AD
4582 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4583 if (r)
3c22c1ea 4584 goto exit;
5d3a2d95 4585 }
756e6880 4586
3c22c1ea
SF
4587exit:
4588 if (amdgpu_sriov_vf(adev)) {
4589 amdgpu_virt_init_data_exchange(adev);
4590 amdgpu_virt_release_full_gpu(adev, true);
4591 }
4592
4593 if (r)
4594 return r;
4595
96a5d8d4 4596 /* Make sure IB tests flushed */
beff74bc 4597 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 4598
a2e15b0e 4599 if (fbcon)
087451f3 4600 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
d38ceaf9 4601
5e6932fe 4602 amdgpu_ras_resume(adev);
4603
d09ef243
AD
4604 if (adev->mode_info.num_crtc) {
4605 /*
4606 * Most of the connector probing functions try to acquire runtime pm
4607 * refs to ensure that the GPU is powered on when connector polling is
4608 * performed. Since we're calling this from a runtime PM callback,
4609 * trying to acquire rpm refs will cause us to deadlock.
4610 *
4611 * Since we're guaranteed to be holding the rpm lock, it's safe to
4612 * temporarily disable the rpm helpers so this doesn't deadlock us.
4613 */
23a1a9e5 4614#ifdef CONFIG_PM
d09ef243 4615 dev->dev->power.disable_depth++;
23a1a9e5 4616#endif
d09ef243
AD
4617 if (!adev->dc_enabled)
4618 drm_helper_hpd_irq_event(dev);
4619 else
4620 drm_kms_helper_hotplug_event(dev);
23a1a9e5 4621#ifdef CONFIG_PM
d09ef243 4622 dev->dev->power.disable_depth--;
23a1a9e5 4623#endif
d09ef243 4624 }
44779b43
RZ
4625 adev->in_suspend = false;
4626
dc907c9d
JX
4627 if (adev->enable_mes)
4628 amdgpu_mes_self_test(adev);
4629
3fa8f89d
S
4630 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4631 DRM_WARN("smart shift update failed\n");
4632
4d3b9ae5 4633 return 0;
d38ceaf9
AD
4634}
4635
e3ecdffa
AD
4636/**
4637 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4638 *
4639 * @adev: amdgpu_device pointer
4640 *
4641 * The list of all the hardware IPs that make up the asic is walked and
4642 * the check_soft_reset callbacks are run. check_soft_reset determines
4643 * if the asic is still hung or not.
4644 * Returns true if any of the IPs are still in a hung state, false if not.
4645 */
06ec9070 4646static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
4647{
4648 int i;
4649 bool asic_hang = false;
4650
f993d628
ML
4651 if (amdgpu_sriov_vf(adev))
4652 return true;
4653
8bc04c29
AD
4654 if (amdgpu_asic_need_full_reset(adev))
4655 return true;
4656
63fbf42f 4657 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4658 if (!adev->ip_blocks[i].status.valid)
63fbf42f 4659 continue;
a1255107
AD
4660 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4661 adev->ip_blocks[i].status.hang =
4662 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4663 if (adev->ip_blocks[i].status.hang) {
aac89168 4664 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
4665 asic_hang = true;
4666 }
4667 }
4668 return asic_hang;
4669}
4670
e3ecdffa
AD
4671/**
4672 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4673 *
4674 * @adev: amdgpu_device pointer
4675 *
4676 * The list of all the hardware IPs that make up the asic is walked and the
4677 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
4678 * handles any IP specific hardware or software state changes that are
4679 * necessary for a soft reset to succeed.
4680 * Returns 0 on success, negative error code on failure.
4681 */
06ec9070 4682static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
4683{
4684 int i, r = 0;
4685
4686 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4687 if (!adev->ip_blocks[i].status.valid)
d31a501e 4688 continue;
a1255107
AD
4689 if (adev->ip_blocks[i].status.hang &&
4690 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4691 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
4692 if (r)
4693 return r;
4694 }
4695 }
4696
4697 return 0;
4698}
4699
e3ecdffa
AD
4700/**
4701 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4702 *
4703 * @adev: amdgpu_device pointer
4704 *
4705 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
4706 * reset is necessary to recover.
4707 * Returns true if a full asic reset is required, false if not.
4708 */
06ec9070 4709static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 4710{
da146d3b
AD
4711 int i;
4712
8bc04c29
AD
4713 if (amdgpu_asic_need_full_reset(adev))
4714 return true;
4715
da146d3b 4716 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4717 if (!adev->ip_blocks[i].status.valid)
da146d3b 4718 continue;
a1255107
AD
4719 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4720 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4721 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
4722 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4723 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 4724 if (adev->ip_blocks[i].status.hang) {
aac89168 4725 dev_info(adev->dev, "Some block need full reset!\n");
da146d3b
AD
4726 return true;
4727 }
4728 }
35d782fe
CZ
4729 }
4730 return false;
4731}
4732
e3ecdffa
AD
4733/**
4734 * amdgpu_device_ip_soft_reset - do a soft reset
4735 *
4736 * @adev: amdgpu_device pointer
4737 *
4738 * The list of all the hardware IPs that make up the asic is walked and the
4739 * soft_reset callbacks are run if the block is hung. soft_reset handles any
4740 * IP specific hardware or software state changes that are necessary to soft
4741 * reset the IP.
4742 * Returns 0 on success, negative error code on failure.
4743 */
06ec9070 4744static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4745{
4746 int i, r = 0;
4747
4748 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4749 if (!adev->ip_blocks[i].status.valid)
35d782fe 4750 continue;
a1255107
AD
4751 if (adev->ip_blocks[i].status.hang &&
4752 adev->ip_blocks[i].version->funcs->soft_reset) {
4753 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
4754 if (r)
4755 return r;
4756 }
4757 }
4758
4759 return 0;
4760}
4761
e3ecdffa
AD
4762/**
4763 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4764 *
4765 * @adev: amdgpu_device pointer
4766 *
4767 * The list of all the hardware IPs that make up the asic is walked and the
4768 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
4769 * handles any IP specific hardware or software state changes that are
4770 * necessary after the IP has been soft reset.
4771 * Returns 0 on success, negative error code on failure.
4772 */
06ec9070 4773static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4774{
4775 int i, r = 0;
4776
4777 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4778 if (!adev->ip_blocks[i].status.valid)
35d782fe 4779 continue;
a1255107
AD
4780 if (adev->ip_blocks[i].status.hang &&
4781 adev->ip_blocks[i].version->funcs->post_soft_reset)
4782 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
4783 if (r)
4784 return r;
4785 }
4786
4787 return 0;
4788}
4789
e3ecdffa 4790/**
c33adbc7 4791 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
4792 *
4793 * @adev: amdgpu_device pointer
4794 *
4795 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
4796 * restore things like GPUVM page tables after a GPU reset where
4797 * the contents of VRAM might be lost.
403009bf
CK
4798 *
4799 * Returns:
4800 * 0 on success, negative error code on failure.
e3ecdffa 4801 */
c33adbc7 4802static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 4803{
c41d1cf6 4804 struct dma_fence *fence = NULL, *next = NULL;
403009bf 4805 struct amdgpu_bo *shadow;
e18aaea7 4806 struct amdgpu_bo_vm *vmbo;
403009bf 4807 long r = 1, tmo;
c41d1cf6
ML
4808
4809 if (amdgpu_sriov_runtime(adev))
b045d3af 4810 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
4811 else
4812 tmo = msecs_to_jiffies(100);
4813
aac89168 4814 dev_info(adev->dev, "recover vram bo from shadow start\n");
c41d1cf6 4815 mutex_lock(&adev->shadow_list_lock);
e18aaea7 4816 list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4994d1f0
LC
4817 /* If vm is compute context or adev is APU, shadow will be NULL */
4818 if (!vmbo->shadow)
4819 continue;
4820 shadow = vmbo->shadow;
4821
403009bf 4822 /* No need to recover an evicted BO */
d3116756
CK
4823 if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4824 shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4825 shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
403009bf
CK
4826 continue;
4827
4828 r = amdgpu_bo_restore_shadow(shadow, &next);
4829 if (r)
4830 break;
4831
c41d1cf6 4832 if (fence) {
1712fb1a 4833 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
4834 dma_fence_put(fence);
4835 fence = next;
1712fb1a 4836 if (tmo == 0) {
4837 r = -ETIMEDOUT;
c41d1cf6 4838 break;
1712fb1a 4839 } else if (tmo < 0) {
4840 r = tmo;
4841 break;
4842 }
403009bf
CK
4843 } else {
4844 fence = next;
c41d1cf6 4845 }
c41d1cf6
ML
4846 }
4847 mutex_unlock(&adev->shadow_list_lock);
4848
403009bf
CK
4849 if (fence)
4850 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
4851 dma_fence_put(fence);
4852
1712fb1a 4853 if (r < 0 || tmo <= 0) {
aac89168 4854 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
4855 return -EIO;
4856 }
c41d1cf6 4857
aac89168 4858 dev_info(adev->dev, "recover vram bo from shadow done\n");
403009bf 4859 return 0;
c41d1cf6
ML
4860}
4861
a90ad3c2 4862
e3ecdffa 4863/**
06ec9070 4864 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e 4865 *
982a820b 4866 * @adev: amdgpu_device pointer
87e3f136 4867 * @from_hypervisor: request from hypervisor
5740682e
ML
4868 *
4869 * Do VF FLR and reinitialize the ASIC.
3f48c681 4870 * Returns 0 on success, negative error code on failure.
e3ecdffa
AD
4871 */
4872static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4873 bool from_hypervisor)
5740682e
ML
4874{
4875 int r;
a5f67c93 4876 struct amdgpu_hive_info *hive = NULL;
7258fa31 4877 int retry_limit = 0;
5740682e 4878
7258fa31 4879retry:
c004d44e 4880 amdgpu_amdkfd_pre_reset(adev);
428890a3 4881
5740682e
ML
4882 if (from_hypervisor)
4883 r = amdgpu_virt_request_full_gpu(adev, true);
4884 else
4885 r = amdgpu_virt_reset_gpu(adev);
4886 if (r)
4887 return r;
f734b213 4888 amdgpu_irq_gpu_reset_resume_helper(adev);
a90ad3c2 4889
83f24a8f
HC
4890 /* some sw clean up VF needs to do before recover */
4891 amdgpu_virt_post_reset(adev);
4892
a90ad3c2 4893 /* Resume IP prior to SMC */
06ec9070 4894 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
4895 if (r)
4896 goto error;
a90ad3c2 4897
c9ffa427 4898 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 4899
7a3e0bb2
RZ
4900 r = amdgpu_device_fw_loading(adev);
4901 if (r)
4902 return r;
4903
a90ad3c2 4904 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 4905 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
4906 if (r)
4907 goto error;
a90ad3c2 4908
a5f67c93
ZL
4909 hive = amdgpu_get_xgmi_hive(adev);
4910 /* Update PSP FW topology after reset */
4911 if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4912 r = amdgpu_xgmi_update_topology(hive, adev);
4913
4914 if (hive)
4915 amdgpu_put_xgmi_hive(hive);
4916
4917 if (!r) {
a5f67c93 4918 r = amdgpu_ib_ring_tests(adev);
9c12f5cd 4919
c004d44e 4920 amdgpu_amdkfd_post_reset(adev);
a5f67c93 4921 }
a90ad3c2 4922
abc34253 4923error:
c41d1cf6 4924 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 4925 amdgpu_inc_vram_lost(adev);
c33adbc7 4926 r = amdgpu_device_recover_vram(adev);
a90ad3c2 4927 }
437f3e0b 4928 amdgpu_virt_release_full_gpu(adev, true);
a90ad3c2 4929
7258fa31
SK
4930 if (AMDGPU_RETRY_SRIOV_RESET(r)) {
4931 if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
4932 retry_limit++;
4933 goto retry;
4934 } else
4935 DRM_ERROR("GPU reset retry is beyond the retry limit\n");
4936 }
4937
a90ad3c2
ML
4938 return r;
4939}
4940
9a1cddd6 4941/**
4942 * amdgpu_device_has_job_running - check if there is any job in mirror list
4943 *
982a820b 4944 * @adev: amdgpu_device pointer
9a1cddd6 4945 *
4946 * check if there is any job in mirror list
4947 */
4948bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4949{
4950 int i;
4951 struct drm_sched_job *job;
4952
4953 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4954 struct amdgpu_ring *ring = adev->rings[i];
4955
4956 if (!ring || !ring->sched.thread)
4957 continue;
4958
4959 spin_lock(&ring->sched.job_list_lock);
6efa4b46
LT
4960 job = list_first_entry_or_null(&ring->sched.pending_list,
4961 struct drm_sched_job, list);
9a1cddd6 4962 spin_unlock(&ring->sched.job_list_lock);
4963 if (job)
4964 return true;
4965 }
4966 return false;
4967}
4968
12938fad
CK
4969/**
4970 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4971 *
982a820b 4972 * @adev: amdgpu_device pointer
12938fad
CK
4973 *
4974 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4975 * a hung GPU.
4976 */
4977bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4978{
12938fad 4979
3ba7b418
AG
4980 if (amdgpu_gpu_recovery == 0)
4981 goto disabled;
4982
1a11a65d
YC
4983 /* Skip soft reset check in fatal error mode */
4984 if (!amdgpu_ras_is_poison_mode_supported(adev))
4985 return true;
4986
3ba7b418
AG
4987 if (amdgpu_sriov_vf(adev))
4988 return true;
4989
4990 if (amdgpu_gpu_recovery == -1) {
4991 switch (adev->asic_type) {
b3523c45
AD
4992#ifdef CONFIG_DRM_AMDGPU_SI
4993 case CHIP_VERDE:
4994 case CHIP_TAHITI:
4995 case CHIP_PITCAIRN:
4996 case CHIP_OLAND:
4997 case CHIP_HAINAN:
4998#endif
4999#ifdef CONFIG_DRM_AMDGPU_CIK
5000 case CHIP_KAVERI:
5001 case CHIP_KABINI:
5002 case CHIP_MULLINS:
5003#endif
5004 case CHIP_CARRIZO:
5005 case CHIP_STONEY:
5006 case CHIP_CYAN_SKILLFISH:
3ba7b418 5007 goto disabled;
b3523c45
AD
5008 default:
5009 break;
3ba7b418 5010 }
12938fad
CK
5011 }
5012
5013 return true;
3ba7b418
AG
5014
5015disabled:
aac89168 5016 dev_info(adev->dev, "GPU recovery disabled.\n");
3ba7b418 5017 return false;
12938fad
CK
5018}
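
/*
 * A minimal sketch, assuming a hypothetical hang-detection path that
 * consults the policy helper above before queuing any recovery work;
 * the function name and the work item are illustrative only.
 */
static void example_maybe_schedule_recovery(struct amdgpu_device *adev,
					    struct work_struct *reset_work)
{
	/* Respect amdgpu_gpu_recovery and the per-ASIC defaults. */
	if (!amdgpu_device_should_recover_gpu(adev))
		return;

	schedule_work(reset_work);
}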
5019
5c03e584
FX
5020int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
5021{
47fc644f
SS
5022 u32 i;
5023 int ret = 0;
5c03e584 5024
47fc644f 5025 amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5c03e584 5026
47fc644f 5027 dev_info(adev->dev, "GPU mode1 reset\n");
5c03e584 5028
47fc644f
SS
5029 /* disable BM */
5030 pci_clear_master(adev->pdev);
5c03e584 5031
47fc644f 5032 amdgpu_device_cache_pci_state(adev->pdev);
5c03e584 5033
47fc644f
SS
5034 if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
5035 dev_info(adev->dev, "GPU smu mode1 reset\n");
5036 ret = amdgpu_dpm_mode1_reset(adev);
5037 } else {
5038 dev_info(adev->dev, "GPU psp mode1 reset\n");
5039 ret = psp_gpu_reset(adev);
5040 }
5c03e584 5041
47fc644f 5042 if (ret)
2c0f880a 5043 goto mode1_reset_failed;
5c03e584 5044
47fc644f 5045 amdgpu_device_load_pci_state(adev->pdev);
15c5c5f5
LL
5046 ret = amdgpu_psp_wait_for_bootloader(adev);
5047 if (ret)
2c0f880a 5048 goto mode1_reset_failed;
5c03e584 5049
47fc644f
SS
5050 /* wait for asic to come out of reset */
5051 for (i = 0; i < adev->usec_timeout; i++) {
5052 u32 memsize = adev->nbio.funcs->get_memsize(adev);
5c03e584 5053
47fc644f
SS
5054 if (memsize != 0xffffffff)
5055 break;
5056 udelay(1);
5057 }
5c03e584 5058
2c0f880a
HZ
5059 if (i >= adev->usec_timeout) {
5060 ret = -ETIMEDOUT;
5061 goto mode1_reset_failed;
5062 }
5063
47fc644f 5064 amdgpu_atombios_scratch_regs_engine_hung(adev, false);
15c5c5f5 5065
2c0f880a
HZ
5066 return 0;
5067
5068mode1_reset_failed:
5069 dev_err(adev->dev, "GPU mode1 reset failed\n");
47fc644f 5070 return ret;
5c03e584 5071}
5c6dd71e 5072
e3c1b071 5073int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
04442bf7 5074 struct amdgpu_reset_context *reset_context)
26bc5340 5075{
5c1e6fa4 5076 int i, r = 0;
04442bf7
LL
5077 struct amdgpu_job *job = NULL;
5078 bool need_full_reset =
5079 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5080
5081 if (reset_context->reset_req_dev == adev)
5082 job = reset_context->job;
71182665 5083
b602ca5f
TZ
5084 if (amdgpu_sriov_vf(adev)) {
5085 /* stop the data exchange thread */
5086 amdgpu_virt_fini_data_exchange(adev);
5087 }
5088
9e225fb9
AG
5089 amdgpu_fence_driver_isr_toggle(adev, true);
5090
71182665 5091 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
5092 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5093 struct amdgpu_ring *ring = adev->rings[i];
5094
51687759 5095 if (!ring || !ring->sched.thread)
0875dc9e 5096 continue;
5740682e 5097
b8920e1e
SS
5098 /* Clear the job fences from the fence driver to avoid force_completion
5099 * on them; leave only NULL and the VM flush fences in the fence driver
5100 */
5c1e6fa4 5101 amdgpu_fence_driver_clear_job_fences(ring);
c530b02f 5102
2f9d4084
ML
5103 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
5104 amdgpu_fence_driver_force_completion(ring);
0875dc9e 5105 }
d38ceaf9 5106
9e225fb9
AG
5107 amdgpu_fence_driver_isr_toggle(adev, false);
5108
ff99849b 5109 if (job && job->vm)
222b5f04
AG
5110 drm_sched_increase_karma(&job->base);
5111
04442bf7 5112 r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
404b277b 5113 /* If reset handler not implemented, continue; otherwise return */
b8920e1e 5114 if (r == -EOPNOTSUPP)
404b277b
LL
5115 r = 0;
5116 else
04442bf7
LL
5117 return r;
5118
1d721ed6 5119 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
5120 if (!amdgpu_sriov_vf(adev)) {
5121
5122 if (!need_full_reset)
5123 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
5124
360cd081
LG
5125 if (!need_full_reset && amdgpu_gpu_recovery &&
5126 amdgpu_device_ip_check_soft_reset(adev)) {
26bc5340
AG
5127 amdgpu_device_ip_pre_soft_reset(adev);
5128 r = amdgpu_device_ip_soft_reset(adev);
5129 amdgpu_device_ip_post_soft_reset(adev);
5130 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 5131 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
26bc5340
AG
5132 need_full_reset = true;
5133 }
5134 }
5135
5136 if (need_full_reset)
5137 r = amdgpu_device_ip_suspend(adev);
04442bf7
LL
5138 if (need_full_reset)
5139 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5140 else
5141 clear_bit(AMDGPU_NEED_FULL_RESET,
5142 &reset_context->flags);
26bc5340
AG
5143 }
5144
5145 return r;
5146}
5147
15fd09a0
SA
5148static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
5149{
15fd09a0
SA
5150 int i;
5151
38a15ad9 5152 lockdep_assert_held(&adev->reset_domain->sem);
15fd09a0 5153
2d6a2a28
AA
5154 for (i = 0; i < adev->reset_info.num_regs; i++) {
5155 adev->reset_info.reset_dump_reg_value[i] =
5156 RREG32(adev->reset_info.reset_dump_reg_list[i]);
5157
5158 trace_amdgpu_reset_reg_dumps(adev->reset_info.reset_dump_reg_list[i],
5159 adev->reset_info.reset_dump_reg_value[i]);
15fd09a0
SA
5160 }
5161
5162 return 0;
5163}
5164
04442bf7
LL
5165int amdgpu_do_asic_reset(struct list_head *device_list_handle,
5166 struct amdgpu_reset_context *reset_context)
26bc5340
AG
5167{
5168 struct amdgpu_device *tmp_adev = NULL;
04442bf7 5169 bool need_full_reset, skip_hw_reset, vram_lost = false;
26bc5340 5170 int r = 0;
f5c7e779 5171 bool gpu_reset_for_dev_remove = 0;
26bc5340 5172
04442bf7
LL
5173 /* Try reset handler method first */
5174 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5175 reset_list);
15fd09a0 5176 amdgpu_reset_reg_dumps(tmp_adev);
0a83bb35
LL
5177
5178 reset_context->reset_device_list = device_list_handle;
04442bf7 5179 r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
404b277b 5180 /* If reset handler not implemented, continue; otherwise return */
b8920e1e 5181 if (r == -EOPNOTSUPP)
404b277b
LL
5182 r = 0;
5183 else
04442bf7
LL
5184 return r;
5185
5186 /* Reset handler not implemented, use the default method */
5187 need_full_reset =
5188 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5189 skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
5190
f5c7e779
YC
5191 gpu_reset_for_dev_remove =
5192 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5193 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5194
26bc5340 5195 /*
655ce9cb 5196 * ASIC reset has to be done on all XGMI hive nodes ASAP
26bc5340
AG
5197 * to allow proper link negotiation in FW (within 1 sec)
5198 */
7ac71382 5199 if (!skip_hw_reset && need_full_reset) {
655ce9cb 5200 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
041a62bc 5201 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 5202 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
e3c1b071 5203 tmp_adev->gmc.xgmi.pending_reset = false;
c96cf282 5204 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
5205 r = -EALREADY;
5206 } else
5207 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 5208
041a62bc 5209 if (r) {
aac89168 5210 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4a580877 5211 r, adev_to_drm(tmp_adev)->unique);
19349072 5212 goto out;
ce316fa5
LM
5213 }
5214 }
5215
041a62bc
AG
5216 /* For XGMI wait for all resets to complete before proceed */
5217 if (!r) {
655ce9cb 5218 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
ce316fa5
LM
5219 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5220 flush_work(&tmp_adev->xgmi_reset_work);
5221 r = tmp_adev->asic_reset_res;
5222 if (r)
5223 break;
ce316fa5
LM
5224 }
5225 }
5226 }
ce316fa5 5227 }
26bc5340 5228
43c4d576 5229 if (!r && amdgpu_ras_intr_triggered()) {
655ce9cb 5230 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
21226f02 5231 amdgpu_ras_reset_error_count(tmp_adev, AMDGPU_RAS_BLOCK__MMHUB);
43c4d576
JC
5232 }
5233
00eaa571 5234 amdgpu_ras_intr_cleared();
43c4d576 5235 }
00eaa571 5236
f5c7e779
YC
5237 /* Since the mode1 reset affects base ip blocks, the
5238 * phase1 ip blocks need to be resumed. Otherwise there
5239 * will be a BIOS signature error and the psp bootloader
5240 * can't load kdb on the next amdgpu install.
5241 */
5242 if (gpu_reset_for_dev_remove) {
5243 list_for_each_entry(tmp_adev, device_list_handle, reset_list)
5244 amdgpu_device_ip_resume_phase1(tmp_adev);
5245
5246 goto end;
5247 }
5248
655ce9cb 5249 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
26bc5340
AG
5250 if (need_full_reset) {
5251 /* post card */
e3c1b071 5252 r = amdgpu_device_asic_init(tmp_adev);
5253 if (r) {
aac89168 5254 dev_warn(tmp_adev->dev, "asic atom init failed!");
e3c1b071 5255 } else {
26bc5340 5256 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
9cec53c1 5257
26bc5340
AG
5258 r = amdgpu_device_ip_resume_phase1(tmp_adev);
5259 if (r)
5260 goto out;
5261
5262 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
a7691785
AA
5263
5264 amdgpu_coredump(tmp_adev, vram_lost, reset_context);
5265
26bc5340 5266 if (vram_lost) {
77e7f829 5267 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 5268 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
5269 }
5270
26bc5340
AG
5271 r = amdgpu_device_fw_loading(tmp_adev);
5272 if (r)
5273 return r;
5274
c45e38f2
LL
5275 r = amdgpu_xcp_restore_partition_mode(
5276 tmp_adev->xcp_mgr);
5277 if (r)
5278 goto out;
5279
26bc5340
AG
5280 r = amdgpu_device_ip_resume_phase2(tmp_adev);
5281 if (r)
5282 goto out;
5283
b7043800
AD
5284 if (tmp_adev->mman.buffer_funcs_ring->sched.ready)
5285 amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true);
5286
26bc5340
AG
5287 if (vram_lost)
5288 amdgpu_device_fill_reset_magic(tmp_adev);
5289
fdafb359
EQ
5290 /*
5291 * Add this ASIC back as tracked since the reset has already
5292 * completed successfully.
5293 */
5294 amdgpu_register_gpu_instance(tmp_adev);
5295
04442bf7
LL
5296 if (!reset_context->hive &&
5297 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
e3c1b071 5298 amdgpu_xgmi_add_device(tmp_adev);
5299
7c04ca50 5300 r = amdgpu_device_ip_late_init(tmp_adev);
5301 if (r)
5302 goto out;
5303
087451f3 5304 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
565d1941 5305
e8fbaf03
GC
5306 /*
5307 * The GPU enters a bad state once the number of faulty
5308 * pages detected by ECC has reached the threshold, and
5309 * RAS recovery is scheduled next. Add a check here to
5310 * break out of recovery if the bad page threshold has
5311 * indeed been exceeded, and remind the user to retire
5312 * this GPU or set a larger bad_page_threshold value to
5313 * work around this the next time the driver is probed
5314 * again.
5315 */
11003c68 5316 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
e8fbaf03
GC
5317 /* must succeed. */
5318 amdgpu_ras_resume(tmp_adev);
5319 } else {
5320 r = -EINVAL;
5321 goto out;
5322 }
e79a04d5 5323
26bc5340 5324 /* Update PSP FW topology after reset */
04442bf7
LL
5325 if (reset_context->hive &&
5326 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5327 r = amdgpu_xgmi_update_topology(
5328 reset_context->hive, tmp_adev);
26bc5340
AG
5329 }
5330 }
5331
26bc5340
AG
5332out:
5333 if (!r) {
5334 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5335 r = amdgpu_ib_ring_tests(tmp_adev);
5336 if (r) {
5337 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
26bc5340
AG
5338 need_full_reset = true;
5339 r = -EAGAIN;
5340 goto end;
5341 }
5342 }
5343
5344 if (!r)
5345 r = amdgpu_device_recover_vram(tmp_adev);
5346 else
5347 tmp_adev->asic_reset_res = r;
5348 }
5349
5350end:
04442bf7
LL
5351 if (need_full_reset)
5352 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5353 else
5354 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340
AG
5355 return r;
5356}
5357
e923be99 5358static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
26bc5340 5359{
5740682e 5360
a3a09142
AD
5361 switch (amdgpu_asic_reset_method(adev)) {
5362 case AMD_RESET_METHOD_MODE1:
5363 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5364 break;
5365 case AMD_RESET_METHOD_MODE2:
5366 adev->mp1_state = PP_MP1_STATE_RESET;
5367 break;
5368 default:
5369 adev->mp1_state = PP_MP1_STATE_NONE;
5370 break;
5371 }
26bc5340 5372}
d38ceaf9 5373
e923be99 5374static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
26bc5340 5375{
89041940 5376 amdgpu_vf_error_trans_all(adev);
a3a09142 5377 adev->mp1_state = PP_MP1_STATE_NONE;
91fb309d
HC
5378}
5379
3f12acc8
EQ
5380static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5381{
5382 struct pci_dev *p = NULL;
5383
5384 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5385 adev->pdev->bus->number, 1);
5386 if (p) {
5387 pm_runtime_enable(&(p->dev));
5388 pm_runtime_resume(&(p->dev));
5389 }
b85e285e
YY
5390
5391 pci_dev_put(p);
3f12acc8
EQ
5392}
5393
5394static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5395{
5396 enum amd_reset_method reset_method;
5397 struct pci_dev *p = NULL;
5398 u64 expires;
5399
5400 /*
5401 * For now, only BACO and mode1 reset are confirmed
5402 * to suffer the audio issue if audio is not properly suspended.
5403 */
5404 reset_method = amdgpu_asic_reset_method(adev);
5405 if ((reset_method != AMD_RESET_METHOD_BACO) &&
5406 (reset_method != AMD_RESET_METHOD_MODE1))
5407 return -EINVAL;
5408
5409 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5410 adev->pdev->bus->number, 1);
5411 if (!p)
5412 return -ENODEV;
5413
5414 expires = pm_runtime_autosuspend_expiration(&(p->dev));
5415 if (!expires)
5416 /*
5417 * If we cannot get the audio device autosuspend delay,
5418 * a fixed 4s interval will be used. Since 3s is the
5419 * audio controller's default autosuspend delay setting,
5420 * the 4s used here is guaranteed to cover it.
5421 */
54b7feb9 5422 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
5423
5424 while (!pm_runtime_status_suspended(&(p->dev))) {
5425 if (!pm_runtime_suspend(&(p->dev)))
5426 break;
5427
5428 if (expires < ktime_get_mono_fast_ns()) {
5429 dev_warn(adev->dev, "failed to suspend display audio\n");
b85e285e 5430 pci_dev_put(p);
3f12acc8
EQ
5431 /* TODO: abort the succeeding gpu reset? */
5432 return -ETIMEDOUT;
5433 }
5434 }
5435
5436 pm_runtime_disable(&(p->dev));
5437
b85e285e 5438 pci_dev_put(p);
3f12acc8
EQ
5439 return 0;
5440}
5441
d193b12b 5442static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
247c7b0d
AG
5443{
5444 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5445
5446#if defined(CONFIG_DEBUG_FS)
5447 if (!amdgpu_sriov_vf(adev))
5448 cancel_work(&adev->reset_work);
5449#endif
5450
5451 if (adev->kfd.dev)
5452 cancel_work(&adev->kfd.reset_work);
5453
5454 if (amdgpu_sriov_vf(adev))
5455 cancel_work(&adev->virt.flr_work);
5456
5457 if (con && adev->ras_enabled)
5458 cancel_work(&con->recovery_work);
5459
5460}
5461
26bc5340 5462/**
6e9c65f7 5463 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
26bc5340 5464 *
982a820b 5465 * @adev: amdgpu_device pointer
26bc5340 5466 * @job: which job trigger hang
80bd2de1 5467 * @reset_context: amdgpu reset context pointer
26bc5340
AG
5468 *
5469 * Attempt to reset the GPU if it has hung (all asics).
5470 * Attempt to do a soft reset or a full reset and reinitialize the ASIC.
5471 * Returns 0 for success or an error on failure.
5472 */
5473
cf727044 5474int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
f1549c09
LG
5475 struct amdgpu_job *job,
5476 struct amdgpu_reset_context *reset_context)
26bc5340 5477{
1d721ed6 5478 struct list_head device_list, *device_list_handle = NULL;
7dd8c205 5479 bool job_signaled = false;
26bc5340 5480 struct amdgpu_hive_info *hive = NULL;
26bc5340 5481 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 5482 int i, r = 0;
bb5c7235 5483 bool need_emergency_restart = false;
3f12acc8 5484 bool audio_suspended = false;
f5c7e779
YC
5485 bool gpu_reset_for_dev_remove = false;
5486
5487 gpu_reset_for_dev_remove =
5488 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5489 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340 5490
6e3cd2a9 5491 /*
bb5c7235
WS
5492 * Special case: RAS triggered and full reset isn't supported
5493 */
5494 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5495
d5ea093e
AG
5496 /*
5497 * Flush RAM to disk so that after reboot
5498 * the user can read the log and see why the system rebooted.
5499 */
80285ae1
SY
5500 if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
5501 amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
5502 DRM_WARN("Emergency reboot.");
5503
5504 ksys_sync_helper();
5505 emergency_restart();
5506 }
5507
b823821f 5508 dev_info(adev->dev, "GPU %s begin!\n",
bb5c7235 5509 need_emergency_restart ? "jobs stop":"reset");
26bc5340 5510
175ac6ec
ZL
5511 if (!amdgpu_sriov_vf(adev))
5512 hive = amdgpu_get_xgmi_hive(adev);
681260df 5513 if (hive)
53b3f8f4 5514 mutex_lock(&hive->hive_lock);
26bc5340 5515
f1549c09
LG
5516 reset_context->job = job;
5517 reset_context->hive = hive;
9e94d22c
EQ
5518 /*
5519 * Build list of devices to reset.
5520 * In case we are in XGMI hive mode, resort the device list
5521 * to put adev in the 1st position.
5522 */
5523 INIT_LIST_HEAD(&device_list);
175ac6ec 5524 if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
83d29a5f 5525 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
655ce9cb 5526 list_add_tail(&tmp_adev->reset_list, &device_list);
83d29a5f
YC
5527 if (gpu_reset_for_dev_remove && adev->shutdown)
5528 tmp_adev->shutdown = true;
5529 }
655ce9cb 5530 if (!list_is_first(&adev->reset_list, &device_list))
5531 list_rotate_to_front(&adev->reset_list, &device_list);
5532 device_list_handle = &device_list;
26bc5340 5533 } else {
655ce9cb 5534 list_add_tail(&adev->reset_list, &device_list);
26bc5340
AG
5535 device_list_handle = &device_list;
5536 }
5537
e923be99
AG
5538 /* We need to lock reset domain only once both for XGMI and single device */
5539 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5540 reset_list);
3675c2f2 5541 amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
e923be99 5542
1d721ed6 5543 /* block all schedulers and reset given job's ring */
655ce9cb 5544 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f287a3c5 5545
e923be99 5546 amdgpu_device_set_mp1_state(tmp_adev);
f287a3c5 5547
3f12acc8
EQ
5548 /*
5549 * Try to put the audio codec into suspend state
5550 * before the GPU reset starts.
5551 *
5552 * The power domain of the graphics device is
5553 * shared with the AZ power domain. Without this,
5554 * we may change the audio hardware from behind
5555 * the audio driver's back, which will trigger
5556 * some audio codec errors.
5557 */
5558 if (!amdgpu_device_suspend_display_audio(tmp_adev))
5559 audio_suspended = true;
5560
9e94d22c
EQ
5561 amdgpu_ras_set_error_query_ready(tmp_adev, false);
5562
52fb44cf
EQ
5563 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5564
c004d44e 5565 if (!amdgpu_sriov_vf(tmp_adev))
428890a3 5566 amdgpu_amdkfd_pre_reset(tmp_adev);
9e94d22c 5567
12ffa55d
AG
5568 /*
5569 * Mark these ASICs to be reset as untracked first,
5570 * and add them back after the reset has completed
5571 */
5572 amdgpu_unregister_gpu_instance(tmp_adev);
5573
163d4cd2 5574 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
565d1941 5575
f1c1314b 5576 /* disable ras on ALL IPs */
bb5c7235 5577 if (!need_emergency_restart &&
b823821f 5578 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 5579 amdgpu_ras_suspend(tmp_adev);
5580
1d721ed6
AG
5581 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5582 struct amdgpu_ring *ring = tmp_adev->rings[i];
5583
5584 if (!ring || !ring->sched.thread)
5585 continue;
5586
0b2d2c2e 5587 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 5588
bb5c7235 5589 if (need_emergency_restart)
7c6e68c7 5590 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6 5591 }
8f8c80f4 5592 atomic_inc(&tmp_adev->gpu_reset_counter);
1d721ed6
AG
5593 }
5594
bb5c7235 5595 if (need_emergency_restart)
7c6e68c7
AG
5596 goto skip_sched_resume;
5597
1d721ed6
AG
5598 /*
5599 * Must check guilty signal here since after this point all old
5600 * HW fences are force signaled.
5601 *
5602 * job->base holds a reference to parent fence
5603 */
f6a3f660 5604 if (job && dma_fence_is_signaled(&job->hw_fence)) {
1d721ed6 5605 job_signaled = true;
1d721ed6
AG
5606 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5607 goto skip_hw_reset;
5608 }
5609
26bc5340 5610retry: /* Rest of adevs pre asic reset from XGMI hive. */
655ce9cb 5611 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f5c7e779
YC
5612 if (gpu_reset_for_dev_remove) {
5613 /* Workaround for ASICs that need to disable SMC first */
5614 amdgpu_device_smu_fini_early(tmp_adev);
5615 }
f1549c09 5616 r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
26bc5340
AG
5617 /*TODO Should we stop ?*/
5618 if (r) {
aac89168 5619 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
4a580877 5620 r, adev_to_drm(tmp_adev)->unique);
26bc5340
AG
5621 tmp_adev->asic_reset_res = r;
5622 }
247c7b0d
AG
5623
5624 /*
5625 * Drop all pending non-scheduler resets. Scheduler resets
5626 * were already dropped during drm_sched_stop
5627 */
d193b12b 5628 amdgpu_device_stop_pending_resets(tmp_adev);
26bc5340
AG
5629 }
5630
5631 /* Actual ASIC resets if needed.*/
4f30d920 5632 /* Host driver will handle XGMI hive reset for SRIOV */
26bc5340
AG
5633 if (amdgpu_sriov_vf(adev)) {
5634 r = amdgpu_device_reset_sriov(adev, job ? false : true);
5635 if (r)
5636 adev->asic_reset_res = r;
950d6425 5637
28606c4e 5638 /* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */
4e8303cf
LL
5639 if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
5640 IP_VERSION(9, 4, 2) ||
5641 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
950d6425 5642 amdgpu_ras_resume(adev);
26bc5340 5643 } else {
f1549c09 5644 r = amdgpu_do_asic_reset(device_list_handle, reset_context);
b98a1648 5645 if (r && r == -EAGAIN)
26bc5340 5646 goto retry;
f5c7e779
YC
5647
5648 if (!r && gpu_reset_for_dev_remove)
5649 goto recover_end;
26bc5340
AG
5650 }
5651
1d721ed6
AG
5652skip_hw_reset:
5653
26bc5340 5654 /* Post ASIC reset for all devs .*/
655ce9cb 5655 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
7c6e68c7 5656
1d721ed6
AG
5657 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5658 struct amdgpu_ring *ring = tmp_adev->rings[i];
5659
5660 if (!ring || !ring->sched.thread)
5661 continue;
5662
6868a2c4 5663 drm_sched_start(&ring->sched, true);
1d721ed6
AG
5664 }
5665
b8920e1e 5666 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
4a580877 5667 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
1d721ed6 5668
7258fa31
SK
5669 if (tmp_adev->asic_reset_res)
5670 r = tmp_adev->asic_reset_res;
5671
1d721ed6 5672 tmp_adev->asic_reset_res = 0;
26bc5340
AG
5673
5674 if (r) {
5675 /* bad news, how to tell it to userspace ? */
12ffa55d 5676 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
5677 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5678 } else {
12ffa55d 5679 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
3fa8f89d
S
5680 if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5681 DRM_WARN("smart shift update failed\n");
26bc5340 5682 }
7c6e68c7 5683 }
26bc5340 5684
7c6e68c7 5685skip_sched_resume:
655ce9cb 5686 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
428890a3 5687 /* unlock kfd: SRIOV would do it separately */
c004d44e 5688 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
428890a3 5689 amdgpu_amdkfd_post_reset(tmp_adev);
8e2712e7 5690
5691 /* kfd_post_reset will do nothing if the kfd device is not initialized,
5692 * so bring up kfd here if it was not initialized before
5693 */
5694 if (!adev->kfd.init_complete)
5695 amdgpu_amdkfd_device_init(adev);
5696
3f12acc8
EQ
5697 if (audio_suspended)
5698 amdgpu_device_resume_display_audio(tmp_adev);
e923be99
AG
5699
5700 amdgpu_device_unset_mp1_state(tmp_adev);
d293470e
YC
5701
5702 amdgpu_ras_set_error_query_ready(tmp_adev, true);
26bc5340
AG
5703 }
5704
f5c7e779 5705recover_end:
e923be99
AG
5706 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5707 reset_list);
5708 amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5709
9e94d22c 5710 if (hive) {
9e94d22c 5711 mutex_unlock(&hive->hive_lock);
d95e8e97 5712 amdgpu_put_xgmi_hive(hive);
9e94d22c 5713 }
26bc5340 5714
f287a3c5 5715 if (r)
26bc5340 5716 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
ab9a0b1f
AG
5717
5718 atomic_set(&adev->reset_domain->reset_res, r);
d38ceaf9
AD
5719 return r;
5720}
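
/*
 * A minimal sketch of how a hang handler might drive the entry point
 * above: build a reset context, leave the method selection to the ASIC
 * code, and hand over the guilty job. Loosely modeled on the job
 * timeout path; the function name is illustrative only.
 */
static int example_recover_from_hang(struct amdgpu_ring *ring,
				     struct amdgpu_job *job)
{
	struct amdgpu_reset_context reset_context;

	memset(&reset_context, 0, sizeof(reset_context));

	reset_context.method = AMD_RESET_METHOD_NONE;
	reset_context.reset_req_dev = ring->adev;
	clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);

	return amdgpu_device_gpu_recover(ring->adev, job, &reset_context);
}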
5721
e3ecdffa
AD
5722/**
5723 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
5724 *
5725 * @adev: amdgpu_device pointer
5726 *
5727 * Fetches and stores in the driver the PCIE capabilities (gen speed
5728 * and lanes) of the slot the device is in. Handles APUs and
5729 * virtualized environments where PCIE config space may not be available.
5730 */
5494d864 5731static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 5732{
5d9a6330 5733 struct pci_dev *pdev;
c5313457
HK
5734 enum pci_bus_speed speed_cap, platform_speed_cap;
5735 enum pcie_link_width platform_link_width;
d0dd7f0c 5736
cd474ba0
AD
5737 if (amdgpu_pcie_gen_cap)
5738 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 5739
cd474ba0
AD
5740 if (amdgpu_pcie_lane_cap)
5741 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 5742
cd474ba0 5743 /* covers APUs as well */
04e85958 5744 if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
cd474ba0
AD
5745 if (adev->pm.pcie_gen_mask == 0)
5746 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5747 if (adev->pm.pcie_mlw_mask == 0)
5748 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 5749 return;
cd474ba0 5750 }
d0dd7f0c 5751
c5313457
HK
5752 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5753 return;
5754
dbaa922b
AD
5755 pcie_bandwidth_available(adev->pdev, NULL,
5756 &platform_speed_cap, &platform_link_width);
c5313457 5757
cd474ba0 5758 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
5759 /* asic caps */
5760 pdev = adev->pdev;
5761 speed_cap = pcie_get_speed_cap(pdev);
5762 if (speed_cap == PCI_SPEED_UNKNOWN) {
5763 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
5764 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5765 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 5766 } else {
2b3a1f51
FX
5767 if (speed_cap == PCIE_SPEED_32_0GT)
5768 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5769 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5770 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5771 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5772 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5773 else if (speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5774 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5775 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5776 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5777 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5778 else if (speed_cap == PCIE_SPEED_8_0GT)
5779 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5780 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5781 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5782 else if (speed_cap == PCIE_SPEED_5_0GT)
5783 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5784 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5785 else
5786 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5787 }
5788 /* platform caps */
c5313457 5789 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
5790 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5791 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5792 } else {
2b3a1f51
FX
5793 if (platform_speed_cap == PCIE_SPEED_32_0GT)
5794 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5795 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5796 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5797 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5798 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5799 else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5800 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5801 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5802 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5803 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 5804 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
5805 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5806 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5807 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 5808 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
5809 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5810 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5811 else
5812 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5813
cd474ba0
AD
5814 }
5815 }
5816 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 5817 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
5818 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5819 } else {
c5313457 5820 switch (platform_link_width) {
5d9a6330 5821 case PCIE_LNK_X32:
cd474ba0
AD
5822 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5823 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5824 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5825 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5826 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5827 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5828 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5829 break;
5d9a6330 5830 case PCIE_LNK_X16:
cd474ba0
AD
5831 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5832 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5833 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5834 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5835 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5836 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5837 break;
5d9a6330 5838 case PCIE_LNK_X12:
cd474ba0
AD
5839 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5840 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5841 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5842 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5843 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5844 break;
5d9a6330 5845 case PCIE_LNK_X8:
cd474ba0
AD
5846 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5847 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5848 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5849 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5850 break;
5d9a6330 5851 case PCIE_LNK_X4:
cd474ba0
AD
5852 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5853 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5854 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5855 break;
5d9a6330 5856 case PCIE_LNK_X2:
cd474ba0
AD
5857 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5858 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5859 break;
5d9a6330 5860 case PCIE_LNK_X1:
cd474ba0
AD
5861 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5862 break;
5863 default:
5864 break;
5865 }
d0dd7f0c
AD
5866 }
5867 }
5868}
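
/*
 * A minimal sketch, assuming a hypothetical consumer of the masks filled
 * in above: check whether the platform side of the link advertises Gen3
 * before choosing a higher link speed; the helper name is illustrative.
 */
static bool example_platform_supports_gen3(struct amdgpu_device *adev)
{
	return !!(adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
}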
d38ceaf9 5869
08a2fd23
RE
5870/**
5871 * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
5872 *
5873 * @adev: amdgpu_device pointer
5874 * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
5875 *
5876 * Return true if @peer_adev can access (DMA) @adev through the PCIe
5877 * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
5878 * @peer_adev.
5879 */
5880bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
5881 struct amdgpu_device *peer_adev)
5882{
5883#ifdef CONFIG_HSA_AMD_P2P
5884 uint64_t address_mask = peer_adev->dev->dma_mask ?
5885 ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
5886 resource_size_t aper_limit =
5887 adev->gmc.aper_base + adev->gmc.aper_size - 1;
bb66ecbf
LL
5888 bool p2p_access =
5889 !adev->gmc.xgmi.connected_to_cpu &&
5890 !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
08a2fd23
RE
5891
5892 return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
5893 adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
5894 !(adev->gmc.aper_base & address_mask ||
5895 aper_limit & address_mask));
5896#else
5897 return false;
5898#endif
5899}
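
/*
 * A minimal sketch, assuming a hypothetical copy path that only maps a
 * peer's VRAM over PCIe when both directions pass the check above; the
 * function name is illustrative only.
 */
static bool example_can_use_p2p(struct amdgpu_device *a,
				struct amdgpu_device *b)
{
	return amdgpu_device_is_peer_accessible(a, b) &&
	       amdgpu_device_is_peer_accessible(b, a);
}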
5900
361dbd01
AD
5901int amdgpu_device_baco_enter(struct drm_device *dev)
5902{
1348969a 5903 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5904 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01 5905
6ab68650 5906 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
5907 return -ENOTSUPP;
5908
8ab0d6f0 5909 if (ras && adev->ras_enabled &&
acdae216 5910 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5911 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
5912
9530273e 5913 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
5914}
5915
5916int amdgpu_device_baco_exit(struct drm_device *dev)
5917{
1348969a 5918 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5919 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 5920 int ret = 0;
361dbd01 5921
6ab68650 5922 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
5923 return -ENOTSUPP;
5924
9530273e
EQ
5925 ret = amdgpu_dpm_baco_exit(adev);
5926 if (ret)
5927 return ret;
7a22677b 5928
8ab0d6f0 5929 if (ras && adev->ras_enabled &&
acdae216 5930 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5931 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
5932
1bece222
CL
5933 if (amdgpu_passthrough(adev) &&
5934 adev->nbio.funcs->clear_doorbell_interrupt)
5935 adev->nbio.funcs->clear_doorbell_interrupt(adev);
5936
7a22677b 5937 return 0;
361dbd01 5938}
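
/*
 * A minimal sketch, assuming a hypothetical runtime-PM style sequence
 * that brackets an idle period with the BACO enter/exit pair above;
 * error handling is reduced to passing the code back to the caller.
 */
static int example_baco_cycle(struct drm_device *dev)
{
	int r;

	r = amdgpu_device_baco_enter(dev);
	if (r)
		return r;

	/* ... device sits in BACO until some wake event ... */

	return amdgpu_device_baco_exit(dev);
}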
c9a6b82f
AG
5939
5940/**
5941 * amdgpu_pci_error_detected - Called when a PCI error is detected.
5942 * @pdev: PCI device struct
5943 * @state: PCI channel state
5944 *
5945 * Description: Called when a PCI error is detected.
5946 *
5947 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
5948 */
5949pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
5950{
5951 struct drm_device *dev = pci_get_drvdata(pdev);
5952 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5953 int i;
c9a6b82f
AG
5954
5955 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
5956
6894305c
AG
5957 if (adev->gmc.xgmi.num_physical_nodes > 1) {
5958 DRM_WARN("No support for XGMI hive yet...");
5959 return PCI_ERS_RESULT_DISCONNECT;
5960 }
5961
e17e27f9
GC
5962 adev->pci_channel_state = state;
5963
c9a6b82f
AG
5964 switch (state) {
5965 case pci_channel_io_normal:
5966 return PCI_ERS_RESULT_CAN_RECOVER;
acd89fca 5967 /* Fatal error, prepare for slot reset */
8a11d283
TZ
5968 case pci_channel_io_frozen:
5969 /*
d0fb18b5 5970 * Locking adev->reset_domain->sem will prevent any external access
acd89fca
AG
5971 * to GPU during PCI error recovery
5972 */
3675c2f2 5973 amdgpu_device_lock_reset_domain(adev->reset_domain);
e923be99 5974 amdgpu_device_set_mp1_state(adev);
acd89fca
AG
5975
5976 /*
5977 * Block any work scheduling as we do for regular GPU reset
5978 * for the duration of the recovery
5979 */
5980 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5981 struct amdgpu_ring *ring = adev->rings[i];
5982
5983 if (!ring || !ring->sched.thread)
5984 continue;
5985
5986 drm_sched_stop(&ring->sched, NULL);
5987 }
8f8c80f4 5988 atomic_inc(&adev->gpu_reset_counter);
c9a6b82f
AG
5989 return PCI_ERS_RESULT_NEED_RESET;
5990 case pci_channel_io_perm_failure:
5991 /* Permanent error, prepare for device removal */
5992 return PCI_ERS_RESULT_DISCONNECT;
5993 }
5994
5995 return PCI_ERS_RESULT_NEED_RESET;
5996}
5997
5998/**
5999 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
6000 * @pdev: pointer to PCI device
6001 */
6002pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
6003{
6004
6005 DRM_INFO("PCI error: mmio enabled callback!!\n");
6006
6007 /* TODO - dump whatever for debugging purposes */
6008
6009 /* This is called only if amdgpu_pci_error_detected returns
6010 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
6011 * works, no need to reset slot.
6012 */
6013
6014 return PCI_ERS_RESULT_RECOVERED;
6015}
6016
6017/**
6018 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
6019 * @pdev: PCI device struct
6020 *
6021 * Description: This routine is called by the pci error recovery
6022 * code after the PCI slot has been reset, just before we
6023 * should resume normal operations.
6024 */
6025pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
6026{
6027 struct drm_device *dev = pci_get_drvdata(pdev);
6028 struct amdgpu_device *adev = drm_to_adev(dev);
362c7b91 6029 int r, i;
04442bf7 6030 struct amdgpu_reset_context reset_context;
362c7b91 6031 u32 memsize;
7ac71382 6032 struct list_head device_list;
c9a6b82f
AG
6033
6034 DRM_INFO("PCI error: slot reset callback!!\n");
6035
04442bf7
LL
6036 memset(&reset_context, 0, sizeof(reset_context));
6037
7ac71382 6038 INIT_LIST_HEAD(&device_list);
655ce9cb 6039 list_add_tail(&adev->reset_list, &device_list);
7ac71382 6040
362c7b91
AG
6041 /* wait for asic to come out of reset */
6042 msleep(500);
6043
7ac71382 6044 /* Restore PCI confspace */
c1dd4aa6 6045 amdgpu_device_load_pci_state(pdev);
c9a6b82f 6046
362c7b91
AG
6047 /* confirm ASIC came out of reset */
6048 for (i = 0; i < adev->usec_timeout; i++) {
6049 memsize = amdgpu_asic_get_config_memsize(adev);
6050
6051 if (memsize != 0xffffffff)
6052 break;
6053 udelay(1);
6054 }
6055 if (memsize == 0xffffffff) {
6056 r = -ETIME;
6057 goto out;
6058 }
6059
04442bf7
LL
6060 reset_context.method = AMD_RESET_METHOD_NONE;
6061 reset_context.reset_req_dev = adev;
6062 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
6063 set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
6064
7afefb81 6065 adev->no_hw_access = true;
04442bf7 6066 r = amdgpu_device_pre_asic_reset(adev, &reset_context);
7afefb81 6067 adev->no_hw_access = false;
c9a6b82f
AG
6068 if (r)
6069 goto out;
6070
04442bf7 6071 r = amdgpu_do_asic_reset(&device_list, &reset_context);
c9a6b82f
AG
6072
6073out:
c9a6b82f 6074 if (!r) {
c1dd4aa6
AG
6075 if (amdgpu_device_cache_pci_state(adev->pdev))
6076 pci_restore_state(adev->pdev);
6077
c9a6b82f
AG
6078 DRM_INFO("PCIe error recovery succeeded\n");
6079 } else {
6080 DRM_ERROR("PCIe error recovery failed, err:%d", r);
e923be99
AG
6081 amdgpu_device_unset_mp1_state(adev);
6082 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f
AG
6083 }
6084
6085 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
6086}
6087
6088/**
6089 * amdgpu_pci_resume() - resume normal ops after PCI reset
6090 * @pdev: pointer to PCI device
6091 *
6092 * Called when the error recovery driver tells us that it's
505199a3 6093 * OK to resume normal operation.
c9a6b82f
AG
6094 */
6095void amdgpu_pci_resume(struct pci_dev *pdev)
6096{
6097 struct drm_device *dev = pci_get_drvdata(pdev);
6098 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 6099 int i;
c9a6b82f 6100
c9a6b82f
AG
6101
6102 DRM_INFO("PCI error: resume callback!!\n");
acd89fca 6103
e17e27f9
GC
6104 /* Only continue execution for the case of pci_channel_io_frozen */
6105 if (adev->pci_channel_state != pci_channel_io_frozen)
6106 return;
6107
acd89fca
AG
6108 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6109 struct amdgpu_ring *ring = adev->rings[i];
6110
6111 if (!ring || !ring->sched.thread)
6112 continue;
6113
acd89fca
AG
6114 drm_sched_start(&ring->sched, true);
6115 }
6116
e923be99
AG
6117 amdgpu_device_unset_mp1_state(adev);
6118 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f 6119}
c1dd4aa6
AG
6120
6121bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
6122{
6123 struct drm_device *dev = pci_get_drvdata(pdev);
6124 struct amdgpu_device *adev = drm_to_adev(dev);
6125 int r;
6126
6127 r = pci_save_state(pdev);
6128 if (!r) {
6129 kfree(adev->pci_state);
6130
6131 adev->pci_state = pci_store_saved_state(pdev);
6132
6133 if (!adev->pci_state) {
6134 DRM_ERROR("Failed to store PCI saved state");
6135 return false;
6136 }
6137 } else {
6138 DRM_WARN("Failed to save PCI state, err:%d\n", r);
6139 return false;
6140 }
6141
6142 return true;
6143}
6144
6145bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
6146{
6147 struct drm_device *dev = pci_get_drvdata(pdev);
6148 struct amdgpu_device *adev = drm_to_adev(dev);
6149 int r;
6150
6151 if (!adev->pci_state)
6152 return false;
6153
6154 r = pci_load_saved_state(pdev, adev->pci_state);
6155
6156 if (!r) {
6157 pci_restore_state(pdev);
6158 } else {
6159 DRM_WARN("Failed to load PCI state, err:%d\n", r);
6160 return false;
6161 }
6162
6163 return true;
6164}
6165
810085dd
EH
6166void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
6167 struct amdgpu_ring *ring)
6168{
6169#ifdef CONFIG_X86_64
b818a5d3 6170 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
6171 return;
6172#endif
6173 if (adev->gmc.xgmi.connected_to_cpu)
6174 return;
6175
6176 if (ring && ring->funcs->emit_hdp_flush)
6177 amdgpu_ring_emit_hdp_flush(ring);
6178 else
6179 amdgpu_asic_flush_hdp(adev, ring);
6180}
c1dd4aa6 6181
810085dd
EH
6182void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
6183 struct amdgpu_ring *ring)
6184{
6185#ifdef CONFIG_X86_64
b818a5d3 6186 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
6187 return;
6188#endif
6189 if (adev->gmc.xgmi.connected_to_cpu)
6190 return;
c1dd4aa6 6191
810085dd
EH
6192 amdgpu_asic_invalidate_hdp(adev, ring);
6193}
34f3a4a9 6194
89a7a870
AG
6195int amdgpu_in_reset(struct amdgpu_device *adev)
6196{
6197 return atomic_read(&adev->reset_domain->in_gpu_reset);
53a17b6b
TZ
6198}
6199
34f3a4a9
LY
6200/**
6201 * amdgpu_device_halt() - bring hardware to some kind of halt state
6202 *
6203 * @adev: amdgpu_device pointer
6204 *
6205 * Bring hardware to some kind of halt state so that no one can touch it
6206 * any more. It helps to maintain error context when an error occurs.
6207 * Compared to a simple hang, the system will remain stable at least for
6208 * SSH access, so it should be trivial to inspect the hardware state and
6209 * see what's going on. Implemented as follows:
6210 *
6211 * 1. drm_dev_unplug() makes the device inaccessible to user space (IOCTLs, etc),
6212 * clears all CPU mappings to device, disallows remappings through page faults
6213 * 2. amdgpu_irq_disable_all() disables all interrupts
6214 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
6215 * 4. set adev->no_hw_access to avoid potential crashes after step 5
6216 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
6217 * 6. pci_disable_device() and pci_wait_for_pending_transaction()
6218 * flush any in flight DMA operations
6219 */
6220void amdgpu_device_halt(struct amdgpu_device *adev)
6221{
6222 struct pci_dev *pdev = adev->pdev;
e0f943b4 6223 struct drm_device *ddev = adev_to_drm(adev);
34f3a4a9 6224
2c1c7ba4 6225 amdgpu_xcp_dev_unplug(adev);
34f3a4a9
LY
6226 drm_dev_unplug(ddev);
6227
6228 amdgpu_irq_disable_all(adev);
6229
6230 amdgpu_fence_driver_hw_fini(adev);
6231
6232 adev->no_hw_access = true;
6233
6234 amdgpu_device_unmap_mmio(adev);
6235
6236 pci_disable_device(pdev);
6237 pci_wait_for_pending_transaction(pdev);
6238}
86700a40
XD
6239
6240u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
6241 u32 reg)
6242{
6243 unsigned long flags, address, data;
6244 u32 r;
6245
6246 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6247 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6248
6249 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6250 WREG32(address, reg * 4);
6251 (void)RREG32(address);
6252 r = RREG32(data);
6253 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6254 return r;
6255}
6256
6257void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
6258 u32 reg, u32 v)
6259{
6260 unsigned long flags, address, data;
6261
6262 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6263 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6264
6265 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6266 WREG32(address, reg * 4);
6267 (void)RREG32(address);
6268 WREG32(data, v);
6269 (void)RREG32(data);
6270 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6271}
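
/*
 * A minimal sketch of a read-modify-write on an indexed PCIe port
 * register using the two helpers above; the register offset and the
 * bits being set are purely illustrative.
 */
static void example_pcie_port_rmw(struct amdgpu_device *adev,
				  u32 reg, u32 set_bits)
{
	u32 v = amdgpu_device_pcie_port_rreg(adev, reg);

	amdgpu_device_pcie_port_wreg(adev, reg, v | set_bits);
}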
68ce8b24
CK
6272
6273/**
6274 * amdgpu_device_switch_gang - switch to a new gang
6275 * @adev: amdgpu_device pointer
6276 * @gang: the gang to switch to
6277 *
6278 * Try to switch to a new gang.
6279 * Returns: NULL if we switched to the new gang or a reference to the current
6280 * gang leader.
6281 */
6282struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
6283 struct dma_fence *gang)
6284{
6285 struct dma_fence *old = NULL;
6286
6287 do {
6288 dma_fence_put(old);
6289 rcu_read_lock();
6290 old = dma_fence_get_rcu_safe(&adev->gang_submit);
6291 rcu_read_unlock();
6292
6293 if (old == gang)
6294 break;
6295
6296 if (!dma_fence_is_signaled(old))
6297 return old;
6298
6299 } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
6300 old, gang) != old);
6301
6302 dma_fence_put(old);
6303 return NULL;
6304}
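
/*
 * A minimal sketch of the expected calling pattern for the helper above:
 * retry the switch until the previous gang leader has signaled, waiting
 * on the returned fence in between; the function name and the error
 * handling are illustrative only.
 */
static int example_wait_and_switch_gang(struct amdgpu_device *adev,
					struct dma_fence *gang)
{
	struct dma_fence *old;

	while ((old = amdgpu_device_switch_gang(adev, gang))) {
		long r = dma_fence_wait(old, true);

		dma_fence_put(old);
		if (r < 0)
			return r;
	}
	return 0;
}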
220c8cc8
AD
6305
6306bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6307{
6308 switch (adev->asic_type) {
6309#ifdef CONFIG_DRM_AMDGPU_SI
6310 case CHIP_HAINAN:
6311#endif
6312 case CHIP_TOPAZ:
6313 /* chips with no display hardware */
6314 return false;
6315#ifdef CONFIG_DRM_AMDGPU_SI
6316 case CHIP_TAHITI:
6317 case CHIP_PITCAIRN:
6318 case CHIP_VERDE:
6319 case CHIP_OLAND:
6320#endif
6321#ifdef CONFIG_DRM_AMDGPU_CIK
6322 case CHIP_BONAIRE:
6323 case CHIP_HAWAII:
6324 case CHIP_KAVERI:
6325 case CHIP_KABINI:
6326 case CHIP_MULLINS:
6327#endif
6328 case CHIP_TONGA:
6329 case CHIP_FIJI:
6330 case CHIP_POLARIS10:
6331 case CHIP_POLARIS11:
6332 case CHIP_POLARIS12:
6333 case CHIP_VEGAM:
6334 case CHIP_CARRIZO:
6335 case CHIP_STONEY:
6336 /* chips with display hardware */
6337 return true;
6338 default:
6339 /* IP discovery */
4e8303cf 6340 if (!amdgpu_ip_version(adev, DCE_HWIP, 0) ||
220c8cc8
AD
6341 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6342 return false;
6343 return true;
6344 }
6345}
81283fee
JZ
6346
6347uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
6348 uint32_t inst, uint32_t reg_addr, char reg_name[],
6349 uint32_t expected_value, uint32_t mask)
6350{
6351 uint32_t ret = 0;
6352 uint32_t old_ = 0;
6353 uint32_t tmp_ = RREG32(reg_addr);
6354 uint32_t loop = adev->usec_timeout;
6355
6356 while ((tmp_ & (mask)) != (expected_value)) {
6357 if (old_ != tmp_) {
6358 loop = adev->usec_timeout;
6359 old_ = tmp_;
6360 } else
6361 udelay(1);
6362 tmp_ = RREG32(reg_addr);
6363 loop--;
6364 if (!loop) {
6365 DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08xn",
6366 inst, reg_name, (uint32_t)expected_value,
6367 (uint32_t)(tmp_ & (mask)));
6368 ret = -ETIMEDOUT;
6369 break;
6370 }
6371 }
6372 return ret;
6373}
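
/*
 * A minimal sketch of polling a status register with the helper above:
 * wait for a "done" bit to rise before continuing. The register offset,
 * register name and bit mask are purely illustrative.
 */
static int example_wait_for_done_bit(struct amdgpu_device *adev, u32 status_reg)
{
	/* Expect bit 0 to read back as 1 within adev->usec_timeout. */
	if (amdgpu_device_wait_on_rreg(adev, 0, status_reg, "STATUS", 0x1, 0x1))
		return -ETIMEDOUT;
	return 0;
}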