drm/amd: Use the first non-dGPU PCI device for BW limits
[linux-2.6-block.git] / drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
d38ceaf9
AD
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
b1ddf548 28#include <linux/power_supply.h>
0875dc9e 29#include <linux/kthread.h>
fdf2f6c5 30#include <linux/module.h>
d38ceaf9
AD
31#include <linux/console.h>
32#include <linux/slab.h>
4a74c38c 33#include <linux/iommu.h>
901e2be2 34#include <linux/pci.h>
08a2fd23 35#include <linux/pci-p2pdma.h>
d37a3929 36#include <linux/apple-gmux.h>
fdf2f6c5 37
b7cdb41e 38#include <drm/drm_aperture.h>
4562236b 39#include <drm/drm_atomic_helper.h>
973ad627 40#include <drm/drm_crtc_helper.h>
45b64fd9 41#include <drm/drm_fb_helper.h>
fcd70cd3 42#include <drm/drm_probe_helper.h>
d38ceaf9 43#include <drm/amdgpu_drm.h>
7b1c6263 44#include <linux/device.h>
d38ceaf9
AD
45#include <linux/vgaarb.h>
46#include <linux/vga_switcheroo.h>
47#include <linux/efi.h>
48#include "amdgpu.h"
f4b373f4 49#include "amdgpu_trace.h"
d38ceaf9
AD
50#include "amdgpu_i2c.h"
51#include "atom.h"
52#include "amdgpu_atombios.h"
a5bde2f9 53#include "amdgpu_atomfirmware.h"
d0dd7f0c 54#include "amd_pcie.h"
33f34802
KW
55#ifdef CONFIG_DRM_AMDGPU_SI
56#include "si.h"
57#endif
a2e73f56
AD
58#ifdef CONFIG_DRM_AMDGPU_CIK
59#include "cik.h"
60#endif
aaa36a97 61#include "vi.h"
460826e6 62#include "soc15.h"
0a5b8c7b 63#include "nv.h"
d38ceaf9 64#include "bif/bif_4_1_d.h"
bec86378 65#include <linux/firmware.h>
89041940 66#include "amdgpu_vf_error.h"
d38ceaf9 67
ba997709 68#include "amdgpu_amdkfd.h"
d2f52ac8 69#include "amdgpu_pm.h"
d38ceaf9 70
5183411b 71#include "amdgpu_xgmi.h"
c030f2e4 72#include "amdgpu_ras.h"
9c7c85f7 73#include "amdgpu_pmu.h"
bd607166 74#include "amdgpu_fru_eeprom.h"
04442bf7 75#include "amdgpu_reset.h"
85150626 76#include "amdgpu_virt.h"
5183411b 77
d5ea093e 78#include <linux/suspend.h>
c6a6e2db 79#include <drm/task_barrier.h>
3f12acc8 80#include <linux/pm_runtime.h>
d5ea093e 81
f89f8c6b
AG
82#include <drm/drm_drv.h>
83
3ad5dcfe
KHF
84#if IS_ENABLED(CONFIG_X86)
85#include <asm/intel-family.h>
86#endif
87
e2a75f88 88MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
3f76dced 89MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
2d2e5e7e 90MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
ad5a67a7 91MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
54c4d17e 92MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
65e60f6e 93MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
42b325e5 94MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
e2a75f88 95
2dc80b00 96#define AMDGPU_RESUME_MS 2000
7258fa31
SK
97#define AMDGPU_MAX_RETRY_LIMIT 2
98#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
2dc80b00 99
b7cdb41e
ML
100static const struct drm_driver amdgpu_kms_driver;
101
050091ab 102const char *amdgpu_asic_name[] = {
da69c161
KW
103 "TAHITI",
104 "PITCAIRN",
105 "VERDE",
106 "OLAND",
107 "HAINAN",
d38ceaf9
AD
108 "BONAIRE",
109 "KAVERI",
110 "KABINI",
111 "HAWAII",
112 "MULLINS",
113 "TOPAZ",
114 "TONGA",
48299f95 115 "FIJI",
d38ceaf9 116 "CARRIZO",
139f4917 117 "STONEY",
2cc0c0b5
FC
118 "POLARIS10",
119 "POLARIS11",
c4642a47 120 "POLARIS12",
48ff108d 121 "VEGAM",
d4196f01 122 "VEGA10",
8fab806a 123 "VEGA12",
956fcddc 124 "VEGA20",
2ca8a5d2 125 "RAVEN",
d6c3b24e 126 "ARCTURUS",
1eee4228 127 "RENOIR",
d46b417a 128 "ALDEBARAN",
852a6626 129 "NAVI10",
d0f56dc2 130 "CYAN_SKILLFISH",
87dbad02 131 "NAVI14",
9802f5d7 132 "NAVI12",
ccaf72d3 133 "SIENNA_CICHLID",
ddd8fbe7 134 "NAVY_FLOUNDER",
4f1e9a76 135 "VANGOGH",
a2468e04 136 "DIMGREY_CAVEFISH",
6f169591 137 "BEIGE_GOBY",
ee9236b7 138 "YELLOW_CARP",
3ae695d6 139 "IP DISCOVERY",
d38ceaf9
AD
140 "LAST",
141};
142
dcea6e65
KR
143/**
144 * DOC: pcie_replay_count
145 *
146 * The amdgpu driver provides a sysfs API for reporting the total number
147 * of PCIe replays (NAKs).
148 * The file pcie_replay_count is used for this and returns the total
149 * number of replays as the sum of the NAKs generated and NAKs received.
150 */
151
152static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
153 struct device_attribute *attr, char *buf)
154{
155 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 156 struct amdgpu_device *adev = drm_to_adev(ddev);
dcea6e65
KR
157 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
158
36000c7a 159 return sysfs_emit(buf, "%llu\n", cnt);
dcea6e65
KR
160}
161
b8920e1e 162static DEVICE_ATTR(pcie_replay_count, 0444,
dcea6e65
KR
163 amdgpu_device_get_pcie_replay_count, NULL);
164
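/*
 * Example usage (illustrative only; the exact card index depends on the
 * system):
 *
 *   $ cat /sys/class/drm/card0/device/pcie_replay_count
 *   42
 */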
4798db85
LL
165/**
166 * DOC: board_info
167 *
168 * The amdgpu driver provides a sysfs API for reporting board related information.
169 * It provides the form factor information in the format
170 *
171 * type : form factor
172 *
173 * Possible form factor values
174 *
175 * - "cem" - PCIE CEM card
176 * - "oam" - Open Compute Accelerator Module
177 * - "unknown" - Not known
178 *
179 */
180
76da73f0
LL
181static ssize_t amdgpu_device_get_board_info(struct device *dev,
182 struct device_attribute *attr,
183 char *buf)
184{
185 struct drm_device *ddev = dev_get_drvdata(dev);
186 struct amdgpu_device *adev = drm_to_adev(ddev);
187 enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM;
188 const char *pkg;
189
190 if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type)
191 pkg_type = adev->smuio.funcs->get_pkg_type(adev);
192
193 switch (pkg_type) {
194 case AMDGPU_PKG_TYPE_CEM:
195 pkg = "cem";
196 break;
197 case AMDGPU_PKG_TYPE_OAM:
198 pkg = "oam";
199 break;
200 default:
201 pkg = "unknown";
202 break;
203 }
204
205 return sysfs_emit(buf, "%s : %s\n", "type", pkg);
206}
207
208static DEVICE_ATTR(board_info, 0444, amdgpu_device_get_board_info, NULL);
209
210static struct attribute *amdgpu_board_attrs[] = {
211 &dev_attr_board_info.attr,
212 NULL,
213};
214
215static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj,
216 struct attribute *attr, int n)
217{
218 struct device *dev = kobj_to_dev(kobj);
219 struct drm_device *ddev = dev_get_drvdata(dev);
220 struct amdgpu_device *adev = drm_to_adev(ddev);
221
222 if (adev->flags & AMD_IS_APU)
223 return 0;
224
225 return attr->mode;
226}
227
228static const struct attribute_group amdgpu_board_attrs_group = {
229 .attrs = amdgpu_board_attrs,
230 .is_visible = amdgpu_board_attrs_is_visible
231};
232
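/*
 * Example readout (illustrative only; the attribute is hidden on APUs by
 * amdgpu_board_attrs_is_visible() above):
 *
 *   $ cat /sys/class/drm/card0/device/board_info
 *   type : cem
 */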
5494d864
AD
233static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
234
bd607166 235
fd496ca8 236/**
b98c6299 237 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
fd496ca8
AD
238 *
239 * @dev: drm_device pointer
240 *
b98c6299 241 * Returns true if the device is a dGPU with ATPX power control,
fd496ca8
AD
242 * otherwise return false.
243 */
b98c6299 244bool amdgpu_device_supports_px(struct drm_device *dev)
fd496ca8
AD
245{
246 struct amdgpu_device *adev = drm_to_adev(dev);
247
b98c6299 248 if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
fd496ca8
AD
249 return true;
250 return false;
251}
252
e3ecdffa 253/**
0330b848 254 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
e3ecdffa
AD
255 *
256 * @dev: drm_device pointer
257 *
b98c6299 258 * Returns true if the device is a dGPU with ACPI power control,
e3ecdffa
AD
259 * otherwise return false.
260 */
31af062a 261bool amdgpu_device_supports_boco(struct drm_device *dev)
d38ceaf9 262{
1348969a 263 struct amdgpu_device *adev = drm_to_adev(dev);
d38ceaf9 264
b98c6299
AD
265 if (adev->has_pr3 ||
266 ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
d38ceaf9
AD
267 return true;
268 return false;
269}
270
a69cba42
AD
271/**
272 * amdgpu_device_supports_baco - Does the device support BACO
273 *
274 * @dev: drm_device pointer
275 *
276 * Returns true if the device supports BACO,
277 * otherwise return false.
278 */
279bool amdgpu_device_supports_baco(struct drm_device *dev)
280{
1348969a 281 struct amdgpu_device *adev = drm_to_adev(dev);
a69cba42
AD
282
283 return amdgpu_asic_supports_baco(adev);
284}
285
3fa8f89d
S
286/**
287 * amdgpu_device_supports_smart_shift - Is the device dGPU with
288 * smart shift support
289 *
290 * @dev: drm_device pointer
291 *
292 * Returns true if the device is a dGPU with Smart Shift support,
293 * otherwise returns false.
294 */
295bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
296{
297 return (amdgpu_device_supports_boco(dev) &&
298 amdgpu_acpi_is_power_shift_control_supported());
299}
300
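/*
 * These helpers are pure queries; the runtime PM code elsewhere in the driver
 * picks the power-off mechanism based on them. A rough sketch of that
 * decision (illustrative only, not the actual runtime PM implementation):
 *
 *   if (amdgpu_device_supports_px(dev))
 *           use ATPX/PX to power down the dGPU;
 *   else if (amdgpu_device_supports_boco(dev))
 *           use ACPI power resources (BOCO);
 *   else if (amdgpu_device_supports_baco(dev))
 *           use BACO (bus active, chip off);
 */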
6e3cd2a9
MCC
301/*
302 * VRAM access helper functions
303 */
304
e35e2b11 305/**
048af66b 306 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
e35e2b11
TY
307 *
308 * @adev: amdgpu_device pointer
309 * @pos: offset of the buffer in vram
310 * @buf: virtual address of the buffer in system memory
311 * @size: read/write size, the buffer at @buf must be at least @size bytes
312 * @write: true - write to vram, otherwise - read from vram
313 */
048af66b
KW
314void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
315 void *buf, size_t size, bool write)
e35e2b11 316{
e35e2b11 317 unsigned long flags;
048af66b
KW
318 uint32_t hi = ~0, tmp = 0;
319 uint32_t *data = buf;
ce05ac56 320 uint64_t last;
f89f8c6b 321 int idx;
ce05ac56 322
c58a863b 323 if (!drm_dev_enter(adev_to_drm(adev), &idx))
f89f8c6b 324 return;
9d11eb0d 325
048af66b
KW
326 BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
327
328 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
329 for (last = pos + size; pos < last; pos += 4) {
330 tmp = pos >> 31;
331
332 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
333 if (tmp != hi) {
334 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
335 hi = tmp;
336 }
337 if (write)
338 WREG32_NO_KIQ(mmMM_DATA, *data++);
339 else
340 *data++ = RREG32_NO_KIQ(mmMM_DATA);
341 }
342
343 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
344 drm_dev_exit(idx);
345}
346
347/**
bbe04dec 348 * amdgpu_device_aper_access - access vram by the vram aperture
048af66b
KW
349 *
350 * @adev: amdgpu_device pointer
351 * @pos: offset of the buffer in vram
352 * @buf: virtual address of the buffer in system memory
353 * @size: read/write size, the buffer at @buf must be at least @size bytes
354 * @write: true - write to vram, otherwise - read from vram
355 *
356 * The return value means how many bytes have been transferred.
357 */
358size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
359 void *buf, size_t size, bool write)
360{
9d11eb0d 361#ifdef CONFIG_64BIT
048af66b
KW
362 void __iomem *addr;
363 size_t count = 0;
364 uint64_t last;
365
366 if (!adev->mman.aper_base_kaddr)
367 return 0;
368
9d11eb0d
CK
369 last = min(pos + size, adev->gmc.visible_vram_size);
370 if (last > pos) {
048af66b
KW
371 addr = adev->mman.aper_base_kaddr + pos;
372 count = last - pos;
9d11eb0d
CK
373
374 if (write) {
375 memcpy_toio(addr, buf, count);
4c452b5c
SS
376 /* Make sure HDP write cache flush happens without any reordering
377 * after the system memory contents are sent over PCIe device
378 */
9d11eb0d 379 mb();
810085dd 380 amdgpu_device_flush_hdp(adev, NULL);
9d11eb0d 381 } else {
810085dd 382 amdgpu_device_invalidate_hdp(adev, NULL);
4c452b5c
SS
383 /* Make sure HDP read cache is invalidated before issuing a read
384 * to the PCIe device
385 */
9d11eb0d
CK
386 mb();
387 memcpy_fromio(buf, addr, count);
388 }
389
9d11eb0d 390 }
048af66b
KW
391
392 return count;
393#else
394 return 0;
9d11eb0d 395#endif
048af66b 396}
9d11eb0d 397
048af66b
KW
398/**
399 * amdgpu_device_vram_access - read/write a buffer in vram
400 *
401 * @adev: amdgpu_device pointer
402 * @pos: offset of the buffer in vram
403 * @buf: virtual address of the buffer in system memory
404 * @size: read/write size, the buffer at @buf must be at least @size bytes
405 * @write: true - write to vram, otherwise - read from vram
406 */
407void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
408 void *buf, size_t size, bool write)
409{
410 size_t count;
e35e2b11 411
048af66b
KW
412 /* try using the vram aperture to access vram first */
413 count = amdgpu_device_aper_access(adev, pos, buf, size, write);
414 size -= count;
415 if (size) {
416 /* use MM access for the rest of vram */
417 pos += count;
418 buf += count;
419 amdgpu_device_mm_access(adev, pos, buf, size, write);
e35e2b11
TY
420 }
421}
422
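/*
 * Usage sketch (illustrative only): read a few dwords out of VRAM, e.g. for a
 * debugfs dump. The visible-aperture path is tried first and the
 * MM_INDEX/MM_DATA fallback covers anything outside the CPU-visible window.
 *
 *   u32 data[4];
 *
 *   amdgpu_device_vram_access(adev, vram_offset, data, sizeof(data), false);
 */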
d38ceaf9 423/*
f7ee1874 424 * register access helper functions.
d38ceaf9 425 */
56b53c0b
DL
426
427/* Check if hw access should be skipped because of hotplug or device error */
428bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
429{
7afefb81 430 if (adev->no_hw_access)
56b53c0b
DL
431 return true;
432
433#ifdef CONFIG_LOCKDEP
434 /*
435 * This is a bit complicated to understand, so worth a comment. What we assert
436 * here is that the GPU reset is not running on another thread in parallel.
437 *
438 * For this we trylock the read side of the reset semaphore, if that succeeds
439 * we know that the reset is not running in parallel.
440 *
441 * If the trylock fails we assert that we are either already holding the read
442 * side of the lock or are the reset thread itself and hold the write side of
443 * the lock.
444 */
445 if (in_task()) {
d0fb18b5
AG
446 if (down_read_trylock(&adev->reset_domain->sem))
447 up_read(&adev->reset_domain->sem);
56b53c0b 448 else
d0fb18b5 449 lockdep_assert_held(&adev->reset_domain->sem);
56b53c0b
DL
450 }
451#endif
452 return false;
453}
454
e3ecdffa 455/**
f7ee1874 456 * amdgpu_device_rreg - read a memory mapped IO or indirect register
e3ecdffa
AD
457 *
458 * @adev: amdgpu_device pointer
459 * @reg: dword aligned register offset
460 * @acc_flags: access flags which require special behavior
461 *
462 * Returns the 32 bit value from the offset specified.
463 */
f7ee1874
HZ
464uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
465 uint32_t reg, uint32_t acc_flags)
d38ceaf9 466{
f4b373f4
TSD
467 uint32_t ret;
468
56b53c0b 469 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
470 return 0;
471
f7ee1874
HZ
472 if ((reg * 4) < adev->rmmio_size) {
473 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
474 amdgpu_sriov_runtime(adev) &&
d0fb18b5 475 down_read_trylock(&adev->reset_domain->sem)) {
85150626 476 ret = amdgpu_kiq_rreg(adev, reg, 0);
d0fb18b5 477 up_read(&adev->reset_domain->sem);
f7ee1874
HZ
478 } else {
479 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
480 }
481 } else {
482 ret = adev->pcie_rreg(adev, reg * 4);
81202807 483 }
bc992ba5 484
f7ee1874 485 trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
e78b579d 486
f4b373f4 487 return ret;
d38ceaf9
AD
488}
489
421a2a30
ML
490/*
491 * MMIO register read with bytes helper functions
492 * @offset: byte offset from MMIO start
b8920e1e 493 */
421a2a30 494
e3ecdffa
AD
495/**
496 * amdgpu_mm_rreg8 - read a memory mapped IO register
497 *
498 * @adev: amdgpu_device pointer
499 * @offset: byte aligned register offset
500 *
501 * Returns the 8 bit value from the offset specified.
502 */
7cbbc745
AG
503uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
504{
56b53c0b 505 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
506 return 0;
507
421a2a30
ML
508 if (offset < adev->rmmio_size)
509 return (readb(adev->rmmio + offset));
510 BUG();
511}
512
85150626
VL
513
514/**
515 * amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register with specific XCC
516 *
517 * @adev: amdgpu_device pointer
518 * @reg: dword aligned register offset
519 * @acc_flags: access flags which require special behavior
520 * @xcc_id: xcc accelerated compute core id
521 *
522 * Returns the 32 bit value from the offset specified.
523 */
524uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
525 uint32_t reg, uint32_t acc_flags,
526 uint32_t xcc_id)
527{
528 uint32_t ret, rlcg_flag;
529
530 if (amdgpu_device_skip_hw_access(adev))
531 return 0;
532
533 if ((reg * 4) < adev->rmmio_size) {
534 if (amdgpu_sriov_vf(adev) &&
535 !amdgpu_sriov_runtime(adev) &&
536 adev->gfx.rlc.rlcg_reg_access_supported &&
537 amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
538 GC_HWIP, false,
539 &rlcg_flag)) {
540 ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, xcc_id);
541 } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
542 amdgpu_sriov_runtime(adev) &&
543 down_read_trylock(&adev->reset_domain->sem)) {
544 ret = amdgpu_kiq_rreg(adev, reg, xcc_id);
545 up_read(&adev->reset_domain->sem);
546 } else {
547 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
548 }
549 } else {
550 ret = adev->pcie_rreg(adev, reg * 4);
551 }
552
553 return ret;
554}
555
421a2a30
ML
556/*
557 * MMIO register write with bytes helper functions
558 * @offset: byte offset from MMIO start
559 * @value: the value to be written to the register
b8920e1e
SS
560 */
561
e3ecdffa
AD
562/**
563 * amdgpu_mm_wreg8 - write a memory mapped IO register
564 *
565 * @adev: amdgpu_device pointer
566 * @offset: byte aligned register offset
567 * @value: 8 bit value to write
568 *
569 * Writes the value specified to the offset specified.
570 */
7cbbc745
AG
571void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
572{
56b53c0b 573 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
574 return;
575
421a2a30
ML
576 if (offset < adev->rmmio_size)
577 writeb(value, adev->rmmio + offset);
578 else
579 BUG();
580}
581
e3ecdffa 582/**
f7ee1874 583 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
e3ecdffa
AD
584 *
585 * @adev: amdgpu_device pointer
586 * @reg: dword aligned register offset
587 * @v: 32 bit value to write to the register
588 * @acc_flags: access flags which require special behavior
589 *
590 * Writes the value specified to the offset specified.
591 */
f7ee1874
HZ
592void amdgpu_device_wreg(struct amdgpu_device *adev,
593 uint32_t reg, uint32_t v,
594 uint32_t acc_flags)
d38ceaf9 595{
56b53c0b 596 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
597 return;
598
f7ee1874
HZ
599 if ((reg * 4) < adev->rmmio_size) {
600 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
601 amdgpu_sriov_runtime(adev) &&
d0fb18b5 602 down_read_trylock(&adev->reset_domain->sem)) {
85150626 603 amdgpu_kiq_wreg(adev, reg, v, 0);
d0fb18b5 604 up_read(&adev->reset_domain->sem);
f7ee1874
HZ
605 } else {
606 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
607 }
608 } else {
609 adev->pcie_wreg(adev, reg * 4, v);
81202807 610 }
bc992ba5 611
f7ee1874 612 trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
2e0cc4d4 613}
d38ceaf9 614
03f2abb0 615/**
4cc9f86f 616 * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
2e0cc4d4 617 *
71579346
RB
618 * @adev: amdgpu_device pointer
619 * @reg: mmio/rlc register
620 * @v: value to write
8057a9d6 621 * @xcc_id: xcc accelerated compute core id
71579346
RB
622 *
623 * this function is invoked only for the debugfs register access
03f2abb0 624 */
f7ee1874 625void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
8ed49dd1
VL
626 uint32_t reg, uint32_t v,
627 uint32_t xcc_id)
2e0cc4d4 628{
56b53c0b 629 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
630 return;
631
2e0cc4d4 632 if (amdgpu_sriov_fullaccess(adev) &&
f7ee1874
HZ
633 adev->gfx.rlc.funcs &&
634 adev->gfx.rlc.funcs->is_rlcg_access_range) {
2e0cc4d4 635 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
8ed49dd1 636 return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
4cc9f86f
TSD
637 } else if ((reg * 4) >= adev->rmmio_size) {
638 adev->pcie_wreg(adev, reg * 4, v);
f7ee1874
HZ
639 } else {
640 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
47ed4e1c 641 }
d38ceaf9
AD
642}
643
85150626
VL
644/**
645 * amdgpu_device_xcc_wreg - write to a memory mapped IO or indirect register with specific XCC
646 *
647 * @adev: amdgpu_device pointer
648 * @reg: dword aligned register offset
649 * @v: 32 bit value to write to the register
650 * @acc_flags: access flags which require special behavior
651 * @xcc_id: xcc accelerated compute core id
652 *
653 * Writes the value specified to the offset specified.
654 */
655void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
656 uint32_t reg, uint32_t v,
657 uint32_t acc_flags, uint32_t xcc_id)
658{
659 uint32_t rlcg_flag;
660
661 if (amdgpu_device_skip_hw_access(adev))
662 return;
663
664 if ((reg * 4) < adev->rmmio_size) {
665 if (amdgpu_sriov_vf(adev) &&
666 !amdgpu_sriov_runtime(adev) &&
667 adev->gfx.rlc.rlcg_reg_access_supported &&
668 amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
669 GC_HWIP, true,
670 &rlcg_flag)) {
671 amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, xcc_id);
672 } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
673 amdgpu_sriov_runtime(adev) &&
674 down_read_trylock(&adev->reset_domain->sem)) {
675 amdgpu_kiq_wreg(adev, reg, v, xcc_id);
676 up_read(&adev->reset_domain->sem);
677 } else {
678 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
679 }
680 } else {
681 adev->pcie_wreg(adev, reg * 4, v);
682 }
683}
684
1bba3683
HZ
685/**
686 * amdgpu_device_indirect_rreg - read an indirect register
687 *
688 * @adev: amdgpu_device pointer
22f453fb 689 * @reg_addr: indirect register address to read from
1bba3683
HZ
690 *
691 * Returns the value of indirect register @reg_addr
692 */
693u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
1bba3683
HZ
694 u32 reg_addr)
695{
65ba96e9 696 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
697 void __iomem *pcie_index_offset;
698 void __iomem *pcie_data_offset;
65ba96e9
HZ
699 u32 r;
700
701 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
702 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1bba3683
HZ
703
704 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
705 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
706 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
707
708 writel(reg_addr, pcie_index_offset);
709 readl(pcie_index_offset);
710 r = readl(pcie_data_offset);
711 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
712
713 return r;
714}
715
0c552ed3
LM
716u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
717 u64 reg_addr)
718{
719 unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
720 u32 r;
721 void __iomem *pcie_index_offset;
722 void __iomem *pcie_index_hi_offset;
723 void __iomem *pcie_data_offset;
724
725 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
726 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
d57e24aa 727 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
0c552ed3
LM
728 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
729 else
730 pcie_index_hi = 0;
731
732 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
733 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
734 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
735 if (pcie_index_hi != 0)
736 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
737 pcie_index_hi * 4;
738
739 writel(reg_addr, pcie_index_offset);
740 readl(pcie_index_offset);
741 if (pcie_index_hi != 0) {
742 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
743 readl(pcie_index_hi_offset);
744 }
745 r = readl(pcie_data_offset);
746
747 /* clear the high bits */
748 if (pcie_index_hi != 0) {
749 writel(0, pcie_index_hi_offset);
750 readl(pcie_index_hi_offset);
751 }
752
753 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
754
755 return r;
756}
757
1bba3683
HZ
758/**
759 * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
760 *
761 * @adev: amdgpu_device pointer
22f453fb 762 * @reg_addr: indirect register address to read from
1bba3683
HZ
763 *
764 * Returns the value of indirect register @reg_addr
765 */
766u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
1bba3683
HZ
767 u32 reg_addr)
768{
65ba96e9 769 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
770 void __iomem *pcie_index_offset;
771 void __iomem *pcie_data_offset;
65ba96e9
HZ
772 u64 r;
773
774 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
775 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1bba3683
HZ
776
777 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
778 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
779 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
780
781 /* read low 32 bits */
782 writel(reg_addr, pcie_index_offset);
783 readl(pcie_index_offset);
784 r = readl(pcie_data_offset);
785 /* read high 32 bits */
786 writel(reg_addr + 4, pcie_index_offset);
787 readl(pcie_index_offset);
788 r |= ((u64)readl(pcie_data_offset) << 32);
789 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
790
791 return r;
792}
793
a76b2870
CL
794u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
795 u64 reg_addr)
796{
797 unsigned long flags, pcie_index, pcie_data;
798 unsigned long pcie_index_hi = 0;
799 void __iomem *pcie_index_offset;
800 void __iomem *pcie_index_hi_offset;
801 void __iomem *pcie_data_offset;
802 u64 r;
803
804 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
805 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
806 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
807 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
808
809 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
810 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
811 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
812 if (pcie_index_hi != 0)
813 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
814 pcie_index_hi * 4;
815
816 /* read low 32 bits */
817 writel(reg_addr, pcie_index_offset);
818 readl(pcie_index_offset);
819 if (pcie_index_hi != 0) {
820 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
821 readl(pcie_index_hi_offset);
822 }
823 r = readl(pcie_data_offset);
824 /* read high 32 bits */
825 writel(reg_addr + 4, pcie_index_offset);
826 readl(pcie_index_offset);
827 if (pcie_index_hi != 0) {
828 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
829 readl(pcie_index_hi_offset);
830 }
831 r |= ((u64)readl(pcie_data_offset) << 32);
832
833 /* clear the high bits */
834 if (pcie_index_hi != 0) {
835 writel(0, pcie_index_hi_offset);
836 readl(pcie_index_hi_offset);
837 }
838
839 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
840
841 return r;
842}
843
1bba3683
HZ
844/**
845 * amdgpu_device_indirect_wreg - write an indirect register address
846 *
847 * @adev: amdgpu_device pointer
1bba3683
HZ
848 * @reg_addr: indirect register offset
849 * @reg_data: indirect register data
850 *
851 */
852void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
1bba3683
HZ
853 u32 reg_addr, u32 reg_data)
854{
65ba96e9 855 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
856 void __iomem *pcie_index_offset;
857 void __iomem *pcie_data_offset;
858
65ba96e9
HZ
859 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
860 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
861
1bba3683
HZ
862 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
863 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
864 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
865
866 writel(reg_addr, pcie_index_offset);
867 readl(pcie_index_offset);
868 writel(reg_data, pcie_data_offset);
869 readl(pcie_data_offset);
870 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
871}
872
0c552ed3
LM
873void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
874 u64 reg_addr, u32 reg_data)
875{
876 unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
877 void __iomem *pcie_index_offset;
878 void __iomem *pcie_index_hi_offset;
879 void __iomem *pcie_data_offset;
880
881 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
882 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
d57e24aa 883 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
0c552ed3
LM
884 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
885 else
886 pcie_index_hi = 0;
887
888 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
889 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
890 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
891 if (pcie_index_hi != 0)
892 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
893 pcie_index_hi * 4;
894
895 writel(reg_addr, pcie_index_offset);
896 readl(pcie_index_offset);
897 if (pcie_index_hi != 0) {
898 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
899 readl(pcie_index_hi_offset);
900 }
901 writel(reg_data, pcie_data_offset);
902 readl(pcie_data_offset);
903
904 /* clear the high bits */
905 if (pcie_index_hi != 0) {
906 writel(0, pcie_index_hi_offset);
907 readl(pcie_index_hi_offset);
908 }
909
910 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
911}
912
1bba3683
HZ
913/**
914 * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
915 *
916 * @adev: amdgpu_device pointer
1bba3683
HZ
917 * @reg_addr: indirect register offset
918 * @reg_data: indirect register data
919 *
920 */
921void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
1bba3683
HZ
922 u32 reg_addr, u64 reg_data)
923{
65ba96e9 924 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
925 void __iomem *pcie_index_offset;
926 void __iomem *pcie_data_offset;
927
65ba96e9
HZ
928 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
929 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
930
1bba3683
HZ
931 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
932 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
933 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
934
935 /* write low 32 bits */
936 writel(reg_addr, pcie_index_offset);
937 readl(pcie_index_offset);
938 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
939 readl(pcie_data_offset);
940 /* write high 32 bits */
941 writel(reg_addr + 4, pcie_index_offset);
942 readl(pcie_index_offset);
943 writel((u32)(reg_data >> 32), pcie_data_offset);
944 readl(pcie_data_offset);
945 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
946}
947
a76b2870
CL
948void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
949 u64 reg_addr, u64 reg_data)
950{
951 unsigned long flags, pcie_index, pcie_data;
952 unsigned long pcie_index_hi = 0;
953 void __iomem *pcie_index_offset;
954 void __iomem *pcie_index_hi_offset;
955 void __iomem *pcie_data_offset;
956
957 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
958 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
959 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
960 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
961
962 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
963 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
964 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
965 if (pcie_index_hi != 0)
966 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
967 pcie_index_hi * 4;
968
969 /* write low 32 bits */
970 writel(reg_addr, pcie_index_offset);
971 readl(pcie_index_offset);
972 if (pcie_index_hi != 0) {
973 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
974 readl(pcie_index_hi_offset);
975 }
976 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
977 readl(pcie_data_offset);
978 /* write high 32 bits */
979 writel(reg_addr + 4, pcie_index_offset);
980 readl(pcie_index_offset);
981 if (pcie_index_hi != 0) {
982 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
983 readl(pcie_index_hi_offset);
984 }
985 writel((u32)(reg_data >> 32), pcie_data_offset);
986 readl(pcie_data_offset);
987
988 /* clear the high bits */
989 if (pcie_index_hi != 0) {
990 writel(0, pcie_index_hi_offset);
991 readl(pcie_index_hi_offset);
992 }
993
994 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
995}
996
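/*
 * SoC code typically installs these helpers as the default backends for the
 * PCIE index/data register callbacks, e.g. (illustrative wiring):
 *
 *   adev->pcie_rreg = &amdgpu_device_indirect_rreg;
 *   adev->pcie_wreg = &amdgpu_device_indirect_wreg;
 *   adev->pcie_rreg64 = &amdgpu_device_indirect_rreg64;
 *   adev->pcie_wreg64 = &amdgpu_device_indirect_wreg64;
 */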
dabc114e
HZ
997/**
998 * amdgpu_device_get_rev_id - query device rev_id
999 *
1000 * @adev: amdgpu_device pointer
1001 *
1002 * Return device rev_id
1003 */
1004u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
1005{
1006 return adev->nbio.funcs->get_rev_id(adev);
1007}
1008
d38ceaf9
AD
1009/**
1010 * amdgpu_invalid_rreg - dummy reg read function
1011 *
982a820b 1012 * @adev: amdgpu_device pointer
d38ceaf9
AD
1013 * @reg: offset of register
1014 *
1015 * Dummy register read function. Used for register blocks
1016 * that certain asics don't have (all asics).
1017 * Returns the value in the register.
1018 */
1019static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
1020{
1021 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
1022 BUG();
1023 return 0;
1024}
1025
0c552ed3
LM
1026static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
1027{
1028 DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
1029 BUG();
1030 return 0;
1031}
1032
d38ceaf9
AD
1033/**
1034 * amdgpu_invalid_wreg - dummy reg write function
1035 *
982a820b 1036 * @adev: amdgpu_device pointer
d38ceaf9
AD
1037 * @reg: offset of register
1038 * @v: value to write to the register
1039 *
1040 * Dummy register write function. Used for register blocks
1041 * that certain asics don't have (all asics).
1042 */
1043static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
1044{
1045 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
1046 reg, v);
1047 BUG();
1048}
1049
0c552ed3
LM
1050static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
1051{
1052 DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
1053 reg, v);
1054 BUG();
1055}
1056
4fa1c6a6
TZ
1057/**
1058 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
1059 *
982a820b 1060 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
1061 * @reg: offset of register
1062 *
1063 * Dummy register read function. Used for register blocks
1064 * that certain asics don't have (all asics).
1065 * Returns the value in the register.
1066 */
1067static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
1068{
1069 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
1070 BUG();
1071 return 0;
1072}
1073
a76b2870
CL
1074static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg)
1075{
1076 DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
1077 BUG();
1078 return 0;
1079}
1080
4fa1c6a6
TZ
1081/**
1082 * amdgpu_invalid_wreg64 - dummy reg write function
1083 *
982a820b 1084 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
1085 * @reg: offset of register
1086 * @v: value to write to the register
1087 *
1088 * Dummy register write function. Used for register blocks
1089 * that certain asics don't have (all asics).
1090 */
1091static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
1092{
1093 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
1094 reg, v);
1095 BUG();
1096}
1097
a76b2870
CL
1098static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v)
1099{
1100 DRM_ERROR("Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n",
1101 reg, v);
1102 BUG();
1103}
1104
d38ceaf9
AD
1105/**
1106 * amdgpu_block_invalid_rreg - dummy reg read function
1107 *
982a820b 1108 * @adev: amdgpu_device pointer
d38ceaf9
AD
1109 * @block: offset of instance
1110 * @reg: offset of register
1111 *
1112 * Dummy register read function. Used for register blocks
1113 * that certain asics don't have (all asics).
1114 * Returns the value in the register.
1115 */
1116static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
1117 uint32_t block, uint32_t reg)
1118{
1119 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
1120 reg, block);
1121 BUG();
1122 return 0;
1123}
1124
1125/**
1126 * amdgpu_block_invalid_wreg - dummy reg write function
1127 *
982a820b 1128 * @adev: amdgpu_device pointer
d38ceaf9
AD
1129 * @block: offset of instance
1130 * @reg: offset of register
1131 * @v: value to write to the register
1132 *
1133 * Dummy register write function. Used for register blocks
1134 * that certain asics don't have (all asics).
1135 */
1136static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
1137 uint32_t block,
1138 uint32_t reg, uint32_t v)
1139{
1140 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
1141 reg, block, v);
1142 BUG();
1143}
1144
4d2997ab
AD
1145/**
1146 * amdgpu_device_asic_init - Wrapper for atom asic_init
1147 *
982a820b 1148 * @adev: amdgpu_device pointer
4d2997ab
AD
1149 *
1150 * Does any asic specific work and then calls atom asic init.
1151 */
1152static int amdgpu_device_asic_init(struct amdgpu_device *adev)
1153{
15c5c5f5
LL
1154 int ret;
1155
4d2997ab
AD
1156 amdgpu_asic_pre_asic_init(adev);
1157
4e8303cf
LL
1158 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
1159 amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
15c5c5f5
LL
1160 amdgpu_psp_wait_for_bootloader(adev);
1161 ret = amdgpu_atomfirmware_asic_init(adev, true);
23618280
HZ
1162 /* TODO: check the return val and stop device initialization if boot fails */
1163 amdgpu_psp_query_boot_status(adev);
15c5c5f5
LL
1164 return ret;
1165 } else {
85d1bcc6 1166 return amdgpu_atom_asic_init(adev->mode_info.atom_context);
15c5c5f5
LL
1167 }
1168
1169 return 0;
4d2997ab
AD
1170}
1171
e3ecdffa 1172/**
7ccfd79f 1173 * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
e3ecdffa 1174 *
982a820b 1175 * @adev: amdgpu_device pointer
e3ecdffa
AD
1176 *
1177 * Allocates a scratch page of VRAM for use by various things in the
1178 * driver.
1179 */
7ccfd79f 1180static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
d38ceaf9 1181{
7ccfd79f
CK
1182 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
1183 AMDGPU_GEM_DOMAIN_VRAM |
1184 AMDGPU_GEM_DOMAIN_GTT,
1185 &adev->mem_scratch.robj,
1186 &adev->mem_scratch.gpu_addr,
1187 (void **)&adev->mem_scratch.ptr);
d38ceaf9
AD
1188}
1189
e3ecdffa 1190/**
7ccfd79f 1191 * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
e3ecdffa 1192 *
982a820b 1193 * @adev: amdgpu_device pointer
e3ecdffa
AD
1194 *
1195 * Frees the VRAM scratch page.
1196 */
7ccfd79f 1197static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 1198{
7ccfd79f 1199 amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
d38ceaf9
AD
1200}
1201
1202/**
9c3f2b54 1203 * amdgpu_device_program_register_sequence - program an array of registers.
d38ceaf9
AD
1204 *
1205 * @adev: amdgpu_device pointer
1206 * @registers: pointer to the register array
1207 * @array_size: size of the register array
1208 *
b8920e1e 1209 * Programs an array of registers with AND and OR masks.
d38ceaf9
AD
1210 * This is a helper for setting golden registers.
1211 */
9c3f2b54
AD
1212void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
1213 const u32 *registers,
1214 const u32 array_size)
d38ceaf9
AD
1215{
1216 u32 tmp, reg, and_mask, or_mask;
1217 int i;
1218
1219 if (array_size % 3)
1220 return;
1221
47fc644f 1222 for (i = 0; i < array_size; i += 3) {
d38ceaf9
AD
1223 reg = registers[i + 0];
1224 and_mask = registers[i + 1];
1225 or_mask = registers[i + 2];
1226
1227 if (and_mask == 0xffffffff) {
1228 tmp = or_mask;
1229 } else {
1230 tmp = RREG32(reg);
1231 tmp &= ~and_mask;
e0d07657
HZ
1232 if (adev->family >= AMDGPU_FAMILY_AI)
1233 tmp |= (or_mask & and_mask);
1234 else
1235 tmp |= or_mask;
d38ceaf9
AD
1236 }
1237 WREG32(reg, tmp);
1238 }
1239}
1240
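/*
 * Golden register arrays are flat {offset, and_mask, or_mask} triplets; an
 * and_mask of 0xffffffff writes or_mask directly. Illustrative example
 * (register name and values are made up):
 *
 *   static const u32 golden_settings_example[] = {
 *           mmEXAMPLE_REG, 0x0000ffff, 0x00000100,
 *   };
 *
 *   amdgpu_device_program_register_sequence(adev, golden_settings_example,
 *                                           ARRAY_SIZE(golden_settings_example));
 */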
e3ecdffa
AD
1241/**
1242 * amdgpu_device_pci_config_reset - reset the GPU
1243 *
1244 * @adev: amdgpu_device pointer
1245 *
1246 * Resets the GPU using the pci config reset sequence.
1247 * Only applicable to asics prior to vega10.
1248 */
8111c387 1249void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
1250{
1251 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
1252}
1253
af484df8
AD
1254/**
1255 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1256 *
1257 * @adev: amdgpu_device pointer
1258 *
1259 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1260 */
1261int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1262{
1263 return pci_reset_function(adev->pdev);
1264}
1265
d38ceaf9 1266/*
06ec9070 1267 * amdgpu_device_wb_*()
455a7bc2 1268 * Writeback is the method by which the GPU updates special pages in memory
ea81a173 1269 * with the status of certain GPU events (fences, ring pointers,etc.).
d38ceaf9
AD
1270 */
1271
1272/**
06ec9070 1273 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
1274 *
1275 * @adev: amdgpu_device pointer
1276 *
1277 * Disables Writeback and frees the Writeback memory (all asics).
1278 * Used at driver shutdown.
1279 */
06ec9070 1280static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1281{
1282 if (adev->wb.wb_obj) {
a76ed485
AD
1283 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1284 &adev->wb.gpu_addr,
1285 (void **)&adev->wb.wb);
d38ceaf9
AD
1286 adev->wb.wb_obj = NULL;
1287 }
1288}
1289
1290/**
03f2abb0 1291 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
d38ceaf9
AD
1292 *
1293 * @adev: amdgpu_device pointer
1294 *
455a7bc2 1295 * Initializes writeback and allocates writeback memory (all asics).
d38ceaf9
AD
1296 * Used at driver startup.
1297 * Returns 0 on success or an -error on failure.
1298 */
06ec9070 1299static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
1300{
1301 int r;
1302
1303 if (adev->wb.wb_obj == NULL) {
97407b63
AD
1304 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1305 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
1306 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1307 &adev->wb.wb_obj, &adev->wb.gpu_addr,
1308 (void **)&adev->wb.wb);
d38ceaf9
AD
1309 if (r) {
1310 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1311 return r;
1312 }
d38ceaf9
AD
1313
1314 adev->wb.num_wb = AMDGPU_MAX_WB;
1315 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1316
1317 /* clear wb memory */
73469585 1318 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
1319 }
1320
1321 return 0;
1322}
1323
1324/**
131b4b36 1325 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
1326 *
1327 * @adev: amdgpu_device pointer
1328 * @wb: wb index
1329 *
1330 * Allocate a wb slot for use by the driver (all asics).
1331 * Returns 0 on success or -EINVAL on failure.
1332 */
131b4b36 1333int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9
AD
1334{
1335 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 1336
97407b63 1337 if (offset < adev->wb.num_wb) {
7014285a 1338 __set_bit(offset, adev->wb.used);
63ae07ca 1339 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
1340 return 0;
1341 } else {
1342 return -EINVAL;
1343 }
1344}
1345
d38ceaf9 1346/**
131b4b36 1347 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
1348 *
1349 * @adev: amdgpu_device pointer
1350 * @wb: wb index
1351 *
1352 * Free a wb slot allocated for use by the driver (all asics)
1353 */
131b4b36 1354void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 1355{
73469585 1356 wb >>= 3;
d38ceaf9 1357 if (wb < adev->wb.num_wb)
73469585 1358 __clear_bit(wb, adev->wb.used);
d38ceaf9
AD
1359}
1360
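/*
 * Typical writeback usage (illustrative sketch): allocate a slot, let the GPU
 * write status into it, read it back through the CPU mapping and free it.
 * The index returned by amdgpu_device_wb_get() is already a dword offset into
 * adev->wb.wb[].
 *
 *   u32 wb;
 *
 *   if (!amdgpu_device_wb_get(adev, &wb)) {
 *           u32 val = adev->wb.wb[wb];
 *
 *           amdgpu_device_wb_free(adev, wb);
 *   }
 */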
d6895ad3
CK
1361/**
1362 * amdgpu_device_resize_fb_bar - try to resize FB BAR
1363 *
1364 * @adev: amdgpu_device pointer
1365 *
1366 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1367 * to fail, but if any of the BARs is not accessible after the size we abort
1368 * driver loading by returning -ENODEV.
1369 */
1370int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1371{
453f617a 1372 int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
31b8adab
CK
1373 struct pci_bus *root;
1374 struct resource *res;
b8920e1e 1375 unsigned int i;
d6895ad3
CK
1376 u16 cmd;
1377 int r;
1378
822130b5
AB
1379 if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1380 return 0;
1381
0c03b912 1382 /* Bypass for VF */
1383 if (amdgpu_sriov_vf(adev))
1384 return 0;
1385
b7221f2b
AD
1386 /* skip if the bios has already enabled large BAR */
1387 if (adev->gmc.real_vram_size &&
1388 (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1389 return 0;
1390
31b8adab
CK
1391 /* Check if the root BUS has 64bit memory resources */
1392 root = adev->pdev->bus;
1393 while (root->parent)
1394 root = root->parent;
1395
1396 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 1397 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
1398 res->start > 0x100000000ull)
1399 break;
1400 }
1401
1402 /* Trying to resize is pointless without a root hub window above 4GB */
1403 if (!res)
1404 return 0;
1405
453f617a
ND
1406 /* Limit the BAR size to what is available */
1407 rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1408 rbar_size);
1409
d6895ad3
CK
1410 /* Disable memory decoding while we change the BAR addresses and size */
1411 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1412 pci_write_config_word(adev->pdev, PCI_COMMAND,
1413 cmd & ~PCI_COMMAND_MEMORY);
1414
1415 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
43c064db 1416 amdgpu_doorbell_fini(adev);
d6895ad3
CK
1417 if (adev->asic_type >= CHIP_BONAIRE)
1418 pci_release_resource(adev->pdev, 2);
1419
1420 pci_release_resource(adev->pdev, 0);
1421
1422 r = pci_resize_resource(adev->pdev, 0, rbar_size);
1423 if (r == -ENOSPC)
1424 DRM_INFO("Not enough PCI address space for a large BAR.");
1425 else if (r && r != -ENOTSUPP)
1426 DRM_ERROR("Problem resizing BAR0 (%d).", r);
1427
1428 pci_assign_unassigned_bus_resources(adev->pdev->bus);
1429
1430 /* When the doorbell or fb BAR isn't available we have no chance of
1431 * using the device.
1432 */
43c064db 1433 r = amdgpu_doorbell_init(adev);
d6895ad3
CK
1434 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1435 return -ENODEV;
1436
1437 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1438
1439 return 0;
1440}
a05502e5 1441
9535a86a
SZ
1442static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
1443{
b8920e1e 1444 if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
9535a86a 1445 return false;
9535a86a
SZ
1446
1447 return true;
1448}
1449
d38ceaf9
AD
1450/*
1451 * GPU helper functions.
1452 */
1453/**
39c640c0 1454 * amdgpu_device_need_post - check if the hw needs post or not
d38ceaf9
AD
1455 *
1456 * @adev: amdgpu_device pointer
1457 *
c836fec5
JQ
1458 * Check if the asic has been initialized (all asics) at driver startup
1459 * or post is needed if hw reset is performed.
1460 * Returns true if post is needed or false if not.
d38ceaf9 1461 */
39c640c0 1462bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
1463{
1464 uint32_t reg;
1465
bec86378
ML
1466 if (amdgpu_sriov_vf(adev))
1467 return false;
1468
9535a86a
SZ
1469 if (!amdgpu_device_read_bios(adev))
1470 return false;
1471
bec86378 1472 if (amdgpu_passthrough(adev)) {
1da2c326
ML
1473 /* for FIJI: In the whole GPU pass-through virtualization case, after a VM reboot
1474 * some old SMC firmware still needs the driver to do a vPost, otherwise the GPU hangs.
1475 * SMC firmware versions above 22.15 don't have this flaw, so we force
1476 * vPost to be executed for SMC versions below 22.15.
bec86378
ML
1477 */
1478 if (adev->asic_type == CHIP_FIJI) {
1479 int err;
1480 uint32_t fw_ver;
b8920e1e 1481
bec86378
ML
1482 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1483 /* force vPost if an error occurred */
1484 if (err)
1485 return true;
1486
1487 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1da2c326
ML
1488 if (fw_ver < 0x00160e00)
1489 return true;
bec86378 1490 }
bec86378 1491 }
91fe77eb 1492
e3c1b071 1493 /* Don't post if we need to reset whole hive on init */
1494 if (adev->gmc.xgmi.pending_reset)
1495 return false;
1496
91fe77eb 1497 if (adev->has_hw_reset) {
1498 adev->has_hw_reset = false;
1499 return true;
1500 }
1501
1502 /* bios scratch used on CIK+ */
1503 if (adev->asic_type >= CHIP_BONAIRE)
1504 return amdgpu_atombios_scratch_need_asic_init(adev);
1505
1506 /* check MEM_SIZE for older asics */
1507 reg = amdgpu_asic_get_config_memsize(adev);
1508
1509 if ((reg != 0) && (reg != 0xffffffff))
1510 return false;
1511
1512 return true;
70e64c4d
ML
1513}
1514
bb0f8429
ML
1515/*
1516 * Check whether seamless boot is supported.
1517 *
7f4ce7b5
ML
1518 * So far we only support seamless boot on DCE 3.0 or later.
1519 * If users report that it works on older ASICS as well, we may
1520 * loosen this.
bb0f8429
ML
1521 */
1522bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
1523{
5dc270d3
ML
1524 switch (amdgpu_seamless) {
1525 case -1:
1526 break;
1527 case 1:
1528 return true;
1529 case 0:
1530 return false;
1531 default:
1532 DRM_ERROR("Invalid value for amdgpu.seamless: %d\n",
1533 amdgpu_seamless);
1534 return false;
1535 }
1536
3657a1d5
ML
1537 if (!(adev->flags & AMD_IS_APU))
1538 return false;
1539
5dc270d3
ML
1540 if (adev->mman.keep_stolen_vga_memory)
1541 return false;
1542
7f4ce7b5 1543 return adev->ip_versions[DCE_HWIP][0] >= IP_VERSION(3, 0, 0);
bb0f8429
ML
1544}
1545
5d1eb4c4 1546/*
2757a848
ML
1547 * Intel hosts such as Rocket Lake, Alder Lake, Raptor Lake and Sapphire Rapids
1548 * don't support dynamic speed switching. Until we have confirmation from Intel
1549 * that a specific host supports it, it's safer that we keep it disabled for all.
5d1eb4c4
ML
1550 *
1551 * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1552 * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1553 */
2757a848 1554static bool amdgpu_device_pcie_dynamic_switching_supported(void)
5d1eb4c4
ML
1555{
1556#if IS_ENABLED(CONFIG_X86)
1557 struct cpuinfo_x86 *c = &cpu_data(0);
1558
1559 if (c->x86_vendor == X86_VENDOR_INTEL)
1560 return false;
1561#endif
1562 return true;
1563}
1564
0ab5d711
ML
1565/**
1566 * amdgpu_device_should_use_aspm - check if the device should program ASPM
1567 *
1568 * @adev: amdgpu_device pointer
1569 *
1570 * Confirm whether the module parameter and pcie bridge agree that ASPM should
1571 * be set for this device.
1572 *
1573 * Returns true if it should be used or false if not.
1574 */
1575bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1576{
1577 switch (amdgpu_aspm) {
1578 case -1:
1579 break;
1580 case 0:
1581 return false;
1582 case 1:
1583 return true;
1584 default:
1585 return false;
1586 }
1a6513de
ML
1587 if (adev->flags & AMD_IS_APU)
1588 return false;
2757a848
ML
1589 if (!(adev->pm.pp_feature & PP_PCIE_DPM_MASK))
1590 return false;
0ab5d711
ML
1591 return pcie_aspm_enabled(adev->pdev);
1592}
1593
d38ceaf9
AD
1594/* if we get transitioned to only one device, take VGA back */
1595/**
06ec9070 1596 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9 1597 *
bf44e8ce 1598 * @pdev: PCI device pointer
d38ceaf9
AD
1599 * @state: enable/disable vga decode
1600 *
1601 * Enable/disable vga decode (all asics).
1602 * Returns VGA resource flags.
1603 */
bf44e8ce
CH
1604static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1605 bool state)
d38ceaf9 1606{
bf44e8ce 1607 struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
b8920e1e 1608
d38ceaf9
AD
1609 amdgpu_asic_set_vga_state(adev, state);
1610 if (state)
1611 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1612 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1613 else
1614 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1615}
1616
e3ecdffa
AD
1617/**
1618 * amdgpu_device_check_block_size - validate the vm block size
1619 *
1620 * @adev: amdgpu_device pointer
1621 *
1622 * Validates the vm block size specified via module parameter.
1623 * The vm block size defines number of bits in page table versus page directory,
1624 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1625 * page table and the remaining bits are in the page directory.
1626 */
06ec9070 1627static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
1628{
1629 /* defines number of bits in page table versus page directory,
1630 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
b8920e1e
SS
1631 * page table and the remaining bits are in the page directory
1632 */
bab4fee7
JZ
1633 if (amdgpu_vm_block_size == -1)
1634 return;
a1adf8be 1635
bab4fee7 1636 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
1637 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1638 amdgpu_vm_block_size);
97489129 1639 amdgpu_vm_block_size = -1;
a1adf8be 1640 }
a1adf8be
CZ
1641}
1642
e3ecdffa
AD
1643/**
1644 * amdgpu_device_check_vm_size - validate the vm size
1645 *
1646 * @adev: amdgpu_device pointer
1647 *
1648 * Validates the vm size in GB specified via module parameter.
1649 * The VM size is the size of the GPU virtual memory space in GB.
1650 */
06ec9070 1651static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 1652{
64dab074
AD
1653 /* no need to check the default value */
1654 if (amdgpu_vm_size == -1)
1655 return;
1656
83ca145d
ZJ
1657 if (amdgpu_vm_size < 1) {
1658 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1659 amdgpu_vm_size);
f3368128 1660 amdgpu_vm_size = -1;
83ca145d 1661 }
83ca145d
ZJ
1662}
1663
7951e376
RZ
1664static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1665{
1666 struct sysinfo si;
a9d4fe2f 1667 bool is_os_64 = (sizeof(void *) == 8);
7951e376
RZ
1668 uint64_t total_memory;
1669 uint64_t dram_size_seven_GB = 0x1B8000000;
1670 uint64_t dram_size_three_GB = 0xB8000000;
1671
1672 if (amdgpu_smu_memory_pool_size == 0)
1673 return;
1674
1675 if (!is_os_64) {
1676 DRM_WARN("Not 64-bit OS, feature not supported\n");
1677 goto def_value;
1678 }
1679 si_meminfo(&si);
1680 total_memory = (uint64_t)si.totalram * si.mem_unit;
1681
1682 if ((amdgpu_smu_memory_pool_size == 1) ||
1683 (amdgpu_smu_memory_pool_size == 2)) {
1684 if (total_memory < dram_size_three_GB)
1685 goto def_value1;
1686 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1687 (amdgpu_smu_memory_pool_size == 8)) {
1688 if (total_memory < dram_size_seven_GB)
1689 goto def_value1;
1690 } else {
1691 DRM_WARN("Smu memory pool size not supported\n");
1692 goto def_value;
1693 }
1694 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1695
1696 return;
1697
1698def_value1:
1699 DRM_WARN("No enough system memory\n");
1700def_value:
1701 adev->pm.smu_prv_buffer_size = 0;
1702}
1703
9f6a7857
HR
1704static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1705{
1706 if (!(adev->flags & AMD_IS_APU) ||
1707 adev->asic_type < CHIP_RAVEN)
1708 return 0;
1709
1710 switch (adev->asic_type) {
1711 case CHIP_RAVEN:
1712 if (adev->pdev->device == 0x15dd)
1713 adev->apu_flags |= AMD_APU_IS_RAVEN;
1714 if (adev->pdev->device == 0x15d8)
1715 adev->apu_flags |= AMD_APU_IS_PICASSO;
1716 break;
1717 case CHIP_RENOIR:
1718 if ((adev->pdev->device == 0x1636) ||
1719 (adev->pdev->device == 0x164c))
1720 adev->apu_flags |= AMD_APU_IS_RENOIR;
1721 else
1722 adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1723 break;
1724 case CHIP_VANGOGH:
1725 adev->apu_flags |= AMD_APU_IS_VANGOGH;
1726 break;
1727 case CHIP_YELLOW_CARP:
1728 break;
d0f56dc2 1729 case CHIP_CYAN_SKILLFISH:
dfcc3e8c
AD
1730 if ((adev->pdev->device == 0x13FE) ||
1731 (adev->pdev->device == 0x143F))
d0f56dc2
TZ
1732 adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1733 break;
9f6a7857 1734 default:
4eaf21b7 1735 break;
9f6a7857
HR
1736 }
1737
1738 return 0;
1739}
1740
d38ceaf9 1741/**
06ec9070 1742 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
1743 *
1744 * @adev: amdgpu_device pointer
1745 *
1746 * Validates certain module parameters and updates
1747 * the associated values used by the driver (all asics).
1748 */
912dfc84 1749static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1750{
5b011235
CZ
1751 if (amdgpu_sched_jobs < 4) {
1752 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1753 amdgpu_sched_jobs);
1754 amdgpu_sched_jobs = 4;
47fc644f 1755 } else if (!is_power_of_2(amdgpu_sched_jobs)) {
5b011235
CZ
1756 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1757 amdgpu_sched_jobs);
1758 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1759 }
d38ceaf9 1760
83e74db6 1761 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
1762 /* gart size must be greater than or equal to 32M */
1763 dev_warn(adev->dev, "gart size (%d) too small\n",
1764 amdgpu_gart_size);
83e74db6 1765 amdgpu_gart_size = -1;
d38ceaf9
AD
1766 }
1767
36d38372 1768 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1769 /* gtt size must be greater than or equal to 32M */
36d38372
CK
1770 dev_warn(adev->dev, "gtt size (%d) too small\n",
1771 amdgpu_gtt_size);
1772 amdgpu_gtt_size = -1;
d38ceaf9
AD
1773 }
1774
d07f14be
RH
1775 /* valid range is between 4 and 9 inclusive */
1776 if (amdgpu_vm_fragment_size != -1 &&
1777 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1778 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1779 amdgpu_vm_fragment_size = -1;
1780 }
1781
5d5bd5e3
KW
1782 if (amdgpu_sched_hw_submission < 2) {
1783 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1784 amdgpu_sched_hw_submission);
1785 amdgpu_sched_hw_submission = 2;
1786 } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1787 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1788 amdgpu_sched_hw_submission);
1789 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1790 }
1791
2656fd23
AG
1792 if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1793 dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1794 amdgpu_reset_method = -1;
1795 }
1796
7951e376
RZ
1797 amdgpu_device_check_smu_prv_buffer_size(adev);
1798
06ec9070 1799 amdgpu_device_check_vm_size(adev);
d38ceaf9 1800
06ec9070 1801 amdgpu_device_check_block_size(adev);
6a7f76e7 1802
19aede77 1803 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84 1804
e3c00faa 1805 return 0;
d38ceaf9
AD
1806}
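/*
 * Illustrative sketch (values are examples only): the scheduler-related
 * parameters are clamped to powers of two above, so an odd request is
 * silently rounded up rather than rejected:
 *
 *	amdgpu_sched_jobs = 6;                          // user asked for 6
 *	amdgpu_sched_jobs = roundup_pow_of_two(6);      // becomes 8
 */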
1807
1808/**
1809 * amdgpu_switcheroo_set_state - set switcheroo state
1810 *
1811 * @pdev: pci dev pointer
1694467b 1812 * @state: vga_switcheroo state
d38ceaf9 1813 *
12024b17 1814 * Callback for the switcheroo driver. Suspends or resumes
d38ceaf9
AD
1815 * the asic before or after it is powered up using ACPI methods.
1816 */
8aba21b7
LT
1817static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1818 enum vga_switcheroo_state state)
d38ceaf9
AD
1819{
1820 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 1821 int r;
d38ceaf9 1822
b98c6299 1823 if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
1824 return;
1825
1826 if (state == VGA_SWITCHEROO_ON) {
dd4fa6c1 1827 pr_info("switched on\n");
d38ceaf9
AD
1828 /* don't suspend or resume card normally */
1829 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1830
8f66090b
TZ
1831 pci_set_power_state(pdev, PCI_D0);
1832 amdgpu_device_load_pci_state(pdev);
1833 r = pci_enable_device(pdev);
de185019
AD
1834 if (r)
1835 DRM_WARN("pci_enable_device failed (%d)\n", r);
1836 amdgpu_device_resume(dev, true);
d38ceaf9 1837
d38ceaf9 1838 dev->switch_power_state = DRM_SWITCH_POWER_ON;
d38ceaf9 1839 } else {
dd4fa6c1 1840 pr_info("switched off\n");
d38ceaf9 1841 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
5095d541 1842 amdgpu_device_prepare(dev);
de185019 1843 amdgpu_device_suspend(dev, true);
8f66090b 1844 amdgpu_device_cache_pci_state(pdev);
de185019 1845 /* Shut down the device */
8f66090b
TZ
1846 pci_disable_device(pdev);
1847 pci_set_power_state(pdev, PCI_D3cold);
d38ceaf9
AD
1848 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1849 }
1850}
1851
1852/**
1853 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1854 *
1855 * @pdev: pci dev pointer
1856 *
1857 * Callback for the switcheroo driver. Checks if the switcheroo
1858 * state can be changed.
1859 * Returns true if the state can be changed, false if not.
1860 */
1861static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1862{
1863 struct drm_device *dev = pci_get_drvdata(pdev);
1864
b8920e1e 1865 /*
d38ceaf9
AD
1866 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1867 * locking inversion with the driver load path. And the access here is
1868 * completely racy anyway. So don't bother with locking for now.
1869 */
7e13ad89 1870 return atomic_read(&dev->open_count) == 0;
d38ceaf9
AD
1871}
1872
1873static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1874 .set_gpu_state = amdgpu_switcheroo_set_state,
1875 .reprobe = NULL,
1876 .can_switch = amdgpu_switcheroo_can_switch,
1877};
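/*
 * Illustrative sketch (registration details are an assumption, not shown in
 * this section): these ops are what the driver hands to the vga_switcheroo
 * core during device init, along with whether runtime power control is used:
 *
 *	vga_switcheroo_register_client(adev->pdev, &amdgpu_switcheroo_ops,
 *				       amdgpu_device_supports_px(dev));
 */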
1878
e3ecdffa
AD
1879/**
1880 * amdgpu_device_ip_set_clockgating_state - set the CG state
1881 *
87e3f136 1882 * @dev: amdgpu_device pointer
e3ecdffa
AD
1883 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1884 * @state: clockgating state (gate or ungate)
1885 *
1886 * Sets the requested clockgating state for all instances of
1887 * the hardware IP specified.
1888 * Returns the error code from the last instance.
1889 */
43fa561f 1890int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1891 enum amd_ip_block_type block_type,
1892 enum amd_clockgating_state state)
d38ceaf9 1893{
43fa561f 1894 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1895 int i, r = 0;
1896
1897 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1898 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1899 continue;
c722865a
RZ
1900 if (adev->ip_blocks[i].version->type != block_type)
1901 continue;
1902 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1903 continue;
1904 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1905 (void *)adev, state);
1906 if (r)
1907 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1908 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1909 }
1910 return r;
1911}
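/*
 * Illustrative sketch (example arguments only): a typical caller gates or
 * ungates one IP type across all of its instances:
 *
 *	r = amdgpu_device_ip_set_clockgating_state(adev,
 *						   AMD_IP_BLOCK_TYPE_GFX,
 *						   AMD_CG_STATE_GATE);
 */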
1912
e3ecdffa
AD
1913/**
1914 * amdgpu_device_ip_set_powergating_state - set the PG state
1915 *
87e3f136 1916 * @dev: amdgpu_device pointer
e3ecdffa
AD
1917 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1918 * @state: powergating state (gate or ungate)
1919 *
1920 * Sets the requested powergating state for all instances of
1921 * the hardware IP specified.
1922 * Returns the error code from the last instance.
1923 */
43fa561f 1924int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1925 enum amd_ip_block_type block_type,
1926 enum amd_powergating_state state)
d38ceaf9 1927{
43fa561f 1928 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1929 int i, r = 0;
1930
1931 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1932 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1933 continue;
c722865a
RZ
1934 if (adev->ip_blocks[i].version->type != block_type)
1935 continue;
1936 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1937 continue;
1938 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1939 (void *)adev, state);
1940 if (r)
1941 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1942 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1943 }
1944 return r;
1945}
1946
e3ecdffa
AD
1947/**
1948 * amdgpu_device_ip_get_clockgating_state - get the CG state
1949 *
1950 * @adev: amdgpu_device pointer
1951 * @flags: clockgating feature flags
1952 *
1953 * Walks the list of IPs on the device and updates the clockgating
1954 * flags for each IP.
1955 * Updates @flags with the feature flags for each hardware IP where
1956 * clockgating is enabled.
1957 */
2990a1fc 1958void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
25faeddc 1959 u64 *flags)
6cb2d4e4
HR
1960{
1961 int i;
1962
1963 for (i = 0; i < adev->num_ip_blocks; i++) {
1964 if (!adev->ip_blocks[i].status.valid)
1965 continue;
1966 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1967 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1968 }
1969}
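/*
 * Illustrative sketch (the tested bit is one example of the AMD_CG_SUPPORT_*
 * flags): callers pass in a zeroed mask and test the returned bits:
 *
 *	u64 flags = 0;
 *
 *	amdgpu_device_ip_get_clockgating_state(adev, &flags);
 *	if (flags & AMD_CG_SUPPORT_GFX_MGCG)
 *		;	// GFX medium-grain clockgating is enabled
 */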
1970
e3ecdffa
AD
1971/**
1972 * amdgpu_device_ip_wait_for_idle - wait for idle
1973 *
1974 * @adev: amdgpu_device pointer
1975 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1976 *
1977 * Waits for the requested hardware IP to be idle.
1978 * Returns 0 for success or a negative error code on failure.
1979 */
2990a1fc
AD
1980int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1981 enum amd_ip_block_type block_type)
5dbbb60b
AD
1982{
1983 int i, r;
1984
1985 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1986 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1987 continue;
a1255107
AD
1988 if (adev->ip_blocks[i].version->type == block_type) {
1989 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1990 if (r)
1991 return r;
1992 break;
1993 }
1994 }
1995 return 0;
1996
1997}
1998
e3ecdffa
AD
1999/**
2000 * amdgpu_device_ip_is_idle - is the hardware IP idle
2001 *
2002 * @adev: amdgpu_device pointer
2003 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2004 *
2005 * Check if the hardware IP is idle or not.
2006 * Returns true if the IP is idle, false if not.
2007 */
2990a1fc
AD
2008bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
2009 enum amd_ip_block_type block_type)
5dbbb60b
AD
2010{
2011 int i;
2012
2013 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2014 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 2015 continue;
a1255107
AD
2016 if (adev->ip_blocks[i].version->type == block_type)
2017 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
2018 }
2019 return true;
2020
2021}
2022
e3ecdffa
AD
2023/**
2024 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
2025 *
2026 * @adev: amdgpu_device pointer
87e3f136 2027 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
2028 *
2029 * Returns a pointer to the hardware IP block structure
2030 * if it exists for the asic, otherwise NULL.
2031 */
2990a1fc
AD
2032struct amdgpu_ip_block *
2033amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
2034 enum amd_ip_block_type type)
d38ceaf9
AD
2035{
2036 int i;
2037
2038 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 2039 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
2040 return &adev->ip_blocks[i];
2041
2042 return NULL;
2043}
2044
2045/**
2990a1fc 2046 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
2047 *
2048 * @adev: amdgpu_device pointer
5fc3aeeb 2049 * @type: enum amd_ip_block_type
d38ceaf9
AD
2050 * @major: major version
2051 * @minor: minor version
2052 *
2053 * return 0 if equal or greater
2054 * return 1 if smaller or the ip_block doesn't exist
2055 */
2990a1fc
AD
2056int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
2057 enum amd_ip_block_type type,
2058 u32 major, u32 minor)
d38ceaf9 2059{
2990a1fc 2060 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 2061
a1255107
AD
2062 if (ip_block && ((ip_block->version->major > major) ||
2063 ((ip_block->version->major == major) &&
2064 (ip_block->version->minor >= minor))))
d38ceaf9
AD
2065 return 0;
2066
2067 return 1;
2068}
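/*
 * Illustrative sketch (example version only): the comparison helper returns 0
 * when the installed IP is at least the requested version, so callers
 * typically write:
 *
 *	if (amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_SMC,
 *					       7, 0) == 0)
 *		;	// SMC 7.0 or newer is present
 */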
2069
a1255107 2070/**
2990a1fc 2071 * amdgpu_device_ip_block_add
a1255107
AD
2072 *
2073 * @adev: amdgpu_device pointer
2074 * @ip_block_version: pointer to the IP to add
2075 *
2076 * Adds the IP block driver information to the collection of IPs
2077 * on the asic.
2078 */
2990a1fc
AD
2079int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
2080 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
2081{
2082 if (!ip_block_version)
2083 return -EINVAL;
2084
7bd939d0
LG
2085 switch (ip_block_version->type) {
2086 case AMD_IP_BLOCK_TYPE_VCN:
2087 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
2088 return 0;
2089 break;
2090 case AMD_IP_BLOCK_TYPE_JPEG:
2091 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
2092 return 0;
2093 break;
2094 default:
2095 break;
2096 }
2097
e966a725 2098 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
2099 ip_block_version->funcs->name);
2100
a1255107
AD
2101 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
2102
2103 return 0;
2104}
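/*
 * Illustrative sketch (the block names are examples and may differ per ASIC):
 * ASIC setup code builds the IP list with repeated calls such as:
 *
 *	amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
 *	amdgpu_device_ip_block_add(adev, &gmc_v8_0_ip_block);
 */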
2105
e3ecdffa
AD
2106/**
2107 * amdgpu_device_enable_virtual_display - enable virtual display feature
2108 *
2109 * @adev: amdgpu_device pointer
2110 *
2111 * Enables the virtual display feature if the user has enabled it via
2112 * the module parameter virtual_display. This feature provides virtual
2113 * display hardware on headless boards or in virtualized environments.
2114 * This function parses and validates the configuration string specified by
2115 * the user and configures the virtual display configuration (number of
2116 * virtual connectors, crtcs, etc.) specified.
2117 */
483ef985 2118static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
2119{
2120 adev->enable_virtual_display = false;
2121
2122 if (amdgpu_virtual_display) {
8f66090b 2123 const char *pci_address_name = pci_name(adev->pdev);
0f66356d 2124 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
2125
2126 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
2127 pciaddstr_tmp = pciaddstr;
0f66356d
ED
2128 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
2129 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
2130 if (!strcmp("all", pciaddname)
2131 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
2132 long num_crtc;
2133 int res = -1;
2134
9accf2fd 2135 adev->enable_virtual_display = true;
0f66356d
ED
2136
2137 if (pciaddname_tmp)
2138 res = kstrtol(pciaddname_tmp, 10,
2139 &num_crtc);
2140
2141 if (!res) {
2142 if (num_crtc < 1)
2143 num_crtc = 1;
2144 if (num_crtc > 6)
2145 num_crtc = 6;
2146 adev->mode_info.num_crtc = num_crtc;
2147 } else {
2148 adev->mode_info.num_crtc = 1;
2149 }
9accf2fd
ED
2150 break;
2151 }
2152 }
2153
0f66356d
ED
2154 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
2155 amdgpu_virtual_display, pci_address_name,
2156 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
2157
2158 kfree(pciaddstr);
2159 }
2160}
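/*
 * Illustrative sketch (addresses and counts are examples only): the parsing
 * above expects "pci-address,num_crtc" entries separated by ';', with "all"
 * matching every device:
 *
 *	amdgpu.virtual_display=0000:03:00.0,2;0000:04:00.0,1
 *	amdgpu.virtual_display=all,1
 */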
2161
25263da3
AD
2162void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
2163{
2164 if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
2165 adev->mode_info.num_crtc = 1;
2166 adev->enable_virtual_display = true;
2167 DRM_INFO("virtual_display:%d, num_crtc:%d\n",
2168 adev->enable_virtual_display, adev->mode_info.num_crtc);
2169 }
2170}
2171
e3ecdffa
AD
2172/**
2173 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
2174 *
2175 * @adev: amdgpu_device pointer
2176 *
2177 * Parses the asic configuration parameters specified in the gpu info
2178 * firmware and makes them available to the driver for use in configuring
2179 * the asic.
2180 * Returns 0 on success, -EINVAL on failure.
2181 */
e2a75f88
AD
2182static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
2183{
e2a75f88 2184 const char *chip_name;
c0a43457 2185 char fw_name[40];
e2a75f88
AD
2186 int err;
2187 const struct gpu_info_firmware_header_v1_0 *hdr;
2188
ab4fe3e1
HR
2189 adev->firmware.gpu_info_fw = NULL;
2190
72de33f8 2191 if (adev->mman.discovery_bin) {
cc375d8c
TY
2192 /*
2193 * FIXME: The bounding box is still needed by Navi12, so
e24d0e91 2194 * temporarily read it from gpu_info firmware. Should be dropped
cc375d8c
TY
2195 * when DAL no longer needs it.
2196 */
2197 if (adev->asic_type != CHIP_NAVI12)
2198 return 0;
258620d0
AD
2199 }
2200
e2a75f88 2201 switch (adev->asic_type) {
e2a75f88
AD
2202 default:
2203 return 0;
2204 case CHIP_VEGA10:
2205 chip_name = "vega10";
2206 break;
3f76dced
AD
2207 case CHIP_VEGA12:
2208 chip_name = "vega12";
2209 break;
2d2e5e7e 2210 case CHIP_RAVEN:
54f78a76 2211 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
54c4d17e 2212 chip_name = "raven2";
54f78a76 2213 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
741deade 2214 chip_name = "picasso";
54c4d17e
FX
2215 else
2216 chip_name = "raven";
2d2e5e7e 2217 break;
65e60f6e
LM
2218 case CHIP_ARCTURUS:
2219 chip_name = "arcturus";
2220 break;
42b325e5
XY
2221 case CHIP_NAVI12:
2222 chip_name = "navi12";
2223 break;
e2a75f88
AD
2224 }
2225
2226 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
b31d3063 2227 err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
e2a75f88
AD
2228 if (err) {
2229 dev_err(adev->dev,
b31d3063 2230 "Failed to get gpu_info firmware \"%s\"\n",
e2a75f88
AD
2231 fw_name);
2232 goto out;
2233 }
2234
ab4fe3e1 2235 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
2236 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
2237
2238 switch (hdr->version_major) {
2239 case 1:
2240 {
2241 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 2242 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
2243 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2244
cc375d8c
TY
2245 /*
2246 * Should be dropped when DAL no longer needs it.
2247 */
2248 if (adev->asic_type == CHIP_NAVI12)
ec51d3fa
XY
2249 goto parse_soc_bounding_box;
2250
b5ab16bf
AD
2251 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2252 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2253 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2254 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 2255 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
2256 le32_to_cpu(gpu_info_fw->gc_num_tccs);
2257 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2258 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2259 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2260 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 2261 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
2262 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2263 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
2264 adev->gfx.cu_info.max_waves_per_simd =
2265 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2266 adev->gfx.cu_info.max_scratch_slots_per_cu =
2267 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2268 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 2269 if (hdr->version_minor >= 1) {
35c2e910
HZ
2270 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2271 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2272 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2273 adev->gfx.config.num_sc_per_sh =
2274 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2275 adev->gfx.config.num_packer_per_sc =
2276 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2277 }
ec51d3fa
XY
2278
2279parse_soc_bounding_box:
ec51d3fa
XY
2280 /*
2281 * soc bounding box info is not integrated in the discovery table,
258620d0 2282 * so we always need to parse it from the gpu info firmware when needed.
ec51d3fa 2283 */
48321c3d
HW
2284 if (hdr->version_minor == 2) {
2285 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2286 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2287 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2288 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2289 }
e2a75f88
AD
2290 break;
2291 }
2292 default:
2293 dev_err(adev->dev,
2294 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2295 err = -EINVAL;
2296 goto out;
2297 }
2298out:
e2a75f88
AD
2299 return err;
2300}
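/*
 * Illustrative sketch (chip name is an example): the firmware file name is
 * derived from the chip name chosen above, e.g. for a Raven2 part the
 * request becomes:
 *
 *	"amdgpu/raven2_gpu_info.bin"
 */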
2301
e3ecdffa
AD
2302/**
2303 * amdgpu_device_ip_early_init - run early init for hardware IPs
2304 *
2305 * @adev: amdgpu_device pointer
2306 *
2307 * Early initialization pass for hardware IPs. The hardware IPs that make
2308 * up each asic are discovered and each IP's early_init callback is run. This
2309 * is the first stage in initializing the asic.
2310 * Returns 0 on success, negative error code on failure.
2311 */
06ec9070 2312static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 2313{
901e2be2 2314 struct pci_dev *parent;
aaa36a97 2315 int i, r;
ced69502 2316 bool total;
d38ceaf9 2317
483ef985 2318 amdgpu_device_enable_virtual_display(adev);
a6be7570 2319
00a979f3 2320 if (amdgpu_sriov_vf(adev)) {
00a979f3 2321 r = amdgpu_virt_request_full_gpu(adev, true);
aaa36a97
AD
2322 if (r)
2323 return r;
00a979f3
WS
2324 }
2325
d38ceaf9 2326 switch (adev->asic_type) {
33f34802
KW
2327#ifdef CONFIG_DRM_AMDGPU_SI
2328 case CHIP_VERDE:
2329 case CHIP_TAHITI:
2330 case CHIP_PITCAIRN:
2331 case CHIP_OLAND:
2332 case CHIP_HAINAN:
295d0daf 2333 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
2334 r = si_set_ip_blocks(adev);
2335 if (r)
2336 return r;
2337 break;
2338#endif
a2e73f56
AD
2339#ifdef CONFIG_DRM_AMDGPU_CIK
2340 case CHIP_BONAIRE:
2341 case CHIP_HAWAII:
2342 case CHIP_KAVERI:
2343 case CHIP_KABINI:
2344 case CHIP_MULLINS:
e1ad2d53 2345 if (adev->flags & AMD_IS_APU)
a2e73f56 2346 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
2347 else
2348 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
2349
2350 r = cik_set_ip_blocks(adev);
2351 if (r)
2352 return r;
2353 break;
2354#endif
da87c30b
AD
2355 case CHIP_TOPAZ:
2356 case CHIP_TONGA:
2357 case CHIP_FIJI:
2358 case CHIP_POLARIS10:
2359 case CHIP_POLARIS11:
2360 case CHIP_POLARIS12:
2361 case CHIP_VEGAM:
2362 case CHIP_CARRIZO:
2363 case CHIP_STONEY:
2364 if (adev->flags & AMD_IS_APU)
2365 adev->family = AMDGPU_FAMILY_CZ;
2366 else
2367 adev->family = AMDGPU_FAMILY_VI;
2368
2369 r = vi_set_ip_blocks(adev);
2370 if (r)
2371 return r;
2372 break;
d38ceaf9 2373 default:
63352b7f
AD
2374 r = amdgpu_discovery_set_ip_blocks(adev);
2375 if (r)
2376 return r;
2377 break;
d38ceaf9
AD
2378 }
2379
901e2be2
AD
2380 if (amdgpu_has_atpx() &&
2381 (amdgpu_is_atpx_hybrid() ||
2382 amdgpu_has_atpx_dgpu_power_cntl()) &&
2383 ((adev->flags & AMD_IS_APU) == 0) &&
7b1c6263 2384 !dev_is_removable(&adev->pdev->dev))
901e2be2
AD
2385 adev->flags |= AMD_IS_PX;
2386
85ac2021 2387 if (!(adev->flags & AMD_IS_APU)) {
c4c8955b 2388 parent = pcie_find_root_port(adev->pdev);
85ac2021
AD
2389 adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2390 }
901e2be2 2391
1884734a 2392
3b94fb10 2393 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 2394 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 2395 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
4215a119
HC
2396 if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2397 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
fbf1035b
ML
2398 if (!amdgpu_device_pcie_dynamic_switching_supported())
2399 adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
00f54b97 2400
ced69502 2401 total = true;
d38ceaf9
AD
2402 for (i = 0; i < adev->num_ip_blocks; i++) {
2403 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
0c451baf 2404 DRM_WARN("disabled ip block: %d <%s>\n",
ed8cf00c 2405 i, adev->ip_blocks[i].version->funcs->name);
a1255107 2406 adev->ip_blocks[i].status.valid = false;
d38ceaf9 2407 } else {
a1255107
AD
2408 if (adev->ip_blocks[i].version->funcs->early_init) {
2409 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 2410 if (r == -ENOENT) {
a1255107 2411 adev->ip_blocks[i].status.valid = false;
2c1a2784 2412 } else if (r) {
a1255107
AD
2413 DRM_ERROR("early_init of IP block <%s> failed %d\n",
2414 adev->ip_blocks[i].version->funcs->name, r);
ced69502 2415 total = false;
2c1a2784 2416 } else {
a1255107 2417 adev->ip_blocks[i].status.valid = true;
2c1a2784 2418 }
974e6b64 2419 } else {
a1255107 2420 adev->ip_blocks[i].status.valid = true;
d38ceaf9 2421 }
d38ceaf9 2422 }
21a249ca
AD
2423 /* get the vbios after the asic_funcs are set up */
2424 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
2425 r = amdgpu_device_parse_gpu_info_fw(adev);
2426 if (r)
2427 return r;
2428
21a249ca 2429 /* Read BIOS */
9535a86a
SZ
2430 if (amdgpu_device_read_bios(adev)) {
2431 if (!amdgpu_get_bios(adev))
2432 return -EINVAL;
21a249ca 2433
9535a86a
SZ
2434 r = amdgpu_atombios_init(adev);
2435 if (r) {
2436 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2437 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2438 return r;
2439 }
21a249ca 2440 }
77eabc6f
PJZ
2441
2442 /* get pf2vf msg info at its earliest time */
2443 if (amdgpu_sriov_vf(adev))
2444 amdgpu_virt_init_data_exchange(adev);
2445
21a249ca 2446 }
d38ceaf9 2447 }
ced69502
ML
2448 if (!total)
2449 return -ENODEV;
d38ceaf9 2450
00fa4035 2451 amdgpu_amdkfd_device_probe(adev);
395d1fb9
NH
2452 adev->cg_flags &= amdgpu_cg_mask;
2453 adev->pg_flags &= amdgpu_pg_mask;
2454
d38ceaf9
AD
2455 return 0;
2456}
2457
0a4f2520
RZ
2458static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2459{
2460 int i, r;
2461
2462 for (i = 0; i < adev->num_ip_blocks; i++) {
2463 if (!adev->ip_blocks[i].status.sw)
2464 continue;
2465 if (adev->ip_blocks[i].status.hw)
2466 continue;
2467 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 2468 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
2469 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2470 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2471 if (r) {
2472 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2473 adev->ip_blocks[i].version->funcs->name, r);
2474 return r;
2475 }
2476 adev->ip_blocks[i].status.hw = true;
2477 }
2478 }
2479
2480 return 0;
2481}
2482
2483static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2484{
2485 int i, r;
2486
2487 for (i = 0; i < adev->num_ip_blocks; i++) {
2488 if (!adev->ip_blocks[i].status.sw)
2489 continue;
2490 if (adev->ip_blocks[i].status.hw)
2491 continue;
2492 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2493 if (r) {
2494 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2495 adev->ip_blocks[i].version->funcs->name, r);
2496 return r;
2497 }
2498 adev->ip_blocks[i].status.hw = true;
2499 }
2500
2501 return 0;
2502}
2503
7a3e0bb2
RZ
2504static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2505{
2506 int r = 0;
2507 int i;
80f41f84 2508 uint32_t smu_version;
7a3e0bb2
RZ
2509
2510 if (adev->asic_type >= CHIP_VEGA10) {
2511 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
2512 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2513 continue;
2514
e3c1b071 2515 if (!adev->ip_blocks[i].status.sw)
2516 continue;
2517
482f0e53
ML
2518 /* no need to do the fw loading again if already done*/
2519 if (adev->ip_blocks[i].status.hw == true)
2520 break;
2521
53b3f8f4 2522 if (amdgpu_in_reset(adev) || adev->in_suspend) {
482f0e53
ML
2523 r = adev->ip_blocks[i].version->funcs->resume(adev);
2524 if (r) {
2525 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 2526 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
2527 return r;
2528 }
2529 } else {
2530 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2531 if (r) {
2532 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2533 adev->ip_blocks[i].version->funcs->name, r);
2534 return r;
7a3e0bb2 2535 }
7a3e0bb2 2536 }
482f0e53
ML
2537
2538 adev->ip_blocks[i].status.hw = true;
2539 break;
7a3e0bb2
RZ
2540 }
2541 }
482f0e53 2542
8973d9ec
ED
2543 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2544 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 2545
80f41f84 2546 return r;
7a3e0bb2
RZ
2547}
2548
5fd8518d
AG
2549static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2550{
2551 long timeout;
2552 int r, i;
2553
2554 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2555 struct amdgpu_ring *ring = adev->rings[i];
2556
2557 /* No need to setup the GPU scheduler for rings that don't need it */
2558 if (!ring || ring->no_scheduler)
2559 continue;
2560
2561 switch (ring->funcs->type) {
2562 case AMDGPU_RING_TYPE_GFX:
2563 timeout = adev->gfx_timeout;
2564 break;
2565 case AMDGPU_RING_TYPE_COMPUTE:
2566 timeout = adev->compute_timeout;
2567 break;
2568 case AMDGPU_RING_TYPE_SDMA:
2569 timeout = adev->sdma_timeout;
2570 break;
2571 default:
2572 timeout = adev->video_timeout;
2573 break;
2574 }
2575
2576 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
56e44960 2577 DRM_SCHED_PRIORITY_COUNT,
11f25c84 2578 ring->num_hw_submission, 0,
8ab62eda
JG
2579 timeout, adev->reset_domain->wq,
2580 ring->sched_score, ring->name,
2581 adev->dev);
5fd8518d
AG
2582 if (r) {
2583 DRM_ERROR("Failed to create scheduler on ring %s.\n",
2584 ring->name);
2585 return r;
2586 }
037b98a2
AD
2587 r = amdgpu_uvd_entity_init(adev, ring);
2588 if (r) {
2589 DRM_ERROR("Failed to create UVD scheduling entity on ring %s.\n",
2590 ring->name);
2591 return r;
2592 }
2593 r = amdgpu_vce_entity_init(adev, ring);
2594 if (r) {
2595 DRM_ERROR("Failed to create VCE scheduling entity on ring %s.\n",
2596 ring->name);
2597 return r;
2598 }
5fd8518d
AG
2599 }
2600
d425c6f4
JZ
2601 amdgpu_xcp_update_partition_sched_list(adev);
2602
5fd8518d
AG
2603 return 0;
2604}
2605
2606
e3ecdffa
AD
2607/**
2608 * amdgpu_device_ip_init - run init for hardware IPs
2609 *
2610 * @adev: amdgpu_device pointer
2611 *
2612 * Main initialization pass for hardware IPs. The list of all the hardware
2613 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2614 * are run. sw_init initializes the software state associated with each IP
2615 * and hw_init initializes the hardware associated with each IP.
2616 * Returns 0 on success, negative error code on failure.
2617 */
06ec9070 2618static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
2619{
2620 int i, r;
2621
c030f2e4 2622 r = amdgpu_ras_init(adev);
2623 if (r)
2624 return r;
2625
d38ceaf9 2626 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2627 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2628 continue;
a1255107 2629 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 2630 if (r) {
a1255107
AD
2631 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2632 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 2633 goto init_failed;
2c1a2784 2634 }
a1255107 2635 adev->ip_blocks[i].status.sw = true;
bfca0289 2636
c1c39032
AD
2637 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2638 /* need to do common hw init early so everything is set up for gmc */
2639 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2640 if (r) {
2641 DRM_ERROR("hw_init %d failed %d\n", i, r);
2642 goto init_failed;
2643 }
2644 adev->ip_blocks[i].status.hw = true;
2645 } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2646 /* need to do gmc hw init early so we can allocate gpu mem */
892deb48
VS
2647 /* Try to reserve bad pages early */
2648 if (amdgpu_sriov_vf(adev))
2649 amdgpu_virt_exchange_data(adev);
2650
7ccfd79f 2651 r = amdgpu_device_mem_scratch_init(adev);
2c1a2784 2652 if (r) {
7ccfd79f 2653 DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
72d3f592 2654 goto init_failed;
2c1a2784 2655 }
a1255107 2656 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
2657 if (r) {
2658 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 2659 goto init_failed;
2c1a2784 2660 }
06ec9070 2661 r = amdgpu_device_wb_init(adev);
2c1a2784 2662 if (r) {
06ec9070 2663 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 2664 goto init_failed;
2c1a2784 2665 }
a1255107 2666 adev->ip_blocks[i].status.hw = true;
2493664f
ML
2667
2668 /* right after GMC hw init, we create CSA */
02ff519e 2669 if (adev->gfx.mcbp) {
1e256e27 2670 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
58ab2c08
CK
2671 AMDGPU_GEM_DOMAIN_VRAM |
2672 AMDGPU_GEM_DOMAIN_GTT,
2673 AMDGPU_CSA_SIZE);
2493664f
ML
2674 if (r) {
2675 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 2676 goto init_failed;
2493664f
ML
2677 }
2678 }
c8031019
APS
2679
2680 r = amdgpu_seq64_init(adev);
2681 if (r) {
2682 DRM_ERROR("allocate seq64 failed %d\n", r);
2683 goto init_failed;
2684 }
d38ceaf9
AD
2685 }
2686 }
2687
c9ffa427 2688 if (amdgpu_sriov_vf(adev))
22c16d25 2689 amdgpu_virt_init_data_exchange(adev);
c9ffa427 2690
533aed27
AG
2691 r = amdgpu_ib_pool_init(adev);
2692 if (r) {
2693 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2694 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2695 goto init_failed;
2696 }
2697
c8963ea4
RZ
2698 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2699 if (r)
72d3f592 2700 goto init_failed;
0a4f2520
RZ
2701
2702 r = amdgpu_device_ip_hw_init_phase1(adev);
2703 if (r)
72d3f592 2704 goto init_failed;
0a4f2520 2705
7a3e0bb2
RZ
2706 r = amdgpu_device_fw_loading(adev);
2707 if (r)
72d3f592 2708 goto init_failed;
7a3e0bb2 2709
0a4f2520
RZ
2710 r = amdgpu_device_ip_hw_init_phase2(adev);
2711 if (r)
72d3f592 2712 goto init_failed;
d38ceaf9 2713
121a2bc6
AG
2714 /*
2715 * retired pages will be loaded from eeprom and reserved here,
2716 * it should be called after amdgpu_device_ip_hw_init_phase2 since
2717 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2718 * for I2C communication, which is only true at this point.
b82e65a9
GC
2719 *
2720 * amdgpu_ras_recovery_init may fail, but the upper layer only cares about
2721 * failures caused by a bad gpu situation and stops the amdgpu init process
2722 * accordingly. For other failure cases, it will still release all
2723 * the resources and print an error message, rather than returning a
2724 * negative value to the upper level.
121a2bc6
AG
2725 *
2726 * Note: theoretically, this should be called before all vram allocations
2727 * to protect retired pages from being abused
2728 */
b82e65a9
GC
2729 r = amdgpu_ras_recovery_init(adev);
2730 if (r)
2731 goto init_failed;
121a2bc6 2732
cfbb6b00
AG
2733 /**
2734 * In case of XGMI grab extra reference for reset domain for this device
2735 */
a4c63caf 2736 if (adev->gmc.xgmi.num_physical_nodes > 1) {
cfbb6b00 2737 if (amdgpu_xgmi_add_device(adev) == 0) {
46c67660 2738 if (!amdgpu_sriov_vf(adev)) {
2efc30f0
VC
2739 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2740
dfd0287b
LH
2741 if (WARN_ON(!hive)) {
2742 r = -ENOENT;
2743 goto init_failed;
2744 }
2745
46c67660 2746 if (!hive->reset_domain ||
2747 !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2748 r = -ENOENT;
2749 amdgpu_put_xgmi_hive(hive);
2750 goto init_failed;
2751 }
2752
2753 /* Drop the early temporary reset domain we created for device */
2754 amdgpu_reset_put_reset_domain(adev->reset_domain);
2755 adev->reset_domain = hive->reset_domain;
9dfa4860 2756 amdgpu_put_xgmi_hive(hive);
cfbb6b00 2757 }
a4c63caf
AG
2758 }
2759 }
2760
5fd8518d
AG
2761 r = amdgpu_device_init_schedulers(adev);
2762 if (r)
2763 goto init_failed;
e3c1b071 2764
b7043800
AD
2765 if (adev->mman.buffer_funcs_ring->sched.ready)
2766 amdgpu_ttm_set_buffer_funcs_status(adev, true);
2767
e3c1b071 2768 /* Don't init kfd if whole hive need to be reset during init */
84b4dd3f
PY
2769 if (!adev->gmc.xgmi.pending_reset) {
2770 kgd2kfd_init_zone_device(adev);
e3c1b071 2771 amdgpu_amdkfd_device_init(adev);
84b4dd3f 2772 }
c6332b97 2773
bd607166
KR
2774 amdgpu_fru_get_product_info(adev);
2775
72d3f592 2776init_failed:
c6332b97 2777
72d3f592 2778 return r;
d38ceaf9
AD
2779}
2780
e3ecdffa
AD
2781/**
2782 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2783 *
2784 * @adev: amdgpu_device pointer
2785 *
2786 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2787 * this function before a GPU reset. If the value is retained after a
2788 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2789 */
06ec9070 2790static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2791{
2792 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2793}
2794
e3ecdffa
AD
2795/**
2796 * amdgpu_device_check_vram_lost - check if vram is valid
2797 *
2798 * @adev: amdgpu_device pointer
2799 *
2800 * Checks the reset magic value written to the gart pointer in VRAM.
2801 * The driver calls this after a GPU reset to see if the contents of
2802 * VRAM have been lost or not.
2803 * Returns true if vram is lost, false if not.
2804 */
06ec9070 2805static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2806{
dadce777
EQ
2807 if (memcmp(adev->gart.ptr, adev->reset_magic,
2808 AMDGPU_RESET_MAGIC_NUM))
2809 return true;
2810
53b3f8f4 2811 if (!amdgpu_in_reset(adev))
dadce777
EQ
2812 return false;
2813
2814 /*
2815 * For all ASICs with baco/mode1 reset, the VRAM is
2816 * always assumed to be lost.
2817 */
2818 switch (amdgpu_asic_reset_method(adev)) {
2819 case AMD_RESET_METHOD_BACO:
2820 case AMD_RESET_METHOD_MODE1:
2821 return true;
2822 default:
2823 return false;
2824 }
0c49e0b8
CZ
2825}
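/*
 * Illustrative sketch (the handling shown is hypothetical): reset code
 * typically samples this right after the ASIC comes back and re-uploads any
 * VRAM-resident state when it returns true:
 *
 *	bool vram_lost = amdgpu_device_check_vram_lost(adev);
 *
 *	if (vram_lost)
 *		DRM_INFO("VRAM contents lost across reset, re-uploading\n");	// hypothetical handling
 */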
2826
e3ecdffa 2827/**
1112a46b 2828 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2829 *
2830 * @adev: amdgpu_device pointer
b8b72130 2831 * @state: clockgating state (gate or ungate)
e3ecdffa 2832 *
e3ecdffa 2833 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2834 * set_clockgating_state callbacks are run.
2835 * Late initialization pass enabling clockgating for hardware IPs.
2836 * Fini or suspend, pass disabling clockgating for hardware IPs.
e3ecdffa
AD
2837 * Returns 0 on success, negative error code on failure.
2838 */
fdd34271 2839
5d89bb2d
LL
2840int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2841 enum amd_clockgating_state state)
d38ceaf9 2842{
1112a46b 2843 int i, j, r;
d38ceaf9 2844
4a2ba394
SL
2845 if (amdgpu_emu_mode == 1)
2846 return 0;
2847
1112a46b
RZ
2848 for (j = 0; j < adev->num_ip_blocks; j++) {
2849 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2850 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2851 continue;
47198eb7 2852 /* skip CG for GFX, SDMA on S0ix */
5d70a549 2853 if (adev->in_s0ix &&
47198eb7
AD
2854 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2855 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2856 continue;
4a446d55 2857 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2858 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2859 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2860 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2861 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2862 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2863 /* enable clockgating to save power */
a1255107 2864 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2865 state);
4a446d55
AD
2866 if (r) {
2867 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2868 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2869 return r;
2870 }
b0b00ff1 2871 }
d38ceaf9 2872 }
06b18f61 2873
c9f96fd5
RZ
2874 return 0;
2875}
2876
5d89bb2d
LL
2877int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2878 enum amd_powergating_state state)
c9f96fd5 2879{
1112a46b 2880 int i, j, r;
06b18f61 2881
c9f96fd5
RZ
2882 if (amdgpu_emu_mode == 1)
2883 return 0;
2884
1112a46b
RZ
2885 for (j = 0; j < adev->num_ip_blocks; j++) {
2886 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2887 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5 2888 continue;
47198eb7 2889 /* skip PG for GFX, SDMA on S0ix */
5d70a549 2890 if (adev->in_s0ix &&
47198eb7
AD
2891 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2892 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2893 continue;
c9f96fd5
RZ
2894 /* skip CG for VCE/UVD, it's handled specially */
2895 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2896 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2897 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2898 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2899 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2900 /* enable powergating to save power */
2901 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2902 state);
c9f96fd5
RZ
2903 if (r) {
2904 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2905 adev->ip_blocks[i].version->funcs->name, r);
2906 return r;
2907 }
2908 }
2909 }
2dc80b00
S
2910 return 0;
2911}
2912
beff74bc
AD
2913static int amdgpu_device_enable_mgpu_fan_boost(void)
2914{
2915 struct amdgpu_gpu_instance *gpu_ins;
2916 struct amdgpu_device *adev;
2917 int i, ret = 0;
2918
2919 mutex_lock(&mgpu_info.mutex);
2920
2921 /*
2922 * MGPU fan boost feature should be enabled
2923 * only when there are two or more dGPUs in
2924 * the system
2925 */
2926 if (mgpu_info.num_dgpu < 2)
2927 goto out;
2928
2929 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2930 gpu_ins = &(mgpu_info.gpu_ins[i]);
2931 adev = gpu_ins->adev;
2932 if (!(adev->flags & AMD_IS_APU) &&
f10bb940 2933 !gpu_ins->mgpu_fan_enabled) {
beff74bc
AD
2934 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2935 if (ret)
2936 break;
2937
2938 gpu_ins->mgpu_fan_enabled = 1;
2939 }
2940 }
2941
2942out:
2943 mutex_unlock(&mgpu_info.mutex);
2944
2945 return ret;
2946}
2947
e3ecdffa
AD
2948/**
2949 * amdgpu_device_ip_late_init - run late init for hardware IPs
2950 *
2951 * @adev: amdgpu_device pointer
2952 *
2953 * Late initialization pass for hardware IPs. The list of all the hardware
2954 * IPs that make up the asic is walked and the late_init callbacks are run.
2955 * late_init covers any special initialization that an IP requires
2956 * after all of the IPs have been initialized or something that needs to happen
2957 * late in the init process.
2958 * Returns 0 on success, negative error code on failure.
2959 */
06ec9070 2960static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2961{
60599a03 2962 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2963 int i = 0, r;
2964
2965 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2966 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2967 continue;
2968 if (adev->ip_blocks[i].version->funcs->late_init) {
2969 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2970 if (r) {
2971 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2972 adev->ip_blocks[i].version->funcs->name, r);
2973 return r;
2974 }
2dc80b00 2975 }
73f847db 2976 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2977 }
2978
867e24ca 2979 r = amdgpu_ras_late_init(adev);
2980 if (r) {
2981 DRM_ERROR("amdgpu_ras_late_init failed %d", r);
2982 return r;
2983 }
2984
a891d239
DL
2985 amdgpu_ras_set_error_query_ready(adev, true);
2986
1112a46b
RZ
2987 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2988 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2989
06ec9070 2990 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2991
beff74bc
AD
2992 r = amdgpu_device_enable_mgpu_fan_boost();
2993 if (r)
2994 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2995
4da8b639 2996 /* For passthrough configuration on arcturus and aldebaran, enable special SBR handling */
47fc644f
SS
2997 if (amdgpu_passthrough(adev) &&
2998 ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
2999 adev->asic_type == CHIP_ALDEBARAN))
bc143d8b 3000 amdgpu_dpm_handle_passthrough_sbr(adev, true);
60599a03
EQ
3001
3002 if (adev->gmc.xgmi.num_physical_nodes > 1) {
3003 mutex_lock(&mgpu_info.mutex);
3004
3005 /*
3006 * Reset device p-state to low as this was booted with high.
3007 *
3008 * This should be performed only after all devices from the same
3009 * hive get initialized.
3010 *
3011 * However, the number of devices in the hive is not known in advance,
3012 * as it is counted one by one during device initialization.
3013 *
3014 * So, we wait for all XGMI interlinked devices to be initialized.
3015 * This may bring some delays as those devices may come from
3016 * different hives. But that should be OK.
3017 */
3018 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
3019 for (i = 0; i < mgpu_info.num_gpu; i++) {
3020 gpu_instance = &(mgpu_info.gpu_ins[i]);
3021 if (gpu_instance->adev->flags & AMD_IS_APU)
3022 continue;
3023
d84a430d
JK
3024 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
3025 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
3026 if (r) {
3027 DRM_ERROR("pstate setting failed (%d).\n", r);
3028 break;
3029 }
3030 }
3031 }
3032
3033 mutex_unlock(&mgpu_info.mutex);
3034 }
3035
d38ceaf9
AD
3036 return 0;
3037}
3038
613aa3ea
LY
3039/**
3040 * amdgpu_device_smu_fini_early - smu hw_fini wrapper
3041 *
3042 * @adev: amdgpu_device pointer
3043 *
3044 * For ASICs that need to disable the SMC first
3045 */
3046static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
3047{
3048 int i, r;
3049
4e8303cf 3050 if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
613aa3ea
LY
3051 return;
3052
3053 for (i = 0; i < adev->num_ip_blocks; i++) {
3054 if (!adev->ip_blocks[i].status.hw)
3055 continue;
3056 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3057 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3058 /* XXX handle errors */
3059 if (r) {
3060 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
3061 adev->ip_blocks[i].version->funcs->name, r);
3062 }
3063 adev->ip_blocks[i].status.hw = false;
3064 break;
3065 }
3066 }
3067}
3068
e9669fb7 3069static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
d38ceaf9
AD
3070{
3071 int i, r;
3072
e9669fb7
AG
3073 for (i = 0; i < adev->num_ip_blocks; i++) {
3074 if (!adev->ip_blocks[i].version->funcs->early_fini)
3075 continue;
5278a159 3076
e9669fb7
AG
3077 r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
3078 if (r) {
3079 DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
3080 adev->ip_blocks[i].version->funcs->name, r);
3081 }
3082 }
c030f2e4 3083
05df1f01 3084 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
3085 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
3086
7270e895
TY
3087 amdgpu_amdkfd_suspend(adev, false);
3088
613aa3ea
LY
3089 /* Workaround for ASICs that need to disable SMC first */
3090 amdgpu_device_smu_fini_early(adev);
3e96dbfd 3091
d38ceaf9 3092 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3093 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 3094 continue;
8201a67a 3095
a1255107 3096 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 3097 /* XXX handle errors */
2c1a2784 3098 if (r) {
a1255107
AD
3099 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
3100 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 3101 }
8201a67a 3102
a1255107 3103 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
3104 }
3105
6effad8a
GC
3106 if (amdgpu_sriov_vf(adev)) {
3107 if (amdgpu_virt_release_full_gpu(adev, false))
3108 DRM_ERROR("failed to release exclusive mode on fini\n");
3109 }
3110
e9669fb7
AG
3111 return 0;
3112}
3113
3114/**
3115 * amdgpu_device_ip_fini - run fini for hardware IPs
3116 *
3117 * @adev: amdgpu_device pointer
3118 *
3119 * Main teardown pass for hardware IPs. The list of all the hardware
3120 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
3121 * are run. hw_fini tears down the hardware associated with each IP
3122 * and sw_fini tears down any software state associated with each IP.
3123 * Returns 0 on success, negative error code on failure.
3124 */
3125static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
3126{
3127 int i, r;
3128
3129 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
3130 amdgpu_virt_release_ras_err_handler_data(adev);
3131
e9669fb7
AG
3132 if (adev->gmc.xgmi.num_physical_nodes > 1)
3133 amdgpu_xgmi_remove_device(adev);
3134
c004d44e 3135 amdgpu_amdkfd_device_fini_sw(adev);
9950cda2 3136
d38ceaf9 3137 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3138 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 3139 continue;
c12aba3a
ML
3140
3141 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 3142 amdgpu_ucode_free_bo(adev);
1e256e27 3143 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a 3144 amdgpu_device_wb_fini(adev);
7ccfd79f 3145 amdgpu_device_mem_scratch_fini(adev);
533aed27 3146 amdgpu_ib_pool_fini(adev);
c8031019 3147 amdgpu_seq64_fini(adev);
c12aba3a
ML
3148 }
3149
a1255107 3150 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 3151 /* XXX handle errors */
2c1a2784 3152 if (r) {
a1255107
AD
3153 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
3154 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 3155 }
a1255107
AD
3156 adev->ip_blocks[i].status.sw = false;
3157 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
3158 }
3159
a6dcfd9c 3160 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3161 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 3162 continue;
a1255107
AD
3163 if (adev->ip_blocks[i].version->funcs->late_fini)
3164 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
3165 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
3166 }
3167
c030f2e4 3168 amdgpu_ras_fini(adev);
3169
d38ceaf9
AD
3170 return 0;
3171}
3172
e3ecdffa 3173/**
beff74bc 3174 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 3175 *
1112a46b 3176 * @work: work_struct.
e3ecdffa 3177 */
beff74bc 3178static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
3179{
3180 struct amdgpu_device *adev =
beff74bc 3181 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
3182 int r;
3183
3184 r = amdgpu_ib_ring_tests(adev);
3185 if (r)
3186 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
3187}
3188
1e317b99
RZ
3189static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
3190{
3191 struct amdgpu_device *adev =
3192 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
3193
90a92662
MD
3194 WARN_ON_ONCE(adev->gfx.gfx_off_state);
3195 WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
3196
3197 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
3198 adev->gfx.gfx_off_state = true;
1e317b99
RZ
3199}
3200
e3ecdffa 3201/**
e7854a03 3202 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
3203 *
3204 * @adev: amdgpu_device pointer
3205 *
3206 * Main suspend function for hardware IPs. The list of all the hardware
3207 * IPs that make up the asic is walked, clockgating is disabled and the
3208 * suspend callbacks are run. suspend puts the hardware and software state
3209 * in each IP into a state suitable for suspend.
3210 * Returns 0 on success, negative error code on failure.
3211 */
e7854a03
AD
3212static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
3213{
3214 int i, r;
3215
50ec83f0
AD
3216 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3217 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 3218
b31d6ada
EQ
3219 /*
3220 * Per PMFW team's suggestion, driver needs to handle gfxoff
3221 * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
3222 * scenario. Add the missing df cstate disablement here.
3223 */
3224 if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
3225 dev_warn(adev->dev, "Failed to disallow df cstate");
3226
e7854a03
AD
3227 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3228 if (!adev->ip_blocks[i].status.valid)
3229 continue;
2b9f7848 3230
e7854a03 3231 /* displays are handled separately */
2b9f7848
ND
3232 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
3233 continue;
3234
3235 /* XXX handle errors */
3236 r = adev->ip_blocks[i].version->funcs->suspend(adev);
3237 /* XXX handle errors */
3238 if (r) {
3239 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3240 adev->ip_blocks[i].version->funcs->name, r);
3241 return r;
e7854a03 3242 }
2b9f7848
ND
3243
3244 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
3245 }
3246
e7854a03
AD
3247 return 0;
3248}
3249
3250/**
3251 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3252 *
3253 * @adev: amdgpu_device pointer
3254 *
3255 * Main suspend function for hardware IPs. The list of all the hardware
3256 * IPs that make up the asic is walked, clockgating is disabled and the
3257 * suspend callbacks are run. suspend puts the hardware and software state
3258 * in each IP into a state suitable for suspend.
3259 * Returns 0 on success, negative error code on failure.
3260 */
3261static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3262{
3263 int i, r;
3264
557f42a2 3265 if (adev->in_s0ix)
bc143d8b 3266 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
34416931 3267
d38ceaf9 3268 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3269 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 3270 continue;
e7854a03
AD
3271 /* displays are handled in phase1 */
3272 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3273 continue;
bff77e86
LM
3274 /* PSP lost connection when err_event_athub occurs */
3275 if (amdgpu_ras_intr_triggered() &&
3276 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3277 adev->ip_blocks[i].status.hw = false;
3278 continue;
3279 }
e3c1b071 3280
3281 /* skip unnecessary suspend if we do not initialize them yet */
3282 if (adev->gmc.xgmi.pending_reset &&
3283 !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3284 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
3285 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3286 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
3287 adev->ip_blocks[i].status.hw = false;
3288 continue;
3289 }
557f42a2 3290
afa6646b 3291 /* skip suspend of gfx/mes and psp for S0ix
32ff160d
AD
3292 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3293 * like at runtime. PSP is also part of the always on hardware
3294 * so no need to suspend it.
3295 */
557f42a2 3296 if (adev->in_s0ix &&
32ff160d 3297 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
afa6646b
AD
3298 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3299 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
557f42a2
AD
3300 continue;
3301
2a7798ea
AD
3302 /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3303 if (adev->in_s0ix &&
4e8303cf
LL
3304 (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
3305 IP_VERSION(5, 0, 0)) &&
3306 (adev->ip_blocks[i].version->type ==
3307 AMD_IP_BLOCK_TYPE_SDMA))
2a7798ea
AD
3308 continue;
3309
e11c7750
TH
3310 /* During cold boot, swPSP provides the IMU and RLC FW binaries to TOS once.
3311 * These live in the TMR and are expected to be reused by PSP-TOS to reload
3312 * from that location; RLC Autoload is also loaded automatically
3313 * from there based on the PMFW -> PSP message during the re-init sequence.
3314 * Therefore, the psp suspend & resume should be skipped to avoid destroying
3315 * the TMR and reloading the FWs again for IMU enabled APU ASICs.
3316 */
3317 if (amdgpu_in_reset(adev) &&
3318 (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3319 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3320 continue;
3321
d38ceaf9 3322 /* XXX handle errors */
a1255107 3323 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 3324 /* XXX handle errors */
2c1a2784 3325 if (r) {
a1255107
AD
3326 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3327 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 3328 }
876923fb 3329 adev->ip_blocks[i].status.hw = false;
a3a09142 3330 /* handle putting the SMC in the appropriate state */
47fc644f 3331 if (!amdgpu_sriov_vf(adev)) {
86b93fd6
JZ
3332 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3333 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3334 if (r) {
3335 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3336 adev->mp1_state, r);
3337 return r;
3338 }
a3a09142
AD
3339 }
3340 }
d38ceaf9
AD
3341 }
3342
3343 return 0;
3344}
3345
e7854a03
AD
3346/**
3347 * amdgpu_device_ip_suspend - run suspend for hardware IPs
3348 *
3349 * @adev: amdgpu_device pointer
3350 *
3351 * Main suspend function for hardware IPs. The list of all the hardware
3352 * IPs that make up the asic is walked, clockgating is disabled and the
3353 * suspend callbacks are run. suspend puts the hardware and software state
3354 * in each IP into a state suitable for suspend.
3355 * Returns 0 on success, negative error code on failure.
3356 */
3357int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3358{
3359 int r;
3360
3c73683c
JC
3361 if (amdgpu_sriov_vf(adev)) {
3362 amdgpu_virt_fini_data_exchange(adev);
e7819644 3363 amdgpu_virt_request_full_gpu(adev, false);
3c73683c 3364 }
e7819644 3365
b7043800
AD
3366 amdgpu_ttm_set_buffer_funcs_status(adev, false);
3367
e7854a03
AD
3368 r = amdgpu_device_ip_suspend_phase1(adev);
3369 if (r)
3370 return r;
3371 r = amdgpu_device_ip_suspend_phase2(adev);
3372
e7819644
YT
3373 if (amdgpu_sriov_vf(adev))
3374 amdgpu_virt_release_full_gpu(adev, false);
3375
e7854a03
AD
3376 return r;
3377}
3378
06ec9070 3379static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3380{
3381 int i, r;
3382
2cb681b6 3383 static enum amd_ip_block_type ip_order[] = {
2cb681b6 3384 AMD_IP_BLOCK_TYPE_COMMON,
c1c39032 3385 AMD_IP_BLOCK_TYPE_GMC,
39186aef 3386 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
3387 AMD_IP_BLOCK_TYPE_IH,
3388 };
a90ad3c2 3389
95ea3dbc 3390 for (i = 0; i < adev->num_ip_blocks; i++) {
2cb681b6
ML
3391 int j;
3392 struct amdgpu_ip_block *block;
a90ad3c2 3393
4cd2a96d
J
3394 block = &adev->ip_blocks[i];
3395 block->status.hw = false;
2cb681b6 3396
4cd2a96d 3397 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
2cb681b6 3398
4cd2a96d 3399 if (block->version->type != ip_order[j] ||
2cb681b6
ML
3400 !block->status.valid)
3401 continue;
3402
3403 r = block->version->funcs->hw_init(adev);
 0aaeefcc 3404			DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
c41d1cf6
ML
3405 if (r)
3406 return r;
482f0e53 3407 block->status.hw = true;
a90ad3c2
ML
3408 }
3409 }
3410
3411 return 0;
3412}
3413
06ec9070 3414static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3415{
3416 int i, r;
3417
2cb681b6
ML
3418 static enum amd_ip_block_type ip_order[] = {
3419 AMD_IP_BLOCK_TYPE_SMC,
3420 AMD_IP_BLOCK_TYPE_DCE,
3421 AMD_IP_BLOCK_TYPE_GFX,
3422 AMD_IP_BLOCK_TYPE_SDMA,
ec64350d 3423 AMD_IP_BLOCK_TYPE_MES,
257deb8c 3424 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07 3425 AMD_IP_BLOCK_TYPE_VCE,
d2cdc014
YZ
3426 AMD_IP_BLOCK_TYPE_VCN,
3427 AMD_IP_BLOCK_TYPE_JPEG
2cb681b6 3428 };
a90ad3c2 3429
2cb681b6
ML
3430 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3431 int j;
3432 struct amdgpu_ip_block *block;
a90ad3c2 3433
2cb681b6
ML
3434 for (j = 0; j < adev->num_ip_blocks; j++) {
3435 block = &adev->ip_blocks[j];
3436
3437 if (block->version->type != ip_order[i] ||
482f0e53
ML
3438 !block->status.valid ||
3439 block->status.hw)
2cb681b6
ML
3440 continue;
3441
895bd048
JZ
3442 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3443 r = block->version->funcs->resume(adev);
3444 else
3445 r = block->version->funcs->hw_init(adev);
3446
 0aaeefcc 3447			DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
c41d1cf6
ML
3448 if (r)
3449 return r;
482f0e53 3450 block->status.hw = true;
a90ad3c2
ML
3451 }
3452 }
3453
3454 return 0;
3455}
3456
e3ecdffa
AD
3457/**
3458 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3459 *
3460 * @adev: amdgpu_device pointer
3461 *
3462 * First resume function for hardware IPs. The list of all the hardware
3463 * IPs that make up the asic is walked and the resume callbacks are run for
3464 * COMMON, GMC, and IH. resume puts the hardware into a functional state
3465 * after a suspend and updates the software state as necessary. This
3466 * function is also used for restoring the GPU after a GPU reset.
3467 * Returns 0 on success, negative error code on failure.
3468 */
06ec9070 3469static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
3470{
3471 int i, r;
3472
a90ad3c2 3473 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3474 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 3475 continue;
a90ad3c2 3476 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3477 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
d7274ec7
BZ
3478 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3479 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
482f0e53 3480
fcf0649f
CZ
3481 r = adev->ip_blocks[i].version->funcs->resume(adev);
3482 if (r) {
3483 DRM_ERROR("resume of IP block <%s> failed %d\n",
3484 adev->ip_blocks[i].version->funcs->name, r);
3485 return r;
3486 }
482f0e53 3487 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
3488 }
3489 }
3490
3491 return 0;
3492}
3493
e3ecdffa
AD
3494/**
3495 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3496 *
3497 * @adev: amdgpu_device pointer
3498 *
 3500 * Second resume function for hardware IPs. The list of all the hardware
3500 * IPs that make up the asic is walked and the resume callbacks are run for
3501 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
3502 * functional state after a suspend and updates the software state as
3503 * necessary. This function is also used for restoring the GPU after a GPU
3504 * reset.
3505 * Returns 0 on success, negative error code on failure.
3506 */
06ec9070 3507static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3508{
3509 int i, r;
3510
3511 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3512 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 3513 continue;
fcf0649f 3514 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3515 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
3516 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3517 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 3518 continue;
a1255107 3519 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 3520 if (r) {
a1255107
AD
3521 DRM_ERROR("resume of IP block <%s> failed %d\n",
3522 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 3523 return r;
2c1a2784 3524 }
482f0e53 3525 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
3526 }
3527
3528 return 0;
3529}
3530
e3ecdffa
AD
3531/**
3532 * amdgpu_device_ip_resume - run resume for hardware IPs
3533 *
3534 * @adev: amdgpu_device pointer
3535 *
3536 * Main resume function for hardware IPs. The hardware IPs
3537 * are split into two resume functions because they are
b8920e1e 3538 * also used in recovering from a GPU reset and some additional
e3ecdffa
AD
 3539 * steps need to be taken between them. In this case (S3/S4) they are
3540 * run sequentially.
3541 * Returns 0 on success, negative error code on failure.
3542 */
06ec9070 3543static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
3544{
3545 int r;
3546
06ec9070 3547 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
3548 if (r)
3549 return r;
7a3e0bb2
RZ
3550
3551 r = amdgpu_device_fw_loading(adev);
3552 if (r)
3553 return r;
3554
06ec9070 3555 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f 3556
b7043800
AD
3557 if (adev->mman.buffer_funcs_ring->sched.ready)
3558 amdgpu_ttm_set_buffer_funcs_status(adev, true);
3559
fcf0649f
CZ
3560 return r;
3561}
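
/*
 * Rough summary (inference from the code above, hedged): phase 1 brings
 * COMMON, GMC and IH (plus PSP on SR-IOV) back up, amdgpu_device_fw_loading()
 * then loads microcode, and phase 2 resumes the remaining blocks (GFX, SDMA,
 * ...), which presumably rely on that microcode already being present.
 */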
3562
e3ecdffa
AD
3563/**
3564 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3565 *
3566 * @adev: amdgpu_device pointer
3567 *
3568 * Query the VBIOS data tables to determine if the board supports SR-IOV.
3569 */
4e99a44e 3570static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 3571{
6867e1b5
ML
3572 if (amdgpu_sriov_vf(adev)) {
3573 if (adev->is_atom_fw) {
58ff791a 3574 if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
6867e1b5
ML
3575 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3576 } else {
3577 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3578 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3579 }
3580
3581 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3582 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 3583 }
048765ad
AR
3584}
3585
e3ecdffa
AD
3586/**
3587 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3588 *
3589 * @asic_type: AMD asic type
3590 *
 3591 * Check if there is DC (new modesetting infrastructure) support for an asic.
3592 * returns true if DC has support, false if not.
3593 */
4562236b
HW
3594bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3595{
3596 switch (asic_type) {
0637d417
AD
3597#ifdef CONFIG_DRM_AMDGPU_SI
3598 case CHIP_HAINAN:
3599#endif
3600 case CHIP_TOPAZ:
3601 /* chips with no display hardware */
3602 return false;
4562236b 3603#if defined(CONFIG_DRM_AMD_DC)
64200c46
MR
3604 case CHIP_TAHITI:
3605 case CHIP_PITCAIRN:
3606 case CHIP_VERDE:
3607 case CHIP_OLAND:
2d32ffd6
AD
3608 /*
3609 * We have systems in the wild with these ASICs that require
3610 * LVDS and VGA support which is not supported with DC.
3611 *
3612 * Fallback to the non-DC driver here by default so as not to
3613 * cause regressions.
3614 */
3615#if defined(CONFIG_DRM_AMD_DC_SI)
3616 return amdgpu_dc > 0;
3617#else
3618 return false;
64200c46 3619#endif
4562236b 3620 case CHIP_BONAIRE:
0d6fbccb 3621 case CHIP_KAVERI:
367e6687
AD
3622 case CHIP_KABINI:
3623 case CHIP_MULLINS:
d9fda248
HW
3624 /*
3625 * We have systems in the wild with these ASICs that require
b5a0168e 3626 * VGA support which is not supported with DC.
d9fda248
HW
3627 *
3628 * Fallback to the non-DC driver here by default so as not to
3629 * cause regressions.
3630 */
3631 return amdgpu_dc > 0;
f7f12b25 3632 default:
fd187853 3633 return amdgpu_dc != 0;
f7f12b25 3634#else
4562236b 3635 default:
93b09a9a 3636 if (amdgpu_dc > 0)
b8920e1e 3637 DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
4562236b 3638 return false;
f7f12b25 3639#endif
4562236b
HW
3640 }
3641}
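
/*
 * Note on the amdgpu.dc module parameter as handled above (a hedged summary,
 * not additional source text): for the SI/CIK parts listed in the switch, DC
 * is only used when dc is explicitly set to a value > 0; for newer ASICs any
 * non-zero value (including the default auto setting) keeps DC enabled, and
 * dc=0 falls back to the legacy display path where one exists.
 */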
3642
3643/**
3644 * amdgpu_device_has_dc_support - check if dc is supported
3645 *
982a820b 3646 * @adev: amdgpu_device pointer
4562236b
HW
3647 *
3648 * Returns true for supported, false for not supported
3649 */
3650bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3651{
25263da3 3652 if (adev->enable_virtual_display ||
abaf210c 3653 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
2555039d
XY
3654 return false;
3655
4562236b
HW
3656 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3657}
3658
d4535e2c
AG
3659static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3660{
3661 struct amdgpu_device *adev =
3662 container_of(__work, struct amdgpu_device, xgmi_reset_work);
d95e8e97 3663 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
d4535e2c 3664
c6a6e2db
AG
3665 /* It's a bug to not have a hive within this function */
3666 if (WARN_ON(!hive))
3667 return;
3668
3669 /*
3670 * Use task barrier to synchronize all xgmi reset works across the
3671 * hive. task_barrier_enter and task_barrier_exit will block
3672 * until all the threads running the xgmi reset works reach
3673 * those points. task_barrier_full will do both blocks.
3674 */
3675 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3676
3677 task_barrier_enter(&hive->tb);
4a580877 3678 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
c6a6e2db
AG
3679
3680 if (adev->asic_reset_res)
3681 goto fail;
3682
3683 task_barrier_exit(&hive->tb);
4a580877 3684 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
c6a6e2db
AG
3685
3686 if (adev->asic_reset_res)
3687 goto fail;
43c4d576 3688
21226f02 3689 amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB);
c6a6e2db
AG
3690 } else {
3691
3692 task_barrier_full(&hive->tb);
3693 adev->asic_reset_res = amdgpu_asic_reset(adev);
3694 }
ce316fa5 3695
c6a6e2db 3696fail:
d4535e2c 3697 if (adev->asic_reset_res)
fed184e9 3698 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
4a580877 3699 adev->asic_reset_res, adev_to_drm(adev)->unique);
d95e8e97 3700 amdgpu_put_xgmi_hive(hive);
d4535e2c
AG
3701}
3702
71f98027
AD
3703static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3704{
3705 char *input = amdgpu_lockup_timeout;
3706 char *timeout_setting = NULL;
3707 int index = 0;
3708 long timeout;
3709 int ret = 0;
3710
3711 /*
67387dfe
AD
 3712	 * By default, the timeout for non-compute jobs is 10000 ms
 3713	 * and 60000 ms for compute jobs.
 71f98027 3714	 * In SR-IOV or passthrough mode, the timeout for compute
 b7b2a316 3715	 * jobs is 60000 ms by default.
71f98027
AD
3716 */
3717 adev->gfx_timeout = msecs_to_jiffies(10000);
3718 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
9882e278
ED
3719 if (amdgpu_sriov_vf(adev))
3720 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3721 msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
71f98027 3722 else
67387dfe 3723 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027 3724
f440ff44 3725 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 3726 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 3727 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
3728 ret = kstrtol(timeout_setting, 0, &timeout);
3729 if (ret)
3730 return ret;
3731
3732 if (timeout == 0) {
3733 index++;
3734 continue;
3735 } else if (timeout < 0) {
3736 timeout = MAX_SCHEDULE_TIMEOUT;
127aedf9
CK
3737 dev_warn(adev->dev, "lockup timeout disabled");
3738 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
71f98027
AD
3739 } else {
3740 timeout = msecs_to_jiffies(timeout);
3741 }
3742
3743 switch (index++) {
3744 case 0:
3745 adev->gfx_timeout = timeout;
3746 break;
3747 case 1:
3748 adev->compute_timeout = timeout;
3749 break;
3750 case 2:
3751 adev->sdma_timeout = timeout;
3752 break;
3753 case 3:
3754 adev->video_timeout = timeout;
3755 break;
3756 default:
3757 break;
3758 }
3759 }
3760 /*
3761 * There is only one value specified and
3762 * it should apply to all non-compute jobs.
3763 */
bcccee89 3764 if (index == 1) {
71f98027 3765 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
3766 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3767 adev->compute_timeout = adev->gfx_timeout;
3768 }
71f98027
AD
3769 }
3770
3771 return ret;
3772}
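
/*
 * Illustrative usage of the lockup_timeout parameter parsed above (the
 * example values are hypothetical): up to four comma-separated values in ms
 * are applied in order to gfx, compute, sdma and video, e.g.
 *
 *   modprobe amdgpu lockup_timeout=10000,60000,10000,10000
 *
 * A single value applies to all non-compute queues (and to compute as well
 * under SR-IOV/passthrough), 0 keeps the default, and a negative value
 * disables the timeout.
 */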
d4535e2c 3773
4a74c38c
PY
3774/**
3775 * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3776 *
3777 * @adev: amdgpu_device pointer
3778 *
 3779 * RAM is direct mapped to the GPU if the IOMMU is not enabled or is in passthrough mode
3780 */
3781static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3782{
3783 struct iommu_domain *domain;
3784
3785 domain = iommu_get_domain_for_dev(adev->dev);
3786 if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3787 adev->ram_is_direct_mapped = true;
3788}
3789
77f3a5cd 3790static const struct attribute *amdgpu_dev_attributes[] = {
77f3a5cd
ND
3791 &dev_attr_pcie_replay_count.attr,
3792 NULL
3793};
3794
02ff519e
AD
3795static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
3796{
3797 if (amdgpu_mcbp == 1)
3798 adev->gfx.mcbp = true;
1e9e15dc
JZ
3799 else if (amdgpu_mcbp == 0)
3800 adev->gfx.mcbp = false;
4e8303cf
LL
3801 else if ((amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 0, 0)) &&
3802 (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 0, 0)) &&
1e9e15dc 3803 adev->gfx.num_gfx_rings)
50a7c876
AD
3804 adev->gfx.mcbp = true;
3805
02ff519e
AD
3806 if (amdgpu_sriov_vf(adev))
3807 adev->gfx.mcbp = true;
3808
3809 if (adev->gfx.mcbp)
3810 DRM_INFO("MCBP is enabled\n");
3811}
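
/*
 * Note on the amdgpu.mcbp parameter handled above (a hedged summary): 1
 * forces mid-command-buffer preemption on, 0 forces it off, and any other
 * value (auto) enables it for gfx9 ASICs with gfx rings; SR-IOV VFs always
 * enable it.
 */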
3812
d38ceaf9
AD
3813/**
3814 * amdgpu_device_init - initialize the driver
3815 *
3816 * @adev: amdgpu_device pointer
d38ceaf9
AD
3817 * @flags: driver flags
3818 *
3819 * Initializes the driver info and hw (all asics).
3820 * Returns 0 for success or an error on failure.
3821 * Called at driver startup.
3822 */
3823int amdgpu_device_init(struct amdgpu_device *adev,
d38ceaf9
AD
3824 uint32_t flags)
3825{
8aba21b7
LT
3826 struct drm_device *ddev = adev_to_drm(adev);
3827 struct pci_dev *pdev = adev->pdev;
d38ceaf9 3828 int r, i;
b98c6299 3829 bool px = false;
95844d20 3830 u32 max_MBps;
59e9fff1 3831 int tmp;
d38ceaf9
AD
3832
3833 adev->shutdown = false;
d38ceaf9 3834 adev->flags = flags;
4e66d7d2
YZ
3835
3836 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3837 adev->asic_type = amdgpu_force_asic_type;
3838 else
3839 adev->asic_type = flags & AMD_ASIC_MASK;
3840
d38ceaf9 3841 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 3842 if (amdgpu_emu_mode == 1)
8bdab6bb 3843 adev->usec_timeout *= 10;
770d13b1 3844 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
3845 adev->accel_working = false;
3846 adev->num_rings = 0;
68ce8b24 3847 RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
d38ceaf9
AD
3848 adev->mman.buffer_funcs = NULL;
3849 adev->mman.buffer_funcs_ring = NULL;
3850 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 3851 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 3852 adev->gmc.gmc_funcs = NULL;
7bd939d0 3853 adev->harvest_ip_mask = 0x0;
f54d1867 3854 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 3855 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
3856
3857 adev->smc_rreg = &amdgpu_invalid_rreg;
3858 adev->smc_wreg = &amdgpu_invalid_wreg;
3859 adev->pcie_rreg = &amdgpu_invalid_rreg;
3860 adev->pcie_wreg = &amdgpu_invalid_wreg;
0c552ed3
LM
3861 adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
3862 adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
36b9a952
HR
3863 adev->pciep_rreg = &amdgpu_invalid_rreg;
3864 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
3865 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3866 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
a76b2870
CL
3867 adev->pcie_rreg64_ext = &amdgpu_invalid_rreg64_ext;
3868 adev->pcie_wreg64_ext = &amdgpu_invalid_wreg64_ext;
d38ceaf9
AD
3869 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3870 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3871 adev->didt_rreg = &amdgpu_invalid_rreg;
3872 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
3873 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3874 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
3875 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3876 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3877
3e39ab90
AD
3878 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3879 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3880 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
3881
3882 /* mutex initialization are all done here so we
b8920e1e
SS
3883 * can recall function without having locking issues
3884 */
0e5ca0d1 3885 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
3886 mutex_init(&adev->pm.mutex);
3887 mutex_init(&adev->gfx.gpu_clock_mutex);
3888 mutex_init(&adev->srbm_mutex);
b8866c26 3889 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3890 mutex_init(&adev->gfx.gfx_off_mutex);
98a54e88 3891 mutex_init(&adev->gfx.partition_mutex);
d38ceaf9 3892 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3893 mutex_init(&adev->mn_lock);
e23b74aa 3894 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3895 hash_init(adev->mn_hash);
32eaeae0 3896 mutex_init(&adev->psp.mutex);
bd052211 3897 mutex_init(&adev->notifier_lock);
8cda7a4f 3898 mutex_init(&adev->pm.stable_pstate_ctx_lock);
f113cc32 3899 mutex_init(&adev->benchmark_mutex);
d38ceaf9 3900
ab3b9de6 3901 amdgpu_device_init_apu_flags(adev);
9f6a7857 3902
912dfc84
EQ
3903 r = amdgpu_device_check_arguments(adev);
3904 if (r)
3905 return r;
d38ceaf9 3906
d38ceaf9
AD
3907 spin_lock_init(&adev->mmio_idx_lock);
3908 spin_lock_init(&adev->smc_idx_lock);
3909 spin_lock_init(&adev->pcie_idx_lock);
3910 spin_lock_init(&adev->uvd_ctx_idx_lock);
3911 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3912 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3913 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3914 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3915 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3916
0c4e7fa5
CZ
3917 INIT_LIST_HEAD(&adev->shadow_list);
3918 mutex_init(&adev->shadow_list_lock);
3919
655ce9cb 3920 INIT_LIST_HEAD(&adev->reset_list);
3921
6492e1b0 3922 INIT_LIST_HEAD(&adev->ras_list);
3923
3e38b634
EQ
3924 INIT_LIST_HEAD(&adev->pm.od_kobj_list);
3925
beff74bc
AD
3926 INIT_DELAYED_WORK(&adev->delayed_init_work,
3927 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3928 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3929 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3930
d4535e2c
AG
3931 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3932
d23ee13f 3933 adev->gfx.gfx_off_req_count = 1;
0ad7347a
AA
3934 adev->gfx.gfx_off_residency = 0;
3935 adev->gfx.gfx_off_entrycount = 0;
b6e79d9a 3936 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3937
b265bdbd
EQ
3938 atomic_set(&adev->throttling_logging_enabled, 1);
3939 /*
3940 * If throttling continues, logging will be performed every minute
3941 * to avoid log flooding. "-1" is subtracted since the thermal
3942 * throttling interrupt comes every second. Thus, the total logging
 3943	 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
3944 * for throttling interrupt) = 60 seconds.
3945 */
3946 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3947 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3948
0fa49558
AX
3949 /* Registers mapping */
3950 /* TODO: block userspace mapping of io register */
da69c161
KW
3951 if (adev->asic_type >= CHIP_BONAIRE) {
3952 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3953 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3954 } else {
3955 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3956 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3957 }
d38ceaf9 3958
6c08e0ef
EQ
3959 for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3960 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3961
d38ceaf9 3962 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
b8920e1e 3963 if (!adev->rmmio)
d38ceaf9 3964 return -ENOMEM;
b8920e1e 3965
d38ceaf9 3966 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
b8920e1e 3967 DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size);
d38ceaf9 3968
436afdfa
PY
3969 /*
 3970	 * The reset domain needs to be present early, before the XGMI hive is
 3971	 * discovered (if any) and initialized, so that the reset sem and in_gpu
 3972	 * reset flag can be used early during init and before any call to RREG32.
3973 */
3974 adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3975 if (!adev->reset_domain)
3976 return -ENOMEM;
3977
3aa0115d
ML
3978 /* detect hw virtualization here */
3979 amdgpu_detect_virtualization(adev);
3980
04e85958
TL
3981 amdgpu_device_get_pcie_info(adev);
3982
dffa11b4
ML
3983 r = amdgpu_device_get_job_timeout_settings(adev);
3984 if (r) {
3985 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4ef87d8f 3986 return r;
a190d1c7
XY
3987 }
3988
d38ceaf9 3989 /* early init functions */
06ec9070 3990 r = amdgpu_device_ip_early_init(adev);
d38ceaf9 3991 if (r)
4ef87d8f 3992 return r;
d38ceaf9 3993
02ff519e
AD
3994 amdgpu_device_set_mcbp(adev);
3995
b7cdb41e
ML
3996 /* Get rid of things like offb */
3997 r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
3998 if (r)
3999 return r;
4000
4d33e704
SK
4001 /* Enable TMZ based on IP_VERSION */
4002 amdgpu_gmc_tmz_set(adev);
4003
957b0787 4004 amdgpu_gmc_noretry_set(adev);
4a0165f0
VS
4005 /* Need to get xgmi info early to decide the reset behavior*/
4006 if (adev->gmc.xgmi.supported) {
4007 r = adev->gfxhub.funcs->get_xgmi_info(adev);
4008 if (r)
4009 return r;
4010 }
4011
8e6d0b69 4012 /* enable PCIE atomic ops */
b4520bfd
GW
4013 if (amdgpu_sriov_vf(adev)) {
4014 if (adev->virt.fw_reserve.p_pf2vf)
4015 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
4016 adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
4017 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
0e768043
YZ
 4018	/* APUs with gfx9 onwards don't rely on PCIe atomics; their
 4019	 * internal path natively supports atomics, so set have_atomics_support to true.
4020 */
b4520bfd 4021 } else if ((adev->flags & AMD_IS_APU) &&
4e8303cf
LL
4022 (amdgpu_ip_version(adev, GC_HWIP, 0) >
4023 IP_VERSION(9, 0, 0))) {
0e768043 4024 adev->have_atomics_support = true;
b4520bfd 4025 } else {
8e6d0b69 4026 adev->have_atomics_support =
4027 !pci_enable_atomic_ops_to_root(adev->pdev,
4028 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
4029 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
b4520bfd
GW
4030 }
4031
8e6d0b69 4032 if (!adev->have_atomics_support)
 4033		dev_info(adev->dev, "PCIE atomic ops are not supported\n");
4034
6585661d 4035 /* doorbell bar mapping and doorbell index init*/
43c064db 4036 amdgpu_doorbell_init(adev);
6585661d 4037
9475a943
SL
4038 if (amdgpu_emu_mode == 1) {
4039 /* post the asic on emulation mode */
4040 emu_soc_asic_init(adev);
bfca0289 4041 goto fence_driver_init;
9475a943 4042 }
bfca0289 4043
04442bf7
LL
4044 amdgpu_reset_init(adev);
4045
4e99a44e 4046 /* detect if we are with an SRIOV vbios */
b4520bfd
GW
4047 if (adev->bios)
4048 amdgpu_device_detect_sriov_bios(adev);
048765ad 4049
95e8e59e
AD
4050 /* check if we need to reset the asic
4051 * E.g., driver was not cleanly unloaded previously, etc.
4052 */
f14899fd 4053 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
e3c1b071 4054 if (adev->gmc.xgmi.num_physical_nodes) {
4055 dev_info(adev->dev, "Pending hive reset.\n");
4056 adev->gmc.xgmi.pending_reset = true;
4057 /* Only need to init necessary block for SMU to handle the reset */
4058 for (i = 0; i < adev->num_ip_blocks; i++) {
4059 if (!adev->ip_blocks[i].status.valid)
4060 continue;
4061 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
4062 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
4063 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
4064 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
751f43e7 4065 DRM_DEBUG("IP %s disabled for hw_init.\n",
e3c1b071 4066 adev->ip_blocks[i].version->funcs->name);
4067 adev->ip_blocks[i].status.hw = true;
4068 }
4069 }
4070 } else {
5f38ac54
KF
4071 switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
4072 case IP_VERSION(13, 0, 0):
4073 case IP_VERSION(13, 0, 7):
4074 case IP_VERSION(13, 0, 10):
4075 r = psp_gpu_reset(adev);
4076 break;
4077 default:
4078 tmp = amdgpu_reset_method;
4079 /* It should do a default reset when loading or reloading the driver,
4080 * regardless of the module parameter reset_method.
4081 */
4082 amdgpu_reset_method = AMD_RESET_METHOD_NONE;
4083 r = amdgpu_asic_reset(adev);
4084 amdgpu_reset_method = tmp;
4085 break;
4086 }
4087
e3c1b071 4088 if (r) {
4089 dev_err(adev->dev, "asic reset on init failed\n");
4090 goto failed;
4091 }
95e8e59e
AD
4092 }
4093 }
4094
d38ceaf9 4095 /* Post card if necessary */
39c640c0 4096 if (amdgpu_device_need_post(adev)) {
d38ceaf9 4097 if (!adev->bios) {
bec86378 4098 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
4099 r = -EINVAL;
4100 goto failed;
d38ceaf9 4101 }
bec86378 4102 DRM_INFO("GPU posting now...\n");
4d2997ab 4103 r = amdgpu_device_asic_init(adev);
4e99a44e
ML
4104 if (r) {
4105 dev_err(adev->dev, "gpu post error!\n");
4106 goto failed;
4107 }
d38ceaf9
AD
4108 }
4109
9535a86a
SZ
4110 if (adev->bios) {
4111 if (adev->is_atom_fw) {
4112 /* Initialize clocks */
4113 r = amdgpu_atomfirmware_get_clock_info(adev);
4114 if (r) {
4115 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
4116 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4117 goto failed;
4118 }
4119 } else {
4120 /* Initialize clocks */
4121 r = amdgpu_atombios_get_clock_info(adev);
4122 if (r) {
4123 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
4124 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4125 goto failed;
4126 }
4127 /* init i2c buses */
4128 if (!amdgpu_device_has_dc_support(adev))
4129 amdgpu_atombios_i2c_init(adev);
a5bde2f9 4130 }
2c1a2784 4131 }
d38ceaf9 4132
bfca0289 4133fence_driver_init:
d38ceaf9 4134 /* Fence driver */
067f44c8 4135 r = amdgpu_fence_driver_sw_init(adev);
2c1a2784 4136 if (r) {
067f44c8 4137 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
e23b74aa 4138 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 4139 goto failed;
2c1a2784 4140 }
d38ceaf9
AD
4141
4142 /* init the mode config */
4a580877 4143 drm_mode_config_init(adev_to_drm(adev));
d38ceaf9 4144
06ec9070 4145 r = amdgpu_device_ip_init(adev);
d38ceaf9 4146 if (r) {
06ec9070 4147 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 4148 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
970fd197 4149 goto release_ras_con;
d38ceaf9
AD
4150 }
4151
8d35a259
LG
4152 amdgpu_fence_driver_hw_init(adev);
4153
d69b8971
YZ
4154 dev_info(adev->dev,
4155 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
4156 adev->gfx.config.max_shader_engines,
4157 adev->gfx.config.max_sh_per_se,
4158 adev->gfx.config.max_cu_per_sh,
4159 adev->gfx.cu_info.number);
4160
d38ceaf9
AD
4161 adev->accel_working = true;
4162
e59c0205
AX
4163 amdgpu_vm_check_compute_bug(adev);
4164
95844d20
MO
4165 /* Initialize the buffer migration limit. */
4166 if (amdgpu_moverate >= 0)
4167 max_MBps = amdgpu_moverate;
4168 else
4169 max_MBps = 8; /* Allow 8 MB/s. */
4170 /* Get a log2 for easy divisions. */
4171 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
4172
b0adca4d
EQ
4173 /*
4174 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
 4175	 * Otherwise the mgpu fan boost feature will be skipped because the
 4176	 * gpu instance count would be too low.
4177 */
4178 amdgpu_register_gpu_instance(adev);
4179
d38ceaf9
AD
4180 /* enable clockgating, etc. after ib tests, etc. since some blocks require
4181 * explicit gating rather than handling it automatically.
4182 */
e3c1b071 4183 if (!adev->gmc.xgmi.pending_reset) {
4184 r = amdgpu_device_ip_late_init(adev);
4185 if (r) {
4186 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
4187 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
970fd197 4188 goto release_ras_con;
e3c1b071 4189 }
4190 /* must succeed. */
4191 amdgpu_ras_resume(adev);
4192 queue_delayed_work(system_wq, &adev->delayed_init_work,
4193 msecs_to_jiffies(AMDGPU_RESUME_MS));
2c1a2784 4194 }
d38ceaf9 4195
38eecbe0
CL
4196 if (amdgpu_sriov_vf(adev)) {
4197 amdgpu_virt_release_full_gpu(adev, true);
2c738637 4198 flush_delayed_work(&adev->delayed_init_work);
38eecbe0 4199 }
2c738637 4200
90bcb9b5
EQ
4201 /*
 4202	 * Place the sysfs registration after `late_init`, as some of the
 4203	 * operations performed in `late_init` might affect the creation
 4204	 * of the sysfs interfaces.
4205 */
4206 r = amdgpu_atombios_sysfs_init(adev);
4207 if (r)
4208 drm_err(&adev->ddev,
4209 "registering atombios sysfs failed (%d).\n", r);
4210
4211 r = amdgpu_pm_sysfs_init(adev);
4212 if (r)
4213 DRM_ERROR("registering pm sysfs failed (%d).\n", r);
4214
4215 r = amdgpu_ucode_sysfs_init(adev);
4216 if (r) {
4217 adev->ucode_sysfs_en = false;
4218 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
4219 } else
4220 adev->ucode_sysfs_en = true;
4221
77f3a5cd 4222 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
5aea5327 4223 if (r)
77f3a5cd 4224 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166 4225
76da73f0
LL
4226 r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
4227 if (r)
4228 dev_err(adev->dev,
4229 "Could not create amdgpu board attributes\n");
4230
7957ec80
LL
4231 amdgpu_fru_sysfs_init(adev);
4232
d155bef0
AB
4233 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4234 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
4235 if (r)
4236 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
4237
c1dd4aa6
AG
4238 /* Have stored pci confspace at hand for restore in sudden PCI error */
4239 if (amdgpu_device_cache_pci_state(adev->pdev))
4240 pci_restore_state(pdev);
4241
8c3dd61c
KHF
4242 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
4243 /* this will fail for cards that aren't VGA class devices, just
b8920e1e
SS
4244 * ignore it
4245 */
8c3dd61c 4246 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
bf44e8ce 4247 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
8c3dd61c 4248
d37a3929
OC
4249 px = amdgpu_device_supports_px(ddev);
4250
7b1c6263 4251 if (px || (!dev_is_removable(&adev->pdev->dev) &&
d37a3929 4252 apple_gmux_detect(NULL, NULL)))
8c3dd61c
KHF
4253 vga_switcheroo_register_client(adev->pdev,
4254 &amdgpu_switcheroo_ops, px);
d37a3929
OC
4255
4256 if (px)
8c3dd61c 4257 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
8c3dd61c 4258
e3c1b071 4259 if (adev->gmc.xgmi.pending_reset)
4260 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
4261 msecs_to_jiffies(AMDGPU_RESUME_MS));
4262
4a74c38c
PY
4263 amdgpu_device_check_iommu_direct_map(adev);
4264
d38ceaf9 4265 return 0;
83ba126a 4266
970fd197 4267release_ras_con:
38eecbe0
CL
4268 if (amdgpu_sriov_vf(adev))
4269 amdgpu_virt_release_full_gpu(adev, true);
4270
4271 /* failed in exclusive mode due to timeout */
4272 if (amdgpu_sriov_vf(adev) &&
4273 !amdgpu_sriov_runtime(adev) &&
4274 amdgpu_virt_mmio_blocked(adev) &&
4275 !amdgpu_virt_wait_reset(adev)) {
4276 dev_err(adev->dev, "VF exclusive mode timeout\n");
4277 /* Don't send request since VF is inactive. */
4278 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4279 adev->virt.ops = NULL;
4280 r = -EAGAIN;
4281 }
970fd197
SY
4282 amdgpu_release_ras_context(adev);
4283
83ba126a 4284failed:
89041940 4285 amdgpu_vf_error_trans_all(adev);
8840a387 4286
83ba126a 4287 return r;
d38ceaf9
AD
4288}
4289
07775fc1
AG
4290static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
4291{
62d5f9f7 4292
07775fc1
AG
4293 /* Clear all CPU mappings pointing to this device */
4294 unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
4295
4296 /* Unmap all mapped bars - Doorbell, registers and VRAM */
43c064db 4297 amdgpu_doorbell_fini(adev);
07775fc1
AG
4298
4299 iounmap(adev->rmmio);
4300 adev->rmmio = NULL;
4301 if (adev->mman.aper_base_kaddr)
4302 iounmap(adev->mman.aper_base_kaddr);
4303 adev->mman.aper_base_kaddr = NULL;
4304
4305 /* Memory manager related */
a0ba1279 4306 if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
07775fc1
AG
4307 arch_phys_wc_del(adev->gmc.vram_mtrr);
4308 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4309 }
4310}
4311
d38ceaf9 4312/**
bbe04dec 4313 * amdgpu_device_fini_hw - tear down the driver
d38ceaf9
AD
4314 *
4315 * @adev: amdgpu_device pointer
4316 *
4317 * Tear down the driver info (all asics).
4318 * Called at driver shutdown.
4319 */
72c8c97b 4320void amdgpu_device_fini_hw(struct amdgpu_device *adev)
d38ceaf9 4321{
aac89168 4322 dev_info(adev->dev, "amdgpu: finishing device.\n");
9f875167 4323 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 4324 adev->shutdown = true;
9f875167 4325
752c683d
ML
 4326	/* make sure the IB test has finished before entering exclusive mode
 4327	 * to avoid preemption during the IB test
b8920e1e 4328 */
519b8b76 4329 if (amdgpu_sriov_vf(adev)) {
752c683d 4330 amdgpu_virt_request_full_gpu(adev, false);
519b8b76
BZ
4331 amdgpu_virt_fini_data_exchange(adev);
4332 }
752c683d 4333
e5b03032
ML
4334 /* disable all interrupts */
4335 amdgpu_irq_disable_all(adev);
47fc644f 4336 if (adev->mode_info.mode_config_initialized) {
1053b9c9 4337 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4a580877 4338 drm_helper_force_disable_all(adev_to_drm(adev));
ff97cba8 4339 else
4a580877 4340 drm_atomic_helper_shutdown(adev_to_drm(adev));
ff97cba8 4341 }
8d35a259 4342 amdgpu_fence_driver_hw_fini(adev);
72c8c97b 4343
cd3a8a59 4344 if (adev->mman.initialized)
9bff18d1 4345 drain_workqueue(adev->mman.bdev.wq);
98f56188 4346
53e9d836 4347 if (adev->pm.sysfs_initialized)
7c868b59 4348 amdgpu_pm_sysfs_fini(adev);
72c8c97b
AG
4349 if (adev->ucode_sysfs_en)
4350 amdgpu_ucode_sysfs_fini(adev);
4351 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
7957ec80 4352 amdgpu_fru_sysfs_fini(adev);
72c8c97b 4353
232d1d43
SY
4354 /* disable ras feature must before hw fini */
4355 amdgpu_ras_pre_fini(adev);
4356
b7043800
AD
4357 amdgpu_ttm_set_buffer_funcs_status(adev, false);
4358
e9669fb7 4359 amdgpu_device_ip_fini_early(adev);
d10d0daa 4360
a3848df6
YW
4361 amdgpu_irq_fini_hw(adev);
4362
b6fd6e0f
SK
4363 if (adev->mman.initialized)
4364 ttm_device_clear_dma_mappings(&adev->mman.bdev);
894c6890 4365
d10d0daa 4366 amdgpu_gart_dummy_page_fini(adev);
07775fc1 4367
39934d3e
VP
4368 if (drm_dev_is_unplugged(adev_to_drm(adev)))
4369 amdgpu_device_unmap_mmio(adev);
87172e89 4370
72c8c97b
AG
4371}
4372
4373void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4374{
62d5f9f7 4375 int idx;
d37a3929 4376 bool px;
62d5f9f7 4377
8d35a259 4378 amdgpu_fence_driver_sw_fini(adev);
a5c5d8d5 4379 amdgpu_device_ip_fini(adev);
b31d3063 4380 amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
d38ceaf9 4381 adev->accel_working = false;
68ce8b24 4382 dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
04442bf7
LL
4383
4384 amdgpu_reset_fini(adev);
4385
d38ceaf9 4386 /* free i2c buses */
4562236b
HW
4387 if (!amdgpu_device_has_dc_support(adev))
4388 amdgpu_i2c_fini(adev);
bfca0289
SL
4389
4390 if (amdgpu_emu_mode != 1)
4391 amdgpu_atombios_fini(adev);
4392
d38ceaf9
AD
4393 kfree(adev->bios);
4394 adev->bios = NULL;
d37a3929 4395
8a2b5139
LL
4396 kfree(adev->fru_info);
4397 adev->fru_info = NULL;
4398
d37a3929
OC
4399 px = amdgpu_device_supports_px(adev_to_drm(adev));
4400
7b1c6263 4401 if (px || (!dev_is_removable(&adev->pdev->dev) &&
d37a3929 4402 apple_gmux_detect(NULL, NULL)))
84c8b22e 4403 vga_switcheroo_unregister_client(adev->pdev);
d37a3929
OC
4404
4405 if (px)
83ba126a 4406 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d37a3929 4407
38d6be81 4408 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
b8779475 4409 vga_client_unregister(adev->pdev);
e9bc1bf7 4410
62d5f9f7
LS
4411 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4412
4413 iounmap(adev->rmmio);
4414 adev->rmmio = NULL;
43c064db 4415 amdgpu_doorbell_fini(adev);
62d5f9f7
LS
4416 drm_dev_exit(idx);
4417 }
4418
d155bef0
AB
4419 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4420 amdgpu_pmu_fini(adev);
72de33f8 4421 if (adev->mman.discovery_bin)
a190d1c7 4422 amdgpu_discovery_fini(adev);
72c8c97b 4423
cfbb6b00
AG
4424 amdgpu_reset_put_reset_domain(adev->reset_domain);
4425 adev->reset_domain = NULL;
4426
72c8c97b
AG
4427 kfree(adev->pci_state);
4428
d38ceaf9
AD
4429}
4430
58144d28
ND
4431/**
4432 * amdgpu_device_evict_resources - evict device resources
4433 * @adev: amdgpu device object
4434 *
 4435 * Evicts all ttm device resources (vram BOs, gart table) from the lru list
4436 * of the vram memory type. Mainly used for evicting device resources
4437 * at suspend time.
4438 *
4439 */
7863c155 4440static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
58144d28 4441{
7863c155
ML
4442 int ret;
4443
e53d9665
ML
4444 /* No need to evict vram on APUs for suspend to ram or s2idle */
4445 if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
7863c155 4446 return 0;
58144d28 4447
7863c155
ML
4448 ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4449 if (ret)
58144d28 4450 DRM_WARN("evicting device resources failed\n");
7863c155 4451 return ret;
58144d28 4452}
d38ceaf9
AD
4453
4454/*
4455 * Suspend & resume.
4456 */
5095d541
ML
4457/**
4458 * amdgpu_device_prepare - prepare for device suspend
4459 *
4460 * @dev: drm dev pointer
4461 *
4462 * Prepare to put the hw in the suspend state (all asics).
4463 * Returns 0 for success or an error on failure.
4464 * Called at driver suspend.
4465 */
4466int amdgpu_device_prepare(struct drm_device *dev)
4467{
4468 struct amdgpu_device *adev = drm_to_adev(dev);
cb11ca32 4469 int i, r;
5095d541
ML
4470
4471 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4472 return 0;
4473
4474 /* Evict the majority of BOs before starting suspend sequence */
4475 r = amdgpu_device_evict_resources(adev);
4476 if (r)
4477 return r;
4478
cb11ca32
ML
4479 for (i = 0; i < adev->num_ip_blocks; i++) {
4480 if (!adev->ip_blocks[i].status.valid)
4481 continue;
4482 if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
4483 continue;
4484 r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev);
4485 if (r)
4486 return r;
4487 }
4488
5095d541
ML
4489 return 0;
4490}
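
/*
 * Note (assumption: this is wired to the driver's .prepare PM callback, as
 * the name and comment suggest): the PM core runs the prepare phase before
 * the suspend phase, so the BO eviction and per-IP prepare_suspend work
 * above completes before amdgpu_device_suspend() below starts tearing down
 * hardware state.
 */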
4491
d38ceaf9 4492/**
810ddc3a 4493 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 4494 *
87e3f136 4495 * @dev: drm dev pointer
87e3f136 4496 * @fbcon : notify the fbdev of suspend
d38ceaf9
AD
4497 *
4498 * Puts the hw in the suspend state (all asics).
4499 * Returns 0 for success or an error on failure.
4500 * Called at driver suspend.
4501 */
de185019 4502int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9 4503{
a2e15b0e 4504 struct amdgpu_device *adev = drm_to_adev(dev);
d7274ec7 4505 int r = 0;
d38ceaf9 4506
d38ceaf9
AD
4507 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4508 return 0;
4509
44779b43 4510 adev->in_suspend = true;
3fa8f89d 4511
d7274ec7
BZ
4512 if (amdgpu_sriov_vf(adev)) {
4513 amdgpu_virt_fini_data_exchange(adev);
4514 r = amdgpu_virt_request_full_gpu(adev, false);
4515 if (r)
4516 return r;
4517 }
4518
3fa8f89d
S
4519 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4520 DRM_WARN("smart shift update failed\n");
4521
5f818173 4522 if (fbcon)
087451f3 4523 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
5f818173 4524
beff74bc 4525 cancel_delayed_work_sync(&adev->delayed_init_work);
0dee7263 4526 flush_delayed_work(&adev->gfx.gfx_off_delay_work);
a5459475 4527
5e6932fe 4528 amdgpu_ras_suspend(adev);
4529
b7043800
AD
4530 amdgpu_ttm_set_buffer_funcs_status(adev, false);
4531
2196927b 4532 amdgpu_device_ip_suspend_phase1(adev);
fe1053b7 4533
c004d44e 4534 if (!adev->in_s0ix)
5d3a2d95 4535 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
94fa5660 4536
7863c155
ML
4537 r = amdgpu_device_evict_resources(adev);
4538 if (r)
4539 return r;
d38ceaf9 4540
8d35a259 4541 amdgpu_fence_driver_hw_fini(adev);
d38ceaf9 4542
2196927b 4543 amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 4544
d7274ec7
BZ
4545 if (amdgpu_sriov_vf(adev))
4546 amdgpu_virt_release_full_gpu(adev, false);
4547
d38ceaf9
AD
4548 return 0;
4549}
4550
4551/**
810ddc3a 4552 * amdgpu_device_resume - initiate device resume
d38ceaf9 4553 *
87e3f136 4554 * @dev: drm dev pointer
87e3f136 4555 * @fbcon : notify the fbdev of resume
d38ceaf9
AD
4556 *
4557 * Bring the hw back to operating state (all asics).
4558 * Returns 0 for success or an error on failure.
4559 * Called at driver resume.
4560 */
de185019 4561int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9 4562{
1348969a 4563 struct amdgpu_device *adev = drm_to_adev(dev);
03161a6e 4564 int r = 0;
d38ceaf9 4565
d7274ec7
BZ
4566 if (amdgpu_sriov_vf(adev)) {
4567 r = amdgpu_virt_request_full_gpu(adev, true);
4568 if (r)
4569 return r;
4570 }
4571
d38ceaf9
AD
4572 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4573 return 0;
4574
62498733 4575 if (adev->in_s0ix)
bc143d8b 4576 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
628c36d7 4577
d38ceaf9 4578 /* post card */
39c640c0 4579 if (amdgpu_device_need_post(adev)) {
4d2997ab 4580 r = amdgpu_device_asic_init(adev);
74b0b157 4581 if (r)
aac89168 4582 dev_err(adev->dev, "amdgpu asic init failed\n");
74b0b157 4583 }
d38ceaf9 4584
06ec9070 4585 r = amdgpu_device_ip_resume(adev);
d7274ec7 4586
e6707218 4587 if (r) {
aac89168 4588 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
3c22c1ea 4589 goto exit;
e6707218 4590 }
8d35a259 4591 amdgpu_fence_driver_hw_init(adev);
5ceb54c6 4592
c004d44e 4593 if (!adev->in_s0ix) {
5d3a2d95
AD
4594 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4595 if (r)
3c22c1ea 4596 goto exit;
5d3a2d95 4597 }
756e6880 4598
8ed79c40
TH
4599 r = amdgpu_device_ip_late_init(adev);
4600 if (r)
4601 goto exit;
4602
4603 queue_delayed_work(system_wq, &adev->delayed_init_work,
4604 msecs_to_jiffies(AMDGPU_RESUME_MS));
3c22c1ea
SF
4605exit:
4606 if (amdgpu_sriov_vf(adev)) {
4607 amdgpu_virt_init_data_exchange(adev);
4608 amdgpu_virt_release_full_gpu(adev, true);
4609 }
4610
4611 if (r)
4612 return r;
4613
96a5d8d4 4614 /* Make sure IB tests flushed */
beff74bc 4615 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 4616
a2e15b0e 4617 if (fbcon)
087451f3 4618 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
d38ceaf9 4619
5e6932fe 4620 amdgpu_ras_resume(adev);
4621
d09ef243
AD
4622 if (adev->mode_info.num_crtc) {
4623 /*
4624 * Most of the connector probing functions try to acquire runtime pm
4625 * refs to ensure that the GPU is powered on when connector polling is
4626 * performed. Since we're calling this from a runtime PM callback,
4627 * trying to acquire rpm refs will cause us to deadlock.
4628 *
4629 * Since we're guaranteed to be holding the rpm lock, it's safe to
4630 * temporarily disable the rpm helpers so this doesn't deadlock us.
4631 */
23a1a9e5 4632#ifdef CONFIG_PM
d09ef243 4633 dev->dev->power.disable_depth++;
23a1a9e5 4634#endif
d09ef243
AD
4635 if (!adev->dc_enabled)
4636 drm_helper_hpd_irq_event(dev);
4637 else
4638 drm_kms_helper_hotplug_event(dev);
23a1a9e5 4639#ifdef CONFIG_PM
d09ef243 4640 dev->dev->power.disable_depth--;
23a1a9e5 4641#endif
d09ef243 4642 }
44779b43
RZ
4643 adev->in_suspend = false;
4644
dc907c9d
JX
4645 if (adev->enable_mes)
4646 amdgpu_mes_self_test(adev);
4647
3fa8f89d
S
4648 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4649 DRM_WARN("smart shift update failed\n");
4650
4d3b9ae5 4651 return 0;
d38ceaf9
AD
4652}
4653
e3ecdffa
AD
4654/**
4655 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4656 *
4657 * @adev: amdgpu_device pointer
4658 *
4659 * The list of all the hardware IPs that make up the asic is walked and
4660 * the check_soft_reset callbacks are run. check_soft_reset determines
4661 * if the asic is still hung or not.
4662 * Returns true if any of the IPs are still in a hung state, false if not.
4663 */
06ec9070 4664static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
4665{
4666 int i;
4667 bool asic_hang = false;
4668
f993d628
ML
4669 if (amdgpu_sriov_vf(adev))
4670 return true;
4671
8bc04c29
AD
4672 if (amdgpu_asic_need_full_reset(adev))
4673 return true;
4674
63fbf42f 4675 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4676 if (!adev->ip_blocks[i].status.valid)
63fbf42f 4677 continue;
a1255107
AD
4678 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4679 adev->ip_blocks[i].status.hang =
4680 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4681 if (adev->ip_blocks[i].status.hang) {
aac89168 4682 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
4683 asic_hang = true;
4684 }
4685 }
4686 return asic_hang;
4687}
4688
e3ecdffa
AD
4689/**
4690 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4691 *
4692 * @adev: amdgpu_device pointer
4693 *
4694 * The list of all the hardware IPs that make up the asic is walked and the
4695 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
4696 * handles any IP specific hardware or software state changes that are
4697 * necessary for a soft reset to succeed.
4698 * Returns 0 on success, negative error code on failure.
4699 */
06ec9070 4700static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
4701{
4702 int i, r = 0;
4703
4704 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4705 if (!adev->ip_blocks[i].status.valid)
d31a501e 4706 continue;
a1255107
AD
4707 if (adev->ip_blocks[i].status.hang &&
4708 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4709 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
4710 if (r)
4711 return r;
4712 }
4713 }
4714
4715 return 0;
4716}
4717
e3ecdffa
AD
4718/**
4719 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4720 *
4721 * @adev: amdgpu_device pointer
4722 *
4723 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
4724 * reset is necessary to recover.
4725 * Returns true if a full asic reset is required, false if not.
4726 */
06ec9070 4727static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 4728{
da146d3b
AD
4729 int i;
4730
8bc04c29
AD
4731 if (amdgpu_asic_need_full_reset(adev))
4732 return true;
4733
da146d3b 4734 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4735 if (!adev->ip_blocks[i].status.valid)
da146d3b 4736 continue;
a1255107
AD
4737 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4738 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4739 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
4740 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4741 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 4742 if (adev->ip_blocks[i].status.hang) {
 aac89168 4743				dev_info(adev->dev, "Some blocks need full reset!\n");
da146d3b
AD
4744 return true;
4745 }
4746 }
35d782fe
CZ
4747 }
4748 return false;
4749}
4750
e3ecdffa
AD
4751/**
4752 * amdgpu_device_ip_soft_reset - do a soft reset
4753 *
4754 * @adev: amdgpu_device pointer
4755 *
4756 * The list of all the hardware IPs that make up the asic is walked and the
4757 * soft_reset callbacks are run if the block is hung. soft_reset handles any
4758 * IP specific hardware or software state changes that are necessary to soft
4759 * reset the IP.
4760 * Returns 0 on success, negative error code on failure.
4761 */
06ec9070 4762static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4763{
4764 int i, r = 0;
4765
4766 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4767 if (!adev->ip_blocks[i].status.valid)
35d782fe 4768 continue;
a1255107
AD
4769 if (adev->ip_blocks[i].status.hang &&
4770 adev->ip_blocks[i].version->funcs->soft_reset) {
4771 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
4772 if (r)
4773 return r;
4774 }
4775 }
4776
4777 return 0;
4778}
4779
e3ecdffa
AD
4780/**
4781 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4782 *
4783 * @adev: amdgpu_device pointer
4784 *
4785 * The list of all the hardware IPs that make up the asic is walked and the
4786 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
4787 * handles any IP specific hardware or software state changes that are
4788 * necessary after the IP has been soft reset.
4789 * Returns 0 on success, negative error code on failure.
4790 */
06ec9070 4791static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4792{
4793 int i, r = 0;
4794
4795 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4796 if (!adev->ip_blocks[i].status.valid)
35d782fe 4797 continue;
a1255107
AD
4798 if (adev->ip_blocks[i].status.hang &&
4799 adev->ip_blocks[i].version->funcs->post_soft_reset)
4800 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
4801 if (r)
4802 return r;
4803 }
4804
4805 return 0;
4806}
4807
e3ecdffa 4808/**
c33adbc7 4809 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
4810 *
4811 * @adev: amdgpu_device pointer
4812 *
4813 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
4814 * restore things like GPUVM page tables after a GPU reset where
4815 * the contents of VRAM might be lost.
403009bf
CK
4816 *
4817 * Returns:
4818 * 0 on success, negative error code on failure.
e3ecdffa 4819 */
c33adbc7 4820static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 4821{
c41d1cf6 4822 struct dma_fence *fence = NULL, *next = NULL;
403009bf 4823 struct amdgpu_bo *shadow;
e18aaea7 4824 struct amdgpu_bo_vm *vmbo;
403009bf 4825 long r = 1, tmo;
c41d1cf6
ML
4826
4827 if (amdgpu_sriov_runtime(adev))
b045d3af 4828 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
4829 else
4830 tmo = msecs_to_jiffies(100);
4831
aac89168 4832 dev_info(adev->dev, "recover vram bo from shadow start\n");
c41d1cf6 4833 mutex_lock(&adev->shadow_list_lock);
e18aaea7 4834 list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4994d1f0
LC
4835 /* If vm is compute context or adev is APU, shadow will be NULL */
4836 if (!vmbo->shadow)
4837 continue;
4838 shadow = vmbo->shadow;
4839
403009bf 4840 /* No need to recover an evicted BO */
d3116756
CK
4841 if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4842 shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4843 shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
403009bf
CK
4844 continue;
4845
4846 r = amdgpu_bo_restore_shadow(shadow, &next);
4847 if (r)
4848 break;
4849
c41d1cf6 4850 if (fence) {
1712fb1a 4851 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
4852 dma_fence_put(fence);
4853 fence = next;
1712fb1a 4854 if (tmo == 0) {
4855 r = -ETIMEDOUT;
c41d1cf6 4856 break;
1712fb1a 4857 } else if (tmo < 0) {
4858 r = tmo;
4859 break;
4860 }
403009bf
CK
4861 } else {
4862 fence = next;
c41d1cf6 4863 }
c41d1cf6
ML
4864 }
4865 mutex_unlock(&adev->shadow_list_lock);
4866
403009bf
CK
4867 if (fence)
4868 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
4869 dma_fence_put(fence);
4870
1712fb1a 4871 if (r < 0 || tmo <= 0) {
aac89168 4872 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
4873 return -EIO;
4874 }
c41d1cf6 4875
aac89168 4876 dev_info(adev->dev, "recover vram bo from shadow done\n");
403009bf 4877 return 0;
c41d1cf6
ML
4878}
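
/*
 * Minimal sketch (hypothetical helper, not part of the driver) of the fence
 * pipelining pattern used above: each restore copy is queued immediately,
 * while we only ever wait on the fence of the *previous* restore, keeping
 * the copy engine busy.
 */
static long amdgpu_example_wait_prev_fence(struct dma_fence **fence,
					   struct dma_fence *next, long tmo)
{
	if (*fence) {
		tmo = dma_fence_wait_timeout(*fence, false, tmo);
		dma_fence_put(*fence);
	}
	*fence = next;
	return tmo;	/* remaining jiffies; 0 on timeout, <0 on error */
}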
4879
a90ad3c2 4880
e3ecdffa 4881/**
06ec9070 4882 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e 4883 *
982a820b 4884 * @adev: amdgpu_device pointer
87e3f136 4885 * @from_hypervisor: request from hypervisor
5740682e
ML
4886 *
 4888 * Do a VF FLR and reinitialize the ASIC.
 3f48c681 4888 * Returns 0 on success, otherwise failure.
e3ecdffa
AD
4889 */
4890static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4891 bool from_hypervisor)
5740682e
ML
4892{
4893 int r;
a5f67c93 4894 struct amdgpu_hive_info *hive = NULL;
7258fa31 4895 int retry_limit = 0;
5740682e 4896
7258fa31 4897retry:
c004d44e 4898 amdgpu_amdkfd_pre_reset(adev);
428890a3 4899
5740682e
ML
4900 if (from_hypervisor)
4901 r = amdgpu_virt_request_full_gpu(adev, true);
4902 else
4903 r = amdgpu_virt_reset_gpu(adev);
4904 if (r)
4905 return r;
f734b213 4906 amdgpu_irq_gpu_reset_resume_helper(adev);
a90ad3c2 4907
83f24a8f
HC
4908 /* some sw clean up VF needs to do before recover */
4909 amdgpu_virt_post_reset(adev);
4910
a90ad3c2 4911 /* Resume IP prior to SMC */
06ec9070 4912 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
4913 if (r)
4914 goto error;
a90ad3c2 4915
c9ffa427 4916 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 4917
7a3e0bb2
RZ
4918 r = amdgpu_device_fw_loading(adev);
4919 if (r)
4920 return r;
4921
a90ad3c2 4922 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 4923 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
4924 if (r)
4925 goto error;
a90ad3c2 4926
a5f67c93
ZL
4927 hive = amdgpu_get_xgmi_hive(adev);
4928 /* Update PSP FW topology after reset */
4929 if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4930 r = amdgpu_xgmi_update_topology(hive, adev);
4931
4932 if (hive)
4933 amdgpu_put_xgmi_hive(hive);
4934
4935 if (!r) {
a5f67c93 4936 r = amdgpu_ib_ring_tests(adev);
9c12f5cd 4937
c004d44e 4938 amdgpu_amdkfd_post_reset(adev);
a5f67c93 4939 }
a90ad3c2 4940
abc34253 4941error:
c41d1cf6 4942 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 4943 amdgpu_inc_vram_lost(adev);
c33adbc7 4944 r = amdgpu_device_recover_vram(adev);
a90ad3c2 4945 }
437f3e0b 4946 amdgpu_virt_release_full_gpu(adev, true);
a90ad3c2 4947
7258fa31
SK
4948 if (AMDGPU_RETRY_SRIOV_RESET(r)) {
4949 if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
4950 retry_limit++;
4951 goto retry;
4952 } else
4953 DRM_ERROR("GPU reset retry is beyond the retry limit\n");
4954 }
4955
a90ad3c2
ML
4956 return r;
4957}
4958
9a1cddd6 4959/**
4960 * amdgpu_device_has_job_running - check if there is any job in mirror list
4961 *
982a820b 4962 * @adev: amdgpu_device pointer
9a1cddd6 4963 *
4964 * check if there is any job in mirror list
4965 */
4966bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4967{
4968 int i;
4969 struct drm_sched_job *job;
4970
4971 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4972 struct amdgpu_ring *ring = adev->rings[i];
4973
4974 if (!ring || !ring->sched.thread)
4975 continue;
4976
4977 spin_lock(&ring->sched.job_list_lock);
6efa4b46
LT
4978 job = list_first_entry_or_null(&ring->sched.pending_list,
4979 struct drm_sched_job, list);
9a1cddd6 4980 spin_unlock(&ring->sched.job_list_lock);
4981 if (job)
4982 return true;
4983 }
4984 return false;
4985}
4986
12938fad
CK
4987/**
4988 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4989 *
982a820b 4990 * @adev: amdgpu_device pointer
12938fad
CK
4991 *
4992 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4993 * a hung GPU.
4994 */
4995bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4996{
12938fad 4997
3ba7b418
AG
4998 if (amdgpu_gpu_recovery == 0)
4999 goto disabled;
5000
1a11a65d
YC
5001 /* Skip soft reset check in fatal error mode */
5002 if (!amdgpu_ras_is_poison_mode_supported(adev))
5003 return true;
5004
3ba7b418
AG
5005 if (amdgpu_sriov_vf(adev))
5006 return true;
5007
5008 if (amdgpu_gpu_recovery == -1) {
5009 switch (adev->asic_type) {
b3523c45
AD
5010#ifdef CONFIG_DRM_AMDGPU_SI
5011 case CHIP_VERDE:
5012 case CHIP_TAHITI:
5013 case CHIP_PITCAIRN:
5014 case CHIP_OLAND:
5015 case CHIP_HAINAN:
5016#endif
5017#ifdef CONFIG_DRM_AMDGPU_CIK
5018 case CHIP_KAVERI:
5019 case CHIP_KABINI:
5020 case CHIP_MULLINS:
5021#endif
5022 case CHIP_CARRIZO:
5023 case CHIP_STONEY:
5024 case CHIP_CYAN_SKILLFISH:
3ba7b418 5025 goto disabled;
b3523c45
AD
5026 default:
5027 break;
3ba7b418 5028 }
12938fad
CK
5029 }
5030
5031 return true;
3ba7b418
AG
5032
5033disabled:
aac89168 5034 dev_info(adev->dev, "GPU recovery disabled.\n");
3ba7b418 5035 return false;
12938fad
CK
5036}
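
/*
 * A minimal sketch of the typical gating pattern around the check above,
 * assuming a hypothetical hang handler example_handle_hang(): recovery
 * paths first ask amdgpu_device_should_recover_gpu() and bail out when the
 * amdgpu_gpu_recovery module parameter (or the per-ASIC default) disables
 * recovery.
 */
static void example_handle_hang(struct amdgpu_device *adev)
{
	/* honour amdgpu_gpu_recovery=0 and the per-ASIC defaults */
	if (!amdgpu_device_should_recover_gpu(adev)) {
		dev_dbg(adev->dev, "GPU recovery disabled, not resetting\n");
		return;
	}

	/* ...schedule or perform the actual reset from here... */
}
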
5037
5c03e584
FX
5038int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
5039{
47fc644f
SS
5040 u32 i;
5041 int ret = 0;
5c03e584 5042
47fc644f 5043 amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5c03e584 5044
47fc644f 5045 dev_info(adev->dev, "GPU mode1 reset\n");
5c03e584 5046
47fc644f
SS
5047 /* disable BM */
5048 pci_clear_master(adev->pdev);
5c03e584 5049
47fc644f 5050 amdgpu_device_cache_pci_state(adev->pdev);
5c03e584 5051
47fc644f
SS
5052 if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
5053 dev_info(adev->dev, "GPU smu mode1 reset\n");
5054 ret = amdgpu_dpm_mode1_reset(adev);
5055 } else {
5056 dev_info(adev->dev, "GPU psp mode1 reset\n");
5057 ret = psp_gpu_reset(adev);
5058 }
5c03e584 5059
47fc644f 5060 if (ret)
2c0f880a 5061 goto mode1_reset_failed;
5c03e584 5062
47fc644f 5063 amdgpu_device_load_pci_state(adev->pdev);
15c5c5f5
LL
5064 ret = amdgpu_psp_wait_for_bootloader(adev);
5065 if (ret)
2c0f880a 5066 goto mode1_reset_failed;
5c03e584 5067
47fc644f
SS
5068 /* wait for asic to come out of reset */
5069 for (i = 0; i < adev->usec_timeout; i++) {
5070 u32 memsize = adev->nbio.funcs->get_memsize(adev);
5c03e584 5071
47fc644f
SS
5072 if (memsize != 0xffffffff)
5073 break;
5074 udelay(1);
5075 }
5c03e584 5076
2c0f880a
HZ
5077 if (i >= adev->usec_timeout) {
5078 ret = -ETIMEDOUT;
5079 goto mode1_reset_failed;
5080 }
5081
47fc644f 5082 amdgpu_atombios_scratch_regs_engine_hung(adev, false);
15c5c5f5 5083
2c0f880a
HZ
5084 return 0;
5085
5086mode1_reset_failed:
5087 dev_err(adev->dev, "GPU mode1 reset failed\n");
47fc644f 5088 return ret;
5c03e584 5089}
5c6dd71e 5090
e3c1b071 5091int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
04442bf7 5092 struct amdgpu_reset_context *reset_context)
26bc5340 5093{
5c1e6fa4 5094 int i, r = 0;
04442bf7
LL
5095 struct amdgpu_job *job = NULL;
5096 bool need_full_reset =
5097 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5098
5099 if (reset_context->reset_req_dev == adev)
5100 job = reset_context->job;
71182665 5101
b602ca5f
TZ
5102 if (amdgpu_sriov_vf(adev)) {
5103 /* stop the data exchange thread */
5104 amdgpu_virt_fini_data_exchange(adev);
5105 }
5106
9e225fb9
AG
5107 amdgpu_fence_driver_isr_toggle(adev, true);
5108
71182665 5109 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
5110 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5111 struct amdgpu_ring *ring = adev->rings[i];
5112
51687759 5113 if (!ring || !ring->sched.thread)
0875dc9e 5114 continue;
5740682e 5115
b8920e1e
SS
5116 /* Clear job fences from the fence drv to avoid force_completion
5117 * leaving NULL and vm flush fences in the fence drv
5118 */
5c1e6fa4 5119 amdgpu_fence_driver_clear_job_fences(ring);
c530b02f 5120
2f9d4084
ML
5121 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
5122 amdgpu_fence_driver_force_completion(ring);
0875dc9e 5123 }
d38ceaf9 5124
9e225fb9
AG
5125 amdgpu_fence_driver_isr_toggle(adev, false);
5126
ff99849b 5127 if (job && job->vm)
222b5f04
AG
5128 drm_sched_increase_karma(&job->base);
5129
04442bf7 5130 r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
404b277b 5131 /* If reset handler not implemented, continue; otherwise return */
b8920e1e 5132 if (r == -EOPNOTSUPP)
404b277b
LL
5133 r = 0;
5134 else
04442bf7
LL
5135 return r;
5136
1d721ed6 5137 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
5138 if (!amdgpu_sriov_vf(adev)) {
5139
5140 if (!need_full_reset)
5141 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
5142
360cd081
LG
5143 if (!need_full_reset && amdgpu_gpu_recovery &&
5144 amdgpu_device_ip_check_soft_reset(adev)) {
26bc5340
AG
5145 amdgpu_device_ip_pre_soft_reset(adev);
5146 r = amdgpu_device_ip_soft_reset(adev);
5147 amdgpu_device_ip_post_soft_reset(adev);
5148 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 5149 dev_info(adev->dev, "soft reset failed, will fall back to full reset!\n");
26bc5340
AG
5150 need_full_reset = true;
5151 }
5152 }
5153
5154 if (need_full_reset)
5155 r = amdgpu_device_ip_suspend(adev);
04442bf7
LL
5156 if (need_full_reset)
5157 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5158 else
5159 clear_bit(AMDGPU_NEED_FULL_RESET,
5160 &reset_context->flags);
26bc5340
AG
5161 }
5162
5163 return r;
5164}
5165
15fd09a0
SA
5166static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
5167{
15fd09a0
SA
5168 int i;
5169
38a15ad9 5170 lockdep_assert_held(&adev->reset_domain->sem);
15fd09a0 5171
2d6a2a28
AA
5172 for (i = 0; i < adev->reset_info.num_regs; i++) {
5173 adev->reset_info.reset_dump_reg_value[i] =
5174 RREG32(adev->reset_info.reset_dump_reg_list[i]);
5175
5176 trace_amdgpu_reset_reg_dumps(adev->reset_info.reset_dump_reg_list[i],
5177 adev->reset_info.reset_dump_reg_value[i]);
15fd09a0
SA
5178 }
5179
5180 return 0;
5181}
5182
04442bf7
LL
5183int amdgpu_do_asic_reset(struct list_head *device_list_handle,
5184 struct amdgpu_reset_context *reset_context)
26bc5340
AG
5185{
5186 struct amdgpu_device *tmp_adev = NULL;
04442bf7 5187 bool need_full_reset, skip_hw_reset, vram_lost = false;
26bc5340 5188 int r = 0;
f5c7e779 5189 bool gpu_reset_for_dev_remove = 0;
26bc5340 5190
04442bf7
LL
5191 /* Try reset handler method first */
5192 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5193 reset_list);
15fd09a0 5194 amdgpu_reset_reg_dumps(tmp_adev);
0a83bb35
LL
5195
5196 reset_context->reset_device_list = device_list_handle;
04442bf7 5197 r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
404b277b 5198 /* If reset handler not implemented, continue; otherwise return */
b8920e1e 5199 if (r == -EOPNOTSUPP)
404b277b
LL
5200 r = 0;
5201 else
04442bf7
LL
5202 return r;
5203
5204 /* Reset handler not implemented, use the default method */
5205 need_full_reset =
5206 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5207 skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
5208
f5c7e779
YC
5209 gpu_reset_for_dev_remove =
5210 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5211 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5212
26bc5340 5213 /*
655ce9cb 5214 * ASIC reset has to be done on all XGMI hive nodes ASAP
26bc5340
AG
5215 * to allow proper link negotiation in FW (within 1 sec)
5216 */
7ac71382 5217 if (!skip_hw_reset && need_full_reset) {
655ce9cb 5218 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
041a62bc 5219 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 5220 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
e3c1b071 5221 tmp_adev->gmc.xgmi.pending_reset = false;
c96cf282 5222 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
5223 r = -EALREADY;
5224 } else
5225 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 5226
041a62bc 5227 if (r) {
aac89168 5228 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4a580877 5229 r, adev_to_drm(tmp_adev)->unique);
19349072 5230 goto out;
ce316fa5
LM
5231 }
5232 }
5233
041a62bc
AG
5234 /* For XGMI wait for all resets to complete before proceed */
5235 if (!r) {
655ce9cb 5236 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
ce316fa5
LM
5237 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5238 flush_work(&tmp_adev->xgmi_reset_work);
5239 r = tmp_adev->asic_reset_res;
5240 if (r)
5241 break;
ce316fa5
LM
5242 }
5243 }
5244 }
ce316fa5 5245 }
26bc5340 5246
43c4d576 5247 if (!r && amdgpu_ras_intr_triggered()) {
655ce9cb 5248 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
21226f02 5249 amdgpu_ras_reset_error_count(tmp_adev, AMDGPU_RAS_BLOCK__MMHUB);
43c4d576
JC
5250 }
5251
00eaa571 5252 amdgpu_ras_intr_cleared();
43c4d576 5253 }
00eaa571 5254
f5c7e779
YC
5255 /* Since the mode1 reset affects base ip blocks, the
5256 * phase1 ip blocks need to be resumed. Otherwise there
5257 * will be a BIOS signature error and the psp bootloader
5258 * can't load kdb on the next amdgpu install.
5259 */
5260 if (gpu_reset_for_dev_remove) {
5261 list_for_each_entry(tmp_adev, device_list_handle, reset_list)
5262 amdgpu_device_ip_resume_phase1(tmp_adev);
5263
5264 goto end;
5265 }
5266
655ce9cb 5267 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
26bc5340
AG
5268 if (need_full_reset) {
5269 /* post card */
e3c1b071 5270 r = amdgpu_device_asic_init(tmp_adev);
5271 if (r) {
aac89168 5272 dev_warn(tmp_adev->dev, "asic atom init failed!");
e3c1b071 5273 } else {
26bc5340 5274 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
9cec53c1 5275
26bc5340
AG
5276 r = amdgpu_device_ip_resume_phase1(tmp_adev);
5277 if (r)
5278 goto out;
5279
5280 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
a7691785
AA
5281
5282 amdgpu_coredump(tmp_adev, vram_lost, reset_context);
5283
26bc5340 5284 if (vram_lost) {
77e7f829 5285 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 5286 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
5287 }
5288
26bc5340
AG
5289 r = amdgpu_device_fw_loading(tmp_adev);
5290 if (r)
5291 return r;
5292
c45e38f2
LL
5293 r = amdgpu_xcp_restore_partition_mode(
5294 tmp_adev->xcp_mgr);
5295 if (r)
5296 goto out;
5297
26bc5340
AG
5298 r = amdgpu_device_ip_resume_phase2(tmp_adev);
5299 if (r)
5300 goto out;
5301
b7043800
AD
5302 if (tmp_adev->mman.buffer_funcs_ring->sched.ready)
5303 amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true);
5304
26bc5340
AG
5305 if (vram_lost)
5306 amdgpu_device_fill_reset_magic(tmp_adev);
5307
fdafb359
EQ
5308 /*
5309 * Add this ASIC as tracked as reset was already
5310 * complete successfully.
5311 */
5312 amdgpu_register_gpu_instance(tmp_adev);
5313
04442bf7
LL
5314 if (!reset_context->hive &&
5315 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
e3c1b071 5316 amdgpu_xgmi_add_device(tmp_adev);
5317
7c04ca50 5318 r = amdgpu_device_ip_late_init(tmp_adev);
5319 if (r)
5320 goto out;
5321
087451f3 5322 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
565d1941 5323
e8fbaf03
GC
5324 /*
5325 * The GPU enters a bad state once the faulty
5326 * pages detected by ECC reach the threshold, and
5327 * RAS recovery is scheduled next. So add one check
5328 * here to break recovery if it indeed exceeds the
5329 * bad page threshold, and remind the user to
5330 * retire this GPU or set a bigger
5331 * bad_page_threshold value to fix this when
5332 * probing the driver again.
5333 */
11003c68 5334 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
e8fbaf03
GC
5335 /* must succeed. */
5336 amdgpu_ras_resume(tmp_adev);
5337 } else {
5338 r = -EINVAL;
5339 goto out;
5340 }
e79a04d5 5341
26bc5340 5342 /* Update PSP FW topology after reset */
04442bf7
LL
5343 if (reset_context->hive &&
5344 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5345 r = amdgpu_xgmi_update_topology(
5346 reset_context->hive, tmp_adev);
26bc5340
AG
5347 }
5348 }
5349
26bc5340
AG
5350out:
5351 if (!r) {
5352 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5353 r = amdgpu_ib_ring_tests(tmp_adev);
5354 if (r) {
5355 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
26bc5340
AG
5356 need_full_reset = true;
5357 r = -EAGAIN;
5358 goto end;
5359 }
5360 }
5361
5362 if (!r)
5363 r = amdgpu_device_recover_vram(tmp_adev);
5364 else
5365 tmp_adev->asic_reset_res = r;
5366 }
5367
5368end:
04442bf7
LL
5369 if (need_full_reset)
5370 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5371 else
5372 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340
AG
5373 return r;
5374}
5375
e923be99 5376static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
26bc5340 5377{
5740682e 5378
a3a09142
AD
5379 switch (amdgpu_asic_reset_method(adev)) {
5380 case AMD_RESET_METHOD_MODE1:
5381 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5382 break;
5383 case AMD_RESET_METHOD_MODE2:
5384 adev->mp1_state = PP_MP1_STATE_RESET;
5385 break;
5386 default:
5387 adev->mp1_state = PP_MP1_STATE_NONE;
5388 break;
5389 }
26bc5340 5390}
d38ceaf9 5391
e923be99 5392static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
26bc5340 5393{
89041940 5394 amdgpu_vf_error_trans_all(adev);
a3a09142 5395 adev->mp1_state = PP_MP1_STATE_NONE;
91fb309d
HC
5396}
5397
3f12acc8
EQ
5398static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5399{
5400 struct pci_dev *p = NULL;
5401
5402 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5403 adev->pdev->bus->number, 1);
5404 if (p) {
5405 pm_runtime_enable(&(p->dev));
5406 pm_runtime_resume(&(p->dev));
5407 }
b85e285e
YY
5408
5409 pci_dev_put(p);
3f12acc8
EQ
5410}
5411
5412static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5413{
5414 enum amd_reset_method reset_method;
5415 struct pci_dev *p = NULL;
5416 u64 expires;
5417
5418 /*
5419 * For now, only BACO and mode1 reset are confirmed
5420 * to suffer from the audio issue if not properly suspended.
5421 */
5422 reset_method = amdgpu_asic_reset_method(adev);
5423 if ((reset_method != AMD_RESET_METHOD_BACO) &&
5424 (reset_method != AMD_RESET_METHOD_MODE1))
5425 return -EINVAL;
5426
5427 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5428 adev->pdev->bus->number, 1);
5429 if (!p)
5430 return -ENODEV;
5431
5432 expires = pm_runtime_autosuspend_expiration(&(p->dev));
5433 if (!expires)
5434 /*
5435 * If we cannot get the audio device autosuspend delay,
5436 * a fixed 4S interval will be used. Since 3S is the
5437 * audio controller's default autosuspend delay setting,
5438 * the 4S used here is guaranteed to cover that.
5439 */
54b7feb9 5440 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
5441
5442 while (!pm_runtime_status_suspended(&(p->dev))) {
5443 if (!pm_runtime_suspend(&(p->dev)))
5444 break;
5445
5446 if (expires < ktime_get_mono_fast_ns()) {
5447 dev_warn(adev->dev, "failed to suspend display audio\n");
b85e285e 5448 pci_dev_put(p);
3f12acc8
EQ
5449 /* TODO: abort the succeeding gpu reset? */
5450 return -ETIMEDOUT;
5451 }
5452 }
5453
5454 pm_runtime_disable(&(p->dev));
5455
b85e285e 5456 pci_dev_put(p);
3f12acc8
EQ
5457 return 0;
5458}
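
/*
 * A minimal sketch, assuming a hypothetical
 * example_reset_with_audio_quiesced(), of how the two audio helpers above
 * are meant to be paired around a reset, mirroring what
 * amdgpu_device_gpu_recover() does further below.
 */
static void example_reset_with_audio_quiesced(struct amdgpu_device *adev)
{
	bool audio_suspended = !amdgpu_device_suspend_display_audio(adev);

	/* ...perform the actual reset sequence here... */

	if (audio_suspended)
		amdgpu_device_resume_display_audio(adev);
}
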
5459
d193b12b 5460static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
247c7b0d
AG
5461{
5462 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5463
5464#if defined(CONFIG_DEBUG_FS)
5465 if (!amdgpu_sriov_vf(adev))
5466 cancel_work(&adev->reset_work);
5467#endif
5468
5469 if (adev->kfd.dev)
5470 cancel_work(&adev->kfd.reset_work);
5471
5472 if (amdgpu_sriov_vf(adev))
5473 cancel_work(&adev->virt.flr_work);
5474
5475 if (con && adev->ras_enabled)
5476 cancel_work(&con->recovery_work);
5477
5478}
5479
26bc5340 5480/**
6e9c65f7 5481 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
26bc5340 5482 *
982a820b 5483 * @adev: amdgpu_device pointer
26bc5340 5484 * @job: which job triggered the hang
80bd2de1 5485 * @reset_context: amdgpu reset context pointer
26bc5340
AG
5486 *
5487 * Attempt to reset the GPU if it has hung (all asics).
5488 * Attempt to do a soft-reset or full-reset and reinitialize the ASIC.
5489 * Returns 0 for success or an error on failure.
5490 */
5491
cf727044 5492int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
f1549c09
LG
5493 struct amdgpu_job *job,
5494 struct amdgpu_reset_context *reset_context)
26bc5340 5495{
1d721ed6 5496 struct list_head device_list, *device_list_handle = NULL;
7dd8c205 5497 bool job_signaled = false;
26bc5340 5498 struct amdgpu_hive_info *hive = NULL;
26bc5340 5499 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 5500 int i, r = 0;
bb5c7235 5501 bool need_emergency_restart = false;
3f12acc8 5502 bool audio_suspended = false;
f5c7e779
YC
5503 bool gpu_reset_for_dev_remove = false;
5504
5505 gpu_reset_for_dev_remove =
5506 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5507 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340 5508
6e3cd2a9 5509 /*
bb5c7235
WS
5510 * Special case: RAS triggered and full reset isn't supported
5511 */
5512 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5513
d5ea093e
AG
5514 /*
5515 * Flush RAM to disk so that after reboot
5516 * the user can read log and see why the system rebooted.
5517 */
80285ae1
SY
5518 if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
5519 amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
5520 DRM_WARN("Emergency reboot.");
5521
5522 ksys_sync_helper();
5523 emergency_restart();
5524 }
5525
b823821f 5526 dev_info(adev->dev, "GPU %s begin!\n",
bb5c7235 5527 need_emergency_restart ? "jobs stop":"reset");
26bc5340 5528
175ac6ec
ZL
5529 if (!amdgpu_sriov_vf(adev))
5530 hive = amdgpu_get_xgmi_hive(adev);
681260df 5531 if (hive)
53b3f8f4 5532 mutex_lock(&hive->hive_lock);
26bc5340 5533
f1549c09
LG
5534 reset_context->job = job;
5535 reset_context->hive = hive;
9e94d22c
EQ
5536 /*
5537 * Build list of devices to reset.
5538 * In case we are in XGMI hive mode, resort the device list
5539 * to put adev in the 1st position.
5540 */
5541 INIT_LIST_HEAD(&device_list);
175ac6ec 5542 if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
83d29a5f 5543 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
655ce9cb 5544 list_add_tail(&tmp_adev->reset_list, &device_list);
83d29a5f
YC
5545 if (gpu_reset_for_dev_remove && adev->shutdown)
5546 tmp_adev->shutdown = true;
5547 }
655ce9cb 5548 if (!list_is_first(&adev->reset_list, &device_list))
5549 list_rotate_to_front(&adev->reset_list, &device_list);
5550 device_list_handle = &device_list;
26bc5340 5551 } else {
655ce9cb 5552 list_add_tail(&adev->reset_list, &device_list);
26bc5340
AG
5553 device_list_handle = &device_list;
5554 }
5555
e923be99
AG
5556 /* We need to lock reset domain only once both for XGMI and single device */
5557 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5558 reset_list);
3675c2f2 5559 amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
e923be99 5560
1d721ed6 5561 /* block all schedulers and reset given job's ring */
655ce9cb 5562 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f287a3c5 5563
e923be99 5564 amdgpu_device_set_mp1_state(tmp_adev);
f287a3c5 5565
3f12acc8
EQ
5566 /*
5567 * Try to put the audio codec into suspend state
5568 * before gpu reset started.
5569 *
5570 * Due to the power domain of the graphics device
5571 * is shared with AZ power domain. Without this,
5572 * we may change the audio hardware from behind
5573 * the audio driver's back. That will trigger
5574 * some audio codec errors.
5575 */
5576 if (!amdgpu_device_suspend_display_audio(tmp_adev))
5577 audio_suspended = true;
5578
9e94d22c
EQ
5579 amdgpu_ras_set_error_query_ready(tmp_adev, false);
5580
52fb44cf
EQ
5581 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5582
c004d44e 5583 if (!amdgpu_sriov_vf(tmp_adev))
428890a3 5584 amdgpu_amdkfd_pre_reset(tmp_adev);
9e94d22c 5585
12ffa55d
AG
5586 /*
5587 * Mark these ASICs to be reset as untracked first
5588 * and add them back after the reset completes
5589 */
5590 amdgpu_unregister_gpu_instance(tmp_adev);
5591
163d4cd2 5592 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
565d1941 5593
f1c1314b 5594 /* disable ras on ALL IPs */
bb5c7235 5595 if (!need_emergency_restart &&
b823821f 5596 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 5597 amdgpu_ras_suspend(tmp_adev);
5598
1d721ed6
AG
5599 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5600 struct amdgpu_ring *ring = tmp_adev->rings[i];
5601
5602 if (!ring || !ring->sched.thread)
5603 continue;
5604
0b2d2c2e 5605 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 5606
bb5c7235 5607 if (need_emergency_restart)
7c6e68c7 5608 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6 5609 }
8f8c80f4 5610 atomic_inc(&tmp_adev->gpu_reset_counter);
1d721ed6
AG
5611 }
5612
bb5c7235 5613 if (need_emergency_restart)
7c6e68c7
AG
5614 goto skip_sched_resume;
5615
1d721ed6
AG
5616 /*
5617 * Must check guilty signal here since after this point all old
5618 * HW fences are force signaled.
5619 *
5620 * job->base holds a reference to parent fence
5621 */
f6a3f660 5622 if (job && dma_fence_is_signaled(&job->hw_fence)) {
1d721ed6 5623 job_signaled = true;
1d721ed6
AG
5624 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5625 goto skip_hw_reset;
5626 }
5627
26bc5340 5628retry: /* Rest of adevs pre asic reset from XGMI hive. */
655ce9cb 5629 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f5c7e779
YC
5630 if (gpu_reset_for_dev_remove) {
5631 /* Workaround for ASICs that need to disable SMC first */
5632 amdgpu_device_smu_fini_early(tmp_adev);
5633 }
f1549c09 5634 r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
26bc5340
AG
5635 /* TODO: Should we stop? */
5636 if (r) {
aac89168 5637 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
4a580877 5638 r, adev_to_drm(tmp_adev)->unique);
26bc5340
AG
5639 tmp_adev->asic_reset_res = r;
5640 }
247c7b0d
AG
5641
5642 /*
5643 * Drop all pending non-scheduler resets. Scheduler resets
5644 * were already dropped during drm_sched_stop
5645 */
d193b12b 5646 amdgpu_device_stop_pending_resets(tmp_adev);
26bc5340
AG
5647 }
5648
5649 /* Actual ASIC resets if needed.*/
4f30d920 5650 /* Host driver will handle XGMI hive reset for SRIOV */
26bc5340
AG
5651 if (amdgpu_sriov_vf(adev)) {
5652 r = amdgpu_device_reset_sriov(adev, job ? false : true);
5653 if (r)
5654 adev->asic_reset_res = r;
950d6425 5655
28606c4e 5656 /* Aldebaran and gfx_11_0_3 support RAS in SRIOV, so we need to resume RAS during reset */
4e8303cf
LL
5657 if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
5658 IP_VERSION(9, 4, 2) ||
5659 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
950d6425 5660 amdgpu_ras_resume(adev);
26bc5340 5661 } else {
f1549c09 5662 r = amdgpu_do_asic_reset(device_list_handle, reset_context);
b98a1648 5663 if (r && r == -EAGAIN)
26bc5340 5664 goto retry;
f5c7e779
YC
5665
5666 if (!r && gpu_reset_for_dev_remove)
5667 goto recover_end;
26bc5340
AG
5668 }
5669
1d721ed6
AG
5670skip_hw_reset:
5671
26bc5340 5672 /* Post ASIC reset for all devs .*/
655ce9cb 5673 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
7c6e68c7 5674
1d721ed6
AG
5675 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5676 struct amdgpu_ring *ring = tmp_adev->rings[i];
5677
5678 if (!ring || !ring->sched.thread)
5679 continue;
5680
6868a2c4 5681 drm_sched_start(&ring->sched, true);
1d721ed6
AG
5682 }
5683
b8920e1e 5684 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
4a580877 5685 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
1d721ed6 5686
7258fa31
SK
5687 if (tmp_adev->asic_reset_res)
5688 r = tmp_adev->asic_reset_res;
5689
1d721ed6 5690 tmp_adev->asic_reset_res = 0;
26bc5340
AG
5691
5692 if (r) {
5693 /* bad news, how to tell it to userspace ? */
12ffa55d 5694 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
5695 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5696 } else {
12ffa55d 5697 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
3fa8f89d
S
5698 if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5699 DRM_WARN("smart shift update failed\n");
26bc5340 5700 }
7c6e68c7 5701 }
26bc5340 5702
7c6e68c7 5703skip_sched_resume:
655ce9cb 5704 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
428890a3 5705 /* unlock kfd: SRIOV would do it separately */
c004d44e 5706 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
428890a3 5707 amdgpu_amdkfd_post_reset(tmp_adev);
8e2712e7 5708
5709 /* kfd_post_reset will do nothing if kfd device is not initialized,
5710 * need to bring up kfd here if it was not initialized before
5711 */
5712 if (!adev->kfd.init_complete)
5713 amdgpu_amdkfd_device_init(adev);
5714
3f12acc8
EQ
5715 if (audio_suspended)
5716 amdgpu_device_resume_display_audio(tmp_adev);
e923be99
AG
5717
5718 amdgpu_device_unset_mp1_state(tmp_adev);
d293470e
YC
5719
5720 amdgpu_ras_set_error_query_ready(tmp_adev, true);
26bc5340
AG
5721 }
5722
f5c7e779 5723recover_end:
e923be99
AG
5724 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5725 reset_list);
5726 amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5727
9e94d22c 5728 if (hive) {
9e94d22c 5729 mutex_unlock(&hive->hive_lock);
d95e8e97 5730 amdgpu_put_xgmi_hive(hive);
9e94d22c 5731 }
26bc5340 5732
f287a3c5 5733 if (r)
26bc5340 5734 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
ab9a0b1f
AG
5735
5736 atomic_set(&adev->reset_domain->reset_res, r);
d38ceaf9
AD
5737 return r;
5738}
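
/*
 * A minimal caller sketch, modelled loosely on the job timeout path and
 * assuming a hypothetical example_timedout_job(): build an
 * amdgpu_reset_context and hand the hung job to amdgpu_device_gpu_recover().
 * The flag choice here is illustrative, not a copy of amdgpu_job.c.
 */
static void example_timedout_job(struct amdgpu_device *adev, struct amdgpu_job *job)
{
	struct amdgpu_reset_context reset_context;

	memset(&reset_context, 0, sizeof(reset_context));
	reset_context.method = AMD_RESET_METHOD_NONE;	/* let the ASIC pick */
	reset_context.reset_req_dev = adev;
	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);

	if (amdgpu_device_should_recover_gpu(adev))
		amdgpu_device_gpu_recover(adev, job, &reset_context);
}
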
5739
466a7d11
ML
5740/**
5741 * amdgpu_device_partner_bandwidth - find the bandwidth of appropriate partner
5742 *
5743 * @adev: amdgpu_device pointer
5744 * @speed: pointer to the speed of the link
5745 * @width: pointer to the width of the link
5746 *
5747 * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
5748 * first physical partner to an AMD dGPU.
5749 * This will exclude any virtual switches and links.
5750 */
5751static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
5752 enum pci_bus_speed *speed,
5753 enum pcie_link_width *width)
5754{
5755 struct pci_dev *parent = adev->pdev;
5756
5757 if (!speed || !width)
5758 return;
5759
5760 *speed = PCI_SPEED_UNKNOWN;
5761 *width = PCIE_LNK_WIDTH_UNKNOWN;
5762
5763 while ((parent = pci_upstream_bridge(parent))) {
5764 /* skip upstream/downstream switches internal to dGPU */
5765 if (parent->vendor == PCI_VENDOR_ID_ATI)
5766 continue;
5767 *speed = pcie_get_speed_cap(parent);
5768 *width = pcie_get_width_cap(parent);
5769 break;
5770 }
5771}
5772
e3ecdffa
AD
5773/**
5774 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
5775 *
5776 * @adev: amdgpu_device pointer
5777 *
5778 * Fetches and stores in the driver the PCIE capabilities (gen speed
5779 * and lanes) of the slot the device is in. Handles APUs and
5780 * virtualized environments where PCIE config space may not be available.
5781 */
5494d864 5782static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 5783{
5d9a6330 5784 struct pci_dev *pdev;
c5313457
HK
5785 enum pci_bus_speed speed_cap, platform_speed_cap;
5786 enum pcie_link_width platform_link_width;
d0dd7f0c 5787
cd474ba0
AD
5788 if (amdgpu_pcie_gen_cap)
5789 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 5790
cd474ba0
AD
5791 if (amdgpu_pcie_lane_cap)
5792 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 5793
cd474ba0 5794 /* covers APUs as well */
04e85958 5795 if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
cd474ba0
AD
5796 if (adev->pm.pcie_gen_mask == 0)
5797 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5798 if (adev->pm.pcie_mlw_mask == 0)
5799 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 5800 return;
cd474ba0 5801 }
d0dd7f0c 5802
c5313457
HK
5803 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5804 return;
5805
466a7d11
ML
5806 amdgpu_device_partner_bandwidth(adev, &platform_speed_cap,
5807 &platform_link_width);
c5313457 5808
cd474ba0 5809 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
5810 /* asic caps */
5811 pdev = adev->pdev;
5812 speed_cap = pcie_get_speed_cap(pdev);
5813 if (speed_cap == PCI_SPEED_UNKNOWN) {
5814 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
5815 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5816 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 5817 } else {
2b3a1f51
FX
5818 if (speed_cap == PCIE_SPEED_32_0GT)
5819 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5820 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5821 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5822 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5823 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5824 else if (speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5825 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5826 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5827 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5828 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5829 else if (speed_cap == PCIE_SPEED_8_0GT)
5830 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5831 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5832 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5833 else if (speed_cap == PCIE_SPEED_5_0GT)
5834 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5835 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5836 else
5837 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5838 }
5839 /* platform caps */
c5313457 5840 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
5841 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5842 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5843 } else {
2b3a1f51
FX
5844 if (platform_speed_cap == PCIE_SPEED_32_0GT)
5845 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5846 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5847 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5848 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5849 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5850 else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5851 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5852 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5853 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5854 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 5855 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
5856 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5857 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5858 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 5859 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
5860 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5861 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5862 else
5863 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5864
cd474ba0
AD
5865 }
5866 }
5867 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 5868 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
5869 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5870 } else {
c5313457 5871 switch (platform_link_width) {
5d9a6330 5872 case PCIE_LNK_X32:
cd474ba0
AD
5873 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5874 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5875 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5876 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5877 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5878 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5879 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5880 break;
5d9a6330 5881 case PCIE_LNK_X16:
cd474ba0
AD
5882 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5883 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5884 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5885 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5886 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5887 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5888 break;
5d9a6330 5889 case PCIE_LNK_X12:
cd474ba0
AD
5890 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5891 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5892 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5893 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5894 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5895 break;
5d9a6330 5896 case PCIE_LNK_X8:
cd474ba0
AD
5897 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5898 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5899 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5900 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5901 break;
5d9a6330 5902 case PCIE_LNK_X4:
cd474ba0
AD
5903 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5904 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5905 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5906 break;
5d9a6330 5907 case PCIE_LNK_X2:
cd474ba0
AD
5908 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5909 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5910 break;
5d9a6330 5911 case PCIE_LNK_X1:
cd474ba0
AD
5912 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5913 break;
5914 default:
5915 break;
5916 }
d0dd7f0c
AD
5917 }
5918 }
5919}
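
/*
 * A minimal sketch of consuming the masks filled in above, assuming a
 * hypothetical helper example_platform_supports_gen4_x16(): power
 * management code can test individual CAIL_* bits to decide which link
 * configurations may be requested.
 */
static bool example_platform_supports_gen4_x16(struct amdgpu_device *adev)
{
	return (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4) &&
	       (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X16);
}
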
d38ceaf9 5920
08a2fd23
RE
5921/**
5922 * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
5923 *
5924 * @adev: amdgpu_device pointer
5925 * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
5926 *
5927 * Return true if @peer_adev can access (DMA) @adev through the PCIe
5928 * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
5929 * @peer_adev.
5930 */
5931bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
5932 struct amdgpu_device *peer_adev)
5933{
5934#ifdef CONFIG_HSA_AMD_P2P
5935 uint64_t address_mask = peer_adev->dev->dma_mask ?
5936 ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
5937 resource_size_t aper_limit =
5938 adev->gmc.aper_base + adev->gmc.aper_size - 1;
bb66ecbf
LL
5939 bool p2p_access =
5940 !adev->gmc.xgmi.connected_to_cpu &&
5941 !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
08a2fd23
RE
5942
5943 return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
5944 adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
5945 !(adev->gmc.aper_base & address_mask ||
5946 aper_limit & address_mask));
5947#else
5948 return false;
5949#endif
5950}
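
/*
 * A minimal sketch, assuming a hypothetical example_p2p_possible(), of the
 * symmetric check a P2P setup path would typically perform: each device
 * must be able to reach the other one's visible VRAM BAR.
 */
static bool example_p2p_possible(struct amdgpu_device *a, struct amdgpu_device *b)
{
	return amdgpu_device_is_peer_accessible(a, b) &&
	       amdgpu_device_is_peer_accessible(b, a);
}
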
5951
361dbd01
AD
5952int amdgpu_device_baco_enter(struct drm_device *dev)
5953{
1348969a 5954 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5955 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01 5956
6ab68650 5957 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
5958 return -ENOTSUPP;
5959
8ab0d6f0 5960 if (ras && adev->ras_enabled &&
acdae216 5961 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5962 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
5963
9530273e 5964 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
5965}
5966
5967int amdgpu_device_baco_exit(struct drm_device *dev)
5968{
1348969a 5969 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5970 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 5971 int ret = 0;
361dbd01 5972
6ab68650 5973 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
5974 return -ENOTSUPP;
5975
9530273e
EQ
5976 ret = amdgpu_dpm_baco_exit(adev);
5977 if (ret)
5978 return ret;
7a22677b 5979
8ab0d6f0 5980 if (ras && adev->ras_enabled &&
acdae216 5981 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5982 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
5983
1bece222
CL
5984 if (amdgpu_passthrough(adev) &&
5985 adev->nbio.funcs->clear_doorbell_interrupt)
5986 adev->nbio.funcs->clear_doorbell_interrupt(adev);
5987
7a22677b 5988 return 0;
361dbd01 5989}
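
/*
 * A minimal sketch of pairing the BACO helpers above in a runtime-idle
 * style flow; example_runtime_idle() is a hypothetical name used only for
 * illustration.
 */
static int example_runtime_idle(struct drm_device *dev)
{
	int r;

	r = amdgpu_device_baco_enter(dev);
	if (r)
		return r;	/* -ENOTSUPP when BACO is not available */

	/* ...the device sits in BACO until it is needed again... */

	return amdgpu_device_baco_exit(dev);
}
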
c9a6b82f
AG
5990
5991/**
5992 * amdgpu_pci_error_detected - Called when a PCI error is detected.
5993 * @pdev: PCI device struct
5994 * @state: PCI channel state
5995 *
5996 * Description: Called when a PCI error is detected.
5997 *
5998 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
5999 */
6000pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
6001{
6002 struct drm_device *dev = pci_get_drvdata(pdev);
6003 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 6004 int i;
c9a6b82f
AG
6005
6006 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
6007
6894305c
AG
6008 if (adev->gmc.xgmi.num_physical_nodes > 1) {
6009 DRM_WARN("No support for XGMI hive yet...");
6010 return PCI_ERS_RESULT_DISCONNECT;
6011 }
6012
e17e27f9
GC
6013 adev->pci_channel_state = state;
6014
c9a6b82f
AG
6015 switch (state) {
6016 case pci_channel_io_normal:
6017 return PCI_ERS_RESULT_CAN_RECOVER;
acd89fca 6018 /* Fatal error, prepare for slot reset */
8a11d283
TZ
6019 case pci_channel_io_frozen:
6020 /*
d0fb18b5 6021 * Locking adev->reset_domain->sem will prevent any external access
acd89fca
AG
6022 * to GPU during PCI error recovery
6023 */
3675c2f2 6024 amdgpu_device_lock_reset_domain(adev->reset_domain);
e923be99 6025 amdgpu_device_set_mp1_state(adev);
acd89fca
AG
6026
6027 /*
6028 * Block any work scheduling as we do for regular GPU reset
6029 * for the duration of the recovery
6030 */
6031 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6032 struct amdgpu_ring *ring = adev->rings[i];
6033
6034 if (!ring || !ring->sched.thread)
6035 continue;
6036
6037 drm_sched_stop(&ring->sched, NULL);
6038 }
8f8c80f4 6039 atomic_inc(&adev->gpu_reset_counter);
c9a6b82f
AG
6040 return PCI_ERS_RESULT_NEED_RESET;
6041 case pci_channel_io_perm_failure:
6042 /* Permanent error, prepare for device removal */
6043 return PCI_ERS_RESULT_DISCONNECT;
6044 }
6045
6046 return PCI_ERS_RESULT_NEED_RESET;
6047}
6048
6049/**
6050 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
6051 * @pdev: pointer to PCI device
6052 */
6053pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
6054{
6055
6056 DRM_INFO("PCI error: mmio enabled callback!!\n");
6057
6058 /* TODO - dump whatever for debugging purposes */
6059
6060 /* This called only if amdgpu_pci_error_detected returns
6061 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
6062 * works, no need to reset slot.
6063 */
6064
6065 return PCI_ERS_RESULT_RECOVERED;
6066}
6067
6068/**
6069 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
6070 * @pdev: PCI device struct
6071 *
6072 * Description: This routine is called by the pci error recovery
6073 * code after the PCI slot has been reset, just before we
6074 * should resume normal operations.
6075 */
6076pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
6077{
6078 struct drm_device *dev = pci_get_drvdata(pdev);
6079 struct amdgpu_device *adev = drm_to_adev(dev);
362c7b91 6080 int r, i;
04442bf7 6081 struct amdgpu_reset_context reset_context;
362c7b91 6082 u32 memsize;
7ac71382 6083 struct list_head device_list;
c9a6b82f
AG
6084
6085 DRM_INFO("PCI error: slot reset callback!!\n");
6086
04442bf7
LL
6087 memset(&reset_context, 0, sizeof(reset_context));
6088
7ac71382 6089 INIT_LIST_HEAD(&device_list);
655ce9cb 6090 list_add_tail(&adev->reset_list, &device_list);
7ac71382 6091
362c7b91
AG
6092 /* wait for asic to come out of reset */
6093 msleep(500);
6094
7ac71382 6095 /* Restore PCI confspace */
c1dd4aa6 6096 amdgpu_device_load_pci_state(pdev);
c9a6b82f 6097
362c7b91
AG
6098 /* confirm ASIC came out of reset */
6099 for (i = 0; i < adev->usec_timeout; i++) {
6100 memsize = amdgpu_asic_get_config_memsize(adev);
6101
6102 if (memsize != 0xffffffff)
6103 break;
6104 udelay(1);
6105 }
6106 if (memsize == 0xffffffff) {
6107 r = -ETIME;
6108 goto out;
6109 }
6110
04442bf7
LL
6111 reset_context.method = AMD_RESET_METHOD_NONE;
6112 reset_context.reset_req_dev = adev;
6113 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
6114 set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
6115
7afefb81 6116 adev->no_hw_access = true;
04442bf7 6117 r = amdgpu_device_pre_asic_reset(adev, &reset_context);
7afefb81 6118 adev->no_hw_access = false;
c9a6b82f
AG
6119 if (r)
6120 goto out;
6121
04442bf7 6122 r = amdgpu_do_asic_reset(&device_list, &reset_context);
c9a6b82f
AG
6123
6124out:
c9a6b82f 6125 if (!r) {
c1dd4aa6
AG
6126 if (amdgpu_device_cache_pci_state(adev->pdev))
6127 pci_restore_state(adev->pdev);
6128
c9a6b82f
AG
6129 DRM_INFO("PCIe error recovery succeeded\n");
6130 } else {
6131 DRM_ERROR("PCIe error recovery failed, err:%d", r);
e923be99
AG
6132 amdgpu_device_unset_mp1_state(adev);
6133 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f
AG
6134 }
6135
6136 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
6137}
6138
6139/**
6140 * amdgpu_pci_resume() - resume normal ops after PCI reset
6141 * @pdev: pointer to PCI device
6142 *
6143 * Called when the error recovery driver tells us that it's
505199a3 6144 * OK to resume normal operation.
c9a6b82f
AG
6145 */
6146void amdgpu_pci_resume(struct pci_dev *pdev)
6147{
6148 struct drm_device *dev = pci_get_drvdata(pdev);
6149 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 6150 int i;
c9a6b82f 6151
c9a6b82f
AG
6152
6153 DRM_INFO("PCI error: resume callback!!\n");
acd89fca 6154
e17e27f9
GC
6155 /* Only continue execution for the case of pci_channel_io_frozen */
6156 if (adev->pci_channel_state != pci_channel_io_frozen)
6157 return;
6158
acd89fca
AG
6159 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6160 struct amdgpu_ring *ring = adev->rings[i];
6161
6162 if (!ring || !ring->sched.thread)
6163 continue;
6164
acd89fca
AG
6165 drm_sched_start(&ring->sched, true);
6166 }
6167
e923be99
AG
6168 amdgpu_device_unset_mp1_state(adev);
6169 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f 6170}
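
/*
 * A sketch of how the four AER callbacks above are typically wired into
 * struct pci_error_handlers (the actual hookup lives outside this file);
 * shown here only to make the call order of the handlers easier to follow.
 */
static const struct pci_error_handlers example_amdgpu_pci_err_handler = {
	.error_detected	= amdgpu_pci_error_detected,
	.mmio_enabled	= amdgpu_pci_mmio_enabled,
	.slot_reset	= amdgpu_pci_slot_reset,
	.resume		= amdgpu_pci_resume,
};
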
c1dd4aa6
AG
6171
6172bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
6173{
6174 struct drm_device *dev = pci_get_drvdata(pdev);
6175 struct amdgpu_device *adev = drm_to_adev(dev);
6176 int r;
6177
6178 r = pci_save_state(pdev);
6179 if (!r) {
6180 kfree(adev->pci_state);
6181
6182 adev->pci_state = pci_store_saved_state(pdev);
6183
6184 if (!adev->pci_state) {
6185 DRM_ERROR("Failed to store PCI saved state");
6186 return false;
6187 }
6188 } else {
6189 DRM_WARN("Failed to save PCI state, err:%d\n", r);
6190 return false;
6191 }
6192
6193 return true;
6194}
6195
6196bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
6197{
6198 struct drm_device *dev = pci_get_drvdata(pdev);
6199 struct amdgpu_device *adev = drm_to_adev(dev);
6200 int r;
6201
6202 if (!adev->pci_state)
6203 return false;
6204
6205 r = pci_load_saved_state(pdev, adev->pci_state);
6206
6207 if (!r) {
6208 pci_restore_state(pdev);
6209 } else {
6210 DRM_WARN("Failed to load PCI state, err:%d\n", r);
6211 return false;
6212 }
6213
6214 return true;
6215}
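
/*
 * A minimal sketch, assuming a hypothetical example_reset_with_pci_state(),
 * of the intended cache/restore pairing around an ASIC reset, mirroring
 * what amdgpu_device_mode1_reset() does above.
 */
static int example_reset_with_pci_state(struct amdgpu_device *adev)
{
	int r;

	amdgpu_device_cache_pci_state(adev->pdev);	/* save config space */
	r = amdgpu_asic_reset(adev);			/* hardware reset */
	amdgpu_device_load_pci_state(adev->pdev);	/* restore config space */

	return r;
}
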
6216
810085dd
EH
6217void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
6218 struct amdgpu_ring *ring)
6219{
6220#ifdef CONFIG_X86_64
b818a5d3 6221 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
6222 return;
6223#endif
6224 if (adev->gmc.xgmi.connected_to_cpu)
6225 return;
6226
6227 if (ring && ring->funcs->emit_hdp_flush)
6228 amdgpu_ring_emit_hdp_flush(ring);
6229 else
6230 amdgpu_asic_flush_hdp(adev, ring);
6231}
c1dd4aa6 6232
810085dd
EH
6233void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
6234 struct amdgpu_ring *ring)
6235{
6236#ifdef CONFIG_X86_64
b818a5d3 6237 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
6238 return;
6239#endif
6240 if (adev->gmc.xgmi.connected_to_cpu)
6241 return;
c1dd4aa6 6242
810085dd
EH
6243 amdgpu_asic_invalidate_hdp(adev, ring);
6244}
34f3a4a9 6245
89a7a870
AG
6246int amdgpu_in_reset(struct amdgpu_device *adev)
6247{
6248 return atomic_read(&adev->reset_domain->in_gpu_reset);
53a17b6b
TZ
6249}
6250
34f3a4a9
LY
6251/**
6252 * amdgpu_device_halt() - bring hardware to some kind of halt state
6253 *
6254 * @adev: amdgpu_device pointer
6255 *
6256 * Bring hardware to some kind of halt state so that no one can touch it
6257 * any more. It helps to maintain the error context when an error occurs.
6258 * Compared to a simple hang, the system will stay stable at least for SSH
6259 * access. Then it should be trivial to inspect the hardware state and
6260 * see what's going on. Implemented as follows:
6261 *
6262 * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
6263 * clears all CPU mappings to device, disallows remappings through page faults
6264 * 2. amdgpu_irq_disable_all() disables all interrupts
6265 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
6266 * 4. set adev->no_hw_access to avoid potential crashes after step 5
6267 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
6268 * 6. pci_disable_device() and pci_wait_for_pending_transaction()
6269 * flush any in flight DMA operations
6270 */
6271void amdgpu_device_halt(struct amdgpu_device *adev)
6272{
6273 struct pci_dev *pdev = adev->pdev;
e0f943b4 6274 struct drm_device *ddev = adev_to_drm(adev);
34f3a4a9 6275
2c1c7ba4 6276 amdgpu_xcp_dev_unplug(adev);
34f3a4a9
LY
6277 drm_dev_unplug(ddev);
6278
6279 amdgpu_irq_disable_all(adev);
6280
6281 amdgpu_fence_driver_hw_fini(adev);
6282
6283 adev->no_hw_access = true;
6284
6285 amdgpu_device_unmap_mmio(adev);
6286
6287 pci_disable_device(pdev);
6288 pci_wait_for_pending_transaction(pdev);
6289}
86700a40
XD
6290
6291u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
6292 u32 reg)
6293{
6294 unsigned long flags, address, data;
6295 u32 r;
6296
6297 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6298 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6299
6300 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6301 WREG32(address, reg * 4);
6302 (void)RREG32(address);
6303 r = RREG32(data);
6304 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6305 return r;
6306}
6307
6308void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
6309 u32 reg, u32 v)
6310{
6311 unsigned long flags, address, data;
6312
6313 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6314 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6315
6316 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6317 WREG32(address, reg * 4);
6318 (void)RREG32(address);
6319 WREG32(data, v);
6320 (void)RREG32(data);
6321 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6322}
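
/*
 * An illustrative read-modify-write sketch using the two indirect PCIe
 * port accessors above; example_pcie_port_set_bits() and its arguments are
 * placeholders, not real hardware definitions.
 */
static void example_pcie_port_set_bits(struct amdgpu_device *adev, u32 reg, u32 bits)
{
	u32 v = amdgpu_device_pcie_port_rreg(adev, reg);

	amdgpu_device_pcie_port_wreg(adev, reg, v | bits);
}
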
68ce8b24
CK
6323
6324/**
6325 * amdgpu_device_switch_gang - switch to a new gang
6326 * @adev: amdgpu_device pointer
6327 * @gang: the gang to switch to
6328 *
6329 * Try to switch to a new gang.
6330 * Returns: NULL if we switched to the new gang or a reference to the current
6331 * gang leader.
6332 */
6333struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
6334 struct dma_fence *gang)
6335{
6336 struct dma_fence *old = NULL;
6337
6338 do {
6339 dma_fence_put(old);
6340 rcu_read_lock();
6341 old = dma_fence_get_rcu_safe(&adev->gang_submit);
6342 rcu_read_unlock();
6343
6344 if (old == gang)
6345 break;
6346
6347 if (!dma_fence_is_signaled(old))
6348 return old;
6349
6350 } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
6351 old, gang) != old);
6352
6353 dma_fence_put(old);
6354 return NULL;
6355}
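
/*
 * A rough sketch, assuming a hypothetical example_wait_for_gang_switch(),
 * of how a submission path can use amdgpu_device_switch_gang(): as long as
 * a previous gang leader is returned, the caller has to wait on it (or add
 * it as a dependency) and try the switch again.
 */
static void example_wait_for_gang_switch(struct amdgpu_device *adev,
					 struct dma_fence *gang_leader)
{
	struct dma_fence *old;

	while ((old = amdgpu_device_switch_gang(adev, gang_leader))) {
		dma_fence_wait(old, false);	/* wait for the previous gang */
		dma_fence_put(old);		/* drop the reference we were handed */
	}
}
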
220c8cc8
AD
6356
6357bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6358{
6359 switch (adev->asic_type) {
6360#ifdef CONFIG_DRM_AMDGPU_SI
6361 case CHIP_HAINAN:
6362#endif
6363 case CHIP_TOPAZ:
6364 /* chips with no display hardware */
6365 return false;
6366#ifdef CONFIG_DRM_AMDGPU_SI
6367 case CHIP_TAHITI:
6368 case CHIP_PITCAIRN:
6369 case CHIP_VERDE:
6370 case CHIP_OLAND:
6371#endif
6372#ifdef CONFIG_DRM_AMDGPU_CIK
6373 case CHIP_BONAIRE:
6374 case CHIP_HAWAII:
6375 case CHIP_KAVERI:
6376 case CHIP_KABINI:
6377 case CHIP_MULLINS:
6378#endif
6379 case CHIP_TONGA:
6380 case CHIP_FIJI:
6381 case CHIP_POLARIS10:
6382 case CHIP_POLARIS11:
6383 case CHIP_POLARIS12:
6384 case CHIP_VEGAM:
6385 case CHIP_CARRIZO:
6386 case CHIP_STONEY:
6387 /* chips with display hardware */
6388 return true;
6389 default:
6390 /* IP discovery */
4e8303cf 6391 if (!amdgpu_ip_version(adev, DCE_HWIP, 0) ||
220c8cc8
AD
6392 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6393 return false;
6394 return true;
6395 }
6396}
81283fee
JZ
6397
6398uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
6399 uint32_t inst, uint32_t reg_addr, char reg_name[],
6400 uint32_t expected_value, uint32_t mask)
6401{
6402 uint32_t ret = 0;
6403 uint32_t old_ = 0;
6404 uint32_t tmp_ = RREG32(reg_addr);
6405 uint32_t loop = adev->usec_timeout;
6406
6407 while ((tmp_ & (mask)) != (expected_value)) {
6408 if (old_ != tmp_) {
6409 loop = adev->usec_timeout;
6410 old_ = tmp_;
6411 } else
6412 udelay(1);
6413 tmp_ = RREG32(reg_addr);
6414 loop--;
6415 if (!loop) {
6416 DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08x\n",
6417 inst, reg_name, (uint32_t)expected_value,
6418 (uint32_t)(tmp_ & (mask)));
6419 ret = -ETIMEDOUT;
6420 break;
6421 }
6422 }
6423 return ret;
6424}
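
/*
 * An illustrative call of the polling helper above, assuming a hypothetical
 * example_wait_for_status_bit(); the register name string, expected value
 * and mask are placeholders rather than real hardware definitions.
 */
static uint32_t example_wait_for_status_bit(struct amdgpu_device *adev, uint32_t reg_addr)
{
	/* wait until bit 0 of the register reads back as 1 */
	return amdgpu_device_wait_on_rreg(adev, 0, reg_addr, "EXAMPLE_STATUS",
					  0x1, 0x1);
}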