drm/amdgpu: Init pcie_index/data address as fallback (v2)
[linux-2.6-block.git] / drivers / gpu / drm / amd / amdgpu / amdgpu_device.c
d38ceaf9
AD
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
b1ddf548 28#include <linux/power_supply.h>
0875dc9e 29#include <linux/kthread.h>
fdf2f6c5 30#include <linux/module.h>
d38ceaf9
AD
31#include <linux/console.h>
32#include <linux/slab.h>
4a74c38c 33#include <linux/iommu.h>
901e2be2 34#include <linux/pci.h>
08a2fd23 35#include <linux/pci-p2pdma.h>
d37a3929 36#include <linux/apple-gmux.h>
fdf2f6c5 37
b7cdb41e 38#include <drm/drm_aperture.h>
4562236b 39#include <drm/drm_atomic_helper.h>
973ad627 40#include <drm/drm_crtc_helper.h>
45b64fd9 41#include <drm/drm_fb_helper.h>
fcd70cd3 42#include <drm/drm_probe_helper.h>
d38ceaf9 43#include <drm/amdgpu_drm.h>
7b1c6263 44#include <linux/device.h>
d38ceaf9
AD
45#include <linux/vgaarb.h>
46#include <linux/vga_switcheroo.h>
47#include <linux/efi.h>
48#include "amdgpu.h"
f4b373f4 49#include "amdgpu_trace.h"
d38ceaf9
AD
50#include "amdgpu_i2c.h"
51#include "atom.h"
52#include "amdgpu_atombios.h"
a5bde2f9 53#include "amdgpu_atomfirmware.h"
d0dd7f0c 54#include "amd_pcie.h"
33f34802
KW
55#ifdef CONFIG_DRM_AMDGPU_SI
56#include "si.h"
57#endif
a2e73f56
AD
58#ifdef CONFIG_DRM_AMDGPU_CIK
59#include "cik.h"
60#endif
aaa36a97 61#include "vi.h"
460826e6 62#include "soc15.h"
0a5b8c7b 63#include "nv.h"
d38ceaf9 64#include "bif/bif_4_1_d.h"
bec86378 65#include <linux/firmware.h>
89041940 66#include "amdgpu_vf_error.h"
d38ceaf9 67
ba997709 68#include "amdgpu_amdkfd.h"
d2f52ac8 69#include "amdgpu_pm.h"
d38ceaf9 70
5183411b 71#include "amdgpu_xgmi.h"
c030f2e4 72#include "amdgpu_ras.h"
9c7c85f7 73#include "amdgpu_pmu.h"
bd607166 74#include "amdgpu_fru_eeprom.h"
04442bf7 75#include "amdgpu_reset.h"
85150626 76#include "amdgpu_virt.h"
5183411b 77
d5ea093e 78#include <linux/suspend.h>
c6a6e2db 79#include <drm/task_barrier.h>
3f12acc8 80#include <linux/pm_runtime.h>
d5ea093e 81
f89f8c6b
AG
82#include <drm/drm_drv.h>
83
3ad5dcfe
KHF
84#if IS_ENABLED(CONFIG_X86)
85#include <asm/intel-family.h>
86#endif
87
e2a75f88 88MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
3f76dced 89MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
2d2e5e7e 90MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
ad5a67a7 91MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
54c4d17e 92MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
65e60f6e 93MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
42b325e5 94MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
e2a75f88 95
2dc80b00 96#define AMDGPU_RESUME_MS 2000
7258fa31
SK
97#define AMDGPU_MAX_RETRY_LIMIT 2
98#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
ad390542
HZ
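/* Default PCIE index/data register pair offsets (byte offsets 0x38/0x44/0x3C,
 * expressed as dword offsets), used as a fallback when the nbio callbacks
 * that normally provide these offsets are not available yet.
 */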
99#define AMDGPU_PCIE_INDEX_FALLBACK (0x38 >> 2)
100#define AMDGPU_PCIE_INDEX_HI_FALLBACK (0x44 >> 2)
101#define AMDGPU_PCIE_DATA_FALLBACK (0x3C >> 2)
2dc80b00 102
b7cdb41e
ML
103static const struct drm_driver amdgpu_kms_driver;
104
050091ab 105const char *amdgpu_asic_name[] = {
da69c161
KW
106 "TAHITI",
107 "PITCAIRN",
108 "VERDE",
109 "OLAND",
110 "HAINAN",
d38ceaf9
AD
111 "BONAIRE",
112 "KAVERI",
113 "KABINI",
114 "HAWAII",
115 "MULLINS",
116 "TOPAZ",
117 "TONGA",
48299f95 118 "FIJI",
d38ceaf9 119 "CARRIZO",
139f4917 120 "STONEY",
2cc0c0b5
FC
121 "POLARIS10",
122 "POLARIS11",
c4642a47 123 "POLARIS12",
48ff108d 124 "VEGAM",
d4196f01 125 "VEGA10",
8fab806a 126 "VEGA12",
956fcddc 127 "VEGA20",
2ca8a5d2 128 "RAVEN",
d6c3b24e 129 "ARCTURUS",
1eee4228 130 "RENOIR",
d46b417a 131 "ALDEBARAN",
852a6626 132 "NAVI10",
d0f56dc2 133 "CYAN_SKILLFISH",
87dbad02 134 "NAVI14",
9802f5d7 135 "NAVI12",
ccaf72d3 136 "SIENNA_CICHLID",
ddd8fbe7 137 "NAVY_FLOUNDER",
4f1e9a76 138 "VANGOGH",
a2468e04 139 "DIMGREY_CAVEFISH",
6f169591 140 "BEIGE_GOBY",
ee9236b7 141 "YELLOW_CARP",
3ae695d6 142 "IP DISCOVERY",
d38ceaf9
AD
143 "LAST",
144};
145
dcea6e65
KR
146/**
147 * DOC: pcie_replay_count
148 *
149 * The amdgpu driver provides a sysfs API for reporting the total number
 150 * of PCIe replays (NAKs).
 151 * The file pcie_replay_count is used for this and returns the total
 152 * number of replays as a sum of the NAKs generated and the NAKs received.
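 *
 * Example (assuming the GPU is card0; the card index may differ):
 *
 *   cat /sys/class/drm/card0/device/pcie_replay_count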
153 */
154
155static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
156 struct device_attribute *attr, char *buf)
157{
158 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 159 struct amdgpu_device *adev = drm_to_adev(ddev);
dcea6e65
KR
160 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
161
36000c7a 162 return sysfs_emit(buf, "%llu\n", cnt);
dcea6e65
KR
163}
164
b8920e1e 165static DEVICE_ATTR(pcie_replay_count, 0444,
dcea6e65
KR
166 amdgpu_device_get_pcie_replay_count, NULL);
167
af39e6f4
LL
168static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj,
169 struct bin_attribute *attr, char *buf,
170 loff_t ppos, size_t count)
171{
172 struct device *dev = kobj_to_dev(kobj);
173 struct drm_device *ddev = dev_get_drvdata(dev);
174 struct amdgpu_device *adev = drm_to_adev(ddev);
175 ssize_t bytes_read;
176
177 switch (ppos) {
178 case AMDGPU_SYS_REG_STATE_XGMI:
179 bytes_read = amdgpu_asic_get_reg_state(
180 adev, AMDGPU_REG_STATE_TYPE_XGMI, buf, count);
181 break;
182 case AMDGPU_SYS_REG_STATE_WAFL:
183 bytes_read = amdgpu_asic_get_reg_state(
184 adev, AMDGPU_REG_STATE_TYPE_WAFL, buf, count);
185 break;
186 case AMDGPU_SYS_REG_STATE_PCIE:
187 bytes_read = amdgpu_asic_get_reg_state(
188 adev, AMDGPU_REG_STATE_TYPE_PCIE, buf, count);
189 break;
190 case AMDGPU_SYS_REG_STATE_USR:
191 bytes_read = amdgpu_asic_get_reg_state(
192 adev, AMDGPU_REG_STATE_TYPE_USR, buf, count);
193 break;
194 case AMDGPU_SYS_REG_STATE_USR_1:
195 bytes_read = amdgpu_asic_get_reg_state(
196 adev, AMDGPU_REG_STATE_TYPE_USR_1, buf, count);
197 break;
198 default:
199 return -EINVAL;
200 }
201
202 return bytes_read;
203}
204
205BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL,
206 AMDGPU_SYS_REG_STATE_END);
207
208int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev)
209{
210 int ret;
211
212 if (!amdgpu_asic_get_reg_state_supported(adev))
213 return 0;
214
215 ret = sysfs_create_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
216
217 return ret;
218}
219
220void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev)
221{
222 if (!amdgpu_asic_get_reg_state_supported(adev))
223 return;
224 sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
225}
226
4798db85
LL
227/**
228 * DOC: board_info
229 *
230 * The amdgpu driver provides a sysfs API for giving board related information.
231 * It provides the form factor information in the format
232 *
233 * type : form factor
234 *
235 * Possible form factor values
236 *
237 * - "cem" - PCIE CEM card
238 * - "oam" - Open Compute Accelerator Module
239 * - "unknown" - Not known
240 *
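 * Example (assuming the GPU is card0; the card index may differ):
 *
 *   $ cat /sys/class/drm/card0/device/board_info
 *   type : oam
 *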
241 */
242
76da73f0
LL
243static ssize_t amdgpu_device_get_board_info(struct device *dev,
244 struct device_attribute *attr,
245 char *buf)
246{
247 struct drm_device *ddev = dev_get_drvdata(dev);
248 struct amdgpu_device *adev = drm_to_adev(ddev);
249 enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM;
250 const char *pkg;
251
252 if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type)
253 pkg_type = adev->smuio.funcs->get_pkg_type(adev);
254
255 switch (pkg_type) {
256 case AMDGPU_PKG_TYPE_CEM:
257 pkg = "cem";
258 break;
259 case AMDGPU_PKG_TYPE_OAM:
260 pkg = "oam";
261 break;
262 default:
263 pkg = "unknown";
264 break;
265 }
266
267 return sysfs_emit(buf, "%s : %s\n", "type", pkg);
268}
269
270static DEVICE_ATTR(board_info, 0444, amdgpu_device_get_board_info, NULL);
271
272static struct attribute *amdgpu_board_attrs[] = {
273 &dev_attr_board_info.attr,
274 NULL,
275};
276
277static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj,
278 struct attribute *attr, int n)
279{
280 struct device *dev = kobj_to_dev(kobj);
281 struct drm_device *ddev = dev_get_drvdata(dev);
282 struct amdgpu_device *adev = drm_to_adev(ddev);
283
284 if (adev->flags & AMD_IS_APU)
285 return 0;
286
287 return attr->mode;
288}
289
290static const struct attribute_group amdgpu_board_attrs_group = {
291 .attrs = amdgpu_board_attrs,
292 .is_visible = amdgpu_board_attrs_is_visible
293};
294
5494d864
AD
295static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
296
bd607166 297
fd496ca8 298/**
b98c6299 299 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
fd496ca8
AD
300 *
301 * @dev: drm_device pointer
302 *
b98c6299 303 * Returns true if the device is a dGPU with ATPX power control,
fd496ca8
AD
304 * otherwise return false.
305 */
b98c6299 306bool amdgpu_device_supports_px(struct drm_device *dev)
fd496ca8
AD
307{
308 struct amdgpu_device *adev = drm_to_adev(dev);
309
b98c6299 310 if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
fd496ca8
AD
311 return true;
312 return false;
313}
314
e3ecdffa 315/**
0330b848 316 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
e3ecdffa
AD
317 *
318 * @dev: drm_device pointer
319 *
b98c6299 320 * Returns true if the device is a dGPU with ACPI power control,
e3ecdffa
AD
321 * otherwise return false.
322 */
31af062a 323bool amdgpu_device_supports_boco(struct drm_device *dev)
d38ceaf9 324{
1348969a 325 struct amdgpu_device *adev = drm_to_adev(dev);
d38ceaf9 326
b98c6299
AD
327 if (adev->has_pr3 ||
328 ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
d38ceaf9
AD
329 return true;
330 return false;
331}
332
a69cba42
AD
333/**
334 * amdgpu_device_supports_baco - Does the device support BACO
335 *
336 * @dev: drm_device pointer
337 *
 338 * Returns true if the device supports BACO,
339 * otherwise return false.
340 */
341bool amdgpu_device_supports_baco(struct drm_device *dev)
342{
1348969a 343 struct amdgpu_device *adev = drm_to_adev(dev);
a69cba42
AD
344
345 return amdgpu_asic_supports_baco(adev);
346}
347
3fa8f89d
S
348/**
349 * amdgpu_device_supports_smart_shift - Is the device dGPU with
350 * smart shift support
351 *
352 * @dev: drm_device pointer
353 *
354 * Returns true if the device is a dGPU with Smart Shift support,
355 * otherwise returns false.
356 */
357bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
358{
359 return (amdgpu_device_supports_boco(dev) &&
360 amdgpu_acpi_is_power_shift_control_supported());
361}
362
6e3cd2a9
MCC
363/*
364 * VRAM access helper functions
365 */
366
e35e2b11 367/**
048af66b 368 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
e35e2b11
TY
369 *
370 * @adev: amdgpu_device pointer
371 * @pos: offset of the buffer in vram
372 * @buf: virtual address of the buffer in system memory
 373 * @size: read/write size, sizeof(@buf) must be > @size
374 * @write: true - write to vram, otherwise - read from vram
375 */
048af66b
KW
376void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
377 void *buf, size_t size, bool write)
e35e2b11 378{
e35e2b11 379 unsigned long flags;
048af66b
KW
380 uint32_t hi = ~0, tmp = 0;
381 uint32_t *data = buf;
ce05ac56 382 uint64_t last;
f89f8c6b 383 int idx;
ce05ac56 384
c58a863b 385 if (!drm_dev_enter(adev_to_drm(adev), &idx))
f89f8c6b 386 return;
9d11eb0d 387
048af66b
KW
388 BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
389
390 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
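	/* MM_INDEX selects the dword to access through the MM_DATA window
	 * (bit 31 selects the memory aperture); offsets beyond 2GB need the
	 * high bits programmed into MM_INDEX_HI, which is only rewritten when
	 * they change.
	 */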
391 for (last = pos + size; pos < last; pos += 4) {
392 tmp = pos >> 31;
393
394 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
395 if (tmp != hi) {
396 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
397 hi = tmp;
398 }
399 if (write)
400 WREG32_NO_KIQ(mmMM_DATA, *data++);
401 else
402 *data++ = RREG32_NO_KIQ(mmMM_DATA);
403 }
404
405 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
406 drm_dev_exit(idx);
407}
408
409/**
bbe04dec 410 * amdgpu_device_aper_access - access vram through the vram aperture
048af66b
KW
411 *
412 * @adev: amdgpu_device pointer
413 * @pos: offset of the buffer in vram
414 * @buf: virtual address of the buffer in system memory
 415 * @size: read/write size, sizeof(@buf) must be > @size
416 * @write: true - write to vram, otherwise - read from vram
417 *
 418 * The return value is the number of bytes transferred.
419 */
420size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
421 void *buf, size_t size, bool write)
422{
9d11eb0d 423#ifdef CONFIG_64BIT
048af66b
KW
424 void __iomem *addr;
425 size_t count = 0;
426 uint64_t last;
427
428 if (!adev->mman.aper_base_kaddr)
429 return 0;
430
9d11eb0d
CK
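	/* only the CPU-visible part of VRAM is reachable through the BAR;
	 * clamp the transfer here and let the caller fall back to
	 * MM_INDEX/MM_DATA access for the remainder
	 */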
431 last = min(pos + size, adev->gmc.visible_vram_size);
432 if (last > pos) {
048af66b
KW
433 addr = adev->mman.aper_base_kaddr + pos;
434 count = last - pos;
9d11eb0d
CK
435
436 if (write) {
437 memcpy_toio(addr, buf, count);
4c452b5c
SS
438 /* Make sure HDP write cache flush happens without any reordering
439 * after the system memory contents are sent over PCIe device
440 */
9d11eb0d 441 mb();
810085dd 442 amdgpu_device_flush_hdp(adev, NULL);
9d11eb0d 443 } else {
810085dd 444 amdgpu_device_invalidate_hdp(adev, NULL);
4c452b5c
SS
445 /* Make sure HDP read cache is invalidated before issuing a read
446 * to the PCIe device
447 */
9d11eb0d
CK
448 mb();
449 memcpy_fromio(buf, addr, count);
450 }
451
9d11eb0d 452 }
048af66b
KW
453
454 return count;
455#else
456 return 0;
9d11eb0d 457#endif
048af66b 458}
9d11eb0d 459
048af66b
KW
460/**
461 * amdgpu_device_vram_access - read/write a buffer in vram
462 *
463 * @adev: amdgpu_device pointer
464 * @pos: offset of the buffer in vram
465 * @buf: virtual address of the buffer in system memory
 466 * @size: read/write size, sizeof(@buf) must be > @size
467 * @write: true - write to vram, otherwise - read from vram
468 */
469void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
470 void *buf, size_t size, bool write)
471{
472 size_t count;
e35e2b11 473
048af66b
KW
 474 /* try using the vram aperture to access vram first */
475 count = amdgpu_device_aper_access(adev, pos, buf, size, write);
476 size -= count;
477 if (size) {
 478 /* use MM access for the rest of vram */
479 pos += count;
480 buf += count;
481 amdgpu_device_mm_access(adev, pos, buf, size, write);
e35e2b11
TY
482 }
483}
484
d38ceaf9 485/*
f7ee1874 486 * register access helper functions.
d38ceaf9 487 */
56b53c0b
DL
488
489/* Check if hw access should be skipped because of hotplug or device error */
490bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
491{
7afefb81 492 if (adev->no_hw_access)
56b53c0b
DL
493 return true;
494
495#ifdef CONFIG_LOCKDEP
496 /*
497 * This is a bit complicated to understand, so worth a comment. What we assert
498 * here is that the GPU reset is not running on another thread in parallel.
499 *
500 * For this we trylock the read side of the reset semaphore, if that succeeds
 501 * we know that the reset is not running in parallel.
502 *
503 * If the trylock fails we assert that we are either already holding the read
504 * side of the lock or are the reset thread itself and hold the write side of
505 * the lock.
506 */
507 if (in_task()) {
d0fb18b5
AG
508 if (down_read_trylock(&adev->reset_domain->sem))
509 up_read(&adev->reset_domain->sem);
56b53c0b 510 else
d0fb18b5 511 lockdep_assert_held(&adev->reset_domain->sem);
56b53c0b
DL
512 }
513#endif
514 return false;
515}
516
e3ecdffa 517/**
f7ee1874 518 * amdgpu_device_rreg - read a memory mapped IO or indirect register
e3ecdffa
AD
519 *
520 * @adev: amdgpu_device pointer
521 * @reg: dword aligned register offset
522 * @acc_flags: access flags which require special behavior
523 *
524 * Returns the 32 bit value from the offset specified.
525 */
f7ee1874
HZ
526uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
527 uint32_t reg, uint32_t acc_flags)
d38ceaf9 528{
f4b373f4
TSD
529 uint32_t ret;
530
56b53c0b 531 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
532 return 0;
533
f7ee1874
HZ
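	/* registers inside the MMIO BAR are read directly (or via KIQ when
	 * running as an SRIOV VF at runtime); anything beyond rmmio_size is
	 * routed through the indirect PCIE index/data pair
	 */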
534 if ((reg * 4) < adev->rmmio_size) {
535 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
536 amdgpu_sriov_runtime(adev) &&
d0fb18b5 537 down_read_trylock(&adev->reset_domain->sem)) {
85150626 538 ret = amdgpu_kiq_rreg(adev, reg, 0);
d0fb18b5 539 up_read(&adev->reset_domain->sem);
f7ee1874
HZ
540 } else {
541 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
542 }
543 } else {
544 ret = adev->pcie_rreg(adev, reg * 4);
81202807 545 }
bc992ba5 546
f7ee1874 547 trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
e78b579d 548
f4b373f4 549 return ret;
d38ceaf9
AD
550}
551
421a2a30
ML
552/*
553 * MMIO register read with bytes helper functions
 554 * @offset: byte offset from MMIO start
b8920e1e 555 */
421a2a30 556
e3ecdffa
AD
557/**
558 * amdgpu_mm_rreg8 - read a memory mapped IO register
559 *
560 * @adev: amdgpu_device pointer
561 * @offset: byte aligned register offset
562 *
563 * Returns the 8 bit value from the offset specified.
564 */
7cbbc745
AG
565uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
566{
56b53c0b 567 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
568 return 0;
569
421a2a30
ML
570 if (offset < adev->rmmio_size)
571 return (readb(adev->rmmio + offset));
572 BUG();
573}
574
85150626
VL
575
576/**
577 * amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register with specific XCC
578 *
579 * @adev: amdgpu_device pointer
580 * @reg: dword aligned register offset
581 * @acc_flags: access flags which require special behavior
582 * @xcc_id: xcc accelerated compute core id
583 *
584 * Returns the 32 bit value from the offset specified.
585 */
586uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
587 uint32_t reg, uint32_t acc_flags,
588 uint32_t xcc_id)
589{
590 uint32_t ret, rlcg_flag;
591
592 if (amdgpu_device_skip_hw_access(adev))
593 return 0;
594
595 if ((reg * 4) < adev->rmmio_size) {
596 if (amdgpu_sriov_vf(adev) &&
597 !amdgpu_sriov_runtime(adev) &&
598 adev->gfx.rlc.rlcg_reg_access_supported &&
599 amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
600 GC_HWIP, false,
601 &rlcg_flag)) {
602 ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, xcc_id);
603 } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
604 amdgpu_sriov_runtime(adev) &&
605 down_read_trylock(&adev->reset_domain->sem)) {
606 ret = amdgpu_kiq_rreg(adev, reg, xcc_id);
607 up_read(&adev->reset_domain->sem);
608 } else {
609 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
610 }
611 } else {
612 ret = adev->pcie_rreg(adev, reg * 4);
613 }
614
615 return ret;
616}
617
421a2a30
ML
618/*
619 * MMIO register write with bytes helper functions
 620 * @offset: byte offset from MMIO start
 621 * @value: the value to be written to the register
b8920e1e
SS
622 */
623
e3ecdffa
AD
624/**
 625 * amdgpu_mm_wreg8 - write a memory mapped IO register
626 *
627 * @adev: amdgpu_device pointer
628 * @offset: byte aligned register offset
629 * @value: 8 bit value to write
630 *
631 * Writes the value specified to the offset specified.
632 */
7cbbc745
AG
633void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
634{
56b53c0b 635 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
636 return;
637
421a2a30
ML
638 if (offset < adev->rmmio_size)
639 writeb(value, adev->rmmio + offset);
640 else
641 BUG();
642}
643
e3ecdffa 644/**
f7ee1874 645 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
e3ecdffa
AD
646 *
647 * @adev: amdgpu_device pointer
648 * @reg: dword aligned register offset
649 * @v: 32 bit value to write to the register
650 * @acc_flags: access flags which require special behavior
651 *
652 * Writes the value specified to the offset specified.
653 */
f7ee1874
HZ
654void amdgpu_device_wreg(struct amdgpu_device *adev,
655 uint32_t reg, uint32_t v,
656 uint32_t acc_flags)
d38ceaf9 657{
56b53c0b 658 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
659 return;
660
f7ee1874
HZ
661 if ((reg * 4) < adev->rmmio_size) {
662 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
663 amdgpu_sriov_runtime(adev) &&
d0fb18b5 664 down_read_trylock(&adev->reset_domain->sem)) {
85150626 665 amdgpu_kiq_wreg(adev, reg, v, 0);
d0fb18b5 666 up_read(&adev->reset_domain->sem);
f7ee1874
HZ
667 } else {
668 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
669 }
670 } else {
671 adev->pcie_wreg(adev, reg * 4, v);
81202807 672 }
bc992ba5 673
f7ee1874 674 trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
2e0cc4d4 675}
d38ceaf9 676
03f2abb0 677/**
4cc9f86f 678 * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
2e0cc4d4 679 *
71579346
RB
680 * @adev: amdgpu_device pointer
681 * @reg: mmio/rlc register
682 * @v: value to write
8057a9d6 683 * @xcc_id: xcc accelerated compute core id
71579346
RB
684 *
685 * this function is invoked only for the debugfs register access
03f2abb0 686 */
f7ee1874 687void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
8ed49dd1
VL
688 uint32_t reg, uint32_t v,
689 uint32_t xcc_id)
2e0cc4d4 690{
56b53c0b 691 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
692 return;
693
2e0cc4d4 694 if (amdgpu_sriov_fullaccess(adev) &&
f7ee1874
HZ
695 adev->gfx.rlc.funcs &&
696 adev->gfx.rlc.funcs->is_rlcg_access_range) {
2e0cc4d4 697 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
8ed49dd1 698 return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
4cc9f86f
TSD
699 } else if ((reg * 4) >= adev->rmmio_size) {
700 adev->pcie_wreg(adev, reg * 4, v);
f7ee1874
HZ
701 } else {
702 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
47ed4e1c 703 }
d38ceaf9
AD
704}
705
85150626
VL
706/**
707 * amdgpu_device_xcc_wreg - write to a memory mapped IO or indirect register with specific XCC
708 *
709 * @adev: amdgpu_device pointer
710 * @reg: dword aligned register offset
711 * @v: 32 bit value to write to the register
712 * @acc_flags: access flags which require special behavior
713 * @xcc_id: xcc accelerated compute core id
714 *
715 * Writes the value specified to the offset specified.
716 */
717void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
718 uint32_t reg, uint32_t v,
719 uint32_t acc_flags, uint32_t xcc_id)
720{
721 uint32_t rlcg_flag;
722
723 if (amdgpu_device_skip_hw_access(adev))
724 return;
725
726 if ((reg * 4) < adev->rmmio_size) {
727 if (amdgpu_sriov_vf(adev) &&
728 !amdgpu_sriov_runtime(adev) &&
729 adev->gfx.rlc.rlcg_reg_access_supported &&
730 amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
731 GC_HWIP, true,
732 &rlcg_flag)) {
733 amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, xcc_id);
734 } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
735 amdgpu_sriov_runtime(adev) &&
736 down_read_trylock(&adev->reset_domain->sem)) {
737 amdgpu_kiq_wreg(adev, reg, v, xcc_id);
738 up_read(&adev->reset_domain->sem);
739 } else {
740 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
741 }
742 } else {
743 adev->pcie_wreg(adev, reg * 4, v);
744 }
745}
746
1bba3683
HZ
747/**
748 * amdgpu_device_indirect_rreg - read an indirect register
749 *
750 * @adev: amdgpu_device pointer
22f453fb 751 * @reg_addr: indirect register address to read from
1bba3683
HZ
752 *
753 * Returns the value of indirect register @reg_addr
754 */
755u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
1bba3683
HZ
756 u32 reg_addr)
757{
65ba96e9 758 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
759 void __iomem *pcie_index_offset;
760 void __iomem *pcie_data_offset;
65ba96e9
HZ
761 u32 r;
762
763 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
764 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1bba3683
HZ
765
766 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
767 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
768 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
769
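	/* program the register address into the index register, read it back
	 * to flush the posted write, then fetch the value from the data
	 * register
	 */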
770 writel(reg_addr, pcie_index_offset);
771 readl(pcie_index_offset);
772 r = readl(pcie_data_offset);
773 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
774
775 return r;
776}
777
0c552ed3
LM
778u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
779 u64 reg_addr)
780{
781 unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
782 u32 r;
783 void __iomem *pcie_index_offset;
784 void __iomem *pcie_index_hi_offset;
785 void __iomem *pcie_data_offset;
786
ad390542
HZ
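	/* Very early in init (or on teardown paths) the nbio callbacks may not
	 * be set up yet; fall back to the default PCIE index/data offsets so
	 * the register can still be reached.
	 */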
787 if (unlikely(!adev->nbio.funcs)) {
788 pcie_index = AMDGPU_PCIE_INDEX_FALLBACK;
789 pcie_data = AMDGPU_PCIE_DATA_FALLBACK;
790 } else {
791 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
792 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
793 }
794
795 if (reg_addr >> 32) {
796 if (unlikely(!adev->nbio.funcs))
797 pcie_index_hi = AMDGPU_PCIE_INDEX_HI_FALLBACK;
798 else
799 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
800 } else {
0c552ed3 801 pcie_index_hi = 0;
ad390542 802 }
0c552ed3
LM
803
804 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
805 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
806 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
807 if (pcie_index_hi != 0)
808 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
809 pcie_index_hi * 4;
810
811 writel(reg_addr, pcie_index_offset);
812 readl(pcie_index_offset);
813 if (pcie_index_hi != 0) {
814 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
815 readl(pcie_index_hi_offset);
816 }
817 r = readl(pcie_data_offset);
818
819 /* clear the high bits */
820 if (pcie_index_hi != 0) {
821 writel(0, pcie_index_hi_offset);
822 readl(pcie_index_hi_offset);
823 }
824
825 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
826
827 return r;
828}
829
1bba3683
HZ
830/**
 831 * amdgpu_device_indirect_rreg64 - read a 64 bit indirect register
832 *
833 * @adev: amdgpu_device pointer
22f453fb 834 * @reg_addr: indirect register address to read from
1bba3683
HZ
835 *
836 * Returns the value of indirect register @reg_addr
837 */
838u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
1bba3683
HZ
839 u32 reg_addr)
840{
65ba96e9 841 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
842 void __iomem *pcie_index_offset;
843 void __iomem *pcie_data_offset;
65ba96e9
HZ
844 u64 r;
845
846 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
847 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1bba3683
HZ
848
849 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
850 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
851 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
852
853 /* read low 32 bits */
854 writel(reg_addr, pcie_index_offset);
855 readl(pcie_index_offset);
856 r = readl(pcie_data_offset);
857 /* read high 32 bits */
858 writel(reg_addr + 4, pcie_index_offset);
859 readl(pcie_index_offset);
860 r |= ((u64)readl(pcie_data_offset) << 32);
861 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
862
863 return r;
864}
865
a76b2870
CL
866u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
867 u64 reg_addr)
868{
869 unsigned long flags, pcie_index, pcie_data;
870 unsigned long pcie_index_hi = 0;
871 void __iomem *pcie_index_offset;
872 void __iomem *pcie_index_hi_offset;
873 void __iomem *pcie_data_offset;
874 u64 r;
875
876 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
877 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
878 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
879 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
880
881 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
882 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
883 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
884 if (pcie_index_hi != 0)
885 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
886 pcie_index_hi * 4;
887
888 /* read low 32 bits */
889 writel(reg_addr, pcie_index_offset);
890 readl(pcie_index_offset);
891 if (pcie_index_hi != 0) {
892 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
893 readl(pcie_index_hi_offset);
894 }
895 r = readl(pcie_data_offset);
896 /* read high 32 bits */
897 writel(reg_addr + 4, pcie_index_offset);
898 readl(pcie_index_offset);
899 if (pcie_index_hi != 0) {
900 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
901 readl(pcie_index_hi_offset);
902 }
903 r |= ((u64)readl(pcie_data_offset) << 32);
904
905 /* clear the high bits */
906 if (pcie_index_hi != 0) {
907 writel(0, pcie_index_hi_offset);
908 readl(pcie_index_hi_offset);
909 }
910
911 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
912
913 return r;
914}
915
1bba3683
HZ
916/**
917 * amdgpu_device_indirect_wreg - write an indirect register address
918 *
919 * @adev: amdgpu_device pointer
1bba3683
HZ
920 * @reg_addr: indirect register offset
921 * @reg_data: indirect register data
922 *
923 */
924void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
1bba3683
HZ
925 u32 reg_addr, u32 reg_data)
926{
65ba96e9 927 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
928 void __iomem *pcie_index_offset;
929 void __iomem *pcie_data_offset;
930
65ba96e9
HZ
931 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
932 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
933
1bba3683
HZ
934 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
935 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
936 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
937
938 writel(reg_addr, pcie_index_offset);
939 readl(pcie_index_offset);
940 writel(reg_data, pcie_data_offset);
941 readl(pcie_data_offset);
942 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
943}
944
0c552ed3
LM
945void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
946 u64 reg_addr, u32 reg_data)
947{
948 unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
949 void __iomem *pcie_index_offset;
950 void __iomem *pcie_index_hi_offset;
951 void __iomem *pcie_data_offset;
952
953 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
954 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
d57e24aa 955 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
0c552ed3
LM
956 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
957 else
958 pcie_index_hi = 0;
959
960 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
961 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
962 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
963 if (pcie_index_hi != 0)
964 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
965 pcie_index_hi * 4;
966
967 writel(reg_addr, pcie_index_offset);
968 readl(pcie_index_offset);
969 if (pcie_index_hi != 0) {
970 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
971 readl(pcie_index_hi_offset);
972 }
973 writel(reg_data, pcie_data_offset);
974 readl(pcie_data_offset);
975
976 /* clear the high bits */
977 if (pcie_index_hi != 0) {
978 writel(0, pcie_index_hi_offset);
979 readl(pcie_index_hi_offset);
980 }
981
982 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
983}
984
1bba3683
HZ
985/**
 986 * amdgpu_device_indirect_wreg64 - write a 64 bit indirect register address
987 *
988 * @adev: amdgpu_device pointer
1bba3683
HZ
989 * @reg_addr: indirect register offset
990 * @reg_data: indirect register data
991 *
992 */
993void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
1bba3683
HZ
994 u32 reg_addr, u64 reg_data)
995{
65ba96e9 996 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
997 void __iomem *pcie_index_offset;
998 void __iomem *pcie_data_offset;
999
65ba96e9
HZ
1000 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1001 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1002
1bba3683
HZ
1003 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1004 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1005 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1006
1007 /* write low 32 bits */
1008 writel(reg_addr, pcie_index_offset);
1009 readl(pcie_index_offset);
1010 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
1011 readl(pcie_data_offset);
1012 /* write high 32 bits */
1013 writel(reg_addr + 4, pcie_index_offset);
1014 readl(pcie_index_offset);
1015 writel((u32)(reg_data >> 32), pcie_data_offset);
1016 readl(pcie_data_offset);
1017 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1018}
1019
a76b2870
CL
1020void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
1021 u64 reg_addr, u64 reg_data)
1022{
1023 unsigned long flags, pcie_index, pcie_data;
1024 unsigned long pcie_index_hi = 0;
1025 void __iomem *pcie_index_offset;
1026 void __iomem *pcie_index_hi_offset;
1027 void __iomem *pcie_data_offset;
1028
1029 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1030 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1031 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
1032 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
1033
1034 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1035 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1036 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1037 if (pcie_index_hi != 0)
1038 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
1039 pcie_index_hi * 4;
1040
1041 /* write low 32 bits */
1042 writel(reg_addr, pcie_index_offset);
1043 readl(pcie_index_offset);
1044 if (pcie_index_hi != 0) {
1045 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1046 readl(pcie_index_hi_offset);
1047 }
1048 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
1049 readl(pcie_data_offset);
1050 /* write high 32 bits */
1051 writel(reg_addr + 4, pcie_index_offset);
1052 readl(pcie_index_offset);
1053 if (pcie_index_hi != 0) {
1054 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
1055 readl(pcie_index_hi_offset);
1056 }
1057 writel((u32)(reg_data >> 32), pcie_data_offset);
1058 readl(pcie_data_offset);
1059
1060 /* clear the high bits */
1061 if (pcie_index_hi != 0) {
1062 writel(0, pcie_index_hi_offset);
1063 readl(pcie_index_hi_offset);
1064 }
1065
1066 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1067}
1068
dabc114e
HZ
1069/**
1070 * amdgpu_device_get_rev_id - query device rev_id
1071 *
1072 * @adev: amdgpu_device pointer
1073 *
1074 * Return device rev_id
1075 */
1076u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
1077{
1078 return adev->nbio.funcs->get_rev_id(adev);
1079}
1080
d38ceaf9
AD
1081/**
1082 * amdgpu_invalid_rreg - dummy reg read function
1083 *
982a820b 1084 * @adev: amdgpu_device pointer
d38ceaf9
AD
1085 * @reg: offset of register
1086 *
1087 * Dummy register read function. Used for register blocks
1088 * that certain asics don't have (all asics).
1089 * Returns the value in the register.
1090 */
1091static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
1092{
1093 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
1094 BUG();
1095 return 0;
1096}
1097
0c552ed3
LM
1098static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
1099{
1100 DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
1101 BUG();
1102 return 0;
1103}
1104
d38ceaf9
AD
1105/**
1106 * amdgpu_invalid_wreg - dummy reg write function
1107 *
982a820b 1108 * @adev: amdgpu_device pointer
d38ceaf9
AD
1109 * @reg: offset of register
1110 * @v: value to write to the register
1111 *
 1112 * Dummy register write function. Used for register blocks
1113 * that certain asics don't have (all asics).
1114 */
1115static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
1116{
1117 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
1118 reg, v);
1119 BUG();
1120}
1121
0c552ed3
LM
1122static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
1123{
1124 DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
1125 reg, v);
1126 BUG();
1127}
1128
4fa1c6a6
TZ
1129/**
1130 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
1131 *
982a820b 1132 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
1133 * @reg: offset of register
1134 *
1135 * Dummy register read function. Used for register blocks
1136 * that certain asics don't have (all asics).
1137 * Returns the value in the register.
1138 */
1139static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
1140{
1141 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
1142 BUG();
1143 return 0;
1144}
1145
a76b2870
CL
1146static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg)
1147{
1148 DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
1149 BUG();
1150 return 0;
1151}
1152
4fa1c6a6
TZ
1153/**
1154 * amdgpu_invalid_wreg64 - dummy reg write function
1155 *
982a820b 1156 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
1157 * @reg: offset of register
1158 * @v: value to write to the register
1159 *
 1160 * Dummy register write function. Used for register blocks
1161 * that certain asics don't have (all asics).
1162 */
1163static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
1164{
1165 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
1166 reg, v);
1167 BUG();
1168}
1169
a76b2870
CL
1170static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v)
1171{
1172 DRM_ERROR("Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n",
1173 reg, v);
1174 BUG();
1175}
1176
d38ceaf9
AD
1177/**
1178 * amdgpu_block_invalid_rreg - dummy reg read function
1179 *
982a820b 1180 * @adev: amdgpu_device pointer
d38ceaf9
AD
1181 * @block: offset of instance
1182 * @reg: offset of register
1183 *
1184 * Dummy register read function. Used for register blocks
1185 * that certain asics don't have (all asics).
1186 * Returns the value in the register.
1187 */
1188static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
1189 uint32_t block, uint32_t reg)
1190{
1191 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
1192 reg, block);
1193 BUG();
1194 return 0;
1195}
1196
1197/**
1198 * amdgpu_block_invalid_wreg - dummy reg write function
1199 *
982a820b 1200 * @adev: amdgpu_device pointer
d38ceaf9
AD
1201 * @block: offset of instance
1202 * @reg: offset of register
1203 * @v: value to write to the register
1204 *
 1205 * Dummy register write function. Used for register blocks
1206 * that certain asics don't have (all asics).
1207 */
1208static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
1209 uint32_t block,
1210 uint32_t reg, uint32_t v)
1211{
1212 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
1213 reg, block, v);
1214 BUG();
1215}
1216
4d2997ab
AD
1217/**
1218 * amdgpu_device_asic_init - Wrapper for atom asic_init
1219 *
982a820b 1220 * @adev: amdgpu_device pointer
4d2997ab
AD
1221 *
1222 * Does any asic specific work and then calls atom asic init.
1223 */
1224static int amdgpu_device_asic_init(struct amdgpu_device *adev)
1225{
7656168a
LL
1226 int ret;
1227
4d2997ab
AD
1228 amdgpu_asic_pre_asic_init(adev);
1229
4e8303cf
LL
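	/* GC 9.4.3 and GC 11+ parts go through the PSP bootloader and the
	 * atomfirmware ASIC init path; older parts execute the legacy atom
	 * asic_init table instead.
	 */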
1230 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
1231 amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
7656168a
LL
1232 amdgpu_psp_wait_for_bootloader(adev);
1233 ret = amdgpu_atomfirmware_asic_init(adev, true);
1234 return ret;
1235 } else {
85d1bcc6 1236 return amdgpu_atom_asic_init(adev->mode_info.atom_context);
7656168a
LL
1237 }
1238
1239 return 0;
4d2997ab
AD
1240}
1241
e3ecdffa 1242/**
7ccfd79f 1243 * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
e3ecdffa 1244 *
982a820b 1245 * @adev: amdgpu_device pointer
e3ecdffa
AD
1246 *
1247 * Allocates a scratch page of VRAM for use by various things in the
1248 * driver.
1249 */
7ccfd79f 1250static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
d38ceaf9 1251{
7ccfd79f
CK
1252 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
1253 AMDGPU_GEM_DOMAIN_VRAM |
1254 AMDGPU_GEM_DOMAIN_GTT,
1255 &adev->mem_scratch.robj,
1256 &adev->mem_scratch.gpu_addr,
1257 (void **)&adev->mem_scratch.ptr);
d38ceaf9
AD
1258}
1259
e3ecdffa 1260/**
7ccfd79f 1261 * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
e3ecdffa 1262 *
982a820b 1263 * @adev: amdgpu_device pointer
e3ecdffa
AD
1264 *
1265 * Frees the VRAM scratch page.
1266 */
7ccfd79f 1267static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 1268{
7ccfd79f 1269 amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
d38ceaf9
AD
1270}
1271
1272/**
9c3f2b54 1273 * amdgpu_device_program_register_sequence - program an array of registers.
d38ceaf9
AD
1274 *
1275 * @adev: amdgpu_device pointer
1276 * @registers: pointer to the register array
1277 * @array_size: size of the register array
1278 *
b8920e1e 1279 * Programs an array of registers with and/or masks.
d38ceaf9
AD
1280 * This is a helper for setting golden registers.
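 *
 * Each entry in @registers is an {offset, and_mask, or_mask} triple: the
 * register is read, the bits in and_mask are cleared and or_mask is or'ed in,
 * or the register is simply written with or_mask when and_mask is 0xffffffff.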
1281 */
9c3f2b54
AD
1282void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
1283 const u32 *registers,
1284 const u32 array_size)
d38ceaf9
AD
1285{
1286 u32 tmp, reg, and_mask, or_mask;
1287 int i;
1288
1289 if (array_size % 3)
1290 return;
1291
47fc644f 1292 for (i = 0; i < array_size; i += 3) {
d38ceaf9
AD
1293 reg = registers[i + 0];
1294 and_mask = registers[i + 1];
1295 or_mask = registers[i + 2];
1296
1297 if (and_mask == 0xffffffff) {
1298 tmp = or_mask;
1299 } else {
1300 tmp = RREG32(reg);
1301 tmp &= ~and_mask;
e0d07657
HZ
1302 if (adev->family >= AMDGPU_FAMILY_AI)
1303 tmp |= (or_mask & and_mask);
1304 else
1305 tmp |= or_mask;
d38ceaf9
AD
1306 }
1307 WREG32(reg, tmp);
1308 }
1309}
1310
e3ecdffa
AD
1311/**
1312 * amdgpu_device_pci_config_reset - reset the GPU
1313 *
1314 * @adev: amdgpu_device pointer
1315 *
1316 * Resets the GPU using the pci config reset sequence.
1317 * Only applicable to asics prior to vega10.
1318 */
8111c387 1319void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
1320{
1321 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
1322}
1323
af484df8
AD
1324/**
1325 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1326 *
1327 * @adev: amdgpu_device pointer
1328 *
1329 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1330 */
1331int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1332{
1333 return pci_reset_function(adev->pdev);
1334}
1335
d38ceaf9 1336/*
06ec9070 1337 * amdgpu_device_wb_*()
455a7bc2 1338 * Writeback is the method by which the GPU updates special pages in memory
ea81a173 1339 * with the status of certain GPU events (fences, ring pointers, etc.).
d38ceaf9
AD
1340 */
1341
1342/**
06ec9070 1343 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
1344 *
1345 * @adev: amdgpu_device pointer
1346 *
1347 * Disables Writeback and frees the Writeback memory (all asics).
1348 * Used at driver shutdown.
1349 */
06ec9070 1350static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1351{
1352 if (adev->wb.wb_obj) {
a76ed485
AD
1353 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1354 &adev->wb.gpu_addr,
1355 (void **)&adev->wb.wb);
d38ceaf9
AD
1356 adev->wb.wb_obj = NULL;
1357 }
1358}
1359
1360/**
03f2abb0 1361 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
d38ceaf9
AD
1362 *
1363 * @adev: amdgpu_device pointer
1364 *
455a7bc2 1365 * Initializes writeback and allocates writeback memory (all asics).
d38ceaf9
AD
1366 * Used at driver startup.
 1367 * Returns 0 on success or a negative error code on failure.
1368 */
06ec9070 1369static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
1370{
1371 int r;
1372
1373 if (adev->wb.wb_obj == NULL) {
97407b63
AD
1374 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1375 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
1376 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1377 &adev->wb.wb_obj, &adev->wb.gpu_addr,
1378 (void **)&adev->wb.wb);
d38ceaf9
AD
1379 if (r) {
1380 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1381 return r;
1382 }
d38ceaf9
AD
1383
1384 adev->wb.num_wb = AMDGPU_MAX_WB;
1385 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1386
1387 /* clear wb memory */
73469585 1388 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
1389 }
1390
1391 return 0;
1392}
1393
1394/**
131b4b36 1395 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
1396 *
1397 * @adev: amdgpu_device pointer
1398 * @wb: wb index
1399 *
1400 * Allocate a wb slot for use by the driver (all asics).
1401 * Returns 0 on success or -EINVAL on failure.
1402 */
131b4b36 1403int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9
AD
1404{
1405 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 1406
97407b63 1407 if (offset < adev->wb.num_wb) {
7014285a 1408 __set_bit(offset, adev->wb.used);
63ae07ca 1409 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
1410 return 0;
1411 } else {
1412 return -EINVAL;
1413 }
1414}
1415
d38ceaf9 1416/**
131b4b36 1417 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
1418 *
1419 * @adev: amdgpu_device pointer
1420 * @wb: wb index
1421 *
1422 * Free a wb slot allocated for use by the driver (all asics)
1423 */
131b4b36 1424void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 1425{
73469585 1426 wb >>= 3;
d38ceaf9 1427 if (wb < adev->wb.num_wb)
73469585 1428 __clear_bit(wb, adev->wb.used);
d38ceaf9
AD
1429}
1430
d6895ad3
CK
1431/**
1432 * amdgpu_device_resize_fb_bar - try to resize FB BAR
1433 *
1434 * @adev: amdgpu_device pointer
1435 *
1436 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 1437 * to fail, but if any of the BARs is not accessible after the resize, we abort
1438 * driver loading by returning -ENODEV.
1439 */
1440int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1441{
453f617a 1442 int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
31b8adab
CK
1443 struct pci_bus *root;
1444 struct resource *res;
b8920e1e 1445 unsigned int i;
d6895ad3
CK
1446 u16 cmd;
1447 int r;
1448
822130b5
AB
1449 if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1450 return 0;
1451
0c03b912 1452 /* Bypass for VF */
1453 if (amdgpu_sriov_vf(adev))
1454 return 0;
1455
b7221f2b
AD
1456 /* skip if the bios has already enabled large BAR */
1457 if (adev->gmc.real_vram_size &&
1458 (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1459 return 0;
1460
31b8adab
CK
1461 /* Check if the root BUS has 64bit memory resources */
1462 root = adev->pdev->bus;
1463 while (root->parent)
1464 root = root->parent;
1465
1466 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 1467 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
1468 res->start > 0x100000000ull)
1469 break;
1470 }
1471
1472 /* Trying to resize is pointless without a root hub window above 4GB */
1473 if (!res)
1474 return 0;
1475
453f617a
ND
1476 /* Limit the BAR size to what is available */
1477 rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1478 rbar_size);
1479
d6895ad3
CK
1480 /* Disable memory decoding while we change the BAR addresses and size */
1481 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1482 pci_write_config_word(adev->pdev, PCI_COMMAND,
1483 cmd & ~PCI_COMMAND_MEMORY);
1484
1485 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
43c064db 1486 amdgpu_doorbell_fini(adev);
d6895ad3
CK
1487 if (adev->asic_type >= CHIP_BONAIRE)
1488 pci_release_resource(adev->pdev, 2);
1489
1490 pci_release_resource(adev->pdev, 0);
1491
1492 r = pci_resize_resource(adev->pdev, 0, rbar_size);
1493 if (r == -ENOSPC)
1494 DRM_INFO("Not enough PCI address space for a large BAR.");
1495 else if (r && r != -ENOTSUPP)
1496 DRM_ERROR("Problem resizing BAR0 (%d).", r);
1497
1498 pci_assign_unassigned_bus_resources(adev->pdev->bus);
1499
1500 /* When the doorbell or fb BAR isn't available we have no chance of
1501 * using the device.
1502 */
43c064db 1503 r = amdgpu_doorbell_init(adev);
d6895ad3
CK
1504 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1505 return -ENODEV;
1506
1507 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1508
1509 return 0;
1510}
a05502e5 1511
9535a86a
SZ
1512static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
1513{
b8920e1e 1514 if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
9535a86a 1515 return false;
9535a86a
SZ
1516
1517 return true;
1518}
1519
d38ceaf9
AD
1520/*
1521 * GPU helpers function.
1522 */
1523/**
39c640c0 1524 * amdgpu_device_need_post - check if the hw need post or not
d38ceaf9
AD
1525 *
1526 * @adev: amdgpu_device pointer
1527 *
c836fec5
JQ
 1528 * Check if the asic has been initialized (all asics) at driver startup,
 1529 * or whether post is needed because a hw reset was performed.
 1530 * Returns true if post is needed or false if not.
d38ceaf9 1531 */
39c640c0 1532bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
1533{
1534 uint32_t reg;
1535
bec86378
ML
1536 if (amdgpu_sriov_vf(adev))
1537 return false;
1538
9535a86a
SZ
1539 if (!amdgpu_device_read_bios(adev))
1540 return false;
1541
bec86378 1542 if (amdgpu_passthrough(adev)) {
1da2c326
ML
 1543 /* For FIJI: in the whole-GPU pass-through virtualization case, after a VM
 1544 * reboot some old SMC firmware still needs the driver to perform vPost or
 1545 * the GPU hangs. SMC firmware versions above 22.15 don't have this flaw,
 1546 * so force vPost for SMC versions below 22.15.
bec86378
ML
1547 */
1548 if (adev->asic_type == CHIP_FIJI) {
1549 int err;
1550 uint32_t fw_ver;
b8920e1e 1551
bec86378
ML
1552 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
 1553 /* force vPost if an error occurred */
1554 if (err)
1555 return true;
1556
1557 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
8a44fdd3 1558 release_firmware(adev->pm.fw);
1da2c326
ML
1559 if (fw_ver < 0x00160e00)
1560 return true;
bec86378 1561 }
bec86378 1562 }
91fe77eb 1563
e3c1b071 1564 /* Don't post if we need to reset whole hive on init */
1565 if (adev->gmc.xgmi.pending_reset)
1566 return false;
1567
91fe77eb 1568 if (adev->has_hw_reset) {
1569 adev->has_hw_reset = false;
1570 return true;
1571 }
1572
1573 /* bios scratch used on CIK+ */
1574 if (adev->asic_type >= CHIP_BONAIRE)
1575 return amdgpu_atombios_scratch_need_asic_init(adev);
1576
1577 /* check MEM_SIZE for older asics */
1578 reg = amdgpu_asic_get_config_memsize(adev);
1579
1580 if ((reg != 0) && (reg != 0xffffffff))
1581 return false;
1582
1583 return true;
bec86378
ML
1584}
1585
5d1eb4c4 1586/*
bb0f8429
ML
1587 * Check whether seamless boot is supported.
1588 *
7f4ce7b5
ML
1589 * So far we only support seamless boot on DCE 3.0 or later.
1590 * If users report that it works on older ASICS as well, we may
1591 * loosen this.
bb0f8429
ML
1592 */
1593bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
1594{
5dc270d3
ML
1595 switch (amdgpu_seamless) {
1596 case -1:
1597 break;
1598 case 1:
1599 return true;
1600 case 0:
1601 return false;
1602 default:
1603 DRM_ERROR("Invalid value for amdgpu.seamless: %d\n",
1604 amdgpu_seamless);
1605 return false;
1606 }
1607
3657a1d5
ML
1608 if (!(adev->flags & AMD_IS_APU))
1609 return false;
1610
5dc270d3
ML
1611 if (adev->mman.keep_stolen_vga_memory)
1612 return false;
1613
ed342a2e 1614 return amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0);
bb0f8429
ML
1615}
1616
5d1eb4c4 1617/*
2757a848
ML
1618 * Intel hosts such as Rocket Lake, Alder Lake, Raptor Lake and Sapphire Rapids
1619 * don't support dynamic speed switching. Until we have confirmation from Intel
1620 * that a specific host supports it, it's safer that we keep it disabled for all.
5d1eb4c4
ML
1621 *
1622 * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1623 * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1624 */
d9b3a066 1625static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device *adev)
5d1eb4c4
ML
1626{
1627#if IS_ENABLED(CONFIG_X86)
1628 struct cpuinfo_x86 *c = &cpu_data(0);
1629
d9b3a066
ML
1630 /* eGPU change speeds based on USB4 fabric conditions */
1631 if (dev_is_removable(adev->dev))
1632 return true;
1633
5d1eb4c4
ML
1634 if (c->x86_vendor == X86_VENDOR_INTEL)
1635 return false;
1636#endif
1637 return true;
1638}
1639
0ab5d711
ML
1640/**
1641 * amdgpu_device_should_use_aspm - check if the device should program ASPM
1642 *
1643 * @adev: amdgpu_device pointer
1644 *
1645 * Confirm whether the module parameter and pcie bridge agree that ASPM should
1646 * be set for this device.
1647 *
1648 * Returns true if it should be used or false if not.
1649 */
1650bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1651{
1652 switch (amdgpu_aspm) {
1653 case -1:
1654 break;
1655 case 0:
1656 return false;
1657 case 1:
1658 return true;
1659 default:
1660 return false;
1661 }
1a6513de
ML
1662 if (adev->flags & AMD_IS_APU)
1663 return false;
2757a848
ML
1664 if (!(adev->pm.pp_feature & PP_PCIE_DPM_MASK))
1665 return false;
0ab5d711
ML
1666 return pcie_aspm_enabled(adev->pdev);
1667}
1668
d38ceaf9
AD
1669/* if we get transitioned to only one device, take VGA back */
1670/**
06ec9070 1671 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9 1672 *
bf44e8ce 1673 * @pdev: PCI device pointer
d38ceaf9
AD
1674 * @state: enable/disable vga decode
1675 *
1676 * Enable/disable vga decode (all asics).
1677 * Returns VGA resource flags.
1678 */
bf44e8ce
CH
1679static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1680 bool state)
d38ceaf9 1681{
bf44e8ce 1682 struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
b8920e1e 1683
d38ceaf9
AD
1684 amdgpu_asic_set_vga_state(adev, state);
1685 if (state)
1686 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1687 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1688 else
1689 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1690}
1691
e3ecdffa
AD
1692/**
1693 * amdgpu_device_check_block_size - validate the vm block size
1694 *
1695 * @adev: amdgpu_device pointer
1696 *
1697 * Validates the vm block size specified via module parameter.
 1698 * The vm block size defines the number of bits in the page table versus the page directory,
1699 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1700 * page table and the remaining bits are in the page directory.
1701 */
06ec9070 1702static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
1703{
1704 /* defines number of bits in page table versus page directory,
1705 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
b8920e1e
SS
1706 * page table and the remaining bits are in the page directory
1707 */
bab4fee7
JZ
1708 if (amdgpu_vm_block_size == -1)
1709 return;
a1adf8be 1710
bab4fee7 1711 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
1712 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1713 amdgpu_vm_block_size);
97489129 1714 amdgpu_vm_block_size = -1;
a1adf8be 1715 }
a1adf8be
CZ
1716}
1717
e3ecdffa
AD
1718/**
1719 * amdgpu_device_check_vm_size - validate the vm size
1720 *
1721 * @adev: amdgpu_device pointer
1722 *
1723 * Validates the vm size in GB specified via module parameter.
1724 * The VM size is the size of the GPU virtual memory space in GB.
1725 */
06ec9070 1726static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 1727{
64dab074
AD
1728 /* no need to check the default value */
1729 if (amdgpu_vm_size == -1)
1730 return;
1731
83ca145d
ZJ
1732 if (amdgpu_vm_size < 1) {
1733 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1734 amdgpu_vm_size);
f3368128 1735 amdgpu_vm_size = -1;
83ca145d 1736 }
83ca145d
ZJ
1737}
1738
7951e376
RZ
1739static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1740{
1741 struct sysinfo si;
a9d4fe2f 1742 bool is_os_64 = (sizeof(void *) == 8);
7951e376
RZ
1743 uint64_t total_memory;
1744 uint64_t dram_size_seven_GB = 0x1B8000000;
1745 uint64_t dram_size_three_GB = 0xB8000000;
1746
1747 if (amdgpu_smu_memory_pool_size == 0)
1748 return;
1749
1750 if (!is_os_64) {
1751 DRM_WARN("Not 64-bit OS, feature not supported\n");
1752 goto def_value;
1753 }
1754 si_meminfo(&si);
1755 total_memory = (uint64_t)si.totalram * si.mem_unit;
1756
1757 if ((amdgpu_smu_memory_pool_size == 1) ||
1758 (amdgpu_smu_memory_pool_size == 2)) {
1759 if (total_memory < dram_size_three_GB)
1760 goto def_value1;
1761 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1762 (amdgpu_smu_memory_pool_size == 8)) {
1763 if (total_memory < dram_size_seven_GB)
1764 goto def_value1;
1765 } else {
1766 DRM_WARN("Smu memory pool size not supported\n");
1767 goto def_value;
1768 }
1769 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1770
1771 return;
1772
1773def_value1:
1774 DRM_WARN("No enough system memory\n");
1775def_value:
1776 adev->pm.smu_prv_buffer_size = 0;
1777}
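/* Worked example (derived from the checks above): amdgpu_smu_memory_pool_size
 * is in units of 256 MiB (value << 28 bytes). A value of 1 or 2 reserves
 * 256 MiB or 512 MiB and needs roughly 3 GiB of system RAM, while 4 or 8
 * reserves 1 GiB or 2 GiB and needs roughly 7 GiB; any other value falls back
 * to a pool size of 0.
 */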
1778
9f6a7857
HR
1779static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1780{
1781 if (!(adev->flags & AMD_IS_APU) ||
1782 adev->asic_type < CHIP_RAVEN)
1783 return 0;
1784
1785 switch (adev->asic_type) {
1786 case CHIP_RAVEN:
1787 if (adev->pdev->device == 0x15dd)
1788 adev->apu_flags |= AMD_APU_IS_RAVEN;
1789 if (adev->pdev->device == 0x15d8)
1790 adev->apu_flags |= AMD_APU_IS_PICASSO;
1791 break;
1792 case CHIP_RENOIR:
1793 if ((adev->pdev->device == 0x1636) ||
1794 (adev->pdev->device == 0x164c))
1795 adev->apu_flags |= AMD_APU_IS_RENOIR;
1796 else
1797 adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1798 break;
1799 case CHIP_VANGOGH:
1800 adev->apu_flags |= AMD_APU_IS_VANGOGH;
1801 break;
1802 case CHIP_YELLOW_CARP:
1803 break;
d0f56dc2 1804 case CHIP_CYAN_SKILLFISH:
dfcc3e8c
AD
1805 if ((adev->pdev->device == 0x13FE) ||
1806 (adev->pdev->device == 0x143F))
d0f56dc2
TZ
1807 adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1808 break;
9f6a7857 1809 default:
4eaf21b7 1810 break;
9f6a7857
HR
1811 }
1812
1813 return 0;
1814}
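/* Example of how these flags are consumed (based only on code in this file):
 * a Raven APU with PCI device id 0x15dd sets AMD_APU_IS_RAVEN and 0x15d8 sets
 * AMD_APU_IS_PICASSO; amdgpu_device_parse_gpu_info_fw() later keys firmware
 * names such as "picasso" or "raven2" off these apu_flags bits.
 */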
1815
d38ceaf9 1816/**
06ec9070 1817 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
1818 *
1819 * @adev: amdgpu_device pointer
1820 *
1821 * Validates certain module parameters and updates
1822 * the associated values used by the driver (all asics).
1823 */
912dfc84 1824static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1825{
5b011235
CZ
1826 if (amdgpu_sched_jobs < 4) {
1827 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1828 amdgpu_sched_jobs);
1829 amdgpu_sched_jobs = 4;
47fc644f 1830 } else if (!is_power_of_2(amdgpu_sched_jobs)) {
5b011235
CZ
1831 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1832 amdgpu_sched_jobs);
1833 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1834 }
d38ceaf9 1835
83e74db6 1836 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
 1837 /* gart size must be greater than or equal to 32M */
1838 dev_warn(adev->dev, "gart size (%d) too small\n",
1839 amdgpu_gart_size);
83e74db6 1840 amdgpu_gart_size = -1;
d38ceaf9
AD
1841 }
1842
36d38372 1843 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1844 /* gtt size must be greater than or equal to 32M */
36d38372
CK
1845 dev_warn(adev->dev, "gtt size (%d) too small\n",
1846 amdgpu_gtt_size);
1847 amdgpu_gtt_size = -1;
d38ceaf9
AD
1848 }
1849
d07f14be
RH
1850 /* valid range is between 4 and 9 inclusive */
1851 if (amdgpu_vm_fragment_size != -1 &&
1852 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1853 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1854 amdgpu_vm_fragment_size = -1;
1855 }
1856
5d5bd5e3
KW
1857 if (amdgpu_sched_hw_submission < 2) {
1858 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1859 amdgpu_sched_hw_submission);
1860 amdgpu_sched_hw_submission = 2;
1861 } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1862 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1863 amdgpu_sched_hw_submission);
1864 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1865 }
1866
2656fd23
AG
1867 if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1868 dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1869 amdgpu_reset_method = -1;
1870 }
1871
7951e376
RZ
1872 amdgpu_device_check_smu_prv_buffer_size(adev);
1873
06ec9070 1874 amdgpu_device_check_vm_size(adev);
d38ceaf9 1875
06ec9070 1876 amdgpu_device_check_block_size(adev);
6a7f76e7 1877
19aede77 1878 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84 1879
e3c00faa 1880 return 0;
d38ceaf9
AD
1881}
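/* Example of the clamping above (illustrative module-parameter values):
 * amdgpu.sched_jobs=6 is not a power of two and is rounded up to 8, and
 * amdgpu.sched_hw_submission=1 is raised to the minimum of 2; both cases
 * emit a dev_warn so the user can see the adjusted value.
 */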
1882
1883/**
1884 * amdgpu_switcheroo_set_state - set switcheroo state
1885 *
1886 * @pdev: pci dev pointer
1694467b 1887 * @state: vga_switcheroo state
d38ceaf9 1888 *
12024b17 1889 * Callback for the switcheroo driver. Suspends or resumes
d38ceaf9
AD
1890 * the asics before or after it is powered up using ACPI methods.
1891 */
8aba21b7
LT
1892static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1893 enum vga_switcheroo_state state)
d38ceaf9
AD
1894{
1895 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 1896 int r;
d38ceaf9 1897
b98c6299 1898 if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
1899 return;
1900
1901 if (state == VGA_SWITCHEROO_ON) {
dd4fa6c1 1902 pr_info("switched on\n");
d38ceaf9
AD
1903 /* don't suspend or resume card normally */
1904 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1905
8f66090b
TZ
1906 pci_set_power_state(pdev, PCI_D0);
1907 amdgpu_device_load_pci_state(pdev);
1908 r = pci_enable_device(pdev);
de185019
AD
1909 if (r)
1910 DRM_WARN("pci_enable_device failed (%d)\n", r);
1911 amdgpu_device_resume(dev, true);
d38ceaf9 1912
d38ceaf9 1913 dev->switch_power_state = DRM_SWITCH_POWER_ON;
d38ceaf9 1914 } else {
dd4fa6c1 1915 pr_info("switched off\n");
d38ceaf9 1916 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
5095d541 1917 amdgpu_device_prepare(dev);
de185019 1918 amdgpu_device_suspend(dev, true);
8f66090b 1919 amdgpu_device_cache_pci_state(pdev);
de185019 1920 /* Shut down the device */
8f66090b
TZ
1921 pci_disable_device(pdev);
1922 pci_set_power_state(pdev, PCI_D3cold);
d38ceaf9
AD
1923 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1924 }
1925}
1926
1927/**
1928 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1929 *
1930 * @pdev: pci dev pointer
1931 *
 1932 * Callback for the switcheroo driver. Checks whether the switcheroo
 1933 * state can be changed.
1934 * Returns true if the state can be changed, false if not.
1935 */
1936static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1937{
1938 struct drm_device *dev = pci_get_drvdata(pdev);
1939
b8920e1e 1940 /*
d38ceaf9
AD
1941 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1942 * locking inversion with the driver load path. And the access here is
1943 * completely racy anyway. So don't bother with locking for now.
1944 */
7e13ad89 1945 return atomic_read(&dev->open_count) == 0;
d38ceaf9
AD
1946}
1947
1948static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1949 .set_gpu_state = amdgpu_switcheroo_set_state,
1950 .reprobe = NULL,
1951 .can_switch = amdgpu_switcheroo_can_switch,
1952};
1953
e3ecdffa
AD
1954/**
1955 * amdgpu_device_ip_set_clockgating_state - set the CG state
1956 *
87e3f136 1957 * @dev: amdgpu_device pointer
e3ecdffa
AD
1958 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1959 * @state: clockgating state (gate or ungate)
1960 *
1961 * Sets the requested clockgating state for all instances of
1962 * the hardware IP specified.
1963 * Returns the error code from the last instance.
1964 */
43fa561f 1965int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1966 enum amd_ip_block_type block_type,
1967 enum amd_clockgating_state state)
d38ceaf9 1968{
43fa561f 1969 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1970 int i, r = 0;
1971
1972 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1973 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1974 continue;
c722865a
RZ
1975 if (adev->ip_blocks[i].version->type != block_type)
1976 continue;
1977 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1978 continue;
1979 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1980 (void *)adev, state);
1981 if (r)
1982 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1983 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1984 }
1985 return r;
1986}
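/* Hypothetical call site (the enum values come from amd_shared.h, the caller
 * itself is illustrative):
 *
 *	amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
 *					       AMD_CG_STATE_GATE);
 *
 * gates clockgating on every valid GFX IP instance and returns the error code
 * from the last instance that implements the callback.
 */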
1987
e3ecdffa
AD
1988/**
1989 * amdgpu_device_ip_set_powergating_state - set the PG state
1990 *
87e3f136 1991 * @dev: amdgpu_device pointer
e3ecdffa
AD
1992 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1993 * @state: powergating state (gate or ungate)
1994 *
1995 * Sets the requested powergating state for all instances of
1996 * the hardware IP specified.
1997 * Returns the error code from the last instance.
1998 */
43fa561f 1999int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
2000 enum amd_ip_block_type block_type,
2001 enum amd_powergating_state state)
d38ceaf9 2002{
43fa561f 2003 struct amdgpu_device *adev = dev;
d38ceaf9
AD
2004 int i, r = 0;
2005
2006 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2007 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 2008 continue;
c722865a
RZ
2009 if (adev->ip_blocks[i].version->type != block_type)
2010 continue;
2011 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
2012 continue;
2013 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
2014 (void *)adev, state);
2015 if (r)
2016 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
2017 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
2018 }
2019 return r;
2020}
2021
e3ecdffa
AD
2022/**
2023 * amdgpu_device_ip_get_clockgating_state - get the CG state
2024 *
2025 * @adev: amdgpu_device pointer
2026 * @flags: clockgating feature flags
2027 *
2028 * Walks the list of IPs on the device and updates the clockgating
2029 * flags for each IP.
2030 * Updates @flags with the feature flags for each hardware IP where
2031 * clockgating is enabled.
2032 */
2990a1fc 2033void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
25faeddc 2034 u64 *flags)
6cb2d4e4
HR
2035{
2036 int i;
2037
2038 for (i = 0; i < adev->num_ip_blocks; i++) {
2039 if (!adev->ip_blocks[i].status.valid)
2040 continue;
2041 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
2042 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
2043 }
2044}
2045
e3ecdffa
AD
2046/**
2047 * amdgpu_device_ip_wait_for_idle - wait for idle
2048 *
2049 * @adev: amdgpu_device pointer
2050 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2051 *
 2052 * Waits for the requested hardware IP to be idle.
2053 * Returns 0 for success or a negative error code on failure.
2054 */
2990a1fc
AD
2055int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
2056 enum amd_ip_block_type block_type)
5dbbb60b
AD
2057{
2058 int i, r;
2059
2060 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2061 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 2062 continue;
a1255107
AD
2063 if (adev->ip_blocks[i].version->type == block_type) {
2064 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
2065 if (r)
2066 return r;
2067 break;
2068 }
2069 }
2070 return 0;
2071
2072}
2073
e3ecdffa
AD
2074/**
2075 * amdgpu_device_ip_is_idle - is the hardware IP idle
2076 *
2077 * @adev: amdgpu_device pointer
2078 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2079 *
2080 * Check if the hardware IP is idle or not.
 2081 * Returns true if the IP is idle, false if not.
2082 */
2990a1fc
AD
2083bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
2084 enum amd_ip_block_type block_type)
5dbbb60b
AD
2085{
2086 int i;
2087
2088 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2089 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 2090 continue;
a1255107
AD
2091 if (adev->ip_blocks[i].version->type == block_type)
2092 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
2093 }
2094 return true;
2095
2096}
2097
e3ecdffa
AD
2098/**
2099 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
2100 *
2101 * @adev: amdgpu_device pointer
87e3f136 2102 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
2103 *
2104 * Returns a pointer to the hardware IP block structure
2105 * if it exists for the asic, otherwise NULL.
2106 */
2990a1fc
AD
2107struct amdgpu_ip_block *
2108amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
2109 enum amd_ip_block_type type)
d38ceaf9
AD
2110{
2111 int i;
2112
2113 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 2114 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
2115 return &adev->ip_blocks[i];
2116
2117 return NULL;
2118}
2119
2120/**
2990a1fc 2121 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
2122 *
2123 * @adev: amdgpu_device pointer
5fc3aeeb 2124 * @type: enum amd_ip_block_type
d38ceaf9
AD
2125 * @major: major version
2126 * @minor: minor version
2127 *
 2128 * Returns 0 if equal or greater,
 2129 * 1 if smaller or the ip_block doesn't exist
2130 */
2990a1fc
AD
2131int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
2132 enum amd_ip_block_type type,
2133 u32 major, u32 minor)
d38ceaf9 2134{
2990a1fc 2135 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 2136
a1255107
AD
2137 if (ip_block && ((ip_block->version->major > major) ||
2138 ((ip_block->version->major == major) &&
2139 (ip_block->version->minor >= minor))))
d38ceaf9
AD
2140 return 0;
2141
2142 return 1;
2143}
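/* Usage sketch (hypothetical caller, consistent with the return values
 * documented above): to require an SMU block of version 7.0 or newer:
 *
 *	if (amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_SMC,
 *					       7, 0) == 0)
 *		// SMU 7.0+ is present
 *
 * while a return of 1 means the block is older than 7.0 or missing entirely.
 */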
2144
a1255107 2145/**
2990a1fc 2146 * amdgpu_device_ip_block_add
a1255107
AD
2147 *
2148 * @adev: amdgpu_device pointer
2149 * @ip_block_version: pointer to the IP to add
2150 *
2151 * Adds the IP block driver information to the collection of IPs
2152 * on the asic.
2153 */
2990a1fc
AD
2154int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
2155 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
2156{
2157 if (!ip_block_version)
2158 return -EINVAL;
2159
7bd939d0
LG
2160 switch (ip_block_version->type) {
2161 case AMD_IP_BLOCK_TYPE_VCN:
2162 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
2163 return 0;
2164 break;
2165 case AMD_IP_BLOCK_TYPE_JPEG:
2166 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
2167 return 0;
2168 break;
2169 default:
2170 break;
2171 }
2172
e966a725 2173 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
2174 ip_block_version->funcs->name);
2175
a1255107
AD
2176 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
2177
2178 return 0;
2179}
2180
e3ecdffa
AD
2181/**
2182 * amdgpu_device_enable_virtual_display - enable virtual display feature
2183 *
2184 * @adev: amdgpu_device pointer
2185 *
 2186 * Enables the virtual display feature if the user has enabled it via
 2187 * the module parameter virtual_display. This feature provides virtual
 2188 * display hardware on headless boards or in virtualized environments.
2189 * This function parses and validates the configuration string specified by
 2190 * the user and configures the virtual display configuration (number of
2191 * virtual connectors, crtcs, etc.) specified.
2192 */
483ef985 2193static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
2194{
2195 adev->enable_virtual_display = false;
2196
2197 if (amdgpu_virtual_display) {
8f66090b 2198 const char *pci_address_name = pci_name(adev->pdev);
0f66356d 2199 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
2200
2201 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
2202 pciaddstr_tmp = pciaddstr;
0f66356d
ED
2203 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
2204 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
2205 if (!strcmp("all", pciaddname)
2206 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
2207 long num_crtc;
2208 int res = -1;
2209
9accf2fd 2210 adev->enable_virtual_display = true;
0f66356d
ED
2211
2212 if (pciaddname_tmp)
2213 res = kstrtol(pciaddname_tmp, 10,
2214 &num_crtc);
2215
2216 if (!res) {
2217 if (num_crtc < 1)
2218 num_crtc = 1;
2219 if (num_crtc > 6)
2220 num_crtc = 6;
2221 adev->mode_info.num_crtc = num_crtc;
2222 } else {
2223 adev->mode_info.num_crtc = 1;
2224 }
9accf2fd
ED
2225 break;
2226 }
2227 }
2228
0f66356d
ED
2229 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
2230 amdgpu_virtual_display, pci_address_name,
2231 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
2232
2233 kfree(pciaddstr);
2234 }
2235}
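/* Example parameter strings (format inferred from the parser above, shown
 * purely as an illustration): "amdgpu.virtual_display=0000:03:00.0,2;" enables
 * two virtual crtcs on that one device, whereas "amdgpu.virtual_display=all"
 * enables the feature on every amdgpu device with the default single crtc.
 */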
2236
25263da3
AD
2237void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
2238{
2239 if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
2240 adev->mode_info.num_crtc = 1;
2241 adev->enable_virtual_display = true;
2242 DRM_INFO("virtual_display:%d, num_crtc:%d\n",
2243 adev->enable_virtual_display, adev->mode_info.num_crtc);
2244 }
2245}
2246
e3ecdffa
AD
2247/**
2248 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
2249 *
2250 * @adev: amdgpu_device pointer
2251 *
2252 * Parses the asic configuration parameters specified in the gpu info
 2253 * firmware and makes them available to the driver for use in configuring
2254 * the asic.
2255 * Returns 0 on success, -EINVAL on failure.
2256 */
e2a75f88
AD
2257static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
2258{
e2a75f88 2259 const char *chip_name;
c0a43457 2260 char fw_name[40];
e2a75f88
AD
2261 int err;
2262 const struct gpu_info_firmware_header_v1_0 *hdr;
2263
ab4fe3e1
HR
2264 adev->firmware.gpu_info_fw = NULL;
2265
fb915c87
AD
2266 if (adev->mman.discovery_bin)
2267 return 0;
258620d0 2268
e2a75f88 2269 switch (adev->asic_type) {
e2a75f88
AD
2270 default:
2271 return 0;
2272 case CHIP_VEGA10:
2273 chip_name = "vega10";
2274 break;
3f76dced
AD
2275 case CHIP_VEGA12:
2276 chip_name = "vega12";
2277 break;
2d2e5e7e 2278 case CHIP_RAVEN:
54f78a76 2279 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
54c4d17e 2280 chip_name = "raven2";
54f78a76 2281 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
741deade 2282 chip_name = "picasso";
54c4d17e
FX
2283 else
2284 chip_name = "raven";
2d2e5e7e 2285 break;
65e60f6e
LM
2286 case CHIP_ARCTURUS:
2287 chip_name = "arcturus";
2288 break;
42b325e5
XY
2289 case CHIP_NAVI12:
2290 chip_name = "navi12";
2291 break;
e2a75f88
AD
2292 }
2293
2294 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
b31d3063 2295 err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
e2a75f88
AD
2296 if (err) {
2297 dev_err(adev->dev,
b31d3063 2298 "Failed to get gpu_info firmware \"%s\"\n",
e2a75f88
AD
2299 fw_name);
2300 goto out;
2301 }
2302
ab4fe3e1 2303 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
2304 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
2305
2306 switch (hdr->version_major) {
2307 case 1:
2308 {
2309 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 2310 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
2311 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2312
cc375d8c
TY
2313 /*
 2314 * Should be dropped when DAL no longer needs it.
2315 */
2316 if (adev->asic_type == CHIP_NAVI12)
ec51d3fa
XY
2317 goto parse_soc_bounding_box;
2318
b5ab16bf
AD
2319 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2320 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2321 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2322 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 2323 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
2324 le32_to_cpu(gpu_info_fw->gc_num_tccs);
2325 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2326 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2327 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2328 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 2329 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
2330 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2331 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
2332 adev->gfx.cu_info.max_waves_per_simd =
2333 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2334 adev->gfx.cu_info.max_scratch_slots_per_cu =
2335 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2336 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 2337 if (hdr->version_minor >= 1) {
35c2e910
HZ
2338 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2339 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2340 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2341 adev->gfx.config.num_sc_per_sh =
2342 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2343 adev->gfx.config.num_packer_per_sc =
2344 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2345 }
ec51d3fa
XY
2346
2347parse_soc_bounding_box:
ec51d3fa
XY
2348 /*
 2349 * soc bounding box info is not integrated in the discovery table,
258620d0 2350 * so we always need to parse it from the gpu info firmware when needed.
ec51d3fa 2351 */
48321c3d
HW
2352 if (hdr->version_minor == 2) {
2353 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2354 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2355 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2356 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2357 }
e2a75f88
AD
2358 break;
2359 }
2360 default:
2361 dev_err(adev->dev,
2362 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2363 err = -EINVAL;
2364 goto out;
2365 }
2366out:
e2a75f88
AD
2367 return err;
2368}
2369
e3ecdffa
AD
2370/**
2371 * amdgpu_device_ip_early_init - run early init for hardware IPs
2372 *
2373 * @adev: amdgpu_device pointer
2374 *
2375 * Early initialization pass for hardware IPs. The hardware IPs that make
 2376 * up each asic are discovered and each IP's early_init callback is run. This
2377 * is the first stage in initializing the asic.
2378 * Returns 0 on success, negative error code on failure.
2379 */
06ec9070 2380static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 2381{
901e2be2 2382 struct pci_dev *parent;
aaa36a97 2383 int i, r;
ced69502 2384 bool total;
d38ceaf9 2385
483ef985 2386 amdgpu_device_enable_virtual_display(adev);
a6be7570 2387
00a979f3 2388 if (amdgpu_sriov_vf(adev)) {
00a979f3 2389 r = amdgpu_virt_request_full_gpu(adev, true);
aaa36a97
AD
2390 if (r)
2391 return r;
00a979f3
WS
2392 }
2393
d38ceaf9 2394 switch (adev->asic_type) {
33f34802
KW
2395#ifdef CONFIG_DRM_AMDGPU_SI
2396 case CHIP_VERDE:
2397 case CHIP_TAHITI:
2398 case CHIP_PITCAIRN:
2399 case CHIP_OLAND:
2400 case CHIP_HAINAN:
295d0daf 2401 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
2402 r = si_set_ip_blocks(adev);
2403 if (r)
2404 return r;
2405 break;
2406#endif
a2e73f56
AD
2407#ifdef CONFIG_DRM_AMDGPU_CIK
2408 case CHIP_BONAIRE:
2409 case CHIP_HAWAII:
2410 case CHIP_KAVERI:
2411 case CHIP_KABINI:
2412 case CHIP_MULLINS:
e1ad2d53 2413 if (adev->flags & AMD_IS_APU)
a2e73f56 2414 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
2415 else
2416 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
2417
2418 r = cik_set_ip_blocks(adev);
2419 if (r)
2420 return r;
2421 break;
2422#endif
da87c30b
AD
2423 case CHIP_TOPAZ:
2424 case CHIP_TONGA:
2425 case CHIP_FIJI:
2426 case CHIP_POLARIS10:
2427 case CHIP_POLARIS11:
2428 case CHIP_POLARIS12:
2429 case CHIP_VEGAM:
2430 case CHIP_CARRIZO:
2431 case CHIP_STONEY:
2432 if (adev->flags & AMD_IS_APU)
2433 adev->family = AMDGPU_FAMILY_CZ;
2434 else
2435 adev->family = AMDGPU_FAMILY_VI;
2436
2437 r = vi_set_ip_blocks(adev);
2438 if (r)
2439 return r;
2440 break;
d38ceaf9 2441 default:
63352b7f
AD
2442 r = amdgpu_discovery_set_ip_blocks(adev);
2443 if (r)
2444 return r;
2445 break;
d38ceaf9
AD
2446 }
2447
901e2be2
AD
2448 if (amdgpu_has_atpx() &&
2449 (amdgpu_is_atpx_hybrid() ||
2450 amdgpu_has_atpx_dgpu_power_cntl()) &&
2451 ((adev->flags & AMD_IS_APU) == 0) &&
7b1c6263 2452 !dev_is_removable(&adev->pdev->dev))
901e2be2
AD
2453 adev->flags |= AMD_IS_PX;
2454
85ac2021 2455 if (!(adev->flags & AMD_IS_APU)) {
c4c8955b 2456 parent = pcie_find_root_port(adev->pdev);
85ac2021
AD
2457 adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2458 }
901e2be2 2459
1884734a 2460
3b94fb10 2461 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 2462 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 2463 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
4215a119
HC
2464 if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2465 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
d9b3a066 2466 if (!amdgpu_device_pcie_dynamic_switching_supported(adev))
fbf1035b 2467 adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
00f54b97 2468
ced69502 2469 total = true;
d38ceaf9
AD
2470 for (i = 0; i < adev->num_ip_blocks; i++) {
2471 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
0c451baf 2472 DRM_WARN("disabled ip block: %d <%s>\n",
ed8cf00c 2473 i, adev->ip_blocks[i].version->funcs->name);
a1255107 2474 adev->ip_blocks[i].status.valid = false;
d38ceaf9 2475 } else {
a1255107
AD
2476 if (adev->ip_blocks[i].version->funcs->early_init) {
2477 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 2478 if (r == -ENOENT) {
a1255107 2479 adev->ip_blocks[i].status.valid = false;
2c1a2784 2480 } else if (r) {
a1255107
AD
2481 DRM_ERROR("early_init of IP block <%s> failed %d\n",
2482 adev->ip_blocks[i].version->funcs->name, r);
ced69502 2483 total = false;
2c1a2784 2484 } else {
a1255107 2485 adev->ip_blocks[i].status.valid = true;
2c1a2784 2486 }
974e6b64 2487 } else {
a1255107 2488 adev->ip_blocks[i].status.valid = true;
d38ceaf9 2489 }
d38ceaf9 2490 }
21a249ca
AD
2491 /* get the vbios after the asic_funcs are set up */
2492 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
2493 r = amdgpu_device_parse_gpu_info_fw(adev);
2494 if (r)
2495 return r;
2496
21a249ca 2497 /* Read BIOS */
9535a86a
SZ
2498 if (amdgpu_device_read_bios(adev)) {
2499 if (!amdgpu_get_bios(adev))
2500 return -EINVAL;
21a249ca 2501
9535a86a
SZ
2502 r = amdgpu_atombios_init(adev);
2503 if (r) {
2504 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2505 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2506 return r;
2507 }
21a249ca 2508 }
77eabc6f
PJZ
2509
2510 /*get pf2vf msg info at it's earliest time*/
2511 if (amdgpu_sriov_vf(adev))
2512 amdgpu_virt_init_data_exchange(adev);
2513
21a249ca 2514 }
d38ceaf9 2515 }
ced69502
ML
2516 if (!total)
2517 return -ENODEV;
d38ceaf9 2518
00fa4035 2519 amdgpu_amdkfd_device_probe(adev);
395d1fb9
NH
2520 adev->cg_flags &= amdgpu_cg_mask;
2521 adev->pg_flags &= amdgpu_pg_mask;
2522
d38ceaf9
AD
2523 return 0;
2524}
2525
0a4f2520
RZ
2526static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2527{
2528 int i, r;
2529
2530 for (i = 0; i < adev->num_ip_blocks; i++) {
2531 if (!adev->ip_blocks[i].status.sw)
2532 continue;
2533 if (adev->ip_blocks[i].status.hw)
2534 continue;
2535 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 2536 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
2537 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2538 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2539 if (r) {
2540 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2541 adev->ip_blocks[i].version->funcs->name, r);
2542 return r;
2543 }
2544 adev->ip_blocks[i].status.hw = true;
2545 }
2546 }
2547
2548 return 0;
2549}
2550
2551static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2552{
2553 int i, r;
2554
2555 for (i = 0; i < adev->num_ip_blocks; i++) {
2556 if (!adev->ip_blocks[i].status.sw)
2557 continue;
2558 if (adev->ip_blocks[i].status.hw)
2559 continue;
2560 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2561 if (r) {
2562 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2563 adev->ip_blocks[i].version->funcs->name, r);
2564 return r;
2565 }
2566 adev->ip_blocks[i].status.hw = true;
2567 }
2568
2569 return 0;
2570}
2571
7a3e0bb2
RZ
2572static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2573{
2574 int r = 0;
2575 int i;
80f41f84 2576 uint32_t smu_version;
7a3e0bb2
RZ
2577
2578 if (adev->asic_type >= CHIP_VEGA10) {
2579 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
2580 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2581 continue;
2582
e3c1b071 2583 if (!adev->ip_blocks[i].status.sw)
2584 continue;
2585
482f0e53
ML
2586 /* no need to do the fw loading again if already done*/
2587 if (adev->ip_blocks[i].status.hw == true)
2588 break;
2589
53b3f8f4 2590 if (amdgpu_in_reset(adev) || adev->in_suspend) {
482f0e53
ML
2591 r = adev->ip_blocks[i].version->funcs->resume(adev);
2592 if (r) {
2593 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 2594 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
2595 return r;
2596 }
2597 } else {
2598 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2599 if (r) {
2600 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2601 adev->ip_blocks[i].version->funcs->name, r);
2602 return r;
7a3e0bb2 2603 }
7a3e0bb2 2604 }
482f0e53
ML
2605
2606 adev->ip_blocks[i].status.hw = true;
2607 break;
7a3e0bb2
RZ
2608 }
2609 }
482f0e53 2610
8973d9ec
ED
2611 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2612 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 2613
80f41f84 2614 return r;
7a3e0bb2
RZ
2615}
2616
5fd8518d
AG
2617static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2618{
2619 long timeout;
2620 int r, i;
2621
2622 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2623 struct amdgpu_ring *ring = adev->rings[i];
2624
2625 /* No need to setup the GPU scheduler for rings that don't need it */
2626 if (!ring || ring->no_scheduler)
2627 continue;
2628
2629 switch (ring->funcs->type) {
2630 case AMDGPU_RING_TYPE_GFX:
2631 timeout = adev->gfx_timeout;
2632 break;
2633 case AMDGPU_RING_TYPE_COMPUTE:
2634 timeout = adev->compute_timeout;
2635 break;
2636 case AMDGPU_RING_TYPE_SDMA:
2637 timeout = adev->sdma_timeout;
2638 break;
2639 default:
2640 timeout = adev->video_timeout;
2641 break;
2642 }
2643
a6149f03 2644 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, NULL,
56e44960 2645 DRM_SCHED_PRIORITY_COUNT,
11f25c84 2646 ring->num_hw_submission, 0,
8ab62eda
JG
2647 timeout, adev->reset_domain->wq,
2648 ring->sched_score, ring->name,
2649 adev->dev);
5fd8518d
AG
2650 if (r) {
2651 DRM_ERROR("Failed to create scheduler on ring %s.\n",
2652 ring->name);
2653 return r;
2654 }
037b98a2
AD
2655 r = amdgpu_uvd_entity_init(adev, ring);
2656 if (r) {
2657 DRM_ERROR("Failed to create UVD scheduling entity on ring %s.\n",
2658 ring->name);
2659 return r;
2660 }
2661 r = amdgpu_vce_entity_init(adev, ring);
2662 if (r) {
2663 DRM_ERROR("Failed to create VCE scheduling entity on ring %s.\n",
2664 ring->name);
2665 return r;
2666 }
5fd8518d
AG
2667 }
2668
d425c6f4
JZ
2669 amdgpu_xcp_update_partition_sched_list(adev);
2670
5fd8518d
AG
2671 return 0;
2672}
2673
2674
e3ecdffa
AD
2675/**
2676 * amdgpu_device_ip_init - run init for hardware IPs
2677 *
2678 * @adev: amdgpu_device pointer
2679 *
2680 * Main initialization pass for hardware IPs. The list of all the hardware
2681 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2682 * are run. sw_init initializes the software state associated with each IP
2683 * and hw_init initializes the hardware associated with each IP.
2684 * Returns 0 on success, negative error code on failure.
2685 */
06ec9070 2686static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
2687{
2688 int i, r;
2689
c030f2e4 2690 r = amdgpu_ras_init(adev);
2691 if (r)
2692 return r;
2693
d38ceaf9 2694 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2695 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2696 continue;
a1255107 2697 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 2698 if (r) {
a1255107
AD
2699 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2700 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 2701 goto init_failed;
2c1a2784 2702 }
a1255107 2703 adev->ip_blocks[i].status.sw = true;
bfca0289 2704
c1c39032
AD
2705 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2706 /* need to do common hw init early so everything is set up for gmc */
2707 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2708 if (r) {
2709 DRM_ERROR("hw_init %d failed %d\n", i, r);
2710 goto init_failed;
2711 }
2712 adev->ip_blocks[i].status.hw = true;
2713 } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2714 /* need to do gmc hw init early so we can allocate gpu mem */
892deb48
VS
2715 /* Try to reserve bad pages early */
2716 if (amdgpu_sriov_vf(adev))
2717 amdgpu_virt_exchange_data(adev);
2718
7ccfd79f 2719 r = amdgpu_device_mem_scratch_init(adev);
2c1a2784 2720 if (r) {
7ccfd79f 2721 DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
72d3f592 2722 goto init_failed;
2c1a2784 2723 }
a1255107 2724 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
2725 if (r) {
2726 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 2727 goto init_failed;
2c1a2784 2728 }
06ec9070 2729 r = amdgpu_device_wb_init(adev);
2c1a2784 2730 if (r) {
06ec9070 2731 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 2732 goto init_failed;
2c1a2784 2733 }
a1255107 2734 adev->ip_blocks[i].status.hw = true;
2493664f
ML
2735
2736 /* right after GMC hw init, we create CSA */
02ff519e 2737 if (adev->gfx.mcbp) {
1e256e27 2738 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
58ab2c08
CK
2739 AMDGPU_GEM_DOMAIN_VRAM |
2740 AMDGPU_GEM_DOMAIN_GTT,
2741 AMDGPU_CSA_SIZE);
2493664f
ML
2742 if (r) {
2743 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 2744 goto init_failed;
2493664f
ML
2745 }
2746 }
c8031019
APS
2747
2748 r = amdgpu_seq64_init(adev);
2749 if (r) {
2750 DRM_ERROR("allocate seq64 failed %d\n", r);
2751 goto init_failed;
2752 }
d38ceaf9
AD
2753 }
2754 }
2755
c9ffa427 2756 if (amdgpu_sriov_vf(adev))
22c16d25 2757 amdgpu_virt_init_data_exchange(adev);
c9ffa427 2758
533aed27
AG
2759 r = amdgpu_ib_pool_init(adev);
2760 if (r) {
2761 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2762 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2763 goto init_failed;
2764 }
2765
c8963ea4
RZ
2766 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2767 if (r)
72d3f592 2768 goto init_failed;
0a4f2520
RZ
2769
2770 r = amdgpu_device_ip_hw_init_phase1(adev);
2771 if (r)
72d3f592 2772 goto init_failed;
0a4f2520 2773
7a3e0bb2
RZ
2774 r = amdgpu_device_fw_loading(adev);
2775 if (r)
72d3f592 2776 goto init_failed;
7a3e0bb2 2777
0a4f2520
RZ
2778 r = amdgpu_device_ip_hw_init_phase2(adev);
2779 if (r)
72d3f592 2780 goto init_failed;
d38ceaf9 2781
121a2bc6
AG
2782 /*
2783 * retired pages will be loaded from eeprom and reserved here,
2784 * it should be called after amdgpu_device_ip_hw_init_phase2 since
2785 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2786 * for I2C communication which only true at this point.
b82e65a9
GC
2787 *
 2788 * amdgpu_ras_recovery_init may fail, but the upper layers only care about
 2789 * failures caused by a bad gpu situation and stop the amdgpu init process
 2790 * accordingly. For other failure cases, it still releases all
 2791 * the resources and prints an error message, rather than returning a
 2792 * negative value to the upper level.
121a2bc6
AG
2793 *
2794 * Note: theoretically, this should be called before all vram allocations
 2795 * to keep retired pages from being allocated and abused
2796 */
b82e65a9
GC
2797 r = amdgpu_ras_recovery_init(adev);
2798 if (r)
2799 goto init_failed;
121a2bc6 2800
cfbb6b00
AG
2801 /**
 2802 * In case of XGMI, grab an extra reference on the reset domain for this device
2803 */
a4c63caf 2804 if (adev->gmc.xgmi.num_physical_nodes > 1) {
cfbb6b00 2805 if (amdgpu_xgmi_add_device(adev) == 0) {
46c67660 2806 if (!amdgpu_sriov_vf(adev)) {
2efc30f0
VC
2807 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2808
dfd0287b
LH
2809 if (WARN_ON(!hive)) {
2810 r = -ENOENT;
2811 goto init_failed;
2812 }
2813
46c67660 2814 if (!hive->reset_domain ||
2815 !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2816 r = -ENOENT;
2817 amdgpu_put_xgmi_hive(hive);
2818 goto init_failed;
2819 }
2820
2821 /* Drop the early temporary reset domain we created for device */
2822 amdgpu_reset_put_reset_domain(adev->reset_domain);
2823 adev->reset_domain = hive->reset_domain;
9dfa4860 2824 amdgpu_put_xgmi_hive(hive);
cfbb6b00 2825 }
a4c63caf
AG
2826 }
2827 }
2828
5fd8518d
AG
2829 r = amdgpu_device_init_schedulers(adev);
2830 if (r)
2831 goto init_failed;
e3c1b071 2832
b7043800
AD
2833 if (adev->mman.buffer_funcs_ring->sched.ready)
2834 amdgpu_ttm_set_buffer_funcs_status(adev, true);
2835
e3c1b071 2836 /* Don't init kfd if whole hive need to be reset during init */
84b4dd3f
PY
2837 if (!adev->gmc.xgmi.pending_reset) {
2838 kgd2kfd_init_zone_device(adev);
e3c1b071 2839 amdgpu_amdkfd_device_init(adev);
84b4dd3f 2840 }
c6332b97 2841
bd607166
KR
2842 amdgpu_fru_get_product_info(adev);
2843
72d3f592 2844init_failed:
c6332b97 2845
72d3f592 2846 return r;
d38ceaf9
AD
2847}
2848
e3ecdffa
AD
2849/**
2850 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2851 *
2852 * @adev: amdgpu_device pointer
2853 *
2854 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2855 * this function before a GPU reset. If the value is retained after a
 2856 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2857 */
06ec9070 2858static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2859{
2860 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2861}
2862
e3ecdffa
AD
2863/**
2864 * amdgpu_device_check_vram_lost - check if vram is valid
2865 *
2866 * @adev: amdgpu_device pointer
2867 *
2868 * Checks the reset magic value written to the gart pointer in VRAM.
 2869 * The driver calls this after a GPU reset to see if the contents of
 2870 * VRAM have been lost or not.
 2871 * Returns true if vram is lost, false if not.
2872 */
06ec9070 2873static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2874{
dadce777
EQ
2875 if (memcmp(adev->gart.ptr, adev->reset_magic,
2876 AMDGPU_RESET_MAGIC_NUM))
2877 return true;
2878
53b3f8f4 2879 if (!amdgpu_in_reset(adev))
dadce777
EQ
2880 return false;
2881
2882 /*
2883 * For all ASICs with baco/mode1 reset, the VRAM is
2884 * always assumed to be lost.
2885 */
2886 switch (amdgpu_asic_reset_method(adev)) {
2887 case AMD_RESET_METHOD_BACO:
2888 case AMD_RESET_METHOD_MODE1:
2889 return true;
2890 default:
2891 return false;
2892 }
0c49e0b8
CZ
2893}
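/* Reset-magic flow in brief (a summary of the two helpers above, no new
 * behaviour): amdgpu_device_fill_reset_magic() snapshots the first
 * AMDGPU_RESET_MAGIC_NUM bytes behind the GART pointer before a reset, and
 * amdgpu_device_check_vram_lost() reports VRAM as lost after the reset when
 * those bytes no longer match, or unconditionally for BACO/mode1 resets.
 */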
2894
e3ecdffa 2895/**
1112a46b 2896 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2897 *
2898 * @adev: amdgpu_device pointer
b8b72130 2899 * @state: clockgating state (gate or ungate)
e3ecdffa 2900 *
e3ecdffa 2901 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2902 * set_clockgating_state callbacks are run.
2903 * Late initialization pass enabling clockgating for hardware IPs.
2904 * Fini or suspend, pass disabling clockgating for hardware IPs.
e3ecdffa
AD
2905 * Returns 0 on success, negative error code on failure.
2906 */
fdd34271 2907
5d89bb2d
LL
2908int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2909 enum amd_clockgating_state state)
d38ceaf9 2910{
1112a46b 2911 int i, j, r;
d38ceaf9 2912
4a2ba394
SL
2913 if (amdgpu_emu_mode == 1)
2914 return 0;
2915
1112a46b
RZ
2916 for (j = 0; j < adev->num_ip_blocks; j++) {
2917 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2918 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2919 continue;
47198eb7 2920 /* skip CG for GFX, SDMA on S0ix */
5d70a549 2921 if (adev->in_s0ix &&
47198eb7
AD
2922 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2923 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2924 continue;
4a446d55 2925 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2926 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2927 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2928 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2929 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2930 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2931 /* enable clockgating to save power */
a1255107 2932 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2933 state);
4a446d55
AD
2934 if (r) {
2935 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2936 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2937 return r;
2938 }
b0b00ff1 2939 }
d38ceaf9 2940 }
06b18f61 2941
c9f96fd5
RZ
2942 return 0;
2943}
2944
5d89bb2d
LL
2945int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2946 enum amd_powergating_state state)
c9f96fd5 2947{
1112a46b 2948 int i, j, r;
06b18f61 2949
c9f96fd5
RZ
2950 if (amdgpu_emu_mode == 1)
2951 return 0;
2952
1112a46b
RZ
2953 for (j = 0; j < adev->num_ip_blocks; j++) {
2954 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2955 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5 2956 continue;
47198eb7 2957 /* skip PG for GFX, SDMA on S0ix */
5d70a549 2958 if (adev->in_s0ix &&
47198eb7
AD
2959 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2960 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2961 continue;
c9f96fd5
RZ
2962 /* skip CG for VCE/UVD, it's handled specially */
2963 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2964 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2965 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2966 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2967 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2968 /* enable powergating to save power */
2969 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2970 state);
c9f96fd5
RZ
2971 if (r) {
2972 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2973 adev->ip_blocks[i].version->funcs->name, r);
2974 return r;
2975 }
2976 }
2977 }
2dc80b00
S
2978 return 0;
2979}
2980
beff74bc
AD
2981static int amdgpu_device_enable_mgpu_fan_boost(void)
2982{
2983 struct amdgpu_gpu_instance *gpu_ins;
2984 struct amdgpu_device *adev;
2985 int i, ret = 0;
2986
2987 mutex_lock(&mgpu_info.mutex);
2988
2989 /*
2990 * MGPU fan boost feature should be enabled
2991 * only when there are two or more dGPUs in
2992 * the system
2993 */
2994 if (mgpu_info.num_dgpu < 2)
2995 goto out;
2996
2997 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2998 gpu_ins = &(mgpu_info.gpu_ins[i]);
2999 adev = gpu_ins->adev;
3000 if (!(adev->flags & AMD_IS_APU) &&
f10bb940 3001 !gpu_ins->mgpu_fan_enabled) {
beff74bc
AD
3002 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
3003 if (ret)
3004 break;
3005
3006 gpu_ins->mgpu_fan_enabled = 1;
3007 }
3008 }
3009
3010out:
3011 mutex_unlock(&mgpu_info.mutex);
3012
3013 return ret;
3014}
3015
e3ecdffa
AD
3016/**
3017 * amdgpu_device_ip_late_init - run late init for hardware IPs
3018 *
3019 * @adev: amdgpu_device pointer
3020 *
3021 * Late initialization pass for hardware IPs. The list of all the hardware
3022 * IPs that make up the asic is walked and the late_init callbacks are run.
3023 * late_init covers any special initialization that an IP requires
 3024 * after all of them have been initialized or something that needs to happen
3025 * late in the init process.
3026 * Returns 0 on success, negative error code on failure.
3027 */
06ec9070 3028static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 3029{
60599a03 3030 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
3031 int i = 0, r;
3032
3033 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 3034 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
3035 continue;
3036 if (adev->ip_blocks[i].version->funcs->late_init) {
3037 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
3038 if (r) {
3039 DRM_ERROR("late_init of IP block <%s> failed %d\n",
3040 adev->ip_blocks[i].version->funcs->name, r);
3041 return r;
3042 }
2dc80b00 3043 }
73f847db 3044 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
3045 }
3046
867e24ca 3047 r = amdgpu_ras_late_init(adev);
3048 if (r) {
3049 DRM_ERROR("amdgpu_ras_late_init failed %d", r);
3050 return r;
3051 }
3052
a891d239
DL
3053 amdgpu_ras_set_error_query_ready(adev, true);
3054
1112a46b
RZ
3055 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
3056 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 3057
06ec9070 3058 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 3059
beff74bc
AD
3060 r = amdgpu_device_enable_mgpu_fan_boost();
3061 if (r)
3062 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
3063
4da8b639 3064 /* For passthrough configuration on arcturus and aldebaran, enable special SBR handling */
47fc644f
SS
3065 if (amdgpu_passthrough(adev) &&
3066 ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
3067 adev->asic_type == CHIP_ALDEBARAN))
bc143d8b 3068 amdgpu_dpm_handle_passthrough_sbr(adev, true);
60599a03
EQ
3069
3070 if (adev->gmc.xgmi.num_physical_nodes > 1) {
3071 mutex_lock(&mgpu_info.mutex);
3072
3073 /*
3074 * Reset device p-state to low as this was booted with high.
3075 *
3076 * This should be performed only after all devices from the same
3077 * hive get initialized.
3078 *
 3079 * However, it's unknown in advance how many devices are in the hive,
 3080 * as they are counted one by one as the devices initialize.
3081 *
3082 * So, we wait for all XGMI interlinked devices initialized.
3083 * This may bring some delays as those devices may come from
3084 * different hives. But that should be OK.
3085 */
3086 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
3087 for (i = 0; i < mgpu_info.num_gpu; i++) {
3088 gpu_instance = &(mgpu_info.gpu_ins[i]);
3089 if (gpu_instance->adev->flags & AMD_IS_APU)
3090 continue;
3091
d84a430d
JK
3092 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
3093 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
3094 if (r) {
3095 DRM_ERROR("pstate setting failed (%d).\n", r);
3096 break;
3097 }
3098 }
3099 }
3100
3101 mutex_unlock(&mgpu_info.mutex);
3102 }
3103
d38ceaf9
AD
3104 return 0;
3105}
3106
613aa3ea
LY
3107/**
3108 * amdgpu_device_smu_fini_early - smu hw_fini wrapper
3109 *
3110 * @adev: amdgpu_device pointer
3111 *
3112 * For ASICs need to disable SMC first
3113 */
3114static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
3115{
3116 int i, r;
3117
4e8303cf 3118 if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
613aa3ea
LY
3119 return;
3120
3121 for (i = 0; i < adev->num_ip_blocks; i++) {
3122 if (!adev->ip_blocks[i].status.hw)
3123 continue;
3124 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3125 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3126 /* XXX handle errors */
3127 if (r) {
3128 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
3129 adev->ip_blocks[i].version->funcs->name, r);
3130 }
3131 adev->ip_blocks[i].status.hw = false;
3132 break;
3133 }
3134 }
3135}
3136
e9669fb7 3137static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
d38ceaf9
AD
3138{
3139 int i, r;
3140
e9669fb7
AG
3141 for (i = 0; i < adev->num_ip_blocks; i++) {
3142 if (!adev->ip_blocks[i].version->funcs->early_fini)
3143 continue;
5278a159 3144
e9669fb7
AG
3145 r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
3146 if (r) {
3147 DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
3148 adev->ip_blocks[i].version->funcs->name, r);
3149 }
3150 }
c030f2e4 3151
05df1f01 3152 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
3153 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
3154
7270e895
TY
3155 amdgpu_amdkfd_suspend(adev, false);
3156
613aa3ea
LY
 3157 /* Workaround for ASICs that need to disable SMC first */
3158 amdgpu_device_smu_fini_early(adev);
3e96dbfd 3159
d38ceaf9 3160 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3161 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 3162 continue;
8201a67a 3163
a1255107 3164 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 3165 /* XXX handle errors */
2c1a2784 3166 if (r) {
a1255107
AD
3167 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
3168 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 3169 }
8201a67a 3170
a1255107 3171 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
3172 }
3173
6effad8a
GC
3174 if (amdgpu_sriov_vf(adev)) {
3175 if (amdgpu_virt_release_full_gpu(adev, false))
3176 DRM_ERROR("failed to release exclusive mode on fini\n");
3177 }
3178
e9669fb7
AG
3179 return 0;
3180}
3181
3182/**
3183 * amdgpu_device_ip_fini - run fini for hardware IPs
3184 *
3185 * @adev: amdgpu_device pointer
3186 *
3187 * Main teardown pass for hardware IPs. The list of all the hardware
3188 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
3189 * are run. hw_fini tears down the hardware associated with each IP
3190 * and sw_fini tears down any software state associated with each IP.
3191 * Returns 0 on success, negative error code on failure.
3192 */
3193static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
3194{
3195 int i, r;
3196
3197 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
3198 amdgpu_virt_release_ras_err_handler_data(adev);
3199
e9669fb7
AG
3200 if (adev->gmc.xgmi.num_physical_nodes > 1)
3201 amdgpu_xgmi_remove_device(adev);
3202
c004d44e 3203 amdgpu_amdkfd_device_fini_sw(adev);
9950cda2 3204
d38ceaf9 3205 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3206 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 3207 continue;
c12aba3a
ML
3208
3209 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 3210 amdgpu_ucode_free_bo(adev);
1e256e27 3211 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a 3212 amdgpu_device_wb_fini(adev);
7ccfd79f 3213 amdgpu_device_mem_scratch_fini(adev);
533aed27 3214 amdgpu_ib_pool_fini(adev);
c8031019 3215 amdgpu_seq64_fini(adev);
c12aba3a
ML
3216 }
3217
a1255107 3218 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 3219 /* XXX handle errors */
2c1a2784 3220 if (r) {
a1255107
AD
3221 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
3222 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 3223 }
a1255107
AD
3224 adev->ip_blocks[i].status.sw = false;
3225 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
3226 }
3227
a6dcfd9c 3228 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3229 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 3230 continue;
a1255107
AD
3231 if (adev->ip_blocks[i].version->funcs->late_fini)
3232 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
3233 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
3234 }
3235
c030f2e4 3236 amdgpu_ras_fini(adev);
3237
d38ceaf9
AD
3238 return 0;
3239}
3240
e3ecdffa 3241/**
beff74bc 3242 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 3243 *
1112a46b 3244 * @work: work_struct.
e3ecdffa 3245 */
beff74bc 3246static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
3247{
3248 struct amdgpu_device *adev =
beff74bc 3249 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
3250 int r;
3251
3252 r = amdgpu_ib_ring_tests(adev);
3253 if (r)
3254 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
3255}
3256
1e317b99
RZ
3257static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
3258{
3259 struct amdgpu_device *adev =
3260 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
3261
90a92662
MD
3262 WARN_ON_ONCE(adev->gfx.gfx_off_state);
3263 WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
3264
3265 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
3266 adev->gfx.gfx_off_state = true;
1e317b99
RZ
3267}
3268
e3ecdffa 3269/**
e7854a03 3270 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
3271 *
3272 * @adev: amdgpu_device pointer
3273 *
3274 * Main suspend function for hardware IPs. The list of all the hardware
3275 * IPs that make up the asic is walked, clockgating is disabled and the
3276 * suspend callbacks are run. suspend puts the hardware and software state
3277 * in each IP into a state suitable for suspend.
3278 * Returns 0 on success, negative error code on failure.
3279 */
e7854a03
AD
3280static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
3281{
3282 int i, r;
3283
50ec83f0
AD
3284 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3285 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 3286
b31d6ada
EQ
3287 /*
3288 * Per PMFW team's suggestion, driver needs to handle gfxoff
3289 * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
3290 * scenario. Add the missing df cstate disablement here.
3291 */
3292 if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
3293 dev_warn(adev->dev, "Failed to disallow df cstate");
3294
e7854a03
AD
3295 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3296 if (!adev->ip_blocks[i].status.valid)
3297 continue;
2b9f7848 3298
e7854a03 3299 /* displays are handled separately */
2b9f7848
ND
3300 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
3301 continue;
3302
3303 /* XXX handle errors */
3304 r = adev->ip_blocks[i].version->funcs->suspend(adev);
3305 /* XXX handle errors */
3306 if (r) {
3307 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3308 adev->ip_blocks[i].version->funcs->name, r);
3309 return r;
e7854a03 3310 }
2b9f7848
ND
3311
3312 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
3313 }
3314
e7854a03
AD
3315 return 0;
3316}
3317
3318/**
3319 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3320 *
3321 * @adev: amdgpu_device pointer
3322 *
3323 * Main suspend function for hardware IPs. The list of all the hardware
3324 * IPs that make up the asic is walked, clockgating is disabled and the
3325 * suspend callbacks are run. suspend puts the hardware and software state
3326 * in each IP into a state suitable for suspend.
3327 * Returns 0 on success, negative error code on failure.
3328 */
3329static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3330{
3331 int i, r;
3332
557f42a2 3333 if (adev->in_s0ix)
bc143d8b 3334 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
34416931 3335
d38ceaf9 3336 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3337 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 3338 continue;
e7854a03
AD
3339 /* displays are handled in phase1 */
3340 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3341 continue;
bff77e86
LM
3342 /* PSP lost connection when err_event_athub occurs */
3343 if (amdgpu_ras_intr_triggered() &&
3344 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3345 adev->ip_blocks[i].status.hw = false;
3346 continue;
3347 }
e3c1b071 3348
3349 /* skip unnecessary suspend if we do not initialize them yet */
3350 if (adev->gmc.xgmi.pending_reset &&
3351 !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3352 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
3353 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3354 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
3355 adev->ip_blocks[i].status.hw = false;
3356 continue;
3357 }
557f42a2 3358
afa6646b 3359 /* skip suspend of gfx/mes and psp for S0ix
32ff160d
AD
3360 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3361 * like at runtime. PSP is also part of the always on hardware
3362 * so no need to suspend it.
3363 */
557f42a2 3364 if (adev->in_s0ix &&
32ff160d 3365 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
afa6646b
AD
3366 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3367 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
557f42a2
AD
3368 continue;
3369
2a7798ea
AD
3370 /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3371 if (adev->in_s0ix &&
4e8303cf
LL
3372 (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
3373 IP_VERSION(5, 0, 0)) &&
3374 (adev->ip_blocks[i].version->type ==
3375 AMD_IP_BLOCK_TYPE_SDMA))
2a7798ea
AD
3376 continue;
3377
e11c7750
TH
 3378	 /* Once swPSP provides the IMU and RLC FW binaries to TOS during cold boot,
 3379	 * they reside in TMR and are expected to be reused by PSP-TOS to reload
 3380	 * from that location; RLC autoload is also triggered from there based on
 3381	 * the PMFW -> PSP message during the re-init sequence.
 3382	 * Therefore, skip the PSP suspend & resume to avoid destroying the TMR
 3383	 * and reloading the FWs again on IMU-enabled APU ASICs.
3384 */
3385 if (amdgpu_in_reset(adev) &&
3386 (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3387 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3388 continue;
3389
d38ceaf9 3390 /* XXX handle errors */
a1255107 3391 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 3392 /* XXX handle errors */
2c1a2784 3393 if (r) {
a1255107
AD
3394 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3395 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 3396 }
876923fb 3397 adev->ip_blocks[i].status.hw = false;
a3a09142 3398 /* handle putting the SMC in the appropriate state */
47fc644f 3399 if (!amdgpu_sriov_vf(adev)) {
86b93fd6
JZ
3400 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3401 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3402 if (r) {
3403 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3404 adev->mp1_state, r);
3405 return r;
3406 }
a3a09142
AD
3407 }
3408 }
d38ceaf9
AD
3409 }
3410
3411 return 0;
3412}
3413
e7854a03
AD
3414/**
3415 * amdgpu_device_ip_suspend - run suspend for hardware IPs
3416 *
3417 * @adev: amdgpu_device pointer
3418 *
3419 * Main suspend function for hardware IPs. The list of all the hardware
3420 * IPs that make up the asic is walked, clockgating is disabled and the
3421 * suspend callbacks are run. suspend puts the hardware and software state
3422 * in each IP into a state suitable for suspend.
3423 * Returns 0 on success, negative error code on failure.
3424 */
3425int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3426{
3427 int r;
3428
3c73683c
JC
3429 if (amdgpu_sriov_vf(adev)) {
3430 amdgpu_virt_fini_data_exchange(adev);
e7819644 3431 amdgpu_virt_request_full_gpu(adev, false);
3c73683c 3432 }
e7819644 3433
b7043800
AD
3434 amdgpu_ttm_set_buffer_funcs_status(adev, false);
3435
e7854a03
AD
3436 r = amdgpu_device_ip_suspend_phase1(adev);
3437 if (r)
3438 return r;
3439 r = amdgpu_device_ip_suspend_phase2(adev);
3440
e7819644
YT
3441 if (amdgpu_sriov_vf(adev))
3442 amdgpu_virt_release_full_gpu(adev, false);
3443
e7854a03
AD
3444 return r;
3445}
3446
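/**
 * amdgpu_device_ip_reinit_early_sriov - reinit early IP blocks after a VF FLR
 *
 * @adev: amdgpu_device pointer
 *
 * Re-runs hw_init for the COMMON, GMC, PSP and IH blocks so that register
 * access, memory access, PSP and interrupts work again before the remaining
 * IP blocks are brought back up.
 * Returns 0 on success, negative error code on failure.
 */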
06ec9070 3447static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3448{
3449 int i, r;
3450
2cb681b6 3451 static enum amd_ip_block_type ip_order[] = {
2cb681b6 3452 AMD_IP_BLOCK_TYPE_COMMON,
c1c39032 3453 AMD_IP_BLOCK_TYPE_GMC,
39186aef 3454 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
3455 AMD_IP_BLOCK_TYPE_IH,
3456 };
a90ad3c2 3457
95ea3dbc 3458 for (i = 0; i < adev->num_ip_blocks; i++) {
2cb681b6
ML
3459 int j;
3460 struct amdgpu_ip_block *block;
a90ad3c2 3461
4cd2a96d
J
3462 block = &adev->ip_blocks[i];
3463 block->status.hw = false;
2cb681b6 3464
4cd2a96d 3465 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
2cb681b6 3466
4cd2a96d 3467 if (block->version->type != ip_order[j] ||
2cb681b6
ML
3468 !block->status.valid)
3469 continue;
3470
3471 r = block->version->funcs->hw_init(adev);
0aaeefcc 3472	 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
c41d1cf6
ML
3473 if (r)
3474 return r;
482f0e53 3475 block->status.hw = true;
a90ad3c2
ML
3476 }
3477 }
3478
3479 return 0;
3480}
3481
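/**
 * amdgpu_device_ip_reinit_late_sriov - reinit remaining IP blocks after a VF FLR
 *
 * @adev: amdgpu_device pointer
 *
 * Re-initializes the remaining blocks (SMC, DCE, GFX, SDMA, MES, UVD, VCE,
 * VCN, JPEG) in that fixed order once the early blocks are back up. SMC is
 * resumed rather than re-initialized; all other blocks go through hw_init.
 * Returns 0 on success, negative error code on failure.
 */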
06ec9070 3482static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3483{
3484 int i, r;
3485
2cb681b6
ML
3486 static enum amd_ip_block_type ip_order[] = {
3487 AMD_IP_BLOCK_TYPE_SMC,
3488 AMD_IP_BLOCK_TYPE_DCE,
3489 AMD_IP_BLOCK_TYPE_GFX,
3490 AMD_IP_BLOCK_TYPE_SDMA,
ec64350d 3491 AMD_IP_BLOCK_TYPE_MES,
257deb8c 3492 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07 3493 AMD_IP_BLOCK_TYPE_VCE,
d2cdc014
YZ
3494 AMD_IP_BLOCK_TYPE_VCN,
3495 AMD_IP_BLOCK_TYPE_JPEG
2cb681b6 3496 };
a90ad3c2 3497
2cb681b6
ML
3498 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3499 int j;
3500 struct amdgpu_ip_block *block;
a90ad3c2 3501
2cb681b6
ML
3502 for (j = 0; j < adev->num_ip_blocks; j++) {
3503 block = &adev->ip_blocks[j];
3504
3505 if (block->version->type != ip_order[i] ||
482f0e53
ML
3506 !block->status.valid ||
3507 block->status.hw)
2cb681b6
ML
3508 continue;
3509
895bd048
JZ
3510 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3511 r = block->version->funcs->resume(adev);
3512 else
3513 r = block->version->funcs->hw_init(adev);
3514
0aaeefcc 3515	 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
c41d1cf6
ML
3516 if (r)
3517 return r;
482f0e53 3518 block->status.hw = true;
a90ad3c2
ML
3519 }
3520 }
3521
3522 return 0;
3523}
3524
e3ecdffa
AD
3525/**
3526 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3527 *
3528 * @adev: amdgpu_device pointer
3529 *
3530 * First resume function for hardware IPs. The list of all the hardware
3531 * IPs that make up the asic is walked and the resume callbacks are run for
3532 * COMMON, GMC, and IH. resume puts the hardware into a functional state
3533 * after a suspend and updates the software state as necessary. This
3534 * function is also used for restoring the GPU after a GPU reset.
3535 * Returns 0 on success, negative error code on failure.
3536 */
06ec9070 3537static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
3538{
3539 int i, r;
3540
a90ad3c2 3541 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3542 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 3543 continue;
a90ad3c2 3544 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3545 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
d7274ec7
BZ
3546 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3547 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
482f0e53 3548
fcf0649f
CZ
3549 r = adev->ip_blocks[i].version->funcs->resume(adev);
3550 if (r) {
3551 DRM_ERROR("resume of IP block <%s> failed %d\n",
3552 adev->ip_blocks[i].version->funcs->name, r);
3553 return r;
3554 }
482f0e53 3555 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
3556 }
3557 }
3558
3559 return 0;
3560}
3561
e3ecdffa
AD
3562/**
3563 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3564 *
3565 * @adev: amdgpu_device pointer
3566 *
 3567	 * Second resume function for hardware IPs. The list of all the hardware
3568 * IPs that make up the asic is walked and the resume callbacks are run for
3569 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
3570 * functional state after a suspend and updates the software state as
3571 * necessary. This function is also used for restoring the GPU after a GPU
3572 * reset.
3573 * Returns 0 on success, negative error code on failure.
3574 */
06ec9070 3575static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3576{
3577 int i, r;
3578
3579 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3580 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 3581 continue;
fcf0649f 3582 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3583 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
3584 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3585 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 3586 continue;
a1255107 3587 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 3588 if (r) {
a1255107
AD
3589 DRM_ERROR("resume of IP block <%s> failed %d\n",
3590 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 3591 return r;
2c1a2784 3592 }
482f0e53 3593 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
3594 }
3595
3596 return 0;
3597}
3598
e3ecdffa
AD
3599/**
3600 * amdgpu_device_ip_resume - run resume for hardware IPs
3601 *
3602 * @adev: amdgpu_device pointer
3603 *
3604 * Main resume function for hardware IPs. The hardware IPs
3605 * are split into two resume functions because they are
b8920e1e 3606 * also used in recovering from a GPU reset and some additional
e3ecdffa
AD
 3607	 * steps need to be taken between them. In this case (S3/S4) they are
3608 * run sequentially.
3609 * Returns 0 on success, negative error code on failure.
3610 */
06ec9070 3611static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
3612{
3613 int r;
3614
06ec9070 3615 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
3616 if (r)
3617 return r;
7a3e0bb2
RZ
3618
3619 r = amdgpu_device_fw_loading(adev);
3620 if (r)
3621 return r;
3622
06ec9070 3623 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f 3624
b7043800
AD
3625 if (adev->mman.buffer_funcs_ring->sched.ready)
3626 amdgpu_ttm_set_buffer_funcs_status(adev, true);
3627
fcf0649f
CZ
3628 return r;
3629}
3630
e3ecdffa
AD
3631/**
3632 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3633 *
3634 * @adev: amdgpu_device pointer
3635 *
3636 * Query the VBIOS data tables to determine if the board supports SR-IOV.
3637 */
4e99a44e 3638static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 3639{
6867e1b5
ML
3640 if (amdgpu_sriov_vf(adev)) {
3641 if (adev->is_atom_fw) {
58ff791a 3642 if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
6867e1b5
ML
3643 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3644 } else {
3645 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3646 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3647 }
3648
3649 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3650 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 3651 }
048765ad
AR
3652}
3653
e3ecdffa
AD
3654/**
3655 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3656 *
3657 * @asic_type: AMD asic type
3658 *
 3659	 * Check if there is DC (new modesetting infrastructure) support for an asic.
 3660	 * Returns true if DC has support, false if not.
3661 */
4562236b
HW
3662bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3663{
3664 switch (asic_type) {
0637d417
AD
3665#ifdef CONFIG_DRM_AMDGPU_SI
3666 case CHIP_HAINAN:
3667#endif
3668 case CHIP_TOPAZ:
3669 /* chips with no display hardware */
3670 return false;
4562236b 3671#if defined(CONFIG_DRM_AMD_DC)
64200c46
MR
3672 case CHIP_TAHITI:
3673 case CHIP_PITCAIRN:
3674 case CHIP_VERDE:
3675 case CHIP_OLAND:
2d32ffd6
AD
3676 /*
3677 * We have systems in the wild with these ASICs that require
3678 * LVDS and VGA support which is not supported with DC.
3679 *
3680 * Fallback to the non-DC driver here by default so as not to
3681 * cause regressions.
3682 */
3683#if defined(CONFIG_DRM_AMD_DC_SI)
3684 return amdgpu_dc > 0;
3685#else
3686 return false;
64200c46 3687#endif
4562236b 3688 case CHIP_BONAIRE:
0d6fbccb 3689 case CHIP_KAVERI:
367e6687
AD
3690 case CHIP_KABINI:
3691 case CHIP_MULLINS:
d9fda248
HW
3692 /*
3693 * We have systems in the wild with these ASICs that require
b5a0168e 3694 * VGA support which is not supported with DC.
d9fda248
HW
3695 *
3696 * Fallback to the non-DC driver here by default so as not to
3697 * cause regressions.
3698 */
3699 return amdgpu_dc > 0;
f7f12b25 3700 default:
fd187853 3701 return amdgpu_dc != 0;
f7f12b25 3702#else
4562236b 3703 default:
93b09a9a 3704 if (amdgpu_dc > 0)
b8920e1e 3705 DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
4562236b 3706 return false;
f7f12b25 3707#endif
4562236b
HW
3708 }
3709}
3710
3711/**
3712 * amdgpu_device_has_dc_support - check if dc is supported
3713 *
982a820b 3714 * @adev: amdgpu_device pointer
4562236b
HW
3715 *
3716 * Returns true for supported, false for not supported
3717 */
3718bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3719{
25263da3 3720 if (adev->enable_virtual_display ||
abaf210c 3721 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
2555039d
XY
3722 return false;
3723
4562236b
HW
3724 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3725}
3726
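/**
 * amdgpu_device_xgmi_reset_func - per-device XGMI hive reset worker
 *
 * @__work: the xgmi_reset_work item embedded in struct amdgpu_device
 *
 * Resets one device of an XGMI hive. A task barrier is used so that all
 * devices in the hive either enter and exit BACO together or perform the
 * per-ASIC reset at the same time.
 */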
d4535e2c
AG
3727static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3728{
3729 struct amdgpu_device *adev =
3730 container_of(__work, struct amdgpu_device, xgmi_reset_work);
d95e8e97 3731 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
d4535e2c 3732
c6a6e2db
AG
3733 /* It's a bug to not have a hive within this function */
3734 if (WARN_ON(!hive))
3735 return;
3736
3737 /*
3738 * Use task barrier to synchronize all xgmi reset works across the
3739 * hive. task_barrier_enter and task_barrier_exit will block
3740 * until all the threads running the xgmi reset works reach
3741 * those points. task_barrier_full will do both blocks.
3742 */
3743 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3744
3745 task_barrier_enter(&hive->tb);
4a580877 3746 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
c6a6e2db
AG
3747
3748 if (adev->asic_reset_res)
3749 goto fail;
3750
3751 task_barrier_exit(&hive->tb);
4a580877 3752 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
c6a6e2db
AG
3753
3754 if (adev->asic_reset_res)
3755 goto fail;
43c4d576 3756
21226f02 3757 amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB);
c6a6e2db
AG
3758 } else {
3759
3760 task_barrier_full(&hive->tb);
3761 adev->asic_reset_res = amdgpu_asic_reset(adev);
3762 }
ce316fa5 3763
c6a6e2db 3764fail:
d4535e2c 3765 if (adev->asic_reset_res)
fed184e9 3766 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
4a580877 3767 adev->asic_reset_res, adev_to_drm(adev)->unique);
d95e8e97 3768 amdgpu_put_xgmi_hive(hive);
d4535e2c
AG
3769}
3770
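/**
 * amdgpu_device_get_job_timeout_settings - parse the lockup_timeout parameter
 *
 * @adev: amdgpu_device pointer
 *
 * Parses the comma separated amdgpu.lockup_timeout module parameter into the
 * per-engine job timeouts in the order gfx, compute, sdma, video; values are
 * in milliseconds, 0 keeps the default and a negative value disables the
 * timeout, e.g. "amdgpu.lockup_timeout=10000,60000,10000,10000".
 * Returns 0 on success, negative error code on failure.
 */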
71f98027
AD
3771static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3772{
3773 char *input = amdgpu_lockup_timeout;
3774 char *timeout_setting = NULL;
3775 int index = 0;
3776 long timeout;
3777 int ret = 0;
3778
3779 /*
67387dfe
AD
 3780	 * By default the timeout for non-compute jobs is 10000
 3781	 * and 60000 for compute jobs.
71f98027 3782	 * In SR-IOV or passthrough mode, the timeout for compute
b7b2a316 3783	 * jobs is 60000 by default.
71f98027
AD
3784 */
3785 adev->gfx_timeout = msecs_to_jiffies(10000);
3786 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
9882e278
ED
3787 if (amdgpu_sriov_vf(adev))
3788 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3789 msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
71f98027 3790 else
67387dfe 3791 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027 3792
f440ff44 3793 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 3794 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 3795 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
3796 ret = kstrtol(timeout_setting, 0, &timeout);
3797 if (ret)
3798 return ret;
3799
3800 if (timeout == 0) {
3801 index++;
3802 continue;
3803 } else if (timeout < 0) {
3804 timeout = MAX_SCHEDULE_TIMEOUT;
127aedf9
CK
3805 dev_warn(adev->dev, "lockup timeout disabled");
3806 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
71f98027
AD
3807 } else {
3808 timeout = msecs_to_jiffies(timeout);
3809 }
3810
3811 switch (index++) {
3812 case 0:
3813 adev->gfx_timeout = timeout;
3814 break;
3815 case 1:
3816 adev->compute_timeout = timeout;
3817 break;
3818 case 2:
3819 adev->sdma_timeout = timeout;
3820 break;
3821 case 3:
3822 adev->video_timeout = timeout;
3823 break;
3824 default:
3825 break;
3826 }
3827 }
3828 /*
3829 * There is only one value specified and
3830 * it should apply to all non-compute jobs.
3831 */
bcccee89 3832 if (index == 1) {
71f98027 3833 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
3834 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3835 adev->compute_timeout = adev->gfx_timeout;
3836 }
71f98027
AD
3837 }
3838
3839 return ret;
3840}
d4535e2c 3841
4a74c38c
PY
3842/**
3843 * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3844 *
3845 * @adev: amdgpu_device pointer
3846 *
 3847	 * RAM is directly mapped to the GPU if the IOMMU is not enabled or is in passthrough mode
3848 */
3849static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3850{
3851 struct iommu_domain *domain;
3852
3853 domain = iommu_get_domain_for_dev(adev->dev);
3854 if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3855 adev->ram_is_direct_mapped = true;
3856}
3857
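/* sysfs attributes attached to the device during amdgpu_device_init() */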
77f3a5cd 3858static const struct attribute *amdgpu_dev_attributes[] = {
77f3a5cd
ND
3859 &dev_attr_pcie_replay_count.attr,
3860 NULL
3861};
3862
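/**
 * amdgpu_device_set_mcbp - configure mid-command-buffer preemption
 *
 * @adev: amdgpu_device pointer
 *
 * Honors the amdgpu.mcbp module parameter (1 forces MCBP on, 0 forces it
 * off) and always enables MCBP when running as an SR-IOV VF.
 */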
02ff519e
AD
3863static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
3864{
3865 if (amdgpu_mcbp == 1)
3866 adev->gfx.mcbp = true;
1e9e15dc
JZ
3867 else if (amdgpu_mcbp == 0)
3868 adev->gfx.mcbp = false;
50a7c876 3869
02ff519e
AD
3870 if (amdgpu_sriov_vf(adev))
3871 adev->gfx.mcbp = true;
3872
3873 if (adev->gfx.mcbp)
3874 DRM_INFO("MCBP is enabled\n");
3875}
3876
d38ceaf9
AD
3877/**
3878 * amdgpu_device_init - initialize the driver
3879 *
3880 * @adev: amdgpu_device pointer
d38ceaf9
AD
3881 * @flags: driver flags
3882 *
3883 * Initializes the driver info and hw (all asics).
3884 * Returns 0 for success or an error on failure.
3885 * Called at driver startup.
3886 */
3887int amdgpu_device_init(struct amdgpu_device *adev,
d38ceaf9
AD
3888 uint32_t flags)
3889{
8aba21b7
LT
3890 struct drm_device *ddev = adev_to_drm(adev);
3891 struct pci_dev *pdev = adev->pdev;
d38ceaf9 3892 int r, i;
b98c6299 3893 bool px = false;
95844d20 3894 u32 max_MBps;
59e9fff1 3895 int tmp;
d38ceaf9
AD
3896
3897 adev->shutdown = false;
d38ceaf9 3898 adev->flags = flags;
4e66d7d2
YZ
3899
3900 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3901 adev->asic_type = amdgpu_force_asic_type;
3902 else
3903 adev->asic_type = flags & AMD_ASIC_MASK;
3904
d38ceaf9 3905 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 3906 if (amdgpu_emu_mode == 1)
8bdab6bb 3907 adev->usec_timeout *= 10;
770d13b1 3908 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
3909 adev->accel_working = false;
3910 adev->num_rings = 0;
68ce8b24 3911 RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
d38ceaf9
AD
3912 adev->mman.buffer_funcs = NULL;
3913 adev->mman.buffer_funcs_ring = NULL;
3914 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 3915 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 3916 adev->gmc.gmc_funcs = NULL;
7bd939d0 3917 adev->harvest_ip_mask = 0x0;
f54d1867 3918 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 3919 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
3920
3921 adev->smc_rreg = &amdgpu_invalid_rreg;
3922 adev->smc_wreg = &amdgpu_invalid_wreg;
3923 adev->pcie_rreg = &amdgpu_invalid_rreg;
3924 adev->pcie_wreg = &amdgpu_invalid_wreg;
0c552ed3
LM
3925 adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
3926 adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
36b9a952
HR
3927 adev->pciep_rreg = &amdgpu_invalid_rreg;
3928 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
3929 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3930 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
a76b2870
CL
3931 adev->pcie_rreg64_ext = &amdgpu_invalid_rreg64_ext;
3932 adev->pcie_wreg64_ext = &amdgpu_invalid_wreg64_ext;
d38ceaf9
AD
3933 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3934 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3935 adev->didt_rreg = &amdgpu_invalid_rreg;
3936 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
3937 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3938 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
3939 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3940 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3941
3e39ab90
AD
3942 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3943 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3944 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
3945
 3946	 /* mutex initialization is all done here so we
b8920e1e
SS
 3947	 * can recall functions without locking issues
3948 */
0e5ca0d1 3949 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
3950 mutex_init(&adev->pm.mutex);
3951 mutex_init(&adev->gfx.gpu_clock_mutex);
3952 mutex_init(&adev->srbm_mutex);
b8866c26 3953 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3954 mutex_init(&adev->gfx.gfx_off_mutex);
98a54e88 3955 mutex_init(&adev->gfx.partition_mutex);
d38ceaf9 3956 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3957 mutex_init(&adev->mn_lock);
e23b74aa 3958 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3959 hash_init(adev->mn_hash);
32eaeae0 3960 mutex_init(&adev->psp.mutex);
bd052211 3961 mutex_init(&adev->notifier_lock);
8cda7a4f 3962 mutex_init(&adev->pm.stable_pstate_ctx_lock);
f113cc32 3963 mutex_init(&adev->benchmark_mutex);
d38ceaf9 3964
ab3b9de6 3965 amdgpu_device_init_apu_flags(adev);
9f6a7857 3966
912dfc84
EQ
3967 r = amdgpu_device_check_arguments(adev);
3968 if (r)
3969 return r;
d38ceaf9 3970
d38ceaf9
AD
3971 spin_lock_init(&adev->mmio_idx_lock);
3972 spin_lock_init(&adev->smc_idx_lock);
3973 spin_lock_init(&adev->pcie_idx_lock);
3974 spin_lock_init(&adev->uvd_ctx_idx_lock);
3975 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3976 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3977 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3978 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3979 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3980
0c4e7fa5
CZ
3981 INIT_LIST_HEAD(&adev->shadow_list);
3982 mutex_init(&adev->shadow_list_lock);
3983
655ce9cb 3984 INIT_LIST_HEAD(&adev->reset_list);
3985
6492e1b0 3986 INIT_LIST_HEAD(&adev->ras_list);
3987
3e38b634
EQ
3988 INIT_LIST_HEAD(&adev->pm.od_kobj_list);
3989
beff74bc
AD
3990 INIT_DELAYED_WORK(&adev->delayed_init_work,
3991 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3992 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3993 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3994
d4535e2c
AG
3995 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3996
d23ee13f 3997 adev->gfx.gfx_off_req_count = 1;
0ad7347a
AA
3998 adev->gfx.gfx_off_residency = 0;
3999 adev->gfx.gfx_off_entrycount = 0;
b6e79d9a 4000 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 4001
b265bdbd
EQ
4002 atomic_set(&adev->throttling_logging_enabled, 1);
4003 /*
4004 * If throttling continues, logging will be performed every minute
4005 * to avoid log flooding. "-1" is subtracted since the thermal
4006 * throttling interrupt comes every second. Thus, the total logging
 4007	 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
4008 * for throttling interrupt) = 60 seconds.
4009 */
4010 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
4011 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
4012
0fa49558
AX
4013 /* Registers mapping */
4014 /* TODO: block userspace mapping of io register */
da69c161
KW
4015 if (adev->asic_type >= CHIP_BONAIRE) {
4016 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
4017 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
4018 } else {
4019 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
4020 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
4021 }
d38ceaf9 4022
6c08e0ef
EQ
4023 for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
4024 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
4025
d38ceaf9 4026 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
b8920e1e 4027 if (!adev->rmmio)
d38ceaf9 4028 return -ENOMEM;
b8920e1e 4029
d38ceaf9 4030 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
b8920e1e 4031 DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size);
d38ceaf9 4032
436afdfa
PY
4033 /*
4034 * Reset domain needs to be present early, before XGMI hive discovered
4035 * (if any) and intitialized to use reset sem and in_gpu reset flag
4036 * early on during init and before calling to RREG32.
4037 */
4038 adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
4039 if (!adev->reset_domain)
4040 return -ENOMEM;
4041
3aa0115d
ML
4042 /* detect hw virtualization here */
4043 amdgpu_detect_virtualization(adev);
4044
04e85958
TL
4045 amdgpu_device_get_pcie_info(adev);
4046
dffa11b4
ML
4047 r = amdgpu_device_get_job_timeout_settings(adev);
4048 if (r) {
4049 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4ef87d8f 4050 return r;
a190d1c7
XY
4051 }
4052
d38ceaf9 4053 /* early init functions */
06ec9070 4054 r = amdgpu_device_ip_early_init(adev);
d38ceaf9 4055 if (r)
4ef87d8f 4056 return r;
d38ceaf9 4057
02ff519e
AD
4058 amdgpu_device_set_mcbp(adev);
4059
b7cdb41e
ML
4060 /* Get rid of things like offb */
4061 r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
4062 if (r)
4063 return r;
4064
4d33e704
SK
4065 /* Enable TMZ based on IP_VERSION */
4066 amdgpu_gmc_tmz_set(adev);
4067
957b0787 4068 amdgpu_gmc_noretry_set(adev);
4a0165f0
VS
 4069	 /* Need to get xgmi info early to decide the reset behavior */
4070 if (adev->gmc.xgmi.supported) {
4071 r = adev->gfxhub.funcs->get_xgmi_info(adev);
4072 if (r)
4073 return r;
4074 }
4075
8e6d0b69 4076 /* enable PCIE atomic ops */
b4520bfd
GW
4077 if (amdgpu_sriov_vf(adev)) {
4078 if (adev->virt.fw_reserve.p_pf2vf)
4079 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
4080 adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
4081 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
0e768043
YZ
 4082	 /* APUs with gfx9 onwards don't rely on PCIe atomics; rather, an
 4083	 * internal path natively supports atomics, so set have_atomics_support to true.
4084 */
b4520bfd 4085 } else if ((adev->flags & AMD_IS_APU) &&
4e8303cf
LL
4086 (amdgpu_ip_version(adev, GC_HWIP, 0) >
4087 IP_VERSION(9, 0, 0))) {
0e768043 4088 adev->have_atomics_support = true;
b4520bfd 4089 } else {
8e6d0b69 4090 adev->have_atomics_support =
4091 !pci_enable_atomic_ops_to_root(adev->pdev,
4092 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
4093 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
b4520bfd
GW
4094 }
4095
8e6d0b69 4096 if (!adev->have_atomics_support)
4097 dev_info(adev->dev, "PCIE atomic ops is not supported\n");
4098
6585661d 4099	 /* doorbell bar mapping and doorbell index init */
43c064db 4100 amdgpu_doorbell_init(adev);
6585661d 4101
9475a943
SL
4102 if (amdgpu_emu_mode == 1) {
4103 /* post the asic on emulation mode */
4104 emu_soc_asic_init(adev);
bfca0289 4105 goto fence_driver_init;
9475a943 4106 }
bfca0289 4107
04442bf7
LL
4108 amdgpu_reset_init(adev);
4109
4e99a44e 4110 /* detect if we are with an SRIOV vbios */
b4520bfd
GW
4111 if (adev->bios)
4112 amdgpu_device_detect_sriov_bios(adev);
048765ad 4113
95e8e59e
AD
4114 /* check if we need to reset the asic
4115 * E.g., driver was not cleanly unloaded previously, etc.
4116 */
f14899fd 4117 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
e3c1b071 4118 if (adev->gmc.xgmi.num_physical_nodes) {
4119 dev_info(adev->dev, "Pending hive reset.\n");
4120 adev->gmc.xgmi.pending_reset = true;
 4121	 /* Only need to init the necessary blocks for SMU to handle the reset */
4122 for (i = 0; i < adev->num_ip_blocks; i++) {
4123 if (!adev->ip_blocks[i].status.valid)
4124 continue;
4125 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
4126 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
4127 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
4128 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
751f43e7 4129 DRM_DEBUG("IP %s disabled for hw_init.\n",
e3c1b071 4130 adev->ip_blocks[i].version->funcs->name);
4131 adev->ip_blocks[i].status.hw = true;
4132 }
4133 }
4134 } else {
5f38ac54
KF
4135 switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
4136 case IP_VERSION(13, 0, 0):
4137 case IP_VERSION(13, 0, 7):
4138 case IP_VERSION(13, 0, 10):
4139 r = psp_gpu_reset(adev);
4140 break;
4141 default:
4142 tmp = amdgpu_reset_method;
4143 /* It should do a default reset when loading or reloading the driver,
4144 * regardless of the module parameter reset_method.
4145 */
4146 amdgpu_reset_method = AMD_RESET_METHOD_NONE;
4147 r = amdgpu_asic_reset(adev);
4148 amdgpu_reset_method = tmp;
4149 break;
4150 }
4151
e3c1b071 4152 if (r) {
4153 dev_err(adev->dev, "asic reset on init failed\n");
4154 goto failed;
4155 }
95e8e59e
AD
4156 }
4157 }
4158
d38ceaf9 4159 /* Post card if necessary */
39c640c0 4160 if (amdgpu_device_need_post(adev)) {
d38ceaf9 4161 if (!adev->bios) {
bec86378 4162 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
4163 r = -EINVAL;
4164 goto failed;
d38ceaf9 4165 }
bec86378 4166 DRM_INFO("GPU posting now...\n");
4d2997ab 4167 r = amdgpu_device_asic_init(adev);
4e99a44e
ML
4168 if (r) {
4169 dev_err(adev->dev, "gpu post error!\n");
4170 goto failed;
4171 }
d38ceaf9
AD
4172 }
4173
9535a86a
SZ
4174 if (adev->bios) {
4175 if (adev->is_atom_fw) {
4176 /* Initialize clocks */
4177 r = amdgpu_atomfirmware_get_clock_info(adev);
4178 if (r) {
4179 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
4180 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4181 goto failed;
4182 }
4183 } else {
4184 /* Initialize clocks */
4185 r = amdgpu_atombios_get_clock_info(adev);
4186 if (r) {
4187 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
4188 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4189 goto failed;
4190 }
4191 /* init i2c buses */
4192 if (!amdgpu_device_has_dc_support(adev))
4193 amdgpu_atombios_i2c_init(adev);
a5bde2f9 4194 }
2c1a2784 4195 }
d38ceaf9 4196
bfca0289 4197fence_driver_init:
d38ceaf9 4198 /* Fence driver */
067f44c8 4199 r = amdgpu_fence_driver_sw_init(adev);
2c1a2784 4200 if (r) {
067f44c8 4201 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
e23b74aa 4202 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 4203 goto failed;
2c1a2784 4204 }
d38ceaf9
AD
4205
4206 /* init the mode config */
4a580877 4207 drm_mode_config_init(adev_to_drm(adev));
d38ceaf9 4208
06ec9070 4209 r = amdgpu_device_ip_init(adev);
d38ceaf9 4210 if (r) {
06ec9070 4211 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 4212 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
970fd197 4213 goto release_ras_con;
d38ceaf9
AD
4214 }
4215
8d35a259
LG
4216 amdgpu_fence_driver_hw_init(adev);
4217
d69b8971
YZ
4218 dev_info(adev->dev,
4219 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
4220 adev->gfx.config.max_shader_engines,
4221 adev->gfx.config.max_sh_per_se,
4222 adev->gfx.config.max_cu_per_sh,
4223 adev->gfx.cu_info.number);
4224
d38ceaf9
AD
4225 adev->accel_working = true;
4226
e59c0205
AX
4227 amdgpu_vm_check_compute_bug(adev);
4228
95844d20
MO
4229 /* Initialize the buffer migration limit. */
4230 if (amdgpu_moverate >= 0)
4231 max_MBps = amdgpu_moverate;
4232 else
4233 max_MBps = 8; /* Allow 8 MB/s. */
4234 /* Get a log2 for easy divisions. */
4235 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
4236
b0adca4d
EQ
4237 /*
4238 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
 4239	 * Otherwise the mgpu fan boost feature will be skipped because the
 4240	 * gpu instance count would be too low.
4241 */
4242 amdgpu_register_gpu_instance(adev);
4243
d38ceaf9
AD
4244 /* enable clockgating, etc. after ib tests, etc. since some blocks require
4245 * explicit gating rather than handling it automatically.
4246 */
e3c1b071 4247 if (!adev->gmc.xgmi.pending_reset) {
4248 r = amdgpu_device_ip_late_init(adev);
4249 if (r) {
4250 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
4251 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
970fd197 4252 goto release_ras_con;
e3c1b071 4253 }
4254 /* must succeed. */
4255 amdgpu_ras_resume(adev);
4256 queue_delayed_work(system_wq, &adev->delayed_init_work,
4257 msecs_to_jiffies(AMDGPU_RESUME_MS));
2c1a2784 4258 }
d38ceaf9 4259
38eecbe0
CL
4260 if (amdgpu_sriov_vf(adev)) {
4261 amdgpu_virt_release_full_gpu(adev, true);
2c738637 4262 flush_delayed_work(&adev->delayed_init_work);
38eecbe0 4263 }
2c738637 4264
90bcb9b5
EQ
4265 /*
 4266	 * Place the sysfs registration after `late_init`, as some of the
 4267	 * operations performed in `late_init` might affect the sysfs
 4268	 * interface creation.
4269 */
4270 r = amdgpu_atombios_sysfs_init(adev);
4271 if (r)
4272 drm_err(&adev->ddev,
4273 "registering atombios sysfs failed (%d).\n", r);
4274
4275 r = amdgpu_pm_sysfs_init(adev);
4276 if (r)
4277 DRM_ERROR("registering pm sysfs failed (%d).\n", r);
4278
4279 r = amdgpu_ucode_sysfs_init(adev);
4280 if (r) {
4281 adev->ucode_sysfs_en = false;
4282 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
4283 } else
4284 adev->ucode_sysfs_en = true;
4285
77f3a5cd 4286 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
5aea5327 4287 if (r)
77f3a5cd 4288 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166 4289
76da73f0
LL
4290 r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
4291 if (r)
4292 dev_err(adev->dev,
4293 "Could not create amdgpu board attributes\n");
4294
7957ec80 4295 amdgpu_fru_sysfs_init(adev);
af39e6f4 4296 amdgpu_reg_state_sysfs_init(adev);
7957ec80 4297
d155bef0
AB
4298 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4299 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
4300 if (r)
4301 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
4302
c1dd4aa6
AG
4303 /* Have stored pci confspace at hand for restore in sudden PCI error */
4304 if (amdgpu_device_cache_pci_state(adev->pdev))
4305 pci_restore_state(pdev);
4306
8c3dd61c
KHF
 4307	 /* if we have more than one VGA card, then disable the amdgpu VGA resources */
4308 /* this will fail for cards that aren't VGA class devices, just
b8920e1e
SS
4309 * ignore it
4310 */
8c3dd61c 4311 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
bf44e8ce 4312 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
8c3dd61c 4313
d37a3929
OC
4314 px = amdgpu_device_supports_px(ddev);
4315
7b1c6263 4316 if (px || (!dev_is_removable(&adev->pdev->dev) &&
d37a3929 4317 apple_gmux_detect(NULL, NULL)))
8c3dd61c
KHF
4318 vga_switcheroo_register_client(adev->pdev,
4319 &amdgpu_switcheroo_ops, px);
d37a3929
OC
4320
4321 if (px)
8c3dd61c 4322 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
8c3dd61c 4323
e3c1b071 4324 if (adev->gmc.xgmi.pending_reset)
4325 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
4326 msecs_to_jiffies(AMDGPU_RESUME_MS));
4327
4a74c38c
PY
4328 amdgpu_device_check_iommu_direct_map(adev);
4329
d38ceaf9 4330 return 0;
83ba126a 4331
970fd197 4332release_ras_con:
38eecbe0
CL
4333 if (amdgpu_sriov_vf(adev))
4334 amdgpu_virt_release_full_gpu(adev, true);
4335
4336 /* failed in exclusive mode due to timeout */
4337 if (amdgpu_sriov_vf(adev) &&
4338 !amdgpu_sriov_runtime(adev) &&
4339 amdgpu_virt_mmio_blocked(adev) &&
4340 !amdgpu_virt_wait_reset(adev)) {
4341 dev_err(adev->dev, "VF exclusive mode timeout\n");
4342 /* Don't send request since VF is inactive. */
4343 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4344 adev->virt.ops = NULL;
4345 r = -EAGAIN;
4346 }
970fd197
SY
4347 amdgpu_release_ras_context(adev);
4348
83ba126a 4349failed:
89041940 4350 amdgpu_vf_error_trans_all(adev);
8840a387 4351
83ba126a 4352 return r;
d38ceaf9
AD
4353}
4354
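/**
 * amdgpu_device_unmap_mmio - tear down all CPU mappings of the device
 *
 * @adev: amdgpu_device pointer
 *
 * Unmaps all user space mappings pointing at the device and unmaps the
 * doorbell, register and VRAM BARs, so nothing can touch the hardware
 * once the device has been unplugged.
 */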
07775fc1
AG
4355static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
4356{
62d5f9f7 4357
07775fc1
AG
4358 /* Clear all CPU mappings pointing to this device */
4359 unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
4360
4361 /* Unmap all mapped bars - Doorbell, registers and VRAM */
43c064db 4362 amdgpu_doorbell_fini(adev);
07775fc1
AG
4363
4364 iounmap(adev->rmmio);
4365 adev->rmmio = NULL;
4366 if (adev->mman.aper_base_kaddr)
4367 iounmap(adev->mman.aper_base_kaddr);
4368 adev->mman.aper_base_kaddr = NULL;
4369
4370 /* Memory manager related */
a0ba1279 4371 if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
07775fc1
AG
4372 arch_phys_wc_del(adev->gmc.vram_mtrr);
4373 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4374 }
4375}
4376
d38ceaf9 4377/**
bbe04dec 4378 * amdgpu_device_fini_hw - tear down the driver
d38ceaf9
AD
4379 *
4380 * @adev: amdgpu_device pointer
4381 *
4382 * Tear down the driver info (all asics).
4383 * Called at driver shutdown.
4384 */
72c8c97b 4385void amdgpu_device_fini_hw(struct amdgpu_device *adev)
d38ceaf9 4386{
aac89168 4387 dev_info(adev->dev, "amdgpu: finishing device.\n");
9f875167 4388 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 4389 adev->shutdown = true;
9f875167 4390
752c683d
ML
 4391	 /* make sure IB tests have finished before entering exclusive mode
 4392	 * to avoid preemption during the IB tests
b8920e1e 4393 */
519b8b76 4394 if (amdgpu_sriov_vf(adev)) {
752c683d 4395 amdgpu_virt_request_full_gpu(adev, false);
519b8b76
BZ
4396 amdgpu_virt_fini_data_exchange(adev);
4397 }
752c683d 4398
e5b03032
ML
4399 /* disable all interrupts */
4400 amdgpu_irq_disable_all(adev);
47fc644f 4401 if (adev->mode_info.mode_config_initialized) {
1053b9c9 4402 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4a580877 4403 drm_helper_force_disable_all(adev_to_drm(adev));
ff97cba8 4404 else
4a580877 4405 drm_atomic_helper_shutdown(adev_to_drm(adev));
ff97cba8 4406 }
8d35a259 4407 amdgpu_fence_driver_hw_fini(adev);
72c8c97b 4408
cd3a8a59 4409 if (adev->mman.initialized)
9bff18d1 4410 drain_workqueue(adev->mman.bdev.wq);
98f56188 4411
53e9d836 4412 if (adev->pm.sysfs_initialized)
7c868b59 4413 amdgpu_pm_sysfs_fini(adev);
72c8c97b
AG
4414 if (adev->ucode_sysfs_en)
4415 amdgpu_ucode_sysfs_fini(adev);
4416 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
7957ec80 4417 amdgpu_fru_sysfs_fini(adev);
72c8c97b 4418
af39e6f4
LL
4419 amdgpu_reg_state_sysfs_fini(adev);
4420
232d1d43
SY
4421 /* disable ras feature must before hw fini */
4422 amdgpu_ras_pre_fini(adev);
4423
b7043800
AD
4424 amdgpu_ttm_set_buffer_funcs_status(adev, false);
4425
e9669fb7 4426 amdgpu_device_ip_fini_early(adev);
d10d0daa 4427
a3848df6
YW
4428 amdgpu_irq_fini_hw(adev);
4429
b6fd6e0f
SK
4430 if (adev->mman.initialized)
4431 ttm_device_clear_dma_mappings(&adev->mman.bdev);
894c6890 4432
d10d0daa 4433 amdgpu_gart_dummy_page_fini(adev);
07775fc1 4434
39934d3e
VP
4435 if (drm_dev_is_unplugged(adev_to_drm(adev)))
4436 amdgpu_device_unmap_mmio(adev);
87172e89 4437
72c8c97b
AG
4438}
4439
4440void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4441{
62d5f9f7 4442 int idx;
d37a3929 4443 bool px;
62d5f9f7 4444
8d35a259 4445 amdgpu_fence_driver_sw_fini(adev);
a5c5d8d5 4446 amdgpu_device_ip_fini(adev);
b31d3063 4447 amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
d38ceaf9 4448 adev->accel_working = false;
68ce8b24 4449 dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
04442bf7
LL
4450
4451 amdgpu_reset_fini(adev);
4452
d38ceaf9 4453 /* free i2c buses */
4562236b
HW
4454 if (!amdgpu_device_has_dc_support(adev))
4455 amdgpu_i2c_fini(adev);
bfca0289
SL
4456
4457 if (amdgpu_emu_mode != 1)
4458 amdgpu_atombios_fini(adev);
4459
d38ceaf9
AD
4460 kfree(adev->bios);
4461 adev->bios = NULL;
d37a3929 4462
8a2b5139
LL
4463 kfree(adev->fru_info);
4464 adev->fru_info = NULL;
4465
d37a3929
OC
4466 px = amdgpu_device_supports_px(adev_to_drm(adev));
4467
7b1c6263 4468 if (px || (!dev_is_removable(&adev->pdev->dev) &&
d37a3929 4469 apple_gmux_detect(NULL, NULL)))
84c8b22e 4470 vga_switcheroo_unregister_client(adev->pdev);
d37a3929
OC
4471
4472 if (px)
83ba126a 4473 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d37a3929 4474
38d6be81 4475 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
b8779475 4476 vga_client_unregister(adev->pdev);
e9bc1bf7 4477
62d5f9f7
LS
4478 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4479
4480 iounmap(adev->rmmio);
4481 adev->rmmio = NULL;
43c064db 4482 amdgpu_doorbell_fini(adev);
62d5f9f7
LS
4483 drm_dev_exit(idx);
4484 }
4485
d155bef0
AB
4486 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4487 amdgpu_pmu_fini(adev);
72de33f8 4488 if (adev->mman.discovery_bin)
a190d1c7 4489 amdgpu_discovery_fini(adev);
72c8c97b 4490
cfbb6b00
AG
4491 amdgpu_reset_put_reset_domain(adev->reset_domain);
4492 adev->reset_domain = NULL;
4493
72c8c97b
AG
4494 kfree(adev->pci_state);
4495
d38ceaf9
AD
4496}
4497
58144d28
ND
4498/**
4499 * amdgpu_device_evict_resources - evict device resources
4500 * @adev: amdgpu device object
4501 *
 4502	 * Evicts all ttm device resources (vram BOs, gart table) from the lru list
4503 * of the vram memory type. Mainly used for evicting device resources
4504 * at suspend time.
4505 *
4506 */
7863c155 4507static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
58144d28 4508{
7863c155
ML
4509 int ret;
4510
e53d9665
ML
4511 /* No need to evict vram on APUs for suspend to ram or s2idle */
4512 if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
7863c155 4513 return 0;
58144d28 4514
7863c155
ML
4515 ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4516 if (ret)
58144d28 4517 DRM_WARN("evicting device resources failed\n");
7863c155 4518 return ret;
58144d28 4519}
d38ceaf9
AD
4520
4521/*
4522 * Suspend & resume.
4523 */
5095d541
ML
4524/**
4525 * amdgpu_device_prepare - prepare for device suspend
4526 *
4527 * @dev: drm dev pointer
4528 *
4529 * Prepare to put the hw in the suspend state (all asics).
4530 * Returns 0 for success or an error on failure.
4531 * Called at driver suspend.
4532 */
4533int amdgpu_device_prepare(struct drm_device *dev)
4534{
4535 struct amdgpu_device *adev = drm_to_adev(dev);
cb11ca32 4536 int i, r;
5095d541
ML
4537
4538 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4539 return 0;
4540
4541 /* Evict the majority of BOs before starting suspend sequence */
4542 r = amdgpu_device_evict_resources(adev);
4543 if (r)
4544 return r;
4545
cb11ca32
ML
4546 for (i = 0; i < adev->num_ip_blocks; i++) {
4547 if (!adev->ip_blocks[i].status.valid)
4548 continue;
4549 if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
4550 continue;
4551 r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev);
4552 if (r)
4553 return r;
4554 }
4555
5095d541
ML
4556 return 0;
4557}
4558
d38ceaf9 4559/**
810ddc3a 4560 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 4561 *
87e3f136 4562 * @dev: drm dev pointer
87e3f136 4563 * @fbcon : notify the fbdev of suspend
d38ceaf9
AD
4564 *
4565 * Puts the hw in the suspend state (all asics).
4566 * Returns 0 for success or an error on failure.
4567 * Called at driver suspend.
4568 */
de185019 4569int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9 4570{
a2e15b0e 4571 struct amdgpu_device *adev = drm_to_adev(dev);
d7274ec7 4572 int r = 0;
d38ceaf9 4573
d38ceaf9
AD
4574 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4575 return 0;
4576
44779b43 4577 adev->in_suspend = true;
3fa8f89d 4578
d7274ec7
BZ
4579 if (amdgpu_sriov_vf(adev)) {
4580 amdgpu_virt_fini_data_exchange(adev);
4581 r = amdgpu_virt_request_full_gpu(adev, false);
4582 if (r)
4583 return r;
4584 }
4585
3fa8f89d
S
4586 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4587 DRM_WARN("smart shift update failed\n");
4588
5f818173 4589 if (fbcon)
087451f3 4590 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
5f818173 4591
beff74bc 4592 cancel_delayed_work_sync(&adev->delayed_init_work);
0dee7263 4593 flush_delayed_work(&adev->gfx.gfx_off_delay_work);
a5459475 4594
5e6932fe 4595 amdgpu_ras_suspend(adev);
4596
2196927b 4597 amdgpu_device_ip_suspend_phase1(adev);
fe1053b7 4598
c004d44e 4599 if (!adev->in_s0ix)
5d3a2d95 4600 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
94fa5660 4601
7863c155
ML
4602 r = amdgpu_device_evict_resources(adev);
4603 if (r)
4604 return r;
d38ceaf9 4605
dab96d8b
AD
4606 amdgpu_ttm_set_buffer_funcs_status(adev, false);
4607
8d35a259 4608 amdgpu_fence_driver_hw_fini(adev);
d38ceaf9 4609
2196927b 4610 amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 4611
d7274ec7
BZ
4612 if (amdgpu_sriov_vf(adev))
4613 amdgpu_virt_release_full_gpu(adev, false);
4614
2e9b1523
PY
4615 r = amdgpu_dpm_notify_rlc_state(adev, false);
4616 if (r)
4617 return r;
4618
d38ceaf9
AD
4619 return 0;
4620}
4621
4622/**
810ddc3a 4623 * amdgpu_device_resume - initiate device resume
d38ceaf9 4624 *
87e3f136 4625 * @dev: drm dev pointer
87e3f136 4626 * @fbcon : notify the fbdev of resume
d38ceaf9
AD
4627 *
4628 * Bring the hw back to operating state (all asics).
4629 * Returns 0 for success or an error on failure.
4630 * Called at driver resume.
4631 */
de185019 4632int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9 4633{
1348969a 4634 struct amdgpu_device *adev = drm_to_adev(dev);
03161a6e 4635 int r = 0;
d38ceaf9 4636
d7274ec7
BZ
4637 if (amdgpu_sriov_vf(adev)) {
4638 r = amdgpu_virt_request_full_gpu(adev, true);
4639 if (r)
4640 return r;
4641 }
4642
d38ceaf9
AD
4643 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4644 return 0;
4645
62498733 4646 if (adev->in_s0ix)
bc143d8b 4647 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
628c36d7 4648
d38ceaf9 4649 /* post card */
39c640c0 4650 if (amdgpu_device_need_post(adev)) {
4d2997ab 4651 r = amdgpu_device_asic_init(adev);
74b0b157 4652 if (r)
aac89168 4653 dev_err(adev->dev, "amdgpu asic init failed\n");
74b0b157 4654 }
d38ceaf9 4655
06ec9070 4656 r = amdgpu_device_ip_resume(adev);
d7274ec7 4657
e6707218 4658 if (r) {
aac89168 4659 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
3c22c1ea 4660 goto exit;
e6707218 4661 }
8d35a259 4662 amdgpu_fence_driver_hw_init(adev);
5ceb54c6 4663
c004d44e 4664 if (!adev->in_s0ix) {
5d3a2d95
AD
4665 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4666 if (r)
3c22c1ea 4667 goto exit;
5d3a2d95 4668 }
756e6880 4669
8ed79c40
TH
4670 r = amdgpu_device_ip_late_init(adev);
4671 if (r)
4672 goto exit;
4673
4674 queue_delayed_work(system_wq, &adev->delayed_init_work,
4675 msecs_to_jiffies(AMDGPU_RESUME_MS));
3c22c1ea
SF
4676exit:
4677 if (amdgpu_sriov_vf(adev)) {
4678 amdgpu_virt_init_data_exchange(adev);
4679 amdgpu_virt_release_full_gpu(adev, true);
4680 }
4681
4682 if (r)
4683 return r;
4684
96a5d8d4 4685 /* Make sure IB tests flushed */
beff74bc 4686 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 4687
a2e15b0e 4688 if (fbcon)
087451f3 4689 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
d38ceaf9 4690
5e6932fe 4691 amdgpu_ras_resume(adev);
4692
d09ef243
AD
4693 if (adev->mode_info.num_crtc) {
4694 /*
4695 * Most of the connector probing functions try to acquire runtime pm
4696 * refs to ensure that the GPU is powered on when connector polling is
4697 * performed. Since we're calling this from a runtime PM callback,
4698 * trying to acquire rpm refs will cause us to deadlock.
4699 *
4700 * Since we're guaranteed to be holding the rpm lock, it's safe to
4701 * temporarily disable the rpm helpers so this doesn't deadlock us.
4702 */
23a1a9e5 4703#ifdef CONFIG_PM
d09ef243 4704 dev->dev->power.disable_depth++;
23a1a9e5 4705#endif
d09ef243
AD
4706 if (!adev->dc_enabled)
4707 drm_helper_hpd_irq_event(dev);
4708 else
4709 drm_kms_helper_hotplug_event(dev);
23a1a9e5 4710#ifdef CONFIG_PM
d09ef243 4711 dev->dev->power.disable_depth--;
23a1a9e5 4712#endif
d09ef243 4713 }
44779b43
RZ
4714 adev->in_suspend = false;
4715
dc907c9d
JX
4716 if (adev->enable_mes)
4717 amdgpu_mes_self_test(adev);
4718
3fa8f89d
S
4719 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4720 DRM_WARN("smart shift update failed\n");
4721
4d3b9ae5 4722 return 0;
d38ceaf9
AD
4723}
4724
e3ecdffa
AD
4725/**
4726 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4727 *
4728 * @adev: amdgpu_device pointer
4729 *
4730 * The list of all the hardware IPs that make up the asic is walked and
4731 * the check_soft_reset callbacks are run. check_soft_reset determines
4732 * if the asic is still hung or not.
4733 * Returns true if any of the IPs are still in a hung state, false if not.
4734 */
06ec9070 4735static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
4736{
4737 int i;
4738 bool asic_hang = false;
4739
f993d628
ML
4740 if (amdgpu_sriov_vf(adev))
4741 return true;
4742
8bc04c29
AD
4743 if (amdgpu_asic_need_full_reset(adev))
4744 return true;
4745
63fbf42f 4746 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4747 if (!adev->ip_blocks[i].status.valid)
63fbf42f 4748 continue;
a1255107
AD
4749 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4750 adev->ip_blocks[i].status.hang =
4751 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4752 if (adev->ip_blocks[i].status.hang) {
aac89168 4753 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
4754 asic_hang = true;
4755 }
4756 }
4757 return asic_hang;
4758}
4759
e3ecdffa
AD
4760/**
4761 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4762 *
4763 * @adev: amdgpu_device pointer
4764 *
4765 * The list of all the hardware IPs that make up the asic is walked and the
4766 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
4767 * handles any IP specific hardware or software state changes that are
4768 * necessary for a soft reset to succeed.
4769 * Returns 0 on success, negative error code on failure.
4770 */
06ec9070 4771static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
4772{
4773 int i, r = 0;
4774
4775 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4776 if (!adev->ip_blocks[i].status.valid)
d31a501e 4777 continue;
a1255107
AD
4778 if (adev->ip_blocks[i].status.hang &&
4779 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4780 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
4781 if (r)
4782 return r;
4783 }
4784 }
4785
4786 return 0;
4787}
4788
e3ecdffa
AD
4789/**
4790 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4791 *
4792 * @adev: amdgpu_device pointer
4793 *
4794 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
4795 * reset is necessary to recover.
4796 * Returns true if a full asic reset is required, false if not.
4797 */
06ec9070 4798static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 4799{
da146d3b
AD
4800 int i;
4801
8bc04c29
AD
4802 if (amdgpu_asic_need_full_reset(adev))
4803 return true;
4804
da146d3b 4805 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4806 if (!adev->ip_blocks[i].status.valid)
da146d3b 4807 continue;
a1255107
AD
4808 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4809 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4810 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
4811 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4812 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 4813 if (adev->ip_blocks[i].status.hang) {
aac89168 4814 dev_info(adev->dev, "Some block need full reset!\n");
da146d3b
AD
4815 return true;
4816 }
4817 }
35d782fe
CZ
4818 }
4819 return false;
4820}
4821
e3ecdffa
AD
4822/**
4823 * amdgpu_device_ip_soft_reset - do a soft reset
4824 *
4825 * @adev: amdgpu_device pointer
4826 *
4827 * The list of all the hardware IPs that make up the asic is walked and the
4828 * soft_reset callbacks are run if the block is hung. soft_reset handles any
4829 * IP specific hardware or software state changes that are necessary to soft
4830 * reset the IP.
4831 * Returns 0 on success, negative error code on failure.
4832 */
06ec9070 4833static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4834{
4835 int i, r = 0;
4836
4837 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4838 if (!adev->ip_blocks[i].status.valid)
35d782fe 4839 continue;
a1255107
AD
4840 if (adev->ip_blocks[i].status.hang &&
4841 adev->ip_blocks[i].version->funcs->soft_reset) {
4842 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
4843 if (r)
4844 return r;
4845 }
4846 }
4847
4848 return 0;
4849}
4850
e3ecdffa
AD
4851/**
4852 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4853 *
4854 * @adev: amdgpu_device pointer
4855 *
4856 * The list of all the hardware IPs that make up the asic is walked and the
4857 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
4858 * handles any IP specific hardware or software state changes that are
4859 * necessary after the IP has been soft reset.
4860 * Returns 0 on success, negative error code on failure.
4861 */
06ec9070 4862static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4863{
4864 int i, r = 0;
4865
4866 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4867 if (!adev->ip_blocks[i].status.valid)
35d782fe 4868 continue;
a1255107
AD
4869 if (adev->ip_blocks[i].status.hang &&
4870 adev->ip_blocks[i].version->funcs->post_soft_reset)
4871 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
4872 if (r)
4873 return r;
4874 }
4875
4876 return 0;
4877}
4878
e3ecdffa 4879/**
c33adbc7 4880 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
4881 *
4882 * @adev: amdgpu_device pointer
4883 *
4884 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
4885 * restore things like GPUVM page tables after a GPU reset where
4886 * the contents of VRAM might be lost.
403009bf
CK
4887 *
4888 * Returns:
4889 * 0 on success, negative error code on failure.
e3ecdffa 4890 */
c33adbc7 4891static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 4892{
c41d1cf6 4893 struct dma_fence *fence = NULL, *next = NULL;
403009bf 4894 struct amdgpu_bo *shadow;
e18aaea7 4895 struct amdgpu_bo_vm *vmbo;
403009bf 4896 long r = 1, tmo;
c41d1cf6
ML
4897
4898 if (amdgpu_sriov_runtime(adev))
b045d3af 4899 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
4900 else
4901 tmo = msecs_to_jiffies(100);
4902
aac89168 4903 dev_info(adev->dev, "recover vram bo from shadow start\n");
c41d1cf6 4904 mutex_lock(&adev->shadow_list_lock);
e18aaea7 4905 list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4994d1f0
LC
4906 /* If vm is compute context or adev is APU, shadow will be NULL */
4907 if (!vmbo->shadow)
4908 continue;
4909 shadow = vmbo->shadow;
4910
403009bf 4911 /* No need to recover an evicted BO */
d3116756
CK
4912 if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4913 shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4914 shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
403009bf
CK
4915 continue;
4916
4917 r = amdgpu_bo_restore_shadow(shadow, &next);
4918 if (r)
4919 break;
4920
c41d1cf6 4921 if (fence) {
1712fb1a 4922 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
4923 dma_fence_put(fence);
4924 fence = next;
1712fb1a 4925 if (tmo == 0) {
4926 r = -ETIMEDOUT;
c41d1cf6 4927 break;
1712fb1a 4928 } else if (tmo < 0) {
4929 r = tmo;
4930 break;
4931 }
403009bf
CK
4932 } else {
4933 fence = next;
c41d1cf6 4934 }
c41d1cf6
ML
4935 }
4936 mutex_unlock(&adev->shadow_list_lock);
4937
403009bf
CK
4938 if (fence)
4939 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
4940 dma_fence_put(fence);
4941
1712fb1a 4942 if (r < 0 || tmo <= 0) {
aac89168 4943 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
4944 return -EIO;
4945 }
c41d1cf6 4946
aac89168 4947 dev_info(adev->dev, "recover vram bo from shadow done\n");
403009bf 4948 return 0;
c41d1cf6
ML
4949}
4950
a90ad3c2 4951
e3ecdffa 4952/**
06ec9070 4953 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e 4954 *
982a820b 4955 * @adev: amdgpu_device pointer
87e3f136 4956 * @from_hypervisor: request from hypervisor
5740682e
ML
4957 *
 4958	 * Do a VF FLR and reinitialize the ASIC.
3f48c681 4959	 * Returns 0 on success, negative error code on failure.
e3ecdffa
AD
4960 */
4961static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4962 bool from_hypervisor)
5740682e
ML
4963{
4964 int r;
a5f67c93 4965 struct amdgpu_hive_info *hive = NULL;
7258fa31 4966 int retry_limit = 0;
5740682e 4967
7258fa31 4968retry:
c004d44e 4969 amdgpu_amdkfd_pre_reset(adev);
428890a3 4970
5740682e
ML
4971 if (from_hypervisor)
4972 r = amdgpu_virt_request_full_gpu(adev, true);
4973 else
4974 r = amdgpu_virt_reset_gpu(adev);
4975 if (r)
4976 return r;
f734b213 4977 amdgpu_irq_gpu_reset_resume_helper(adev);
a90ad3c2 4978
83f24a8f
HC
 4979	 /* some SW cleanup the VF needs to do before recovery */
4980 amdgpu_virt_post_reset(adev);
4981
a90ad3c2 4982 /* Resume IP prior to SMC */
06ec9070 4983 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
4984 if (r)
4985 goto error;
a90ad3c2 4986
c9ffa427 4987 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 4988
7a3e0bb2
RZ
4989 r = amdgpu_device_fw_loading(adev);
4990 if (r)
4991 return r;
4992
a90ad3c2 4993 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 4994 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
4995 if (r)
4996 goto error;
a90ad3c2 4997
a5f67c93
ZL
4998 hive = amdgpu_get_xgmi_hive(adev);
4999 /* Update PSP FW topology after reset */
5000 if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
5001 r = amdgpu_xgmi_update_topology(hive, adev);
5002
5003 if (hive)
5004 amdgpu_put_xgmi_hive(hive);
5005
5006 if (!r) {
a5f67c93 5007 r = amdgpu_ib_ring_tests(adev);
9c12f5cd 5008
c004d44e 5009 amdgpu_amdkfd_post_reset(adev);
a5f67c93 5010 }
a90ad3c2 5011
abc34253 5012error:
c41d1cf6 5013 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 5014 amdgpu_inc_vram_lost(adev);
c33adbc7 5015 r = amdgpu_device_recover_vram(adev);
a90ad3c2 5016 }
437f3e0b 5017 amdgpu_virt_release_full_gpu(adev, true);
a90ad3c2 5018
7258fa31
SK
5019 if (AMDGPU_RETRY_SRIOV_RESET(r)) {
5020 if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
5021 retry_limit++;
5022 goto retry;
5023 } else
5024 DRM_ERROR("GPU reset retry is beyond the retry limit\n");
5025 }
5026
a90ad3c2
ML
5027 return r;
5028}
5029
9a1cddd6 5030/**
 5031 * amdgpu_device_has_job_running - check if there is any job in the pending list
5032 *
982a820b 5033 * @adev: amdgpu_device pointer
9a1cddd6 5034 *
 5035 * check if there is any job in the pending list
5036 */
5037bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
5038{
5039 int i;
5040 struct drm_sched_job *job;
5041
5042 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5043 struct amdgpu_ring *ring = adev->rings[i];
5044
35963cf2 5045 if (!ring || !drm_sched_wqueue_ready(&ring->sched))
9a1cddd6 5046 continue;
5047
5048 spin_lock(&ring->sched.job_list_lock);
6efa4b46
LT
5049 job = list_first_entry_or_null(&ring->sched.pending_list,
5050 struct drm_sched_job, list);
9a1cddd6 5051 spin_unlock(&ring->sched.job_list_lock);
5052 if (job)
5053 return true;
5054 }
5055 return false;
5056}
5057
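/*
 * Illustrative sketch (assumption, not a call site in this file): a caller
 * that must not disturb in-flight work could use the helper above as a guard.
 *
 *	if (amdgpu_device_has_job_running(adev))
 *		return -EBUSY;	hypothetical caller-side policy
 */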
12938fad
CK
5058/**
5059 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
5060 *
982a820b 5061 * @adev: amdgpu_device pointer
12938fad
CK
5062 *
5063 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
5064 * a hung GPU.
5065 */
5066bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
5067{
12938fad 5068
3ba7b418
AG
5069 if (amdgpu_gpu_recovery == 0)
5070 goto disabled;
5071
1a11a65d
YC
5072 /* Skip soft reset check in fatal error mode */
5073 if (!amdgpu_ras_is_poison_mode_supported(adev))
5074 return true;
5075
3ba7b418
AG
5076 if (amdgpu_sriov_vf(adev))
5077 return true;
5078
5079 if (amdgpu_gpu_recovery == -1) {
5080 switch (adev->asic_type) {
b3523c45
AD
5081#ifdef CONFIG_DRM_AMDGPU_SI
5082 case CHIP_VERDE:
5083 case CHIP_TAHITI:
5084 case CHIP_PITCAIRN:
5085 case CHIP_OLAND:
5086 case CHIP_HAINAN:
5087#endif
5088#ifdef CONFIG_DRM_AMDGPU_CIK
5089 case CHIP_KAVERI:
5090 case CHIP_KABINI:
5091 case CHIP_MULLINS:
5092#endif
5093 case CHIP_CARRIZO:
5094 case CHIP_STONEY:
5095 case CHIP_CYAN_SKILLFISH:
3ba7b418 5096 goto disabled;
b3523c45
AD
5097 default:
5098 break;
3ba7b418 5099 }
12938fad
CK
5100 }
5101
5102 return true;
3ba7b418
AG
5103
5104disabled:
aac89168 5105 dev_info(adev->dev, "GPU recovery disabled.\n");
3ba7b418 5106 return false;
12938fad
CK
5107}
5108
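/*
 * Illustrative sketch (assumption): the typical guard a timeout or fault
 * handler is expected to apply before scheduling recovery.
 *
 *	if (!amdgpu_device_should_recover_gpu(adev))
 *		return;
 *	amdgpu_device_gpu_recover(adev, job, &reset_context);
 */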
5c03e584
FX
5109int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
5110{
47fc644f
SS
5111 u32 i;
5112 int ret = 0;
5c03e584 5113
47fc644f 5114 amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5c03e584 5115
47fc644f 5116 dev_info(adev->dev, "GPU mode1 reset\n");
5c03e584 5117
47fc644f
SS
5118 /* disable BM */
5119 pci_clear_master(adev->pdev);
5c03e584 5120
47fc644f 5121 amdgpu_device_cache_pci_state(adev->pdev);
5c03e584 5122
47fc644f
SS
5123 if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
5124 dev_info(adev->dev, "GPU smu mode1 reset\n");
5125 ret = amdgpu_dpm_mode1_reset(adev);
5126 } else {
5127 dev_info(adev->dev, "GPU psp mode1 reset\n");
5128 ret = psp_gpu_reset(adev);
5129 }
5c03e584 5130
47fc644f 5131 if (ret)
7d442437 5132 goto mode1_reset_failed;
5c03e584 5133
47fc644f 5134 amdgpu_device_load_pci_state(adev->pdev);
7656168a
LL
5135 ret = amdgpu_psp_wait_for_bootloader(adev);
5136 if (ret)
7d442437 5137 goto mode1_reset_failed;
5c03e584 5138
47fc644f
SS
5139 /* wait for asic to come out of reset */
5140 for (i = 0; i < adev->usec_timeout; i++) {
5141 u32 memsize = adev->nbio.funcs->get_memsize(adev);
5c03e584 5142
47fc644f
SS
5143 if (memsize != 0xffffffff)
5144 break;
5145 udelay(1);
5146 }
5c03e584 5147
7d442437
HZ
5148 if (i >= adev->usec_timeout) {
5149 ret = -ETIMEDOUT;
5150 goto mode1_reset_failed;
5151 }
5152
47fc644f 5153 amdgpu_atombios_scratch_regs_engine_hung(adev, false);
7656168a 5154
7d442437
HZ
5155 return 0;
5156
5157mode1_reset_failed:
5158 dev_err(adev->dev, "GPU mode1 reset failed\n");
47fc644f 5159 return ret;
5c03e584 5160}
5c6dd71e 5161
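/*
 * Illustrative sketch (assumption): an ASIC reset callback would dispatch to
 * the helper above when mode1 is the selected reset method.
 *
 *	if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_MODE1)
 *		return amdgpu_device_mode1_reset(adev);
 */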
e3c1b071 5162int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
04442bf7 5163 struct amdgpu_reset_context *reset_context)
26bc5340 5164{
5c1e6fa4 5165 int i, r = 0;
04442bf7
LL
5166 struct amdgpu_job *job = NULL;
5167 bool need_full_reset =
5168 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5169
5170 if (reset_context->reset_req_dev == adev)
5171 job = reset_context->job;
71182665 5172
b602ca5f
TZ
5173 if (amdgpu_sriov_vf(adev)) {
5174 /* stop the data exchange thread */
5175 amdgpu_virt_fini_data_exchange(adev);
5176 }
5177
9e225fb9
AG
5178 amdgpu_fence_driver_isr_toggle(adev, true);
5179
71182665 5180 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
5181 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5182 struct amdgpu_ring *ring = adev->rings[i];
5183
35963cf2 5184 if (!ring || !drm_sched_wqueue_ready(&ring->sched))
0875dc9e 5185 continue;
5740682e 5186
b8920e1e
SS
5187 /* Clear job fence from fence drv to avoid force_completion
5188 * leave NULL and vm flush fence in fence drv
5189 */
5c1e6fa4 5190 amdgpu_fence_driver_clear_job_fences(ring);
c530b02f 5191
2f9d4084
ML
5192 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
5193 amdgpu_fence_driver_force_completion(ring);
0875dc9e 5194 }
d38ceaf9 5195
9e225fb9
AG
5196 amdgpu_fence_driver_isr_toggle(adev, false);
5197
ff99849b 5198 if (job && job->vm)
222b5f04
AG
5199 drm_sched_increase_karma(&job->base);
5200
04442bf7 5201 r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
404b277b 5202 /* If reset handler not implemented, continue; otherwise return */
b8920e1e 5203 if (r == -EOPNOTSUPP)
404b277b
LL
5204 r = 0;
5205 else
04442bf7
LL
5206 return r;
5207
1d721ed6 5208 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
5209 if (!amdgpu_sriov_vf(adev)) {
5210
5211 if (!need_full_reset)
5212 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
5213
360cd081
LG
5214 if (!need_full_reset && amdgpu_gpu_recovery &&
5215 amdgpu_device_ip_check_soft_reset(adev)) {
26bc5340
AG
5216 amdgpu_device_ip_pre_soft_reset(adev);
5217 r = amdgpu_device_ip_soft_reset(adev);
5218 amdgpu_device_ip_post_soft_reset(adev);
5219 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 5220 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
26bc5340
AG
5221 need_full_reset = true;
5222 }
5223 }
5224
5225 if (need_full_reset)
5226 r = amdgpu_device_ip_suspend(adev);
04442bf7
LL
5227 if (need_full_reset)
5228 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5229 else
5230 clear_bit(AMDGPU_NEED_FULL_RESET,
5231 &reset_context->flags);
26bc5340
AG
5232 }
5233
5234 return r;
5235}
5236
15fd09a0
SA
5237static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
5238{
15fd09a0
SA
5239 int i;
5240
38a15ad9 5241 lockdep_assert_held(&adev->reset_domain->sem);
15fd09a0 5242
2d6a2a28
AA
5243 for (i = 0; i < adev->reset_info.num_regs; i++) {
5244 adev->reset_info.reset_dump_reg_value[i] =
5245 RREG32(adev->reset_info.reset_dump_reg_list[i]);
3d8785f6 5246
2d6a2a28
AA
5247 trace_amdgpu_reset_reg_dumps(adev->reset_info.reset_dump_reg_list[i],
5248 adev->reset_info.reset_dump_reg_value[i]);
3d8785f6
SA
5249 }
5250
15fd09a0 5251 return 0;
3d8785f6 5252}
3d8785f6 5253
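/*
 * Illustrative sketch (assumption): the register list consumed above is
 * populated elsewhere (e.g. through a debugfs interface); a minimal
 * in-kernel equivalent with a hypothetical register offset would be:
 *
 *	adev->reset_info.reset_dump_reg_list[0] = 0x1234;
 *	adev->reset_info.num_regs = 1;
 */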
04442bf7
LL
5254int amdgpu_do_asic_reset(struct list_head *device_list_handle,
5255 struct amdgpu_reset_context *reset_context)
26bc5340
AG
5256{
5257 struct amdgpu_device *tmp_adev = NULL;
04442bf7 5258 bool need_full_reset, skip_hw_reset, vram_lost = false;
26bc5340 5259 int r = 0;
f5c7e779 5260 bool gpu_reset_for_dev_remove = 0;
26bc5340 5261
04442bf7
LL
5262 /* Try reset handler method first */
5263 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5264 reset_list);
15fd09a0 5265 amdgpu_reset_reg_dumps(tmp_adev);
0a83bb35
LL
5266
5267 reset_context->reset_device_list = device_list_handle;
04442bf7 5268 r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
404b277b 5269 /* If reset handler not implemented, continue; otherwise return */
b8920e1e 5270 if (r == -EOPNOTSUPP)
404b277b
LL
5271 r = 0;
5272 else
04442bf7
LL
5273 return r;
5274
5275 /* Reset handler not implemented, use the default method */
5276 need_full_reset =
5277 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5278 skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
5279
f5c7e779
YC
5280 gpu_reset_for_dev_remove =
5281 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5282 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5283
26bc5340 5284 /*
655ce9cb 5285 * ASIC reset has to be done on all XGMI hive nodes ASAP
26bc5340
AG
5286 * to allow proper links negotiation in FW (within 1 sec)
5287 */
7ac71382 5288 if (!skip_hw_reset && need_full_reset) {
655ce9cb 5289 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
041a62bc 5290 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 5291 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
e3c1b071 5292 tmp_adev->gmc.xgmi.pending_reset = false;
c96cf282 5293 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
5294 r = -EALREADY;
5295 } else
5296 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 5297
041a62bc 5298 if (r) {
aac89168 5299 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4a580877 5300 r, adev_to_drm(tmp_adev)->unique);
19349072 5301 goto out;
ce316fa5
LM
5302 }
5303 }
5304
041a62bc
AG
5305 /* For XGMI wait for all resets to complete before proceed */
5306 if (!r) {
655ce9cb 5307 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
ce316fa5
LM
5308 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5309 flush_work(&tmp_adev->xgmi_reset_work);
5310 r = tmp_adev->asic_reset_res;
5311 if (r)
5312 break;
ce316fa5
LM
5313 }
5314 }
5315 }
ce316fa5 5316 }
26bc5340 5317
43c4d576 5318 if (!r && amdgpu_ras_intr_triggered()) {
655ce9cb 5319 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
21226f02 5320 amdgpu_ras_reset_error_count(tmp_adev, AMDGPU_RAS_BLOCK__MMHUB);
43c4d576
JC
5321 }
5322
00eaa571 5323 amdgpu_ras_intr_cleared();
43c4d576 5324 }
00eaa571 5325
f5c7e779
YC
5326 /* Since the mode1 reset affects base ip blocks, the
5327 * phase1 ip blocks need to be resumed. Otherwise there
5328 * will be a BIOS signature error and the psp bootloader
5329 * can't load kdb on the next amdgpu install.
5330 */
5331 if (gpu_reset_for_dev_remove) {
5332 list_for_each_entry(tmp_adev, device_list_handle, reset_list)
5333 amdgpu_device_ip_resume_phase1(tmp_adev);
5334
5335 goto end;
5336 }
5337
655ce9cb 5338 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
26bc5340
AG
5339 if (need_full_reset) {
5340 /* post card */
e3c1b071 5341 r = amdgpu_device_asic_init(tmp_adev);
5342 if (r) {
aac89168 5343 dev_warn(tmp_adev->dev, "asic atom init failed!");
e3c1b071 5344 } else {
26bc5340 5345 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
9cec53c1 5346
26bc5340
AG
5347 r = amdgpu_device_ip_resume_phase1(tmp_adev);
5348 if (r)
5349 goto out;
5350
5351 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
a7691785
AA
5352
5353 amdgpu_coredump(tmp_adev, vram_lost, reset_context);
5354
26bc5340 5355 if (vram_lost) {
77e7f829 5356 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 5357 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
5358 }
5359
26bc5340
AG
5360 r = amdgpu_device_fw_loading(tmp_adev);
5361 if (r)
5362 return r;
5363
c45e38f2
LL
5364 r = amdgpu_xcp_restore_partition_mode(
5365 tmp_adev->xcp_mgr);
5366 if (r)
5367 goto out;
5368
26bc5340
AG
5369 r = amdgpu_device_ip_resume_phase2(tmp_adev);
5370 if (r)
5371 goto out;
5372
b7043800
AD
5373 if (tmp_adev->mman.buffer_funcs_ring->sched.ready)
5374 amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true);
5375
26bc5340
AG
5376 if (vram_lost)
5377 amdgpu_device_fill_reset_magic(tmp_adev);
5378
fdafb359
EQ
5379 /*
5380 * Add this ASIC as tracked as reset was already
5381 * complete successfully.
5382 */
5383 amdgpu_register_gpu_instance(tmp_adev);
5384
04442bf7
LL
5385 if (!reset_context->hive &&
5386 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
e3c1b071 5387 amdgpu_xgmi_add_device(tmp_adev);
5388
7c04ca50 5389 r = amdgpu_device_ip_late_init(tmp_adev);
5390 if (r)
5391 goto out;
5392
087451f3 5393 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
565d1941 5394
e8fbaf03
GC
5395 /*
 5396				 * The GPU enters a bad state once the number of faulty
 5397				 * pages retired by ECC reaches the threshold, and RAS
 5398				 * recovery is scheduled next. So add a check
 5399				 * here to break recovery if it indeed exceeds the
 5400				 * bad page threshold, and remind the user to
 5401				 * retire this GPU or set a bigger
 5402				 * bad_page_threshold value to fix this when
 5403				 * probing the driver again.
5404 */
11003c68 5405 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
e8fbaf03
GC
5406 /* must succeed. */
5407 amdgpu_ras_resume(tmp_adev);
5408 } else {
5409 r = -EINVAL;
5410 goto out;
5411 }
e79a04d5 5412
26bc5340 5413 /* Update PSP FW topology after reset */
04442bf7
LL
5414 if (reset_context->hive &&
5415 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5416 r = amdgpu_xgmi_update_topology(
5417 reset_context->hive, tmp_adev);
26bc5340
AG
5418 }
5419 }
5420
26bc5340
AG
5421out:
5422 if (!r) {
5423 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5424 r = amdgpu_ib_ring_tests(tmp_adev);
5425 if (r) {
5426 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
26bc5340
AG
5427 need_full_reset = true;
5428 r = -EAGAIN;
5429 goto end;
5430 }
5431 }
5432
5433 if (!r)
5434 r = amdgpu_device_recover_vram(tmp_adev);
5435 else
5436 tmp_adev->asic_reset_res = r;
5437 }
5438
5439end:
04442bf7
LL
5440 if (need_full_reset)
5441 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5442 else
5443 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340
AG
5444 return r;
5445}
5446
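/*
 * Illustrative note: the single-device call pattern used later in this file
 * by the PCI slot-reset handler looks like this.
 *
 *	INIT_LIST_HEAD(&device_list);
 *	list_add_tail(&adev->reset_list, &device_list);
 *	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
 *	r = amdgpu_do_asic_reset(&device_list, &reset_context);
 */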
e923be99 5447static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
26bc5340 5448{
5740682e 5449
a3a09142
AD
5450 switch (amdgpu_asic_reset_method(adev)) {
5451 case AMD_RESET_METHOD_MODE1:
5452 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5453 break;
5454 case AMD_RESET_METHOD_MODE2:
5455 adev->mp1_state = PP_MP1_STATE_RESET;
5456 break;
5457 default:
5458 adev->mp1_state = PP_MP1_STATE_NONE;
5459 break;
5460 }
26bc5340 5461}
d38ceaf9 5462
e923be99 5463static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
26bc5340 5464{
89041940 5465 amdgpu_vf_error_trans_all(adev);
a3a09142 5466 adev->mp1_state = PP_MP1_STATE_NONE;
91fb309d
HC
5467}
5468
3f12acc8
EQ
5469static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5470{
5471 struct pci_dev *p = NULL;
5472
5473 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5474 adev->pdev->bus->number, 1);
5475 if (p) {
5476 pm_runtime_enable(&(p->dev));
5477 pm_runtime_resume(&(p->dev));
5478 }
b85e285e
YY
5479
5480 pci_dev_put(p);
3f12acc8
EQ
5481}
5482
5483static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5484{
5485 enum amd_reset_method reset_method;
5486 struct pci_dev *p = NULL;
5487 u64 expires;
5488
5489 /*
5490 * For now, only BACO and mode1 reset are confirmed
 5491	 * to suffer the audio issue when the audio device is not properly suspended.
5492 */
5493 reset_method = amdgpu_asic_reset_method(adev);
5494 if ((reset_method != AMD_RESET_METHOD_BACO) &&
5495 (reset_method != AMD_RESET_METHOD_MODE1))
5496 return -EINVAL;
5497
5498 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5499 adev->pdev->bus->number, 1);
5500 if (!p)
5501 return -ENODEV;
5502
5503 expires = pm_runtime_autosuspend_expiration(&(p->dev));
5504 if (!expires)
5505 /*
 5506		 * If we cannot get the audio device autosuspend delay,
 5507		 * a fixed 4s interval will be used. The audio controller's
 5508		 * default autosuspend delay is 3s, so the 4s used here is
 5509		 * guaranteed to cover it.
5510 */
54b7feb9 5511 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
5512
5513 while (!pm_runtime_status_suspended(&(p->dev))) {
5514 if (!pm_runtime_suspend(&(p->dev)))
5515 break;
5516
5517 if (expires < ktime_get_mono_fast_ns()) {
5518 dev_warn(adev->dev, "failed to suspend display audio\n");
b85e285e 5519 pci_dev_put(p);
3f12acc8
EQ
5520 /* TODO: abort the succeeding gpu reset? */
5521 return -ETIMEDOUT;
5522 }
5523 }
5524
5525 pm_runtime_disable(&(p->dev));
5526
b85e285e 5527 pci_dev_put(p);
3f12acc8
EQ
5528 return 0;
5529}
5530
d193b12b 5531static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
247c7b0d
AG
5532{
5533 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5534
5535#if defined(CONFIG_DEBUG_FS)
5536 if (!amdgpu_sriov_vf(adev))
5537 cancel_work(&adev->reset_work);
5538#endif
5539
5540 if (adev->kfd.dev)
5541 cancel_work(&adev->kfd.reset_work);
5542
5543 if (amdgpu_sriov_vf(adev))
5544 cancel_work(&adev->virt.flr_work);
5545
5546 if (con && adev->ras_enabled)
5547 cancel_work(&con->recovery_work);
5548
5549}
5550
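/*
 * Illustrative note: amdgpu_device_gpu_recover() below calls this per device
 * after the pre-reset work, so only the scheduler-driven reset in flight
 * survives.
 *
 *	list_for_each_entry(tmp_adev, device_list_handle, reset_list)
 *		amdgpu_device_stop_pending_resets(tmp_adev);
 */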
26bc5340 5551/**
6e9c65f7 5552 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
26bc5340 5553 *
982a820b 5554 * @adev: amdgpu_device pointer
26bc5340 5555 * @job: which job trigger hang
80bd2de1 5556 * @reset_context: amdgpu reset context pointer
26bc5340
AG
5557 *
5558 * Attempt to reset the GPU if it has hung (all asics).
 5559 * Attempt to do a soft reset or full reset and reinitialize the ASIC.
5560 * Returns 0 for success or an error on failure.
5561 */
5562
cf727044 5563int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
f1549c09
LG
5564 struct amdgpu_job *job,
5565 struct amdgpu_reset_context *reset_context)
26bc5340 5566{
1d721ed6 5567 struct list_head device_list, *device_list_handle = NULL;
7dd8c205 5568 bool job_signaled = false;
26bc5340 5569 struct amdgpu_hive_info *hive = NULL;
26bc5340 5570 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 5571 int i, r = 0;
bb5c7235 5572 bool need_emergency_restart = false;
3f12acc8 5573 bool audio_suspended = false;
f5c7e779
YC
5574 bool gpu_reset_for_dev_remove = false;
5575
5576 gpu_reset_for_dev_remove =
5577 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5578 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340 5579
6e3cd2a9 5580 /*
bb5c7235
WS
5581 * Special case: RAS triggered and full reset isn't supported
5582 */
5583 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5584
d5ea093e
AG
5585 /*
5586 * Flush RAM to disk so that after reboot
5587 * the user can read log and see why the system rebooted.
5588 */
80285ae1
SY
5589 if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
5590 amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
5591 DRM_WARN("Emergency reboot.");
5592
5593 ksys_sync_helper();
5594 emergency_restart();
5595 }
5596
b823821f 5597 dev_info(adev->dev, "GPU %s begin!\n",
bb5c7235 5598 need_emergency_restart ? "jobs stop":"reset");
26bc5340 5599
175ac6ec
ZL
5600 if (!amdgpu_sriov_vf(adev))
5601 hive = amdgpu_get_xgmi_hive(adev);
681260df 5602 if (hive)
53b3f8f4 5603 mutex_lock(&hive->hive_lock);
26bc5340 5604
f1549c09
LG
5605 reset_context->job = job;
5606 reset_context->hive = hive;
9e94d22c
EQ
5607 /*
5608 * Build list of devices to reset.
5609 * In case we are in XGMI hive mode, resort the device list
5610 * to put adev in the 1st position.
5611 */
5612 INIT_LIST_HEAD(&device_list);
175ac6ec 5613 if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
83d29a5f 5614 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
655ce9cb 5615 list_add_tail(&tmp_adev->reset_list, &device_list);
83d29a5f
YC
5616 if (gpu_reset_for_dev_remove && adev->shutdown)
5617 tmp_adev->shutdown = true;
5618 }
655ce9cb 5619 if (!list_is_first(&adev->reset_list, &device_list))
5620 list_rotate_to_front(&adev->reset_list, &device_list);
5621 device_list_handle = &device_list;
26bc5340 5622 } else {
655ce9cb 5623 list_add_tail(&adev->reset_list, &device_list);
26bc5340
AG
5624 device_list_handle = &device_list;
5625 }
5626
e923be99
AG
5627 /* We need to lock reset domain only once both for XGMI and single device */
5628 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5629 reset_list);
3675c2f2 5630 amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
e923be99 5631
1d721ed6 5632 /* block all schedulers and reset given job's ring */
655ce9cb 5633 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f287a3c5 5634
e923be99 5635 amdgpu_device_set_mp1_state(tmp_adev);
f287a3c5 5636
3f12acc8
EQ
5637 /*
 5638		 * Try to put the audio codec into suspend state
 5639		 * before the GPU reset starts.
 5640		 *
 5641		 * The power domain of the graphics device is
 5642		 * shared with the AZ power domain. Without this,
 5643		 * we may change the audio hardware from behind
 5644		 * the audio driver's back, which triggers
 5645		 * audio codec errors.
5646 */
5647 if (!amdgpu_device_suspend_display_audio(tmp_adev))
5648 audio_suspended = true;
5649
9e94d22c
EQ
5650 amdgpu_ras_set_error_query_ready(tmp_adev, false);
5651
52fb44cf
EQ
5652 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5653
c004d44e 5654 if (!amdgpu_sriov_vf(tmp_adev))
428890a3 5655 amdgpu_amdkfd_pre_reset(tmp_adev);
9e94d22c 5656
12ffa55d
AG
5657 /*
 5658		 * Mark these ASICs to be reset as untracked first,
 5659		 * and add them back after the reset completes.
5660 */
5661 amdgpu_unregister_gpu_instance(tmp_adev);
5662
163d4cd2 5663 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
565d1941 5664
f1c1314b 5665 /* disable ras on ALL IPs */
bb5c7235 5666 if (!need_emergency_restart &&
b823821f 5667 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 5668 amdgpu_ras_suspend(tmp_adev);
5669
1d721ed6
AG
5670 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5671 struct amdgpu_ring *ring = tmp_adev->rings[i];
5672
35963cf2 5673 if (!ring || !drm_sched_wqueue_ready(&ring->sched))
1d721ed6
AG
5674 continue;
5675
0b2d2c2e 5676 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 5677
bb5c7235 5678 if (need_emergency_restart)
7c6e68c7 5679 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6 5680 }
8f8c80f4 5681 atomic_inc(&tmp_adev->gpu_reset_counter);
1d721ed6
AG
5682 }
5683
bb5c7235 5684 if (need_emergency_restart)
7c6e68c7
AG
5685 goto skip_sched_resume;
5686
1d721ed6
AG
5687 /*
5688 * Must check guilty signal here since after this point all old
5689 * HW fences are force signaled.
5690 *
5691 * job->base holds a reference to parent fence
5692 */
f6a3f660 5693 if (job && dma_fence_is_signaled(&job->hw_fence)) {
1d721ed6 5694 job_signaled = true;
1d721ed6
AG
5695 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5696 goto skip_hw_reset;
5697 }
5698
26bc5340 5699retry: /* Rest of adevs pre asic reset from XGMI hive. */
655ce9cb 5700 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f5c7e779
YC
5701 if (gpu_reset_for_dev_remove) {
 5702			/* Workaround for ASICs that need to disable the SMC first */
5703 amdgpu_device_smu_fini_early(tmp_adev);
5704 }
f1549c09 5705 r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
26bc5340
AG
5706 /*TODO Should we stop ?*/
5707 if (r) {
aac89168 5708 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
4a580877 5709 r, adev_to_drm(tmp_adev)->unique);
26bc5340
AG
5710 tmp_adev->asic_reset_res = r;
5711 }
247c7b0d
AG
5712
5713 /*
5714 * Drop all pending non scheduler resets. Scheduler resets
5715 * were already dropped during drm_sched_stop
5716 */
d193b12b 5717 amdgpu_device_stop_pending_resets(tmp_adev);
26bc5340
AG
5718 }
5719
5720 /* Actual ASIC resets if needed.*/
4f30d920 5721 /* Host driver will handle XGMI hive reset for SRIOV */
26bc5340
AG
5722 if (amdgpu_sriov_vf(adev)) {
5723 r = amdgpu_device_reset_sriov(adev, job ? false : true);
5724 if (r)
5725 adev->asic_reset_res = r;
950d6425 5726
28606c4e 5727 /* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */
4e8303cf
LL
5728 if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
5729 IP_VERSION(9, 4, 2) ||
5730 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
950d6425 5731 amdgpu_ras_resume(adev);
26bc5340 5732 } else {
f1549c09 5733 r = amdgpu_do_asic_reset(device_list_handle, reset_context);
b98a1648 5734 if (r && r == -EAGAIN)
26bc5340 5735 goto retry;
f5c7e779
YC
5736
5737 if (!r && gpu_reset_for_dev_remove)
5738 goto recover_end;
26bc5340
AG
5739 }
5740
1d721ed6
AG
5741skip_hw_reset:
5742
26bc5340 5743 /* Post ASIC reset for all devs .*/
655ce9cb 5744 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
7c6e68c7 5745
1d721ed6
AG
5746 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5747 struct amdgpu_ring *ring = tmp_adev->rings[i];
5748
35963cf2 5749 if (!ring || !drm_sched_wqueue_ready(&ring->sched))
1d721ed6
AG
5750 continue;
5751
6868a2c4 5752 drm_sched_start(&ring->sched, true);
1d721ed6
AG
5753 }
5754
b8920e1e 5755 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
4a580877 5756 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
1d721ed6 5757
7258fa31
SK
5758 if (tmp_adev->asic_reset_res)
5759 r = tmp_adev->asic_reset_res;
5760
1d721ed6 5761 tmp_adev->asic_reset_res = 0;
26bc5340
AG
5762
5763 if (r) {
5764 /* bad news, how to tell it to userspace ? */
12ffa55d 5765 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
5766 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5767 } else {
12ffa55d 5768 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
3fa8f89d
S
5769 if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5770 DRM_WARN("smart shift update failed\n");
26bc5340 5771 }
7c6e68c7 5772 }
26bc5340 5773
7c6e68c7 5774skip_sched_resume:
655ce9cb 5775 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
428890a3 5776 /* unlock kfd: SRIOV would do it separately */
c004d44e 5777 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
428890a3 5778 amdgpu_amdkfd_post_reset(tmp_adev);
8e2712e7 5779
5780 /* kfd_post_reset will do nothing if kfd device is not initialized,
 5781		 * need to bring up kfd here if it was not initialized before
5782 */
5783 if (!adev->kfd.init_complete)
5784 amdgpu_amdkfd_device_init(adev);
5785
3f12acc8
EQ
5786 if (audio_suspended)
5787 amdgpu_device_resume_display_audio(tmp_adev);
e923be99
AG
5788
5789 amdgpu_device_unset_mp1_state(tmp_adev);
d293470e
YC
5790
5791 amdgpu_ras_set_error_query_ready(tmp_adev, true);
26bc5340
AG
5792 }
5793
f5c7e779 5794recover_end:
e923be99
AG
5795 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5796 reset_list);
5797 amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5798
9e94d22c 5799 if (hive) {
9e94d22c 5800 mutex_unlock(&hive->hive_lock);
d95e8e97 5801 amdgpu_put_xgmi_hive(hive);
9e94d22c 5802 }
26bc5340 5803
f287a3c5 5804 if (r)
26bc5340 5805 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
ab9a0b1f
AG
5806
5807 atomic_set(&adev->reset_domain->reset_res, r);
d38ceaf9
AD
5808 return r;
5809}
5810
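/*
 * Illustrative sketch (assumption): roughly how a job timeout handler is
 * expected to drive the recovery entry point above.
 *
 *	struct amdgpu_reset_context reset_context;
 *
 *	memset(&reset_context, 0, sizeof(reset_context));
 *	reset_context.method = AMD_RESET_METHOD_NONE;
 *	reset_context.reset_req_dev = adev;
 *	clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
 *	r = amdgpu_device_gpu_recover(ring->adev, job, &reset_context);
 */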
466a7d11
ML
5811/**
5812 * amdgpu_device_partner_bandwidth - find the bandwidth of appropriate partner
5813 *
5814 * @adev: amdgpu_device pointer
5815 * @speed: pointer to the speed of the link
5816 * @width: pointer to the width of the link
5817 *
5818 * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
5819 * first physical partner to an AMD dGPU.
5820 * This will exclude any virtual switches and links.
5821 */
5822static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
5823 enum pci_bus_speed *speed,
5824 enum pcie_link_width *width)
5825{
5826 struct pci_dev *parent = adev->pdev;
5827
5828 if (!speed || !width)
5829 return;
5830
5831 *speed = PCI_SPEED_UNKNOWN;
5832 *width = PCIE_LNK_WIDTH_UNKNOWN;
5833
5834 while ((parent = pci_upstream_bridge(parent))) {
5835 /* skip upstream/downstream switches internal to dGPU*/
5836 if (parent->vendor == PCI_VENDOR_ID_ATI)
5837 continue;
5838 *speed = pcie_get_speed_cap(parent);
5839 *width = pcie_get_width_cap(parent);
5840 break;
5841 }
5842}
5843
e3ecdffa
AD
5844/**
 5845 * amdgpu_device_get_pcie_info - fetch PCIe info about the PCIe slot
5846 *
5847 * @adev: amdgpu_device pointer
5848 *
 5849 * Fetches and stores in the driver the PCIE capabilities (gen speed
5850 * and lanes) of the slot the device is in. Handles APUs and
5851 * virtualized environments where PCIE config space may not be available.
5852 */
5494d864 5853static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 5854{
5d9a6330 5855 struct pci_dev *pdev;
c5313457
HK
5856 enum pci_bus_speed speed_cap, platform_speed_cap;
5857 enum pcie_link_width platform_link_width;
d0dd7f0c 5858
cd474ba0
AD
5859 if (amdgpu_pcie_gen_cap)
5860 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 5861
cd474ba0
AD
5862 if (amdgpu_pcie_lane_cap)
5863 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 5864
cd474ba0 5865 /* covers APUs as well */
04e85958 5866 if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
cd474ba0
AD
5867 if (adev->pm.pcie_gen_mask == 0)
5868 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5869 if (adev->pm.pcie_mlw_mask == 0)
5870 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 5871 return;
cd474ba0 5872 }
d0dd7f0c 5873
c5313457
HK
5874 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5875 return;
5876
466a7d11
ML
5877 amdgpu_device_partner_bandwidth(adev, &platform_speed_cap,
5878 &platform_link_width);
c5313457 5879
cd474ba0 5880 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
5881 /* asic caps */
5882 pdev = adev->pdev;
5883 speed_cap = pcie_get_speed_cap(pdev);
5884 if (speed_cap == PCI_SPEED_UNKNOWN) {
5885 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
5886 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5887 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 5888 } else {
2b3a1f51
FX
5889 if (speed_cap == PCIE_SPEED_32_0GT)
5890 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5891 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5892 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5893 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5894 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5895 else if (speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5896 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5897 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5898 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5899 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5900 else if (speed_cap == PCIE_SPEED_8_0GT)
5901 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5902 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5903 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5904 else if (speed_cap == PCIE_SPEED_5_0GT)
5905 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5906 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5907 else
5908 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5909 }
5910 /* platform caps */
c5313457 5911 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
5912 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5913 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5914 } else {
2b3a1f51
FX
5915 if (platform_speed_cap == PCIE_SPEED_32_0GT)
5916 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5917 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5918 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5919 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5920 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5921 else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5922 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5923 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5924 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5925 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 5926 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
5927 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5928 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5929 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 5930 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
5931 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5932 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5933 else
5934 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5935
cd474ba0
AD
5936 }
5937 }
5938 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 5939 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
5940 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5941 } else {
c5313457 5942 switch (platform_link_width) {
5d9a6330 5943 case PCIE_LNK_X32:
cd474ba0
AD
5944 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5945 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5946 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5947 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5948 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5949 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5950 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5951 break;
5d9a6330 5952 case PCIE_LNK_X16:
cd474ba0
AD
5953 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5954 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5955 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5956 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5957 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5958 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5959 break;
5d9a6330 5960 case PCIE_LNK_X12:
cd474ba0
AD
5961 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5962 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5963 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5964 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5965 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5966 break;
5d9a6330 5967 case PCIE_LNK_X8:
cd474ba0
AD
5968 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5969 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5970 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5971 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5972 break;
5d9a6330 5973 case PCIE_LNK_X4:
cd474ba0
AD
5974 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5975 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5976 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5977 break;
5d9a6330 5978 case PCIE_LNK_X2:
cd474ba0
AD
5979 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5980 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5981 break;
5d9a6330 5982 case PCIE_LNK_X1:
cd474ba0
AD
5983 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5984 break;
5985 default:
5986 break;
5987 }
d0dd7f0c
AD
5988 }
5989 }
5990}
d38ceaf9 5991
08a2fd23
RE
5992/**
5993 * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
5994 *
5995 * @adev: amdgpu_device pointer
5996 * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
5997 *
5998 * Return true if @peer_adev can access (DMA) @adev through the PCIe
5999 * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
6000 * @peer_adev.
6001 */
6002bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
6003 struct amdgpu_device *peer_adev)
6004{
6005#ifdef CONFIG_HSA_AMD_P2P
6006 uint64_t address_mask = peer_adev->dev->dma_mask ?
6007 ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
6008 resource_size_t aper_limit =
6009 adev->gmc.aper_base + adev->gmc.aper_size - 1;
bb66ecbf
LL
6010 bool p2p_access =
6011 !adev->gmc.xgmi.connected_to_cpu &&
6012 !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
08a2fd23
RE
6013
6014 return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
6015 adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
6016 !(adev->gmc.aper_base & address_mask ||
6017 aper_limit & address_mask));
6018#else
6019 return false;
6020#endif
6021}
6022
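/*
 * Illustrative sketch (assumption): a P2P mapping path would gate its
 * decision on this check and fall back to system memory otherwise.
 *
 *	if (!amdgpu_device_is_peer_accessible(adev, peer_adev))
 *		return -EINVAL;	hypothetical fallback policy
 */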
361dbd01
AD
6023int amdgpu_device_baco_enter(struct drm_device *dev)
6024{
1348969a 6025 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 6026 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01 6027
6ab68650 6028 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
6029 return -ENOTSUPP;
6030
8ab0d6f0 6031 if (ras && adev->ras_enabled &&
acdae216 6032 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
6033 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
6034
9530273e 6035 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
6036}
6037
6038int amdgpu_device_baco_exit(struct drm_device *dev)
6039{
1348969a 6040 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 6041 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 6042 int ret = 0;
361dbd01 6043
6ab68650 6044 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
6045 return -ENOTSUPP;
6046
9530273e
EQ
6047 ret = amdgpu_dpm_baco_exit(adev);
6048 if (ret)
6049 return ret;
7a22677b 6050
8ab0d6f0 6051 if (ras && adev->ras_enabled &&
acdae216 6052 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
6053 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
6054
1bece222
CL
6055 if (amdgpu_passthrough(adev) &&
6056 adev->nbio.funcs->clear_doorbell_interrupt)
6057 adev->nbio.funcs->clear_doorbell_interrupt(adev);
6058
7a22677b 6059 return 0;
361dbd01 6060}
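/*
 * Illustrative sketch (assumption): runtime-PM style pairing of the two BACO
 * helpers above; error handling trimmed.
 *
 *	r = amdgpu_device_baco_enter(dev);
 *	...
 *	r = amdgpu_device_baco_exit(dev);
 */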
c9a6b82f
AG
6061
6062/**
6063 * amdgpu_pci_error_detected - Called when a PCI error is detected.
6064 * @pdev: PCI device struct
6065 * @state: PCI channel state
6066 *
6067 * Description: Called when a PCI error is detected.
6068 *
6069 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
6070 */
6071pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
6072{
6073 struct drm_device *dev = pci_get_drvdata(pdev);
6074 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 6075 int i;
c9a6b82f
AG
6076
6077 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
6078
6894305c
AG
6079 if (adev->gmc.xgmi.num_physical_nodes > 1) {
6080 DRM_WARN("No support for XGMI hive yet...");
6081 return PCI_ERS_RESULT_DISCONNECT;
6082 }
6083
e17e27f9
GC
6084 adev->pci_channel_state = state;
6085
c9a6b82f
AG
6086 switch (state) {
6087 case pci_channel_io_normal:
6088 return PCI_ERS_RESULT_CAN_RECOVER;
acd89fca 6089 /* Fatal error, prepare for slot reset */
8a11d283
TZ
6090 case pci_channel_io_frozen:
6091 /*
d0fb18b5 6092 * Locking adev->reset_domain->sem will prevent any external access
acd89fca
AG
6093 * to GPU during PCI error recovery
6094 */
3675c2f2 6095 amdgpu_device_lock_reset_domain(adev->reset_domain);
e923be99 6096 amdgpu_device_set_mp1_state(adev);
acd89fca
AG
6097
6098 /*
6099 * Block any work scheduling as we do for regular GPU reset
6100 * for the duration of the recovery
6101 */
6102 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6103 struct amdgpu_ring *ring = adev->rings[i];
6104
35963cf2 6105 if (!ring || !drm_sched_wqueue_ready(&ring->sched))
acd89fca
AG
6106 continue;
6107
6108 drm_sched_stop(&ring->sched, NULL);
6109 }
8f8c80f4 6110 atomic_inc(&adev->gpu_reset_counter);
c9a6b82f
AG
6111 return PCI_ERS_RESULT_NEED_RESET;
6112 case pci_channel_io_perm_failure:
6113 /* Permanent error, prepare for device removal */
6114 return PCI_ERS_RESULT_DISCONNECT;
6115 }
6116
6117 return PCI_ERS_RESULT_NEED_RESET;
6118}
6119
6120/**
6121 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
6122 * @pdev: pointer to PCI device
6123 */
6124pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
6125{
6126
6127 DRM_INFO("PCI error: mmio enabled callback!!\n");
6128
6129 /* TODO - dump whatever for debugging purposes */
6130
 6131	/* This is called only if amdgpu_pci_error_detected returns
6132 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
6133 * works, no need to reset slot.
6134 */
6135
6136 return PCI_ERS_RESULT_RECOVERED;
6137}
6138
6139/**
6140 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
6141 * @pdev: PCI device struct
6142 *
6143 * Description: This routine is called by the pci error recovery
6144 * code after the PCI slot has been reset, just before we
6145 * should resume normal operations.
6146 */
6147pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
6148{
6149 struct drm_device *dev = pci_get_drvdata(pdev);
6150 struct amdgpu_device *adev = drm_to_adev(dev);
362c7b91 6151 int r, i;
04442bf7 6152 struct amdgpu_reset_context reset_context;
362c7b91 6153 u32 memsize;
7ac71382 6154 struct list_head device_list;
c9a6b82f
AG
6155
6156 DRM_INFO("PCI error: slot reset callback!!\n");
6157
04442bf7
LL
6158 memset(&reset_context, 0, sizeof(reset_context));
6159
7ac71382 6160 INIT_LIST_HEAD(&device_list);
655ce9cb 6161 list_add_tail(&adev->reset_list, &device_list);
7ac71382 6162
362c7b91
AG
6163 /* wait for asic to come out of reset */
6164 msleep(500);
6165
7ac71382 6166 /* Restore PCI confspace */
c1dd4aa6 6167 amdgpu_device_load_pci_state(pdev);
c9a6b82f 6168
362c7b91
AG
6169 /* confirm ASIC came out of reset */
6170 for (i = 0; i < adev->usec_timeout; i++) {
6171 memsize = amdgpu_asic_get_config_memsize(adev);
6172
6173 if (memsize != 0xffffffff)
6174 break;
6175 udelay(1);
6176 }
6177 if (memsize == 0xffffffff) {
6178 r = -ETIME;
6179 goto out;
6180 }
6181
04442bf7
LL
6182 reset_context.method = AMD_RESET_METHOD_NONE;
6183 reset_context.reset_req_dev = adev;
6184 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
6185 set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
6186
7afefb81 6187 adev->no_hw_access = true;
04442bf7 6188 r = amdgpu_device_pre_asic_reset(adev, &reset_context);
7afefb81 6189 adev->no_hw_access = false;
c9a6b82f
AG
6190 if (r)
6191 goto out;
6192
04442bf7 6193 r = amdgpu_do_asic_reset(&device_list, &reset_context);
c9a6b82f
AG
6194
6195out:
c9a6b82f 6196 if (!r) {
c1dd4aa6
AG
6197 if (amdgpu_device_cache_pci_state(adev->pdev))
6198 pci_restore_state(adev->pdev);
6199
c9a6b82f
AG
6200 DRM_INFO("PCIe error recovery succeeded\n");
6201 } else {
6202 DRM_ERROR("PCIe error recovery failed, err:%d", r);
e923be99
AG
6203 amdgpu_device_unset_mp1_state(adev);
6204 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f
AG
6205 }
6206
6207 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
6208}
6209
6210/**
6211 * amdgpu_pci_resume() - resume normal ops after PCI reset
6212 * @pdev: pointer to PCI device
6213 *
 6214 * Called when the error recovery driver tells us that it's
505199a3 6215 * OK to resume normal operation.
c9a6b82f
AG
6216 */
6217void amdgpu_pci_resume(struct pci_dev *pdev)
6218{
6219 struct drm_device *dev = pci_get_drvdata(pdev);
6220 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 6221 int i;
c9a6b82f 6222
c9a6b82f
AG
6223
6224 DRM_INFO("PCI error: resume callback!!\n");
acd89fca 6225
e17e27f9
GC
6226 /* Only continue execution for the case of pci_channel_io_frozen */
6227 if (adev->pci_channel_state != pci_channel_io_frozen)
6228 return;
6229
acd89fca
AG
6230 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6231 struct amdgpu_ring *ring = adev->rings[i];
6232
35963cf2 6233 if (!ring || !drm_sched_wqueue_ready(&ring->sched))
acd89fca
AG
6234 continue;
6235
acd89fca
AG
6236 drm_sched_start(&ring->sched, true);
6237 }
6238
e923be99
AG
6239 amdgpu_device_unset_mp1_state(adev);
6240 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f 6241}
c1dd4aa6
AG
6242
6243bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
6244{
6245 struct drm_device *dev = pci_get_drvdata(pdev);
6246 struct amdgpu_device *adev = drm_to_adev(dev);
6247 int r;
6248
6249 r = pci_save_state(pdev);
6250 if (!r) {
6251 kfree(adev->pci_state);
6252
6253 adev->pci_state = pci_store_saved_state(pdev);
6254
6255 if (!adev->pci_state) {
6256 DRM_ERROR("Failed to store PCI saved state");
6257 return false;
6258 }
6259 } else {
6260 DRM_WARN("Failed to save PCI state, err:%d\n", r);
6261 return false;
6262 }
6263
6264 return true;
6265}
6266
6267bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
6268{
6269 struct drm_device *dev = pci_get_drvdata(pdev);
6270 struct amdgpu_device *adev = drm_to_adev(dev);
6271 int r;
6272
6273 if (!adev->pci_state)
6274 return false;
6275
6276 r = pci_load_saved_state(pdev, adev->pci_state);
6277
6278 if (!r) {
6279 pci_restore_state(pdev);
6280 } else {
6281 DRM_WARN("Failed to load PCI state, err:%d\n", r);
6282 return false;
6283 }
6284
6285 return true;
6286}
6287
810085dd
EH
6288void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
6289 struct amdgpu_ring *ring)
6290{
6291#ifdef CONFIG_X86_64
b818a5d3 6292 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
6293 return;
6294#endif
6295 if (adev->gmc.xgmi.connected_to_cpu)
6296 return;
6297
6298 if (ring && ring->funcs->emit_hdp_flush)
6299 amdgpu_ring_emit_hdp_flush(ring);
6300 else
6301 amdgpu_asic_flush_hdp(adev, ring);
6302}
c1dd4aa6 6303
810085dd
EH
6304void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
6305 struct amdgpu_ring *ring)
6306{
6307#ifdef CONFIG_X86_64
b818a5d3 6308 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
6309 return;
6310#endif
6311 if (adev->gmc.xgmi.connected_to_cpu)
6312 return;
c1dd4aa6 6313
810085dd
EH
6314 amdgpu_asic_invalidate_hdp(adev, ring);
6315}
34f3a4a9 6316
89a7a870
AG
6317int amdgpu_in_reset(struct amdgpu_device *adev)
6318{
6319 return atomic_read(&adev->reset_domain->in_gpu_reset);
53a17b6b
TZ
6320}
6321
34f3a4a9
LY
6322/**
6323 * amdgpu_device_halt() - bring hardware to some kind of halt state
6324 *
6325 * @adev: amdgpu_device pointer
6326 *
6327 * Bring hardware to some kind of halt state so that no one can touch it
 6328 * any more. This helps to maintain the error context when an error occurs.
 6329 * Compared to a simple hang, the system stays stable at least for SSH
 6330 * access, so it should be trivial to inspect the hardware state and
 6331 * see what's going on. Implemented as follows:
6332 *
6333 * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
6334 * clears all CPU mappings to device, disallows remappings through page faults
6335 * 2. amdgpu_irq_disable_all() disables all interrupts
6336 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
 6337 * 4. set adev->no_hw_access to avoid potential crashes after step 5
6338 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
6339 * 6. pci_disable_device() and pci_wait_for_pending_transaction()
6340 * flush any in flight DMA operations
6341 */
6342void amdgpu_device_halt(struct amdgpu_device *adev)
6343{
6344 struct pci_dev *pdev = adev->pdev;
e0f943b4 6345 struct drm_device *ddev = adev_to_drm(adev);
34f3a4a9 6346
2c1c7ba4 6347 amdgpu_xcp_dev_unplug(adev);
34f3a4a9
LY
6348 drm_dev_unplug(ddev);
6349
6350 amdgpu_irq_disable_all(adev);
6351
6352 amdgpu_fence_driver_hw_fini(adev);
6353
6354 adev->no_hw_access = true;
6355
6356 amdgpu_device_unmap_mmio(adev);
6357
6358 pci_disable_device(pdev);
6359 pci_wait_for_pending_transaction(pdev);
6360}
86700a40
XD
6361
6362u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
6363 u32 reg)
6364{
6365 unsigned long flags, address, data;
6366 u32 r;
6367
6368 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6369 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6370
6371 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6372 WREG32(address, reg * 4);
6373 (void)RREG32(address);
6374 r = RREG32(data);
6375 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6376 return r;
6377}
6378
6379void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
6380 u32 reg, u32 v)
6381{
6382 unsigned long flags, address, data;
6383
6384 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6385 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6386
6387 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6388 WREG32(address, reg * 4);
6389 (void)RREG32(address);
6390 WREG32(data, v);
6391 (void)RREG32(data);
6392 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6393}
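/*
 * Illustrative sketch (assumption): read-modify-write of a PCIe port register
 * through the two helpers above; the register and field names are
 * hypothetical.
 *
 *	u32 v = amdgpu_device_pcie_port_rreg(adev, reg);
 *	v |= SOME_FIELD_MASK;
 *	amdgpu_device_pcie_port_wreg(adev, reg, v);
 */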
68ce8b24
CK
6394
6395/**
6396 * amdgpu_device_switch_gang - switch to a new gang
6397 * @adev: amdgpu_device pointer
6398 * @gang: the gang to switch to
6399 *
6400 * Try to switch to a new gang.
6401 * Returns: NULL if we switched to the new gang or a reference to the current
6402 * gang leader.
6403 */
6404struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
6405 struct dma_fence *gang)
6406{
6407 struct dma_fence *old = NULL;
6408
6409 do {
6410 dma_fence_put(old);
6411 rcu_read_lock();
6412 old = dma_fence_get_rcu_safe(&adev->gang_submit);
6413 rcu_read_unlock();
6414
6415 if (old == gang)
6416 break;
6417
6418 if (!dma_fence_is_signaled(old))
6419 return old;
6420
6421 } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
6422 old, gang) != old);
6423
6424 dma_fence_put(old);
6425 return NULL;
6426}
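/*
 * Illustrative sketch (assumption): callers are expected to wait on the
 * returned gang leader and retry until the switch succeeds.
 *
 *	while ((old = amdgpu_device_switch_gang(adev, gang))) {
 *		dma_fence_wait(old, false);
 *		dma_fence_put(old);
 *	}
 */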
220c8cc8
AD
6427
6428bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6429{
6430 switch (adev->asic_type) {
6431#ifdef CONFIG_DRM_AMDGPU_SI
6432 case CHIP_HAINAN:
6433#endif
6434 case CHIP_TOPAZ:
6435 /* chips with no display hardware */
6436 return false;
6437#ifdef CONFIG_DRM_AMDGPU_SI
6438 case CHIP_TAHITI:
6439 case CHIP_PITCAIRN:
6440 case CHIP_VERDE:
6441 case CHIP_OLAND:
6442#endif
6443#ifdef CONFIG_DRM_AMDGPU_CIK
6444 case CHIP_BONAIRE:
6445 case CHIP_HAWAII:
6446 case CHIP_KAVERI:
6447 case CHIP_KABINI:
6448 case CHIP_MULLINS:
6449#endif
6450 case CHIP_TONGA:
6451 case CHIP_FIJI:
6452 case CHIP_POLARIS10:
6453 case CHIP_POLARIS11:
6454 case CHIP_POLARIS12:
6455 case CHIP_VEGAM:
6456 case CHIP_CARRIZO:
6457 case CHIP_STONEY:
6458 /* chips with display hardware */
6459 return true;
6460 default:
6461 /* IP discovery */
4e8303cf 6462 if (!amdgpu_ip_version(adev, DCE_HWIP, 0) ||
220c8cc8
AD
6463 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6464 return false;
6465 return true;
6466 }
6467}
81283fee
JZ
6468
6469uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
6470 uint32_t inst, uint32_t reg_addr, char reg_name[],
6471 uint32_t expected_value, uint32_t mask)
6472{
6473 uint32_t ret = 0;
6474 uint32_t old_ = 0;
6475 uint32_t tmp_ = RREG32(reg_addr);
6476 uint32_t loop = adev->usec_timeout;
6477
6478 while ((tmp_ & (mask)) != (expected_value)) {
6479 if (old_ != tmp_) {
6480 loop = adev->usec_timeout;
6481 old_ = tmp_;
6482 } else
6483 udelay(1);
6484 tmp_ = RREG32(reg_addr);
6485 loop--;
6486 if (!loop) {
 6487			DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08x\n",
6488 inst, reg_name, (uint32_t)expected_value,
6489 (uint32_t)(tmp_ & (mask)));
6490 ret = -ETIMEDOUT;
6491 break;
6492 }
6493 }
6494 return ret;
6495}
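/*
 * Illustrative sketch (assumption): polling a status register until a READY
 * bit is set; the register and mask names here are hypothetical.
 *
 *	r = amdgpu_device_wait_on_rreg(adev, 0, regSTATUS, "STATUS",
 *				       STATUS__READY_MASK, STATUS__READY_MASK);
 */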