drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
b1ddf548 28#include <linux/power_supply.h>
0875dc9e 29#include <linux/kthread.h>
fdf2f6c5 30#include <linux/module.h>
31#include <linux/console.h>
32#include <linux/slab.h>
4a74c38c 33#include <linux/iommu.h>
901e2be2 34#include <linux/pci.h>
35#include <linux/devcoredump.h>
36#include <generated/utsrelease.h>
08a2fd23 37#include <linux/pci-p2pdma.h>
d37a3929 38#include <linux/apple-gmux.h>
fdf2f6c5 39
b7cdb41e 40#include <drm/drm_aperture.h>
4562236b 41#include <drm/drm_atomic_helper.h>
973ad627 42#include <drm/drm_crtc_helper.h>
45b64fd9 43#include <drm/drm_fb_helper.h>
fcd70cd3 44#include <drm/drm_probe_helper.h>
45#include <drm/amdgpu_drm.h>
46#include <linux/vgaarb.h>
47#include <linux/vga_switcheroo.h>
48#include <linux/efi.h>
49#include "amdgpu.h"
f4b373f4 50#include "amdgpu_trace.h"
51#include "amdgpu_i2c.h"
52#include "atom.h"
53#include "amdgpu_atombios.h"
a5bde2f9 54#include "amdgpu_atomfirmware.h"
d0dd7f0c 55#include "amd_pcie.h"
56#ifdef CONFIG_DRM_AMDGPU_SI
57#include "si.h"
58#endif
59#ifdef CONFIG_DRM_AMDGPU_CIK
60#include "cik.h"
61#endif
aaa36a97 62#include "vi.h"
460826e6 63#include "soc15.h"
0a5b8c7b 64#include "nv.h"
d38ceaf9 65#include "bif/bif_4_1_d.h"
bec86378 66#include <linux/firmware.h>
89041940 67#include "amdgpu_vf_error.h"
d38ceaf9 68
ba997709 69#include "amdgpu_amdkfd.h"
d2f52ac8 70#include "amdgpu_pm.h"
d38ceaf9 71
5183411b 72#include "amdgpu_xgmi.h"
c030f2e4 73#include "amdgpu_ras.h"
9c7c85f7 74#include "amdgpu_pmu.h"
bd607166 75#include "amdgpu_fru_eeprom.h"
04442bf7 76#include "amdgpu_reset.h"
5183411b 77
d5ea093e 78#include <linux/suspend.h>
c6a6e2db 79#include <drm/task_barrier.h>
3f12acc8 80#include <linux/pm_runtime.h>
d5ea093e 81
82#include <drm/drm_drv.h>
83
84#if IS_ENABLED(CONFIG_X86)
85#include <asm/intel-family.h>
86#endif
87
e2a75f88 88MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
3f76dced 89MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
2d2e5e7e 90MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
ad5a67a7 91MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
54c4d17e 92MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
65e60f6e 93MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
42b325e5 94MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
e2a75f88 95
2dc80b00 96#define AMDGPU_RESUME_MS 2000
97#define AMDGPU_MAX_RETRY_LIMIT 2
98#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
2dc80b00 99
100static const struct drm_driver amdgpu_kms_driver;
101
050091ab 102const char *amdgpu_asic_name[] = {
103 "TAHITI",
104 "PITCAIRN",
105 "VERDE",
106 "OLAND",
107 "HAINAN",
108 "BONAIRE",
109 "KAVERI",
110 "KABINI",
111 "HAWAII",
112 "MULLINS",
113 "TOPAZ",
114 "TONGA",
48299f95 115 "FIJI",
d38ceaf9 116 "CARRIZO",
139f4917 117 "STONEY",
118 "POLARIS10",
119 "POLARIS11",
c4642a47 120 "POLARIS12",
48ff108d 121 "VEGAM",
d4196f01 122 "VEGA10",
8fab806a 123 "VEGA12",
956fcddc 124 "VEGA20",
2ca8a5d2 125 "RAVEN",
d6c3b24e 126 "ARCTURUS",
1eee4228 127 "RENOIR",
d46b417a 128 "ALDEBARAN",
852a6626 129 "NAVI10",
d0f56dc2 130 "CYAN_SKILLFISH",
87dbad02 131 "NAVI14",
9802f5d7 132 "NAVI12",
ccaf72d3 133 "SIENNA_CICHLID",
ddd8fbe7 134 "NAVY_FLOUNDER",
4f1e9a76 135 "VANGOGH",
a2468e04 136 "DIMGREY_CAVEFISH",
6f169591 137 "BEIGE_GOBY",
ee9236b7 138 "YELLOW_CARP",
3ae695d6 139 "IP DISCOVERY",
140 "LAST",
141};
142
143/**
144 * DOC: pcie_replay_count
145 *
146 * The amdgpu driver provides a sysfs API for reporting the total number
147 * of PCIe replays (NAKs).
148 * The file pcie_replay_count is used for this and returns the total
149 * number of replays as a sum of the NAKs generated and the NAKs received.
150 */
151
152static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
153 struct device_attribute *attr, char *buf)
154{
155 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 156 struct amdgpu_device *adev = drm_to_adev(ddev);
157 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
158
36000c7a 159 return sysfs_emit(buf, "%llu\n", cnt);
160}
161
b8920e1e 162static DEVICE_ATTR(pcie_replay_count, 0444,
163 amdgpu_device_get_pcie_replay_count, NULL);
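/*
 * Usage note (added, not from the original source): the counter is read like
 * any other sysfs attribute, e.g. via
 * /sys/bus/pci/devices/<bdf>/pcie_replay_count; the value emitted above is a
 * single decimal count followed by a newline.
 */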
164
165/**
166 * DOC: board_info
167 *
168 * The amdgpu driver provides a sysfs API for giving board related information.
169 * It provides the form factor information in the format
170 *
171 * type : form factor
172 *
173 * Possible form factor values
174 *
175 * - "cem" - PCIE CEM card
176 * - "oam" - Open Compute Accelerator Module
177 * - "unknown" - Not known
178 *
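 * Example (illustrative): on an OAM package the file reads back
 * "type : oam", following the "type : form factor" layout above.
 *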
179 */
180
181static ssize_t amdgpu_device_get_board_info(struct device *dev,
182 struct device_attribute *attr,
183 char *buf)
184{
185 struct drm_device *ddev = dev_get_drvdata(dev);
186 struct amdgpu_device *adev = drm_to_adev(ddev);
187 enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM;
188 const char *pkg;
189
190 if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type)
191 pkg_type = adev->smuio.funcs->get_pkg_type(adev);
192
193 switch (pkg_type) {
194 case AMDGPU_PKG_TYPE_CEM:
195 pkg = "cem";
196 break;
197 case AMDGPU_PKG_TYPE_OAM:
198 pkg = "oam";
199 break;
200 default:
201 pkg = "unknown";
202 break;
203 }
204
205 return sysfs_emit(buf, "%s : %s\n", "type", pkg);
206}
207
208static DEVICE_ATTR(board_info, 0444, amdgpu_device_get_board_info, NULL);
209
210static struct attribute *amdgpu_board_attrs[] = {
211 &dev_attr_board_info.attr,
212 NULL,
213};
214
215static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj,
216 struct attribute *attr, int n)
217{
218 struct device *dev = kobj_to_dev(kobj);
219 struct drm_device *ddev = dev_get_drvdata(dev);
220 struct amdgpu_device *adev = drm_to_adev(ddev);
221
222 if (adev->flags & AMD_IS_APU)
223 return 0;
224
225 return attr->mode;
226}
227
228static const struct attribute_group amdgpu_board_attrs_group = {
229 .attrs = amdgpu_board_attrs,
230 .is_visible = amdgpu_board_attrs_is_visible
231};
232
233static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
234
bd607166 235
fd496ca8 236/**
b98c6299 237 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
238 *
239 * @dev: drm_device pointer
240 *
b98c6299 241 * Returns true if the device is a dGPU with ATPX power control,
242 * otherwise return false.
243 */
b98c6299 244bool amdgpu_device_supports_px(struct drm_device *dev)
245{
246 struct amdgpu_device *adev = drm_to_adev(dev);
247
b98c6299 248 if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
249 return true;
250 return false;
251}
252
e3ecdffa 253/**
0330b848 254 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
255 *
256 * @dev: drm_device pointer
257 *
b98c6299 258 * Returns true if the device is a dGPU with ACPI power control,
259 * otherwise return false.
260 */
31af062a 261bool amdgpu_device_supports_boco(struct drm_device *dev)
d38ceaf9 262{
1348969a 263 struct amdgpu_device *adev = drm_to_adev(dev);
d38ceaf9 264
265 if (adev->has_pr3 ||
266 ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
267 return true;
268 return false;
269}
270
271/**
272 * amdgpu_device_supports_baco - Does the device support BACO
273 *
274 * @dev: drm_device pointer
275 *
276 * Returns true if the device supports BACO,
277 * otherwise return false.
278 */
279bool amdgpu_device_supports_baco(struct drm_device *dev)
280{
1348969a 281 struct amdgpu_device *adev = drm_to_adev(dev);
282
283 return amdgpu_asic_supports_baco(adev);
284}
285
286/**
287 * amdgpu_device_supports_smart_shift - Is the device dGPU with
288 * smart shift support
289 *
290 * @dev: drm_device pointer
291 *
292 * Returns true if the device is a dGPU with Smart Shift support,
293 * otherwise returns false.
294 */
295bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
296{
297 return (amdgpu_device_supports_boco(dev) &&
298 amdgpu_acpi_is_power_shift_control_supported());
299}
300
301/*
302 * VRAM access helper functions
303 */
304
e35e2b11 305/**
048af66b 306 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
307 *
308 * @adev: amdgpu_device pointer
309 * @pos: offset of the buffer in vram
310 * @buf: virtual address of the buffer in system memory
311 * @size: read/write size; the size of @buf must be at least @size
312 * @write: true - write to vram, otherwise - read from vram
313 */
314void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
315 void *buf, size_t size, bool write)
e35e2b11 316{
e35e2b11 317 unsigned long flags;
318 uint32_t hi = ~0, tmp = 0;
319 uint32_t *data = buf;
ce05ac56 320 uint64_t last;
f89f8c6b 321 int idx;
ce05ac56 322
c58a863b 323 if (!drm_dev_enter(adev_to_drm(adev), &idx))
f89f8c6b 324 return;
9d11eb0d 325
326 BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
327
328 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
329 for (last = pos + size; pos < last; pos += 4) {
330 tmp = pos >> 31;
331
332 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
333 if (tmp != hi) {
334 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
335 hi = tmp;
336 }
337 if (write)
338 WREG32_NO_KIQ(mmMM_DATA, *data++);
339 else
340 *data++ = RREG32_NO_KIQ(mmMM_DATA);
341 }
342
343 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
344 drm_dev_exit(idx);
345}
346
347/**
348 * amdgpu_device_aper_access - access vram by the vram aperture
349 *
350 * @adev: amdgpu_device pointer
351 * @pos: offset of the buffer in vram
352 * @buf: virtual address of the buffer in system memory
353 * @size: read/write size; the size of @buf must be at least @size
354 * @write: true - write to vram, otherwise - read from vram
355 *
356 * The return value means how many bytes have been transferred.
357 */
358size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
359 void *buf, size_t size, bool write)
360{
9d11eb0d 361#ifdef CONFIG_64BIT
362 void __iomem *addr;
363 size_t count = 0;
364 uint64_t last;
365
366 if (!adev->mman.aper_base_kaddr)
367 return 0;
368
369 last = min(pos + size, adev->gmc.visible_vram_size);
370 if (last > pos) {
371 addr = adev->mman.aper_base_kaddr + pos;
372 count = last - pos;
373
374 if (write) {
375 memcpy_toio(addr, buf, count);
376 /* Make sure HDP write cache flush happens without any reordering
377 * after the system memory contents are sent over PCIe device
378 */
9d11eb0d 379 mb();
810085dd 380 amdgpu_device_flush_hdp(adev, NULL);
9d11eb0d 381 } else {
810085dd 382 amdgpu_device_invalidate_hdp(adev, NULL);
383 /* Make sure HDP read cache is invalidated before issuing a read
384 * to the PCIe device
385 */
386 mb();
387 memcpy_fromio(buf, addr, count);
388 }
389
9d11eb0d 390 }
391
392 return count;
393#else
394 return 0;
9d11eb0d 395#endif
048af66b 396}
9d11eb0d 397
398/**
399 * amdgpu_device_vram_access - read/write a buffer in vram
400 *
401 * @adev: amdgpu_device pointer
402 * @pos: offset of the buffer in vram
403 * @buf: virtual address of the buffer in system memory
404 * @size: read/write size; the size of @buf must be at least @size
405 * @write: true - write to vram, otherwise - read from vram
406 */
407void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
408 void *buf, size_t size, bool write)
409{
410 size_t count;
e35e2b11 411
412 /* try using the vram aperture to access vram first */
413 count = amdgpu_device_aper_access(adev, pos, buf, size, write);
414 size -= count;
415 if (size) {
416 /* use MM access for the rest of vram */
417 pos += count;
418 buf += count;
419 amdgpu_device_mm_access(adev, pos, buf, size, write);
420 }
421}
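/*
 * Illustrative sketch (added example, not part of the original file): a
 * caller that writes a small buffer into VRAM with the helper above and
 * reads it back.  The VRAM offset and the test pattern are made-up values;
 * real callers are paths such as debugfs VRAM access.
 */
static void __maybe_unused amdgpu_device_vram_access_example(struct amdgpu_device *adev)
{
	u32 pattern[4] = { 0x12345678, 0x9abcdef0, 0xcafebabe, 0xdeadbeef };
	u32 readback[4] = { };

	/* write 16 bytes at VRAM offset 0x1000, then read them back */
	amdgpu_device_vram_access(adev, 0x1000, pattern, sizeof(pattern), true);
	amdgpu_device_vram_access(adev, 0x1000, readback, sizeof(readback), false);

	WARN_ON(memcmp(pattern, readback, sizeof(pattern)) != 0);
}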
422
d38ceaf9 423/*
f7ee1874 424 * register access helper functions.
d38ceaf9 425 */
426
427/* Check if hw access should be skipped because of hotplug or device error */
428bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
429{
7afefb81 430 if (adev->no_hw_access)
431 return true;
432
433#ifdef CONFIG_LOCKDEP
434 /*
435 * This is a bit complicated to understand, so worth a comment. What we assert
436 * here is that the GPU reset is not running on another thread in parallel.
437 *
438 * For this we trylock the read side of the reset semaphore, if that succeeds
439 * we know that the reset is not running in parallel.
440 *
441 * If the trylock fails we assert that we are either already holding the read
442 * side of the lock or are the reset thread itself and hold the write side of
443 * the lock.
444 */
445 if (in_task()) {
446 if (down_read_trylock(&adev->reset_domain->sem))
447 up_read(&adev->reset_domain->sem);
56b53c0b 448 else
d0fb18b5 449 lockdep_assert_held(&adev->reset_domain->sem);
450 }
451#endif
452 return false;
453}
454
e3ecdffa 455/**
f7ee1874 456 * amdgpu_device_rreg - read a memory mapped IO or indirect register
457 *
458 * @adev: amdgpu_device pointer
459 * @reg: dword aligned register offset
460 * @acc_flags: access flags which require special behavior
461 *
462 * Returns the 32 bit value from the offset specified.
463 */
464uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
465 uint32_t reg, uint32_t acc_flags)
d38ceaf9 466{
467 uint32_t ret;
468
56b53c0b 469 if (amdgpu_device_skip_hw_access(adev))
470 return 0;
471
472 if ((reg * 4) < adev->rmmio_size) {
473 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
474 amdgpu_sriov_runtime(adev) &&
d0fb18b5 475 down_read_trylock(&adev->reset_domain->sem)) {
f7ee1874 476 ret = amdgpu_kiq_rreg(adev, reg);
d0fb18b5 477 up_read(&adev->reset_domain->sem);
478 } else {
479 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
480 }
481 } else {
482 ret = adev->pcie_rreg(adev, reg * 4);
81202807 483 }
bc992ba5 484
f7ee1874 485 trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
e78b579d 486
f4b373f4 487 return ret;
488}
489
490/*
491 * MMIO register read with bytes helper functions
492 * @offset: byte offset from MMIO start
b8920e1e 493 */
421a2a30 494
495/**
496 * amdgpu_mm_rreg8 - read a memory mapped IO register
497 *
498 * @adev: amdgpu_device pointer
499 * @offset: byte aligned register offset
500 *
501 * Returns the 8 bit value from the offset specified.
502 */
503uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
504{
56b53c0b 505 if (amdgpu_device_skip_hw_access(adev))
506 return 0;
507
508 if (offset < adev->rmmio_size)
509 return (readb(adev->rmmio + offset));
510 BUG();
511}
512
513/*
514 * MMIO register write with bytes helper functions
515 * @offset: byte offset from MMIO start
516 * @value: the value to be written to the register
517 */
518
519/**
520 * amdgpu_mm_wreg8 - write a memory mapped IO register
521 *
522 * @adev: amdgpu_device pointer
523 * @offset: byte aligned register offset
524 * @value: 8 bit value to write
525 *
526 * Writes the value specified to the offset specified.
527 */
528void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
529{
56b53c0b 530 if (amdgpu_device_skip_hw_access(adev))
531 return;
532
533 if (offset < adev->rmmio_size)
534 writeb(value, adev->rmmio + offset);
535 else
536 BUG();
537}
538
e3ecdffa 539/**
f7ee1874 540 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
541 *
542 * @adev: amdgpu_device pointer
543 * @reg: dword aligned register offset
544 * @v: 32 bit value to write to the register
545 * @acc_flags: access flags which require special behavior
546 *
547 * Writes the value specified to the offset specified.
548 */
549void amdgpu_device_wreg(struct amdgpu_device *adev,
550 uint32_t reg, uint32_t v,
551 uint32_t acc_flags)
d38ceaf9 552{
56b53c0b 553 if (amdgpu_device_skip_hw_access(adev))
554 return;
555
556 if ((reg * 4) < adev->rmmio_size) {
557 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
558 amdgpu_sriov_runtime(adev) &&
d0fb18b5 559 down_read_trylock(&adev->reset_domain->sem)) {
f7ee1874 560 amdgpu_kiq_wreg(adev, reg, v);
d0fb18b5 561 up_read(&adev->reset_domain->sem);
562 } else {
563 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
564 }
565 } else {
566 adev->pcie_wreg(adev, reg * 4, v);
81202807 567 }
bc992ba5 568
f7ee1874 569 trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
2e0cc4d4 570}
d38ceaf9 571
03f2abb0 572/**
4cc9f86f 573 * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
2e0cc4d4 574 *
575 * @adev: amdgpu_device pointer
576 * @reg: mmio/rlc register
577 * @v: value to write
8057a9d6 578 * @xcc_id: xcc accelerated compute core id
579 *
580 * this function is invoked only for the debugfs register access
03f2abb0 581 */
f7ee1874 582void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
583 uint32_t reg, uint32_t v,
584 uint32_t xcc_id)
2e0cc4d4 585{
56b53c0b 586 if (amdgpu_device_skip_hw_access(adev))
587 return;
588
2e0cc4d4 589 if (amdgpu_sriov_fullaccess(adev) &&
590 adev->gfx.rlc.funcs &&
591 adev->gfx.rlc.funcs->is_rlcg_access_range) {
2e0cc4d4 592 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
8ed49dd1 593 return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
594 } else if ((reg * 4) >= adev->rmmio_size) {
595 adev->pcie_wreg(adev, reg * 4, v);
596 } else {
597 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
47ed4e1c 598 }
599}
600
601/**
602 * amdgpu_device_indirect_rreg - read an indirect register
603 *
604 * @adev: amdgpu_device pointer
22f453fb 605 * @reg_addr: indirect register address to read from
606 *
607 * Returns the value of indirect register @reg_addr
608 */
609u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
610 u32 reg_addr)
611{
65ba96e9 612 unsigned long flags, pcie_index, pcie_data;
613 void __iomem *pcie_index_offset;
614 void __iomem *pcie_data_offset;
615 u32 r;
616
617 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
618 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
619
620 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
621 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
622 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
623
624 writel(reg_addr, pcie_index_offset);
625 readl(pcie_index_offset);
626 r = readl(pcie_data_offset);
627 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
628
629 return r;
630}
631
632u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
633 u64 reg_addr)
634{
635 unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
636 u32 r;
637 void __iomem *pcie_index_offset;
638 void __iomem *pcie_index_hi_offset;
639 void __iomem *pcie_data_offset;
640
641 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
642 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
d57e24aa 643 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
644 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
645 else
646 pcie_index_hi = 0;
647
648 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
649 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
650 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
651 if (pcie_index_hi != 0)
652 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
653 pcie_index_hi * 4;
654
655 writel(reg_addr, pcie_index_offset);
656 readl(pcie_index_offset);
657 if (pcie_index_hi != 0) {
658 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
659 readl(pcie_index_hi_offset);
660 }
661 r = readl(pcie_data_offset);
662
663 /* clear the high bits */
664 if (pcie_index_hi != 0) {
665 writel(0, pcie_index_hi_offset);
666 readl(pcie_index_hi_offset);
667 }
668
669 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
670
671 return r;
672}
673
674/**
675 * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
676 *
677 * @adev: amdgpu_device pointer
22f453fb 678 * @reg_addr: indirect register address to read from
679 *
680 * Returns the value of indirect register @reg_addr
681 */
682u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
683 u32 reg_addr)
684{
65ba96e9 685 unsigned long flags, pcie_index, pcie_data;
686 void __iomem *pcie_index_offset;
687 void __iomem *pcie_data_offset;
688 u64 r;
689
690 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
691 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
692
693 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
694 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
695 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
696
697 /* read low 32 bits */
698 writel(reg_addr, pcie_index_offset);
699 readl(pcie_index_offset);
700 r = readl(pcie_data_offset);
701 /* read high 32 bits */
702 writel(reg_addr + 4, pcie_index_offset);
703 readl(pcie_index_offset);
704 r |= ((u64)readl(pcie_data_offset) << 32);
705 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
706
707 return r;
708}
709
710u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
711 u64 reg_addr)
712{
713 unsigned long flags, pcie_index, pcie_data;
714 unsigned long pcie_index_hi = 0;
715 void __iomem *pcie_index_offset;
716 void __iomem *pcie_index_hi_offset;
717 void __iomem *pcie_data_offset;
718 u64 r;
719
720 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
721 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
722 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
723 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
724
725 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
726 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
727 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
728 if (pcie_index_hi != 0)
729 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
730 pcie_index_hi * 4;
731
732 /* read low 32 bits */
733 writel(reg_addr, pcie_index_offset);
734 readl(pcie_index_offset);
735 if (pcie_index_hi != 0) {
736 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
737 readl(pcie_index_hi_offset);
738 }
739 r = readl(pcie_data_offset);
740 /* read high 32 bits */
741 writel(reg_addr + 4, pcie_index_offset);
742 readl(pcie_index_offset);
743 if (pcie_index_hi != 0) {
744 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
745 readl(pcie_index_hi_offset);
746 }
747 r |= ((u64)readl(pcie_data_offset) << 32);
748
749 /* clear the high bits */
750 if (pcie_index_hi != 0) {
751 writel(0, pcie_index_hi_offset);
752 readl(pcie_index_hi_offset);
753 }
754
755 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
756
757 return r;
758}
759
760/**
761 * amdgpu_device_indirect_wreg - write an indirect register address
762 *
763 * @adev: amdgpu_device pointer
764 * @reg_addr: indirect register offset
765 * @reg_data: indirect register data
766 *
767 */
768void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
769 u32 reg_addr, u32 reg_data)
770{
65ba96e9 771 unsigned long flags, pcie_index, pcie_data;
772 void __iomem *pcie_index_offset;
773 void __iomem *pcie_data_offset;
774
775 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
776 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
777
778 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
779 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
780 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
781
782 writel(reg_addr, pcie_index_offset);
783 readl(pcie_index_offset);
784 writel(reg_data, pcie_data_offset);
785 readl(pcie_data_offset);
786 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
787}
788
789void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
790 u64 reg_addr, u32 reg_data)
791{
792 unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
793 void __iomem *pcie_index_offset;
794 void __iomem *pcie_index_hi_offset;
795 void __iomem *pcie_data_offset;
796
797 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
798 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
d57e24aa 799 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
800 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
801 else
802 pcie_index_hi = 0;
803
804 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
805 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
806 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
807 if (pcie_index_hi != 0)
808 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
809 pcie_index_hi * 4;
810
811 writel(reg_addr, pcie_index_offset);
812 readl(pcie_index_offset);
813 if (pcie_index_hi != 0) {
814 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
815 readl(pcie_index_hi_offset);
816 }
817 writel(reg_data, pcie_data_offset);
818 readl(pcie_data_offset);
819
820 /* clear the high bits */
821 if (pcie_index_hi != 0) {
822 writel(0, pcie_index_hi_offset);
823 readl(pcie_index_hi_offset);
824 }
825
826 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
827}
828
829/**
830 * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
831 *
832 * @adev: amdgpu_device pointer
833 * @reg_addr: indirect register offset
834 * @reg_data: indirect register data
835 *
836 */
837void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
838 u32 reg_addr, u64 reg_data)
839{
65ba96e9 840 unsigned long flags, pcie_index, pcie_data;
841 void __iomem *pcie_index_offset;
842 void __iomem *pcie_data_offset;
843
844 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
845 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
846
847 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
848 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
849 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
850
851 /* write low 32 bits */
852 writel(reg_addr, pcie_index_offset);
853 readl(pcie_index_offset);
854 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
855 readl(pcie_data_offset);
856 /* write high 32 bits */
857 writel(reg_addr + 4, pcie_index_offset);
858 readl(pcie_index_offset);
859 writel((u32)(reg_data >> 32), pcie_data_offset);
860 readl(pcie_data_offset);
861 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
862}
863
864void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
865 u64 reg_addr, u64 reg_data)
866{
867 unsigned long flags, pcie_index, pcie_data;
868 unsigned long pcie_index_hi = 0;
869 void __iomem *pcie_index_offset;
870 void __iomem *pcie_index_hi_offset;
871 void __iomem *pcie_data_offset;
872
873 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
874 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
875 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
876 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
877
878 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
879 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
880 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
881 if (pcie_index_hi != 0)
882 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
883 pcie_index_hi * 4;
884
885 /* write low 32 bits */
886 writel(reg_addr, pcie_index_offset);
887 readl(pcie_index_offset);
888 if (pcie_index_hi != 0) {
889 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
890 readl(pcie_index_hi_offset);
891 }
892 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
893 readl(pcie_data_offset);
894 /* write high 32 bits */
895 writel(reg_addr + 4, pcie_index_offset);
896 readl(pcie_index_offset);
897 if (pcie_index_hi != 0) {
898 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
899 readl(pcie_index_hi_offset);
900 }
901 writel((u32)(reg_data >> 32), pcie_data_offset);
902 readl(pcie_data_offset);
903
904 /* clear the high bits */
905 if (pcie_index_hi != 0) {
906 writel(0, pcie_index_hi_offset);
907 readl(pcie_index_hi_offset);
908 }
909
910 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
911}
912
913/**
914 * amdgpu_device_get_rev_id - query device rev_id
915 *
916 * @adev: amdgpu_device pointer
917 *
918 * Return device rev_id
919 */
920u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
921{
922 return adev->nbio.funcs->get_rev_id(adev);
923}
924
925/**
926 * amdgpu_invalid_rreg - dummy reg read function
927 *
982a820b 928 * @adev: amdgpu_device pointer
929 * @reg: offset of register
930 *
931 * Dummy register read function. Used for register blocks
932 * that certain asics don't have (all asics).
933 * Returns the value in the register.
934 */
935static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
936{
937 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
938 BUG();
939 return 0;
940}
941
942static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
943{
944 DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
945 BUG();
946 return 0;
947}
948
949/**
950 * amdgpu_invalid_wreg - dummy reg write function
951 *
982a820b 952 * @adev: amdgpu_device pointer
953 * @reg: offset of register
954 * @v: value to write to the register
955 *
956 * Dummy register write function. Used for register blocks
957 * that certain asics don't have (all asics).
958 */
959static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
960{
961 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
962 reg, v);
963 BUG();
964}
965
966static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
967{
968 DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
969 reg, v);
970 BUG();
971}
972
973/**
974 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
975 *
982a820b 976 * @adev: amdgpu_device pointer
977 * @reg: offset of register
978 *
979 * Dummy register read function. Used for register blocks
980 * that certain asics don't have (all asics).
981 * Returns the value in the register.
982 */
983static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
984{
985 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
986 BUG();
987 return 0;
988}
989
990static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg)
991{
992 DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
993 BUG();
994 return 0;
995}
996
997/**
998 * amdgpu_invalid_wreg64 - dummy reg write function
999 *
982a820b 1000 * @adev: amdgpu_device pointer
1001 * @reg: offset of register
1002 * @v: value to write to the register
1003 *
1004 * Dummy register write function. Used for register blocks
1005 * that certain asics don't have (all asics).
1006 */
1007static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
1008{
1009 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
1010 reg, v);
1011 BUG();
1012}
1013
1014static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v)
1015{
1016 DRM_ERROR("Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n",
1017 reg, v);
1018 BUG();
1019}
1020
1021/**
1022 * amdgpu_block_invalid_rreg - dummy reg read function
1023 *
982a820b 1024 * @adev: amdgpu_device pointer
1025 * @block: offset of instance
1026 * @reg: offset of register
1027 *
1028 * Dummy register read function. Used for register blocks
1029 * that certain asics don't have (all asics).
1030 * Returns the value in the register.
1031 */
1032static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
1033 uint32_t block, uint32_t reg)
1034{
1035 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
1036 reg, block);
1037 BUG();
1038 return 0;
1039}
1040
1041/**
1042 * amdgpu_block_invalid_wreg - dummy reg write function
1043 *
982a820b 1044 * @adev: amdgpu_device pointer
1045 * @block: offset of instance
1046 * @reg: offset of register
1047 * @v: value to write to the register
1048 *
1049 * Dummy register write function. Used for register blocks
1050 * that certain asics don't have (all asics).
1051 */
1052static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
1053 uint32_t block,
1054 uint32_t reg, uint32_t v)
1055{
1056 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
1057 reg, block, v);
1058 BUG();
1059}
1060
1061/**
1062 * amdgpu_device_asic_init - Wrapper for atom asic_init
1063 *
982a820b 1064 * @adev: amdgpu_device pointer
1065 *
1066 * Does any asic specific work and then calls atom asic init.
1067 */
1068static int amdgpu_device_asic_init(struct amdgpu_device *adev)
1069{
1070 int ret;
1071
1072 amdgpu_asic_pre_asic_init(adev);
1073
1074 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
1075 amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
1076 amdgpu_psp_wait_for_bootloader(adev);
1077 ret = amdgpu_atomfirmware_asic_init(adev, true);
1078 return ret;
1079 } else {
85d1bcc6 1080 return amdgpu_atom_asic_init(adev->mode_info.atom_context);
1081 }
1082
1083 return 0;
1084}
1085
e3ecdffa 1086/**
7ccfd79f 1087 * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
e3ecdffa 1088 *
982a820b 1089 * @adev: amdgpu_device pointer
1090 *
1091 * Allocates a scratch page of VRAM for use by various things in the
1092 * driver.
1093 */
7ccfd79f 1094static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
d38ceaf9 1095{
1096 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
1097 AMDGPU_GEM_DOMAIN_VRAM |
1098 AMDGPU_GEM_DOMAIN_GTT,
1099 &adev->mem_scratch.robj,
1100 &adev->mem_scratch.gpu_addr,
1101 (void **)&adev->mem_scratch.ptr);
1102}
1103
e3ecdffa 1104/**
7ccfd79f 1105 * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
e3ecdffa 1106 *
982a820b 1107 * @adev: amdgpu_device pointer
1108 *
1109 * Frees the VRAM scratch page.
1110 */
7ccfd79f 1111static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 1112{
7ccfd79f 1113 amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
1114}
1115
1116/**
9c3f2b54 1117 * amdgpu_device_program_register_sequence - program an array of registers.
1118 *
1119 * @adev: amdgpu_device pointer
1120 * @registers: pointer to the register array
1121 * @array_size: size of the register array
1122 *
1123 * Programs an array of registers with AND/OR masks.
1124 * This is a helper for setting golden registers.
1125 */
1126void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
1127 const u32 *registers,
1128 const u32 array_size)
1129{
1130 u32 tmp, reg, and_mask, or_mask;
1131 int i;
1132
1133 if (array_size % 3)
1134 return;
1135
47fc644f 1136 for (i = 0; i < array_size; i += 3) {
1137 reg = registers[i + 0];
1138 and_mask = registers[i + 1];
1139 or_mask = registers[i + 2];
1140
1141 if (and_mask == 0xffffffff) {
1142 tmp = or_mask;
1143 } else {
1144 tmp = RREG32(reg);
1145 tmp &= ~and_mask;
1146 if (adev->family >= AMDGPU_FAMILY_AI)
1147 tmp |= (or_mask & and_mask);
1148 else
1149 tmp |= or_mask;
1150 }
1151 WREG32(reg, tmp);
1152 }
1153}
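/*
 * Illustrative sketch (added example, not part of the original file): golden
 * register lists are flat arrays of {offset, and_mask, or_mask} triplets.
 * The offsets and masks below are placeholders, not real golden settings.
 */
static const u32 example_golden_settings[] __maybe_unused = {
	/* reg         and_mask     or_mask */
	0x00001234, 0xffffffff, 0x00000001,	/* and_mask of all ones: plain write of or_mask */
	0x00005678, 0x0000ff00, 0x00001200,	/* read-modify-write of bits 8..15 only */
};

/*
 * A typical caller (conceptually, from an asic specific init path) would do:
 *
 *	amdgpu_device_program_register_sequence(adev, example_golden_settings,
 *						ARRAY_SIZE(example_golden_settings));
 */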
1154
1155/**
1156 * amdgpu_device_pci_config_reset - reset the GPU
1157 *
1158 * @adev: amdgpu_device pointer
1159 *
1160 * Resets the GPU using the pci config reset sequence.
1161 * Only applicable to asics prior to vega10.
1162 */
8111c387 1163void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
1164{
1165 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
1166}
1167
1168/**
1169 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1170 *
1171 * @adev: amdgpu_device pointer
1172 *
1173 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1174 */
1175int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1176{
1177 return pci_reset_function(adev->pdev);
1178}
1179
d38ceaf9 1180/*
06ec9070 1181 * amdgpu_device_wb_*()
455a7bc2 1182 * Writeback is the method by which the GPU updates special pages in memory
1183 * with the status of certain GPU events (fences, ring pointers, etc.).
1184 */
1185
1186/**
06ec9070 1187 * amdgpu_device_wb_fini - Disable Writeback and free memory
1188 *
1189 * @adev: amdgpu_device pointer
1190 *
1191 * Disables Writeback and frees the Writeback memory (all asics).
1192 * Used at driver shutdown.
1193 */
06ec9070 1194static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
1195{
1196 if (adev->wb.wb_obj) {
1197 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1198 &adev->wb.gpu_addr,
1199 (void **)&adev->wb.wb);
1200 adev->wb.wb_obj = NULL;
1201 }
1202}
1203
1204/**
03f2abb0 1205 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
1206 *
1207 * @adev: amdgpu_device pointer
1208 *
455a7bc2 1209 * Initializes writeback and allocates writeback memory (all asics).
1210 * Used at driver startup.
1211 * Returns 0 on success or an -error on failure.
1212 */
06ec9070 1213static int amdgpu_device_wb_init(struct amdgpu_device *adev)
1214{
1215 int r;
1216
1217 if (adev->wb.wb_obj == NULL) {
1218 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1219 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
1220 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1221 &adev->wb.wb_obj, &adev->wb.gpu_addr,
1222 (void **)&adev->wb.wb);
1223 if (r) {
1224 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1225 return r;
1226 }
1227
1228 adev->wb.num_wb = AMDGPU_MAX_WB;
1229 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1230
1231 /* clear wb memory */
73469585 1232 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
1233 }
1234
1235 return 0;
1236}
1237
1238/**
131b4b36 1239 * amdgpu_device_wb_get - Allocate a wb entry
1240 *
1241 * @adev: amdgpu_device pointer
1242 * @wb: wb index
1243 *
1244 * Allocate a wb slot for use by the driver (all asics).
1245 * Returns 0 on success or -EINVAL on failure.
1246 */
131b4b36 1247int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
1248{
1249 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 1250
97407b63 1251 if (offset < adev->wb.num_wb) {
7014285a 1252 __set_bit(offset, adev->wb.used);
63ae07ca 1253 *wb = offset << 3; /* convert to dw offset */
1254 return 0;
1255 } else {
1256 return -EINVAL;
1257 }
1258}
1259
d38ceaf9 1260/**
131b4b36 1261 * amdgpu_device_wb_free - Free a wb entry
1262 *
1263 * @adev: amdgpu_device pointer
1264 * @wb: wb index
1265 *
1266 * Free a wb slot allocated for use by the driver (all asics)
1267 */
131b4b36 1268void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 1269{
73469585 1270 wb >>= 3;
d38ceaf9 1271 if (wb < adev->wb.num_wb)
73469585 1272 __clear_bit(wb, adev->wb.used);
1273}
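/*
 * Illustrative sketch (added example, not part of the original file): typical
 * writeback slot usage.  amdgpu_device_wb_get() hands back a dword index into
 * the shared writeback page; a ring derives both the CPU pointer and the GPU
 * address of its slot from that index, roughly as below.
 */
static int __maybe_unused amdgpu_device_wb_example(struct amdgpu_device *adev)
{
	u32 wb;
	int r;

	r = amdgpu_device_wb_get(adev, &wb);
	if (r)
		return r;

	/* CPU view of the slot; the GPU address is adev->wb.gpu_addr + wb * 4 */
	adev->wb.wb[wb] = 0;

	amdgpu_device_wb_free(adev, wb);
	return 0;
}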
1274
1275/**
1276 * amdgpu_device_resize_fb_bar - try to resize FB BAR
1277 *
1278 * @adev: amdgpu_device pointer
1279 *
1280 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1281 * to fail, but if any of the BARs is not accessible after the size we abort
1282 * driver loading by returning -ENODEV.
1283 */
1284int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1285{
453f617a 1286 int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
1287 struct pci_bus *root;
1288 struct resource *res;
b8920e1e 1289 unsigned int i;
1290 u16 cmd;
1291 int r;
1292
1293 if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1294 return 0;
1295
0c03b912 1296 /* Bypass for VF */
1297 if (amdgpu_sriov_vf(adev))
1298 return 0;
1299
1300 /* skip if the bios has already enabled large BAR */
1301 if (adev->gmc.real_vram_size &&
1302 (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1303 return 0;
1304
1305 /* Check if the root BUS has 64bit memory resources */
1306 root = adev->pdev->bus;
1307 while (root->parent)
1308 root = root->parent;
1309
1310 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 1311 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
1312 res->start > 0x100000000ull)
1313 break;
1314 }
1315
1316 /* Trying to resize is pointless without a root hub window above 4GB */
1317 if (!res)
1318 return 0;
1319
1320 /* Limit the BAR size to what is available */
1321 rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1322 rbar_size);
1323
1324 /* Disable memory decoding while we change the BAR addresses and size */
1325 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1326 pci_write_config_word(adev->pdev, PCI_COMMAND,
1327 cmd & ~PCI_COMMAND_MEMORY);
1328
1329 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
43c064db 1330 amdgpu_doorbell_fini(adev);
1331 if (adev->asic_type >= CHIP_BONAIRE)
1332 pci_release_resource(adev->pdev, 2);
1333
1334 pci_release_resource(adev->pdev, 0);
1335
1336 r = pci_resize_resource(adev->pdev, 0, rbar_size);
1337 if (r == -ENOSPC)
1338 DRM_INFO("Not enough PCI address space for a large BAR.");
1339 else if (r && r != -ENOTSUPP)
1340 DRM_ERROR("Problem resizing BAR0 (%d).", r);
1341
1342 pci_assign_unassigned_bus_resources(adev->pdev->bus);
1343
1344 /* When the doorbell or fb BAR isn't available we have no chance of
1345 * using the device.
1346 */
43c064db 1347 r = amdgpu_doorbell_init(adev);
1348 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1349 return -ENODEV;
1350
1351 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1352
1353 return 0;
1354}
a05502e5 1355
1356static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
1357{
b8920e1e 1358 if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
9535a86a 1359 return false;
1360
1361 return true;
1362}
1363
1364/*
1365 * GPU helpers function.
1366 */
1367/**
39c640c0 1368 * amdgpu_device_need_post - check if the hw need post or not
1369 *
1370 * @adev: amdgpu_device pointer
1371 *
1372 * Check if the asic has been initialized (all asics) at driver startup
1373 * or post is needed if hw reset is performed.
1374 * Returns true if need or false if not.
d38ceaf9 1375 */
39c640c0 1376bool amdgpu_device_need_post(struct amdgpu_device *adev)
1377{
1378 uint32_t reg;
1379
1380 if (amdgpu_sriov_vf(adev))
1381 return false;
1382
1383 if (!amdgpu_device_read_bios(adev))
1384 return false;
1385
bec86378 1386 if (amdgpu_passthrough(adev)) {
1387 /* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
1388 * some old smc fw still need driver do vPost otherwise gpu hang, while
1389 * those smc fw version above 22.15 doesn't have this flaw, so we force
1390 * vpost executed for smc version below 22.15
1391 */
1392 if (adev->asic_type == CHIP_FIJI) {
1393 int err;
1394 uint32_t fw_ver;
b8920e1e 1395
1396 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1397 /* force vPost if error occurred */
1398 if (err)
1399 return true;
1400
1401 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1402 if (fw_ver < 0x00160e00)
1403 return true;
bec86378 1404 }
bec86378 1405 }
91fe77eb 1406
e3c1b071 1407 /* Don't post if we need to reset whole hive on init */
1408 if (adev->gmc.xgmi.pending_reset)
1409 return false;
1410
91fe77eb 1411 if (adev->has_hw_reset) {
1412 adev->has_hw_reset = false;
1413 return true;
1414 }
1415
1416 /* bios scratch used on CIK+ */
1417 if (adev->asic_type >= CHIP_BONAIRE)
1418 return amdgpu_atombios_scratch_need_asic_init(adev);
1419
1420 /* check MEM_SIZE for older asics */
1421 reg = amdgpu_asic_get_config_memsize(adev);
1422
1423 if ((reg != 0) && (reg != 0xffffffff))
1424 return false;
1425
1426 return true;
1427}
1428
1429/*
1430 * Check whether seamless boot is supported.
1431 *
1432 * So far we only support seamless boot on DCE 3.0 or later.
1433 * If users report that it works on older ASICS as well, we may
1434 * loosen this.
1435 */
1436bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
1437{
1438 switch (amdgpu_seamless) {
1439 case -1:
1440 break;
1441 case 1:
1442 return true;
1443 case 0:
1444 return false;
1445 default:
1446 DRM_ERROR("Invalid value for amdgpu.seamless: %d\n",
1447 amdgpu_seamless);
1448 return false;
1449 }
1450
1451 if (!(adev->flags & AMD_IS_APU))
1452 return false;
1453
1454 if (adev->mman.keep_stolen_vga_memory)
1455 return false;
1456
7f4ce7b5 1457 return adev->ip_versions[DCE_HWIP][0] >= IP_VERSION(3, 0, 0);
1458}
1459
1460/*
1461 * Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic
1462 * speed switching. Until we have confirmation from Intel that a specific host
1463 * supports it, it's safer that we keep it disabled for all.
1464 *
1465 * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1466 * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1467 */
1468bool amdgpu_device_pcie_dynamic_switching_supported(void)
1469{
1470#if IS_ENABLED(CONFIG_X86)
1471 struct cpuinfo_x86 *c = &cpu_data(0);
1472
1473 if (c->x86_vendor == X86_VENDOR_INTEL)
1474 return false;
1475#endif
1476 return true;
1477}
1478
1479/**
1480 * amdgpu_device_should_use_aspm - check if the device should program ASPM
1481 *
1482 * @adev: amdgpu_device pointer
1483 *
1484 * Confirm whether the module parameter and pcie bridge agree that ASPM should
1485 * be set for this device.
1486 *
1487 * Returns true if it should be used or false if not.
1488 */
1489bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1490{
1491 switch (amdgpu_aspm) {
1492 case -1:
1493 break;
1494 case 0:
1495 return false;
1496 case 1:
1497 return true;
1498 default:
1499 return false;
1500 }
1501 return pcie_aspm_enabled(adev->pdev);
1502}
1503
1504bool amdgpu_device_aspm_support_quirk(void)
1505{
1506#if IS_ENABLED(CONFIG_X86)
1507 struct cpuinfo_x86 *c = &cpu_data(0);
1508
1509 return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
1510#else
1511 return true;
1512#endif
1513}
1514
1515/* if we get transitioned to only one device, take VGA back */
1516/**
06ec9070 1517 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9 1518 *
bf44e8ce 1519 * @pdev: PCI device pointer
1520 * @state: enable/disable vga decode
1521 *
1522 * Enable/disable vga decode (all asics).
1523 * Returns VGA resource flags.
1524 */
1525static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1526 bool state)
d38ceaf9 1527{
bf44e8ce 1528 struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
b8920e1e 1529
1530 amdgpu_asic_set_vga_state(adev, state);
1531 if (state)
1532 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1533 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1534 else
1535 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1536}
1537
1538/**
1539 * amdgpu_device_check_block_size - validate the vm block size
1540 *
1541 * @adev: amdgpu_device pointer
1542 *
1543 * Validates the vm block size specified via module parameter.
1544 * The vm block size defines number of bits in page table versus page directory,
1545 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1546 * page table and the remaining bits are in the page directory.
1547 */
06ec9070 1548static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1549{
1550 /* defines number of bits in page table versus page directory,
1551 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1552 * page table and the remaining bits are in the page directory
1553 */
1554 if (amdgpu_vm_block_size == -1)
1555 return;
a1adf8be 1556
bab4fee7 1557 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
1558 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1559 amdgpu_vm_block_size);
97489129 1560 amdgpu_vm_block_size = -1;
a1adf8be 1561 }
a1adf8be
CZ
1562}
1563
e3ecdffa
AD
1564/**
1565 * amdgpu_device_check_vm_size - validate the vm size
1566 *
1567 * @adev: amdgpu_device pointer
1568 *
1569 * Validates the vm size in GB specified via module parameter.
1570 * The VM size is the size of the GPU virtual memory space in GB.
1571 */
06ec9070 1572static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 1573{
1574 /* no need to check the default value */
1575 if (amdgpu_vm_size == -1)
1576 return;
1577
1578 if (amdgpu_vm_size < 1) {
1579 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1580 amdgpu_vm_size);
f3368128 1581 amdgpu_vm_size = -1;
83ca145d 1582 }
1583}
1584
1585static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1586{
1587 struct sysinfo si;
a9d4fe2f 1588 bool is_os_64 = (sizeof(void *) == 8);
1589 uint64_t total_memory;
1590 uint64_t dram_size_seven_GB = 0x1B8000000;
1591 uint64_t dram_size_three_GB = 0xB8000000;
1592
1593 if (amdgpu_smu_memory_pool_size == 0)
1594 return;
1595
1596 if (!is_os_64) {
1597 DRM_WARN("Not 64-bit OS, feature not supported\n");
1598 goto def_value;
1599 }
1600 si_meminfo(&si);
1601 total_memory = (uint64_t)si.totalram * si.mem_unit;
1602
1603 if ((amdgpu_smu_memory_pool_size == 1) ||
1604 (amdgpu_smu_memory_pool_size == 2)) {
1605 if (total_memory < dram_size_three_GB)
1606 goto def_value1;
1607 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1608 (amdgpu_smu_memory_pool_size == 8)) {
1609 if (total_memory < dram_size_seven_GB)
1610 goto def_value1;
1611 } else {
1612 DRM_WARN("Smu memory pool size not supported\n");
1613 goto def_value;
1614 }
1615 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1616
1617 return;
1618
1619def_value1:
1620 DRM_WARN("No enough system memory\n");
1621def_value:
1622 adev->pm.smu_prv_buffer_size = 0;
1623}
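/*
 * Added note: amdgpu_smu_memory_pool_size is expressed in units of 256 MiB,
 * which is what the shift by 28 above converts to bytes (e.g. a module
 * parameter of 2 reserves 2 << 28 bytes = 512 MiB for the SMU pool).
 */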
1624
1625static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1626{
1627 if (!(adev->flags & AMD_IS_APU) ||
1628 adev->asic_type < CHIP_RAVEN)
1629 return 0;
1630
1631 switch (adev->asic_type) {
1632 case CHIP_RAVEN:
1633 if (adev->pdev->device == 0x15dd)
1634 adev->apu_flags |= AMD_APU_IS_RAVEN;
1635 if (adev->pdev->device == 0x15d8)
1636 adev->apu_flags |= AMD_APU_IS_PICASSO;
1637 break;
1638 case CHIP_RENOIR:
1639 if ((adev->pdev->device == 0x1636) ||
1640 (adev->pdev->device == 0x164c))
1641 adev->apu_flags |= AMD_APU_IS_RENOIR;
1642 else
1643 adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1644 break;
1645 case CHIP_VANGOGH:
1646 adev->apu_flags |= AMD_APU_IS_VANGOGH;
1647 break;
1648 case CHIP_YELLOW_CARP:
1649 break;
d0f56dc2 1650 case CHIP_CYAN_SKILLFISH:
1651 if ((adev->pdev->device == 0x13FE) ||
1652 (adev->pdev->device == 0x143F))
1653 adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1654 break;
9f6a7857 1655 default:
4eaf21b7 1656 break;
1657 }
1658
1659 return 0;
1660}
1661
d38ceaf9 1662/**
06ec9070 1663 * amdgpu_device_check_arguments - validate module params
1664 *
1665 * @adev: amdgpu_device pointer
1666 *
1667 * Validates certain module parameters and updates
1668 * the associated values used by the driver (all asics).
1669 */
912dfc84 1670static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1671{
1672 if (amdgpu_sched_jobs < 4) {
1673 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1674 amdgpu_sched_jobs);
1675 amdgpu_sched_jobs = 4;
47fc644f 1676 } else if (!is_power_of_2(amdgpu_sched_jobs)) {
1677 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1678 amdgpu_sched_jobs);
1679 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1680 }
d38ceaf9 1681
83e74db6 1682 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
1683 /* gart size must be greater or equal to 32M */
1684 dev_warn(adev->dev, "gart size (%d) too small\n",
1685 amdgpu_gart_size);
83e74db6 1686 amdgpu_gart_size = -1;
1687 }
1688
36d38372 1689 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1690 /* gtt size must be greater or equal to 32M */
1691 dev_warn(adev->dev, "gtt size (%d) too small\n",
1692 amdgpu_gtt_size);
1693 amdgpu_gtt_size = -1;
1694 }
1695
1696 /* valid range is between 4 and 9 inclusive */
1697 if (amdgpu_vm_fragment_size != -1 &&
1698 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1699 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1700 amdgpu_vm_fragment_size = -1;
1701 }
1702
1703 if (amdgpu_sched_hw_submission < 2) {
1704 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1705 amdgpu_sched_hw_submission);
1706 amdgpu_sched_hw_submission = 2;
1707 } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1708 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1709 amdgpu_sched_hw_submission);
1710 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1711 }
1712
1713 if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1714 dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1715 amdgpu_reset_method = -1;
1716 }
1717
1718 amdgpu_device_check_smu_prv_buffer_size(adev);
1719
06ec9070 1720 amdgpu_device_check_vm_size(adev);
d38ceaf9 1721
06ec9070 1722 amdgpu_device_check_block_size(adev);
6a7f76e7 1723
19aede77 1724 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84 1725
e3c00faa 1726 return 0;
1727}
1728
1729/**
1730 * amdgpu_switcheroo_set_state - set switcheroo state
1731 *
1732 * @pdev: pci dev pointer
1694467b 1733 * @state: vga_switcheroo state
d38ceaf9 1734 *
12024b17 1735 * Callback for the switcheroo driver. Suspends or resumes
1736 * the asics before or after it is powered up using ACPI methods.
1737 */
8aba21b7
LT
1738static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1739 enum vga_switcheroo_state state)
d38ceaf9
AD
1740{
1741 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 1742 int r;
d38ceaf9 1743
b98c6299 1744 if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
1745 return;
1746
1747 if (state == VGA_SWITCHEROO_ON) {
dd4fa6c1 1748 pr_info("switched on\n");
d38ceaf9
AD
1749 /* don't suspend or resume card normally */
1750 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1751
8f66090b
TZ
1752 pci_set_power_state(pdev, PCI_D0);
1753 amdgpu_device_load_pci_state(pdev);
1754 r = pci_enable_device(pdev);
de185019
AD
1755 if (r)
1756 DRM_WARN("pci_enable_device failed (%d)\n", r);
1757 amdgpu_device_resume(dev, true);
d38ceaf9 1758
d38ceaf9 1759 dev->switch_power_state = DRM_SWITCH_POWER_ON;
d38ceaf9 1760 } else {
dd4fa6c1 1761 pr_info("switched off\n");
d38ceaf9 1762 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
5095d541 1763 amdgpu_device_prepare(dev);
de185019 1764 amdgpu_device_suspend(dev, true);
8f66090b 1765 amdgpu_device_cache_pci_state(pdev);
de185019 1766 /* Shut down the device */
8f66090b
TZ
1767 pci_disable_device(pdev);
1768 pci_set_power_state(pdev, PCI_D3cold);
d38ceaf9
AD
1769 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1770 }
1771}
1772
1773/**
1774 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1775 *
1776 * @pdev: pci dev pointer
1777 *
1778 * Callback for the switcheroo driver. Checks whether the switcheroo
1779 * state can be changed.
1780 * Returns true if the state can be changed, false if not.
1781 */
1782static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1783{
1784 struct drm_device *dev = pci_get_drvdata(pdev);
1785
b8920e1e 1786 /*
d38ceaf9
AD
1787 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1788 * locking inversion with the driver load path. And the access here is
1789 * completely racy anyway. So don't bother with locking for now.
1790 */
7e13ad89 1791 return atomic_read(&dev->open_count) == 0;
d38ceaf9
AD
1792}
1793
1794static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1795 .set_gpu_state = amdgpu_switcheroo_set_state,
1796 .reprobe = NULL,
1797 .can_switch = amdgpu_switcheroo_can_switch,
1798};
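
/*
 * Illustrative sketch (not part of the original listing): these ops are
 * typically handed to the vga_switcheroo framework during device init,
 * roughly as below; "px" here is assumed to indicate PX (hybrid graphics)
 * support on the board.
 *
 *	vga_switcheroo_register_client(adev->pdev, &amdgpu_switcheroo_ops, px);
 */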
1799
e3ecdffa
AD
1800/**
1801 * amdgpu_device_ip_set_clockgating_state - set the CG state
1802 *
87e3f136 1803 * @dev: amdgpu_device pointer
e3ecdffa
AD
1804 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1805 * @state: clockgating state (gate or ungate)
1806 *
1807 * Sets the requested clockgating state for all instances of
1808 * the hardware IP specified.
1809 * Returns the error code from the last instance.
1810 */
43fa561f 1811int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1812 enum amd_ip_block_type block_type,
1813 enum amd_clockgating_state state)
d38ceaf9 1814{
43fa561f 1815 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1816 int i, r = 0;
1817
1818 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1819 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1820 continue;
c722865a
RZ
1821 if (adev->ip_blocks[i].version->type != block_type)
1822 continue;
1823 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1824 continue;
1825 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1826 (void *)adev, state);
1827 if (r)
1828 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1829 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1830 }
1831 return r;
1832}
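
/*
 * Illustrative use (hypothetical caller, assuming a fully initialized
 * device): gate clocks for every instance of the GFX IP block.
 *
 *	amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
 *					       AMD_CG_STATE_GATE);
 */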
1833
e3ecdffa
AD
1834/**
1835 * amdgpu_device_ip_set_powergating_state - set the PG state
1836 *
87e3f136 1837 * @dev: amdgpu_device pointer
e3ecdffa
AD
1838 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1839 * @state: powergating state (gate or ungate)
1840 *
1841 * Sets the requested powergating state for all instances of
1842 * the hardware IP specified.
1843 * Returns the error code from the last instance.
1844 */
43fa561f 1845int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1846 enum amd_ip_block_type block_type,
1847 enum amd_powergating_state state)
d38ceaf9 1848{
43fa561f 1849 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1850 int i, r = 0;
1851
1852 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1853 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1854 continue;
c722865a
RZ
1855 if (adev->ip_blocks[i].version->type != block_type)
1856 continue;
1857 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1858 continue;
1859 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1860 (void *)adev, state);
1861 if (r)
1862 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1863 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1864 }
1865 return r;
1866}
1867
e3ecdffa
AD
1868/**
1869 * amdgpu_device_ip_get_clockgating_state - get the CG state
1870 *
1871 * @adev: amdgpu_device pointer
1872 * @flags: clockgating feature flags
1873 *
1874 * Walks the list of IPs on the device and updates the clockgating
1875 * flags for each IP.
1876 * Updates @flags with the feature flags for each hardware IP where
1877 * clockgating is enabled.
1878 */
2990a1fc 1879void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
25faeddc 1880 u64 *flags)
6cb2d4e4
HR
1881{
1882 int i;
1883
1884 for (i = 0; i < adev->num_ip_blocks; i++) {
1885 if (!adev->ip_blocks[i].status.valid)
1886 continue;
1887 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1888 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1889 }
1890}
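
/*
 * Illustrative query (hypothetical caller): collect the clockgating feature
 * bits reported by every IP into a single mask.
 *
 *	u64 cg_flags = 0;
 *
 *	amdgpu_device_ip_get_clockgating_state(adev, &cg_flags);
 */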
1891
e3ecdffa
AD
1892/**
1893 * amdgpu_device_ip_wait_for_idle - wait for idle
1894 *
1895 * @adev: amdgpu_device pointer
1896 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1897 *
1898 * Waits for the requested hardware IP to be idle.
1899 * Returns 0 for success or a negative error code on failure.
1900 */
2990a1fc
AD
1901int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1902 enum amd_ip_block_type block_type)
5dbbb60b
AD
1903{
1904 int i, r;
1905
1906 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1907 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1908 continue;
a1255107
AD
1909 if (adev->ip_blocks[i].version->type == block_type) {
1910 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1911 if (r)
1912 return r;
1913 break;
1914 }
1915 }
1916 return 0;
1917
1918}
1919
e3ecdffa
AD
1920/**
1921 * amdgpu_device_ip_is_idle - is the hardware IP idle
1922 *
1923 * @adev: amdgpu_device pointer
1924 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1925 *
1926 * Check if the hardware IP is idle or not.
1927 * Returns true if the IP is idle, false if not.
1928 */
2990a1fc
AD
1929bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1930 enum amd_ip_block_type block_type)
5dbbb60b
AD
1931{
1932 int i;
1933
1934 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1935 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1936 continue;
a1255107
AD
1937 if (adev->ip_blocks[i].version->type == block_type)
1938 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1939 }
1940 return true;
1941
1942}
1943
e3ecdffa
AD
1944/**
1945 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1946 *
1947 * @adev: amdgpu_device pointer
87e3f136 1948 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1949 *
1950 * Returns a pointer to the hardware IP block structure
1951 * if it exists for the asic, otherwise NULL.
1952 */
2990a1fc
AD
1953struct amdgpu_ip_block *
1954amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1955 enum amd_ip_block_type type)
d38ceaf9
AD
1956{
1957 int i;
1958
1959 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1960 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1961 return &adev->ip_blocks[i];
1962
1963 return NULL;
1964}
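
/*
 * Illustrative lookup (hypothetical caller): fetch the GMC IP block and log
 * its version, guarding against the NULL return when the block is absent.
 *
 *	struct amdgpu_ip_block *gmc =
 *		amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GMC);
 *
 *	if (gmc)
 *		DRM_INFO("GMC v%u.%u\n", gmc->version->major,
 *			 gmc->version->minor);
 */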
1965
1966/**
2990a1fc 1967 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1968 *
1969 * @adev: amdgpu_device pointer
5fc3aeeb 1970 * @type: enum amd_ip_block_type
d38ceaf9
AD
1971 * @major: major version
1972 * @minor: minor version
1973 *
1974 * Returns 0 if the installed version is equal to or greater than the
1975 * requested one, or 1 if it is smaller or the ip_block doesn't exist.
1976 */
2990a1fc
AD
1977int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1978 enum amd_ip_block_type type,
1979 u32 major, u32 minor)
d38ceaf9 1980{
2990a1fc 1981 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1982
a1255107
AD
1983 if (ip_block && ((ip_block->version->major > major) ||
1984 ((ip_block->version->major == major) &&
1985 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1986 return 0;
1987
1988 return 1;
1989}
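
/*
 * Illustrative check (hypothetical caller): the 0-on-success convention
 * above means "IP block present and at least as new as requested".
 *
 *	if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GFX,
 *						9, 0)) {
 *		// GFX IP is version 9.0 or newer
 *	}
 */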
1990
a1255107 1991/**
2990a1fc 1992 * amdgpu_device_ip_block_add
a1255107
AD
1993 *
1994 * @adev: amdgpu_device pointer
1995 * @ip_block_version: pointer to the IP to add
1996 *
1997 * Adds the IP block driver information to the collection of IPs
1998 * on the asic.
1999 */
2990a1fc
AD
2000int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
2001 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
2002{
2003 if (!ip_block_version)
2004 return -EINVAL;
2005
7bd939d0
LG
2006 switch (ip_block_version->type) {
2007 case AMD_IP_BLOCK_TYPE_VCN:
2008 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
2009 return 0;
2010 break;
2011 case AMD_IP_BLOCK_TYPE_JPEG:
2012 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
2013 return 0;
2014 break;
2015 default:
2016 break;
2017 }
2018
e966a725 2019 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
2020 ip_block_version->funcs->name);
2021
a1255107
AD
2022 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
2023
2024 return 0;
2025}
2026
e3ecdffa
AD
2027/**
2028 * amdgpu_device_enable_virtual_display - enable virtual display feature
2029 *
2030 * @adev: amdgpu_device pointer
2031 *
2032 * Enables the virtual display feature if the user has enabled it via
2033 * the module parameter virtual_display. This feature provides a virtual
2034 * display hardware on headless boards or in virtualized environments.
2035 * This function parses and validates the configuration string specified by
2036 * the user and configures the virtual display configuration (number of
2037 * virtual connectors, crtcs, etc.) specified.
2038 */
483ef985 2039static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
2040{
2041 adev->enable_virtual_display = false;
2042
2043 if (amdgpu_virtual_display) {
8f66090b 2044 const char *pci_address_name = pci_name(adev->pdev);
0f66356d 2045 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
2046
2047 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
2048 pciaddstr_tmp = pciaddstr;
0f66356d
ED
2049 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
2050 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
2051 if (!strcmp("all", pciaddname)
2052 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
2053 long num_crtc;
2054 int res = -1;
2055
9accf2fd 2056 adev->enable_virtual_display = true;
0f66356d
ED
2057
2058 if (pciaddname_tmp)
2059 res = kstrtol(pciaddname_tmp, 10,
2060 &num_crtc);
2061
2062 if (!res) {
2063 if (num_crtc < 1)
2064 num_crtc = 1;
2065 if (num_crtc > 6)
2066 num_crtc = 6;
2067 adev->mode_info.num_crtc = num_crtc;
2068 } else {
2069 adev->mode_info.num_crtc = 1;
2070 }
9accf2fd
ED
2071 break;
2072 }
2073 }
2074
0f66356d
ED
2075 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
2076 amdgpu_virtual_display, pci_address_name,
2077 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
2078
2079 kfree(pciaddstr);
2080 }
2081}
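
/*
 * Example of the module parameter format consumed by the parser above
 * (hypothetical PCI addresses): enable virtual display with two CRTCs on
 * one device and the default single CRTC on another, or on every device.
 *
 *	amdgpu.virtual_display=0000:03:00.0,2;0000:04:00.0
 *	amdgpu.virtual_display=all
 */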
2082
25263da3
AD
2083void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
2084{
2085 if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
2086 adev->mode_info.num_crtc = 1;
2087 adev->enable_virtual_display = true;
2088 DRM_INFO("virtual_display:%d, num_crtc:%d\n",
2089 adev->enable_virtual_display, adev->mode_info.num_crtc);
2090 }
2091}
2092
e3ecdffa
AD
2093/**
2094 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
2095 *
2096 * @adev: amdgpu_device pointer
2097 *
2098 * Parses the asic configuration parameters specified in the gpu info
2099 * firmware and makes them available to the driver for use in configuring
2100 * the asic.
2101 * Returns 0 on success, -EINVAL on failure.
2102 */
e2a75f88
AD
2103static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
2104{
e2a75f88 2105 const char *chip_name;
c0a43457 2106 char fw_name[40];
e2a75f88
AD
2107 int err;
2108 const struct gpu_info_firmware_header_v1_0 *hdr;
2109
ab4fe3e1
HR
2110 adev->firmware.gpu_info_fw = NULL;
2111
72de33f8 2112 if (adev->mman.discovery_bin) {
cc375d8c
TY
2113 /*
2114 * FIXME: The bounding box is still needed by Navi12, so
e24d0e91 2115 * temporarily read it from gpu_info firmware. Should be dropped
cc375d8c
TY
2116 * when DAL no longer needs it.
2117 */
2118 if (adev->asic_type != CHIP_NAVI12)
2119 return 0;
258620d0
AD
2120 }
2121
e2a75f88 2122 switch (adev->asic_type) {
e2a75f88
AD
2123 default:
2124 return 0;
2125 case CHIP_VEGA10:
2126 chip_name = "vega10";
2127 break;
3f76dced
AD
2128 case CHIP_VEGA12:
2129 chip_name = "vega12";
2130 break;
2d2e5e7e 2131 case CHIP_RAVEN:
54f78a76 2132 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
54c4d17e 2133 chip_name = "raven2";
54f78a76 2134 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
741deade 2135 chip_name = "picasso";
54c4d17e
FX
2136 else
2137 chip_name = "raven";
2d2e5e7e 2138 break;
65e60f6e
LM
2139 case CHIP_ARCTURUS:
2140 chip_name = "arcturus";
2141 break;
42b325e5
XY
2142 case CHIP_NAVI12:
2143 chip_name = "navi12";
2144 break;
e2a75f88
AD
2145 }
2146
2147 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
b31d3063 2148 err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
e2a75f88
AD
2149 if (err) {
2150 dev_err(adev->dev,
b31d3063 2151 "Failed to get gpu_info firmware \"%s\"\n",
e2a75f88
AD
2152 fw_name);
2153 goto out;
2154 }
2155
ab4fe3e1 2156 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
2157 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
2158
2159 switch (hdr->version_major) {
2160 case 1:
2161 {
2162 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 2163 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
2164 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2165
cc375d8c
TY
2166 /*
2167 * Should be dropped when DAL no longer needs it.
2168 */
2169 if (adev->asic_type == CHIP_NAVI12)
ec51d3fa
XY
2170 goto parse_soc_bounding_box;
2171
b5ab16bf
AD
2172 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2173 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2174 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2175 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 2176 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
2177 le32_to_cpu(gpu_info_fw->gc_num_tccs);
2178 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2179 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2180 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2181 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 2182 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
2183 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2184 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
2185 adev->gfx.cu_info.max_waves_per_simd =
2186 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2187 adev->gfx.cu_info.max_scratch_slots_per_cu =
2188 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2189 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 2190 if (hdr->version_minor >= 1) {
35c2e910
HZ
2191 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2192 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2193 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2194 adev->gfx.config.num_sc_per_sh =
2195 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2196 adev->gfx.config.num_packer_per_sc =
2197 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2198 }
ec51d3fa
XY
2199
2200parse_soc_bounding_box:
ec51d3fa
XY
2201 /*
2202 * soc bounding box info is not integrated in discovery table,
258620d0 2203 * we always need to parse it from gpu info firmware if needed.
ec51d3fa 2204 */
48321c3d
HW
2205 if (hdr->version_minor == 2) {
2206 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2207 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2208 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2209 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2210 }
e2a75f88
AD
2211 break;
2212 }
2213 default:
2214 dev_err(adev->dev,
2215 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2216 err = -EINVAL;
2217 goto out;
2218 }
2219out:
e2a75f88
AD
2220 return err;
2221}
2222
e3ecdffa
AD
2223/**
2224 * amdgpu_device_ip_early_init - run early init for hardware IPs
2225 *
2226 * @adev: amdgpu_device pointer
2227 *
2228 * Early initialization pass for hardware IPs. The hardware IPs that make
2229 * up each asic are discovered and each IP's early_init callback is run. This
2230 * is the first stage in initializing the asic.
2231 * Returns 0 on success, negative error code on failure.
2232 */
06ec9070 2233static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 2234{
901e2be2
AD
2235 struct drm_device *dev = adev_to_drm(adev);
2236 struct pci_dev *parent;
aaa36a97 2237 int i, r;
ced69502 2238 bool total;
d38ceaf9 2239
483ef985 2240 amdgpu_device_enable_virtual_display(adev);
a6be7570 2241
00a979f3 2242 if (amdgpu_sriov_vf(adev)) {
00a979f3 2243 r = amdgpu_virt_request_full_gpu(adev, true);
aaa36a97
AD
2244 if (r)
2245 return r;
00a979f3
WS
2246 }
2247
d38ceaf9 2248 switch (adev->asic_type) {
33f34802
KW
2249#ifdef CONFIG_DRM_AMDGPU_SI
2250 case CHIP_VERDE:
2251 case CHIP_TAHITI:
2252 case CHIP_PITCAIRN:
2253 case CHIP_OLAND:
2254 case CHIP_HAINAN:
295d0daf 2255 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
2256 r = si_set_ip_blocks(adev);
2257 if (r)
2258 return r;
2259 break;
2260#endif
a2e73f56
AD
2261#ifdef CONFIG_DRM_AMDGPU_CIK
2262 case CHIP_BONAIRE:
2263 case CHIP_HAWAII:
2264 case CHIP_KAVERI:
2265 case CHIP_KABINI:
2266 case CHIP_MULLINS:
e1ad2d53 2267 if (adev->flags & AMD_IS_APU)
a2e73f56 2268 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
2269 else
2270 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
2271
2272 r = cik_set_ip_blocks(adev);
2273 if (r)
2274 return r;
2275 break;
2276#endif
da87c30b
AD
2277 case CHIP_TOPAZ:
2278 case CHIP_TONGA:
2279 case CHIP_FIJI:
2280 case CHIP_POLARIS10:
2281 case CHIP_POLARIS11:
2282 case CHIP_POLARIS12:
2283 case CHIP_VEGAM:
2284 case CHIP_CARRIZO:
2285 case CHIP_STONEY:
2286 if (adev->flags & AMD_IS_APU)
2287 adev->family = AMDGPU_FAMILY_CZ;
2288 else
2289 adev->family = AMDGPU_FAMILY_VI;
2290
2291 r = vi_set_ip_blocks(adev);
2292 if (r)
2293 return r;
2294 break;
d38ceaf9 2295 default:
63352b7f
AD
2296 r = amdgpu_discovery_set_ip_blocks(adev);
2297 if (r)
2298 return r;
2299 break;
d38ceaf9
AD
2300 }
2301
901e2be2
AD
2302 if (amdgpu_has_atpx() &&
2303 (amdgpu_is_atpx_hybrid() ||
2304 amdgpu_has_atpx_dgpu_power_cntl()) &&
2305 ((adev->flags & AMD_IS_APU) == 0) &&
2306 !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
2307 adev->flags |= AMD_IS_PX;
2308
85ac2021 2309 if (!(adev->flags & AMD_IS_APU)) {
c4c8955b 2310 parent = pcie_find_root_port(adev->pdev);
85ac2021
AD
2311 adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2312 }
901e2be2 2313
1884734a 2314
3b94fb10 2315 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 2316 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 2317 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
4215a119
HC
2318 if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2319 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
00f54b97 2320
ced69502 2321 total = true;
d38ceaf9
AD
2322 for (i = 0; i < adev->num_ip_blocks; i++) {
2323 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
0c451baf 2324 DRM_WARN("disabled ip block: %d <%s>\n",
ed8cf00c 2325 i, adev->ip_blocks[i].version->funcs->name);
a1255107 2326 adev->ip_blocks[i].status.valid = false;
d38ceaf9 2327 } else {
a1255107
AD
2328 if (adev->ip_blocks[i].version->funcs->early_init) {
2329 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 2330 if (r == -ENOENT) {
a1255107 2331 adev->ip_blocks[i].status.valid = false;
2c1a2784 2332 } else if (r) {
a1255107
AD
2333 DRM_ERROR("early_init of IP block <%s> failed %d\n",
2334 adev->ip_blocks[i].version->funcs->name, r);
ced69502 2335 total = false;
2c1a2784 2336 } else {
a1255107 2337 adev->ip_blocks[i].status.valid = true;
2c1a2784 2338 }
974e6b64 2339 } else {
a1255107 2340 adev->ip_blocks[i].status.valid = true;
d38ceaf9 2341 }
d38ceaf9 2342 }
21a249ca
AD
2343 /* get the vbios after the asic_funcs are set up */
2344 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
2345 r = amdgpu_device_parse_gpu_info_fw(adev);
2346 if (r)
2347 return r;
2348
21a249ca 2349 /* Read BIOS */
9535a86a
SZ
2350 if (amdgpu_device_read_bios(adev)) {
2351 if (!amdgpu_get_bios(adev))
2352 return -EINVAL;
21a249ca 2353
9535a86a
SZ
2354 r = amdgpu_atombios_init(adev);
2355 if (r) {
2356 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2357 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2358 return r;
2359 }
21a249ca 2360 }
77eabc6f
PJZ
2361
2362 /*get pf2vf msg info at it's earliest time*/
2363 if (amdgpu_sriov_vf(adev))
2364 amdgpu_virt_init_data_exchange(adev);
2365
21a249ca 2366 }
d38ceaf9 2367 }
ced69502
ML
2368 if (!total)
2369 return -ENODEV;
d38ceaf9 2370
00fa4035 2371 amdgpu_amdkfd_device_probe(adev);
395d1fb9
NH
2372 adev->cg_flags &= amdgpu_cg_mask;
2373 adev->pg_flags &= amdgpu_pg_mask;
2374
d38ceaf9
AD
2375 return 0;
2376}
2377
0a4f2520
RZ
2378static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2379{
2380 int i, r;
2381
2382 for (i = 0; i < adev->num_ip_blocks; i++) {
2383 if (!adev->ip_blocks[i].status.sw)
2384 continue;
2385 if (adev->ip_blocks[i].status.hw)
2386 continue;
2387 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 2388 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
2389 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2390 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2391 if (r) {
2392 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2393 adev->ip_blocks[i].version->funcs->name, r);
2394 return r;
2395 }
2396 adev->ip_blocks[i].status.hw = true;
2397 }
2398 }
2399
2400 return 0;
2401}
2402
2403static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2404{
2405 int i, r;
2406
2407 for (i = 0; i < adev->num_ip_blocks; i++) {
2408 if (!adev->ip_blocks[i].status.sw)
2409 continue;
2410 if (adev->ip_blocks[i].status.hw)
2411 continue;
2412 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2413 if (r) {
2414 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2415 adev->ip_blocks[i].version->funcs->name, r);
2416 return r;
2417 }
2418 adev->ip_blocks[i].status.hw = true;
2419 }
2420
2421 return 0;
2422}
2423
7a3e0bb2
RZ
2424static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2425{
2426 int r = 0;
2427 int i;
80f41f84 2428 uint32_t smu_version;
7a3e0bb2
RZ
2429
2430 if (adev->asic_type >= CHIP_VEGA10) {
2431 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
2432 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2433 continue;
2434
e3c1b071 2435 if (!adev->ip_blocks[i].status.sw)
2436 continue;
2437
482f0e53
ML
2438 /* no need to do the fw loading again if already done*/
2439 if (adev->ip_blocks[i].status.hw == true)
2440 break;
2441
53b3f8f4 2442 if (amdgpu_in_reset(adev) || adev->in_suspend) {
482f0e53
ML
2443 r = adev->ip_blocks[i].version->funcs->resume(adev);
2444 if (r) {
2445 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 2446 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
2447 return r;
2448 }
2449 } else {
2450 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2451 if (r) {
2452 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2453 adev->ip_blocks[i].version->funcs->name, r);
2454 return r;
7a3e0bb2 2455 }
7a3e0bb2 2456 }
482f0e53
ML
2457
2458 adev->ip_blocks[i].status.hw = true;
2459 break;
7a3e0bb2
RZ
2460 }
2461 }
482f0e53 2462
8973d9ec
ED
2463 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2464 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 2465
80f41f84 2466 return r;
7a3e0bb2
RZ
2467}
2468
5fd8518d
AG
2469static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2470{
2471 long timeout;
2472 int r, i;
2473
2474 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2475 struct amdgpu_ring *ring = adev->rings[i];
2476
2477 /* No need to setup the GPU scheduler for rings that don't need it */
2478 if (!ring || ring->no_scheduler)
2479 continue;
2480
2481 switch (ring->funcs->type) {
2482 case AMDGPU_RING_TYPE_GFX:
2483 timeout = adev->gfx_timeout;
2484 break;
2485 case AMDGPU_RING_TYPE_COMPUTE:
2486 timeout = adev->compute_timeout;
2487 break;
2488 case AMDGPU_RING_TYPE_SDMA:
2489 timeout = adev->sdma_timeout;
2490 break;
2491 default:
2492 timeout = adev->video_timeout;
2493 break;
2494 }
2495
2496 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
11f25c84 2497 ring->num_hw_submission, 0,
8ab62eda
JG
2498 timeout, adev->reset_domain->wq,
2499 ring->sched_score, ring->name,
2500 adev->dev);
5fd8518d
AG
2501 if (r) {
2502 DRM_ERROR("Failed to create scheduler on ring %s.\n",
2503 ring->name);
2504 return r;
2505 }
2506 }
2507
d425c6f4
JZ
2508 amdgpu_xcp_update_partition_sched_list(adev);
2509
5fd8518d
AG
2510 return 0;
2511}
2512
2513
e3ecdffa
AD
2514/**
2515 * amdgpu_device_ip_init - run init for hardware IPs
2516 *
2517 * @adev: amdgpu_device pointer
2518 *
2519 * Main initialization pass for hardware IPs. The list of all the hardware
2520 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2521 * are run. sw_init initializes the software state associated with each IP
2522 * and hw_init initializes the hardware associated with each IP.
2523 * Returns 0 on success, negative error code on failure.
2524 */
06ec9070 2525static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
2526{
2527 int i, r;
2528
c030f2e4 2529 r = amdgpu_ras_init(adev);
2530 if (r)
2531 return r;
2532
d38ceaf9 2533 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2534 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2535 continue;
a1255107 2536 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 2537 if (r) {
a1255107
AD
2538 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2539 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 2540 goto init_failed;
2c1a2784 2541 }
a1255107 2542 adev->ip_blocks[i].status.sw = true;
bfca0289 2543
c1c39032
AD
2544 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2545 /* need to do common hw init early so everything is set up for gmc */
2546 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2547 if (r) {
2548 DRM_ERROR("hw_init %d failed %d\n", i, r);
2549 goto init_failed;
2550 }
2551 adev->ip_blocks[i].status.hw = true;
2552 } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2553 /* need to do gmc hw init early so we can allocate gpu mem */
892deb48
VS
2554 /* Try to reserve bad pages early */
2555 if (amdgpu_sriov_vf(adev))
2556 amdgpu_virt_exchange_data(adev);
2557
7ccfd79f 2558 r = amdgpu_device_mem_scratch_init(adev);
2c1a2784 2559 if (r) {
7ccfd79f 2560 DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
72d3f592 2561 goto init_failed;
2c1a2784 2562 }
a1255107 2563 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
2564 if (r) {
2565 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 2566 goto init_failed;
2c1a2784 2567 }
06ec9070 2568 r = amdgpu_device_wb_init(adev);
2c1a2784 2569 if (r) {
06ec9070 2570 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 2571 goto init_failed;
2c1a2784 2572 }
a1255107 2573 adev->ip_blocks[i].status.hw = true;
2493664f
ML
2574
2575 /* right after GMC hw init, we create CSA */
02ff519e 2576 if (adev->gfx.mcbp) {
1e256e27 2577 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
58ab2c08
CK
2578 AMDGPU_GEM_DOMAIN_VRAM |
2579 AMDGPU_GEM_DOMAIN_GTT,
2580 AMDGPU_CSA_SIZE);
2493664f
ML
2581 if (r) {
2582 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 2583 goto init_failed;
2493664f
ML
2584 }
2585 }
d38ceaf9
AD
2586 }
2587 }
2588
c9ffa427 2589 if (amdgpu_sriov_vf(adev))
22c16d25 2590 amdgpu_virt_init_data_exchange(adev);
c9ffa427 2591
533aed27
AG
2592 r = amdgpu_ib_pool_init(adev);
2593 if (r) {
2594 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2595 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2596 goto init_failed;
2597 }
2598
c8963ea4
RZ
2599 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2600 if (r)
72d3f592 2601 goto init_failed;
0a4f2520
RZ
2602
2603 r = amdgpu_device_ip_hw_init_phase1(adev);
2604 if (r)
72d3f592 2605 goto init_failed;
0a4f2520 2606
7a3e0bb2
RZ
2607 r = amdgpu_device_fw_loading(adev);
2608 if (r)
72d3f592 2609 goto init_failed;
7a3e0bb2 2610
0a4f2520
RZ
2611 r = amdgpu_device_ip_hw_init_phase2(adev);
2612 if (r)
72d3f592 2613 goto init_failed;
d38ceaf9 2614
121a2bc6
AG
2615 /*
2616 * retired pages will be loaded from eeprom and reserved here,
2617 * it should be called after amdgpu_device_ip_hw_init_phase2 since
2618 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2619 * for I2C communication which only true at this point.
b82e65a9
GC
2620 *
2621 * amdgpu_ras_recovery_init may fail, but the upper only cares the
2622 * failure from bad gpu situation and stop amdgpu init process
2623 * accordingly. For other failed cases, it will still release all
2624 * the resource and print error message, rather than returning one
2625 * negative value to upper level.
121a2bc6
AG
2626 *
2627 * Note: theoretically, this should be called before all vram allocations
2628 * to protect retired page from abusing
2629 */
b82e65a9
GC
2630 r = amdgpu_ras_recovery_init(adev);
2631 if (r)
2632 goto init_failed;
121a2bc6 2633
cfbb6b00
AG
2634 /**
2635 * In case of XGMI grab extra reference for reset domain for this device
2636 */
a4c63caf 2637 if (adev->gmc.xgmi.num_physical_nodes > 1) {
cfbb6b00 2638 if (amdgpu_xgmi_add_device(adev) == 0) {
46c67660 2639 if (!amdgpu_sriov_vf(adev)) {
2efc30f0
VC
2640 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2641
dfd0287b
LH
2642 if (WARN_ON(!hive)) {
2643 r = -ENOENT;
2644 goto init_failed;
2645 }
2646
46c67660 2647 if (!hive->reset_domain ||
2648 !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2649 r = -ENOENT;
2650 amdgpu_put_xgmi_hive(hive);
2651 goto init_failed;
2652 }
2653
2654 /* Drop the early temporary reset domain we created for device */
2655 amdgpu_reset_put_reset_domain(adev->reset_domain);
2656 adev->reset_domain = hive->reset_domain;
9dfa4860 2657 amdgpu_put_xgmi_hive(hive);
cfbb6b00 2658 }
a4c63caf
AG
2659 }
2660 }
2661
5fd8518d
AG
2662 r = amdgpu_device_init_schedulers(adev);
2663 if (r)
2664 goto init_failed;
e3c1b071 2665
2666 /* Don't init kfd if whole hive need to be reset during init */
84b4dd3f
PY
2667 if (!adev->gmc.xgmi.pending_reset) {
2668 kgd2kfd_init_zone_device(adev);
e3c1b071 2669 amdgpu_amdkfd_device_init(adev);
84b4dd3f 2670 }
c6332b97 2671
bd607166
KR
2672 amdgpu_fru_get_product_info(adev);
2673
72d3f592 2674init_failed:
c6332b97 2675
72d3f592 2676 return r;
d38ceaf9
AD
2677}
2678
e3ecdffa
AD
2679/**
2680 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2681 *
2682 * @adev: amdgpu_device pointer
2683 *
2684 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2685 * this function before a GPU reset. If the value is retained after a
2686 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2687 */
06ec9070 2688static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2689{
2690 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2691}
2692
e3ecdffa
AD
2693/**
2694 * amdgpu_device_check_vram_lost - check if vram is valid
2695 *
2696 * @adev: amdgpu_device pointer
2697 *
2698 * Checks the reset magic value written to the gart pointer in VRAM.
2699 * The driver calls this after a GPU reset to see if the contents of
2700 * VRAM have been lost.
2701 * Returns true if VRAM is lost, false if not.
2702 */
06ec9070 2703static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2704{
dadce777
EQ
2705 if (memcmp(adev->gart.ptr, adev->reset_magic,
2706 AMDGPU_RESET_MAGIC_NUM))
2707 return true;
2708
53b3f8f4 2709 if (!amdgpu_in_reset(adev))
dadce777
EQ
2710 return false;
2711
2712 /*
2713 * For all ASICs with baco/mode1 reset, the VRAM is
2714 * always assumed to be lost.
2715 */
2716 switch (amdgpu_asic_reset_method(adev)) {
2717 case AMD_RESET_METHOD_BACO:
2718 case AMD_RESET_METHOD_MODE1:
2719 return true;
2720 default:
2721 return false;
2722 }
0c49e0b8
CZ
2723}
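
/*
 * Illustrative pairing of the two helpers above (hypothetical reset path,
 * not from the original file): the magic is written before the reset and
 * compared afterwards to decide whether buffer contents must be restored.
 *
 *	amdgpu_device_fill_reset_magic(adev);
 *	// ... ASIC reset happens here ...
 *	vram_lost = amdgpu_device_check_vram_lost(adev);
 */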
2724
e3ecdffa 2725/**
1112a46b 2726 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2727 *
2728 * @adev: amdgpu_device pointer
b8b72130 2729 * @state: clockgating state (gate or ungate)
e3ecdffa 2730 *
e3ecdffa 2731 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2732 * set_clockgating_state callbacks are run.
2733 * Late initialization pass enabling clockgating for hardware IPs.
2734 * Fini or suspend, pass disabling clockgating for hardware IPs.
e3ecdffa
AD
2735 * Returns 0 on success, negative error code on failure.
2736 */
fdd34271 2737
5d89bb2d
LL
2738int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2739 enum amd_clockgating_state state)
d38ceaf9 2740{
1112a46b 2741 int i, j, r;
d38ceaf9 2742
4a2ba394
SL
2743 if (amdgpu_emu_mode == 1)
2744 return 0;
2745
1112a46b
RZ
2746 for (j = 0; j < adev->num_ip_blocks; j++) {
2747 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2748 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2749 continue;
47198eb7 2750 /* skip CG for GFX, SDMA on S0ix */
5d70a549 2751 if (adev->in_s0ix &&
47198eb7
AD
2752 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2753 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2754 continue;
4a446d55 2755 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2756 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2757 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2758 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2759 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2760 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2761 /* enable clockgating to save power */
a1255107 2762 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2763 state);
4a446d55
AD
2764 if (r) {
2765 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2766 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2767 return r;
2768 }
b0b00ff1 2769 }
d38ceaf9 2770 }
06b18f61 2771
c9f96fd5
RZ
2772 return 0;
2773}
2774
5d89bb2d
LL
2775int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2776 enum amd_powergating_state state)
c9f96fd5 2777{
1112a46b 2778 int i, j, r;
06b18f61 2779
c9f96fd5
RZ
2780 if (amdgpu_emu_mode == 1)
2781 return 0;
2782
1112a46b
RZ
2783 for (j = 0; j < adev->num_ip_blocks; j++) {
2784 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2785 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5 2786 continue;
47198eb7 2787 /* skip PG for GFX, SDMA on S0ix */
5d70a549 2788 if (adev->in_s0ix &&
47198eb7
AD
2789 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2790 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2791 continue;
c9f96fd5
RZ
2792 /* skip CG for VCE/UVD, it's handled specially */
2793 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2794 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2795 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2796 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2797 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2798 /* enable powergating to save power */
2799 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2800 state);
c9f96fd5
RZ
2801 if (r) {
2802 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2803 adev->ip_blocks[i].version->funcs->name, r);
2804 return r;
2805 }
2806 }
2807 }
2dc80b00
S
2808 return 0;
2809}
2810
beff74bc
AD
2811static int amdgpu_device_enable_mgpu_fan_boost(void)
2812{
2813 struct amdgpu_gpu_instance *gpu_ins;
2814 struct amdgpu_device *adev;
2815 int i, ret = 0;
2816
2817 mutex_lock(&mgpu_info.mutex);
2818
2819 /*
2820 * MGPU fan boost feature should be enabled
2821 * only when there are two or more dGPUs in
2822 * the system
2823 */
2824 if (mgpu_info.num_dgpu < 2)
2825 goto out;
2826
2827 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2828 gpu_ins = &(mgpu_info.gpu_ins[i]);
2829 adev = gpu_ins->adev;
2830 if (!(adev->flags & AMD_IS_APU) &&
f10bb940 2831 !gpu_ins->mgpu_fan_enabled) {
beff74bc
AD
2832 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2833 if (ret)
2834 break;
2835
2836 gpu_ins->mgpu_fan_enabled = 1;
2837 }
2838 }
2839
2840out:
2841 mutex_unlock(&mgpu_info.mutex);
2842
2843 return ret;
2844}
2845
e3ecdffa
AD
2846/**
2847 * amdgpu_device_ip_late_init - run late init for hardware IPs
2848 *
2849 * @adev: amdgpu_device pointer
2850 *
2851 * Late initialization pass for hardware IPs. The list of all the hardware
2852 * IPs that make up the asic is walked and the late_init callbacks are run.
2853 * late_init covers any special initialization that an IP requires
2854 * after all of them have been initialized or something that needs to happen
2855 * late in the init process.
2856 * Returns 0 on success, negative error code on failure.
2857 */
06ec9070 2858static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2859{
60599a03 2860 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2861 int i = 0, r;
2862
2863 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2864 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2865 continue;
2866 if (adev->ip_blocks[i].version->funcs->late_init) {
2867 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2868 if (r) {
2869 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2870 adev->ip_blocks[i].version->funcs->name, r);
2871 return r;
2872 }
2dc80b00 2873 }
73f847db 2874 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2875 }
2876
867e24ca 2877 r = amdgpu_ras_late_init(adev);
2878 if (r) {
2879 DRM_ERROR("amdgpu_ras_late_init failed %d", r);
2880 return r;
2881 }
2882
a891d239
DL
2883 amdgpu_ras_set_error_query_ready(adev, true);
2884
1112a46b
RZ
2885 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2886 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2887
06ec9070 2888 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2889
beff74bc
AD
2890 r = amdgpu_device_enable_mgpu_fan_boost();
2891 if (r)
2892 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2893
4da8b639 2894 /* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */
47fc644f
SS
2895 if (amdgpu_passthrough(adev) &&
2896 ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
2897 adev->asic_type == CHIP_ALDEBARAN))
bc143d8b 2898 amdgpu_dpm_handle_passthrough_sbr(adev, true);
60599a03
EQ
2899
2900 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2901 mutex_lock(&mgpu_info.mutex);
2902
2903 /*
2904 * Reset device p-state to low as this was booted with high.
2905 *
2906 * This should be performed only after all devices from the same
2907 * hive get initialized.
2908 *
2909 * However, the number of devices in the hive is not known in advance;
2910 * it is counted one by one as each device initializes.
2911 *
2912 * So, we wait until all XGMI interlinked devices are initialized.
2913 * This may bring some delays as those devices may come from
2914 * different hives. But that should be OK.
2915 */
2916 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2917 for (i = 0; i < mgpu_info.num_gpu; i++) {
2918 gpu_instance = &(mgpu_info.gpu_ins[i]);
2919 if (gpu_instance->adev->flags & AMD_IS_APU)
2920 continue;
2921
d84a430d
JK
2922 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2923 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
2924 if (r) {
2925 DRM_ERROR("pstate setting failed (%d).\n", r);
2926 break;
2927 }
2928 }
2929 }
2930
2931 mutex_unlock(&mgpu_info.mutex);
2932 }
2933
d38ceaf9
AD
2934 return 0;
2935}
2936
613aa3ea
LY
2937/**
2938 * amdgpu_device_smu_fini_early - smu hw_fini wrapper
2939 *
2940 * @adev: amdgpu_device pointer
2941 *
2942 * For ASICs that need to disable the SMC first
2943 */
2944static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
2945{
2946 int i, r;
2947
4e8303cf 2948 if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
613aa3ea
LY
2949 return;
2950
2951 for (i = 0; i < adev->num_ip_blocks; i++) {
2952 if (!adev->ip_blocks[i].status.hw)
2953 continue;
2954 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2955 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2956 /* XXX handle errors */
2957 if (r) {
2958 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2959 adev->ip_blocks[i].version->funcs->name, r);
2960 }
2961 adev->ip_blocks[i].status.hw = false;
2962 break;
2963 }
2964 }
2965}
2966
e9669fb7 2967static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
d38ceaf9
AD
2968{
2969 int i, r;
2970
e9669fb7
AG
2971 for (i = 0; i < adev->num_ip_blocks; i++) {
2972 if (!adev->ip_blocks[i].version->funcs->early_fini)
2973 continue;
5278a159 2974
e9669fb7
AG
2975 r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
2976 if (r) {
2977 DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
2978 adev->ip_blocks[i].version->funcs->name, r);
2979 }
2980 }
c030f2e4 2981
05df1f01 2982 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2983 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2984
7270e895
TY
2985 amdgpu_amdkfd_suspend(adev, false);
2986
613aa3ea
LY
2987 /* Workaround for ASICs that need to disable the SMC first */
2988 amdgpu_device_smu_fini_early(adev);
3e96dbfd 2989
d38ceaf9 2990 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2991 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2992 continue;
8201a67a 2993
a1255107 2994 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2995 /* XXX handle errors */
2c1a2784 2996 if (r) {
a1255107
AD
2997 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2998 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2999 }
8201a67a 3000
a1255107 3001 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
3002 }
3003
6effad8a
GC
3004 if (amdgpu_sriov_vf(adev)) {
3005 if (amdgpu_virt_release_full_gpu(adev, false))
3006 DRM_ERROR("failed to release exclusive mode on fini\n");
3007 }
3008
e9669fb7
AG
3009 return 0;
3010}
3011
3012/**
3013 * amdgpu_device_ip_fini - run fini for hardware IPs
3014 *
3015 * @adev: amdgpu_device pointer
3016 *
3017 * Main teardown pass for hardware IPs. The list of all the hardware
3018 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
3019 * are run. hw_fini tears down the hardware associated with each IP
3020 * and sw_fini tears down any software state associated with each IP.
3021 * Returns 0 on success, negative error code on failure.
3022 */
3023static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
3024{
3025 int i, r;
3026
3027 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
3028 amdgpu_virt_release_ras_err_handler_data(adev);
3029
e9669fb7
AG
3030 if (adev->gmc.xgmi.num_physical_nodes > 1)
3031 amdgpu_xgmi_remove_device(adev);
3032
c004d44e 3033 amdgpu_amdkfd_device_fini_sw(adev);
9950cda2 3034
d38ceaf9 3035 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3036 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 3037 continue;
c12aba3a
ML
3038
3039 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 3040 amdgpu_ucode_free_bo(adev);
1e256e27 3041 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a 3042 amdgpu_device_wb_fini(adev);
7ccfd79f 3043 amdgpu_device_mem_scratch_fini(adev);
533aed27 3044 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
3045 }
3046
a1255107 3047 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 3048 /* XXX handle errors */
2c1a2784 3049 if (r) {
a1255107
AD
3050 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
3051 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 3052 }
a1255107
AD
3053 adev->ip_blocks[i].status.sw = false;
3054 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
3055 }
3056
a6dcfd9c 3057 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3058 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 3059 continue;
a1255107
AD
3060 if (adev->ip_blocks[i].version->funcs->late_fini)
3061 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
3062 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
3063 }
3064
c030f2e4 3065 amdgpu_ras_fini(adev);
3066
d38ceaf9
AD
3067 return 0;
3068}
3069
e3ecdffa 3070/**
beff74bc 3071 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 3072 *
1112a46b 3073 * @work: work_struct.
e3ecdffa 3074 */
beff74bc 3075static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
3076{
3077 struct amdgpu_device *adev =
beff74bc 3078 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
3079 int r;
3080
3081 r = amdgpu_ib_ring_tests(adev);
3082 if (r)
3083 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
3084}
3085
1e317b99
RZ
3086static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
3087{
3088 struct amdgpu_device *adev =
3089 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
3090
90a92662
MD
3091 WARN_ON_ONCE(adev->gfx.gfx_off_state);
3092 WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
3093
3094 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
3095 adev->gfx.gfx_off_state = true;
1e317b99
RZ
3096}
3097
e3ecdffa 3098/**
e7854a03 3099 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
3100 *
3101 * @adev: amdgpu_device pointer
3102 *
3103 * Main suspend function for hardware IPs. The list of all the hardware
3104 * IPs that make up the asic is walked, clockgating is disabled and the
3105 * suspend callbacks are run. suspend puts the hardware and software state
3106 * in each IP into a state suitable for suspend.
3107 * Returns 0 on success, negative error code on failure.
3108 */
e7854a03
AD
3109static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
3110{
3111 int i, r;
3112
50ec83f0
AD
3113 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3114 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 3115
b31d6ada
EQ
3116 /*
3117 * Per PMFW team's suggestion, driver needs to handle gfxoff
3118 * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
3119 * scenario. Add the missing df cstate disablement here.
3120 */
3121 if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
3122 dev_warn(adev->dev, "Failed to disallow df cstate");
3123
e7854a03
AD
3124 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3125 if (!adev->ip_blocks[i].status.valid)
3126 continue;
2b9f7848 3127
e7854a03 3128 /* displays are handled separately */
2b9f7848
ND
3129 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
3130 continue;
3131
3132 /* XXX handle errors */
3133 r = adev->ip_blocks[i].version->funcs->suspend(adev);
3134 /* XXX handle errors */
3135 if (r) {
3136 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3137 adev->ip_blocks[i].version->funcs->name, r);
3138 return r;
e7854a03 3139 }
2b9f7848
ND
3140
3141 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
3142 }
3143
e7854a03
AD
3144 return 0;
3145}
3146
3147/**
3148 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3149 *
3150 * @adev: amdgpu_device pointer
3151 *
3152 * Main suspend function for hardware IPs. The list of all the hardware
3153 * IPs that make up the asic is walked, clockgating is disabled and the
3154 * suspend callbacks are run. suspend puts the hardware and software state
3155 * in each IP into a state suitable for suspend.
3156 * Returns 0 on success, negative error code on failure.
3157 */
3158static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3159{
3160 int i, r;
3161
557f42a2 3162 if (adev->in_s0ix)
bc143d8b 3163 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
34416931 3164
d38ceaf9 3165 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3166 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 3167 continue;
e7854a03
AD
3168 /* displays are handled in phase1 */
3169 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3170 continue;
bff77e86
LM
3171 /* PSP lost connection when err_event_athub occurs */
3172 if (amdgpu_ras_intr_triggered() &&
3173 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3174 adev->ip_blocks[i].status.hw = false;
3175 continue;
3176 }
e3c1b071 3177
3178 /* skip unnecessary suspend if we do not initialize them yet */
3179 if (adev->gmc.xgmi.pending_reset &&
3180 !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3181 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
3182 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3183 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
3184 adev->ip_blocks[i].status.hw = false;
3185 continue;
3186 }
557f42a2 3187
afa6646b 3188 /* skip suspend of gfx/mes and psp for S0ix
32ff160d
AD
3189 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3190 * like at runtime. PSP is also part of the always on hardware
3191 * so no need to suspend it.
3192 */
557f42a2 3193 if (adev->in_s0ix &&
32ff160d 3194 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
afa6646b
AD
3195 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3196 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
557f42a2
AD
3197 continue;
3198
2a7798ea
AD
3199 /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3200 if (adev->in_s0ix &&
4e8303cf
LL
3201 (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
3202 IP_VERSION(5, 0, 0)) &&
3203 (adev->ip_blocks[i].version->type ==
3204 AMD_IP_BLOCK_TYPE_SDMA))
2a7798ea
AD
3205 continue;
3206
e11c7750
TH
3207 /* Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot.
3208 * These are in TMR, hence are expected to be reused by PSP-TOS to reload
3209 * from this location and RLC Autoload automatically also gets loaded
3210 * from here based on PMFW -> PSP message during re-init sequence.
3211 * Therefore, the psp suspend & resume should be skipped to avoid destroy
3212 * the TMR and reload FWs again for IMU enabled APU ASICs.
3213 */
3214 if (amdgpu_in_reset(adev) &&
3215 (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3216 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3217 continue;
3218
d38ceaf9 3219 /* XXX handle errors */
a1255107 3220 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 3221 /* XXX handle errors */
2c1a2784 3222 if (r) {
a1255107
AD
3223 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3224 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 3225 }
876923fb 3226 adev->ip_blocks[i].status.hw = false;
a3a09142 3227 /* handle putting the SMC in the appropriate state */
47fc644f 3228 if (!amdgpu_sriov_vf(adev)) {
86b93fd6
JZ
3229 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3230 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3231 if (r) {
3232 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3233 adev->mp1_state, r);
3234 return r;
3235 }
a3a09142
AD
3236 }
3237 }
d38ceaf9
AD
3238 }
3239
3240 return 0;
3241}
3242
e7854a03
AD
3243/**
3244 * amdgpu_device_ip_suspend - run suspend for hardware IPs
3245 *
3246 * @adev: amdgpu_device pointer
3247 *
3248 * Main suspend function for hardware IPs. The list of all the hardware
3249 * IPs that make up the asic is walked, clockgating is disabled and the
3250 * suspend callbacks are run. suspend puts the hardware and software state
3251 * in each IP into a state suitable for suspend.
3252 * Returns 0 on success, negative error code on failure.
3253 */
3254int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3255{
3256 int r;
3257
3c73683c
JC
3258 if (amdgpu_sriov_vf(adev)) {
3259 amdgpu_virt_fini_data_exchange(adev);
e7819644 3260 amdgpu_virt_request_full_gpu(adev, false);
3c73683c 3261 }
e7819644 3262
e7854a03
AD
3263 r = amdgpu_device_ip_suspend_phase1(adev);
3264 if (r)
3265 return r;
3266 r = amdgpu_device_ip_suspend_phase2(adev);
3267
e7819644
YT
3268 if (amdgpu_sriov_vf(adev))
3269 amdgpu_virt_release_full_gpu(adev, false);
3270
e7854a03
AD
3271 return r;
3272}
3273
06ec9070 3274static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3275{
3276 int i, r;
3277
2cb681b6 3278 static enum amd_ip_block_type ip_order[] = {
2cb681b6 3279 AMD_IP_BLOCK_TYPE_COMMON,
c1c39032 3280 AMD_IP_BLOCK_TYPE_GMC,
39186aef 3281 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
3282 AMD_IP_BLOCK_TYPE_IH,
3283 };
a90ad3c2 3284
95ea3dbc 3285 for (i = 0; i < adev->num_ip_blocks; i++) {
2cb681b6
ML
3286 int j;
3287 struct amdgpu_ip_block *block;
a90ad3c2 3288
4cd2a96d
J
3289 block = &adev->ip_blocks[i];
3290 block->status.hw = false;
2cb681b6 3291
4cd2a96d 3292 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
2cb681b6 3293
4cd2a96d 3294 if (block->version->type != ip_order[j] ||
2cb681b6
ML
3295 !block->status.valid)
3296 continue;
3297
3298 r = block->version->funcs->hw_init(adev);
0aaeefcc 3299 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3300 if (r)
3301 return r;
482f0e53 3302 block->status.hw = true;
a90ad3c2
ML
3303 }
3304 }
3305
3306 return 0;
3307}
3308
06ec9070 3309static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3310{
3311 int i, r;
3312
2cb681b6
ML
3313 static enum amd_ip_block_type ip_order[] = {
3314 AMD_IP_BLOCK_TYPE_SMC,
3315 AMD_IP_BLOCK_TYPE_DCE,
3316 AMD_IP_BLOCK_TYPE_GFX,
3317 AMD_IP_BLOCK_TYPE_SDMA,
ec64350d 3318 AMD_IP_BLOCK_TYPE_MES,
257deb8c 3319 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07 3320 AMD_IP_BLOCK_TYPE_VCE,
d2cdc014
YZ
3321 AMD_IP_BLOCK_TYPE_VCN,
3322 AMD_IP_BLOCK_TYPE_JPEG
2cb681b6 3323 };
a90ad3c2 3324
2cb681b6
ML
3325 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3326 int j;
3327 struct amdgpu_ip_block *block;
a90ad3c2 3328
2cb681b6
ML
3329 for (j = 0; j < adev->num_ip_blocks; j++) {
3330 block = &adev->ip_blocks[j];
3331
3332 if (block->version->type != ip_order[i] ||
482f0e53
ML
3333 !block->status.valid ||
3334 block->status.hw)
2cb681b6
ML
3335 continue;
3336
895bd048
JZ
3337 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3338 r = block->version->funcs->resume(adev);
3339 else
3340 r = block->version->funcs->hw_init(adev);
3341
0aaeefcc 3342 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3343 if (r)
3344 return r;
482f0e53 3345 block->status.hw = true;
a90ad3c2
ML
3346 }
3347 }
3348
3349 return 0;
3350}
3351
e3ecdffa
AD
3352/**
3353 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3354 *
3355 * @adev: amdgpu_device pointer
3356 *
3357 * First resume function for hardware IPs. The list of all the hardware
3358 * IPs that make up the asic is walked and the resume callbacks are run for
3359 * COMMON, GMC, and IH. resume puts the hardware into a functional state
3360 * after a suspend and updates the software state as necessary. This
3361 * function is also used for restoring the GPU after a GPU reset.
3362 * Returns 0 on success, negative error code on failure.
3363 */
06ec9070 3364static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
3365{
3366 int i, r;
3367
a90ad3c2 3368 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3369 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 3370 continue;
a90ad3c2 3371 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3372 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
d7274ec7
BZ
3373 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3374 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
482f0e53 3375
fcf0649f
CZ
3376 r = adev->ip_blocks[i].version->funcs->resume(adev);
3377 if (r) {
3378 DRM_ERROR("resume of IP block <%s> failed %d\n",
3379 adev->ip_blocks[i].version->funcs->name, r);
3380 return r;
3381 }
482f0e53 3382 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
3383 }
3384 }
3385
3386 return 0;
3387}
3388
e3ecdffa
AD
3389/**
3390 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3391 *
3392 * @adev: amdgpu_device pointer
3393 *
3394 * Second resume function for hardware IPs. The list of all the hardware
3395 * IPs that make up the asic is walked and the resume callbacks are run for
3396 * all blocks except COMMON, GMC, IH, and PSP. resume puts the hardware into a
3397 * functional state after a suspend and updates the software state as
3398 * necessary. This function is also used for restoring the GPU after a GPU
3399 * reset.
3400 * Returns 0 on success, negative error code on failure.
3401 */
06ec9070 3402static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3403{
3404 int i, r;
3405
3406 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3407 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 3408 continue;
fcf0649f 3409 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3410 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
3411 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3412 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 3413 continue;
a1255107 3414 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 3415 if (r) {
a1255107
AD
3416 DRM_ERROR("resume of IP block <%s> failed %d\n",
3417 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 3418 return r;
2c1a2784 3419 }
482f0e53 3420 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
3421 }
3422
3423 return 0;
3424}
3425
e3ecdffa
AD
3426/**
3427 * amdgpu_device_ip_resume - run resume for hardware IPs
3428 *
3429 * @adev: amdgpu_device pointer
3430 *
3431 * Main resume function for hardware IPs. The hardware IPs
3432 * are split into two resume functions because they are
b8920e1e 3433 * also used in recovering from a GPU reset and some additional
e3ecdffa
AD
3434 * steps need to be taken between them. In this case (S3/S4) they are
3435 * run sequentially.
3436 * Returns 0 on success, negative error code on failure.
3437 */
06ec9070 3438static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
3439{
3440 int r;
3441
06ec9070 3442 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
3443 if (r)
3444 return r;
7a3e0bb2
RZ
3445
3446 r = amdgpu_device_fw_loading(adev);
3447 if (r)
3448 return r;
3449
06ec9070 3450 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
3451
3452 return r;
3453}
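
/*
 * Illustrative sketch (not part of the driver): the split into two resume
 * phases exists so a caller can slot extra work between them, e.g. during
 * GPU reset recovery where firmware has to be reloaded (and VRAM contents
 * possibly restored) once COMMON/GMC/IH are back up but before the
 * remaining blocks resume.  The general shape, using only calls visible
 * in this file, is:
 *
 *	r = amdgpu_device_ip_resume_phase1(adev);	   // COMMON, GMC, IH
 *	if (!r)
 *		r = amdgpu_device_fw_loading(adev);	   // reload microcode
 *	if (!r)
 *		r = amdgpu_device_ip_resume_phase2(adev);  // everything else
 *	if (!r)
 *		amdgpu_fence_driver_hw_init(adev);	   // re-arm fences
 *
 * The actual reset path may interleave additional steps; this is only the
 * ordering constraint the phase split is meant to express.
 */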
3454
e3ecdffa
AD
3455/**
3456 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3457 *
3458 * @adev: amdgpu_device pointer
3459 *
3460 * Query the VBIOS data tables to determine if the board supports SR-IOV.
3461 */
4e99a44e 3462static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 3463{
6867e1b5
ML
3464 if (amdgpu_sriov_vf(adev)) {
3465 if (adev->is_atom_fw) {
58ff791a 3466 if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
6867e1b5
ML
3467 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3468 } else {
3469 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3470 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3471 }
3472
3473 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3474 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 3475 }
048765ad
AR
3476}
3477
e3ecdffa
AD
3478/**
3479 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3480 *
3481 * @asic_type: AMD asic type
3482 *
3483 * Check if there is DC (new modesetting infrastructure) support for an asic.
3484 * returns true if DC has support, false if not.
3485 */
4562236b
HW
3486bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3487{
3488 switch (asic_type) {
0637d417
AD
3489#ifdef CONFIG_DRM_AMDGPU_SI
3490 case CHIP_HAINAN:
3491#endif
3492 case CHIP_TOPAZ:
3493 /* chips with no display hardware */
3494 return false;
4562236b 3495#if defined(CONFIG_DRM_AMD_DC)
64200c46
MR
3496 case CHIP_TAHITI:
3497 case CHIP_PITCAIRN:
3498 case CHIP_VERDE:
3499 case CHIP_OLAND:
2d32ffd6
AD
3500 /*
3501 * We have systems in the wild with these ASICs that require
3502 * LVDS and VGA support which is not supported with DC.
3503 *
3504 * Fall back to the non-DC driver here by default so as not to
3505 * cause regressions.
3506 */
3507#if defined(CONFIG_DRM_AMD_DC_SI)
3508 return amdgpu_dc > 0;
3509#else
3510 return false;
64200c46 3511#endif
4562236b 3512 case CHIP_BONAIRE:
0d6fbccb 3513 case CHIP_KAVERI:
367e6687
AD
3514 case CHIP_KABINI:
3515 case CHIP_MULLINS:
d9fda248
HW
3516 /*
3517 * We have systems in the wild with these ASICs that require
b5a0168e 3518 * VGA support which is not supported with DC.
d9fda248
HW
3519 *
3520 * Fall back to the non-DC driver here by default so as not to
3521 * cause regressions.
3522 */
3523 return amdgpu_dc > 0;
f7f12b25 3524 default:
fd187853 3525 return amdgpu_dc != 0;
f7f12b25 3526#else
4562236b 3527 default:
93b09a9a 3528 if (amdgpu_dc > 0)
b8920e1e 3529 DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
4562236b 3530 return false;
f7f12b25 3531#endif
4562236b
HW
3532 }
3533}
3534
3535/**
3536 * amdgpu_device_has_dc_support - check if dc is supported
3537 *
982a820b 3538 * @adev: amdgpu_device pointer
4562236b
HW
3539 *
3540 * Returns true for supported, false for not supported
3541 */
3542bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3543{
25263da3 3544 if (adev->enable_virtual_display ||
abaf210c 3545 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
2555039d
XY
3546 return false;
3547
4562236b
HW
3548 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3549}
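
/*
 * Illustrative note (not part of the driver): amdgpu_dc is a module
 * parameter with three effective settings -- -1 (auto, the default),
 * 0 (force the legacy display path) and 1 (force DC).  For the older
 * ASICs handled above, "auto" falls back to the legacy path, so DC has
 * to be requested explicitly, e.g. on the kernel command line:
 *
 *	amdgpu.dc=1
 *
 * When DC support is not compiled in (CONFIG_DRM_AMD_DC unset), such a
 * request is ignored and the DRM_INFO_ONCE() message above is printed.
 */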
3550
d4535e2c
AG
3551static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3552{
3553 struct amdgpu_device *adev =
3554 container_of(__work, struct amdgpu_device, xgmi_reset_work);
d95e8e97 3555 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
d4535e2c 3556
c6a6e2db
AG
3557 /* It's a bug to not have a hive within this function */
3558 if (WARN_ON(!hive))
3559 return;
3560
3561 /*
3562 * Use task barrier to synchronize all xgmi reset works across the
3563 * hive. task_barrier_enter and task_barrier_exit will block
3564 * until all the threads running the xgmi reset works reach
3565 * those points. task_barrier_full will do both blocks.
3566 */
3567 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3568
3569 task_barrier_enter(&hive->tb);
4a580877 3570 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
c6a6e2db
AG
3571
3572 if (adev->asic_reset_res)
3573 goto fail;
3574
3575 task_barrier_exit(&hive->tb);
4a580877 3576 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
c6a6e2db
AG
3577
3578 if (adev->asic_reset_res)
3579 goto fail;
43c4d576 3580
21226f02 3581 amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB);
c6a6e2db
AG
3582 } else {
3583
3584 task_barrier_full(&hive->tb);
3585 adev->asic_reset_res = amdgpu_asic_reset(adev);
3586 }
ce316fa5 3587
c6a6e2db 3588fail:
d4535e2c 3589 if (adev->asic_reset_res)
fed184e9 3590 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
4a580877 3591 adev->asic_reset_res, adev_to_drm(adev)->unique);
d95e8e97 3592 amdgpu_put_xgmi_hive(hive);
d4535e2c
AG
3593}
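
/*
 * Illustrative sketch (not part of the driver): the task barrier used in
 * amdgpu_device_xgmi_reset_func() makes every device of an XGMI hive
 * rendezvous twice, so that all of them have entered BACO before any of
 * them leaves it.  A generic worker following the same pattern (the
 * function and its callbacks are hypothetical) would look like:
 */
static void example_barrier_synced_steps(struct task_barrier *tb,
					 void (*step1)(void *),
					 void (*step2)(void *),
					 void *data)
{
	task_barrier_enter(tb);		/* block until every worker arrives */
	step1(data);			/* e.g. BACO enter on all devices */

	task_barrier_exit(tb);		/* rendezvous again before step 2 */
	step2(data);			/* e.g. BACO exit on all devices */
}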
3594
71f98027
AD
3595static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3596{
3597 char *input = amdgpu_lockup_timeout;
3598 char *timeout_setting = NULL;
3599 int index = 0;
3600 long timeout;
3601 int ret = 0;
3602
3603 /*
67387dfe
AD
3604 * By default the timeout for non-compute jobs is 10000 ms
3605 * and 60000 ms for compute jobs.
71f98027 3606 * In SR-IOV or passthrough mode, the timeout for compute
b7b2a316 3607 * jobs is 60000 ms by default.
71f98027
AD
3608 */
3609 adev->gfx_timeout = msecs_to_jiffies(10000);
3610 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
9882e278
ED
3611 if (amdgpu_sriov_vf(adev))
3612 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3613 msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
71f98027 3614 else
67387dfe 3615 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027 3616
f440ff44 3617 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 3618 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 3619 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
3620 ret = kstrtol(timeout_setting, 0, &timeout);
3621 if (ret)
3622 return ret;
3623
3624 if (timeout == 0) {
3625 index++;
3626 continue;
3627 } else if (timeout < 0) {
3628 timeout = MAX_SCHEDULE_TIMEOUT;
127aedf9
CK
3629 dev_warn(adev->dev, "lockup timeout disabled");
3630 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
71f98027
AD
3631 } else {
3632 timeout = msecs_to_jiffies(timeout);
3633 }
3634
3635 switch (index++) {
3636 case 0:
3637 adev->gfx_timeout = timeout;
3638 break;
3639 case 1:
3640 adev->compute_timeout = timeout;
3641 break;
3642 case 2:
3643 adev->sdma_timeout = timeout;
3644 break;
3645 case 3:
3646 adev->video_timeout = timeout;
3647 break;
3648 default:
3649 break;
3650 }
3651 }
3652 /*
3653 * There is only one value specified and
3654 * it should apply to all non-compute jobs.
3655 */
bcccee89 3656 if (index == 1) {
71f98027 3657 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
3658 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3659 adev->compute_timeout = adev->gfx_timeout;
3660 }
71f98027
AD
3661 }
3662
3663 return ret;
3664}
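
/*
 * Illustrative note (not part of the driver): amdgpu_lockup_timeout is a
 * comma separated list of up to four values in milliseconds, consumed in
 * the order gfx, compute, sdma, video.  A value of 0 keeps the default
 * for that engine and a negative value disables the timeout (and taints
 * the kernel).  For example, on the kernel command line:
 *
 *	amdgpu.lockup_timeout=10000,60000,-1,5000
 *
 * gives a 10 s gfx timeout, a 60 s compute timeout, no sdma timeout and
 * a 5 s video timeout.  A single value applies to all non-compute
 * engines, and also to compute under SR-IOV or passthrough.
 */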
d4535e2c 3665
4a74c38c
PY
3666/**
3667 * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3668 *
3669 * @adev: amdgpu_device pointer
3670 *
3671 * RAM is direct mapped to the GPU if the IOMMU is not enabled or is in passthrough mode
3672 */
3673static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3674{
3675 struct iommu_domain *domain;
3676
3677 domain = iommu_get_domain_for_dev(adev->dev);
3678 if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3679 adev->ram_is_direct_mapped = true;
3680}
3681
77f3a5cd 3682static const struct attribute *amdgpu_dev_attributes[] = {
77f3a5cd
ND
3683 &dev_attr_pcie_replay_count.attr,
3684 NULL
3685};
3686
02ff519e
AD
3687static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
3688{
3689 if (amdgpu_mcbp == 1)
3690 adev->gfx.mcbp = true;
1e9e15dc
JZ
3691 else if (amdgpu_mcbp == 0)
3692 adev->gfx.mcbp = false;
4e8303cf
LL
3693 else if ((amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 0, 0)) &&
3694 (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 0, 0)) &&
1e9e15dc 3695 adev->gfx.num_gfx_rings)
50a7c876
AD
3696 adev->gfx.mcbp = true;
3697
02ff519e
AD
3698 if (amdgpu_sriov_vf(adev))
3699 adev->gfx.mcbp = true;
3700
3701 if (adev->gfx.mcbp)
3702 DRM_INFO("MCBP is enabled\n");
3703}
3704
d38ceaf9
AD
3705/**
3706 * amdgpu_device_init - initialize the driver
3707 *
3708 * @adev: amdgpu_device pointer
d38ceaf9
AD
3709 * @flags: driver flags
3710 *
3711 * Initializes the driver info and hw (all asics).
3712 * Returns 0 for success or an error on failure.
3713 * Called at driver startup.
3714 */
3715int amdgpu_device_init(struct amdgpu_device *adev,
d38ceaf9
AD
3716 uint32_t flags)
3717{
8aba21b7
LT
3718 struct drm_device *ddev = adev_to_drm(adev);
3719 struct pci_dev *pdev = adev->pdev;
d38ceaf9 3720 int r, i;
b98c6299 3721 bool px = false;
95844d20 3722 u32 max_MBps;
59e9fff1 3723 int tmp;
d38ceaf9
AD
3724
3725 adev->shutdown = false;
d38ceaf9 3726 adev->flags = flags;
4e66d7d2
YZ
3727
3728 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3729 adev->asic_type = amdgpu_force_asic_type;
3730 else
3731 adev->asic_type = flags & AMD_ASIC_MASK;
3732
d38ceaf9 3733 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 3734 if (amdgpu_emu_mode == 1)
8bdab6bb 3735 adev->usec_timeout *= 10;
770d13b1 3736 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
3737 adev->accel_working = false;
3738 adev->num_rings = 0;
68ce8b24 3739 RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
d38ceaf9
AD
3740 adev->mman.buffer_funcs = NULL;
3741 adev->mman.buffer_funcs_ring = NULL;
3742 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 3743 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 3744 adev->gmc.gmc_funcs = NULL;
7bd939d0 3745 adev->harvest_ip_mask = 0x0;
f54d1867 3746 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 3747 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
3748
3749 adev->smc_rreg = &amdgpu_invalid_rreg;
3750 adev->smc_wreg = &amdgpu_invalid_wreg;
3751 adev->pcie_rreg = &amdgpu_invalid_rreg;
3752 adev->pcie_wreg = &amdgpu_invalid_wreg;
0c552ed3
LM
3753 adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
3754 adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
36b9a952
HR
3755 adev->pciep_rreg = &amdgpu_invalid_rreg;
3756 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
3757 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3758 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
a76b2870
CL
3759 adev->pcie_rreg64_ext = &amdgpu_invalid_rreg64_ext;
3760 adev->pcie_wreg64_ext = &amdgpu_invalid_wreg64_ext;
d38ceaf9
AD
3761 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3762 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3763 adev->didt_rreg = &amdgpu_invalid_rreg;
3764 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
3765 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3766 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
3767 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3768 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3769
3e39ab90
AD
3770 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3771 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3772 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
3773
3774 /* mutex initialization are all done here so we
b8920e1e
SS
3775 * can recall function without having locking issues
3776 */
0e5ca0d1 3777 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
3778 mutex_init(&adev->pm.mutex);
3779 mutex_init(&adev->gfx.gpu_clock_mutex);
3780 mutex_init(&adev->srbm_mutex);
b8866c26 3781 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3782 mutex_init(&adev->gfx.gfx_off_mutex);
98a54e88 3783 mutex_init(&adev->gfx.partition_mutex);
d38ceaf9 3784 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3785 mutex_init(&adev->mn_lock);
e23b74aa 3786 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3787 hash_init(adev->mn_hash);
32eaeae0 3788 mutex_init(&adev->psp.mutex);
bd052211 3789 mutex_init(&adev->notifier_lock);
8cda7a4f 3790 mutex_init(&adev->pm.stable_pstate_ctx_lock);
f113cc32 3791 mutex_init(&adev->benchmark_mutex);
d38ceaf9 3792
ab3b9de6 3793 amdgpu_device_init_apu_flags(adev);
9f6a7857 3794
912dfc84
EQ
3795 r = amdgpu_device_check_arguments(adev);
3796 if (r)
3797 return r;
d38ceaf9 3798
d38ceaf9
AD
3799 spin_lock_init(&adev->mmio_idx_lock);
3800 spin_lock_init(&adev->smc_idx_lock);
3801 spin_lock_init(&adev->pcie_idx_lock);
3802 spin_lock_init(&adev->uvd_ctx_idx_lock);
3803 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3804 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3805 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3806 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3807 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3808
0c4e7fa5
CZ
3809 INIT_LIST_HEAD(&adev->shadow_list);
3810 mutex_init(&adev->shadow_list_lock);
3811
655ce9cb 3812 INIT_LIST_HEAD(&adev->reset_list);
3813
6492e1b0 3814 INIT_LIST_HEAD(&adev->ras_list);
3815
3e38b634
EQ
3816 INIT_LIST_HEAD(&adev->pm.od_kobj_list);
3817
beff74bc
AD
3818 INIT_DELAYED_WORK(&adev->delayed_init_work,
3819 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3820 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3821 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3822
d4535e2c
AG
3823 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3824
d23ee13f 3825 adev->gfx.gfx_off_req_count = 1;
0ad7347a
AA
3826 adev->gfx.gfx_off_residency = 0;
3827 adev->gfx.gfx_off_entrycount = 0;
b6e79d9a 3828 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3829
b265bdbd
EQ
3830 atomic_set(&adev->throttling_logging_enabled, 1);
3831 /*
3832 * If throttling continues, logging will be performed every minute
3833 * to avoid log flooding. "-1" is subtracted since the thermal
3834 * throttling interrupt comes every second. Thus, the total logging
3835 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
3836 * for throttling interrupt) = 60 seconds.
3837 */
3838 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3839 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3840
0fa49558
AX
3841 /* Registers mapping */
3842 /* TODO: block userspace mapping of io register */
da69c161
KW
3843 if (adev->asic_type >= CHIP_BONAIRE) {
3844 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3845 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3846 } else {
3847 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3848 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3849 }
d38ceaf9 3850
6c08e0ef
EQ
3851 for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3852 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3853
d38ceaf9 3854 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
b8920e1e 3855 if (!adev->rmmio)
d38ceaf9 3856 return -ENOMEM;
b8920e1e 3857
d38ceaf9 3858 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
b8920e1e 3859 DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size);
d38ceaf9 3860
436afdfa
PY
3861 /*
3862 * Reset domain needs to be present early, before XGMI hive discovered
3863 * (if any) and initialized to use reset sem and in_gpu reset flag
3864 * early on during init and before calling to RREG32.
3865 */
3866 adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3867 if (!adev->reset_domain)
3868 return -ENOMEM;
3869
3aa0115d
ML
3870 /* detect hw virtualization here */
3871 amdgpu_detect_virtualization(adev);
3872
04e85958
TL
3873 amdgpu_device_get_pcie_info(adev);
3874
dffa11b4
ML
3875 r = amdgpu_device_get_job_timeout_settings(adev);
3876 if (r) {
3877 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4ef87d8f 3878 return r;
a190d1c7
XY
3879 }
3880
d38ceaf9 3881 /* early init functions */
06ec9070 3882 r = amdgpu_device_ip_early_init(adev);
d38ceaf9 3883 if (r)
4ef87d8f 3884 return r;
d38ceaf9 3885
02ff519e
AD
3886 amdgpu_device_set_mcbp(adev);
3887
b7cdb41e
ML
3888 /* Get rid of things like offb */
3889 r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
3890 if (r)
3891 return r;
3892
4d33e704
SK
3893 /* Enable TMZ based on IP_VERSION */
3894 amdgpu_gmc_tmz_set(adev);
3895
957b0787 3896 amdgpu_gmc_noretry_set(adev);
4a0165f0
VS
3897 /* Need to get xgmi info early to decide the reset behavior*/
3898 if (adev->gmc.xgmi.supported) {
3899 r = adev->gfxhub.funcs->get_xgmi_info(adev);
3900 if (r)
3901 return r;
3902 }
3903
8e6d0b69 3904 /* enable PCIE atomic ops */
b4520bfd
GW
3905 if (amdgpu_sriov_vf(adev)) {
3906 if (adev->virt.fw_reserve.p_pf2vf)
3907 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
3908 adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
3909 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
0e768043
YZ
3910 /* APUs with gfx9 onwards don't rely on PCIe atomics; their
3911 * internal path natively supports atomics, so set have_atomics_support to true.
3912 */
b4520bfd 3913 } else if ((adev->flags & AMD_IS_APU) &&
4e8303cf
LL
3914 (amdgpu_ip_version(adev, GC_HWIP, 0) >
3915 IP_VERSION(9, 0, 0))) {
0e768043 3916 adev->have_atomics_support = true;
b4520bfd 3917 } else {
8e6d0b69 3918 adev->have_atomics_support =
3919 !pci_enable_atomic_ops_to_root(adev->pdev,
3920 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3921 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
b4520bfd
GW
3922 }
3923
8e6d0b69 3924 if (!adev->have_atomics_support)
3925 dev_info(adev->dev, "PCIE atomic ops is not supported\n");
3926
6585661d 3927 /* doorbell bar mapping and doorbell index init*/
43c064db 3928 amdgpu_doorbell_init(adev);
6585661d 3929
9475a943
SL
3930 if (amdgpu_emu_mode == 1) {
3931 /* post the asic on emulation mode */
3932 emu_soc_asic_init(adev);
bfca0289 3933 goto fence_driver_init;
9475a943 3934 }
bfca0289 3935
04442bf7
LL
3936 amdgpu_reset_init(adev);
3937
4e99a44e 3938 /* detect if we are with an SRIOV vbios */
b4520bfd
GW
3939 if (adev->bios)
3940 amdgpu_device_detect_sriov_bios(adev);
048765ad 3941
95e8e59e
AD
3942 /* check if we need to reset the asic
3943 * E.g., driver was not cleanly unloaded previously, etc.
3944 */
f14899fd 3945 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
e3c1b071 3946 if (adev->gmc.xgmi.num_physical_nodes) {
3947 dev_info(adev->dev, "Pending hive reset.\n");
3948 adev->gmc.xgmi.pending_reset = true;
3949 /* Only need to init necessary block for SMU to handle the reset */
3950 for (i = 0; i < adev->num_ip_blocks; i++) {
3951 if (!adev->ip_blocks[i].status.valid)
3952 continue;
3953 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3954 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3955 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3956 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
751f43e7 3957 DRM_DEBUG("IP %s disabled for hw_init.\n",
e3c1b071 3958 adev->ip_blocks[i].version->funcs->name);
3959 adev->ip_blocks[i].status.hw = true;
3960 }
3961 }
3962 } else {
59e9fff1 3963 tmp = amdgpu_reset_method;
3964 /* It should do a default reset when loading or reloading the driver,
3965 * regardless of the module parameter reset_method.
3966 */
3967 amdgpu_reset_method = AMD_RESET_METHOD_NONE;
e3c1b071 3968 r = amdgpu_asic_reset(adev);
59e9fff1 3969 amdgpu_reset_method = tmp;
e3c1b071 3970 if (r) {
3971 dev_err(adev->dev, "asic reset on init failed\n");
3972 goto failed;
3973 }
95e8e59e
AD
3974 }
3975 }
3976
d38ceaf9 3977 /* Post card if necessary */
39c640c0 3978 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3979 if (!adev->bios) {
bec86378 3980 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3981 r = -EINVAL;
3982 goto failed;
d38ceaf9 3983 }
bec86378 3984 DRM_INFO("GPU posting now...\n");
4d2997ab 3985 r = amdgpu_device_asic_init(adev);
4e99a44e
ML
3986 if (r) {
3987 dev_err(adev->dev, "gpu post error!\n");
3988 goto failed;
3989 }
d38ceaf9
AD
3990 }
3991
9535a86a
SZ
3992 if (adev->bios) {
3993 if (adev->is_atom_fw) {
3994 /* Initialize clocks */
3995 r = amdgpu_atomfirmware_get_clock_info(adev);
3996 if (r) {
3997 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
3998 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3999 goto failed;
4000 }
4001 } else {
4002 /* Initialize clocks */
4003 r = amdgpu_atombios_get_clock_info(adev);
4004 if (r) {
4005 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
4006 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4007 goto failed;
4008 }
4009 /* init i2c buses */
4010 if (!amdgpu_device_has_dc_support(adev))
4011 amdgpu_atombios_i2c_init(adev);
a5bde2f9 4012 }
2c1a2784 4013 }
d38ceaf9 4014
bfca0289 4015fence_driver_init:
d38ceaf9 4016 /* Fence driver */
067f44c8 4017 r = amdgpu_fence_driver_sw_init(adev);
2c1a2784 4018 if (r) {
067f44c8 4019 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
e23b74aa 4020 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 4021 goto failed;
2c1a2784 4022 }
d38ceaf9
AD
4023
4024 /* init the mode config */
4a580877 4025 drm_mode_config_init(adev_to_drm(adev));
d38ceaf9 4026
06ec9070 4027 r = amdgpu_device_ip_init(adev);
d38ceaf9 4028 if (r) {
06ec9070 4029 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 4030 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
970fd197 4031 goto release_ras_con;
d38ceaf9
AD
4032 }
4033
8d35a259
LG
4034 amdgpu_fence_driver_hw_init(adev);
4035
d69b8971
YZ
4036 dev_info(adev->dev,
4037 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
4038 adev->gfx.config.max_shader_engines,
4039 adev->gfx.config.max_sh_per_se,
4040 adev->gfx.config.max_cu_per_sh,
4041 adev->gfx.cu_info.number);
4042
d38ceaf9
AD
4043 adev->accel_working = true;
4044
e59c0205
AX
4045 amdgpu_vm_check_compute_bug(adev);
4046
95844d20
MO
4047 /* Initialize the buffer migration limit. */
4048 if (amdgpu_moverate >= 0)
4049 max_MBps = amdgpu_moverate;
4050 else
4051 max_MBps = 8; /* Allow 8 MB/s. */
4052 /* Get a log2 for easy divisions. */
4053 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
4054
b0adca4d
EQ
4055 /*
4056 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
4057 * Otherwise the mgpu fan boost feature will be skipped because the
4058 * gpu instance count would be too low.
4059 */
4060 amdgpu_register_gpu_instance(adev);
4061
d38ceaf9
AD
4062 /* enable clockgating, etc. after ib tests, etc. since some blocks require
4063 * explicit gating rather than handling it automatically.
4064 */
e3c1b071 4065 if (!adev->gmc.xgmi.pending_reset) {
4066 r = amdgpu_device_ip_late_init(adev);
4067 if (r) {
4068 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
4069 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
970fd197 4070 goto release_ras_con;
e3c1b071 4071 }
4072 /* must succeed. */
4073 amdgpu_ras_resume(adev);
4074 queue_delayed_work(system_wq, &adev->delayed_init_work,
4075 msecs_to_jiffies(AMDGPU_RESUME_MS));
2c1a2784 4076 }
d38ceaf9 4077
38eecbe0
CL
4078 if (amdgpu_sriov_vf(adev)) {
4079 amdgpu_virt_release_full_gpu(adev, true);
2c738637 4080 flush_delayed_work(&adev->delayed_init_work);
38eecbe0 4081 }
2c738637 4082
90bcb9b5
EQ
4083 /*
4084 * Register these sysfs files after `late_init`, as some of the
4085 * operations performed in `late_init` might affect how the sysfs
4086 * interfaces are created.
4087 */
4088 r = amdgpu_atombios_sysfs_init(adev);
4089 if (r)
4090 drm_err(&adev->ddev,
4091 "registering atombios sysfs failed (%d).\n", r);
4092
4093 r = amdgpu_pm_sysfs_init(adev);
4094 if (r)
4095 DRM_ERROR("registering pm sysfs failed (%d).\n", r);
4096
4097 r = amdgpu_ucode_sysfs_init(adev);
4098 if (r) {
4099 adev->ucode_sysfs_en = false;
4100 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
4101 } else
4102 adev->ucode_sysfs_en = true;
4103
77f3a5cd 4104 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
5aea5327 4105 if (r)
77f3a5cd 4106 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166 4107
76da73f0
LL
4108 r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
4109 if (r)
4110 dev_err(adev->dev,
4111 "Could not create amdgpu board attributes\n");
4112
7957ec80
LL
4113 amdgpu_fru_sysfs_init(adev);
4114
d155bef0
AB
4115 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4116 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
4117 if (r)
4118 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
4119
c1dd4aa6
AG
4120 /* Have stored pci confspace at hand for restore in sudden PCI error */
4121 if (amdgpu_device_cache_pci_state(adev->pdev))
4122 pci_restore_state(pdev);
4123
8c3dd61c
KHF
4124 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
4125 /* this will fail for cards that aren't VGA class devices, just
b8920e1e
SS
4126 * ignore it
4127 */
8c3dd61c 4128 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
bf44e8ce 4129 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
8c3dd61c 4130
d37a3929
OC
4131 px = amdgpu_device_supports_px(ddev);
4132
4133 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
4134 apple_gmux_detect(NULL, NULL)))
8c3dd61c
KHF
4135 vga_switcheroo_register_client(adev->pdev,
4136 &amdgpu_switcheroo_ops, px);
d37a3929
OC
4137
4138 if (px)
8c3dd61c 4139 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
8c3dd61c 4140
e3c1b071 4141 if (adev->gmc.xgmi.pending_reset)
4142 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
4143 msecs_to_jiffies(AMDGPU_RESUME_MS));
4144
4a74c38c
PY
4145 amdgpu_device_check_iommu_direct_map(adev);
4146
d38ceaf9 4147 return 0;
83ba126a 4148
970fd197 4149release_ras_con:
38eecbe0
CL
4150 if (amdgpu_sriov_vf(adev))
4151 amdgpu_virt_release_full_gpu(adev, true);
4152
4153 /* failed in exclusive mode due to timeout */
4154 if (amdgpu_sriov_vf(adev) &&
4155 !amdgpu_sriov_runtime(adev) &&
4156 amdgpu_virt_mmio_blocked(adev) &&
4157 !amdgpu_virt_wait_reset(adev)) {
4158 dev_err(adev->dev, "VF exclusive mode timeout\n");
4159 /* Don't send request since VF is inactive. */
4160 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4161 adev->virt.ops = NULL;
4162 r = -EAGAIN;
4163 }
970fd197
SY
4164 amdgpu_release_ras_context(adev);
4165
83ba126a 4166failed:
89041940 4167 amdgpu_vf_error_trans_all(adev);
8840a387 4168
83ba126a 4169 return r;
d38ceaf9
AD
4170}
4171
07775fc1
AG
4172static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
4173{
62d5f9f7 4174
07775fc1
AG
4175 /* Clear all CPU mappings pointing to this device */
4176 unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
4177
4178 /* Unmap all mapped bars - Doorbell, registers and VRAM */
43c064db 4179 amdgpu_doorbell_fini(adev);
07775fc1
AG
4180
4181 iounmap(adev->rmmio);
4182 adev->rmmio = NULL;
4183 if (adev->mman.aper_base_kaddr)
4184 iounmap(adev->mman.aper_base_kaddr);
4185 adev->mman.aper_base_kaddr = NULL;
4186
4187 /* Memory manager related */
a0ba1279 4188 if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
07775fc1
AG
4189 arch_phys_wc_del(adev->gmc.vram_mtrr);
4190 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4191 }
4192}
4193
d38ceaf9 4194/**
bbe04dec 4195 * amdgpu_device_fini_hw - tear down the driver
d38ceaf9
AD
4196 *
4197 * @adev: amdgpu_device pointer
4198 *
4199 * Tear down the driver info (all asics).
4200 * Called at driver shutdown.
4201 */
72c8c97b 4202void amdgpu_device_fini_hw(struct amdgpu_device *adev)
d38ceaf9 4203{
aac89168 4204 dev_info(adev->dev, "amdgpu: finishing device.\n");
9f875167 4205 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 4206 adev->shutdown = true;
9f875167 4207
752c683d
ML
4208 /* make sure the IB test has finished before entering exclusive mode
4209 * to avoid preemption during the IB test
b8920e1e 4210 */
519b8b76 4211 if (amdgpu_sriov_vf(adev)) {
752c683d 4212 amdgpu_virt_request_full_gpu(adev, false);
519b8b76
BZ
4213 amdgpu_virt_fini_data_exchange(adev);
4214 }
752c683d 4215
e5b03032
ML
4216 /* disable all interrupts */
4217 amdgpu_irq_disable_all(adev);
47fc644f 4218 if (adev->mode_info.mode_config_initialized) {
1053b9c9 4219 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4a580877 4220 drm_helper_force_disable_all(adev_to_drm(adev));
ff97cba8 4221 else
4a580877 4222 drm_atomic_helper_shutdown(adev_to_drm(adev));
ff97cba8 4223 }
8d35a259 4224 amdgpu_fence_driver_hw_fini(adev);
72c8c97b 4225
cd3a8a59 4226 if (adev->mman.initialized)
9bff18d1 4227 drain_workqueue(adev->mman.bdev.wq);
98f56188 4228
53e9d836 4229 if (adev->pm.sysfs_initialized)
7c868b59 4230 amdgpu_pm_sysfs_fini(adev);
72c8c97b
AG
4231 if (adev->ucode_sysfs_en)
4232 amdgpu_ucode_sysfs_fini(adev);
4233 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
7957ec80 4234 amdgpu_fru_sysfs_fini(adev);
72c8c97b 4235
232d1d43
SY
4236 /* ras features must be disabled before hw fini */
4237 amdgpu_ras_pre_fini(adev);
4238
e9669fb7 4239 amdgpu_device_ip_fini_early(adev);
d10d0daa 4240
a3848df6
YW
4241 amdgpu_irq_fini_hw(adev);
4242
b6fd6e0f
SK
4243 if (adev->mman.initialized)
4244 ttm_device_clear_dma_mappings(&adev->mman.bdev);
894c6890 4245
d10d0daa 4246 amdgpu_gart_dummy_page_fini(adev);
07775fc1 4247
39934d3e
VP
4248 if (drm_dev_is_unplugged(adev_to_drm(adev)))
4249 amdgpu_device_unmap_mmio(adev);
87172e89 4250
72c8c97b
AG
4251}
4252
4253void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4254{
62d5f9f7 4255 int idx;
d37a3929 4256 bool px;
62d5f9f7 4257
8d35a259 4258 amdgpu_fence_driver_sw_fini(adev);
a5c5d8d5 4259 amdgpu_device_ip_fini(adev);
b31d3063 4260 amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
d38ceaf9 4261 adev->accel_working = false;
68ce8b24 4262 dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
04442bf7
LL
4263
4264 amdgpu_reset_fini(adev);
4265
d38ceaf9 4266 /* free i2c buses */
4562236b
HW
4267 if (!amdgpu_device_has_dc_support(adev))
4268 amdgpu_i2c_fini(adev);
bfca0289
SL
4269
4270 if (amdgpu_emu_mode != 1)
4271 amdgpu_atombios_fini(adev);
4272
d38ceaf9
AD
4273 kfree(adev->bios);
4274 adev->bios = NULL;
d37a3929 4275
8a2b5139
LL
4276 kfree(adev->fru_info);
4277 adev->fru_info = NULL;
4278
d37a3929
OC
4279 px = amdgpu_device_supports_px(adev_to_drm(adev));
4280
4281 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
4282 apple_gmux_detect(NULL, NULL)))
84c8b22e 4283 vga_switcheroo_unregister_client(adev->pdev);
d37a3929
OC
4284
4285 if (px)
83ba126a 4286 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d37a3929 4287
38d6be81 4288 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
b8779475 4289 vga_client_unregister(adev->pdev);
e9bc1bf7 4290
62d5f9f7
LS
4291 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4292
4293 iounmap(adev->rmmio);
4294 adev->rmmio = NULL;
43c064db 4295 amdgpu_doorbell_fini(adev);
62d5f9f7
LS
4296 drm_dev_exit(idx);
4297 }
4298
d155bef0
AB
4299 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4300 amdgpu_pmu_fini(adev);
72de33f8 4301 if (adev->mman.discovery_bin)
a190d1c7 4302 amdgpu_discovery_fini(adev);
72c8c97b 4303
cfbb6b00
AG
4304 amdgpu_reset_put_reset_domain(adev->reset_domain);
4305 adev->reset_domain = NULL;
4306
72c8c97b
AG
4307 kfree(adev->pci_state);
4308
d38ceaf9
AD
4309}
4310
58144d28
ND
4311/**
4312 * amdgpu_device_evict_resources - evict device resources
4313 * @adev: amdgpu device object
4314 *
4315 * Evicts all ttm device resources (vram BOs, gart table) from the lru list
4316 * of the vram memory type. Mainly used for evicting device resources
4317 * at suspend time.
4318 *
4319 */
7863c155 4320static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
58144d28 4321{
7863c155
ML
4322 int ret;
4323
e53d9665
ML
4324 /* No need to evict vram on APUs for suspend to ram or s2idle */
4325 if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
7863c155 4326 return 0;
58144d28 4327
7863c155
ML
4328 ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4329 if (ret)
58144d28 4330 DRM_WARN("evicting device resources failed\n");
7863c155 4331 return ret;
58144d28 4332}
d38ceaf9
AD
4333
4334/*
4335 * Suspend & resume.
4336 */
5095d541
ML
4337/**
4338 * amdgpu_device_prepare - prepare for device suspend
4339 *
4340 * @dev: drm dev pointer
4341 *
4342 * Prepare to put the hw in the suspend state (all asics).
4343 * Returns 0 for success or an error on failure.
4344 * Called at driver suspend.
4345 */
4346int amdgpu_device_prepare(struct drm_device *dev)
4347{
4348 struct amdgpu_device *adev = drm_to_adev(dev);
cb11ca32 4349 int i, r;
5095d541
ML
4350
4351 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4352 return 0;
4353
4354 /* Evict the majority of BOs before starting suspend sequence */
4355 r = amdgpu_device_evict_resources(adev);
4356 if (r)
4357 return r;
4358
cb11ca32
ML
4359 for (i = 0; i < adev->num_ip_blocks; i++) {
4360 if (!adev->ip_blocks[i].status.valid)
4361 continue;
4362 if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
4363 continue;
4364 r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev);
4365 if (r)
4366 return r;
4367 }
4368
5095d541
ML
4369 return 0;
4370}
4371
d38ceaf9 4372/**
810ddc3a 4373 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 4374 *
87e3f136 4375 * @dev: drm dev pointer
87e3f136 4376 * @fbcon : notify the fbdev of suspend
d38ceaf9
AD
4377 *
4378 * Puts the hw in the suspend state (all asics).
4379 * Returns 0 for success or an error on failure.
4380 * Called at driver suspend.
4381 */
de185019 4382int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9 4383{
a2e15b0e 4384 struct amdgpu_device *adev = drm_to_adev(dev);
d7274ec7 4385 int r = 0;
d38ceaf9 4386
d38ceaf9
AD
4387 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4388 return 0;
4389
44779b43 4390 adev->in_suspend = true;
3fa8f89d 4391
d7274ec7
BZ
4392 if (amdgpu_sriov_vf(adev)) {
4393 amdgpu_virt_fini_data_exchange(adev);
4394 r = amdgpu_virt_request_full_gpu(adev, false);
4395 if (r)
4396 return r;
4397 }
4398
3fa8f89d
S
4399 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4400 DRM_WARN("smart shift update failed\n");
4401
5f818173 4402 if (fbcon)
087451f3 4403 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
5f818173 4404
beff74bc 4405 cancel_delayed_work_sync(&adev->delayed_init_work);
0dee7263 4406 flush_delayed_work(&adev->gfx.gfx_off_delay_work);
a5459475 4407
5e6932fe 4408 amdgpu_ras_suspend(adev);
4409
2196927b 4410 amdgpu_device_ip_suspend_phase1(adev);
fe1053b7 4411
c004d44e 4412 if (!adev->in_s0ix)
5d3a2d95 4413 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
94fa5660 4414
7863c155
ML
4415 r = amdgpu_device_evict_resources(adev);
4416 if (r)
4417 return r;
d38ceaf9 4418
8d35a259 4419 amdgpu_fence_driver_hw_fini(adev);
d38ceaf9 4420
2196927b 4421 amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 4422
d7274ec7
BZ
4423 if (amdgpu_sriov_vf(adev))
4424 amdgpu_virt_release_full_gpu(adev, false);
4425
d38ceaf9
AD
4426 return 0;
4427}
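
/*
 * Illustrative sketch (not part of the driver): the system PM hooks that
 * call amdgpu_device_suspend()/amdgpu_device_resume() live in
 * amdgpu_drv.c.  A hypothetical suspend callback (headers assumed to be
 * those already included at the top of this file) boils down to:
 */
static int example_pm_suspend(struct device *dev)
{
	struct drm_device *drm_dev = dev_get_drvdata(dev);

	/* fbcon=true also puts the fbdev emulation to sleep */
	return amdgpu_device_suspend(drm_dev, true);
}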
4428
4429/**
810ddc3a 4430 * amdgpu_device_resume - initiate device resume
d38ceaf9 4431 *
87e3f136 4432 * @dev: drm dev pointer
87e3f136 4433 * @fbcon : notify the fbdev of resume
d38ceaf9
AD
4434 *
4435 * Bring the hw back to operating state (all asics).
4436 * Returns 0 for success or an error on failure.
4437 * Called at driver resume.
4438 */
de185019 4439int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9 4440{
1348969a 4441 struct amdgpu_device *adev = drm_to_adev(dev);
03161a6e 4442 int r = 0;
d38ceaf9 4443
d7274ec7
BZ
4444 if (amdgpu_sriov_vf(adev)) {
4445 r = amdgpu_virt_request_full_gpu(adev, true);
4446 if (r)
4447 return r;
4448 }
4449
d38ceaf9
AD
4450 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4451 return 0;
4452
62498733 4453 if (adev->in_s0ix)
bc143d8b 4454 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
628c36d7 4455
d38ceaf9 4456 /* post card */
39c640c0 4457 if (amdgpu_device_need_post(adev)) {
4d2997ab 4458 r = amdgpu_device_asic_init(adev);
74b0b157 4459 if (r)
aac89168 4460 dev_err(adev->dev, "amdgpu asic init failed\n");
74b0b157 4461 }
d38ceaf9 4462
06ec9070 4463 r = amdgpu_device_ip_resume(adev);
d7274ec7 4464
e6707218 4465 if (r) {
aac89168 4466 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
3c22c1ea 4467 goto exit;
e6707218 4468 }
8d35a259 4469 amdgpu_fence_driver_hw_init(adev);
5ceb54c6 4470
06ec9070 4471 r = amdgpu_device_ip_late_init(adev);
03161a6e 4472 if (r)
3c22c1ea 4473 goto exit;
d38ceaf9 4474
beff74bc
AD
4475 queue_delayed_work(system_wq, &adev->delayed_init_work,
4476 msecs_to_jiffies(AMDGPU_RESUME_MS));
4477
c004d44e 4478 if (!adev->in_s0ix) {
5d3a2d95
AD
4479 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4480 if (r)
3c22c1ea 4481 goto exit;
5d3a2d95 4482 }
756e6880 4483
3c22c1ea
SF
4484exit:
4485 if (amdgpu_sriov_vf(adev)) {
4486 amdgpu_virt_init_data_exchange(adev);
4487 amdgpu_virt_release_full_gpu(adev, true);
4488 }
4489
4490 if (r)
4491 return r;
4492
96a5d8d4 4493 /* Make sure IB tests flushed */
beff74bc 4494 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 4495
a2e15b0e 4496 if (fbcon)
087451f3 4497 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
d38ceaf9 4498
5e6932fe 4499 amdgpu_ras_resume(adev);
4500
d09ef243
AD
4501 if (adev->mode_info.num_crtc) {
4502 /*
4503 * Most of the connector probing functions try to acquire runtime pm
4504 * refs to ensure that the GPU is powered on when connector polling is
4505 * performed. Since we're calling this from a runtime PM callback,
4506 * trying to acquire rpm refs will cause us to deadlock.
4507 *
4508 * Since we're guaranteed to be holding the rpm lock, it's safe to
4509 * temporarily disable the rpm helpers so this doesn't deadlock us.
4510 */
23a1a9e5 4511#ifdef CONFIG_PM
d09ef243 4512 dev->dev->power.disable_depth++;
23a1a9e5 4513#endif
d09ef243
AD
4514 if (!adev->dc_enabled)
4515 drm_helper_hpd_irq_event(dev);
4516 else
4517 drm_kms_helper_hotplug_event(dev);
23a1a9e5 4518#ifdef CONFIG_PM
d09ef243 4519 dev->dev->power.disable_depth--;
23a1a9e5 4520#endif
d09ef243 4521 }
44779b43
RZ
4522 adev->in_suspend = false;
4523
dc907c9d
JX
4524 if (adev->enable_mes)
4525 amdgpu_mes_self_test(adev);
4526
3fa8f89d
S
4527 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4528 DRM_WARN("smart shift update failed\n");
4529
4d3b9ae5 4530 return 0;
d38ceaf9
AD
4531}
4532
e3ecdffa
AD
4533/**
4534 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4535 *
4536 * @adev: amdgpu_device pointer
4537 *
4538 * The list of all the hardware IPs that make up the asic is walked and
4539 * the check_soft_reset callbacks are run. check_soft_reset determines
4540 * if the asic is still hung or not.
4541 * Returns true if any of the IPs are still in a hung state, false if not.
4542 */
06ec9070 4543static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
4544{
4545 int i;
4546 bool asic_hang = false;
4547
f993d628
ML
4548 if (amdgpu_sriov_vf(adev))
4549 return true;
4550
8bc04c29
AD
4551 if (amdgpu_asic_need_full_reset(adev))
4552 return true;
4553
63fbf42f 4554 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4555 if (!adev->ip_blocks[i].status.valid)
63fbf42f 4556 continue;
a1255107
AD
4557 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4558 adev->ip_blocks[i].status.hang =
4559 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4560 if (adev->ip_blocks[i].status.hang) {
aac89168 4561 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
4562 asic_hang = true;
4563 }
4564 }
4565 return asic_hang;
4566}
4567
e3ecdffa
AD
4568/**
4569 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4570 *
4571 * @adev: amdgpu_device pointer
4572 *
4573 * The list of all the hardware IPs that make up the asic is walked and the
4574 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
4575 * handles any IP specific hardware or software state changes that are
4576 * necessary for a soft reset to succeed.
4577 * Returns 0 on success, negative error code on failure.
4578 */
06ec9070 4579static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
4580{
4581 int i, r = 0;
4582
4583 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4584 if (!adev->ip_blocks[i].status.valid)
d31a501e 4585 continue;
a1255107
AD
4586 if (adev->ip_blocks[i].status.hang &&
4587 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4588 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
4589 if (r)
4590 return r;
4591 }
4592 }
4593
4594 return 0;
4595}
4596
e3ecdffa
AD
4597/**
4598 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4599 *
4600 * @adev: amdgpu_device pointer
4601 *
4602 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
4603 * reset is necessary to recover.
4604 * Returns true if a full asic reset is required, false if not.
4605 */
06ec9070 4606static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 4607{
da146d3b
AD
4608 int i;
4609
8bc04c29
AD
4610 if (amdgpu_asic_need_full_reset(adev))
4611 return true;
4612
da146d3b 4613 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4614 if (!adev->ip_blocks[i].status.valid)
da146d3b 4615 continue;
a1255107
AD
4616 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4617 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4618 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
4619 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4620 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 4621 if (adev->ip_blocks[i].status.hang) {
aac89168 4622 dev_info(adev->dev, "Some block need full reset!\n");
da146d3b
AD
4623 return true;
4624 }
4625 }
35d782fe
CZ
4626 }
4627 return false;
4628}
4629
e3ecdffa
AD
4630/**
4631 * amdgpu_device_ip_soft_reset - do a soft reset
4632 *
4633 * @adev: amdgpu_device pointer
4634 *
4635 * The list of all the hardware IPs that make up the asic is walked and the
4636 * soft_reset callbacks are run if the block is hung. soft_reset handles any
4637 * IP specific hardware or software state changes that are necessary to soft
4638 * reset the IP.
4639 * Returns 0 on success, negative error code on failure.
4640 */
06ec9070 4641static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4642{
4643 int i, r = 0;
4644
4645 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4646 if (!adev->ip_blocks[i].status.valid)
35d782fe 4647 continue;
a1255107
AD
4648 if (adev->ip_blocks[i].status.hang &&
4649 adev->ip_blocks[i].version->funcs->soft_reset) {
4650 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
4651 if (r)
4652 return r;
4653 }
4654 }
4655
4656 return 0;
4657}
4658
e3ecdffa
AD
4659/**
4660 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4661 *
4662 * @adev: amdgpu_device pointer
4663 *
4664 * The list of all the hardware IPs that make up the asic is walked and the
4665 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
4666 * handles any IP specific hardware or software state changes that are
4667 * necessary after the IP has been soft reset.
4668 * Returns 0 on success, negative error code on failure.
4669 */
06ec9070 4670static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4671{
4672 int i, r = 0;
4673
4674 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4675 if (!adev->ip_blocks[i].status.valid)
35d782fe 4676 continue;
a1255107
AD
4677 if (adev->ip_blocks[i].status.hang &&
4678 adev->ip_blocks[i].version->funcs->post_soft_reset)
4679 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
4680 if (r)
4681 return r;
4682 }
4683
4684 return 0;
4685}
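
/*
 * Illustrative sketch (not part of the driver): the four soft-reset
 * helpers above are meant to be used as a sequence -- try a soft reset
 * only when something is hung and no block demands a full reset, and
 * escalate when the hang persists.  A hypothetical wrapper using only
 * the helpers defined in this file:
 */
static int example_try_soft_reset(struct amdgpu_device *adev)
{
	int r;

	if (!amdgpu_device_ip_check_soft_reset(adev))
		return 0;			/* nothing is hung */

	if (amdgpu_device_ip_need_full_reset(adev))
		return -EAGAIN;			/* soft reset cannot help */

	amdgpu_device_ip_pre_soft_reset(adev);
	r = amdgpu_device_ip_soft_reset(adev);
	amdgpu_device_ip_post_soft_reset(adev);

	if (r || amdgpu_device_ip_check_soft_reset(adev))
		return -EAGAIN;			/* still hung: full reset */

	return 0;
}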
4686
e3ecdffa 4687/**
c33adbc7 4688 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
4689 *
4690 * @adev: amdgpu_device pointer
4691 *
4692 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
4693 * restore things like GPUVM page tables after a GPU reset where
4694 * the contents of VRAM might be lost.
403009bf
CK
4695 *
4696 * Returns:
4697 * 0 on success, negative error code on failure.
e3ecdffa 4698 */
c33adbc7 4699static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 4700{
c41d1cf6 4701 struct dma_fence *fence = NULL, *next = NULL;
403009bf 4702 struct amdgpu_bo *shadow;
e18aaea7 4703 struct amdgpu_bo_vm *vmbo;
403009bf 4704 long r = 1, tmo;
c41d1cf6
ML
4705
4706 if (amdgpu_sriov_runtime(adev))
b045d3af 4707 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
4708 else
4709 tmo = msecs_to_jiffies(100);
4710
aac89168 4711 dev_info(adev->dev, "recover vram bo from shadow start\n");
c41d1cf6 4712 mutex_lock(&adev->shadow_list_lock);
e18aaea7 4713 list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4994d1f0
LC
4714 /* If vm is compute context or adev is APU, shadow will be NULL */
4715 if (!vmbo->shadow)
4716 continue;
4717 shadow = vmbo->shadow;
4718
403009bf 4719 /* No need to recover an evicted BO */
d3116756
CK
4720 if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4721 shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4722 shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
403009bf
CK
4723 continue;
4724
4725 r = amdgpu_bo_restore_shadow(shadow, &next);
4726 if (r)
4727 break;
4728
c41d1cf6 4729 if (fence) {
1712fb1a 4730 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
4731 dma_fence_put(fence);
4732 fence = next;
1712fb1a 4733 if (tmo == 0) {
4734 r = -ETIMEDOUT;
c41d1cf6 4735 break;
1712fb1a 4736 } else if (tmo < 0) {
4737 r = tmo;
4738 break;
4739 }
403009bf
CK
4740 } else {
4741 fence = next;
c41d1cf6 4742 }
c41d1cf6
ML
4743 }
4744 mutex_unlock(&adev->shadow_list_lock);
4745
403009bf
CK
4746 if (fence)
4747 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
4748 dma_fence_put(fence);
4749
1712fb1a 4750 if (r < 0 || tmo <= 0) {
aac89168 4751 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
4752 return -EIO;
4753 }
c41d1cf6 4754
aac89168 4755 dev_info(adev->dev, "recover vram bo from shadow done\n");
403009bf 4756 return 0;
c41d1cf6
ML
4757}
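
/*
 * Illustrative sketch (not part of the driver): amdgpu_device_recover_vram()
 * pipelines the shadow restores -- it issues restore N+1 first and only
 * then waits, with a shrinking timeout budget, on the fence of restore N.
 * The underlying pattern, with a hypothetical issue_next() callback:
 */
static long example_pipelined_fence_wait(struct dma_fence *(*issue_next)(void *),
					 void *data, int count, long tmo)
{
	struct dma_fence *fence = NULL, *next;
	int i;

	for (i = 0; i < count && tmo > 0; i++) {
		next = issue_next(data);		/* kick off the next copy */
		if (fence) {
			tmo = dma_fence_wait_timeout(fence, false, tmo);
			dma_fence_put(fence);		/* done with this fence */
		}
		fence = next;				/* wait for it next round */
	}

	if (fence && tmo > 0)
		tmo = dma_fence_wait_timeout(fence, false, tmo);
	dma_fence_put(fence);				/* dma_fence_put(NULL) is a no-op */

	return tmo;	/* remaining budget: 0 = timed out, < 0 = error */
}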
4758
a90ad3c2 4759
e3ecdffa 4760/**
06ec9070 4761 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e 4762 *
982a820b 4763 * @adev: amdgpu_device pointer
87e3f136 4764 * @from_hypervisor: request from hypervisor
5740682e
ML
4765 *
4766 * Do a VF FLR and reinitialize the ASIC.
3f48c681 4767 * Returns 0 on success, negative error code on failure.
e3ecdffa
AD
4768 */
4769static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4770 bool from_hypervisor)
5740682e
ML
4771{
4772 int r;
a5f67c93 4773 struct amdgpu_hive_info *hive = NULL;
7258fa31 4774 int retry_limit = 0;
5740682e 4775
7258fa31 4776retry:
c004d44e 4777 amdgpu_amdkfd_pre_reset(adev);
428890a3 4778
5740682e
ML
4779 if (from_hypervisor)
4780 r = amdgpu_virt_request_full_gpu(adev, true);
4781 else
4782 r = amdgpu_virt_reset_gpu(adev);
4783 if (r)
4784 return r;
f734b213 4785 amdgpu_irq_gpu_reset_resume_helper(adev);
a90ad3c2 4786
83f24a8f
HC
4787 /* some sw clean up VF needs to do before recover */
4788 amdgpu_virt_post_reset(adev);
4789
a90ad3c2 4790 /* Resume IP prior to SMC */
06ec9070 4791 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
4792 if (r)
4793 goto error;
a90ad3c2 4794
c9ffa427 4795 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 4796
7a3e0bb2
RZ
4797 r = amdgpu_device_fw_loading(adev);
4798 if (r)
4799 return r;
4800
a90ad3c2 4801 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 4802 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
4803 if (r)
4804 goto error;
a90ad3c2 4805
a5f67c93
ZL
4806 hive = amdgpu_get_xgmi_hive(adev);
4807 /* Update PSP FW topology after reset */
4808 if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4809 r = amdgpu_xgmi_update_topology(hive, adev);
4810
4811 if (hive)
4812 amdgpu_put_xgmi_hive(hive);
4813
4814 if (!r) {
a5f67c93 4815 r = amdgpu_ib_ring_tests(adev);
9c12f5cd 4816
c004d44e 4817 amdgpu_amdkfd_post_reset(adev);
a5f67c93 4818 }
a90ad3c2 4819
abc34253 4820error:
c41d1cf6 4821 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 4822 amdgpu_inc_vram_lost(adev);
c33adbc7 4823 r = amdgpu_device_recover_vram(adev);
a90ad3c2 4824 }
437f3e0b 4825 amdgpu_virt_release_full_gpu(adev, true);
a90ad3c2 4826
7258fa31
SK
4827 if (AMDGPU_RETRY_SRIOV_RESET(r)) {
4828 if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
4829 retry_limit++;
4830 goto retry;
4831 } else
4832 DRM_ERROR("GPU reset retry is beyond the retry limit\n");
4833 }
4834
a90ad3c2
ML
4835 return r;
4836}
4837
9a1cddd6 4838/**
4839 * amdgpu_device_has_job_running - check if there is any job in mirror list
4840 *
982a820b 4841 * @adev: amdgpu_device pointer
9a1cddd6 4842 *
4843 * check if there is any job in mirror list
4844 */
4845bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4846{
4847 int i;
4848 struct drm_sched_job *job;
4849
4850 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4851 struct amdgpu_ring *ring = adev->rings[i];
4852
4853 if (!ring || !ring->sched.thread)
4854 continue;
4855
4856 spin_lock(&ring->sched.job_list_lock);
6efa4b46
LT
4857 job = list_first_entry_or_null(&ring->sched.pending_list,
4858 struct drm_sched_job, list);
9a1cddd6 4859 spin_unlock(&ring->sched.job_list_lock);
4860 if (job)
4861 return true;
4862 }
4863 return false;
4864}
4865
12938fad
CK
4866/**
4867 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4868 *
982a820b 4869 * @adev: amdgpu_device pointer
12938fad
CK
4870 *
4871 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4872 * a hung GPU.
4873 */
4874bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4875{
12938fad 4876
3ba7b418
AG
4877 if (amdgpu_gpu_recovery == 0)
4878 goto disabled;
4879
1a11a65d
YC
4880 /* Skip soft reset check in fatal error mode */
4881 if (!amdgpu_ras_is_poison_mode_supported(adev))
4882 return true;
4883
3ba7b418
AG
4884 if (amdgpu_sriov_vf(adev))
4885 return true;
4886
4887 if (amdgpu_gpu_recovery == -1) {
4888 switch (adev->asic_type) {
b3523c45
AD
4889#ifdef CONFIG_DRM_AMDGPU_SI
4890 case CHIP_VERDE:
4891 case CHIP_TAHITI:
4892 case CHIP_PITCAIRN:
4893 case CHIP_OLAND:
4894 case CHIP_HAINAN:
4895#endif
4896#ifdef CONFIG_DRM_AMDGPU_CIK
4897 case CHIP_KAVERI:
4898 case CHIP_KABINI:
4899 case CHIP_MULLINS:
4900#endif
4901 case CHIP_CARRIZO:
4902 case CHIP_STONEY:
4903 case CHIP_CYAN_SKILLFISH:
3ba7b418 4904 goto disabled;
b3523c45
AD
4905 default:
4906 break;
3ba7b418 4907 }
12938fad
CK
4908 }
4909
4910 return true;
3ba7b418
AG
4911
4912disabled:
aac89168 4913 dev_info(adev->dev, "GPU recovery disabled.\n");
3ba7b418 4914 return false;
12938fad
CK
4915}
4916
5c03e584
FX
4917int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
4918{
47fc644f
SS
4919 u32 i;
4920 int ret = 0;
5c03e584 4921
47fc644f 4922 amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5c03e584 4923
47fc644f 4924 dev_info(adev->dev, "GPU mode1 reset\n");
5c03e584 4925
47fc644f
SS
4926 /* disable BM */
4927 pci_clear_master(adev->pdev);
5c03e584 4928
47fc644f 4929 amdgpu_device_cache_pci_state(adev->pdev);
5c03e584 4930
47fc644f
SS
4931 if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
4932 dev_info(adev->dev, "GPU smu mode1 reset\n");
4933 ret = amdgpu_dpm_mode1_reset(adev);
4934 } else {
4935 dev_info(adev->dev, "GPU psp mode1 reset\n");
4936 ret = psp_gpu_reset(adev);
4937 }
5c03e584 4938
47fc644f 4939 if (ret)
2c0f880a 4940 goto mode1_reset_failed;
5c03e584 4941
47fc644f 4942 amdgpu_device_load_pci_state(adev->pdev);
15c5c5f5
LL
4943 ret = amdgpu_psp_wait_for_bootloader(adev);
4944 if (ret)
2c0f880a 4945 goto mode1_reset_failed;
5c03e584 4946
47fc644f
SS
4947 /* wait for asic to come out of reset */
4948 for (i = 0; i < adev->usec_timeout; i++) {
4949 u32 memsize = adev->nbio.funcs->get_memsize(adev);
5c03e584 4950
47fc644f
SS
4951 if (memsize != 0xffffffff)
4952 break;
4953 udelay(1);
4954 }
5c03e584 4955
2c0f880a
HZ
4956 if (i >= adev->usec_timeout) {
4957 ret = -ETIMEDOUT;
4958 goto mode1_reset_failed;
4959 }
4960
47fc644f 4961 amdgpu_atombios_scratch_regs_engine_hung(adev, false);
15c5c5f5 4962
2c0f880a
HZ
4963 return 0;
4964
4965mode1_reset_failed:
4966 dev_err(adev->dev, "GPU mode1 reset failed\n");
47fc644f 4967 return ret;
5c03e584 4968}
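
/*
 * Illustrative sketch (not part of the driver): waiting for the ASIC to
 * come back from a mode1 reset is a simple bounded poll -- a register
 * (here the nbio memsize, stood in for by a hypothetical read_reg()
 * callback) reads back as all ones while the chip is still in reset.
 */
static int example_poll_until_out_of_reset(struct amdgpu_device *adev,
					   u32 (*read_reg)(struct amdgpu_device *))
{
	u32 i;

	for (i = 0; i < adev->usec_timeout; i++) {
		if (read_reg(adev) != 0xffffffff)
			return 0;	/* chip answered with real data */
		udelay(1);		/* busy-wait one microsecond */
	}

	return -ETIMEDOUT;
}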
5c6dd71e 4969
e3c1b071 4970int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
04442bf7 4971 struct amdgpu_reset_context *reset_context)
26bc5340 4972{
5c1e6fa4 4973 int i, r = 0;
04442bf7
LL
4974 struct amdgpu_job *job = NULL;
4975 bool need_full_reset =
4976 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4977
4978 if (reset_context->reset_req_dev == adev)
4979 job = reset_context->job;
71182665 4980
b602ca5f
TZ
4981 if (amdgpu_sriov_vf(adev)) {
4982 /* stop the data exchange thread */
4983 amdgpu_virt_fini_data_exchange(adev);
4984 }
4985
9e225fb9
AG
4986 amdgpu_fence_driver_isr_toggle(adev, true);
4987
71182665 4988 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
4989 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4990 struct amdgpu_ring *ring = adev->rings[i];
4991
51687759 4992 if (!ring || !ring->sched.thread)
0875dc9e 4993 continue;
5740682e 4994
b8920e1e
SS
4995 /* Clear job fence from fence drv to avoid force_completion
4996 * leave NULL and vm flush fence in fence drv
4997 */
5c1e6fa4 4998 amdgpu_fence_driver_clear_job_fences(ring);
c530b02f 4999
2f9d4084
ML
5000 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
5001 amdgpu_fence_driver_force_completion(ring);
0875dc9e 5002 }
d38ceaf9 5003
9e225fb9
AG
5004 amdgpu_fence_driver_isr_toggle(adev, false);
5005
ff99849b 5006 if (job && job->vm)
222b5f04
AG
5007 drm_sched_increase_karma(&job->base);
5008
04442bf7 5009 r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
404b277b 5010 /* If reset handler not implemented, continue; otherwise return */
b8920e1e 5011 if (r == -EOPNOTSUPP)
404b277b
LL
5012 r = 0;
5013 else
04442bf7
LL
5014 return r;
5015
1d721ed6 5016 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
5017 if (!amdgpu_sriov_vf(adev)) {
5018
5019 if (!need_full_reset)
5020 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
5021
360cd081
LG
5022 if (!need_full_reset && amdgpu_gpu_recovery &&
5023 amdgpu_device_ip_check_soft_reset(adev)) {
26bc5340
AG
5024 amdgpu_device_ip_pre_soft_reset(adev);
5025 r = amdgpu_device_ip_soft_reset(adev);
5026 amdgpu_device_ip_post_soft_reset(adev);
5027 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 5028 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
26bc5340
AG
5029 need_full_reset = true;
5030 }
5031 }
5032
5033 if (need_full_reset)
5034 r = amdgpu_device_ip_suspend(adev);
04442bf7
LL
5035 if (need_full_reset)
5036 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5037 else
5038 clear_bit(AMDGPU_NEED_FULL_RESET,
5039 &reset_context->flags);
26bc5340
AG
5040 }
5041
5042 return r;
5043}
5044
15fd09a0
SA
5045static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
5046{
15fd09a0
SA
5047 int i;
5048
38a15ad9 5049 lockdep_assert_held(&adev->reset_domain->sem);
15fd09a0
SA
5050
5051 for (i = 0; i < adev->num_regs; i++) {
651d7ee6
SA
5052 adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]);
5053 trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i],
5054 adev->reset_dump_reg_value[i]);
15fd09a0
SA
5055 }
5056
5057 return 0;
5058}
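/*
 * Illustrative sketch only, not part of the driver: printing the values
 * captured by amdgpu_reset_reg_dumps() after a reset.  This assumes
 * reset_dump_reg_list/num_regs were populated beforehand (in the driver
 * this is normally done elsewhere, e.g. through debugfs).
 */
static void __maybe_unused amdgpu_example_print_reset_dumps(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->num_regs; i++)
		dev_info(adev->dev, "example: reg 0x%08x = 0x%08x\n",
			 adev->reset_dump_reg_list[i],
			 adev->reset_dump_reg_value[i]);
}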
5059
a7691785
AA
5060#ifndef CONFIG_DEV_COREDUMP
5061static void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost,
5062 struct amdgpu_reset_context *reset_context)
5063{
5064}
5065#else
3d8785f6
SA
5066static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
5067 size_t count, void *data, size_t datalen)
5068{
5069 struct drm_printer p;
a7691785 5070 struct amdgpu_coredump_info *coredump = data;
3d8785f6
SA
5071 struct drm_print_iterator iter;
5072 int i;
5073
5074 iter.data = buffer;
5075 iter.offset = 0;
5076 iter.start = offset;
5077 iter.remain = count;
5078
5079 p = drm_coredump_printer(&iter);
5080
5081 drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
5082 drm_printf(&p, "kernel: " UTS_RELEASE "\n");
5083 drm_printf(&p, "module: " KBUILD_MODNAME "\n");
a7691785
AA
5084 drm_printf(&p, "time: %lld.%09ld\n", coredump->reset_time.tv_sec, coredump->reset_time.tv_nsec);
5085 if (coredump->reset_task_info.pid)
3d8785f6 5086 drm_printf(&p, "process_name: %s PID: %d\n",
a7691785
AA
5087 coredump->reset_task_info.process_name,
5088 coredump->reset_task_info.pid);
3d8785f6 5089
a7691785 5090 if (coredump->reset_vram_lost)
3d8785f6 5091 drm_printf(&p, "VRAM is lost due to GPU reset!\n");
a7691785 5092 if (coredump->adev->num_regs) {
3d8785f6
SA
5093 drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n");
5094
a7691785 5095 for (i = 0; i < coredump->adev->num_regs; i++)
3d8785f6 5096 drm_printf(&p, "0x%08x: 0x%08x\n",
a7691785
AA
5097 coredump->adev->reset_dump_reg_list[i],
5098 coredump->adev->reset_dump_reg_value[i]);
3d8785f6
SA
5099 }
5100
5101 return count - iter.remain;
5102}
5103
5104static void amdgpu_devcoredump_free(void *data)
5105{
a7691785 5106 kfree(data);
3d8785f6
SA
5107}
5108
a7691785
AA
5109static void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost,
5110 struct amdgpu_reset_context *reset_context)
3d8785f6 5111{
a7691785 5112 struct amdgpu_coredump_info *coredump;
3d8785f6
SA
5113 struct drm_device *dev = adev_to_drm(adev);
5114
a7691785
AA
5115 coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT);
5116
5117 if (!coredump) {
5118 DRM_ERROR("%s: failed to allocate memory for coredump\n", __func__);
5119 return;
5120 }
5121
5122 coredump->reset_vram_lost = vram_lost;
5123
5124 if (reset_context->job && reset_context->job->vm)
5125 coredump->reset_task_info = reset_context->job->vm->task_info;
5126
5127 coredump->adev = adev;
5128
5129 ktime_get_ts64(&coredump->reset_time);
5130
5131 dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT,
3d8785f6
SA
5132 amdgpu_devcoredump_read, amdgpu_devcoredump_free);
5133}
5134#endif
5135
04442bf7
LL
5136int amdgpu_do_asic_reset(struct list_head *device_list_handle,
5137 struct amdgpu_reset_context *reset_context)
26bc5340
AG
5138{
5139 struct amdgpu_device *tmp_adev = NULL;
04442bf7 5140 bool need_full_reset, skip_hw_reset, vram_lost = false;
26bc5340 5141 int r = 0;
f5c7e779 5142 bool gpu_reset_for_dev_remove = 0;
26bc5340 5143
04442bf7
LL
5144 /* Try reset handler method first */
5145 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5146 reset_list);
15fd09a0 5147 amdgpu_reset_reg_dumps(tmp_adev);
0a83bb35
LL
5148
5149 reset_context->reset_device_list = device_list_handle;
04442bf7 5150 r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
404b277b 5151 /* If reset handler not implemented, continue; otherwise return */
b8920e1e 5152 if (r == -EOPNOTSUPP)
404b277b
LL
5153 r = 0;
5154 else
04442bf7
LL
5155 return r;
5156
5157 /* Reset handler not implemented, use the default method */
5158 need_full_reset =
5159 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5160 skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
5161
f5c7e779
YC
5162 gpu_reset_for_dev_remove =
5163 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5164 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5165
26bc5340 5166 /*
655ce9cb 5167 * ASIC reset has to be done on all XGMI hive nodes ASAP
26bc5340
AG
5168	 * to allow proper link negotiation in FW (within 1 sec)
5169 */
7ac71382 5170 if (!skip_hw_reset && need_full_reset) {
655ce9cb 5171 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
041a62bc 5172 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 5173 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
e3c1b071 5174 tmp_adev->gmc.xgmi.pending_reset = false;
c96cf282 5175 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
5176 r = -EALREADY;
5177 } else
5178 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 5179
041a62bc 5180 if (r) {
aac89168 5181 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4a580877 5182 r, adev_to_drm(tmp_adev)->unique);
19349072 5183 goto out;
ce316fa5
LM
5184 }
5185 }
5186
041a62bc
AG
5187 /* For XGMI wait for all resets to complete before proceed */
5188 if (!r) {
655ce9cb 5189 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
ce316fa5
LM
5190 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5191 flush_work(&tmp_adev->xgmi_reset_work);
5192 r = tmp_adev->asic_reset_res;
5193 if (r)
5194 break;
ce316fa5
LM
5195 }
5196 }
5197 }
ce316fa5 5198 }
26bc5340 5199
43c4d576 5200 if (!r && amdgpu_ras_intr_triggered()) {
655ce9cb 5201 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
21226f02 5202 amdgpu_ras_reset_error_count(tmp_adev, AMDGPU_RAS_BLOCK__MMHUB);
43c4d576
JC
5203 }
5204
00eaa571 5205 amdgpu_ras_intr_cleared();
43c4d576 5206 }
00eaa571 5207
f5c7e779
YC
5208 /* Since the mode1 reset affects base ip blocks, the
5209 * phase1 ip blocks need to be resumed. Otherwise there
5210 * will be a BIOS signature error and the psp bootloader
5211 * can't load kdb on the next amdgpu install.
5212 */
5213 if (gpu_reset_for_dev_remove) {
5214 list_for_each_entry(tmp_adev, device_list_handle, reset_list)
5215 amdgpu_device_ip_resume_phase1(tmp_adev);
5216
5217 goto end;
5218 }
5219
655ce9cb 5220 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
26bc5340
AG
5221 if (need_full_reset) {
5222 /* post card */
e3c1b071 5223 r = amdgpu_device_asic_init(tmp_adev);
5224 if (r) {
aac89168 5225 dev_warn(tmp_adev->dev, "asic atom init failed!");
e3c1b071 5226 } else {
26bc5340 5227 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
9cec53c1 5228
26bc5340
AG
5229 r = amdgpu_device_ip_resume_phase1(tmp_adev);
5230 if (r)
5231 goto out;
5232
5233 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
a7691785
AA
5234
5235 amdgpu_coredump(tmp_adev, vram_lost, reset_context);
5236
26bc5340 5237 if (vram_lost) {
77e7f829 5238 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 5239 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
5240 }
5241
26bc5340
AG
5242 r = amdgpu_device_fw_loading(tmp_adev);
5243 if (r)
5244 return r;
5245
c45e38f2
LL
5246 r = amdgpu_xcp_restore_partition_mode(
5247 tmp_adev->xcp_mgr);
5248 if (r)
5249 goto out;
5250
26bc5340
AG
5251 r = amdgpu_device_ip_resume_phase2(tmp_adev);
5252 if (r)
5253 goto out;
5254
5255 if (vram_lost)
5256 amdgpu_device_fill_reset_magic(tmp_adev);
5257
fdafb359
EQ
5258 /*
5259				 * Add this ASIC back as tracked since the reset
5260				 * already completed successfully.
5261 */
5262 amdgpu_register_gpu_instance(tmp_adev);
5263
04442bf7
LL
5264 if (!reset_context->hive &&
5265 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
e3c1b071 5266 amdgpu_xgmi_add_device(tmp_adev);
5267
7c04ca50 5268 r = amdgpu_device_ip_late_init(tmp_adev);
5269 if (r)
5270 goto out;
5271
087451f3 5272 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
565d1941 5273
e8fbaf03
GC
5274 /*
5275				 * The GPU enters a bad state once the number of faulty pages
5276				 * reported by ECC reaches the threshold, and RAS
5277				 * recovery is scheduled next. So add one check
5278				 * here to break recovery if it indeed exceeds the
5279				 * bad page threshold, and remind the user to
5280				 * retire this GPU or set a bigger
5281				 * bad_page_threshold value to fix this before
5282				 * probing the driver again.
5283 */
11003c68 5284 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
e8fbaf03
GC
5285 /* must succeed. */
5286 amdgpu_ras_resume(tmp_adev);
5287 } else {
5288 r = -EINVAL;
5289 goto out;
5290 }
e79a04d5 5291
26bc5340 5292 /* Update PSP FW topology after reset */
04442bf7
LL
5293 if (reset_context->hive &&
5294 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5295 r = amdgpu_xgmi_update_topology(
5296 reset_context->hive, tmp_adev);
26bc5340
AG
5297 }
5298 }
5299
26bc5340
AG
5300out:
5301 if (!r) {
5302 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5303 r = amdgpu_ib_ring_tests(tmp_adev);
5304 if (r) {
5305 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
26bc5340
AG
5306 need_full_reset = true;
5307 r = -EAGAIN;
5308 goto end;
5309 }
5310 }
5311
5312 if (!r)
5313 r = amdgpu_device_recover_vram(tmp_adev);
5314 else
5315 tmp_adev->asic_reset_res = r;
5316 }
5317
5318end:
04442bf7
LL
5319 if (need_full_reset)
5320 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5321 else
5322 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340
AG
5323 return r;
5324}
5325
e923be99 5326static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
26bc5340 5327{
5740682e 5328
a3a09142
AD
5329 switch (amdgpu_asic_reset_method(adev)) {
5330 case AMD_RESET_METHOD_MODE1:
5331 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5332 break;
5333 case AMD_RESET_METHOD_MODE2:
5334 adev->mp1_state = PP_MP1_STATE_RESET;
5335 break;
5336 default:
5337 adev->mp1_state = PP_MP1_STATE_NONE;
5338 break;
5339 }
26bc5340 5340}
d38ceaf9 5341
e923be99 5342static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
26bc5340 5343{
89041940 5344 amdgpu_vf_error_trans_all(adev);
a3a09142 5345 adev->mp1_state = PP_MP1_STATE_NONE;
91fb309d
HC
5346}
5347
3f12acc8
EQ
5348static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5349{
5350 struct pci_dev *p = NULL;
5351
5352 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5353 adev->pdev->bus->number, 1);
5354 if (p) {
5355 pm_runtime_enable(&(p->dev));
5356 pm_runtime_resume(&(p->dev));
5357 }
b85e285e
YY
5358
5359 pci_dev_put(p);
3f12acc8
EQ
5360}
5361
5362static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5363{
5364 enum amd_reset_method reset_method;
5365 struct pci_dev *p = NULL;
5366 u64 expires;
5367
5368 /*
5369	 * For now, only BACO and mode1 reset are confirmed
5370	 * to suffer from the audio issue if not properly suspended.
5371 */
5372 reset_method = amdgpu_asic_reset_method(adev);
5373 if ((reset_method != AMD_RESET_METHOD_BACO) &&
5374 (reset_method != AMD_RESET_METHOD_MODE1))
5375 return -EINVAL;
5376
5377 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5378 adev->pdev->bus->number, 1);
5379 if (!p)
5380 return -ENODEV;
5381
5382 expires = pm_runtime_autosuspend_expiration(&(p->dev));
5383 if (!expires)
5384 /*
5385		 * If we cannot get the audio device autosuspend delay,
5386		 * a fixed 4s interval is used. Since the audio
5387		 * controller's default autosuspend delay is 3s,
5388		 * the 4s used here is guaranteed to cover it.
5389 */
54b7feb9 5390 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
5391
5392 while (!pm_runtime_status_suspended(&(p->dev))) {
5393 if (!pm_runtime_suspend(&(p->dev)))
5394 break;
5395
5396 if (expires < ktime_get_mono_fast_ns()) {
5397 dev_warn(adev->dev, "failed to suspend display audio\n");
b85e285e 5398 pci_dev_put(p);
3f12acc8
EQ
5399 /* TODO: abort the succeeding gpu reset? */
5400 return -ETIMEDOUT;
5401 }
5402 }
5403
5404 pm_runtime_disable(&(p->dev));
5405
b85e285e 5406 pci_dev_put(p);
3f12acc8
EQ
5407 return 0;
5408}
5409
d193b12b 5410static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
247c7b0d
AG
5411{
5412 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5413
5414#if defined(CONFIG_DEBUG_FS)
5415 if (!amdgpu_sriov_vf(adev))
5416 cancel_work(&adev->reset_work);
5417#endif
5418
5419 if (adev->kfd.dev)
5420 cancel_work(&adev->kfd.reset_work);
5421
5422 if (amdgpu_sriov_vf(adev))
5423 cancel_work(&adev->virt.flr_work);
5424
5425 if (con && adev->ras_enabled)
5426 cancel_work(&con->recovery_work);
5427
5428}
5429
26bc5340 5430/**
6e9c65f7 5431 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
26bc5340 5432 *
982a820b 5433 * @adev: amdgpu_device pointer
26bc5340 5434 * @job: which job trigger hang
80bd2de1 5435 * @reset_context: amdgpu reset context pointer
26bc5340
AG
5436 *
5437 * Attempt to reset the GPU if it has hung (all asics).
5438 * Attempt to do soft-reset or full-reset and reinitialize the ASIC.
5439 * Returns 0 for success or an error on failure.
5440 */
5441
cf727044 5442int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
f1549c09
LG
5443 struct amdgpu_job *job,
5444 struct amdgpu_reset_context *reset_context)
26bc5340 5445{
1d721ed6 5446 struct list_head device_list, *device_list_handle = NULL;
7dd8c205 5447 bool job_signaled = false;
26bc5340 5448 struct amdgpu_hive_info *hive = NULL;
26bc5340 5449 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 5450 int i, r = 0;
bb5c7235 5451 bool need_emergency_restart = false;
3f12acc8 5452 bool audio_suspended = false;
f5c7e779
YC
5453 bool gpu_reset_for_dev_remove = false;
5454
5455 gpu_reset_for_dev_remove =
5456 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5457 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340 5458
6e3cd2a9 5459 /*
bb5c7235
WS
5460 * Special case: RAS triggered and full reset isn't supported
5461 */
5462 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5463
d5ea093e
AG
5464 /*
5465 * Flush RAM to disk so that after reboot
5466 * the user can read log and see why the system rebooted.
5467 */
80285ae1
SY
5468 if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
5469 amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
5470 DRM_WARN("Emergency reboot.");
5471
5472 ksys_sync_helper();
5473 emergency_restart();
5474 }
5475
b823821f 5476 dev_info(adev->dev, "GPU %s begin!\n",
bb5c7235 5477 need_emergency_restart ? "jobs stop":"reset");
26bc5340 5478
175ac6ec
ZL
5479 if (!amdgpu_sriov_vf(adev))
5480 hive = amdgpu_get_xgmi_hive(adev);
681260df 5481 if (hive)
53b3f8f4 5482 mutex_lock(&hive->hive_lock);
26bc5340 5483
f1549c09
LG
5484 reset_context->job = job;
5485 reset_context->hive = hive;
9e94d22c
EQ
5486 /*
5487 * Build list of devices to reset.
5488 * In case we are in XGMI hive mode, resort the device list
5489 * to put adev in the 1st position.
5490 */
5491 INIT_LIST_HEAD(&device_list);
175ac6ec 5492 if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
83d29a5f 5493 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
655ce9cb 5494 list_add_tail(&tmp_adev->reset_list, &device_list);
83d29a5f
YC
5495 if (gpu_reset_for_dev_remove && adev->shutdown)
5496 tmp_adev->shutdown = true;
5497 }
655ce9cb 5498 if (!list_is_first(&adev->reset_list, &device_list))
5499 list_rotate_to_front(&adev->reset_list, &device_list);
5500 device_list_handle = &device_list;
26bc5340 5501 } else {
655ce9cb 5502 list_add_tail(&adev->reset_list, &device_list);
26bc5340
AG
5503 device_list_handle = &device_list;
5504 }
5505
e923be99
AG
5506 /* We need to lock reset domain only once both for XGMI and single device */
5507 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5508 reset_list);
3675c2f2 5509 amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
e923be99 5510
1d721ed6 5511 /* block all schedulers and reset given job's ring */
655ce9cb 5512 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f287a3c5 5513
e923be99 5514 amdgpu_device_set_mp1_state(tmp_adev);
f287a3c5 5515
3f12acc8
EQ
5516 /*
5517		 * Try to put the audio codec into suspend state
5518		 * before the GPU reset starts.
5519		 *
5520		 * The power domain of the graphics device is
5521		 * shared with the AZ power domain. Without this,
5522 * we may change the audio hardware from behind
5523 * the audio driver's back. That will trigger
5524 * some audio codec errors.
5525 */
5526 if (!amdgpu_device_suspend_display_audio(tmp_adev))
5527 audio_suspended = true;
5528
9e94d22c
EQ
5529 amdgpu_ras_set_error_query_ready(tmp_adev, false);
5530
52fb44cf
EQ
5531 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5532
c004d44e 5533 if (!amdgpu_sriov_vf(tmp_adev))
428890a3 5534 amdgpu_amdkfd_pre_reset(tmp_adev);
9e94d22c 5535
12ffa55d
AG
5536 /*
5537		 * Mark these ASICs as untracked first,
5538		 * and add them back after reset completes.
5539 */
5540 amdgpu_unregister_gpu_instance(tmp_adev);
5541
163d4cd2 5542 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
565d1941 5543
f1c1314b 5544 /* disable ras on ALL IPs */
bb5c7235 5545 if (!need_emergency_restart &&
b823821f 5546 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 5547 amdgpu_ras_suspend(tmp_adev);
5548
1d721ed6
AG
5549 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5550 struct amdgpu_ring *ring = tmp_adev->rings[i];
5551
5552 if (!ring || !ring->sched.thread)
5553 continue;
5554
0b2d2c2e 5555 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 5556
bb5c7235 5557 if (need_emergency_restart)
7c6e68c7 5558 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6 5559 }
8f8c80f4 5560 atomic_inc(&tmp_adev->gpu_reset_counter);
1d721ed6
AG
5561 }
5562
bb5c7235 5563 if (need_emergency_restart)
7c6e68c7
AG
5564 goto skip_sched_resume;
5565
1d721ed6
AG
5566 /*
5567 * Must check guilty signal here since after this point all old
5568 * HW fences are force signaled.
5569 *
5570 * job->base holds a reference to parent fence
5571 */
f6a3f660 5572 if (job && dma_fence_is_signaled(&job->hw_fence)) {
1d721ed6 5573 job_signaled = true;
1d721ed6
AG
5574 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5575 goto skip_hw_reset;
5576 }
5577
26bc5340 5578retry: /* Rest of adevs pre asic reset from XGMI hive. */
655ce9cb 5579 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f5c7e779
YC
5580 if (gpu_reset_for_dev_remove) {
5581			/* Workaround for ASICs that need to disable SMC first */
5582 amdgpu_device_smu_fini_early(tmp_adev);
5583 }
f1549c09 5584 r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
26bc5340
AG
5585 /*TODO Should we stop ?*/
5586 if (r) {
aac89168 5587 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
4a580877 5588 r, adev_to_drm(tmp_adev)->unique);
26bc5340
AG
5589 tmp_adev->asic_reset_res = r;
5590 }
247c7b0d
AG
5591
5592 /*
5593		 * Drop all pending non-scheduler resets. Scheduler resets
5594 * were already dropped during drm_sched_stop
5595 */
d193b12b 5596 amdgpu_device_stop_pending_resets(tmp_adev);
26bc5340
AG
5597 }
5598
5599 /* Actual ASIC resets if needed.*/
4f30d920 5600 /* Host driver will handle XGMI hive reset for SRIOV */
26bc5340
AG
5601 if (amdgpu_sriov_vf(adev)) {
5602 r = amdgpu_device_reset_sriov(adev, job ? false : true);
5603 if (r)
5604 adev->asic_reset_res = r;
950d6425 5605
28606c4e 5606 /* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */
4e8303cf
LL
5607 if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
5608 IP_VERSION(9, 4, 2) ||
5609 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
950d6425 5610 amdgpu_ras_resume(adev);
26bc5340 5611 } else {
f1549c09 5612 r = amdgpu_do_asic_reset(device_list_handle, reset_context);
b98a1648 5613 if (r && r == -EAGAIN)
26bc5340 5614 goto retry;
f5c7e779
YC
5615
5616 if (!r && gpu_reset_for_dev_remove)
5617 goto recover_end;
26bc5340
AG
5618 }
5619
1d721ed6
AG
5620skip_hw_reset:
5621
26bc5340 5622 /* Post ASIC reset for all devs .*/
655ce9cb 5623 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
7c6e68c7 5624
1d721ed6
AG
5625 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5626 struct amdgpu_ring *ring = tmp_adev->rings[i];
5627
5628 if (!ring || !ring->sched.thread)
5629 continue;
5630
6868a2c4 5631 drm_sched_start(&ring->sched, true);
1d721ed6
AG
5632 }
5633
4e8303cf
LL
5634 if (adev->enable_mes &&
5635 amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(11, 0, 3))
ed67f729
JX
5636 amdgpu_mes_self_test(tmp_adev);
5637
b8920e1e 5638 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
4a580877 5639 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
1d721ed6 5640
7258fa31
SK
5641 if (tmp_adev->asic_reset_res)
5642 r = tmp_adev->asic_reset_res;
5643
1d721ed6 5644 tmp_adev->asic_reset_res = 0;
26bc5340
AG
5645
5646 if (r) {
5647 /* bad news, how to tell it to userspace ? */
12ffa55d 5648 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
5649 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5650 } else {
12ffa55d 5651 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
3fa8f89d
S
5652 if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5653 DRM_WARN("smart shift update failed\n");
26bc5340 5654 }
7c6e68c7 5655 }
26bc5340 5656
7c6e68c7 5657skip_sched_resume:
655ce9cb 5658 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
428890a3 5659 /* unlock kfd: SRIOV would do it separately */
c004d44e 5660 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
428890a3 5661 amdgpu_amdkfd_post_reset(tmp_adev);
8e2712e7 5662
5663 /* kfd_post_reset will do nothing if kfd device is not initialized,
5664		 * need to bring up kfd here if it was not initialized before
5665 */
5666 if (!adev->kfd.init_complete)
5667 amdgpu_amdkfd_device_init(adev);
5668
3f12acc8
EQ
5669 if (audio_suspended)
5670 amdgpu_device_resume_display_audio(tmp_adev);
e923be99
AG
5671
5672 amdgpu_device_unset_mp1_state(tmp_adev);
d293470e
YC
5673
5674 amdgpu_ras_set_error_query_ready(tmp_adev, true);
26bc5340
AG
5675 }
5676
f5c7e779 5677recover_end:
e923be99
AG
5678 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5679 reset_list);
5680 amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5681
9e94d22c 5682 if (hive) {
9e94d22c 5683 mutex_unlock(&hive->hive_lock);
d95e8e97 5684 amdgpu_put_xgmi_hive(hive);
9e94d22c 5685 }
26bc5340 5686
f287a3c5 5687 if (r)
26bc5340 5688 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
ab9a0b1f
AG
5689
5690 atomic_set(&adev->reset_domain->reset_res, r);
d38ceaf9
AD
5691 return r;
5692}
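/*
 * Illustrative sketch only, not part of the driver: how a hypothetical hang
 * handler might fill in an amdgpu_reset_context and hand it to
 * amdgpu_device_gpu_recover().  The specific flag choices here are an
 * assumption for illustration.
 */
static int __maybe_unused amdgpu_example_recover(struct amdgpu_device *adev,
						 struct amdgpu_job *job)
{
	struct amdgpu_reset_context reset_context;

	memset(&reset_context, 0, sizeof(reset_context));

	reset_context.method = AMD_RESET_METHOD_NONE;	/* let the driver pick */
	reset_context.reset_req_dev = adev;
	clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);

	return amdgpu_device_gpu_recover(adev, job, &reset_context);
}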
5693
e3ecdffa
AD
5694/**
5695 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
5696 *
5697 * @adev: amdgpu_device pointer
5698 *
5699 * Fetches and stores in the driver the PCIE capabilities (gen speed
5700 * and lanes) of the slot the device is in. Handles APUs and
5701 * virtualized environments where PCIE config space may not be available.
5702 */
5494d864 5703static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 5704{
5d9a6330 5705 struct pci_dev *pdev;
c5313457
HK
5706 enum pci_bus_speed speed_cap, platform_speed_cap;
5707 enum pcie_link_width platform_link_width;
d0dd7f0c 5708
cd474ba0
AD
5709 if (amdgpu_pcie_gen_cap)
5710 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 5711
cd474ba0
AD
5712 if (amdgpu_pcie_lane_cap)
5713 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 5714
cd474ba0 5715 /* covers APUs as well */
04e85958 5716 if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
cd474ba0
AD
5717 if (adev->pm.pcie_gen_mask == 0)
5718 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5719 if (adev->pm.pcie_mlw_mask == 0)
5720 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 5721 return;
cd474ba0 5722 }
d0dd7f0c 5723
c5313457
HK
5724 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5725 return;
5726
dbaa922b
AD
5727 pcie_bandwidth_available(adev->pdev, NULL,
5728 &platform_speed_cap, &platform_link_width);
c5313457 5729
cd474ba0 5730 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
5731 /* asic caps */
5732 pdev = adev->pdev;
5733 speed_cap = pcie_get_speed_cap(pdev);
5734 if (speed_cap == PCI_SPEED_UNKNOWN) {
5735 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
5736 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5737 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 5738 } else {
2b3a1f51
FX
5739 if (speed_cap == PCIE_SPEED_32_0GT)
5740 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5741 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5742 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5743 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5744 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5745 else if (speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5746 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5747 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5748 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5749 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5750 else if (speed_cap == PCIE_SPEED_8_0GT)
5751 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5752 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5753 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5754 else if (speed_cap == PCIE_SPEED_5_0GT)
5755 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5756 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5757 else
5758 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5759 }
5760 /* platform caps */
c5313457 5761 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
5762 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5763 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5764 } else {
2b3a1f51
FX
5765 if (platform_speed_cap == PCIE_SPEED_32_0GT)
5766 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5767 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5768 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5769 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5770 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5771 else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5772 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5773 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5774 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5775 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 5776 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
5777 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5778 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5779 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 5780 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
5781 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5782 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5783 else
5784 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5785
cd474ba0
AD
5786 }
5787 }
5788 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 5789 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
5790 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5791 } else {
c5313457 5792 switch (platform_link_width) {
5d9a6330 5793 case PCIE_LNK_X32:
cd474ba0
AD
5794 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5795 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5796 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5797 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5798 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5799 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5800 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5801 break;
5d9a6330 5802 case PCIE_LNK_X16:
cd474ba0
AD
5803 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5804 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5805 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5806 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5807 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5808 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5809 break;
5d9a6330 5810 case PCIE_LNK_X12:
cd474ba0
AD
5811 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5812 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5813 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5814 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5815 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5816 break;
5d9a6330 5817 case PCIE_LNK_X8:
cd474ba0
AD
5818 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5819 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5820 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5821 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5822 break;
5d9a6330 5823 case PCIE_LNK_X4:
cd474ba0
AD
5824 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5825 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5826 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5827 break;
5d9a6330 5828 case PCIE_LNK_X2:
cd474ba0
AD
5829 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5830 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5831 break;
5d9a6330 5832 case PCIE_LNK_X1:
cd474ba0
AD
5833 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5834 break;
5835 default:
5836 break;
5837 }
d0dd7f0c
AD
5838 }
5839 }
5840}
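/*
 * Illustrative sketch only, not part of the driver: a hypothetical helper
 * that decodes the mask filled in by amdgpu_device_get_pcie_info() into the
 * highest PCIe gen advertised for the slot/platform.
 */
static int __maybe_unused amdgpu_example_max_platform_gen(struct amdgpu_device *adev)
{
	if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5)
		return 5;
	if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4)
		return 4;
	if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
		return 3;
	if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2)
		return 2;
	return 1;
}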
d38ceaf9 5841
08a2fd23
RE
5842/**
5843 * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
5844 *
5845 * @adev: amdgpu_device pointer
5846 * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
5847 *
5848 * Return true if @peer_adev can access (DMA) @adev through the PCIe
5849 * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
5850 * @peer_adev.
5851 */
5852bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
5853 struct amdgpu_device *peer_adev)
5854{
5855#ifdef CONFIG_HSA_AMD_P2P
5856 uint64_t address_mask = peer_adev->dev->dma_mask ?
5857 ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
5858 resource_size_t aper_limit =
5859 adev->gmc.aper_base + adev->gmc.aper_size - 1;
bb66ecbf
LL
5860 bool p2p_access =
5861 !adev->gmc.xgmi.connected_to_cpu &&
5862 !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
08a2fd23
RE
5863
5864 return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
5865 adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
5866 !(adev->gmc.aper_base & address_mask ||
5867 aper_limit & address_mask));
5868#else
5869 return false;
5870#endif
5871}
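/*
 * Illustrative sketch only, not part of the driver: peer DMA is only useful
 * when it works in both directions, so a hypothetical caller could require
 * the accessibility check to pass both ways before enabling peer mappings.
 */
static bool __maybe_unused amdgpu_example_p2p_both_ways(struct amdgpu_device *a,
							struct amdgpu_device *b)
{
	return amdgpu_device_is_peer_accessible(a, b) &&
	       amdgpu_device_is_peer_accessible(b, a);
}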
5872
361dbd01
AD
5873int amdgpu_device_baco_enter(struct drm_device *dev)
5874{
1348969a 5875 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5876 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01 5877
6ab68650 5878 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
5879 return -ENOTSUPP;
5880
8ab0d6f0 5881 if (ras && adev->ras_enabled &&
acdae216 5882 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5883 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
5884
9530273e 5885 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
5886}
5887
5888int amdgpu_device_baco_exit(struct drm_device *dev)
5889{
1348969a 5890 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5891 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 5892 int ret = 0;
361dbd01 5893
6ab68650 5894 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
5895 return -ENOTSUPP;
5896
9530273e
EQ
5897 ret = amdgpu_dpm_baco_exit(adev);
5898 if (ret)
5899 return ret;
7a22677b 5900
8ab0d6f0 5901 if (ras && adev->ras_enabled &&
acdae216 5902 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5903 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
5904
1bece222
CL
5905 if (amdgpu_passthrough(adev) &&
5906 adev->nbio.funcs->clear_doorbell_interrupt)
5907 adev->nbio.funcs->clear_doorbell_interrupt(adev);
5908
7a22677b 5909 return 0;
361dbd01 5910}
c9a6b82f
AG
5911
5912/**
5913 * amdgpu_pci_error_detected - Called when a PCI error is detected.
5914 * @pdev: PCI device struct
5915 * @state: PCI channel state
5916 *
5917 * Description: Called when a PCI error is detected.
5918 *
5919 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
5920 */
5921pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
5922{
5923 struct drm_device *dev = pci_get_drvdata(pdev);
5924 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5925 int i;
c9a6b82f
AG
5926
5927 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
5928
6894305c
AG
5929 if (adev->gmc.xgmi.num_physical_nodes > 1) {
5930 DRM_WARN("No support for XGMI hive yet...");
5931 return PCI_ERS_RESULT_DISCONNECT;
5932 }
5933
e17e27f9
GC
5934 adev->pci_channel_state = state;
5935
c9a6b82f
AG
5936 switch (state) {
5937 case pci_channel_io_normal:
5938 return PCI_ERS_RESULT_CAN_RECOVER;
acd89fca 5939 /* Fatal error, prepare for slot reset */
8a11d283
TZ
5940 case pci_channel_io_frozen:
5941 /*
d0fb18b5 5942 * Locking adev->reset_domain->sem will prevent any external access
acd89fca
AG
5943 * to GPU during PCI error recovery
5944 */
3675c2f2 5945 amdgpu_device_lock_reset_domain(adev->reset_domain);
e923be99 5946 amdgpu_device_set_mp1_state(adev);
acd89fca
AG
5947
5948 /*
5949 * Block any work scheduling as we do for regular GPU reset
5950 * for the duration of the recovery
5951 */
5952 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5953 struct amdgpu_ring *ring = adev->rings[i];
5954
5955 if (!ring || !ring->sched.thread)
5956 continue;
5957
5958 drm_sched_stop(&ring->sched, NULL);
5959 }
8f8c80f4 5960 atomic_inc(&adev->gpu_reset_counter);
c9a6b82f
AG
5961 return PCI_ERS_RESULT_NEED_RESET;
5962 case pci_channel_io_perm_failure:
5963 /* Permanent error, prepare for device removal */
5964 return PCI_ERS_RESULT_DISCONNECT;
5965 }
5966
5967 return PCI_ERS_RESULT_NEED_RESET;
5968}
5969
5970/**
5971 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
5972 * @pdev: pointer to PCI device
5973 */
5974pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
5975{
5976
5977 DRM_INFO("PCI error: mmio enabled callback!!\n");
5978
5979 /* TODO - dump whatever for debugging purposes */
5980	/* This is called only if amdgpu_pci_error_detected returns
5981 /* This called only if amdgpu_pci_error_detected returns
5982 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
5983 * works, no need to reset slot.
5984 */
5985
5986 return PCI_ERS_RESULT_RECOVERED;
5987}
5988
5989/**
5990 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
5991 * @pdev: PCI device struct
5992 *
5993 * Description: This routine is called by the pci error recovery
5994 * code after the PCI slot has been reset, just before we
5995 * should resume normal operations.
5996 */
5997pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
5998{
5999 struct drm_device *dev = pci_get_drvdata(pdev);
6000 struct amdgpu_device *adev = drm_to_adev(dev);
362c7b91 6001 int r, i;
04442bf7 6002 struct amdgpu_reset_context reset_context;
362c7b91 6003 u32 memsize;
7ac71382 6004 struct list_head device_list;
c9a6b82f
AG
6005
6006 DRM_INFO("PCI error: slot reset callback!!\n");
6007
04442bf7
LL
6008 memset(&reset_context, 0, sizeof(reset_context));
6009
7ac71382 6010 INIT_LIST_HEAD(&device_list);
655ce9cb 6011 list_add_tail(&adev->reset_list, &device_list);
7ac71382 6012
362c7b91
AG
6013 /* wait for asic to come out of reset */
6014 msleep(500);
6015
7ac71382 6016 /* Restore PCI confspace */
c1dd4aa6 6017 amdgpu_device_load_pci_state(pdev);
c9a6b82f 6018
362c7b91
AG
6019 /* confirm ASIC came out of reset */
6020 for (i = 0; i < adev->usec_timeout; i++) {
6021 memsize = amdgpu_asic_get_config_memsize(adev);
6022
6023 if (memsize != 0xffffffff)
6024 break;
6025 udelay(1);
6026 }
6027 if (memsize == 0xffffffff) {
6028 r = -ETIME;
6029 goto out;
6030 }
6031
04442bf7
LL
6032 reset_context.method = AMD_RESET_METHOD_NONE;
6033 reset_context.reset_req_dev = adev;
6034 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
6035 set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
6036
7afefb81 6037 adev->no_hw_access = true;
04442bf7 6038 r = amdgpu_device_pre_asic_reset(adev, &reset_context);
7afefb81 6039 adev->no_hw_access = false;
c9a6b82f
AG
6040 if (r)
6041 goto out;
6042
04442bf7 6043 r = amdgpu_do_asic_reset(&device_list, &reset_context);
c9a6b82f
AG
6044
6045out:
c9a6b82f 6046 if (!r) {
c1dd4aa6
AG
6047 if (amdgpu_device_cache_pci_state(adev->pdev))
6048 pci_restore_state(adev->pdev);
6049
c9a6b82f
AG
6050 DRM_INFO("PCIe error recovery succeeded\n");
6051 } else {
6052 DRM_ERROR("PCIe error recovery failed, err:%d", r);
e923be99
AG
6053 amdgpu_device_unset_mp1_state(adev);
6054 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f
AG
6055 }
6056
6057 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
6058}
6059
6060/**
6061 * amdgpu_pci_resume() - resume normal ops after PCI reset
6062 * @pdev: pointer to PCI device
6063 *
6064 * Called when the error recovery driver tells us that its
505199a3 6065 * OK to resume normal operation.
c9a6b82f
AG
6066 */
6067void amdgpu_pci_resume(struct pci_dev *pdev)
6068{
6069 struct drm_device *dev = pci_get_drvdata(pdev);
6070 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 6071 int i;
c9a6b82f 6072
c9a6b82f
AG
6073
6074 DRM_INFO("PCI error: resume callback!!\n");
acd89fca 6075
e17e27f9
GC
6076 /* Only continue execution for the case of pci_channel_io_frozen */
6077 if (adev->pci_channel_state != pci_channel_io_frozen)
6078 return;
6079
acd89fca
AG
6080 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6081 struct amdgpu_ring *ring = adev->rings[i];
6082
6083 if (!ring || !ring->sched.thread)
6084 continue;
6085
acd89fca
AG
6086 drm_sched_start(&ring->sched, true);
6087 }
6088
e923be99
AG
6089 amdgpu_device_unset_mp1_state(adev);
6090 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f 6091}
c1dd4aa6
AG
6092
6093bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
6094{
6095 struct drm_device *dev = pci_get_drvdata(pdev);
6096 struct amdgpu_device *adev = drm_to_adev(dev);
6097 int r;
6098
6099 r = pci_save_state(pdev);
6100 if (!r) {
6101 kfree(adev->pci_state);
6102
6103 adev->pci_state = pci_store_saved_state(pdev);
6104
6105 if (!adev->pci_state) {
6106 DRM_ERROR("Failed to store PCI saved state");
6107 return false;
6108 }
6109 } else {
6110 DRM_WARN("Failed to save PCI state, err:%d\n", r);
6111 return false;
6112 }
6113
6114 return true;
6115}
6116
6117bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
6118{
6119 struct drm_device *dev = pci_get_drvdata(pdev);
6120 struct amdgpu_device *adev = drm_to_adev(dev);
6121 int r;
6122
6123 if (!adev->pci_state)
6124 return false;
6125
6126 r = pci_load_saved_state(pdev, adev->pci_state);
6127
6128 if (!r) {
6129 pci_restore_state(pdev);
6130 } else {
6131 DRM_WARN("Failed to load PCI state, err:%d\n", r);
6132 return false;
6133 }
6134
6135 return true;
6136}
6137
810085dd
EH
6138void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
6139 struct amdgpu_ring *ring)
6140{
6141#ifdef CONFIG_X86_64
b818a5d3 6142 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
6143 return;
6144#endif
6145 if (adev->gmc.xgmi.connected_to_cpu)
6146 return;
6147
6148 if (ring && ring->funcs->emit_hdp_flush)
6149 amdgpu_ring_emit_hdp_flush(ring);
6150 else
6151 amdgpu_asic_flush_hdp(adev, ring);
6152}
c1dd4aa6 6153
810085dd
EH
6154void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
6155 struct amdgpu_ring *ring)
6156{
6157#ifdef CONFIG_X86_64
b818a5d3 6158 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
6159 return;
6160#endif
6161 if (adev->gmc.xgmi.connected_to_cpu)
6162 return;
c1dd4aa6 6163
810085dd
EH
6164 amdgpu_asic_invalidate_hdp(adev, ring);
6165}
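/*
 * Illustrative sketch only, not part of the driver: the usual HDP ordering,
 * stated as an assumption for illustration -- flush after CPU writes so the
 * GPU observes them, invalidate before the CPU reads data the GPU produced.
 */
static void __maybe_unused amdgpu_example_hdp_ordering(struct amdgpu_device *adev)
{
	/* CPU wrote to a GPU-visible buffer; push it out of the HDP cache. */
	amdgpu_device_flush_hdp(adev, NULL);

	/* GPU produced data the CPU is about to read back. */
	amdgpu_device_invalidate_hdp(adev, NULL);
}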
34f3a4a9 6166
89a7a870
AG
6167int amdgpu_in_reset(struct amdgpu_device *adev)
6168{
6169 return atomic_read(&adev->reset_domain->in_gpu_reset);
53a17b6b
TZ
6170}
6171
34f3a4a9
LY
6172/**
6173 * amdgpu_device_halt() - bring hardware to some kind of halt state
6174 *
6175 * @adev: amdgpu_device pointer
6176 *
6177 * Bring hardware to some kind of halt state so that no one can touch it
6178 * any more. It helps to maintain the error context when an error occurs.
6179 * Compared to a simple hang, the system will stay stable at least for SSH
6180 * access. Then it should be trivial to inspect the hardware state and
6181 * see what's going on. Implemented as follows:
6182 *
6183 * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
6184 * clears all CPU mappings to device, disallows remappings through page faults
6185 * 2. amdgpu_irq_disable_all() disables all interrupts
6186 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
6187 * 4. set adev->no_hw_access to avoid potential crashes after step 5
6188 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
6189 * 6. pci_disable_device() and pci_wait_for_pending_transaction()
6190 * flush any in flight DMA operations
6191 */
6192void amdgpu_device_halt(struct amdgpu_device *adev)
6193{
6194 struct pci_dev *pdev = adev->pdev;
e0f943b4 6195 struct drm_device *ddev = adev_to_drm(adev);
34f3a4a9 6196
2c1c7ba4 6197 amdgpu_xcp_dev_unplug(adev);
34f3a4a9
LY
6198 drm_dev_unplug(ddev);
6199
6200 amdgpu_irq_disable_all(adev);
6201
6202 amdgpu_fence_driver_hw_fini(adev);
6203
6204 adev->no_hw_access = true;
6205
6206 amdgpu_device_unmap_mmio(adev);
6207
6208 pci_disable_device(pdev);
6209 pci_wait_for_pending_transaction(pdev);
6210}
86700a40
XD
6211
6212u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
6213 u32 reg)
6214{
6215 unsigned long flags, address, data;
6216 u32 r;
6217
6218 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6219 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6220
6221 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6222 WREG32(address, reg * 4);
6223 (void)RREG32(address);
6224 r = RREG32(data);
6225 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6226 return r;
6227}
6228
6229void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
6230 u32 reg, u32 v)
6231{
6232 unsigned long flags, address, data;
6233
6234 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6235 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6236
6237 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6238 WREG32(address, reg * 4);
6239 (void)RREG32(address);
6240 WREG32(data, v);
6241 (void)RREG32(data);
6242 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6243}
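/*
 * Illustrative sketch only, not part of the driver: a hypothetical
 * read-modify-write helper built on the indirect PCIe-port accessors above.
 */
static void __maybe_unused amdgpu_example_pcie_port_rmw(struct amdgpu_device *adev,
							u32 reg, u32 clr, u32 set)
{
	u32 v = amdgpu_device_pcie_port_rreg(adev, reg);

	v = (v & ~clr) | set;
	amdgpu_device_pcie_port_wreg(adev, reg, v);
}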
68ce8b24
CK
6244
6245/**
6246 * amdgpu_device_switch_gang - switch to a new gang
6247 * @adev: amdgpu_device pointer
6248 * @gang: the gang to switch to
6249 *
6250 * Try to switch to a new gang.
6251 * Returns: NULL if we switched to the new gang or a reference to the current
6252 * gang leader.
6253 */
6254struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
6255 struct dma_fence *gang)
6256{
6257 struct dma_fence *old = NULL;
6258
6259 do {
6260 dma_fence_put(old);
6261 rcu_read_lock();
6262 old = dma_fence_get_rcu_safe(&adev->gang_submit);
6263 rcu_read_unlock();
6264
6265 if (old == gang)
6266 break;
6267
6268 if (!dma_fence_is_signaled(old))
6269 return old;
6270
6271 } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
6272 old, gang) != old);
6273
6274 dma_fence_put(old);
6275 return NULL;
6276}
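/*
 * Illustrative sketch only, not part of the driver: a hypothetical submitter
 * that blocks until it owns the gang.  A real submission path would more
 * likely add the returned leader as a scheduler dependency than wait here.
 */
static int __maybe_unused amdgpu_example_become_gang_leader(struct amdgpu_device *adev,
							    struct dma_fence *gang)
{
	struct dma_fence *old;

	while ((old = amdgpu_device_switch_gang(adev, gang))) {
		/* Wait for the current gang leader before retrying. */
		long r = dma_fence_wait(old, true);

		dma_fence_put(old);
		if (r < 0)
			return r;
	}

	return 0;
}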
220c8cc8
AD
6277
6278bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6279{
6280 switch (adev->asic_type) {
6281#ifdef CONFIG_DRM_AMDGPU_SI
6282 case CHIP_HAINAN:
6283#endif
6284 case CHIP_TOPAZ:
6285 /* chips with no display hardware */
6286 return false;
6287#ifdef CONFIG_DRM_AMDGPU_SI
6288 case CHIP_TAHITI:
6289 case CHIP_PITCAIRN:
6290 case CHIP_VERDE:
6291 case CHIP_OLAND:
6292#endif
6293#ifdef CONFIG_DRM_AMDGPU_CIK
6294 case CHIP_BONAIRE:
6295 case CHIP_HAWAII:
6296 case CHIP_KAVERI:
6297 case CHIP_KABINI:
6298 case CHIP_MULLINS:
6299#endif
6300 case CHIP_TONGA:
6301 case CHIP_FIJI:
6302 case CHIP_POLARIS10:
6303 case CHIP_POLARIS11:
6304 case CHIP_POLARIS12:
6305 case CHIP_VEGAM:
6306 case CHIP_CARRIZO:
6307 case CHIP_STONEY:
6308 /* chips with display hardware */
6309 return true;
6310 default:
6311 /* IP discovery */
4e8303cf 6312 if (!amdgpu_ip_version(adev, DCE_HWIP, 0) ||
220c8cc8
AD
6313 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6314 return false;
6315 return true;
6316 }
6317}
81283fee
JZ
6318
6319uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
6320 uint32_t inst, uint32_t reg_addr, char reg_name[],
6321 uint32_t expected_value, uint32_t mask)
6322{
6323 uint32_t ret = 0;
6324 uint32_t old_ = 0;
6325 uint32_t tmp_ = RREG32(reg_addr);
6326 uint32_t loop = adev->usec_timeout;
6327
6328 while ((tmp_ & (mask)) != (expected_value)) {
6329 if (old_ != tmp_) {
6330 loop = adev->usec_timeout;
6331 old_ = tmp_;
6332 } else
6333 udelay(1);
6334 tmp_ = RREG32(reg_addr);
6335 loop--;
6336 if (!loop) {
6337			DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08x\n",
6338 inst, reg_name, (uint32_t)expected_value,
6339 (uint32_t)(tmp_ & (mask)));
6340 ret = -ETIMEDOUT;
6341 break;
6342 }
6343 }
6344 return ret;
6345}
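/*
 * Illustrative sketch only, not part of the driver: polling a status
 * register until a ready bit is set via amdgpu_device_wait_on_rreg().
 * 'reg' and 'ready_bit' are caller-supplied placeholders, not real
 * register definitions.
 */
static int __maybe_unused amdgpu_example_wait_ready(struct amdgpu_device *adev,
						    uint32_t reg, uint32_t ready_bit)
{
	/* Wait until (reg & ready_bit) == ready_bit or the timeout expires. */
	return amdgpu_device_wait_on_rreg(adev, 0, reg, "example_status",
					  ready_bit, ready_bit);
}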