drm/amd/display: enable S/G display for recent APUs by default
[linux-2.6-block.git] / drivers / gpu / drm / amd / amdgpu / amdgpu_device.c
d38ceaf9
AD
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
b1ddf548 28#include <linux/power_supply.h>
0875dc9e 29#include <linux/kthread.h>
fdf2f6c5 30#include <linux/module.h>
d38ceaf9
AD
31#include <linux/console.h>
32#include <linux/slab.h>
4a74c38c 33#include <linux/iommu.h>
901e2be2 34#include <linux/pci.h>
3d8785f6
SA
35#include <linux/devcoredump.h>
36#include <generated/utsrelease.h>
08a2fd23 37#include <linux/pci-p2pdma.h>
d37a3929 38#include <linux/apple-gmux.h>
fdf2f6c5 39
b7cdb41e 40#include <drm/drm_aperture.h>
4562236b 41#include <drm/drm_atomic_helper.h>
973ad627 42#include <drm/drm_crtc_helper.h>
45b64fd9 43#include <drm/drm_fb_helper.h>
fcd70cd3 44#include <drm/drm_probe_helper.h>
d38ceaf9
AD
45#include <drm/amdgpu_drm.h>
46#include <linux/vgaarb.h>
47#include <linux/vga_switcheroo.h>
48#include <linux/efi.h>
49#include "amdgpu.h"
f4b373f4 50#include "amdgpu_trace.h"
d38ceaf9
AD
51#include "amdgpu_i2c.h"
52#include "atom.h"
53#include "amdgpu_atombios.h"
a5bde2f9 54#include "amdgpu_atomfirmware.h"
d0dd7f0c 55#include "amd_pcie.h"
33f34802
KW
56#ifdef CONFIG_DRM_AMDGPU_SI
57#include "si.h"
58#endif
a2e73f56
AD
59#ifdef CONFIG_DRM_AMDGPU_CIK
60#include "cik.h"
61#endif
aaa36a97 62#include "vi.h"
460826e6 63#include "soc15.h"
0a5b8c7b 64#include "nv.h"
d38ceaf9 65#include "bif/bif_4_1_d.h"
bec86378 66#include <linux/firmware.h>
89041940 67#include "amdgpu_vf_error.h"
d38ceaf9 68
ba997709 69#include "amdgpu_amdkfd.h"
d2f52ac8 70#include "amdgpu_pm.h"
d38ceaf9 71
5183411b 72#include "amdgpu_xgmi.h"
c030f2e4 73#include "amdgpu_ras.h"
9c7c85f7 74#include "amdgpu_pmu.h"
bd607166 75#include "amdgpu_fru_eeprom.h"
04442bf7 76#include "amdgpu_reset.h"
5183411b 77
d5ea093e 78#include <linux/suspend.h>
c6a6e2db 79#include <drm/task_barrier.h>
3f12acc8 80#include <linux/pm_runtime.h>
d5ea093e 81
f89f8c6b
AG
82#include <drm/drm_drv.h>
83
3ad5dcfe
KHF
84#if IS_ENABLED(CONFIG_X86)
85#include <asm/intel-family.h>
86#endif
87
e2a75f88 88MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
3f76dced 89MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
2d2e5e7e 90MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
ad5a67a7 91MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
54c4d17e 92MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
65e60f6e 93MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
42b325e5 94MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
e2a75f88 95
2dc80b00 96#define AMDGPU_RESUME_MS 2000
7258fa31
SK
97#define AMDGPU_MAX_RETRY_LIMIT 2
98#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
2dc80b00 99
b7cdb41e
ML
100static const struct drm_driver amdgpu_kms_driver;
101
050091ab 102const char *amdgpu_asic_name[] = {
da69c161
KW
103 "TAHITI",
104 "PITCAIRN",
105 "VERDE",
106 "OLAND",
107 "HAINAN",
d38ceaf9
AD
108 "BONAIRE",
109 "KAVERI",
110 "KABINI",
111 "HAWAII",
112 "MULLINS",
113 "TOPAZ",
114 "TONGA",
48299f95 115 "FIJI",
d38ceaf9 116 "CARRIZO",
139f4917 117 "STONEY",
2cc0c0b5
FC
118 "POLARIS10",
119 "POLARIS11",
c4642a47 120 "POLARIS12",
48ff108d 121 "VEGAM",
d4196f01 122 "VEGA10",
8fab806a 123 "VEGA12",
956fcddc 124 "VEGA20",
2ca8a5d2 125 "RAVEN",
d6c3b24e 126 "ARCTURUS",
1eee4228 127 "RENOIR",
d46b417a 128 "ALDEBARAN",
852a6626 129 "NAVI10",
d0f56dc2 130 "CYAN_SKILLFISH",
87dbad02 131 "NAVI14",
9802f5d7 132 "NAVI12",
ccaf72d3 133 "SIENNA_CICHLID",
ddd8fbe7 134 "NAVY_FLOUNDER",
4f1e9a76 135 "VANGOGH",
a2468e04 136 "DIMGREY_CAVEFISH",
6f169591 137 "BEIGE_GOBY",
ee9236b7 138 "YELLOW_CARP",
3ae695d6 139 "IP DISCOVERY",
d38ceaf9
AD
140 "LAST",
141};
142
dcea6e65
KR
143/**
144 * DOC: pcie_replay_count
145 *
146 * The amdgpu driver provides a sysfs API for reporting the total number
147 * of PCIe replays (NAKs)
148 * The file pcie_replay_count is used for this and returns the total
149 * number of replays as a sum of the NAKs generated and NAKs received
150 */
151
152static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
153 struct device_attribute *attr, char *buf)
154{
155 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 156 struct amdgpu_device *adev = drm_to_adev(ddev);
dcea6e65
KR
157 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
158
36000c7a 159 return sysfs_emit(buf, "%llu\n", cnt);
dcea6e65
KR
160}
161
b8920e1e 162static DEVICE_ATTR(pcie_replay_count, 0444,
dcea6e65
KR
163 amdgpu_device_get_pcie_replay_count, NULL);
164
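/*
 * Illustrative usage (not part of the original file): from userspace the
 * counter exposed above can be read directly through sysfs, e.g.
 *
 *   cat /sys/class/drm/card0/device/pcie_replay_count
 *
 * The card0 path is an assumption; the attribute is created in the sysfs
 * directory of the GPU's PCI device.
 */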
4798db85
LL
165/**
166 * DOC: board_info
167 *
168 * The amdgpu driver provides a sysfs API for giving board related information.
169 * It provides the form factor information in the format
170 *
171 * type : form factor
172 *
173 * Possible form factor values
174 *
175 * - "cem" - PCIE CEM card
176 * - "oam" - Open Compute Accelerator Module
177 * - "unknown" - Not known
178 *
179 */
180
76da73f0
LL
181static ssize_t amdgpu_device_get_board_info(struct device *dev,
182 struct device_attribute *attr,
183 char *buf)
184{
185 struct drm_device *ddev = dev_get_drvdata(dev);
186 struct amdgpu_device *adev = drm_to_adev(ddev);
187 enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM;
188 const char *pkg;
189
190 if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type)
191 pkg_type = adev->smuio.funcs->get_pkg_type(adev);
192
193 switch (pkg_type) {
194 case AMDGPU_PKG_TYPE_CEM:
195 pkg = "cem";
196 break;
197 case AMDGPU_PKG_TYPE_OAM:
198 pkg = "oam";
199 break;
200 default:
201 pkg = "unknown";
202 break;
203 }
204
205 return sysfs_emit(buf, "%s : %s\n", "type", pkg);
206}
207
208static DEVICE_ATTR(board_info, 0444, amdgpu_device_get_board_info, NULL);
209
210static struct attribute *amdgpu_board_attrs[] = {
211 &dev_attr_board_info.attr,
212 NULL,
213};
214
215static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj,
216 struct attribute *attr, int n)
217{
218 struct device *dev = kobj_to_dev(kobj);
219 struct drm_device *ddev = dev_get_drvdata(dev);
220 struct amdgpu_device *adev = drm_to_adev(ddev);
221
222 if (adev->flags & AMD_IS_APU)
223 return 0;
224
225 return attr->mode;
226}
227
228static const struct attribute_group amdgpu_board_attrs_group = {
229 .attrs = amdgpu_board_attrs,
230 .is_visible = amdgpu_board_attrs_is_visible
231};
232
5494d864
AD
233static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
234
bd607166 235
fd496ca8 236/**
b98c6299 237 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
fd496ca8
AD
238 *
239 * @dev: drm_device pointer
240 *
b98c6299 241 * Returns true if the device is a dGPU with ATPX power control,
fd496ca8
AD
242 * otherwise return false.
243 */
b98c6299 244bool amdgpu_device_supports_px(struct drm_device *dev)
fd496ca8
AD
245{
246 struct amdgpu_device *adev = drm_to_adev(dev);
247
b98c6299 248 if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
fd496ca8
AD
249 return true;
250 return false;
251}
252
e3ecdffa 253/**
0330b848 254 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
e3ecdffa
AD
255 *
256 * @dev: drm_device pointer
257 *
b98c6299 258 * Returns true if the device is a dGPU with ACPI power control,
e3ecdffa
AD
259 * otherwise return false.
260 */
31af062a 261bool amdgpu_device_supports_boco(struct drm_device *dev)
d38ceaf9 262{
1348969a 263 struct amdgpu_device *adev = drm_to_adev(dev);
d38ceaf9 264
b98c6299
AD
265 if (adev->has_pr3 ||
266 ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
d38ceaf9
AD
267 return true;
268 return false;
269}
270
a69cba42
AD
271/**
272 * amdgpu_device_supports_baco - Does the device support BACO
273 *
274 * @dev: drm_device pointer
275 *
276 * Returns true if the device supports BACO,
277 * otherwise return false.
278 */
279bool amdgpu_device_supports_baco(struct drm_device *dev)
280{
1348969a 281 struct amdgpu_device *adev = drm_to_adev(dev);
a69cba42
AD
282
283 return amdgpu_asic_supports_baco(adev);
284}
285
3fa8f89d
S
286/**
287 * amdgpu_device_supports_smart_shift - Is the device dGPU with
288 * smart shift support
289 *
290 * @dev: drm_device pointer
291 *
292 * Returns true if the device is a dGPU with Smart Shift support,
293 * otherwise returns false.
294 */
295bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
296{
297 return (amdgpu_device_supports_boco(dev) &&
298 amdgpu_acpi_is_power_shift_control_supported());
299}
300
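/*
 * Illustrative sketch (not part of the original file): how the predicates
 * above are typically consumed when choosing a runtime power-management
 * strategy for a device.  The enum and function names are placeholders for
 * illustration only.
 */
#if 0
enum example_rpm_mode { EX_RPM_NONE, EX_RPM_PX, EX_RPM_BOCO, EX_RPM_BACO };

static enum example_rpm_mode example_pick_rpm_mode(struct drm_device *dev)
{
	if (amdgpu_device_supports_px(dev))
		return EX_RPM_PX;
	if (amdgpu_device_supports_boco(dev))
		return EX_RPM_BOCO;
	if (amdgpu_device_supports_baco(dev))
		return EX_RPM_BACO;
	return EX_RPM_NONE;
}
#endif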
6e3cd2a9
MCC
301/*
302 * VRAM access helper functions
303 */
304
e35e2b11 305/**
048af66b 306 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
e35e2b11
TY
307 *
308 * @adev: amdgpu_device pointer
309 * @pos: offset of the buffer in vram
310 * @buf: virtual address of the buffer in system memory
311 * @size: read/write size; the buffer at @buf must be at least @size bytes
312 * @write: true - write to vram, otherwise - read from vram
313 */
048af66b
KW
314void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
315 void *buf, size_t size, bool write)
e35e2b11 316{
e35e2b11 317 unsigned long flags;
048af66b
KW
318 uint32_t hi = ~0, tmp = 0;
319 uint32_t *data = buf;
ce05ac56 320 uint64_t last;
f89f8c6b 321 int idx;
ce05ac56 322
c58a863b 323 if (!drm_dev_enter(adev_to_drm(adev), &idx))
f89f8c6b 324 return;
9d11eb0d 325
048af66b
KW
326 BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
327
328 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
329 for (last = pos + size; pos < last; pos += 4) {
330 tmp = pos >> 31;
331
332 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
333 if (tmp != hi) {
334 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
335 hi = tmp;
336 }
337 if (write)
338 WREG32_NO_KIQ(mmMM_DATA, *data++);
339 else
340 *data++ = RREG32_NO_KIQ(mmMM_DATA);
341 }
342
343 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
344 drm_dev_exit(idx);
345}
346
347/**
bbe04dec 348 * amdgpu_device_aper_access - access vram by the vram aperture
048af66b
KW
349 *
350 * @adev: amdgpu_device pointer
351 * @pos: offset of the buffer in vram
352 * @buf: virtual address of the buffer in system memory
353 * @size: read/write size; the buffer at @buf must be at least @size bytes
354 * @write: true - write to vram, otherwise - read from vram
355 *
356 * Returns the number of bytes that have been transferred.
357 */
358size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
359 void *buf, size_t size, bool write)
360{
9d11eb0d 361#ifdef CONFIG_64BIT
048af66b
KW
362 void __iomem *addr;
363 size_t count = 0;
364 uint64_t last;
365
366 if (!adev->mman.aper_base_kaddr)
367 return 0;
368
9d11eb0d
CK
369 last = min(pos + size, adev->gmc.visible_vram_size);
370 if (last > pos) {
048af66b
KW
371 addr = adev->mman.aper_base_kaddr + pos;
372 count = last - pos;
9d11eb0d
CK
373
374 if (write) {
375 memcpy_toio(addr, buf, count);
4c452b5c
SS
376 /* Make sure HDP write cache flush happens without any reordering
377 * after the system memory contents are sent over PCIe device
378 */
9d11eb0d 379 mb();
810085dd 380 amdgpu_device_flush_hdp(adev, NULL);
9d11eb0d 381 } else {
810085dd 382 amdgpu_device_invalidate_hdp(adev, NULL);
4c452b5c
SS
383 /* Make sure HDP read cache is invalidated before issuing a read
384 * to the PCIe device
385 */
9d11eb0d
CK
386 mb();
387 memcpy_fromio(buf, addr, count);
388 }
389
9d11eb0d 390 }
048af66b
KW
391
392 return count;
393#else
394 return 0;
9d11eb0d 395#endif
048af66b 396}
9d11eb0d 397
048af66b
KW
398/**
399 * amdgpu_device_vram_access - read/write a buffer in vram
400 *
401 * @adev: amdgpu_device pointer
402 * @pos: offset of the buffer in vram
403 * @buf: virtual address of the buffer in system memory
404 * @size: read/write size; the buffer at @buf must be at least @size bytes
405 * @write: true - write to vram, otherwise - read from vram
406 */
407void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
408 void *buf, size_t size, bool write)
409{
410 size_t count;
e35e2b11 411
048af66b
KW
412 /* try using the vram aperture to access vram first */
413 count = amdgpu_device_aper_access(adev, pos, buf, size, write);
414 size -= count;
415 if (size) {
416 /* use the MM path to access the rest of vram */
417 pos += count;
418 buf += count;
419 amdgpu_device_mm_access(adev, pos, buf, size, write);
e35e2b11
TY
420 }
421}
422
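/*
 * Illustrative sketch (not part of the original file): writing a dword to
 * VRAM and reading it back through the combined helper above.  @pos is a
 * caller-chosen, dword-aligned VRAM offset; names are placeholders.
 */
#if 0
static bool example_vram_dword_roundtrip(struct amdgpu_device *adev, loff_t pos)
{
	u32 pattern = 0xdeadbeef, readback = 0;

	amdgpu_device_vram_access(adev, pos, &pattern, sizeof(pattern), true);
	amdgpu_device_vram_access(adev, pos, &readback, sizeof(readback), false);

	return readback == pattern;
}
#endif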
d38ceaf9 423/*
f7ee1874 424 * register access helper functions.
d38ceaf9 425 */
56b53c0b
DL
426
427/* Check if hw access should be skipped because of hotplug or device error */
428bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
429{
7afefb81 430 if (adev->no_hw_access)
56b53c0b
DL
431 return true;
432
433#ifdef CONFIG_LOCKDEP
434 /*
435 * This is a bit complicated to understand, so worth a comment. What we assert
436 * here is that the GPU reset is not running on another thread in parallel.
437 *
438 * For this we trylock the read side of the reset semaphore, if that succeeds
439 * we know that the reset is not running in parallel.
440 *
441 * If the trylock fails we assert that we are either already holding the read
442 * side of the lock or are the reset thread itself and hold the write side of
443 * the lock.
444 */
445 if (in_task()) {
d0fb18b5
AG
446 if (down_read_trylock(&adev->reset_domain->sem))
447 up_read(&adev->reset_domain->sem);
56b53c0b 448 else
d0fb18b5 449 lockdep_assert_held(&adev->reset_domain->sem);
56b53c0b
DL
450 }
451#endif
452 return false;
453}
454
e3ecdffa 455/**
f7ee1874 456 * amdgpu_device_rreg - read a memory mapped IO or indirect register
e3ecdffa
AD
457 *
458 * @adev: amdgpu_device pointer
459 * @reg: dword aligned register offset
460 * @acc_flags: access flags which require special behavior
461 *
462 * Returns the 32 bit value from the offset specified.
463 */
f7ee1874
HZ
464uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
465 uint32_t reg, uint32_t acc_flags)
d38ceaf9 466{
f4b373f4
TSD
467 uint32_t ret;
468
56b53c0b 469 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
470 return 0;
471
f7ee1874
HZ
472 if ((reg * 4) < adev->rmmio_size) {
473 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
474 amdgpu_sriov_runtime(adev) &&
d0fb18b5 475 down_read_trylock(&adev->reset_domain->sem)) {
f7ee1874 476 ret = amdgpu_kiq_rreg(adev, reg);
d0fb18b5 477 up_read(&adev->reset_domain->sem);
f7ee1874
HZ
478 } else {
479 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
480 }
481 } else {
482 ret = adev->pcie_rreg(adev, reg * 4);
81202807 483 }
bc992ba5 484
f7ee1874 485 trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
e78b579d 486
f4b373f4 487 return ret;
d38ceaf9
AD
488}
489
421a2a30
ML
490/*
491 * MMIO register read with byte offset helper function
492 * @offset: byte offset from MMIO start
b8920e1e 493 */
421a2a30 494
e3ecdffa
AD
495/**
496 * amdgpu_mm_rreg8 - read a memory mapped IO register
497 *
498 * @adev: amdgpu_device pointer
499 * @offset: byte aligned register offset
500 *
501 * Returns the 8 bit value from the offset specified.
502 */
7cbbc745
AG
503uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
504{
56b53c0b 505 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
506 return 0;
507
421a2a30
ML
508 if (offset < adev->rmmio_size)
509 return (readb(adev->rmmio + offset));
510 BUG();
511}
512
513/*
514 * MMIO register write with byte offset helper function
515 * @offset: byte offset from MMIO start
516 * @value: the value to be written to the register
b8920e1e
SS
517 */
518
e3ecdffa
AD
519/**
520 * amdgpu_mm_wreg8 - write a memory mapped IO register
521 *
522 * @adev: amdgpu_device pointer
523 * @offset: byte aligned register offset
524 * @value: 8 bit value to write
525 *
526 * Writes the value specified to the offset specified.
527 */
7cbbc745
AG
528void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
529{
56b53c0b 530 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
531 return;
532
421a2a30
ML
533 if (offset < adev->rmmio_size)
534 writeb(value, adev->rmmio + offset);
535 else
536 BUG();
537}
538
e3ecdffa 539/**
f7ee1874 540 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
e3ecdffa
AD
541 *
542 * @adev: amdgpu_device pointer
543 * @reg: dword aligned register offset
544 * @v: 32 bit value to write to the register
545 * @acc_flags: access flags which require special behavior
546 *
547 * Writes the value specified to the offset specified.
548 */
f7ee1874
HZ
549void amdgpu_device_wreg(struct amdgpu_device *adev,
550 uint32_t reg, uint32_t v,
551 uint32_t acc_flags)
d38ceaf9 552{
56b53c0b 553 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
554 return;
555
f7ee1874
HZ
556 if ((reg * 4) < adev->rmmio_size) {
557 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
558 amdgpu_sriov_runtime(adev) &&
d0fb18b5 559 down_read_trylock(&adev->reset_domain->sem)) {
f7ee1874 560 amdgpu_kiq_wreg(adev, reg, v);
d0fb18b5 561 up_read(&adev->reset_domain->sem);
f7ee1874
HZ
562 } else {
563 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
564 }
565 } else {
566 adev->pcie_wreg(adev, reg * 4, v);
81202807 567 }
bc992ba5 568
f7ee1874 569 trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
2e0cc4d4 570}
d38ceaf9 571
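/*
 * Illustrative sketch (not part of the original file): driver code normally
 * reaches these helpers through the RREG32()/WREG32() style macros rather
 * than calling them directly.  A hypothetical read-modify-write looks like
 * this (register offset and bits are placeholders).
 */
#if 0
static void example_reg_rmw(struct amdgpu_device *adev, u32 reg, u32 set_bits)
{
	u32 tmp = amdgpu_device_rreg(adev, reg, 0);

	amdgpu_device_wreg(adev, reg, tmp | set_bits, 0);
}
#endif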
03f2abb0 572/**
4cc9f86f 573 * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
2e0cc4d4 574 *
71579346
RB
575 * @adev: amdgpu_device pointer
576 * @reg: mmio/rlc register
577 * @v: value to write
8057a9d6 578 * @xcc_id: xcc accelerated compute core id
71579346
RB
579 *
580 * this function is invoked only for the debugfs register access
03f2abb0 581 */
f7ee1874 582void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
8ed49dd1
VL
583 uint32_t reg, uint32_t v,
584 uint32_t xcc_id)
2e0cc4d4 585{
56b53c0b 586 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
587 return;
588
2e0cc4d4 589 if (amdgpu_sriov_fullaccess(adev) &&
f7ee1874
HZ
590 adev->gfx.rlc.funcs &&
591 adev->gfx.rlc.funcs->is_rlcg_access_range) {
2e0cc4d4 592 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
8ed49dd1 593 return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
4cc9f86f
TSD
594 } else if ((reg * 4) >= adev->rmmio_size) {
595 adev->pcie_wreg(adev, reg * 4, v);
f7ee1874
HZ
596 } else {
597 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
47ed4e1c 598 }
d38ceaf9
AD
599}
600
1bba3683
HZ
601/**
602 * amdgpu_device_indirect_rreg - read an indirect register
603 *
604 * @adev: amdgpu_device pointer
22f453fb 605 * @reg_addr: indirect register address to read from
1bba3683
HZ
606 *
607 * Returns the value of indirect register @reg_addr
608 */
609u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
1bba3683
HZ
610 u32 reg_addr)
611{
65ba96e9 612 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
613 void __iomem *pcie_index_offset;
614 void __iomem *pcie_data_offset;
65ba96e9
HZ
615 u32 r;
616
617 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
618 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1bba3683
HZ
619
620 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
621 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
622 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
623
624 writel(reg_addr, pcie_index_offset);
625 readl(pcie_index_offset);
626 r = readl(pcie_data_offset);
627 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
628
629 return r;
630}
631
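/*
 * Illustrative sketch (not part of the original file): the index/data pair
 * above is what SoC code typically installs behind adev->pcie_rreg and
 * adev->pcie_wreg so that registers outside the direct MMIO range remain
 * reachable.  The offset below is a placeholder, not a real register.
 */
#if 0
static u32 example_indirect_rmw(struct amdgpu_device *adev)
{
	u32 val = amdgpu_device_indirect_rreg(adev, 0x3808); /* placeholder offset */

	amdgpu_device_indirect_wreg(adev, 0x3808, val | 0x1);
	return val;
}
#endif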
0c552ed3
LM
632u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
633 u64 reg_addr)
634{
635 unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
636 u32 r;
637 void __iomem *pcie_index_offset;
638 void __iomem *pcie_index_hi_offset;
639 void __iomem *pcie_data_offset;
640
641 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
642 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
d57e24aa 643 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
0c552ed3
LM
644 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
645 else
646 pcie_index_hi = 0;
647
648 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
649 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
650 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
651 if (pcie_index_hi != 0)
652 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
653 pcie_index_hi * 4;
654
655 writel(reg_addr, pcie_index_offset);
656 readl(pcie_index_offset);
657 if (pcie_index_hi != 0) {
658 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
659 readl(pcie_index_hi_offset);
660 }
661 r = readl(pcie_data_offset);
662
663 /* clear the high bits */
664 if (pcie_index_hi != 0) {
665 writel(0, pcie_index_hi_offset);
666 readl(pcie_index_hi_offset);
667 }
668
669 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
670
671 return r;
672}
673
1bba3683
HZ
674/**
675 * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
676 *
677 * @adev: amdgpu_device pointer
22f453fb 678 * @reg_addr: indirect register address to read from
1bba3683
HZ
679 *
680 * Returns the value of indirect register @reg_addr
681 */
682u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
1bba3683
HZ
683 u32 reg_addr)
684{
65ba96e9 685 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
686 void __iomem *pcie_index_offset;
687 void __iomem *pcie_data_offset;
65ba96e9
HZ
688 u64 r;
689
690 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
691 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1bba3683
HZ
692
693 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
694 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
695 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
696
697 /* read low 32 bits */
698 writel(reg_addr, pcie_index_offset);
699 readl(pcie_index_offset);
700 r = readl(pcie_data_offset);
701 /* read high 32 bits */
702 writel(reg_addr + 4, pcie_index_offset);
703 readl(pcie_index_offset);
704 r |= ((u64)readl(pcie_data_offset) << 32);
705 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
706
707 return r;
708}
709
a76b2870
CL
710u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
711 u64 reg_addr)
712{
713 unsigned long flags, pcie_index, pcie_data;
714 unsigned long pcie_index_hi = 0;
715 void __iomem *pcie_index_offset;
716 void __iomem *pcie_index_hi_offset;
717 void __iomem *pcie_data_offset;
718 u64 r;
719
720 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
721 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
722 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
723 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
724
725 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
726 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
727 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
728 if (pcie_index_hi != 0)
729 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
730 pcie_index_hi * 4;
731
732 /* read low 32 bits */
733 writel(reg_addr, pcie_index_offset);
734 readl(pcie_index_offset);
735 if (pcie_index_hi != 0) {
736 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
737 readl(pcie_index_hi_offset);
738 }
739 r = readl(pcie_data_offset);
740 /* read high 32 bits */
741 writel(reg_addr + 4, pcie_index_offset);
742 readl(pcie_index_offset);
743 if (pcie_index_hi != 0) {
744 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
745 readl(pcie_index_hi_offset);
746 }
747 r |= ((u64)readl(pcie_data_offset) << 32);
748
749 /* clear the high bits */
750 if (pcie_index_hi != 0) {
751 writel(0, pcie_index_hi_offset);
752 readl(pcie_index_hi_offset);
753 }
754
755 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
756
757 return r;
758}
759
1bba3683
HZ
760/**
761 * amdgpu_device_indirect_wreg - write an indirect register address
762 *
763 * @adev: amdgpu_device pointer
1bba3683
HZ
764 * @reg_addr: indirect register offset
765 * @reg_data: indirect register data
766 *
767 */
768void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
1bba3683
HZ
769 u32 reg_addr, u32 reg_data)
770{
65ba96e9 771 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
772 void __iomem *pcie_index_offset;
773 void __iomem *pcie_data_offset;
774
65ba96e9
HZ
775 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
776 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
777
1bba3683
HZ
778 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
779 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
780 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
781
782 writel(reg_addr, pcie_index_offset);
783 readl(pcie_index_offset);
784 writel(reg_data, pcie_data_offset);
785 readl(pcie_data_offset);
786 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
787}
788
0c552ed3
LM
789void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
790 u64 reg_addr, u32 reg_data)
791{
792 unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
793 void __iomem *pcie_index_offset;
794 void __iomem *pcie_index_hi_offset;
795 void __iomem *pcie_data_offset;
796
797 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
798 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
d57e24aa 799 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
0c552ed3
LM
800 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
801 else
802 pcie_index_hi = 0;
803
804 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
805 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
806 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
807 if (pcie_index_hi != 0)
808 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
809 pcie_index_hi * 4;
810
811 writel(reg_addr, pcie_index_offset);
812 readl(pcie_index_offset);
813 if (pcie_index_hi != 0) {
814 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
815 readl(pcie_index_hi_offset);
816 }
817 writel(reg_data, pcie_data_offset);
818 readl(pcie_data_offset);
819
820 /* clear the high bits */
821 if (pcie_index_hi != 0) {
822 writel(0, pcie_index_hi_offset);
823 readl(pcie_index_hi_offset);
824 }
825
826 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
827}
828
1bba3683
HZ
829/**
830 * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
831 *
832 * @adev: amdgpu_device pointer
1bba3683
HZ
833 * @reg_addr: indirect register offset
834 * @reg_data: indirect register data
835 *
836 */
837void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
1bba3683
HZ
838 u32 reg_addr, u64 reg_data)
839{
65ba96e9 840 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
841 void __iomem *pcie_index_offset;
842 void __iomem *pcie_data_offset;
843
65ba96e9
HZ
844 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
845 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
846
1bba3683
HZ
847 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
848 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
849 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
850
851 /* write low 32 bits */
852 writel(reg_addr, pcie_index_offset);
853 readl(pcie_index_offset);
854 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
855 readl(pcie_data_offset);
856 /* write high 32 bits */
857 writel(reg_addr + 4, pcie_index_offset);
858 readl(pcie_index_offset);
859 writel((u32)(reg_data >> 32), pcie_data_offset);
860 readl(pcie_data_offset);
861 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
862}
863
a76b2870
CL
864void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
865 u64 reg_addr, u64 reg_data)
866{
867 unsigned long flags, pcie_index, pcie_data;
868 unsigned long pcie_index_hi = 0;
869 void __iomem *pcie_index_offset;
870 void __iomem *pcie_index_hi_offset;
871 void __iomem *pcie_data_offset;
872
873 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
874 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
875 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
876 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
877
878 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
879 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
880 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
881 if (pcie_index_hi != 0)
882 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
883 pcie_index_hi * 4;
884
885 /* write low 32 bits */
886 writel(reg_addr, pcie_index_offset);
887 readl(pcie_index_offset);
888 if (pcie_index_hi != 0) {
889 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
890 readl(pcie_index_hi_offset);
891 }
892 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
893 readl(pcie_data_offset);
894 /* write high 32 bits */
895 writel(reg_addr + 4, pcie_index_offset);
896 readl(pcie_index_offset);
897 if (pcie_index_hi != 0) {
898 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
899 readl(pcie_index_hi_offset);
900 }
901 writel((u32)(reg_data >> 32), pcie_data_offset);
902 readl(pcie_data_offset);
903
904 /* clear the high bits */
905 if (pcie_index_hi != 0) {
906 writel(0, pcie_index_hi_offset);
907 readl(pcie_index_hi_offset);
908 }
909
910 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
911}
912
dabc114e
HZ
913/**
914 * amdgpu_device_get_rev_id - query device rev_id
915 *
916 * @adev: amdgpu_device pointer
917 *
918 * Return device rev_id
919 */
920u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
921{
922 return adev->nbio.funcs->get_rev_id(adev);
923}
924
d38ceaf9
AD
925/**
926 * amdgpu_invalid_rreg - dummy reg read function
927 *
982a820b 928 * @adev: amdgpu_device pointer
d38ceaf9
AD
929 * @reg: offset of register
930 *
931 * Dummy register read function. Used for register blocks
932 * that certain asics don't have (all asics).
933 * Returns the value in the register.
934 */
935static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
936{
937 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
938 BUG();
939 return 0;
940}
941
0c552ed3
LM
942static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
943{
944 DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
945 BUG();
946 return 0;
947}
948
d38ceaf9
AD
949/**
950 * amdgpu_invalid_wreg - dummy reg write function
951 *
982a820b 952 * @adev: amdgpu_device pointer
d38ceaf9
AD
953 * @reg: offset of register
954 * @v: value to write to the register
955 *
956 * Dummy register write function. Used for register blocks
957 * that certain asics don't have (all asics).
958 */
959static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
960{
961 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
962 reg, v);
963 BUG();
964}
965
0c552ed3
LM
966static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
967{
968 DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
969 reg, v);
970 BUG();
971}
972
4fa1c6a6
TZ
973/**
974 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
975 *
982a820b 976 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
977 * @reg: offset of register
978 *
979 * Dummy register read function. Used for register blocks
980 * that certain asics don't have (all asics).
981 * Returns the value in the register.
982 */
983static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
984{
985 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
986 BUG();
987 return 0;
988}
989
a76b2870
CL
990static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg)
991{
992 DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
993 BUG();
994 return 0;
995}
996
4fa1c6a6
TZ
997/**
998 * amdgpu_invalid_wreg64 - dummy reg write function
999 *
982a820b 1000 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
1001 * @reg: offset of register
1002 * @v: value to write to the register
1003 *
1004 * Dummy register write function. Used for register blocks
1005 * that certain asics don't have (all asics).
1006 */
1007static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
1008{
1009 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
1010 reg, v);
1011 BUG();
1012}
1013
a76b2870
CL
1014static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v)
1015{
1016 DRM_ERROR("Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n",
1017 reg, v);
1018 BUG();
1019}
1020
d38ceaf9
AD
1021/**
1022 * amdgpu_block_invalid_rreg - dummy reg read function
1023 *
982a820b 1024 * @adev: amdgpu_device pointer
d38ceaf9
AD
1025 * @block: offset of instance
1026 * @reg: offset of register
1027 *
1028 * Dummy register read function. Used for register blocks
1029 * that certain asics don't have (all asics).
1030 * Returns the value in the register.
1031 */
1032static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
1033 uint32_t block, uint32_t reg)
1034{
1035 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
1036 reg, block);
1037 BUG();
1038 return 0;
1039}
1040
1041/**
1042 * amdgpu_block_invalid_wreg - dummy reg write function
1043 *
982a820b 1044 * @adev: amdgpu_device pointer
d38ceaf9
AD
1045 * @block: offset of instance
1046 * @reg: offset of register
1047 * @v: value to write to the register
1048 *
1049 * Dummy register write function. Used for register blocks
1050 * that certain asics don't have (all asics).
1051 */
1052static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
1053 uint32_t block,
1054 uint32_t reg, uint32_t v)
1055{
1056 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
1057 reg, block, v);
1058 BUG();
1059}
1060
4d2997ab
AD
1061/**
1062 * amdgpu_device_asic_init - Wrapper for atom asic_init
1063 *
982a820b 1064 * @adev: amdgpu_device pointer
4d2997ab
AD
1065 *
1066 * Does any asic specific work and then calls atom asic init.
1067 */
1068static int amdgpu_device_asic_init(struct amdgpu_device *adev)
1069{
15c5c5f5
LL
1070 int ret;
1071
4d2997ab
AD
1072 amdgpu_asic_pre_asic_init(adev);
1073
4e8303cf
LL
1074 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
1075 amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
15c5c5f5
LL
1076 amdgpu_psp_wait_for_bootloader(adev);
1077 ret = amdgpu_atomfirmware_asic_init(adev, true);
1078 return ret;
1079 } else {
85d1bcc6 1080 return amdgpu_atom_asic_init(adev->mode_info.atom_context);
15c5c5f5
LL
1081 }
1082
1083 return 0;
4d2997ab
AD
1084}
1085
e3ecdffa 1086/**
7ccfd79f 1087 * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
e3ecdffa 1088 *
982a820b 1089 * @adev: amdgpu_device pointer
e3ecdffa
AD
1090 *
1091 * Allocates a scratch page of VRAM for use by various things in the
1092 * driver.
1093 */
7ccfd79f 1094static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
d38ceaf9 1095{
7ccfd79f
CK
1096 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
1097 AMDGPU_GEM_DOMAIN_VRAM |
1098 AMDGPU_GEM_DOMAIN_GTT,
1099 &adev->mem_scratch.robj,
1100 &adev->mem_scratch.gpu_addr,
1101 (void **)&adev->mem_scratch.ptr);
d38ceaf9
AD
1102}
1103
e3ecdffa 1104/**
7ccfd79f 1105 * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
e3ecdffa 1106 *
982a820b 1107 * @adev: amdgpu_device pointer
e3ecdffa
AD
1108 *
1109 * Frees the VRAM scratch page.
1110 */
7ccfd79f 1111static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 1112{
7ccfd79f 1113 amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
d38ceaf9
AD
1114}
1115
1116/**
9c3f2b54 1117 * amdgpu_device_program_register_sequence - program an array of registers.
d38ceaf9
AD
1118 *
1119 * @adev: amdgpu_device pointer
1120 * @registers: pointer to the register array
1121 * @array_size: size of the register array
1122 *
b8920e1e 1123 * Programs an array of registers with and/or masks.
d38ceaf9
AD
1124 * This is a helper for setting golden registers.
1125 */
9c3f2b54
AD
1126void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
1127 const u32 *registers,
1128 const u32 array_size)
d38ceaf9
AD
1129{
1130 u32 tmp, reg, and_mask, or_mask;
1131 int i;
1132
1133 if (array_size % 3)
1134 return;
1135
47fc644f 1136 for (i = 0; i < array_size; i += 3) {
d38ceaf9
AD
1137 reg = registers[i + 0];
1138 and_mask = registers[i + 1];
1139 or_mask = registers[i + 2];
1140
1141 if (and_mask == 0xffffffff) {
1142 tmp = or_mask;
1143 } else {
1144 tmp = RREG32(reg);
1145 tmp &= ~and_mask;
e0d07657
HZ
1146 if (adev->family >= AMDGPU_FAMILY_AI)
1147 tmp |= (or_mask & and_mask);
1148 else
1149 tmp |= or_mask;
d38ceaf9
AD
1150 }
1151 WREG32(reg, tmp);
1152 }
1153}
1154
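/*
 * Illustrative sketch (not part of the original file): a golden register
 * list is a flat array of {offset, and_mask, or_mask} triplets.  With an
 * and_mask of 0xffffffff the register is simply overwritten with or_mask.
 * The offsets and values below are placeholders only.
 */
#if 0
static const u32 example_golden_settings[] = {
	/* offset        and_mask    or_mask */
	0x000031e8, 0xffffffff, 0x00000000,
	0x000031f0, 0x0000ff00, 0x00001a00,
};

static void example_program_golden(struct amdgpu_device *adev)
{
	amdgpu_device_program_register_sequence(adev, example_golden_settings,
						ARRAY_SIZE(example_golden_settings));
}
#endif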
e3ecdffa
AD
1155/**
1156 * amdgpu_device_pci_config_reset - reset the GPU
1157 *
1158 * @adev: amdgpu_device pointer
1159 *
1160 * Resets the GPU using the pci config reset sequence.
1161 * Only applicable to asics prior to vega10.
1162 */
8111c387 1163void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
1164{
1165 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
1166}
1167
af484df8
AD
1168/**
1169 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1170 *
1171 * @adev: amdgpu_device pointer
1172 *
1173 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1174 */
1175int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1176{
1177 return pci_reset_function(adev->pdev);
1178}
1179
d38ceaf9 1180/*
06ec9070 1181 * amdgpu_device_wb_*()
455a7bc2 1182 * Writeback is the method by which the GPU updates special pages in memory
ea81a173 1183 * with the status of certain GPU events (fences, ring pointers, etc.).
d38ceaf9
AD
1184 */
1185
1186/**
06ec9070 1187 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
1188 *
1189 * @adev: amdgpu_device pointer
1190 *
1191 * Disables Writeback and frees the Writeback memory (all asics).
1192 * Used at driver shutdown.
1193 */
06ec9070 1194static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1195{
1196 if (adev->wb.wb_obj) {
a76ed485
AD
1197 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1198 &adev->wb.gpu_addr,
1199 (void **)&adev->wb.wb);
d38ceaf9
AD
1200 adev->wb.wb_obj = NULL;
1201 }
1202}
1203
1204/**
03f2abb0 1205 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
d38ceaf9
AD
1206 *
1207 * @adev: amdgpu_device pointer
1208 *
455a7bc2 1209 * Initializes writeback and allocates writeback memory (all asics).
d38ceaf9
AD
1210 * Used at driver startup.
1211 * Returns 0 on success or a negative error code on failure.
1212 */
06ec9070 1213static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
1214{
1215 int r;
1216
1217 if (adev->wb.wb_obj == NULL) {
97407b63
AD
1218 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1219 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
1220 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1221 &adev->wb.wb_obj, &adev->wb.gpu_addr,
1222 (void **)&adev->wb.wb);
d38ceaf9
AD
1223 if (r) {
1224 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1225 return r;
1226 }
d38ceaf9
AD
1227
1228 adev->wb.num_wb = AMDGPU_MAX_WB;
1229 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1230
1231 /* clear wb memory */
73469585 1232 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
1233 }
1234
1235 return 0;
1236}
1237
1238/**
131b4b36 1239 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
1240 *
1241 * @adev: amdgpu_device pointer
1242 * @wb: wb index
1243 *
1244 * Allocate a wb slot for use by the driver (all asics).
1245 * Returns 0 on success or -EINVAL on failure.
1246 */
131b4b36 1247int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9
AD
1248{
1249 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 1250
97407b63 1251 if (offset < adev->wb.num_wb) {
7014285a 1252 __set_bit(offset, adev->wb.used);
63ae07ca 1253 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
1254 return 0;
1255 } else {
1256 return -EINVAL;
1257 }
1258}
1259
d38ceaf9 1260/**
131b4b36 1261 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
1262 *
1263 * @adev: amdgpu_device pointer
1264 * @wb: wb index
1265 *
1266 * Free a wb slot allocated for use by the driver (all asics)
1267 */
131b4b36 1268void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 1269{
73469585 1270 wb >>= 3;
d38ceaf9 1271 if (wb < adev->wb.num_wb)
73469585 1272 __clear_bit(wb, adev->wb.used);
d38ceaf9
AD
1273}
1274
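/*
 * Illustrative sketch (not part of the original file): a typical writeback
 * slot lifecycle.  amdgpu_device_wb_get() hands back a dword index into
 * adev->wb.wb; the GPU-visible address of that slot is
 * adev->wb.gpu_addr + index * 4.  The function name is a placeholder.
 */
#if 0
static int example_wb_usage(struct amdgpu_device *adev)
{
	u32 index;
	u64 gpu_addr;
	int r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (u64)index * 4; /* address the GPU writes to */
	dev_info(adev->dev, "wb slot %u at 0x%llx holds 0x%08x\n",
		 index, gpu_addr, adev->wb.wb[index]);

	amdgpu_device_wb_free(adev, index);
	return 0;
}
#endif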
d6895ad3
CK
1275/**
1276 * amdgpu_device_resize_fb_bar - try to resize FB BAR
1277 *
1278 * @adev: amdgpu_device pointer
1279 *
1280 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1281 * to fail, but if any of the BARs is not accessible after the size we abort
1282 * driver loading by returning -ENODEV.
1283 */
1284int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1285{
453f617a 1286 int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
31b8adab
CK
1287 struct pci_bus *root;
1288 struct resource *res;
b8920e1e 1289 unsigned int i;
d6895ad3
CK
1290 u16 cmd;
1291 int r;
1292
822130b5
AB
1293 if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1294 return 0;
1295
0c03b912 1296 /* Bypass for VF */
1297 if (amdgpu_sriov_vf(adev))
1298 return 0;
1299
b7221f2b
AD
1300 /* skip if the bios has already enabled large BAR */
1301 if (adev->gmc.real_vram_size &&
1302 (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1303 return 0;
1304
31b8adab
CK
1305 /* Check if the root BUS has 64bit memory resources */
1306 root = adev->pdev->bus;
1307 while (root->parent)
1308 root = root->parent;
1309
1310 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 1311 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
1312 res->start > 0x100000000ull)
1313 break;
1314 }
1315
1316 /* Trying to resize is pointless without a root hub window above 4GB */
1317 if (!res)
1318 return 0;
1319
453f617a
ND
1320 /* Limit the BAR size to what is available */
1321 rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1322 rbar_size);
1323
d6895ad3
CK
1324 /* Disable memory decoding while we change the BAR addresses and size */
1325 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1326 pci_write_config_word(adev->pdev, PCI_COMMAND,
1327 cmd & ~PCI_COMMAND_MEMORY);
1328
1329 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
43c064db 1330 amdgpu_doorbell_fini(adev);
d6895ad3
CK
1331 if (adev->asic_type >= CHIP_BONAIRE)
1332 pci_release_resource(adev->pdev, 2);
1333
1334 pci_release_resource(adev->pdev, 0);
1335
1336 r = pci_resize_resource(adev->pdev, 0, rbar_size);
1337 if (r == -ENOSPC)
1338 DRM_INFO("Not enough PCI address space for a large BAR.");
1339 else if (r && r != -ENOTSUPP)
1340 DRM_ERROR("Problem resizing BAR0 (%d).", r);
1341
1342 pci_assign_unassigned_bus_resources(adev->pdev->bus);
1343
1344 /* When the doorbell or fb BAR isn't available we have no chance of
1345 * using the device.
1346 */
43c064db 1347 r = amdgpu_doorbell_init(adev);
d6895ad3
CK
1348 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1349 return -ENODEV;
1350
1351 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1352
1353 return 0;
1354}
a05502e5 1355
9535a86a
SZ
1356static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
1357{
b8920e1e 1358 if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
9535a86a 1359 return false;
9535a86a
SZ
1360
1361 return true;
1362}
1363
d38ceaf9
AD
1364/*
1365 * GPU helpers function.
1366 */
1367/**
39c640c0 1368 * amdgpu_device_need_post - check if the hw need post or not
d38ceaf9
AD
1369 *
1370 * @adev: amdgpu_device pointer
1371 *
c836fec5
JQ
1372 * Check if the asic has been initialized (all asics) at driver startup
1373 * or post is needed if hw reset is performed.
1374 * Returns true if post is needed, false if not.
d38ceaf9 1375 */
39c640c0 1376bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
1377{
1378 uint32_t reg;
1379
bec86378
ML
1380 if (amdgpu_sriov_vf(adev))
1381 return false;
1382
9535a86a
SZ
1383 if (!amdgpu_device_read_bios(adev))
1384 return false;
1385
bec86378 1386 if (amdgpu_passthrough(adev)) {
1da2c326
ML
1387 /* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
1388 * some old smc fw still need driver do vPost otherwise gpu hang, while
1389 * those smc fw version above 22.15 doesn't have this flaw, so we force
1390 * vpost executed for smc version below 22.15
bec86378
ML
1391 */
1392 if (adev->asic_type == CHIP_FIJI) {
1393 int err;
1394 uint32_t fw_ver;
b8920e1e 1395
bec86378
ML
1396 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1397 /* force vPost if error occurred */
1398 if (err)
1399 return true;
1400
1401 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1da2c326
ML
1402 if (fw_ver < 0x00160e00)
1403 return true;
bec86378 1404 }
bec86378 1405 }
91fe77eb 1406
e3c1b071 1407 /* Don't post if we need to reset whole hive on init */
1408 if (adev->gmc.xgmi.pending_reset)
1409 return false;
1410
91fe77eb 1411 if (adev->has_hw_reset) {
1412 adev->has_hw_reset = false;
1413 return true;
1414 }
1415
1416 /* bios scratch used on CIK+ */
1417 if (adev->asic_type >= CHIP_BONAIRE)
1418 return amdgpu_atombios_scratch_need_asic_init(adev);
1419
1420 /* check MEM_SIZE for older asics */
1421 reg = amdgpu_asic_get_config_memsize(adev);
1422
1423 if ((reg != 0) && (reg != 0xffffffff))
1424 return false;
1425
1426 return true;
70e64c4d
ML
1427}
1428
bb0f8429
ML
1429/*
1430 * Check whether seamless boot is supported.
1431 *
7f4ce7b5
ML
1432 * So far we only support seamless boot on DCE 3.0 or later.
1433 * If users report that it works on older ASICS as well, we may
1434 * loosen this.
bb0f8429
ML
1435 */
1436bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
1437{
5dc270d3
ML
1438 switch (amdgpu_seamless) {
1439 case -1:
1440 break;
1441 case 1:
1442 return true;
1443 case 0:
1444 return false;
1445 default:
1446 DRM_ERROR("Invalid value for amdgpu.seamless: %d\n",
1447 amdgpu_seamless);
1448 return false;
1449 }
1450
3657a1d5
ML
1451 if (!(adev->flags & AMD_IS_APU))
1452 return false;
1453
5dc270d3
ML
1454 if (adev->mman.keep_stolen_vga_memory)
1455 return false;
1456
7f4ce7b5 1457 return adev->ip_versions[DCE_HWIP][0] >= IP_VERSION(3, 0, 0);
bb0f8429
ML
1458}
1459
5d1eb4c4
ML
1460/*
1461 * Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic
1462 * speed switching. Until we have confirmation from Intel that a specific host
1463 * supports it, it's safer that we keep it disabled for all.
1464 *
1465 * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1466 * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1467 */
1468bool amdgpu_device_pcie_dynamic_switching_supported(void)
1469{
1470#if IS_ENABLED(CONFIG_X86)
1471 struct cpuinfo_x86 *c = &cpu_data(0);
1472
1473 if (c->x86_vendor == X86_VENDOR_INTEL)
1474 return false;
1475#endif
1476 return true;
1477}
1478
0ab5d711
ML
1479/**
1480 * amdgpu_device_should_use_aspm - check if the device should program ASPM
1481 *
1482 * @adev: amdgpu_device pointer
1483 *
1484 * Confirm whether the module parameter and pcie bridge agree that ASPM should
1485 * be set for this device.
1486 *
1487 * Returns true if it should be used or false if not.
1488 */
1489bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1490{
1491 switch (amdgpu_aspm) {
1492 case -1:
1493 break;
1494 case 0:
1495 return false;
1496 case 1:
1497 return true;
1498 default:
1499 return false;
1500 }
1501 return pcie_aspm_enabled(adev->pdev);
1502}
1503
3ad5dcfe
KHF
1504bool amdgpu_device_aspm_support_quirk(void)
1505{
1506#if IS_ENABLED(CONFIG_X86)
1507 struct cpuinfo_x86 *c = &cpu_data(0);
1508
1509 return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
1510#else
1511 return true;
1512#endif
1513}
1514
d38ceaf9
AD
1515/* if we get transitioned to only one device, take VGA back */
1516/**
06ec9070 1517 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9 1518 *
bf44e8ce 1519 * @pdev: PCI device pointer
d38ceaf9
AD
1520 * @state: enable/disable vga decode
1521 *
1522 * Enable/disable vga decode (all asics).
1523 * Returns VGA resource flags.
1524 */
bf44e8ce
CH
1525static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1526 bool state)
d38ceaf9 1527{
bf44e8ce 1528 struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
b8920e1e 1529
d38ceaf9
AD
1530 amdgpu_asic_set_vga_state(adev, state);
1531 if (state)
1532 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1533 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1534 else
1535 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1536}
1537
e3ecdffa
AD
1538/**
1539 * amdgpu_device_check_block_size - validate the vm block size
1540 *
1541 * @adev: amdgpu_device pointer
1542 *
1543 * Validates the vm block size specified via module parameter.
1544 * The vm block size defines number of bits in page table versus page directory,
1545 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1546 * page table and the remaining bits are in the page directory.
1547 */
06ec9070 1548static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
1549{
1550 /* defines number of bits in page table versus page directory,
1551 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
b8920e1e
SS
1552 * page table and the remaining bits are in the page directory
1553 */
bab4fee7
JZ
1554 if (amdgpu_vm_block_size == -1)
1555 return;
a1adf8be 1556
bab4fee7 1557 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
1558 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1559 amdgpu_vm_block_size);
97489129 1560 amdgpu_vm_block_size = -1;
a1adf8be 1561 }
a1adf8be
CZ
1562}
1563
e3ecdffa
AD
1564/**
1565 * amdgpu_device_check_vm_size - validate the vm size
1566 *
1567 * @adev: amdgpu_device pointer
1568 *
1569 * Validates the vm size in GB specified via module parameter.
1570 * The VM size is the size of the GPU virtual memory space in GB.
1571 */
06ec9070 1572static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 1573{
64dab074
AD
1574 /* no need to check the default value */
1575 if (amdgpu_vm_size == -1)
1576 return;
1577
83ca145d
ZJ
1578 if (amdgpu_vm_size < 1) {
1579 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1580 amdgpu_vm_size);
f3368128 1581 amdgpu_vm_size = -1;
83ca145d 1582 }
83ca145d
ZJ
1583}
1584
7951e376
RZ
1585static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1586{
1587 struct sysinfo si;
a9d4fe2f 1588 bool is_os_64 = (sizeof(void *) == 8);
7951e376
RZ
1589 uint64_t total_memory;
1590 uint64_t dram_size_seven_GB = 0x1B8000000;
1591 uint64_t dram_size_three_GB = 0xB8000000;
1592
1593 if (amdgpu_smu_memory_pool_size == 0)
1594 return;
1595
1596 if (!is_os_64) {
1597 DRM_WARN("Not 64-bit OS, feature not supported\n");
1598 goto def_value;
1599 }
1600 si_meminfo(&si);
1601 total_memory = (uint64_t)si.totalram * si.mem_unit;
1602
1603 if ((amdgpu_smu_memory_pool_size == 1) ||
1604 (amdgpu_smu_memory_pool_size == 2)) {
1605 if (total_memory < dram_size_three_GB)
1606 goto def_value1;
1607 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1608 (amdgpu_smu_memory_pool_size == 8)) {
1609 if (total_memory < dram_size_seven_GB)
1610 goto def_value1;
1611 } else {
1612 DRM_WARN("Smu memory pool size not supported\n");
1613 goto def_value;
1614 }
1615 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1616
1617 return;
1618
1619def_value1:
1620 DRM_WARN("No enough system memory\n");
1621def_value:
1622 adev->pm.smu_prv_buffer_size = 0;
1623}
1624
9f6a7857
HR
1625static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1626{
1627 if (!(adev->flags & AMD_IS_APU) ||
1628 adev->asic_type < CHIP_RAVEN)
1629 return 0;
1630
1631 switch (adev->asic_type) {
1632 case CHIP_RAVEN:
1633 if (adev->pdev->device == 0x15dd)
1634 adev->apu_flags |= AMD_APU_IS_RAVEN;
1635 if (adev->pdev->device == 0x15d8)
1636 adev->apu_flags |= AMD_APU_IS_PICASSO;
1637 break;
1638 case CHIP_RENOIR:
1639 if ((adev->pdev->device == 0x1636) ||
1640 (adev->pdev->device == 0x164c))
1641 adev->apu_flags |= AMD_APU_IS_RENOIR;
1642 else
1643 adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1644 break;
1645 case CHIP_VANGOGH:
1646 adev->apu_flags |= AMD_APU_IS_VANGOGH;
1647 break;
1648 case CHIP_YELLOW_CARP:
1649 break;
d0f56dc2 1650 case CHIP_CYAN_SKILLFISH:
dfcc3e8c
AD
1651 if ((adev->pdev->device == 0x13FE) ||
1652 (adev->pdev->device == 0x143F))
d0f56dc2
TZ
1653 adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1654 break;
9f6a7857 1655 default:
4eaf21b7 1656 break;
9f6a7857
HR
1657 }
1658
1659 return 0;
1660}
1661
d38ceaf9 1662/**
06ec9070 1663 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
1664 *
1665 * @adev: amdgpu_device pointer
1666 *
1667 * Validates certain module parameters and updates
1668 * the associated values used by the driver (all asics).
1669 */
912dfc84 1670static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1671{
5b011235
CZ
1672 if (amdgpu_sched_jobs < 4) {
1673 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1674 amdgpu_sched_jobs);
1675 amdgpu_sched_jobs = 4;
47fc644f 1676 } else if (!is_power_of_2(amdgpu_sched_jobs)) {
5b011235
CZ
1677 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1678 amdgpu_sched_jobs);
1679 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1680 }
d38ceaf9 1681
83e74db6 1682 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
1683 /* gart size must be greater or equal to 32M */
1684 dev_warn(adev->dev, "gart size (%d) too small\n",
1685 amdgpu_gart_size);
83e74db6 1686 amdgpu_gart_size = -1;
d38ceaf9
AD
1687 }
1688
36d38372 1689 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1690 /* gtt size must be greater or equal to 32M */
36d38372
CK
1691 dev_warn(adev->dev, "gtt size (%d) too small\n",
1692 amdgpu_gtt_size);
1693 amdgpu_gtt_size = -1;
d38ceaf9
AD
1694 }
1695
d07f14be
RH
1696 /* valid range is between 4 and 9 inclusive */
1697 if (amdgpu_vm_fragment_size != -1 &&
1698 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1699 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1700 amdgpu_vm_fragment_size = -1;
1701 }
1702
5d5bd5e3
KW
1703 if (amdgpu_sched_hw_submission < 2) {
1704 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1705 amdgpu_sched_hw_submission);
1706 amdgpu_sched_hw_submission = 2;
1707 } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1708 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1709 amdgpu_sched_hw_submission);
1710 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1711 }
1712
2656fd23
AG
1713 if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1714 dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1715 amdgpu_reset_method = -1;
1716 }
1717
7951e376
RZ
1718 amdgpu_device_check_smu_prv_buffer_size(adev);
1719
06ec9070 1720 amdgpu_device_check_vm_size(adev);
d38ceaf9 1721
06ec9070 1722 amdgpu_device_check_block_size(adev);
6a7f76e7 1723
19aede77 1724 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84 1725
e3c00faa 1726 return 0;
d38ceaf9
AD
1727}
1728
1729/**
1730 * amdgpu_switcheroo_set_state - set switcheroo state
1731 *
1732 * @pdev: pci dev pointer
1694467b 1733 * @state: vga_switcheroo state
d38ceaf9 1734 *
12024b17 1735 * Callback for the switcheroo driver. Suspends or resumes
d38ceaf9
AD
1736 * the asics before or after it is powered up using ACPI methods.
1737 */
8aba21b7
LT
1738static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1739 enum vga_switcheroo_state state)
d38ceaf9
AD
1740{
1741 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 1742 int r;
d38ceaf9 1743
b98c6299 1744 if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
1745 return;
1746
1747 if (state == VGA_SWITCHEROO_ON) {
dd4fa6c1 1748 pr_info("switched on\n");
d38ceaf9
AD
1749 /* don't suspend or resume card normally */
1750 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1751
8f66090b
TZ
1752 pci_set_power_state(pdev, PCI_D0);
1753 amdgpu_device_load_pci_state(pdev);
1754 r = pci_enable_device(pdev);
de185019
AD
1755 if (r)
1756 DRM_WARN("pci_enable_device failed (%d)\n", r);
1757 amdgpu_device_resume(dev, true);
d38ceaf9 1758
d38ceaf9 1759 dev->switch_power_state = DRM_SWITCH_POWER_ON;
d38ceaf9 1760 } else {
dd4fa6c1 1761 pr_info("switched off\n");
d38ceaf9 1762 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
de185019 1763 amdgpu_device_suspend(dev, true);
8f66090b 1764 amdgpu_device_cache_pci_state(pdev);
de185019 1765 /* Shut down the device */
8f66090b
TZ
1766 pci_disable_device(pdev);
1767 pci_set_power_state(pdev, PCI_D3cold);
d38ceaf9
AD
1768 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1769 }
1770}
1771
1772/**
1773 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1774 *
1775 * @pdev: pci dev pointer
1776 *
 1777 * Callback for the switcheroo driver. Checks if the switcheroo
1778 * state can be changed.
1779 * Returns true if the state can be changed, false if not.
1780 */
1781static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1782{
1783 struct drm_device *dev = pci_get_drvdata(pdev);
1784
b8920e1e 1785 /*
d38ceaf9
AD
1786 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1787 * locking inversion with the driver load path. And the access here is
1788 * completely racy anyway. So don't bother with locking for now.
1789 */
7e13ad89 1790 return atomic_read(&dev->open_count) == 0;
d38ceaf9
AD
1791}
1792
1793static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1794 .set_gpu_state = amdgpu_switcheroo_set_state,
1795 .reprobe = NULL,
1796 .can_switch = amdgpu_switcheroo_can_switch,
1797};
1798
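/*
 * Illustrative sketch, not part of the original file: the ops table above is
 * normally handed to the VGA switcheroo core during device init, roughly as
 *
 *   vga_switcheroo_register_client(adev->pdev, &amdgpu_switcheroo_ops,
 *                                  amdgpu_device_supports_px(dev));
 *
 * The exact call site and arguments live elsewhere in this driver.
 */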
e3ecdffa
AD
1799/**
1800 * amdgpu_device_ip_set_clockgating_state - set the CG state
1801 *
87e3f136 1802 * @dev: amdgpu_device pointer
e3ecdffa
AD
1803 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1804 * @state: clockgating state (gate or ungate)
1805 *
1806 * Sets the requested clockgating state for all instances of
1807 * the hardware IP specified.
1808 * Returns the error code from the last instance.
1809 */
43fa561f 1810int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1811 enum amd_ip_block_type block_type,
1812 enum amd_clockgating_state state)
d38ceaf9 1813{
43fa561f 1814 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1815 int i, r = 0;
1816
1817 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1818 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1819 continue;
c722865a
RZ
1820 if (adev->ip_blocks[i].version->type != block_type)
1821 continue;
1822 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1823 continue;
1824 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1825 (void *)adev, state);
1826 if (r)
1827 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1828 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1829 }
1830 return r;
1831}
1832
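/*
 * Illustrative sketch, not part of the original file: an IP block that wants
 * to gate clocks on all GFX instances could call the helper above like
 *
 *   r = amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
 *                                              AMD_CG_STATE_GATE);
 *   if (r)
 *       DRM_ERROR("failed to gate GFX clocks (%d)\n", r);
 */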
e3ecdffa
AD
1833/**
1834 * amdgpu_device_ip_set_powergating_state - set the PG state
1835 *
87e3f136 1836 * @dev: amdgpu_device pointer
e3ecdffa
AD
1837 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1838 * @state: powergating state (gate or ungate)
1839 *
1840 * Sets the requested powergating state for all instances of
1841 * the hardware IP specified.
1842 * Returns the error code from the last instance.
1843 */
43fa561f 1844int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1845 enum amd_ip_block_type block_type,
1846 enum amd_powergating_state state)
d38ceaf9 1847{
43fa561f 1848 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1849 int i, r = 0;
1850
1851 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1852 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1853 continue;
c722865a
RZ
1854 if (adev->ip_blocks[i].version->type != block_type)
1855 continue;
1856 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1857 continue;
1858 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1859 (void *)adev, state);
1860 if (r)
1861 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1862 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1863 }
1864 return r;
1865}
1866
e3ecdffa
AD
1867/**
1868 * amdgpu_device_ip_get_clockgating_state - get the CG state
1869 *
1870 * @adev: amdgpu_device pointer
1871 * @flags: clockgating feature flags
1872 *
1873 * Walks the list of IPs on the device and updates the clockgating
1874 * flags for each IP.
1875 * Updates @flags with the feature flags for each hardware IP where
1876 * clockgating is enabled.
1877 */
2990a1fc 1878void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
25faeddc 1879 u64 *flags)
6cb2d4e4
HR
1880{
1881 int i;
1882
1883 for (i = 0; i < adev->num_ip_blocks; i++) {
1884 if (!adev->ip_blocks[i].status.valid)
1885 continue;
1886 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1887 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1888 }
1889}
1890
e3ecdffa
AD
1891/**
1892 * amdgpu_device_ip_wait_for_idle - wait for idle
1893 *
1894 * @adev: amdgpu_device pointer
1895 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1896 *
 1897 * Waits for the requested hardware IP to be idle.
1898 * Returns 0 for success or a negative error code on failure.
1899 */
2990a1fc
AD
1900int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1901 enum amd_ip_block_type block_type)
5dbbb60b
AD
1902{
1903 int i, r;
1904
1905 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1906 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1907 continue;
a1255107
AD
1908 if (adev->ip_blocks[i].version->type == block_type) {
1909 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1910 if (r)
1911 return r;
1912 break;
1913 }
1914 }
1915 return 0;
1916
1917}
1918
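/*
 * Illustrative sketch, not part of the original file: draining the GFX block
 * before a reconfiguration might look like
 *
 *   r = amdgpu_device_ip_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GFX);
 *   if (r)
 *       dev_warn(adev->dev, "GFX did not go idle (%d)\n", r);
 */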
e3ecdffa
AD
1919/**
1920 * amdgpu_device_ip_is_idle - is the hardware IP idle
1921 *
1922 * @adev: amdgpu_device pointer
1923 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1924 *
1925 * Check if the hardware IP is idle or not.
 1926 * Returns true if the IP is idle, false if not.
1927 */
2990a1fc
AD
1928bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1929 enum amd_ip_block_type block_type)
5dbbb60b
AD
1930{
1931 int i;
1932
1933 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1934 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1935 continue;
a1255107
AD
1936 if (adev->ip_blocks[i].version->type == block_type)
1937 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1938 }
1939 return true;
1940
1941}
1942
e3ecdffa
AD
1943/**
1944 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1945 *
1946 * @adev: amdgpu_device pointer
87e3f136 1947 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1948 *
1949 * Returns a pointer to the hardware IP block structure
1950 * if it exists for the asic, otherwise NULL.
1951 */
2990a1fc
AD
1952struct amdgpu_ip_block *
1953amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1954 enum amd_ip_block_type type)
d38ceaf9
AD
1955{
1956 int i;
1957
1958 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1959 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1960 return &adev->ip_blocks[i];
1961
1962 return NULL;
1963}
1964
1965/**
2990a1fc 1966 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1967 *
1968 * @adev: amdgpu_device pointer
5fc3aeeb 1969 * @type: enum amd_ip_block_type
d38ceaf9
AD
1970 * @major: major version
1971 * @minor: minor version
1972 *
 1973 * Returns 0 if the IP block version is equal to or greater than the given version,
 1974 * or 1 if it is smaller or the ip_block doesn't exist.
1975 */
2990a1fc
AD
1976int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1977 enum amd_ip_block_type type,
1978 u32 major, u32 minor)
d38ceaf9 1979{
2990a1fc 1980 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1981
a1255107
AD
1982 if (ip_block && ((ip_block->version->major > major) ||
1983 ((ip_block->version->major == major) &&
1984 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1985 return 0;
1986
1987 return 1;
1988}
1989
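/*
 * Illustrative sketch, not part of the original file: a caller that only
 * supports SMC 7.0 or newer could guard its setup with
 *
 *   if (amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_SMC,
 *                                          7, 0) == 0) {
 *       ... version is >= 7.0, use the newer interface ...
 *   }
 */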
a1255107 1990/**
2990a1fc 1991 * amdgpu_device_ip_block_add
a1255107
AD
1992 *
1993 * @adev: amdgpu_device pointer
1994 * @ip_block_version: pointer to the IP to add
1995 *
1996 * Adds the IP block driver information to the collection of IPs
1997 * on the asic.
1998 */
2990a1fc
AD
1999int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
2000 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
2001{
2002 if (!ip_block_version)
2003 return -EINVAL;
2004
7bd939d0
LG
2005 switch (ip_block_version->type) {
2006 case AMD_IP_BLOCK_TYPE_VCN:
2007 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
2008 return 0;
2009 break;
2010 case AMD_IP_BLOCK_TYPE_JPEG:
2011 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
2012 return 0;
2013 break;
2014 default:
2015 break;
2016 }
2017
e966a725 2018 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
2019 ip_block_version->funcs->name);
2020
a1255107
AD
2021 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
2022
2023 return 0;
2024}
2025
e3ecdffa
AD
2026/**
2027 * amdgpu_device_enable_virtual_display - enable virtual display feature
2028 *
2029 * @adev: amdgpu_device pointer
2030 *
 2031 * Enables the virtual display feature if the user has enabled it via
2032 * the module parameter virtual_display. This feature provides a virtual
2033 * display hardware on headless boards or in virtualized environments.
2034 * This function parses and validates the configuration string specified by
 2035 * the user and configures the virtual display configuration (number of
2036 * virtual connectors, crtcs, etc.) specified.
2037 */
483ef985 2038static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
2039{
2040 adev->enable_virtual_display = false;
2041
2042 if (amdgpu_virtual_display) {
8f66090b 2043 const char *pci_address_name = pci_name(adev->pdev);
0f66356d 2044 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
2045
2046 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
2047 pciaddstr_tmp = pciaddstr;
0f66356d
ED
2048 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
2049 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
2050 if (!strcmp("all", pciaddname)
2051 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
2052 long num_crtc;
2053 int res = -1;
2054
9accf2fd 2055 adev->enable_virtual_display = true;
0f66356d
ED
2056
2057 if (pciaddname_tmp)
2058 res = kstrtol(pciaddname_tmp, 10,
2059 &num_crtc);
2060
2061 if (!res) {
2062 if (num_crtc < 1)
2063 num_crtc = 1;
2064 if (num_crtc > 6)
2065 num_crtc = 6;
2066 adev->mode_info.num_crtc = num_crtc;
2067 } else {
2068 adev->mode_info.num_crtc = 1;
2069 }
9accf2fd
ED
2070 break;
2071 }
2072 }
2073
0f66356d
ED
2074 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
2075 amdgpu_virtual_display, pci_address_name,
2076 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
2077
2078 kfree(pciaddstr);
2079 }
2080}
2081
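/*
 * Illustrative sketch, not part of the original file: the string parsed above
 * is a semicolon-separated list of PCI addresses with an optional CRTC count,
 * e.g. amdgpu.virtual_display=0000:03:00.0,2 enables two virtual CRTCs on
 * that device, and the address "all" applies the setting to every device
 * bound to amdgpu.
 */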
25263da3
AD
2082void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
2083{
2084 if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
2085 adev->mode_info.num_crtc = 1;
2086 adev->enable_virtual_display = true;
2087 DRM_INFO("virtual_display:%d, num_crtc:%d\n",
2088 adev->enable_virtual_display, adev->mode_info.num_crtc);
2089 }
2090}
2091
e3ecdffa
AD
2092/**
2093 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
2094 *
2095 * @adev: amdgpu_device pointer
2096 *
2097 * Parses the asic configuration parameters specified in the gpu info
 2098 * firmware and makes them available to the driver for use in configuring
2099 * the asic.
2100 * Returns 0 on success, -EINVAL on failure.
2101 */
e2a75f88
AD
2102static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
2103{
e2a75f88 2104 const char *chip_name;
c0a43457 2105 char fw_name[40];
e2a75f88
AD
2106 int err;
2107 const struct gpu_info_firmware_header_v1_0 *hdr;
2108
ab4fe3e1
HR
2109 adev->firmware.gpu_info_fw = NULL;
2110
72de33f8 2111 if (adev->mman.discovery_bin) {
cc375d8c
TY
2112 /*
2113 * FIXME: The bounding box is still needed by Navi12, so
e24d0e91 2114 * temporarily read it from gpu_info firmware. Should be dropped
cc375d8c
TY
2115 * when DAL no longer needs it.
2116 */
2117 if (adev->asic_type != CHIP_NAVI12)
2118 return 0;
258620d0
AD
2119 }
2120
e2a75f88 2121 switch (adev->asic_type) {
e2a75f88
AD
2122 default:
2123 return 0;
2124 case CHIP_VEGA10:
2125 chip_name = "vega10";
2126 break;
3f76dced
AD
2127 case CHIP_VEGA12:
2128 chip_name = "vega12";
2129 break;
2d2e5e7e 2130 case CHIP_RAVEN:
54f78a76 2131 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
54c4d17e 2132 chip_name = "raven2";
54f78a76 2133 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
741deade 2134 chip_name = "picasso";
54c4d17e
FX
2135 else
2136 chip_name = "raven";
2d2e5e7e 2137 break;
65e60f6e
LM
2138 case CHIP_ARCTURUS:
2139 chip_name = "arcturus";
2140 break;
42b325e5
XY
2141 case CHIP_NAVI12:
2142 chip_name = "navi12";
2143 break;
e2a75f88
AD
2144 }
2145
2146 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
b31d3063 2147 err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
e2a75f88
AD
2148 if (err) {
2149 dev_err(adev->dev,
b31d3063 2150 "Failed to get gpu_info firmware \"%s\"\n",
e2a75f88
AD
2151 fw_name);
2152 goto out;
2153 }
2154
ab4fe3e1 2155 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
2156 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
2157
2158 switch (hdr->version_major) {
2159 case 1:
2160 {
2161 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 2162 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
2163 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2164
cc375d8c
TY
2165 /*
 2166 * Should be dropped when DAL no longer needs it.
2167 */
2168 if (adev->asic_type == CHIP_NAVI12)
ec51d3fa
XY
2169 goto parse_soc_bounding_box;
2170
b5ab16bf
AD
2171 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2172 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2173 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2174 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 2175 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
2176 le32_to_cpu(gpu_info_fw->gc_num_tccs);
2177 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2178 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2179 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2180 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 2181 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
2182 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2183 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
2184 adev->gfx.cu_info.max_waves_per_simd =
2185 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2186 adev->gfx.cu_info.max_scratch_slots_per_cu =
2187 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2188 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 2189 if (hdr->version_minor >= 1) {
35c2e910
HZ
2190 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2191 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2192 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2193 adev->gfx.config.num_sc_per_sh =
2194 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2195 adev->gfx.config.num_packer_per_sc =
2196 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2197 }
ec51d3fa
XY
2198
2199parse_soc_bounding_box:
ec51d3fa
XY
2200 /*
 2201 * soc bounding box info is not integrated in the discovery table,
258620d0 2202 * we always need to parse it from gpu info firmware if needed.
ec51d3fa 2203 */
48321c3d
HW
2204 if (hdr->version_minor == 2) {
2205 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2206 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2207 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2208 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2209 }
e2a75f88
AD
2210 break;
2211 }
2212 default:
2213 dev_err(adev->dev,
2214 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2215 err = -EINVAL;
2216 goto out;
2217 }
2218out:
e2a75f88
AD
2219 return err;
2220}
2221
e3ecdffa
AD
2222/**
2223 * amdgpu_device_ip_early_init - run early init for hardware IPs
2224 *
2225 * @adev: amdgpu_device pointer
2226 *
2227 * Early initialization pass for hardware IPs. The hardware IPs that make
 2228 * up each asic are discovered and each IP's early_init callback is run. This
2229 * is the first stage in initializing the asic.
2230 * Returns 0 on success, negative error code on failure.
2231 */
06ec9070 2232static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 2233{
901e2be2
AD
2234 struct drm_device *dev = adev_to_drm(adev);
2235 struct pci_dev *parent;
aaa36a97 2236 int i, r;
ced69502 2237 bool total;
d38ceaf9 2238
483ef985 2239 amdgpu_device_enable_virtual_display(adev);
a6be7570 2240
00a979f3 2241 if (amdgpu_sriov_vf(adev)) {
00a979f3 2242 r = amdgpu_virt_request_full_gpu(adev, true);
aaa36a97
AD
2243 if (r)
2244 return r;
00a979f3
WS
2245 }
2246
d38ceaf9 2247 switch (adev->asic_type) {
33f34802
KW
2248#ifdef CONFIG_DRM_AMDGPU_SI
2249 case CHIP_VERDE:
2250 case CHIP_TAHITI:
2251 case CHIP_PITCAIRN:
2252 case CHIP_OLAND:
2253 case CHIP_HAINAN:
295d0daf 2254 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
2255 r = si_set_ip_blocks(adev);
2256 if (r)
2257 return r;
2258 break;
2259#endif
a2e73f56
AD
2260#ifdef CONFIG_DRM_AMDGPU_CIK
2261 case CHIP_BONAIRE:
2262 case CHIP_HAWAII:
2263 case CHIP_KAVERI:
2264 case CHIP_KABINI:
2265 case CHIP_MULLINS:
e1ad2d53 2266 if (adev->flags & AMD_IS_APU)
a2e73f56 2267 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
2268 else
2269 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
2270
2271 r = cik_set_ip_blocks(adev);
2272 if (r)
2273 return r;
2274 break;
2275#endif
da87c30b
AD
2276 case CHIP_TOPAZ:
2277 case CHIP_TONGA:
2278 case CHIP_FIJI:
2279 case CHIP_POLARIS10:
2280 case CHIP_POLARIS11:
2281 case CHIP_POLARIS12:
2282 case CHIP_VEGAM:
2283 case CHIP_CARRIZO:
2284 case CHIP_STONEY:
2285 if (adev->flags & AMD_IS_APU)
2286 adev->family = AMDGPU_FAMILY_CZ;
2287 else
2288 adev->family = AMDGPU_FAMILY_VI;
2289
2290 r = vi_set_ip_blocks(adev);
2291 if (r)
2292 return r;
2293 break;
d38ceaf9 2294 default:
63352b7f
AD
2295 r = amdgpu_discovery_set_ip_blocks(adev);
2296 if (r)
2297 return r;
2298 break;
d38ceaf9
AD
2299 }
2300
901e2be2
AD
2301 if (amdgpu_has_atpx() &&
2302 (amdgpu_is_atpx_hybrid() ||
2303 amdgpu_has_atpx_dgpu_power_cntl()) &&
2304 ((adev->flags & AMD_IS_APU) == 0) &&
2305 !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
2306 adev->flags |= AMD_IS_PX;
2307
85ac2021 2308 if (!(adev->flags & AMD_IS_APU)) {
c4c8955b 2309 parent = pcie_find_root_port(adev->pdev);
85ac2021
AD
2310 adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2311 }
901e2be2 2312
1884734a 2313
3b94fb10 2314 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 2315 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 2316 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
4215a119
HC
2317 if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2318 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
00f54b97 2319
ced69502 2320 total = true;
d38ceaf9
AD
2321 for (i = 0; i < adev->num_ip_blocks; i++) {
2322 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
0c451baf 2323 DRM_WARN("disabled ip block: %d <%s>\n",
ed8cf00c 2324 i, adev->ip_blocks[i].version->funcs->name);
a1255107 2325 adev->ip_blocks[i].status.valid = false;
d38ceaf9 2326 } else {
a1255107
AD
2327 if (adev->ip_blocks[i].version->funcs->early_init) {
2328 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 2329 if (r == -ENOENT) {
a1255107 2330 adev->ip_blocks[i].status.valid = false;
2c1a2784 2331 } else if (r) {
a1255107
AD
2332 DRM_ERROR("early_init of IP block <%s> failed %d\n",
2333 adev->ip_blocks[i].version->funcs->name, r);
ced69502 2334 total = false;
2c1a2784 2335 } else {
a1255107 2336 adev->ip_blocks[i].status.valid = true;
2c1a2784 2337 }
974e6b64 2338 } else {
a1255107 2339 adev->ip_blocks[i].status.valid = true;
d38ceaf9 2340 }
d38ceaf9 2341 }
21a249ca
AD
2342 /* get the vbios after the asic_funcs are set up */
2343 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
2344 r = amdgpu_device_parse_gpu_info_fw(adev);
2345 if (r)
2346 return r;
2347
21a249ca 2348 /* Read BIOS */
9535a86a
SZ
2349 if (amdgpu_device_read_bios(adev)) {
2350 if (!amdgpu_get_bios(adev))
2351 return -EINVAL;
21a249ca 2352
9535a86a
SZ
2353 r = amdgpu_atombios_init(adev);
2354 if (r) {
2355 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2356 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2357 return r;
2358 }
21a249ca 2359 }
77eabc6f
PJZ
2360
 2361 /* get pf2vf msg info at its earliest time */
2362 if (amdgpu_sriov_vf(adev))
2363 amdgpu_virt_init_data_exchange(adev);
2364
21a249ca 2365 }
d38ceaf9 2366 }
ced69502
ML
2367 if (!total)
2368 return -ENODEV;
d38ceaf9 2369
00fa4035 2370 amdgpu_amdkfd_device_probe(adev);
395d1fb9
NH
2371 adev->cg_flags &= amdgpu_cg_mask;
2372 adev->pg_flags &= amdgpu_pg_mask;
2373
d38ceaf9
AD
2374 return 0;
2375}
2376
0a4f2520
RZ
2377static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2378{
2379 int i, r;
2380
2381 for (i = 0; i < adev->num_ip_blocks; i++) {
2382 if (!adev->ip_blocks[i].status.sw)
2383 continue;
2384 if (adev->ip_blocks[i].status.hw)
2385 continue;
2386 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 2387 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
2388 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2389 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2390 if (r) {
2391 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2392 adev->ip_blocks[i].version->funcs->name, r);
2393 return r;
2394 }
2395 adev->ip_blocks[i].status.hw = true;
2396 }
2397 }
2398
2399 return 0;
2400}
2401
2402static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2403{
2404 int i, r;
2405
2406 for (i = 0; i < adev->num_ip_blocks; i++) {
2407 if (!adev->ip_blocks[i].status.sw)
2408 continue;
2409 if (adev->ip_blocks[i].status.hw)
2410 continue;
2411 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2412 if (r) {
2413 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2414 adev->ip_blocks[i].version->funcs->name, r);
2415 return r;
2416 }
2417 adev->ip_blocks[i].status.hw = true;
2418 }
2419
2420 return 0;
2421}
2422
7a3e0bb2
RZ
2423static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2424{
2425 int r = 0;
2426 int i;
80f41f84 2427 uint32_t smu_version;
7a3e0bb2
RZ
2428
2429 if (adev->asic_type >= CHIP_VEGA10) {
2430 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
2431 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2432 continue;
2433
e3c1b071 2434 if (!adev->ip_blocks[i].status.sw)
2435 continue;
2436
482f0e53
ML
2437 /* no need to do the fw loading again if already done*/
2438 if (adev->ip_blocks[i].status.hw == true)
2439 break;
2440
53b3f8f4 2441 if (amdgpu_in_reset(adev) || adev->in_suspend) {
482f0e53
ML
2442 r = adev->ip_blocks[i].version->funcs->resume(adev);
2443 if (r) {
2444 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 2445 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
2446 return r;
2447 }
2448 } else {
2449 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2450 if (r) {
2451 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2452 adev->ip_blocks[i].version->funcs->name, r);
2453 return r;
7a3e0bb2 2454 }
7a3e0bb2 2455 }
482f0e53
ML
2456
2457 adev->ip_blocks[i].status.hw = true;
2458 break;
7a3e0bb2
RZ
2459 }
2460 }
482f0e53 2461
8973d9ec
ED
2462 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2463 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 2464
80f41f84 2465 return r;
7a3e0bb2
RZ
2466}
2467
5fd8518d
AG
2468static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2469{
2470 long timeout;
2471 int r, i;
2472
2473 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2474 struct amdgpu_ring *ring = adev->rings[i];
2475
 2476 /* No need to set up the GPU scheduler for rings that don't need it */
2477 if (!ring || ring->no_scheduler)
2478 continue;
2479
2480 switch (ring->funcs->type) {
2481 case AMDGPU_RING_TYPE_GFX:
2482 timeout = adev->gfx_timeout;
2483 break;
2484 case AMDGPU_RING_TYPE_COMPUTE:
2485 timeout = adev->compute_timeout;
2486 break;
2487 case AMDGPU_RING_TYPE_SDMA:
2488 timeout = adev->sdma_timeout;
2489 break;
2490 default:
2491 timeout = adev->video_timeout;
2492 break;
2493 }
2494
2495 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
11f25c84 2496 ring->num_hw_submission, 0,
8ab62eda
JG
2497 timeout, adev->reset_domain->wq,
2498 ring->sched_score, ring->name,
2499 adev->dev);
5fd8518d
AG
2500 if (r) {
2501 DRM_ERROR("Failed to create scheduler on ring %s.\n",
2502 ring->name);
2503 return r;
2504 }
2505 }
2506
d425c6f4
JZ
2507 amdgpu_xcp_update_partition_sched_list(adev);
2508
5fd8518d
AG
2509 return 0;
2510}
2511
2512
e3ecdffa
AD
2513/**
2514 * amdgpu_device_ip_init - run init for hardware IPs
2515 *
2516 * @adev: amdgpu_device pointer
2517 *
2518 * Main initialization pass for hardware IPs. The list of all the hardware
2519 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2520 * are run. sw_init initializes the software state associated with each IP
2521 * and hw_init initializes the hardware associated with each IP.
2522 * Returns 0 on success, negative error code on failure.
2523 */
06ec9070 2524static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
2525{
2526 int i, r;
2527
c030f2e4 2528 r = amdgpu_ras_init(adev);
2529 if (r)
2530 return r;
2531
d38ceaf9 2532 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2533 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2534 continue;
a1255107 2535 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 2536 if (r) {
a1255107
AD
2537 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2538 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 2539 goto init_failed;
2c1a2784 2540 }
a1255107 2541 adev->ip_blocks[i].status.sw = true;
bfca0289 2542
c1c39032
AD
2543 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2544 /* need to do common hw init early so everything is set up for gmc */
2545 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2546 if (r) {
2547 DRM_ERROR("hw_init %d failed %d\n", i, r);
2548 goto init_failed;
2549 }
2550 adev->ip_blocks[i].status.hw = true;
2551 } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2552 /* need to do gmc hw init early so we can allocate gpu mem */
892deb48
VS
2553 /* Try to reserve bad pages early */
2554 if (amdgpu_sriov_vf(adev))
2555 amdgpu_virt_exchange_data(adev);
2556
7ccfd79f 2557 r = amdgpu_device_mem_scratch_init(adev);
2c1a2784 2558 if (r) {
7ccfd79f 2559 DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
72d3f592 2560 goto init_failed;
2c1a2784 2561 }
a1255107 2562 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
2563 if (r) {
2564 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 2565 goto init_failed;
2c1a2784 2566 }
06ec9070 2567 r = amdgpu_device_wb_init(adev);
2c1a2784 2568 if (r) {
06ec9070 2569 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 2570 goto init_failed;
2c1a2784 2571 }
a1255107 2572 adev->ip_blocks[i].status.hw = true;
2493664f
ML
2573
2574 /* right after GMC hw init, we create CSA */
02ff519e 2575 if (adev->gfx.mcbp) {
1e256e27 2576 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
58ab2c08
CK
2577 AMDGPU_GEM_DOMAIN_VRAM |
2578 AMDGPU_GEM_DOMAIN_GTT,
2579 AMDGPU_CSA_SIZE);
2493664f
ML
2580 if (r) {
2581 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 2582 goto init_failed;
2493664f
ML
2583 }
2584 }
d38ceaf9
AD
2585 }
2586 }
2587
c9ffa427 2588 if (amdgpu_sriov_vf(adev))
22c16d25 2589 amdgpu_virt_init_data_exchange(adev);
c9ffa427 2590
533aed27
AG
2591 r = amdgpu_ib_pool_init(adev);
2592 if (r) {
2593 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2594 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2595 goto init_failed;
2596 }
2597
c8963ea4
RZ
2598 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2599 if (r)
72d3f592 2600 goto init_failed;
0a4f2520
RZ
2601
2602 r = amdgpu_device_ip_hw_init_phase1(adev);
2603 if (r)
72d3f592 2604 goto init_failed;
0a4f2520 2605
7a3e0bb2
RZ
2606 r = amdgpu_device_fw_loading(adev);
2607 if (r)
72d3f592 2608 goto init_failed;
7a3e0bb2 2609
0a4f2520
RZ
2610 r = amdgpu_device_ip_hw_init_phase2(adev);
2611 if (r)
72d3f592 2612 goto init_failed;
d38ceaf9 2613
121a2bc6
AG
2614 /*
2615 * retired pages will be loaded from eeprom and reserved here,
2616 * it should be called after amdgpu_device_ip_hw_init_phase2 since
2617 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
 2618 * for I2C communication, which is only true at this point.
b82e65a9
GC
2619 *
 2620 * amdgpu_ras_recovery_init may fail, but the upper layers only care about
 2621 * failures caused by a bad gpu situation and stop the amdgpu init process
 2622 * accordingly. For other failures, it still releases all the resources and
 2623 * prints an error message rather than returning a negative value to the
 2624 * upper level.
121a2bc6
AG
2625 *
2626 * Note: theoretically, this should be called before all vram allocations
 2627 * to protect retired pages from being abused
2628 */
b82e65a9
GC
2629 r = amdgpu_ras_recovery_init(adev);
2630 if (r)
2631 goto init_failed;
121a2bc6 2632
cfbb6b00
AG
2633 /**
2634 * In case of XGMI grab extra reference for reset domain for this device
2635 */
a4c63caf 2636 if (adev->gmc.xgmi.num_physical_nodes > 1) {
cfbb6b00 2637 if (amdgpu_xgmi_add_device(adev) == 0) {
46c67660 2638 if (!amdgpu_sriov_vf(adev)) {
2efc30f0
VC
2639 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2640
dfd0287b
LH
2641 if (WARN_ON(!hive)) {
2642 r = -ENOENT;
2643 goto init_failed;
2644 }
2645
46c67660 2646 if (!hive->reset_domain ||
2647 !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2648 r = -ENOENT;
2649 amdgpu_put_xgmi_hive(hive);
2650 goto init_failed;
2651 }
2652
2653 /* Drop the early temporary reset domain we created for device */
2654 amdgpu_reset_put_reset_domain(adev->reset_domain);
2655 adev->reset_domain = hive->reset_domain;
9dfa4860 2656 amdgpu_put_xgmi_hive(hive);
cfbb6b00 2657 }
a4c63caf
AG
2658 }
2659 }
2660
5fd8518d
AG
2661 r = amdgpu_device_init_schedulers(adev);
2662 if (r)
2663 goto init_failed;
e3c1b071 2664
2665 /* Don't init kfd if whole hive need to be reset during init */
84b4dd3f
PY
2666 if (!adev->gmc.xgmi.pending_reset) {
2667 kgd2kfd_init_zone_device(adev);
e3c1b071 2668 amdgpu_amdkfd_device_init(adev);
84b4dd3f 2669 }
c6332b97 2670
bd607166
KR
2671 amdgpu_fru_get_product_info(adev);
2672
72d3f592 2673init_failed:
c6332b97 2674
72d3f592 2675 return r;
d38ceaf9
AD
2676}
2677
e3ecdffa
AD
2678/**
2679 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2680 *
2681 * @adev: amdgpu_device pointer
2682 *
2683 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2684 * this function before a GPU reset. If the value is retained after a
 2685 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2686 */
06ec9070 2687static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2688{
2689 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2690}
2691
e3ecdffa
AD
2692/**
2693 * amdgpu_device_check_vram_lost - check if vram is valid
2694 *
2695 * @adev: amdgpu_device pointer
2696 *
2697 * Checks the reset magic value written to the gart pointer in VRAM.
2698 * The driver calls this after a GPU reset to see if the contents of
 2699 * VRAM have been lost or not.
 2700 * Returns true if vram is lost, false if not.
2701 */
06ec9070 2702static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2703{
dadce777
EQ
2704 if (memcmp(adev->gart.ptr, adev->reset_magic,
2705 AMDGPU_RESET_MAGIC_NUM))
2706 return true;
2707
53b3f8f4 2708 if (!amdgpu_in_reset(adev))
dadce777
EQ
2709 return false;
2710
2711 /*
2712 * For all ASICs with baco/mode1 reset, the VRAM is
2713 * always assumed to be lost.
2714 */
2715 switch (amdgpu_asic_reset_method(adev)) {
2716 case AMD_RESET_METHOD_BACO:
2717 case AMD_RESET_METHOD_MODE1:
2718 return true;
2719 default:
2720 return false;
2721 }
0c49e0b8
CZ
2722}
2723
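/*
 * Illustrative sketch, not part of the original file: the two helpers above
 * pair up around a reset, roughly as
 *
 *   amdgpu_device_fill_reset_magic(adev);      // before the reset
 *   ... perform the GPU reset ...
 *   if (amdgpu_device_check_vram_lost(adev))
 *       ... re-validate or re-upload buffers that lived in VRAM ...
 */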
e3ecdffa 2724/**
1112a46b 2725 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2726 *
2727 * @adev: amdgpu_device pointer
b8b72130 2728 * @state: clockgating state (gate or ungate)
e3ecdffa 2729 *
e3ecdffa 2730 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2731 * set_clockgating_state callbacks are run.
 2732 * For late initialization, this pass enables clockgating for hardware IPs.
 2733 * For fini or suspend, this pass disables clockgating for hardware IPs.
e3ecdffa
AD
2734 * Returns 0 on success, negative error code on failure.
2735 */
fdd34271 2736
5d89bb2d
LL
2737int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2738 enum amd_clockgating_state state)
d38ceaf9 2739{
1112a46b 2740 int i, j, r;
d38ceaf9 2741
4a2ba394
SL
2742 if (amdgpu_emu_mode == 1)
2743 return 0;
2744
1112a46b
RZ
2745 for (j = 0; j < adev->num_ip_blocks; j++) {
2746 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2747 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2748 continue;
47198eb7 2749 /* skip CG for GFX, SDMA on S0ix */
5d70a549 2750 if (adev->in_s0ix &&
47198eb7
AD
2751 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2752 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2753 continue;
4a446d55 2754 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2755 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2756 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2757 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2758 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2759 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2760 /* enable clockgating to save power */
a1255107 2761 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2762 state);
4a446d55
AD
2763 if (r) {
2764 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2765 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2766 return r;
2767 }
b0b00ff1 2768 }
d38ceaf9 2769 }
06b18f61 2770
c9f96fd5
RZ
2771 return 0;
2772}
2773
5d89bb2d
LL
2774int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2775 enum amd_powergating_state state)
c9f96fd5 2776{
1112a46b 2777 int i, j, r;
06b18f61 2778
c9f96fd5
RZ
2779 if (amdgpu_emu_mode == 1)
2780 return 0;
2781
1112a46b
RZ
2782 for (j = 0; j < adev->num_ip_blocks; j++) {
2783 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2784 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5 2785 continue;
47198eb7 2786 /* skip PG for GFX, SDMA on S0ix */
5d70a549 2787 if (adev->in_s0ix &&
47198eb7
AD
2788 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2789 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2790 continue;
c9f96fd5
RZ
 2791 /* skip PG for VCE/UVD, it's handled specially */
2792 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2793 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2794 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2795 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2796 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2797 /* enable powergating to save power */
2798 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2799 state);
c9f96fd5
RZ
2800 if (r) {
2801 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2802 adev->ip_blocks[i].version->funcs->name, r);
2803 return r;
2804 }
2805 }
2806 }
2dc80b00
S
2807 return 0;
2808}
2809
beff74bc
AD
2810static int amdgpu_device_enable_mgpu_fan_boost(void)
2811{
2812 struct amdgpu_gpu_instance *gpu_ins;
2813 struct amdgpu_device *adev;
2814 int i, ret = 0;
2815
2816 mutex_lock(&mgpu_info.mutex);
2817
2818 /*
2819 * MGPU fan boost feature should be enabled
2820 * only when there are two or more dGPUs in
2821 * the system
2822 */
2823 if (mgpu_info.num_dgpu < 2)
2824 goto out;
2825
2826 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2827 gpu_ins = &(mgpu_info.gpu_ins[i]);
2828 adev = gpu_ins->adev;
2829 if (!(adev->flags & AMD_IS_APU) &&
f10bb940 2830 !gpu_ins->mgpu_fan_enabled) {
beff74bc
AD
2831 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2832 if (ret)
2833 break;
2834
2835 gpu_ins->mgpu_fan_enabled = 1;
2836 }
2837 }
2838
2839out:
2840 mutex_unlock(&mgpu_info.mutex);
2841
2842 return ret;
2843}
2844
e3ecdffa
AD
2845/**
2846 * amdgpu_device_ip_late_init - run late init for hardware IPs
2847 *
2848 * @adev: amdgpu_device pointer
2849 *
2850 * Late initialization pass for hardware IPs. The list of all the hardware
2851 * IPs that make up the asic is walked and the late_init callbacks are run.
2852 * late_init covers any special initialization that an IP requires
2853 * after all of the have been initialized or something that needs to happen
2854 * late in the init process.
2855 * Returns 0 on success, negative error code on failure.
2856 */
06ec9070 2857static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2858{
60599a03 2859 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2860 int i = 0, r;
2861
2862 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2863 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2864 continue;
2865 if (adev->ip_blocks[i].version->funcs->late_init) {
2866 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2867 if (r) {
2868 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2869 adev->ip_blocks[i].version->funcs->name, r);
2870 return r;
2871 }
2dc80b00 2872 }
73f847db 2873 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2874 }
2875
867e24ca 2876 r = amdgpu_ras_late_init(adev);
2877 if (r) {
2878 DRM_ERROR("amdgpu_ras_late_init failed %d", r);
2879 return r;
2880 }
2881
a891d239
DL
2882 amdgpu_ras_set_error_query_ready(adev, true);
2883
1112a46b
RZ
2884 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2885 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2886
06ec9070 2887 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2888
beff74bc
AD
2889 r = amdgpu_device_enable_mgpu_fan_boost();
2890 if (r)
2891 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2892
4da8b639 2893 /* For passthrough configuration on arcturus and aldebaran, enable special handling for SBR */
47fc644f
SS
2894 if (amdgpu_passthrough(adev) &&
2895 ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
2896 adev->asic_type == CHIP_ALDEBARAN))
bc143d8b 2897 amdgpu_dpm_handle_passthrough_sbr(adev, true);
60599a03
EQ
2898
2899 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2900 mutex_lock(&mgpu_info.mutex);
2901
2902 /*
 2903 * Reset the device p-state to low, as it was booted with high.
2904 *
2905 * This should be performed only after all devices from the same
2906 * hive get initialized.
2907 *
 2908 * However, it's unknown in advance how many devices are in the hive.
 2909 * They are counted one by one during device initialization.
2910 *
 2911 * So we wait until all XGMI interlinked devices are initialized.
2912 * This may bring some delays as those devices may come from
2913 * different hives. But that should be OK.
2914 */
2915 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2916 for (i = 0; i < mgpu_info.num_gpu; i++) {
2917 gpu_instance = &(mgpu_info.gpu_ins[i]);
2918 if (gpu_instance->adev->flags & AMD_IS_APU)
2919 continue;
2920
d84a430d
JK
2921 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2922 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
2923 if (r) {
2924 DRM_ERROR("pstate setting failed (%d).\n", r);
2925 break;
2926 }
2927 }
2928 }
2929
2930 mutex_unlock(&mgpu_info.mutex);
2931 }
2932
d38ceaf9
AD
2933 return 0;
2934}
2935
613aa3ea
LY
2936/**
2937 * amdgpu_device_smu_fini_early - smu hw_fini wrapper
2938 *
2939 * @adev: amdgpu_device pointer
2940 *
 2941 * For ASICs that need to disable the SMC first
2942 */
2943static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
2944{
2945 int i, r;
2946
4e8303cf 2947 if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
613aa3ea
LY
2948 return;
2949
2950 for (i = 0; i < adev->num_ip_blocks; i++) {
2951 if (!adev->ip_blocks[i].status.hw)
2952 continue;
2953 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2954 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2955 /* XXX handle errors */
2956 if (r) {
2957 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2958 adev->ip_blocks[i].version->funcs->name, r);
2959 }
2960 adev->ip_blocks[i].status.hw = false;
2961 break;
2962 }
2963 }
2964}
2965
e9669fb7 2966static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
d38ceaf9
AD
2967{
2968 int i, r;
2969
e9669fb7
AG
2970 for (i = 0; i < adev->num_ip_blocks; i++) {
2971 if (!adev->ip_blocks[i].version->funcs->early_fini)
2972 continue;
5278a159 2973
e9669fb7
AG
2974 r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
2975 if (r) {
2976 DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
2977 adev->ip_blocks[i].version->funcs->name, r);
2978 }
2979 }
c030f2e4 2980
05df1f01 2981 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2982 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2983
7270e895
TY
2984 amdgpu_amdkfd_suspend(adev, false);
2985
613aa3ea
LY
 2986 /* Workaround for ASICs that need to disable the SMC first */
2987 amdgpu_device_smu_fini_early(adev);
3e96dbfd 2988
d38ceaf9 2989 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2990 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2991 continue;
8201a67a 2992
a1255107 2993 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2994 /* XXX handle errors */
2c1a2784 2995 if (r) {
a1255107
AD
2996 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2997 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2998 }
8201a67a 2999
a1255107 3000 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
3001 }
3002
6effad8a
GC
3003 if (amdgpu_sriov_vf(adev)) {
3004 if (amdgpu_virt_release_full_gpu(adev, false))
3005 DRM_ERROR("failed to release exclusive mode on fini\n");
3006 }
3007
e9669fb7
AG
3008 return 0;
3009}
3010
3011/**
3012 * amdgpu_device_ip_fini - run fini for hardware IPs
3013 *
3014 * @adev: amdgpu_device pointer
3015 *
3016 * Main teardown pass for hardware IPs. The list of all the hardware
3017 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
3018 * are run. hw_fini tears down the hardware associated with each IP
3019 * and sw_fini tears down any software state associated with each IP.
3020 * Returns 0 on success, negative error code on failure.
3021 */
3022static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
3023{
3024 int i, r;
3025
3026 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
3027 amdgpu_virt_release_ras_err_handler_data(adev);
3028
e9669fb7
AG
3029 if (adev->gmc.xgmi.num_physical_nodes > 1)
3030 amdgpu_xgmi_remove_device(adev);
3031
c004d44e 3032 amdgpu_amdkfd_device_fini_sw(adev);
9950cda2 3033
d38ceaf9 3034 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3035 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 3036 continue;
c12aba3a
ML
3037
3038 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 3039 amdgpu_ucode_free_bo(adev);
1e256e27 3040 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a 3041 amdgpu_device_wb_fini(adev);
7ccfd79f 3042 amdgpu_device_mem_scratch_fini(adev);
533aed27 3043 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
3044 }
3045
a1255107 3046 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 3047 /* XXX handle errors */
2c1a2784 3048 if (r) {
a1255107
AD
3049 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
3050 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 3051 }
a1255107
AD
3052 adev->ip_blocks[i].status.sw = false;
3053 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
3054 }
3055
a6dcfd9c 3056 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3057 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 3058 continue;
a1255107
AD
3059 if (adev->ip_blocks[i].version->funcs->late_fini)
3060 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
3061 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
3062 }
3063
c030f2e4 3064 amdgpu_ras_fini(adev);
3065
d38ceaf9
AD
3066 return 0;
3067}
3068
e3ecdffa 3069/**
beff74bc 3070 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 3071 *
1112a46b 3072 * @work: work_struct.
e3ecdffa 3073 */
beff74bc 3074static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
3075{
3076 struct amdgpu_device *adev =
beff74bc 3077 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
3078 int r;
3079
3080 r = amdgpu_ib_ring_tests(adev);
3081 if (r)
3082 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
3083}
3084
1e317b99
RZ
3085static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
3086{
3087 struct amdgpu_device *adev =
3088 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
3089
90a92662
MD
3090 WARN_ON_ONCE(adev->gfx.gfx_off_state);
3091 WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
3092
3093 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
3094 adev->gfx.gfx_off_state = true;
1e317b99
RZ
3095}
3096
e3ecdffa 3097/**
e7854a03 3098 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
3099 *
3100 * @adev: amdgpu_device pointer
3101 *
3102 * Main suspend function for hardware IPs. The list of all the hardware
3103 * IPs that make up the asic is walked, clockgating is disabled and the
3104 * suspend callbacks are run. suspend puts the hardware and software state
3105 * in each IP into a state suitable for suspend.
3106 * Returns 0 on success, negative error code on failure.
3107 */
e7854a03
AD
3108static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
3109{
3110 int i, r;
3111
50ec83f0
AD
3112 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3113 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 3114
b31d6ada
EQ
3115 /*
3116 * Per PMFW team's suggestion, driver needs to handle gfxoff
3117 * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
3118 * scenario. Add the missing df cstate disablement here.
3119 */
3120 if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
3121 dev_warn(adev->dev, "Failed to disallow df cstate");
3122
e7854a03
AD
3123 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3124 if (!adev->ip_blocks[i].status.valid)
3125 continue;
2b9f7848 3126
e7854a03 3127 /* displays are handled separately */
2b9f7848
ND
3128 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
3129 continue;
3130
3131 /* XXX handle errors */
3132 r = adev->ip_blocks[i].version->funcs->suspend(adev);
3133 /* XXX handle errors */
3134 if (r) {
3135 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3136 adev->ip_blocks[i].version->funcs->name, r);
3137 return r;
e7854a03 3138 }
2b9f7848
ND
3139
3140 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
3141 }
3142
e7854a03
AD
3143 return 0;
3144}
3145
3146/**
3147 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3148 *
3149 * @adev: amdgpu_device pointer
3150 *
3151 * Main suspend function for hardware IPs. The list of all the hardware
3152 * IPs that make up the asic is walked, clockgating is disabled and the
3153 * suspend callbacks are run. suspend puts the hardware and software state
3154 * in each IP into a state suitable for suspend.
3155 * Returns 0 on success, negative error code on failure.
3156 */
3157static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3158{
3159 int i, r;
3160
557f42a2 3161 if (adev->in_s0ix)
bc143d8b 3162 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
34416931 3163
d38ceaf9 3164 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3165 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 3166 continue;
e7854a03
AD
3167 /* displays are handled in phase1 */
3168 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3169 continue;
bff77e86
LM
3170 /* PSP lost connection when err_event_athub occurs */
3171 if (amdgpu_ras_intr_triggered() &&
3172 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3173 adev->ip_blocks[i].status.hw = false;
3174 continue;
3175 }
e3c1b071 3176
3177 /* skip unnecessary suspend if we do not initialize them yet */
3178 if (adev->gmc.xgmi.pending_reset &&
3179 !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3180 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
3181 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3182 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
3183 adev->ip_blocks[i].status.hw = false;
3184 continue;
3185 }
557f42a2 3186
afa6646b 3187 /* skip suspend of gfx/mes and psp for S0ix
32ff160d
AD
3188 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3189 * like at runtime. PSP is also part of the always on hardware
3190 * so no need to suspend it.
3191 */
557f42a2 3192 if (adev->in_s0ix &&
32ff160d 3193 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
afa6646b
AD
3194 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3195 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
557f42a2
AD
3196 continue;
3197
2a7798ea
AD
3198 /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3199 if (adev->in_s0ix &&
4e8303cf
LL
3200 (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
3201 IP_VERSION(5, 0, 0)) &&
3202 (adev->ip_blocks[i].version->type ==
3203 AMD_IP_BLOCK_TYPE_SDMA))
2a7798ea
AD
3204 continue;
3205
e11c7750
TH
 3206 /* Once swPSP has provided the IMU and RLC FW binaries to TOS during cold-boot,
 3207 * these are in TMR and hence are expected to be reused by PSP-TOS to reload
3208 * from this location and RLC Autoload automatically also gets loaded
3209 * from here based on PMFW -> PSP message during re-init sequence.
 3210 * Therefore, the psp suspend & resume should be skipped to avoid destroying
 3211 * the TMR and reloading FWs again for IMU enabled APU ASICs.
3212 */
3213 if (amdgpu_in_reset(adev) &&
3214 (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3215 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3216 continue;
3217
d38ceaf9 3218 /* XXX handle errors */
a1255107 3219 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 3220 /* XXX handle errors */
2c1a2784 3221 if (r) {
a1255107
AD
3222 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3223 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 3224 }
876923fb 3225 adev->ip_blocks[i].status.hw = false;
a3a09142 3226 /* handle putting the SMC in the appropriate state */
47fc644f 3227 if (!amdgpu_sriov_vf(adev)) {
86b93fd6
JZ
3228 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3229 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3230 if (r) {
3231 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3232 adev->mp1_state, r);
3233 return r;
3234 }
a3a09142
AD
3235 }
3236 }
d38ceaf9
AD
3237 }
3238
3239 return 0;
3240}
3241
e7854a03
AD
3242/**
3243 * amdgpu_device_ip_suspend - run suspend for hardware IPs
3244 *
3245 * @adev: amdgpu_device pointer
3246 *
3247 * Main suspend function for hardware IPs. The list of all the hardware
3248 * IPs that make up the asic is walked, clockgating is disabled and the
3249 * suspend callbacks are run. suspend puts the hardware and software state
3250 * in each IP into a state suitable for suspend.
3251 * Returns 0 on success, negative error code on failure.
3252 */
3253int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3254{
3255 int r;
3256
3c73683c
JC
3257 if (amdgpu_sriov_vf(adev)) {
3258 amdgpu_virt_fini_data_exchange(adev);
e7819644 3259 amdgpu_virt_request_full_gpu(adev, false);
3c73683c 3260 }
e7819644 3261
e7854a03
AD
3262 r = amdgpu_device_ip_suspend_phase1(adev);
3263 if (r)
3264 return r;
3265 r = amdgpu_device_ip_suspend_phase2(adev);
3266
e7819644
YT
3267 if (amdgpu_sriov_vf(adev))
3268 amdgpu_virt_release_full_gpu(adev, false);
3269
e7854a03
AD
3270 return r;
3271}
3272
06ec9070 3273static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3274{
3275 int i, r;
3276
2cb681b6 3277 static enum amd_ip_block_type ip_order[] = {
2cb681b6 3278 AMD_IP_BLOCK_TYPE_COMMON,
c1c39032 3279 AMD_IP_BLOCK_TYPE_GMC,
39186aef 3280 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
3281 AMD_IP_BLOCK_TYPE_IH,
3282 };
a90ad3c2 3283
95ea3dbc 3284 for (i = 0; i < adev->num_ip_blocks; i++) {
2cb681b6
ML
3285 int j;
3286 struct amdgpu_ip_block *block;
a90ad3c2 3287
4cd2a96d
J
3288 block = &adev->ip_blocks[i];
3289 block->status.hw = false;
2cb681b6 3290
4cd2a96d 3291 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
2cb681b6 3292
4cd2a96d 3293 if (block->version->type != ip_order[j] ||
2cb681b6
ML
3294 !block->status.valid)
3295 continue;
3296
3297 r = block->version->funcs->hw_init(adev);
0aaeefcc 3298 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3299 if (r)
3300 return r;
482f0e53 3301 block->status.hw = true;
a90ad3c2
ML
3302 }
3303 }
3304
3305 return 0;
3306}
3307
06ec9070 3308static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3309{
3310 int i, r;
3311
2cb681b6
ML
3312 static enum amd_ip_block_type ip_order[] = {
3313 AMD_IP_BLOCK_TYPE_SMC,
3314 AMD_IP_BLOCK_TYPE_DCE,
3315 AMD_IP_BLOCK_TYPE_GFX,
3316 AMD_IP_BLOCK_TYPE_SDMA,
ec64350d 3317 AMD_IP_BLOCK_TYPE_MES,
257deb8c 3318 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07 3319 AMD_IP_BLOCK_TYPE_VCE,
d2cdc014
YZ
3320 AMD_IP_BLOCK_TYPE_VCN,
3321 AMD_IP_BLOCK_TYPE_JPEG
2cb681b6 3322 };
a90ad3c2 3323
2cb681b6
ML
3324 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3325 int j;
3326 struct amdgpu_ip_block *block;
a90ad3c2 3327
2cb681b6
ML
3328 for (j = 0; j < adev->num_ip_blocks; j++) {
3329 block = &adev->ip_blocks[j];
3330
3331 if (block->version->type != ip_order[i] ||
482f0e53
ML
3332 !block->status.valid ||
3333 block->status.hw)
2cb681b6
ML
3334 continue;
3335
895bd048
JZ
3336 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3337 r = block->version->funcs->resume(adev);
3338 else
3339 r = block->version->funcs->hw_init(adev);
3340
0aaeefcc 3341 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3342 if (r)
3343 return r;
482f0e53 3344 block->status.hw = true;
a90ad3c2
ML
3345 }
3346 }
3347
3348 return 0;
3349}
3350
e3ecdffa
AD
3351/**
3352 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3353 *
3354 * @adev: amdgpu_device pointer
3355 *
3356 * First resume function for hardware IPs. The list of all the hardware
3357 * IPs that make up the asic is walked and the resume callbacks are run for
3358 * COMMON, GMC, and IH. resume puts the hardware into a functional state
3359 * after a suspend and updates the software state as necessary. This
3360 * function is also used for restoring the GPU after a GPU reset.
3361 * Returns 0 on success, negative error code on failure.
3362 */
06ec9070 3363static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
3364{
3365 int i, r;
3366
a90ad3c2 3367 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3368 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 3369 continue;
a90ad3c2 3370 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3371 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
d7274ec7
BZ
3372 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3373 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
482f0e53 3374
fcf0649f
CZ
3375 r = adev->ip_blocks[i].version->funcs->resume(adev);
3376 if (r) {
3377 DRM_ERROR("resume of IP block <%s> failed %d\n",
3378 adev->ip_blocks[i].version->funcs->name, r);
3379 return r;
3380 }
482f0e53 3381 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
3382 }
3383 }
3384
3385 return 0;
3386}
3387
e3ecdffa
AD
3388/**
3389 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3390 *
3391 * @adev: amdgpu_device pointer
3392 *
3393 * Second resume function for hardware IPs. The list of all the hardware
3394 * IPs that make up the asic is walked and the resume callbacks are run for
3395 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
3396 * functional state after a suspend and updates the software state as
3397 * necessary. This function is also used for restoring the GPU after a GPU
3398 * reset.
3399 * Returns 0 on success, negative error code on failure.
3400 */
06ec9070 3401static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3402{
3403 int i, r;
3404
3405 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3406 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 3407 continue;
fcf0649f 3408 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3409 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
3410 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3411 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 3412 continue;
a1255107 3413 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 3414 if (r) {
a1255107
AD
3415 DRM_ERROR("resume of IP block <%s> failed %d\n",
3416 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 3417 return r;
2c1a2784 3418 }
482f0e53 3419 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
3420 }
3421
3422 return 0;
3423}
3424
e3ecdffa
AD
3425/**
3426 * amdgpu_device_ip_resume - run resume for hardware IPs
3427 *
3428 * @adev: amdgpu_device pointer
3429 *
3430 * Main resume function for hardware IPs. The hardware IPs
3431 * are split into two resume functions because they are
b8920e1e 3432 * also used in recovering from a GPU reset and some additional
e3ecdffa
AD
3433 * steps need to be taken between them. In this case (S3/S4) they are
3434 * run sequentially.
3435 * Returns 0 on success, negative error code on failure.
3436 */
06ec9070 3437static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
3438{
3439 int r;
3440
06ec9070 3441 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
3442 if (r)
3443 return r;
7a3e0bb2
RZ
3444
3445 r = amdgpu_device_fw_loading(adev);
3446 if (r)
3447 return r;
3448
06ec9070 3449 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
3450
3451 return r;
3452}
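/*
 * Worked example of the ordering above (a summary of this function, not an
 * extra code path): phase1 resumes COMMON, GMC and IH (plus PSP when running
 * as an SR-IOV VF), amdgpu_device_fw_loading() then loads the microcode, and
 * phase2 resumes every remaining valid IP block in adev->ip_blocks order.
 */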
3453
e3ecdffa
AD
3454/**
3455 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3456 *
3457 * @adev: amdgpu_device pointer
3458 *
3459 * Query the VBIOS data tables to determine if the board supports SR-IOV.
3460 */
4e99a44e 3461static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 3462{
6867e1b5
ML
3463 if (amdgpu_sriov_vf(adev)) {
3464 if (adev->is_atom_fw) {
58ff791a 3465 if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
6867e1b5
ML
3466 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3467 } else {
3468 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3469 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3470 }
3471
3472 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3473 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 3474 }
048765ad
AR
3475}
3476
e3ecdffa
AD
3477/**
3478 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3479 *
3480 * @asic_type: AMD asic type
3481 *
3482 * Check if there is DC (new modesetting infrastructure) support for an asic.
3483 * returns true if DC has support, false if not.
3484 */
4562236b
HW
3485bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3486{
3487 switch (asic_type) {
0637d417
AD
3488#ifdef CONFIG_DRM_AMDGPU_SI
3489 case CHIP_HAINAN:
3490#endif
3491 case CHIP_TOPAZ:
3492 /* chips with no display hardware */
3493 return false;
4562236b 3494#if defined(CONFIG_DRM_AMD_DC)
64200c46
MR
3495 case CHIP_TAHITI:
3496 case CHIP_PITCAIRN:
3497 case CHIP_VERDE:
3498 case CHIP_OLAND:
2d32ffd6
AD
3499 /*
3500 * We have systems in the wild with these ASICs that require
3501 * LVDS and VGA support which is not supported with DC.
3502 *
3503 * Fallback to the non-DC driver here by default so as not to
3504 * cause regressions.
3505 */
3506#if defined(CONFIG_DRM_AMD_DC_SI)
3507 return amdgpu_dc > 0;
3508#else
3509 return false;
64200c46 3510#endif
4562236b 3511 case CHIP_BONAIRE:
0d6fbccb 3512 case CHIP_KAVERI:
367e6687
AD
3513 case CHIP_KABINI:
3514 case CHIP_MULLINS:
d9fda248
HW
3515 /*
3516 * We have systems in the wild with these ASICs that require
b5a0168e 3517 * VGA support which is not supported with DC.
d9fda248
HW
3518 *
3519 * Fallback to the non-DC driver here by default so as not to
3520 * cause regressions.
3521 */
3522 return amdgpu_dc > 0;
f7f12b25 3523 default:
fd187853 3524 return amdgpu_dc != 0;
f7f12b25 3525#else
4562236b 3526 default:
93b09a9a 3527 if (amdgpu_dc > 0)
b8920e1e 3528 DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
4562236b 3529 return false;
f7f12b25 3530#endif
4562236b
HW
3531 }
3532}
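/*
 * Usage sketch (assuming the amdgpu_dc variable is exposed as the amdgpu.dc
 * module parameter): amdgpu.dc=1 opts the legacy SI/CIK parts listed above
 * into DC despite the LVDS/VGA limitations, amdgpu.dc=0 forces the non-DC
 * path on ASICs where DC would otherwise be used, and anything else follows
 * the defaults encoded in the switch above.
 */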
3533
3534/**
3535 * amdgpu_device_has_dc_support - check if dc is supported
3536 *
982a820b 3537 * @adev: amdgpu_device pointer
4562236b
HW
3538 *
3539 * Returns true for supported, false for not supported
3540 */
3541bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3542{
25263da3 3543 if (adev->enable_virtual_display ||
abaf210c 3544 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
2555039d
XY
3545 return false;
3546
4562236b
HW
3547 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3548}
3549
d4535e2c
AG
3550static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3551{
3552 struct amdgpu_device *adev =
3553 container_of(__work, struct amdgpu_device, xgmi_reset_work);
d95e8e97 3554 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
d4535e2c 3555
c6a6e2db
AG
3556 /* It's a bug to not have a hive within this function */
3557 if (WARN_ON(!hive))
3558 return;
3559
3560 /*
3561 * Use task barrier to synchronize all xgmi reset works across the
3562 * hive. task_barrier_enter and task_barrier_exit will block
3563 * until all the threads running the xgmi reset works reach
3564 * those points. task_barrier_full will do both blocks.
3565 */
3566 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3567
3568 task_barrier_enter(&hive->tb);
4a580877 3569 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
c6a6e2db
AG
3570
3571 if (adev->asic_reset_res)
3572 goto fail;
3573
3574 task_barrier_exit(&hive->tb);
4a580877 3575 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
c6a6e2db
AG
3576
3577 if (adev->asic_reset_res)
3578 goto fail;
43c4d576 3579
5e67bba3 3580 if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
3581 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
3582 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
c6a6e2db
AG
3583 } else {
3584
3585 task_barrier_full(&hive->tb);
3586 adev->asic_reset_res = amdgpu_asic_reset(adev);
3587 }
ce316fa5 3588
c6a6e2db 3589fail:
d4535e2c 3590 if (adev->asic_reset_res)
fed184e9 3591 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
4a580877 3592 adev->asic_reset_res, adev_to_drm(adev)->unique);
d95e8e97 3593 amdgpu_put_xgmi_hive(hive);
d4535e2c
AG
3594}
3595
71f98027
AD
3596static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3597{
3598 char *input = amdgpu_lockup_timeout;
3599 char *timeout_setting = NULL;
3600 int index = 0;
3601 long timeout;
3602 int ret = 0;
3603
3604 /*
67387dfe
AD
3605 * By default, the timeout for non-compute jobs is 10000
3606 * and 60000 for compute jobs.
71f98027 3607 * In SR-IOV mode, the compute timeout is 60000 only when pp_one_vf
b7b2a316 3608 * is enabled, otherwise it is 10000.
71f98027
AD
3609 */
3610 adev->gfx_timeout = msecs_to_jiffies(10000);
3611 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
9882e278
ED
3612 if (amdgpu_sriov_vf(adev))
3613 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3614 msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
71f98027 3615 else
67387dfe 3616 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027 3617
f440ff44 3618 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 3619 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 3620 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
3621 ret = kstrtol(timeout_setting, 0, &timeout);
3622 if (ret)
3623 return ret;
3624
3625 if (timeout == 0) {
3626 index++;
3627 continue;
3628 } else if (timeout < 0) {
3629 timeout = MAX_SCHEDULE_TIMEOUT;
127aedf9
CK
3630 dev_warn(adev->dev, "lockup timeout disabled");
3631 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
71f98027
AD
3632 } else {
3633 timeout = msecs_to_jiffies(timeout);
3634 }
3635
3636 switch (index++) {
3637 case 0:
3638 adev->gfx_timeout = timeout;
3639 break;
3640 case 1:
3641 adev->compute_timeout = timeout;
3642 break;
3643 case 2:
3644 adev->sdma_timeout = timeout;
3645 break;
3646 case 3:
3647 adev->video_timeout = timeout;
3648 break;
3649 default:
3650 break;
3651 }
3652 }
3653 /*
3654 * There is only one value specified and
3655 * it should apply to all non-compute jobs.
3656 */
bcccee89 3657 if (index == 1) {
71f98027 3658 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
3659 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3660 adev->compute_timeout = adev->gfx_timeout;
3661 }
71f98027
AD
3662 }
3663
3664 return ret;
3665}
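/*
 * Usage sketch (assuming amdgpu_lockup_timeout is exposed as the
 * amdgpu.lockup_timeout module parameter): up to four comma separated values
 * are consumed in the order of the switch above, e.g.
 *
 *   amdgpu.lockup_timeout=10000,60000,10000,10000
 *
 * sets the gfx, compute, sdma and video timeouts in milliseconds. A single
 * value applies to all non-compute queues (and also to compute under SR-IOV
 * or passthrough), a zero keeps the default for that slot, and a negative
 * value disables the timeout for that queue type.
 */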
d4535e2c 3666
4a74c38c
PY
3667/**
3668 * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3669 *
3670 * @adev: amdgpu_device pointer
3671 *
3672 * RAM is direct mapped to the GPU if the IOMMU is not enabled or is in passthrough mode
3673 */
3674static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3675{
3676 struct iommu_domain *domain;
3677
3678 domain = iommu_get_domain_for_dev(adev->dev);
3679 if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3680 adev->ram_is_direct_mapped = true;
3681}
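/*
 * Illustrative note: ram_is_direct_mapped ends up true whenever no IOMMU
 * translation is in effect for this device, i.e. there is no domain at all
 * or the domain is an identity mapping (typically the case when booting with
 * iommu=pt; that boot parameter is an assumption about the usual passthrough
 * setup, not something checked here).
 */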
3682
77f3a5cd 3683static const struct attribute *amdgpu_dev_attributes[] = {
77f3a5cd
ND
3684 &dev_attr_pcie_replay_count.attr,
3685 NULL
3686};
3687
02ff519e
AD
3688static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
3689{
3690 if (amdgpu_mcbp == 1)
3691 adev->gfx.mcbp = true;
1e9e15dc
JZ
3692 else if (amdgpu_mcbp == 0)
3693 adev->gfx.mcbp = false;
4e8303cf
LL
3694 else if ((amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 0, 0)) &&
3695 (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 0, 0)) &&
1e9e15dc 3696 adev->gfx.num_gfx_rings)
50a7c876
AD
3697 adev->gfx.mcbp = true;
3698
02ff519e
AD
3699 if (amdgpu_sriov_vf(adev))
3700 adev->gfx.mcbp = true;
3701
3702 if (adev->gfx.mcbp)
3703 DRM_INFO("MCBP is enabled\n");
3704}
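/*
 * Usage sketch (assuming amdgpu_mcbp is exposed as the amdgpu.mcbp module
 * parameter): mcbp=1 force-enables mid command buffer preemption, mcbp=0
 * force-disables it, and any other value lets the checks above enable it
 * automatically for gfx9 parts with gfx rings; SR-IOV always enables it.
 */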
3705
d38ceaf9
AD
3706/**
3707 * amdgpu_device_init - initialize the driver
3708 *
3709 * @adev: amdgpu_device pointer
d38ceaf9
AD
3710 * @flags: driver flags
3711 *
3712 * Initializes the driver info and hw (all asics).
3713 * Returns 0 for success or an error on failure.
3714 * Called at driver startup.
3715 */
3716int amdgpu_device_init(struct amdgpu_device *adev,
d38ceaf9
AD
3717 uint32_t flags)
3718{
8aba21b7
LT
3719 struct drm_device *ddev = adev_to_drm(adev);
3720 struct pci_dev *pdev = adev->pdev;
d38ceaf9 3721 int r, i;
b98c6299 3722 bool px = false;
95844d20 3723 u32 max_MBps;
59e9fff1 3724 int tmp;
d38ceaf9
AD
3725
3726 adev->shutdown = false;
d38ceaf9 3727 adev->flags = flags;
4e66d7d2
YZ
3728
3729 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3730 adev->asic_type = amdgpu_force_asic_type;
3731 else
3732 adev->asic_type = flags & AMD_ASIC_MASK;
3733
d38ceaf9 3734 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 3735 if (amdgpu_emu_mode == 1)
8bdab6bb 3736 adev->usec_timeout *= 10;
770d13b1 3737 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
3738 adev->accel_working = false;
3739 adev->num_rings = 0;
68ce8b24 3740 RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
d38ceaf9
AD
3741 adev->mman.buffer_funcs = NULL;
3742 adev->mman.buffer_funcs_ring = NULL;
3743 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 3744 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 3745 adev->gmc.gmc_funcs = NULL;
7bd939d0 3746 adev->harvest_ip_mask = 0x0;
f54d1867 3747 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 3748 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
3749
3750 adev->smc_rreg = &amdgpu_invalid_rreg;
3751 adev->smc_wreg = &amdgpu_invalid_wreg;
3752 adev->pcie_rreg = &amdgpu_invalid_rreg;
3753 adev->pcie_wreg = &amdgpu_invalid_wreg;
0c552ed3
LM
3754 adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
3755 adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
36b9a952
HR
3756 adev->pciep_rreg = &amdgpu_invalid_rreg;
3757 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
3758 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3759 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
a76b2870
CL
3760 adev->pcie_rreg64_ext = &amdgpu_invalid_rreg64_ext;
3761 adev->pcie_wreg64_ext = &amdgpu_invalid_wreg64_ext;
d38ceaf9
AD
3762 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3763 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3764 adev->didt_rreg = &amdgpu_invalid_rreg;
3765 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
3766 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3767 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
3768 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3769 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3770
3e39ab90
AD
3771 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3772 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3773 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
3774
3775 /* mutex initializations are all done here so we
b8920e1e
SS
3776 * can call these functions without running into locking issues
3777 */
0e5ca0d1 3778 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
3779 mutex_init(&adev->pm.mutex);
3780 mutex_init(&adev->gfx.gpu_clock_mutex);
3781 mutex_init(&adev->srbm_mutex);
b8866c26 3782 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3783 mutex_init(&adev->gfx.gfx_off_mutex);
98a54e88 3784 mutex_init(&adev->gfx.partition_mutex);
d38ceaf9 3785 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3786 mutex_init(&adev->mn_lock);
e23b74aa 3787 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3788 hash_init(adev->mn_hash);
32eaeae0 3789 mutex_init(&adev->psp.mutex);
bd052211 3790 mutex_init(&adev->notifier_lock);
8cda7a4f 3791 mutex_init(&adev->pm.stable_pstate_ctx_lock);
f113cc32 3792 mutex_init(&adev->benchmark_mutex);
d38ceaf9 3793
ab3b9de6 3794 amdgpu_device_init_apu_flags(adev);
9f6a7857 3795
912dfc84
EQ
3796 r = amdgpu_device_check_arguments(adev);
3797 if (r)
3798 return r;
d38ceaf9 3799
d38ceaf9
AD
3800 spin_lock_init(&adev->mmio_idx_lock);
3801 spin_lock_init(&adev->smc_idx_lock);
3802 spin_lock_init(&adev->pcie_idx_lock);
3803 spin_lock_init(&adev->uvd_ctx_idx_lock);
3804 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3805 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3806 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3807 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3808 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3809
0c4e7fa5
CZ
3810 INIT_LIST_HEAD(&adev->shadow_list);
3811 mutex_init(&adev->shadow_list_lock);
3812
655ce9cb 3813 INIT_LIST_HEAD(&adev->reset_list);
3814
6492e1b0 3815 INIT_LIST_HEAD(&adev->ras_list);
3816
3e38b634
EQ
3817 INIT_LIST_HEAD(&adev->pm.od_kobj_list);
3818
beff74bc
AD
3819 INIT_DELAYED_WORK(&adev->delayed_init_work,
3820 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3821 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3822 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3823
d4535e2c
AG
3824 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3825
d23ee13f 3826 adev->gfx.gfx_off_req_count = 1;
0ad7347a
AA
3827 adev->gfx.gfx_off_residency = 0;
3828 adev->gfx.gfx_off_entrycount = 0;
b6e79d9a 3829 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3830
b265bdbd
EQ
3831 atomic_set(&adev->throttling_logging_enabled, 1);
3832 /*
3833 * If throttling continues, logging will be performed every minute
3834 * to avoid log flooding. "-1" is subtracted since the thermal
3835 * throttling interrupt comes every second. Thus, the total logging
3836 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
3837 * for throttling interrupt) = 60 seconds.
3838 */
3839 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3840 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3841
0fa49558
AX
3842 /* Registers mapping */
3843 /* TODO: block userspace mapping of io register */
da69c161
KW
3844 if (adev->asic_type >= CHIP_BONAIRE) {
3845 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3846 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3847 } else {
3848 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3849 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3850 }
d38ceaf9 3851
6c08e0ef
EQ
3852 for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3853 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3854
d38ceaf9 3855 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
b8920e1e 3856 if (!adev->rmmio)
d38ceaf9 3857 return -ENOMEM;
b8920e1e 3858
d38ceaf9 3859 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
b8920e1e 3860 DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size);
d38ceaf9 3861
436afdfa
PY
3862 /*
3863 * Reset domain needs to be present early, before XGMI hive discovered
3864 * (if any) and initialized to use reset sem and in_gpu reset flag
3865 * early on during init and before calling to RREG32.
3866 */
3867 adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3868 if (!adev->reset_domain)
3869 return -ENOMEM;
3870
3aa0115d
ML
3871 /* detect hw virtualization here */
3872 amdgpu_detect_virtualization(adev);
3873
04e85958
TL
3874 amdgpu_device_get_pcie_info(adev);
3875
dffa11b4
ML
3876 r = amdgpu_device_get_job_timeout_settings(adev);
3877 if (r) {
3878 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4ef87d8f 3879 return r;
a190d1c7
XY
3880 }
3881
d38ceaf9 3882 /* early init functions */
06ec9070 3883 r = amdgpu_device_ip_early_init(adev);
d38ceaf9 3884 if (r)
4ef87d8f 3885 return r;
d38ceaf9 3886
02ff519e
AD
3887 amdgpu_device_set_mcbp(adev);
3888
b7cdb41e
ML
3889 /* Get rid of things like offb */
3890 r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
3891 if (r)
3892 return r;
3893
4d33e704
SK
3894 /* Enable TMZ based on IP_VERSION */
3895 amdgpu_gmc_tmz_set(adev);
3896
957b0787 3897 amdgpu_gmc_noretry_set(adev);
4a0165f0
VS
3898 /* Need to get xgmi info early to decide the reset behavior */
3899 if (adev->gmc.xgmi.supported) {
3900 r = adev->gfxhub.funcs->get_xgmi_info(adev);
3901 if (r)
3902 return r;
3903 }
3904
8e6d0b69 3905 /* enable PCIE atomic ops */
b4520bfd
GW
3906 if (amdgpu_sriov_vf(adev)) {
3907 if (adev->virt.fw_reserve.p_pf2vf)
3908 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
3909 adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
3910 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
0e768043
YZ
3911 /* APUs with gfx9 onwards don't rely on PCIe atomics; rather, the
3912 * internal path natively supports atomics, so set have_atomics_support to true.
3913 */
b4520bfd 3914 } else if ((adev->flags & AMD_IS_APU) &&
4e8303cf
LL
3915 (amdgpu_ip_version(adev, GC_HWIP, 0) >
3916 IP_VERSION(9, 0, 0))) {
0e768043 3917 adev->have_atomics_support = true;
b4520bfd 3918 } else {
8e6d0b69 3919 adev->have_atomics_support =
3920 !pci_enable_atomic_ops_to_root(adev->pdev,
3921 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3922 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
b4520bfd
GW
3923 }
3924
8e6d0b69 3925 if (!adev->have_atomics_support)
3926 dev_info(adev->dev, "PCIE atomic ops is not supported\n");
3927
6585661d 3928 /* doorbell bar mapping and doorbell index init */
43c064db 3929 amdgpu_doorbell_init(adev);
6585661d 3930
9475a943
SL
3931 if (amdgpu_emu_mode == 1) {
3932 /* post the asic on emulation mode */
3933 emu_soc_asic_init(adev);
bfca0289 3934 goto fence_driver_init;
9475a943 3935 }
bfca0289 3936
04442bf7
LL
3937 amdgpu_reset_init(adev);
3938
4e99a44e 3939 /* detect if we are with an SRIOV vbios */
b4520bfd
GW
3940 if (adev->bios)
3941 amdgpu_device_detect_sriov_bios(adev);
048765ad 3942
95e8e59e
AD
3943 /* check if we need to reset the asic
3944 * E.g., driver was not cleanly unloaded previously, etc.
3945 */
f14899fd 3946 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
e3c1b071 3947 if (adev->gmc.xgmi.num_physical_nodes) {
3948 dev_info(adev->dev, "Pending hive reset.\n");
3949 adev->gmc.xgmi.pending_reset = true;
3950 /* Only need to init necessary block for SMU to handle the reset */
3951 for (i = 0; i < adev->num_ip_blocks; i++) {
3952 if (!adev->ip_blocks[i].status.valid)
3953 continue;
3954 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3955 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3956 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3957 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
751f43e7 3958 DRM_DEBUG("IP %s disabled for hw_init.\n",
e3c1b071 3959 adev->ip_blocks[i].version->funcs->name);
3960 adev->ip_blocks[i].status.hw = true;
3961 }
3962 }
3963 } else {
59e9fff1 3964 tmp = amdgpu_reset_method;
3965 /* It should do a default reset when loading or reloading the driver,
3966 * regardless of the module parameter reset_method.
3967 */
3968 amdgpu_reset_method = AMD_RESET_METHOD_NONE;
e3c1b071 3969 r = amdgpu_asic_reset(adev);
59e9fff1 3970 amdgpu_reset_method = tmp;
e3c1b071 3971 if (r) {
3972 dev_err(adev->dev, "asic reset on init failed\n");
3973 goto failed;
3974 }
95e8e59e
AD
3975 }
3976 }
3977
d38ceaf9 3978 /* Post card if necessary */
39c640c0 3979 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3980 if (!adev->bios) {
bec86378 3981 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3982 r = -EINVAL;
3983 goto failed;
d38ceaf9 3984 }
bec86378 3985 DRM_INFO("GPU posting now...\n");
4d2997ab 3986 r = amdgpu_device_asic_init(adev);
4e99a44e
ML
3987 if (r) {
3988 dev_err(adev->dev, "gpu post error!\n");
3989 goto failed;
3990 }
d38ceaf9
AD
3991 }
3992
9535a86a
SZ
3993 if (adev->bios) {
3994 if (adev->is_atom_fw) {
3995 /* Initialize clocks */
3996 r = amdgpu_atomfirmware_get_clock_info(adev);
3997 if (r) {
3998 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
3999 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4000 goto failed;
4001 }
4002 } else {
4003 /* Initialize clocks */
4004 r = amdgpu_atombios_get_clock_info(adev);
4005 if (r) {
4006 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
4007 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4008 goto failed;
4009 }
4010 /* init i2c buses */
4011 if (!amdgpu_device_has_dc_support(adev))
4012 amdgpu_atombios_i2c_init(adev);
a5bde2f9 4013 }
2c1a2784 4014 }
d38ceaf9 4015
bfca0289 4016fence_driver_init:
d38ceaf9 4017 /* Fence driver */
067f44c8 4018 r = amdgpu_fence_driver_sw_init(adev);
2c1a2784 4019 if (r) {
067f44c8 4020 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
e23b74aa 4021 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 4022 goto failed;
2c1a2784 4023 }
d38ceaf9
AD
4024
4025 /* init the mode config */
4a580877 4026 drm_mode_config_init(adev_to_drm(adev));
d38ceaf9 4027
06ec9070 4028 r = amdgpu_device_ip_init(adev);
d38ceaf9 4029 if (r) {
06ec9070 4030 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 4031 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
970fd197 4032 goto release_ras_con;
d38ceaf9
AD
4033 }
4034
8d35a259
LG
4035 amdgpu_fence_driver_hw_init(adev);
4036
d69b8971
YZ
4037 dev_info(adev->dev,
4038 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
4039 adev->gfx.config.max_shader_engines,
4040 adev->gfx.config.max_sh_per_se,
4041 adev->gfx.config.max_cu_per_sh,
4042 adev->gfx.cu_info.number);
4043
d38ceaf9
AD
4044 adev->accel_working = true;
4045
e59c0205
AX
4046 amdgpu_vm_check_compute_bug(adev);
4047
95844d20
MO
4048 /* Initialize the buffer migration limit. */
4049 if (amdgpu_moverate >= 0)
4050 max_MBps = amdgpu_moverate;
4051 else
4052 max_MBps = 8; /* Allow 8 MB/s. */
4053 /* Get a log2 for easy divisions. */
4054 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
4055
b0adca4d
EQ
4056 /*
4057 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
4058 * Otherwise the mgpu fan boost feature will be skipped due to the
4059 * gpu instance is counted less.
4060 */
4061 amdgpu_register_gpu_instance(adev);
4062
d38ceaf9
AD
4063 /* enable clockgating, etc. after ib tests, etc. since some blocks require
4064 * explicit gating rather than handling it automatically.
4065 */
e3c1b071 4066 if (!adev->gmc.xgmi.pending_reset) {
4067 r = amdgpu_device_ip_late_init(adev);
4068 if (r) {
4069 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
4070 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
970fd197 4071 goto release_ras_con;
e3c1b071 4072 }
4073 /* must succeed. */
4074 amdgpu_ras_resume(adev);
4075 queue_delayed_work(system_wq, &adev->delayed_init_work,
4076 msecs_to_jiffies(AMDGPU_RESUME_MS));
2c1a2784 4077 }
d38ceaf9 4078
38eecbe0
CL
4079 if (amdgpu_sriov_vf(adev)) {
4080 amdgpu_virt_release_full_gpu(adev, true);
2c738637 4081 flush_delayed_work(&adev->delayed_init_work);
38eecbe0 4082 }
2c738637 4083
90bcb9b5
EQ
4084 /*
4085 * Place the sysfs registration after `late_init`, as some of the
4086 * operations performed in `late_init` might affect the creation of
4087 * the sysfs interfaces.
4088 */
4089 r = amdgpu_atombios_sysfs_init(adev);
4090 if (r)
4091 drm_err(&adev->ddev,
4092 "registering atombios sysfs failed (%d).\n", r);
4093
4094 r = amdgpu_pm_sysfs_init(adev);
4095 if (r)
4096 DRM_ERROR("registering pm sysfs failed (%d).\n", r);
4097
4098 r = amdgpu_ucode_sysfs_init(adev);
4099 if (r) {
4100 adev->ucode_sysfs_en = false;
4101 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
4102 } else
4103 adev->ucode_sysfs_en = true;
4104
77f3a5cd 4105 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
5aea5327 4106 if (r)
77f3a5cd 4107 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166 4108
76da73f0
LL
4109 r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
4110 if (r)
4111 dev_err(adev->dev,
4112 "Could not create amdgpu board attributes\n");
4113
7957ec80
LL
4114 amdgpu_fru_sysfs_init(adev);
4115
d155bef0
AB
4116 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4117 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
4118 if (r)
4119 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
4120
c1dd4aa6
AG
4121 /* Have stored pci confspace at hand for restore in sudden PCI error */
4122 if (amdgpu_device_cache_pci_state(adev->pdev))
4123 pci_restore_state(pdev);
4124
8c3dd61c
KHF
4125 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
4126 /* this will fail for cards that aren't VGA class devices, just
b8920e1e
SS
4127 * ignore it
4128 */
8c3dd61c 4129 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
bf44e8ce 4130 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
8c3dd61c 4131
d37a3929
OC
4132 px = amdgpu_device_supports_px(ddev);
4133
4134 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
4135 apple_gmux_detect(NULL, NULL)))
8c3dd61c
KHF
4136 vga_switcheroo_register_client(adev->pdev,
4137 &amdgpu_switcheroo_ops, px);
d37a3929
OC
4138
4139 if (px)
8c3dd61c 4140 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
8c3dd61c 4141
e3c1b071 4142 if (adev->gmc.xgmi.pending_reset)
4143 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
4144 msecs_to_jiffies(AMDGPU_RESUME_MS));
4145
4a74c38c
PY
4146 amdgpu_device_check_iommu_direct_map(adev);
4147
d38ceaf9 4148 return 0;
83ba126a 4149
970fd197 4150release_ras_con:
38eecbe0
CL
4151 if (amdgpu_sriov_vf(adev))
4152 amdgpu_virt_release_full_gpu(adev, true);
4153
4154 /* failed in exclusive mode due to timeout */
4155 if (amdgpu_sriov_vf(adev) &&
4156 !amdgpu_sriov_runtime(adev) &&
4157 amdgpu_virt_mmio_blocked(adev) &&
4158 !amdgpu_virt_wait_reset(adev)) {
4159 dev_err(adev->dev, "VF exclusive mode timeout\n");
4160 /* Don't send request since VF is inactive. */
4161 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4162 adev->virt.ops = NULL;
4163 r = -EAGAIN;
4164 }
970fd197
SY
4165 amdgpu_release_ras_context(adev);
4166
83ba126a 4167failed:
89041940 4168 amdgpu_vf_error_trans_all(adev);
8840a387 4169
83ba126a 4170 return r;
d38ceaf9
AD
4171}
4172
07775fc1
AG
4173static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
4174{
62d5f9f7 4175
07775fc1
AG
4176 /* Clear all CPU mappings pointing to this device */
4177 unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
4178
4179 /* Unmap all mapped bars - Doorbell, registers and VRAM */
43c064db 4180 amdgpu_doorbell_fini(adev);
07775fc1
AG
4181
4182 iounmap(adev->rmmio);
4183 adev->rmmio = NULL;
4184 if (adev->mman.aper_base_kaddr)
4185 iounmap(adev->mman.aper_base_kaddr);
4186 adev->mman.aper_base_kaddr = NULL;
4187
4188 /* Memory manager related */
a0ba1279 4189 if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
07775fc1
AG
4190 arch_phys_wc_del(adev->gmc.vram_mtrr);
4191 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4192 }
4193}
4194
d38ceaf9 4195/**
bbe04dec 4196 * amdgpu_device_fini_hw - tear down the driver
d38ceaf9
AD
4197 *
4198 * @adev: amdgpu_device pointer
4199 *
4200 * Tear down the driver info (all asics).
4201 * Called at driver shutdown.
4202 */
72c8c97b 4203void amdgpu_device_fini_hw(struct amdgpu_device *adev)
d38ceaf9 4204{
aac89168 4205 dev_info(adev->dev, "amdgpu: finishing device.\n");
9f875167 4206 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 4207 adev->shutdown = true;
9f875167 4208
752c683d
ML
4209 /* make sure the IB tests have finished before entering exclusive mode
4210 * to avoid preemption during the IB tests
b8920e1e 4211 */
519b8b76 4212 if (amdgpu_sriov_vf(adev)) {
752c683d 4213 amdgpu_virt_request_full_gpu(adev, false);
519b8b76
BZ
4214 amdgpu_virt_fini_data_exchange(adev);
4215 }
752c683d 4216
e5b03032
ML
4217 /* disable all interrupts */
4218 amdgpu_irq_disable_all(adev);
47fc644f 4219 if (adev->mode_info.mode_config_initialized) {
1053b9c9 4220 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4a580877 4221 drm_helper_force_disable_all(adev_to_drm(adev));
ff97cba8 4222 else
4a580877 4223 drm_atomic_helper_shutdown(adev_to_drm(adev));
ff97cba8 4224 }
8d35a259 4225 amdgpu_fence_driver_hw_fini(adev);
72c8c97b 4226
cd3a8a59 4227 if (adev->mman.initialized)
9bff18d1 4228 drain_workqueue(adev->mman.bdev.wq);
98f56188 4229
53e9d836 4230 if (adev->pm.sysfs_initialized)
7c868b59 4231 amdgpu_pm_sysfs_fini(adev);
72c8c97b
AG
4232 if (adev->ucode_sysfs_en)
4233 amdgpu_ucode_sysfs_fini(adev);
4234 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
7957ec80 4235 amdgpu_fru_sysfs_fini(adev);
72c8c97b 4236
232d1d43
SY
4237 /* disable ras feature must before hw fini */
4238 amdgpu_ras_pre_fini(adev);
4239
e9669fb7 4240 amdgpu_device_ip_fini_early(adev);
d10d0daa 4241
a3848df6
YW
4242 amdgpu_irq_fini_hw(adev);
4243
b6fd6e0f
SK
4244 if (adev->mman.initialized)
4245 ttm_device_clear_dma_mappings(&adev->mman.bdev);
894c6890 4246
d10d0daa 4247 amdgpu_gart_dummy_page_fini(adev);
07775fc1 4248
39934d3e
VP
4249 if (drm_dev_is_unplugged(adev_to_drm(adev)))
4250 amdgpu_device_unmap_mmio(adev);
87172e89 4251
72c8c97b
AG
4252}
4253
4254void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4255{
62d5f9f7 4256 int idx;
d37a3929 4257 bool px;
62d5f9f7 4258
8d35a259 4259 amdgpu_fence_driver_sw_fini(adev);
a5c5d8d5 4260 amdgpu_device_ip_fini(adev);
b31d3063 4261 amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
d38ceaf9 4262 adev->accel_working = false;
68ce8b24 4263 dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
04442bf7
LL
4264
4265 amdgpu_reset_fini(adev);
4266
d38ceaf9 4267 /* free i2c buses */
4562236b
HW
4268 if (!amdgpu_device_has_dc_support(adev))
4269 amdgpu_i2c_fini(adev);
bfca0289
SL
4270
4271 if (amdgpu_emu_mode != 1)
4272 amdgpu_atombios_fini(adev);
4273
d38ceaf9
AD
4274 kfree(adev->bios);
4275 adev->bios = NULL;
d37a3929 4276
8a2b5139
LL
4277 kfree(adev->fru_info);
4278 adev->fru_info = NULL;
4279
d37a3929
OC
4280 px = amdgpu_device_supports_px(adev_to_drm(adev));
4281
4282 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
4283 apple_gmux_detect(NULL, NULL)))
84c8b22e 4284 vga_switcheroo_unregister_client(adev->pdev);
d37a3929
OC
4285
4286 if (px)
83ba126a 4287 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d37a3929 4288
38d6be81 4289 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
b8779475 4290 vga_client_unregister(adev->pdev);
e9bc1bf7 4291
62d5f9f7
LS
4292 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4293
4294 iounmap(adev->rmmio);
4295 adev->rmmio = NULL;
43c064db 4296 amdgpu_doorbell_fini(adev);
62d5f9f7
LS
4297 drm_dev_exit(idx);
4298 }
4299
d155bef0
AB
4300 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4301 amdgpu_pmu_fini(adev);
72de33f8 4302 if (adev->mman.discovery_bin)
a190d1c7 4303 amdgpu_discovery_fini(adev);
72c8c97b 4304
cfbb6b00
AG
4305 amdgpu_reset_put_reset_domain(adev->reset_domain);
4306 adev->reset_domain = NULL;
4307
72c8c97b
AG
4308 kfree(adev->pci_state);
4309
d38ceaf9
AD
4310}
4311
58144d28
ND
4312/**
4313 * amdgpu_device_evict_resources - evict device resources
4314 * @adev: amdgpu device object
4315 *
4316 * Evicts all ttm device resources (vram BOs, gart table) from the lru list
4317 * of the vram memory type. Mainly used for evicting device resources
4318 * at suspend time.
4319 *
4320 */
7863c155 4321static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
58144d28 4322{
7863c155
ML
4323 int ret;
4324
e53d9665
ML
4325 /* No need to evict vram on APUs for suspend to ram or s2idle */
4326 if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
7863c155 4327 return 0;
58144d28 4328
7863c155
ML
4329 ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4330 if (ret)
58144d28 4331 DRM_WARN("evicting device resources failed\n");
7863c155 4332 return ret;
58144d28 4333}
d38ceaf9
AD
4334
4335/*
4336 * Suspend & resume.
4337 */
4338/**
810ddc3a 4339 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 4340 *
87e3f136 4341 * @dev: drm dev pointer
87e3f136 4342 * @fbcon : notify the fbdev of suspend
d38ceaf9
AD
4343 *
4344 * Puts the hw in the suspend state (all asics).
4345 * Returns 0 for success or an error on failure.
4346 * Called at driver suspend.
4347 */
de185019 4348int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9 4349{
a2e15b0e 4350 struct amdgpu_device *adev = drm_to_adev(dev);
d7274ec7 4351 int r = 0;
d38ceaf9 4352
d38ceaf9
AD
4353 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4354 return 0;
4355
44779b43 4356 adev->in_suspend = true;
3fa8f89d 4357
47ea2076
SF
4358 /* Evict the majority of BOs before grabbing the full access */
4359 r = amdgpu_device_evict_resources(adev);
4360 if (r)
4361 return r;
4362
d7274ec7
BZ
4363 if (amdgpu_sriov_vf(adev)) {
4364 amdgpu_virt_fini_data_exchange(adev);
4365 r = amdgpu_virt_request_full_gpu(adev, false);
4366 if (r)
4367 return r;
4368 }
4369
3fa8f89d
S
4370 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4371 DRM_WARN("smart shift update failed\n");
4372
5f818173 4373 if (fbcon)
087451f3 4374 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
5f818173 4375
beff74bc 4376 cancel_delayed_work_sync(&adev->delayed_init_work);
0dee7263 4377 flush_delayed_work(&adev->gfx.gfx_off_delay_work);
a5459475 4378
5e6932fe 4379 amdgpu_ras_suspend(adev);
4380
2196927b 4381 amdgpu_device_ip_suspend_phase1(adev);
fe1053b7 4382
c004d44e 4383 if (!adev->in_s0ix)
5d3a2d95 4384 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
94fa5660 4385
7863c155
ML
4386 r = amdgpu_device_evict_resources(adev);
4387 if (r)
4388 return r;
d38ceaf9 4389
8d35a259 4390 amdgpu_fence_driver_hw_fini(adev);
d38ceaf9 4391
2196927b 4392 amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 4393
d7274ec7
BZ
4394 if (amdgpu_sriov_vf(adev))
4395 amdgpu_virt_release_full_gpu(adev, false);
4396
d38ceaf9
AD
4397 return 0;
4398}
4399
4400/**
810ddc3a 4401 * amdgpu_device_resume - initiate device resume
d38ceaf9 4402 *
87e3f136 4403 * @dev: drm dev pointer
87e3f136 4404 * @fbcon : notify the fbdev of resume
d38ceaf9
AD
4405 *
4406 * Bring the hw back to operating state (all asics).
4407 * Returns 0 for success or an error on failure.
4408 * Called at driver resume.
4409 */
de185019 4410int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9 4411{
1348969a 4412 struct amdgpu_device *adev = drm_to_adev(dev);
03161a6e 4413 int r = 0;
d38ceaf9 4414
d7274ec7
BZ
4415 if (amdgpu_sriov_vf(adev)) {
4416 r = amdgpu_virt_request_full_gpu(adev, true);
4417 if (r)
4418 return r;
4419 }
4420
d38ceaf9
AD
4421 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4422 return 0;
4423
62498733 4424 if (adev->in_s0ix)
bc143d8b 4425 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
628c36d7 4426
d38ceaf9 4427 /* post card */
39c640c0 4428 if (amdgpu_device_need_post(adev)) {
4d2997ab 4429 r = amdgpu_device_asic_init(adev);
74b0b157 4430 if (r)
aac89168 4431 dev_err(adev->dev, "amdgpu asic init failed\n");
74b0b157 4432 }
d38ceaf9 4433
06ec9070 4434 r = amdgpu_device_ip_resume(adev);
d7274ec7 4435
e6707218 4436 if (r) {
aac89168 4437 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
3c22c1ea 4438 goto exit;
e6707218 4439 }
8d35a259 4440 amdgpu_fence_driver_hw_init(adev);
5ceb54c6 4441
06ec9070 4442 r = amdgpu_device_ip_late_init(adev);
03161a6e 4443 if (r)
3c22c1ea 4444 goto exit;
d38ceaf9 4445
beff74bc
AD
4446 queue_delayed_work(system_wq, &adev->delayed_init_work,
4447 msecs_to_jiffies(AMDGPU_RESUME_MS));
4448
c004d44e 4449 if (!adev->in_s0ix) {
5d3a2d95
AD
4450 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4451 if (r)
3c22c1ea 4452 goto exit;
5d3a2d95 4453 }
756e6880 4454
3c22c1ea
SF
4455exit:
4456 if (amdgpu_sriov_vf(adev)) {
4457 amdgpu_virt_init_data_exchange(adev);
4458 amdgpu_virt_release_full_gpu(adev, true);
4459 }
4460
4461 if (r)
4462 return r;
4463
96a5d8d4 4464 /* Make sure IB tests flushed */
beff74bc 4465 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 4466
a2e15b0e 4467 if (fbcon)
087451f3 4468 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
d38ceaf9 4469
5e6932fe 4470 amdgpu_ras_resume(adev);
4471
d09ef243
AD
4472 if (adev->mode_info.num_crtc) {
4473 /*
4474 * Most of the connector probing functions try to acquire runtime pm
4475 * refs to ensure that the GPU is powered on when connector polling is
4476 * performed. Since we're calling this from a runtime PM callback,
4477 * trying to acquire rpm refs will cause us to deadlock.
4478 *
4479 * Since we're guaranteed to be holding the rpm lock, it's safe to
4480 * temporarily disable the rpm helpers so this doesn't deadlock us.
4481 */
23a1a9e5 4482#ifdef CONFIG_PM
d09ef243 4483 dev->dev->power.disable_depth++;
23a1a9e5 4484#endif
d09ef243
AD
4485 if (!adev->dc_enabled)
4486 drm_helper_hpd_irq_event(dev);
4487 else
4488 drm_kms_helper_hotplug_event(dev);
23a1a9e5 4489#ifdef CONFIG_PM
d09ef243 4490 dev->dev->power.disable_depth--;
23a1a9e5 4491#endif
d09ef243 4492 }
44779b43
RZ
4493 adev->in_suspend = false;
4494
dc907c9d
JX
4495 if (adev->enable_mes)
4496 amdgpu_mes_self_test(adev);
4497
3fa8f89d
S
4498 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4499 DRM_WARN("smart shift update failed\n");
4500
4d3b9ae5 4501 return 0;
d38ceaf9
AD
4502}
4503
e3ecdffa
AD
4504/**
4505 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4506 *
4507 * @adev: amdgpu_device pointer
4508 *
4509 * The list of all the hardware IPs that make up the asic is walked and
4510 * the check_soft_reset callbacks are run. check_soft_reset determines
4511 * if the asic is still hung or not.
4512 * Returns true if any of the IPs are still in a hung state, false if not.
4513 */
06ec9070 4514static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
4515{
4516 int i;
4517 bool asic_hang = false;
4518
f993d628
ML
4519 if (amdgpu_sriov_vf(adev))
4520 return true;
4521
8bc04c29
AD
4522 if (amdgpu_asic_need_full_reset(adev))
4523 return true;
4524
63fbf42f 4525 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4526 if (!adev->ip_blocks[i].status.valid)
63fbf42f 4527 continue;
a1255107
AD
4528 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4529 adev->ip_blocks[i].status.hang =
4530 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4531 if (adev->ip_blocks[i].status.hang) {
aac89168 4532 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
4533 asic_hang = true;
4534 }
4535 }
4536 return asic_hang;
4537}
4538
e3ecdffa
AD
4539/**
4540 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4541 *
4542 * @adev: amdgpu_device pointer
4543 *
4544 * The list of all the hardware IPs that make up the asic is walked and the
4545 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
4546 * handles any IP specific hardware or software state changes that are
4547 * necessary for a soft reset to succeed.
4548 * Returns 0 on success, negative error code on failure.
4549 */
06ec9070 4550static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
4551{
4552 int i, r = 0;
4553
4554 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4555 if (!adev->ip_blocks[i].status.valid)
d31a501e 4556 continue;
a1255107
AD
4557 if (adev->ip_blocks[i].status.hang &&
4558 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4559 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
4560 if (r)
4561 return r;
4562 }
4563 }
4564
4565 return 0;
4566}
4567
e3ecdffa
AD
4568/**
4569 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4570 *
4571 * @adev: amdgpu_device pointer
4572 *
4573 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
4574 * reset is necessary to recover.
4575 * Returns true if a full asic reset is required, false if not.
4576 */
06ec9070 4577static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 4578{
da146d3b
AD
4579 int i;
4580
8bc04c29
AD
4581 if (amdgpu_asic_need_full_reset(adev))
4582 return true;
4583
da146d3b 4584 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4585 if (!adev->ip_blocks[i].status.valid)
da146d3b 4586 continue;
a1255107
AD
4587 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4588 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4589 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
4590 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4591 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 4592 if (adev->ip_blocks[i].status.hang) {
aac89168 4593 dev_info(adev->dev, "Some block need full reset!\n");
da146d3b
AD
4594 return true;
4595 }
4596 }
35d782fe
CZ
4597 }
4598 return false;
4599}
4600
e3ecdffa
AD
4601/**
4602 * amdgpu_device_ip_soft_reset - do a soft reset
4603 *
4604 * @adev: amdgpu_device pointer
4605 *
4606 * The list of all the hardware IPs that make up the asic is walked and the
4607 * soft_reset callbacks are run if the block is hung. soft_reset handles any
4608 * IP specific hardware or software state changes that are necessary to soft
4609 * reset the IP.
4610 * Returns 0 on success, negative error code on failure.
4611 */
06ec9070 4612static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4613{
4614 int i, r = 0;
4615
4616 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4617 if (!adev->ip_blocks[i].status.valid)
35d782fe 4618 continue;
a1255107
AD
4619 if (adev->ip_blocks[i].status.hang &&
4620 adev->ip_blocks[i].version->funcs->soft_reset) {
4621 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
4622 if (r)
4623 return r;
4624 }
4625 }
4626
4627 return 0;
4628}
4629
e3ecdffa
AD
4630/**
4631 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4632 *
4633 * @adev: amdgpu_device pointer
4634 *
4635 * The list of all the hardware IPs that make up the asic is walked and the
4636 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
4637 * handles any IP specific hardware or software state changes that are
4638 * necessary after the IP has been soft reset.
4639 * Returns 0 on success, negative error code on failure.
4640 */
06ec9070 4641static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4642{
4643 int i, r = 0;
4644
4645 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4646 if (!adev->ip_blocks[i].status.valid)
35d782fe 4647 continue;
a1255107
AD
4648 if (adev->ip_blocks[i].status.hang &&
4649 adev->ip_blocks[i].version->funcs->post_soft_reset)
4650 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
4651 if (r)
4652 return r;
4653 }
4654
4655 return 0;
4656}
4657
e3ecdffa 4658/**
c33adbc7 4659 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
4660 *
4661 * @adev: amdgpu_device pointer
4662 *
4663 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
4664 * restore things like GPUVM page tables after a GPU reset where
4665 * the contents of VRAM might be lost.
403009bf
CK
4666 *
4667 * Returns:
4668 * 0 on success, negative error code on failure.
e3ecdffa 4669 */
c33adbc7 4670static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 4671{
c41d1cf6 4672 struct dma_fence *fence = NULL, *next = NULL;
403009bf 4673 struct amdgpu_bo *shadow;
e18aaea7 4674 struct amdgpu_bo_vm *vmbo;
403009bf 4675 long r = 1, tmo;
c41d1cf6
ML
4676
4677 if (amdgpu_sriov_runtime(adev))
b045d3af 4678 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
4679 else
4680 tmo = msecs_to_jiffies(100);
4681
aac89168 4682 dev_info(adev->dev, "recover vram bo from shadow start\n");
c41d1cf6 4683 mutex_lock(&adev->shadow_list_lock);
e18aaea7 4684 list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4994d1f0
LC
4685 /* If the vm is a compute context or adev is an APU, the shadow will be NULL */
4686 if (!vmbo->shadow)
4687 continue;
4688 shadow = vmbo->shadow;
4689
403009bf 4690 /* No need to recover an evicted BO */
d3116756
CK
4691 if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4692 shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4693 shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
403009bf
CK
4694 continue;
4695
4696 r = amdgpu_bo_restore_shadow(shadow, &next);
4697 if (r)
4698 break;
4699
c41d1cf6 4700 if (fence) {
1712fb1a 4701 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
4702 dma_fence_put(fence);
4703 fence = next;
1712fb1a 4704 if (tmo == 0) {
4705 r = -ETIMEDOUT;
c41d1cf6 4706 break;
1712fb1a 4707 } else if (tmo < 0) {
4708 r = tmo;
4709 break;
4710 }
403009bf
CK
4711 } else {
4712 fence = next;
c41d1cf6 4713 }
c41d1cf6
ML
4714 }
4715 mutex_unlock(&adev->shadow_list_lock);
4716
403009bf
CK
4717 if (fence)
4718 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
4719 dma_fence_put(fence);
4720
1712fb1a 4721 if (r < 0 || tmo <= 0) {
aac89168 4722 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
4723 return -EIO;
4724 }
c41d1cf6 4725
aac89168 4726 dev_info(adev->dev, "recover vram bo from shadow done\n");
403009bf 4727 return 0;
c41d1cf6
ML
4728}
4729
a90ad3c2 4730
e3ecdffa 4731/**
06ec9070 4732 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e 4733 *
982a820b 4734 * @adev: amdgpu_device pointer
87e3f136 4735 * @from_hypervisor: request from hypervisor
5740682e
ML
4736 *
4737 * Do a VF FLR and reinitialize the ASIC.
3f48c681 4738 * Returns 0 on success, negative error code on failure.
e3ecdffa
AD
4739 */
4740static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4741 bool from_hypervisor)
5740682e
ML
4742{
4743 int r;
a5f67c93 4744 struct amdgpu_hive_info *hive = NULL;
7258fa31 4745 int retry_limit = 0;
5740682e 4746
7258fa31 4747retry:
c004d44e 4748 amdgpu_amdkfd_pre_reset(adev);
428890a3 4749
5740682e
ML
4750 if (from_hypervisor)
4751 r = amdgpu_virt_request_full_gpu(adev, true);
4752 else
4753 r = amdgpu_virt_reset_gpu(adev);
4754 if (r)
4755 return r;
f734b213 4756 amdgpu_irq_gpu_reset_resume_helper(adev);
a90ad3c2 4757
83f24a8f
HC
4758 /* some sw clean up VF needs to do before recover */
4759 amdgpu_virt_post_reset(adev);
4760
a90ad3c2 4761 /* Resume IP prior to SMC */
06ec9070 4762 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
4763 if (r)
4764 goto error;
a90ad3c2 4765
c9ffa427 4766 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 4767
7a3e0bb2
RZ
4768 r = amdgpu_device_fw_loading(adev);
4769 if (r)
4770 return r;
4771
a90ad3c2 4772 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 4773 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
4774 if (r)
4775 goto error;
a90ad3c2 4776
a5f67c93
ZL
4777 hive = amdgpu_get_xgmi_hive(adev);
4778 /* Update PSP FW topology after reset */
4779 if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4780 r = amdgpu_xgmi_update_topology(hive, adev);
4781
4782 if (hive)
4783 amdgpu_put_xgmi_hive(hive);
4784
4785 if (!r) {
a5f67c93 4786 r = amdgpu_ib_ring_tests(adev);
9c12f5cd 4787
c004d44e 4788 amdgpu_amdkfd_post_reset(adev);
a5f67c93 4789 }
a90ad3c2 4790
abc34253 4791error:
c41d1cf6 4792 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 4793 amdgpu_inc_vram_lost(adev);
c33adbc7 4794 r = amdgpu_device_recover_vram(adev);
a90ad3c2 4795 }
437f3e0b 4796 amdgpu_virt_release_full_gpu(adev, true);
a90ad3c2 4797
7258fa31
SK
4798 if (AMDGPU_RETRY_SRIOV_RESET(r)) {
4799 if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
4800 retry_limit++;
4801 goto retry;
4802 } else
4803 DRM_ERROR("GPU reset retry is beyond the retry limit\n");
4804 }
4805
a90ad3c2
ML
4806 return r;
4807}
4808
9a1cddd6 4809/**
4810 * amdgpu_device_has_job_running - check if there is any job in the pending list
4811 *
982a820b 4812 * @adev: amdgpu_device pointer
9a1cddd6 4813 *
4814 * check if there is any job in the pending list
4815 */
4816bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4817{
4818 int i;
4819 struct drm_sched_job *job;
4820
4821 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4822 struct amdgpu_ring *ring = adev->rings[i];
4823
4824 if (!ring || !ring->sched.thread)
4825 continue;
4826
4827 spin_lock(&ring->sched.job_list_lock);
6efa4b46
LT
4828 job = list_first_entry_or_null(&ring->sched.pending_list,
4829 struct drm_sched_job, list);
9a1cddd6 4830 spin_unlock(&ring->sched.job_list_lock);
4831 if (job)
4832 return true;
4833 }
4834 return false;
4835}
4836
12938fad
CK
4837/**
4838 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4839 *
982a820b 4840 * @adev: amdgpu_device pointer
12938fad
CK
4841 *
4842 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4843 * a hung GPU.
4844 */
4845bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4846{
12938fad 4847
3ba7b418
AG
4848 if (amdgpu_gpu_recovery == 0)
4849 goto disabled;
4850
1a11a65d
YC
4851 /* Skip soft reset check in fatal error mode */
4852 if (!amdgpu_ras_is_poison_mode_supported(adev))
4853 return true;
4854
3ba7b418
AG
4855 if (amdgpu_sriov_vf(adev))
4856 return true;
4857
4858 if (amdgpu_gpu_recovery == -1) {
4859 switch (adev->asic_type) {
b3523c45
AD
4860#ifdef CONFIG_DRM_AMDGPU_SI
4861 case CHIP_VERDE:
4862 case CHIP_TAHITI:
4863 case CHIP_PITCAIRN:
4864 case CHIP_OLAND:
4865 case CHIP_HAINAN:
4866#endif
4867#ifdef CONFIG_DRM_AMDGPU_CIK
4868 case CHIP_KAVERI:
4869 case CHIP_KABINI:
4870 case CHIP_MULLINS:
4871#endif
4872 case CHIP_CARRIZO:
4873 case CHIP_STONEY:
4874 case CHIP_CYAN_SKILLFISH:
3ba7b418 4875 goto disabled;
b3523c45
AD
4876 default:
4877 break;
3ba7b418 4878 }
12938fad
CK
4879 }
4880
4881 return true;
3ba7b418
AG
4882
4883disabled:
aac89168 4884 dev_info(adev->dev, "GPU recovery disabled.\n");
3ba7b418 4885 return false;
12938fad
CK
4886}
4887
5c03e584
FX
4888int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
4889{
47fc644f
SS
4890 u32 i;
4891 int ret = 0;
5c03e584 4892
47fc644f 4893 amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5c03e584 4894
47fc644f 4895 dev_info(adev->dev, "GPU mode1 reset\n");
5c03e584 4896
47fc644f
SS
4897 /* disable BM */
4898 pci_clear_master(adev->pdev);
5c03e584 4899
47fc644f 4900 amdgpu_device_cache_pci_state(adev->pdev);
5c03e584 4901
47fc644f
SS
4902 if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
4903 dev_info(adev->dev, "GPU smu mode1 reset\n");
4904 ret = amdgpu_dpm_mode1_reset(adev);
4905 } else {
4906 dev_info(adev->dev, "GPU psp mode1 reset\n");
4907 ret = psp_gpu_reset(adev);
4908 }
5c03e584 4909
47fc644f 4910 if (ret)
2c0f880a 4911 goto mode1_reset_failed;
5c03e584 4912
47fc644f 4913 amdgpu_device_load_pci_state(adev->pdev);
15c5c5f5
LL
4914 ret = amdgpu_psp_wait_for_bootloader(adev);
4915 if (ret)
2c0f880a 4916 goto mode1_reset_failed;
5c03e584 4917
47fc644f
SS
4918 /* wait for asic to come out of reset */
4919 for (i = 0; i < adev->usec_timeout; i++) {
4920 u32 memsize = adev->nbio.funcs->get_memsize(adev);
5c03e584 4921
47fc644f
SS
4922 if (memsize != 0xffffffff)
4923 break;
4924 udelay(1);
4925 }
5c03e584 4926
2c0f880a
HZ
4927 if (i >= adev->usec_timeout) {
4928 ret = -ETIMEDOUT;
4929 goto mode1_reset_failed;
4930 }
4931
47fc644f 4932 amdgpu_atombios_scratch_regs_engine_hung(adev, false);
15c5c5f5 4933
2c0f880a
HZ
4934 return 0;
4935
4936mode1_reset_failed:
4937 dev_err(adev->dev, "GPU mode1 reset failed\n");
47fc644f 4938 return ret;
5c03e584 4939}
5c6dd71e 4940
e3c1b071 4941int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
04442bf7 4942 struct amdgpu_reset_context *reset_context)
26bc5340 4943{
5c1e6fa4 4944 int i, r = 0;
04442bf7
LL
4945 struct amdgpu_job *job = NULL;
4946 bool need_full_reset =
4947 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4948
4949 if (reset_context->reset_req_dev == adev)
4950 job = reset_context->job;
71182665 4951
b602ca5f
TZ
4952 if (amdgpu_sriov_vf(adev)) {
4953 /* stop the data exchange thread */
4954 amdgpu_virt_fini_data_exchange(adev);
4955 }
4956
9e225fb9
AG
4957 amdgpu_fence_driver_isr_toggle(adev, true);
4958
71182665 4959 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
4960 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4961 struct amdgpu_ring *ring = adev->rings[i];
4962
51687759 4963 if (!ring || !ring->sched.thread)
0875dc9e 4964 continue;
5740682e 4965
b8920e1e
SS
4966 /* Clear the job fences from the fence driver to avoid force_completion on
4967 * them; this leaves only the NULL and VM flush fences in the fence driver
4968 */
5c1e6fa4 4969 amdgpu_fence_driver_clear_job_fences(ring);
c530b02f 4970
2f9d4084
ML
4971 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
4972 amdgpu_fence_driver_force_completion(ring);
0875dc9e 4973 }
d38ceaf9 4974
9e225fb9
AG
4975 amdgpu_fence_driver_isr_toggle(adev, false);
4976
ff99849b 4977 if (job && job->vm)
222b5f04
AG
4978 drm_sched_increase_karma(&job->base);
4979
04442bf7 4980 r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
404b277b 4981 /* If reset handler not implemented, continue; otherwise return */
b8920e1e 4982 if (r == -EOPNOTSUPP)
404b277b
LL
4983 r = 0;
4984 else
04442bf7
LL
4985 return r;
4986
1d721ed6 4987 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
4988 if (!amdgpu_sriov_vf(adev)) {
4989
4990 if (!need_full_reset)
4991 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4992
360cd081
LG
4993 if (!need_full_reset && amdgpu_gpu_recovery &&
4994 amdgpu_device_ip_check_soft_reset(adev)) {
26bc5340
AG
4995 amdgpu_device_ip_pre_soft_reset(adev);
4996 r = amdgpu_device_ip_soft_reset(adev);
4997 amdgpu_device_ip_post_soft_reset(adev);
4998 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 4999 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
26bc5340
AG
5000 need_full_reset = true;
5001 }
5002 }
5003
5004 if (need_full_reset)
5005 r = amdgpu_device_ip_suspend(adev);
04442bf7
LL
5006 if (need_full_reset)
5007 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5008 else
5009 clear_bit(AMDGPU_NEED_FULL_RESET,
5010 &reset_context->flags);
26bc5340
AG
5011 }
5012
5013 return r;
5014}
5015
15fd09a0
SA
5016static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
5017{
15fd09a0
SA
5018 int i;
5019
38a15ad9 5020 lockdep_assert_held(&adev->reset_domain->sem);
15fd09a0
SA
5021
5022 for (i = 0; i < adev->num_regs; i++) {
651d7ee6
SA
5023 adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]);
5024 trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i],
5025 adev->reset_dump_reg_value[i]);
15fd09a0
SA
5026 }
5027
5028 return 0;
5029}
5030
a7691785
AA
5031#ifndef CONFIG_DEV_COREDUMP
5032static void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost,
5033 struct amdgpu_reset_context *reset_context)
5034{
5035}
5036#else
3d8785f6
SA
5037static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
5038 size_t count, void *data, size_t datalen)
5039{
5040 struct drm_printer p;
a7691785 5041 struct amdgpu_coredump_info *coredump = data;
3d8785f6
SA
5042 struct drm_print_iterator iter;
5043 int i;
5044
5045 iter.data = buffer;
5046 iter.offset = 0;
5047 iter.start = offset;
5048 iter.remain = count;
5049
5050 p = drm_coredump_printer(&iter);
5051
5052 drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
5053 drm_printf(&p, "kernel: " UTS_RELEASE "\n");
5054 drm_printf(&p, "module: " KBUILD_MODNAME "\n");
a7691785
AA
5055 drm_printf(&p, "time: %lld.%09ld\n", coredump->reset_time.tv_sec, coredump->reset_time.tv_nsec);
5056 if (coredump->reset_task_info.pid)
3d8785f6 5057 drm_printf(&p, "process_name: %s PID: %d\n",
a7691785
AA
5058 coredump->reset_task_info.process_name,
5059 coredump->reset_task_info.pid);
3d8785f6 5060
a7691785 5061 if (coredump->reset_vram_lost)
3d8785f6 5062 drm_printf(&p, "VRAM is lost due to GPU reset!\n");
a7691785 5063 if (coredump->adev->num_regs) {
3d8785f6
SA
5064 drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n");
5065
a7691785 5066 for (i = 0; i < coredump->adev->num_regs; i++)
3d8785f6 5067 drm_printf(&p, "0x%08x: 0x%08x\n",
a7691785
AA
5068 coredump->adev->reset_dump_reg_list[i],
5069 coredump->adev->reset_dump_reg_value[i]);
3d8785f6
SA
5070 }
5071
5072 return count - iter.remain;
5073}
5074
5075static void amdgpu_devcoredump_free(void *data)
5076{
a7691785 5077 kfree(data);
3d8785f6
SA
5078}
5079
a7691785
AA
5080static void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost,
5081 struct amdgpu_reset_context *reset_context)
3d8785f6 5082{
a7691785 5083 struct amdgpu_coredump_info *coredump;
3d8785f6
SA
5084 struct drm_device *dev = adev_to_drm(adev);
5085
a7691785
AA
5086 coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT);
5087
5088 if (!coredump) {
5089 DRM_ERROR("%s: failed to allocate memory for coredump\n", __func__);
5090 return;
5091 }
5092
5093 coredump->reset_vram_lost = vram_lost;
5094
5095 if (reset_context->job && reset_context->job->vm)
5096 coredump->reset_task_info = reset_context->job->vm->task_info;
5097
5098 coredump->adev = adev;
5099
5100 ktime_get_ts64(&coredump->reset_time);
5101
5102 dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT,
3d8785f6
SA
5103 amdgpu_devcoredump_read, amdgpu_devcoredump_free);
5104}
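/*
 * Illustrative sketch only, not part of the upstream file: how a
 * devcoredump-style consumer would drain amdgpu_devcoredump_read() in
 * fixed-size (offset, count) windows. The drm_print_iterator set up in the
 * read callback above is what honours that window. The helper name and the
 * buffer size are hypothetical.
 */
static void __maybe_unused amdgpu_devcoredump_read_example(void *data)
{
	char buf[256];
	loff_t offset = 0;
	ssize_t n;

	do {
		/* produce at most sizeof(buf) bytes starting at offset */
		n = amdgpu_devcoredump_read(buf, offset, sizeof(buf), data, 0);
		offset += n;
	} while (n == sizeof(buf));	/* short read means the dump is done */
}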
5105#endif
5106
04442bf7
LL
5107int amdgpu_do_asic_reset(struct list_head *device_list_handle,
5108 struct amdgpu_reset_context *reset_context)
26bc5340
AG
5109{
5110 struct amdgpu_device *tmp_adev = NULL;
04442bf7 5111 bool need_full_reset, skip_hw_reset, vram_lost = false;
26bc5340 5112 int r = 0;
f5c7e779 5113 bool gpu_reset_for_dev_remove = 0;
26bc5340 5114
04442bf7
LL
5115 /* Try reset handler method first */
5116 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5117 reset_list);
15fd09a0 5118 amdgpu_reset_reg_dumps(tmp_adev);
0a83bb35
LL
5119
5120 reset_context->reset_device_list = device_list_handle;
04442bf7 5121 r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
404b277b 5122 /* If reset handler not implemented, continue; otherwise return */
b8920e1e 5123 if (r == -EOPNOTSUPP)
404b277b
LL
5124 r = 0;
5125 else
04442bf7
LL
5126 return r;
5127
5128 /* Reset handler not implemented, use the default method */
5129 need_full_reset =
5130 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5131 skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
5132
f5c7e779
YC
5133 gpu_reset_for_dev_remove =
5134 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5135 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5136
26bc5340 5137 /*
655ce9cb 5138 * ASIC reset has to be done on all XGMI hive nodes ASAP
26bc5340
AG
5139 * to allow proper link negotiation in FW (within 1 sec)
5140 */
7ac71382 5141 if (!skip_hw_reset && need_full_reset) {
655ce9cb 5142 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
041a62bc 5143 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 5144 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
e3c1b071 5145 tmp_adev->gmc.xgmi.pending_reset = false;
c96cf282 5146 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
5147 r = -EALREADY;
5148 } else
5149 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 5150
041a62bc 5151 if (r) {
aac89168 5152 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4a580877 5153 r, adev_to_drm(tmp_adev)->unique);
19349072 5154 goto out;
ce316fa5
LM
5155 }
5156 }
5157
041a62bc
AG
5158 /* For XGMI wait for all resets to complete before proceed */
5159 if (!r) {
655ce9cb 5160 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
ce316fa5
LM
5161 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5162 flush_work(&tmp_adev->xgmi_reset_work);
5163 r = tmp_adev->asic_reset_res;
5164 if (r)
5165 break;
ce316fa5
LM
5166 }
5167 }
5168 }
ce316fa5 5169 }
26bc5340 5170
43c4d576 5171 if (!r && amdgpu_ras_intr_triggered()) {
655ce9cb 5172 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5e67bba3 5173 if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops &&
5174 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
5175 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev);
43c4d576
JC
5176 }
5177
00eaa571 5178 amdgpu_ras_intr_cleared();
43c4d576 5179 }
00eaa571 5180
f5c7e779
YC
5181 /* Since the mode1 reset affects base ip blocks, the
5182 * phase1 ip blocks need to be resumed. Otherwise there
5183 * will be a BIOS signature error and the psp bootloader
5184 * can't load kdb on the next amdgpu install.
5185 */
5186 if (gpu_reset_for_dev_remove) {
5187 list_for_each_entry(tmp_adev, device_list_handle, reset_list)
5188 amdgpu_device_ip_resume_phase1(tmp_adev);
5189
5190 goto end;
5191 }
5192
655ce9cb 5193 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
26bc5340
AG
5194 if (need_full_reset) {
5195 /* post card */
e3c1b071 5196 r = amdgpu_device_asic_init(tmp_adev);
5197 if (r) {
aac89168 5198 dev_warn(tmp_adev->dev, "asic atom init failed!");
e3c1b071 5199 } else {
26bc5340 5200 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
9cec53c1 5201
26bc5340
AG
5202 r = amdgpu_device_ip_resume_phase1(tmp_adev);
5203 if (r)
5204 goto out;
5205
5206 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
a7691785
AA
5207
5208 amdgpu_coredump(tmp_adev, vram_lost, reset_context);
5209
26bc5340 5210 if (vram_lost) {
77e7f829 5211 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 5212 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
5213 }
5214
26bc5340
AG
5215 r = amdgpu_device_fw_loading(tmp_adev);
5216 if (r)
5217 return r;
5218
c45e38f2
LL
5219 r = amdgpu_xcp_restore_partition_mode(
5220 tmp_adev->xcp_mgr);
5221 if (r)
5222 goto out;
5223
26bc5340
AG
5224 r = amdgpu_device_ip_resume_phase2(tmp_adev);
5225 if (r)
5226 goto out;
5227
5228 if (vram_lost)
5229 amdgpu_device_fill_reset_magic(tmp_adev);
5230
fdafb359
EQ
5231 /*
5232 * Add this ASIC back as tracked since the reset
5233 * already completed successfully.
5234 */
5235 amdgpu_register_gpu_instance(tmp_adev);
5236
04442bf7
LL
5237 if (!reset_context->hive &&
5238 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
e3c1b071 5239 amdgpu_xgmi_add_device(tmp_adev);
5240
7c04ca50 5241 r = amdgpu_device_ip_late_init(tmp_adev);
5242 if (r)
5243 goto out;
5244
087451f3 5245 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
565d1941 5246
e8fbaf03
GC
5247 /*
5248 * The GPU enters a bad state once the number of faulty
5249 * pages reported by ECC reaches the threshold, and RAS
5250 * recovery is scheduled next. So add one check here to
5251 * break recovery if the bad page threshold has indeed
5252 * been exceeded, and remind the user to either retire
5253 * this GPU or set a bigger bad_page_threshold value,
5254 * so that this is fixed the next time the driver is
5255 * probed.
5256 */
11003c68 5257 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
e8fbaf03
GC
5258 /* must succeed. */
5259 amdgpu_ras_resume(tmp_adev);
5260 } else {
5261 r = -EINVAL;
5262 goto out;
5263 }
e79a04d5 5264
26bc5340 5265 /* Update PSP FW topology after reset */
04442bf7
LL
5266 if (reset_context->hive &&
5267 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5268 r = amdgpu_xgmi_update_topology(
5269 reset_context->hive, tmp_adev);
26bc5340
AG
5270 }
5271 }
5272
26bc5340
AG
5273out:
5274 if (!r) {
5275 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5276 r = amdgpu_ib_ring_tests(tmp_adev);
5277 if (r) {
5278 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
26bc5340
AG
5279 need_full_reset = true;
5280 r = -EAGAIN;
5281 goto end;
5282 }
5283 }
5284
5285 if (!r)
5286 r = amdgpu_device_recover_vram(tmp_adev);
5287 else
5288 tmp_adev->asic_reset_res = r;
5289 }
5290
5291end:
04442bf7
LL
5292 if (need_full_reset)
5293 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5294 else
5295 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340
AG
5296 return r;
5297}
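/*
 * Illustrative sketch only, not part of the upstream file: the minimal
 * single-device way amdgpu_do_asic_reset() is driven elsewhere in this file
 * (see amdgpu_pci_slot_reset() below) - build a one-entry reset list and a
 * reset context requesting a full reset. The wrapper name is hypothetical;
 * all fields and flags used are the ones visible in this file.
 */
static int __maybe_unused amdgpu_do_asic_reset_example(struct amdgpu_device *adev)
{
	struct amdgpu_reset_context reset_context;
	struct list_head device_list;

	memset(&reset_context, 0, sizeof(reset_context));
	reset_context.method = AMD_RESET_METHOD_NONE;	/* let the driver pick */
	reset_context.reset_req_dev = adev;
	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);

	INIT_LIST_HEAD(&device_list);
	list_add_tail(&adev->reset_list, &device_list);

	return amdgpu_do_asic_reset(&device_list, &reset_context);
}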
5298
e923be99 5299static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
26bc5340 5300{
5740682e 5301
a3a09142
AD
5302 switch (amdgpu_asic_reset_method(adev)) {
5303 case AMD_RESET_METHOD_MODE1:
5304 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5305 break;
5306 case AMD_RESET_METHOD_MODE2:
5307 adev->mp1_state = PP_MP1_STATE_RESET;
5308 break;
5309 default:
5310 adev->mp1_state = PP_MP1_STATE_NONE;
5311 break;
5312 }
26bc5340 5313}
d38ceaf9 5314
e923be99 5315static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
26bc5340 5316{
89041940 5317 amdgpu_vf_error_trans_all(adev);
a3a09142 5318 adev->mp1_state = PP_MP1_STATE_NONE;
91fb309d
HC
5319}
5320
3f12acc8
EQ
5321static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5322{
5323 struct pci_dev *p = NULL;
5324
5325 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5326 adev->pdev->bus->number, 1);
5327 if (p) {
5328 pm_runtime_enable(&(p->dev));
5329 pm_runtime_resume(&(p->dev));
5330 }
b85e285e
YY
5331
5332 pci_dev_put(p);
3f12acc8
EQ
5333}
5334
5335static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5336{
5337 enum amd_reset_method reset_method;
5338 struct pci_dev *p = NULL;
5339 u64 expires;
5340
5341 /*
5342 * For now, only BACO and mode1 reset are confirmed
5343 * to suffer the audio issue without proper suspended.
5344 */
5345 reset_method = amdgpu_asic_reset_method(adev);
5346 if ((reset_method != AMD_RESET_METHOD_BACO) &&
5347 (reset_method != AMD_RESET_METHOD_MODE1))
5348 return -EINVAL;
5349
5350 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5351 adev->pdev->bus->number, 1);
5352 if (!p)
5353 return -ENODEV;
5354
5355 expires = pm_runtime_autosuspend_expiration(&(p->dev));
5356 if (!expires)
5357 /*
5358 * If we cannot get the audio device autosuspend delay,
5359 * a fixed 4S interval will be used. Since 3S is the
5360 * audio controller's default autosuspend delay setting,
5361 * the 4S used here is guaranteed to cover it.
5362 */
54b7feb9 5363 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
5364
5365 while (!pm_runtime_status_suspended(&(p->dev))) {
5366 if (!pm_runtime_suspend(&(p->dev)))
5367 break;
5368
5369 if (expires < ktime_get_mono_fast_ns()) {
5370 dev_warn(adev->dev, "failed to suspend display audio\n");
b85e285e 5371 pci_dev_put(p);
3f12acc8
EQ
5372 /* TODO: abort the succeeding gpu reset? */
5373 return -ETIMEDOUT;
5374 }
5375 }
5376
5377 pm_runtime_disable(&(p->dev));
5378
b85e285e 5379 pci_dev_put(p);
3f12acc8
EQ
5380 return 0;
5381}
5382
d193b12b 5383static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
247c7b0d
AG
5384{
5385 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5386
5387#if defined(CONFIG_DEBUG_FS)
5388 if (!amdgpu_sriov_vf(adev))
5389 cancel_work(&adev->reset_work);
5390#endif
5391
5392 if (adev->kfd.dev)
5393 cancel_work(&adev->kfd.reset_work);
5394
5395 if (amdgpu_sriov_vf(adev))
5396 cancel_work(&adev->virt.flr_work);
5397
5398 if (con && adev->ras_enabled)
5399 cancel_work(&con->recovery_work);
5400
5401}
5402
26bc5340 5403/**
6e9c65f7 5404 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
26bc5340 5405 *
982a820b 5406 * @adev: amdgpu_device pointer
26bc5340 5407 * @job: which job trigger hang
80bd2de1 5408 * @reset_context: amdgpu reset context pointer
26bc5340
AG
5409 *
5410 * Attempt to reset the GPU if it has hung (all ASICs): do a soft reset
5411 * or a full reset and reinitialize the ASIC (an illustrative call sketch
5412 * follows the function body). Returns 0 for success or an error on failure.
5413 */
5414
cf727044 5415int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
f1549c09
LG
5416 struct amdgpu_job *job,
5417 struct amdgpu_reset_context *reset_context)
26bc5340 5418{
1d721ed6 5419 struct list_head device_list, *device_list_handle = NULL;
7dd8c205 5420 bool job_signaled = false;
26bc5340 5421 struct amdgpu_hive_info *hive = NULL;
26bc5340 5422 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 5423 int i, r = 0;
bb5c7235 5424 bool need_emergency_restart = false;
3f12acc8 5425 bool audio_suspended = false;
f5c7e779
YC
5426 bool gpu_reset_for_dev_remove = false;
5427
5428 gpu_reset_for_dev_remove =
5429 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5430 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340 5431
6e3cd2a9 5432 /*
bb5c7235
WS
5433 * Special case: RAS triggered and full reset isn't supported
5434 */
5435 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5436
d5ea093e
AG
5437 /*
5438 * Flush RAM to disk so that after reboot
5439 * the user can read the log and see why the system rebooted.
5440 */
bb5c7235 5441 if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
5442 DRM_WARN("Emergency reboot.");
5443
5444 ksys_sync_helper();
5445 emergency_restart();
5446 }
5447
b823821f 5448 dev_info(adev->dev, "GPU %s begin!\n",
bb5c7235 5449 need_emergency_restart ? "jobs stop":"reset");
26bc5340 5450
175ac6ec
ZL
5451 if (!amdgpu_sriov_vf(adev))
5452 hive = amdgpu_get_xgmi_hive(adev);
681260df 5453 if (hive)
53b3f8f4 5454 mutex_lock(&hive->hive_lock);
26bc5340 5455
f1549c09
LG
5456 reset_context->job = job;
5457 reset_context->hive = hive;
9e94d22c
EQ
5458 /*
5459 * Build list of devices to reset.
5460 * In case we are in XGMI hive mode, resort the device list
5461 * to put adev in the 1st position.
5462 */
5463 INIT_LIST_HEAD(&device_list);
175ac6ec 5464 if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
83d29a5f 5465 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
655ce9cb 5466 list_add_tail(&tmp_adev->reset_list, &device_list);
83d29a5f
YC
5467 if (gpu_reset_for_dev_remove && adev->shutdown)
5468 tmp_adev->shutdown = true;
5469 }
655ce9cb 5470 if (!list_is_first(&adev->reset_list, &device_list))
5471 list_rotate_to_front(&adev->reset_list, &device_list);
5472 device_list_handle = &device_list;
26bc5340 5473 } else {
655ce9cb 5474 list_add_tail(&adev->reset_list, &device_list);
26bc5340
AG
5475 device_list_handle = &device_list;
5476 }
5477
e923be99
AG
5478 /* We need to lock reset domain only once both for XGMI and single device */
5479 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5480 reset_list);
3675c2f2 5481 amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
e923be99 5482
1d721ed6 5483 /* block all schedulers and reset given job's ring */
655ce9cb 5484 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f287a3c5 5485
e923be99 5486 amdgpu_device_set_mp1_state(tmp_adev);
f287a3c5 5487
3f12acc8
EQ
5488 /*
5489 * Try to put the audio codec into the suspend state
5490 * before the GPU reset starts.
5491 *
5492 * The power domain of the graphics device is shared
5493 * with the AZ power domain, so without this we may
5494 * end up changing the audio hardware from behind the
5495 * audio driver's back, which would trigger audio
5496 * codec errors.
5497 */
5498 if (!amdgpu_device_suspend_display_audio(tmp_adev))
5499 audio_suspended = true;
5500
9e94d22c
EQ
5501 amdgpu_ras_set_error_query_ready(tmp_adev, false);
5502
52fb44cf
EQ
5503 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5504
c004d44e 5505 if (!amdgpu_sriov_vf(tmp_adev))
428890a3 5506 amdgpu_amdkfd_pre_reset(tmp_adev);
9e94d22c 5507
12ffa55d
AG
5508 /*
5509 * Mark these ASICs as untracked before the reset,
5510 * and add them back after the reset completes.
5511 */
5512 amdgpu_unregister_gpu_instance(tmp_adev);
5513
163d4cd2 5514 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
565d1941 5515
f1c1314b 5516 /* disable ras on ALL IPs */
bb5c7235 5517 if (!need_emergency_restart &&
b823821f 5518 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 5519 amdgpu_ras_suspend(tmp_adev);
5520
1d721ed6
AG
5521 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5522 struct amdgpu_ring *ring = tmp_adev->rings[i];
5523
5524 if (!ring || !ring->sched.thread)
5525 continue;
5526
0b2d2c2e 5527 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 5528
bb5c7235 5529 if (need_emergency_restart)
7c6e68c7 5530 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6 5531 }
8f8c80f4 5532 atomic_inc(&tmp_adev->gpu_reset_counter);
1d721ed6
AG
5533 }
5534
bb5c7235 5535 if (need_emergency_restart)
7c6e68c7
AG
5536 goto skip_sched_resume;
5537
1d721ed6
AG
5538 /*
5539 * Must check guilty signal here since after this point all old
5540 * HW fences are force signaled.
5541 *
5542 * job->base holds a reference to parent fence
5543 */
f6a3f660 5544 if (job && dma_fence_is_signaled(&job->hw_fence)) {
1d721ed6 5545 job_signaled = true;
1d721ed6
AG
5546 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5547 goto skip_hw_reset;
5548 }
5549
26bc5340 5550retry: /* Rest of adevs pre asic reset from XGMI hive. */
655ce9cb 5551 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f5c7e779
YC
5552 if (gpu_reset_for_dev_remove) {
5553 /* Workaround for ASICs that need to disable SMC first */
5554 amdgpu_device_smu_fini_early(tmp_adev);
5555 }
f1549c09 5556 r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
26bc5340
AG
5557 /* TODO: Should we stop? */
5558 if (r) {
aac89168 5559 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
4a580877 5560 r, adev_to_drm(tmp_adev)->unique);
26bc5340
AG
5561 tmp_adev->asic_reset_res = r;
5562 }
247c7b0d
AG
5563
5564 /*
5565 * Drop all pending non-scheduler resets. Scheduler resets
5566 * were already dropped during drm_sched_stop
5567 */
d193b12b 5568 amdgpu_device_stop_pending_resets(tmp_adev);
26bc5340
AG
5569 }
5570
5571 /* Actual ASIC resets if needed.*/
4f30d920 5572 /* Host driver will handle XGMI hive reset for SRIOV */
26bc5340
AG
5573 if (amdgpu_sriov_vf(adev)) {
5574 r = amdgpu_device_reset_sriov(adev, job ? false : true);
5575 if (r)
5576 adev->asic_reset_res = r;
950d6425 5577
28606c4e 5578 /* Aldebaran and gfx_11_0_3 support RAS in SRIOV, so RAS needs to be resumed during reset */
4e8303cf
LL
5579 if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
5580 IP_VERSION(9, 4, 2) ||
5581 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
950d6425 5582 amdgpu_ras_resume(adev);
26bc5340 5583 } else {
f1549c09 5584 r = amdgpu_do_asic_reset(device_list_handle, reset_context);
b98a1648 5585 if (r && r == -EAGAIN)
26bc5340 5586 goto retry;
f5c7e779
YC
5587
5588 if (!r && gpu_reset_for_dev_remove)
5589 goto recover_end;
26bc5340
AG
5590 }
5591
1d721ed6
AG
5592skip_hw_reset:
5593
26bc5340 5594 /* Post ASIC reset for all devs .*/
655ce9cb 5595 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
7c6e68c7 5596
1d721ed6
AG
5597 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5598 struct amdgpu_ring *ring = tmp_adev->rings[i];
5599
5600 if (!ring || !ring->sched.thread)
5601 continue;
5602
6868a2c4 5603 drm_sched_start(&ring->sched, true);
1d721ed6
AG
5604 }
5605
4e8303cf
LL
5606 if (adev->enable_mes &&
5607 amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(11, 0, 3))
ed67f729
JX
5608 amdgpu_mes_self_test(tmp_adev);
5609
b8920e1e 5610 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
4a580877 5611 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
1d721ed6 5612
7258fa31
SK
5613 if (tmp_adev->asic_reset_res)
5614 r = tmp_adev->asic_reset_res;
5615
1d721ed6 5616 tmp_adev->asic_reset_res = 0;
26bc5340
AG
5617
5618 if (r) {
5619 /* bad news, how to tell it to userspace ? */
12ffa55d 5620 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
5621 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5622 } else {
12ffa55d 5623 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
3fa8f89d
S
5624 if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5625 DRM_WARN("smart shift update failed\n");
26bc5340 5626 }
7c6e68c7 5627 }
26bc5340 5628
7c6e68c7 5629skip_sched_resume:
655ce9cb 5630 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
428890a3 5631 /* unlock kfd: SRIOV would do it separately */
c004d44e 5632 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
428890a3 5633 amdgpu_amdkfd_post_reset(tmp_adev);
8e2712e7 5634
5635 /* kfd_post_reset will do nothing if the kfd device is not initialized,
5636 * so bring up kfd here if it was not initialized before
5637 */
5638 if (!adev->kfd.init_complete)
5639 amdgpu_amdkfd_device_init(adev);
5640
3f12acc8
EQ
5641 if (audio_suspended)
5642 amdgpu_device_resume_display_audio(tmp_adev);
e923be99
AG
5643
5644 amdgpu_device_unset_mp1_state(tmp_adev);
d293470e
YC
5645
5646 amdgpu_ras_set_error_query_ready(tmp_adev, true);
26bc5340
AG
5647 }
5648
f5c7e779 5649recover_end:
e923be99
AG
5650 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5651 reset_list);
5652 amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5653
9e94d22c 5654 if (hive) {
9e94d22c 5655 mutex_unlock(&hive->hive_lock);
d95e8e97 5656 amdgpu_put_xgmi_hive(hive);
9e94d22c 5657 }
26bc5340 5658
f287a3c5 5659 if (r)
26bc5340 5660 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
ab9a0b1f
AG
5661
5662 atomic_set(&adev->reset_domain->reset_res, r);
d38ceaf9
AD
5663 return r;
5664}
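/*
 * Illustrative sketch only, not part of the upstream file: how a hang
 * handler might invoke amdgpu_device_gpu_recover(). Only fields visible in
 * this file are used; reset_context.job and .hive are filled in by the
 * function itself, so the caller only passes the hung job. The helper name
 * is hypothetical.
 */
static int __maybe_unused amdgpu_gpu_recover_example(struct amdgpu_device *adev,
						     struct amdgpu_job *job)
{
	struct amdgpu_reset_context reset_context;

	memset(&reset_context, 0, sizeof(reset_context));
	reset_context.method = AMD_RESET_METHOD_NONE;	/* let the driver pick */
	reset_context.reset_req_dev = adev;
	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);

	return amdgpu_device_gpu_recover(adev, job, &reset_context);
}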
5665
e3ecdffa
AD
5666/**
5667 * amdgpu_device_get_pcie_info - fetch PCIe info about the PCIe slot
5668 *
5669 * @adev: amdgpu_device pointer
5670 *
5671 * Fetches and stores in the driver the PCIe capabilities (gen speed
5672 * and lanes) of the slot the device is in. Handles APUs and
5673 * virtualized environments where PCIE config space may not be available.
5674 */
5494d864 5675static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 5676{
5d9a6330 5677 struct pci_dev *pdev;
c5313457
HK
5678 enum pci_bus_speed speed_cap, platform_speed_cap;
5679 enum pcie_link_width platform_link_width;
d0dd7f0c 5680
cd474ba0
AD
5681 if (amdgpu_pcie_gen_cap)
5682 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 5683
cd474ba0
AD
5684 if (amdgpu_pcie_lane_cap)
5685 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 5686
cd474ba0 5687 /* covers APUs as well */
04e85958 5688 if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
cd474ba0
AD
5689 if (adev->pm.pcie_gen_mask == 0)
5690 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5691 if (adev->pm.pcie_mlw_mask == 0)
5692 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 5693 return;
cd474ba0 5694 }
d0dd7f0c 5695
c5313457
HK
5696 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5697 return;
5698
dbaa922b
AD
5699 pcie_bandwidth_available(adev->pdev, NULL,
5700 &platform_speed_cap, &platform_link_width);
c5313457 5701
cd474ba0 5702 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
5703 /* asic caps */
5704 pdev = adev->pdev;
5705 speed_cap = pcie_get_speed_cap(pdev);
5706 if (speed_cap == PCI_SPEED_UNKNOWN) {
5707 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
5708 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5709 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 5710 } else {
2b3a1f51
FX
5711 if (speed_cap == PCIE_SPEED_32_0GT)
5712 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5713 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5714 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5715 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5716 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5717 else if (speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5718 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5719 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5720 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5721 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5722 else if (speed_cap == PCIE_SPEED_8_0GT)
5723 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5724 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5725 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5726 else if (speed_cap == PCIE_SPEED_5_0GT)
5727 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5728 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5729 else
5730 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5731 }
5732 /* platform caps */
c5313457 5733 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
5734 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5735 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5736 } else {
2b3a1f51
FX
5737 if (platform_speed_cap == PCIE_SPEED_32_0GT)
5738 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5739 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5740 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5741 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5742 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5743 else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5744 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5745 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5746 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5747 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 5748 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
5749 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5750 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5751 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 5752 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
5753 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5754 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5755 else
5756 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5757
cd474ba0
AD
5758 }
5759 }
5760 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 5761 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
5762 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5763 } else {
c5313457 5764 switch (platform_link_width) {
5d9a6330 5765 case PCIE_LNK_X32:
cd474ba0
AD
5766 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5767 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5768 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5769 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5770 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5771 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5772 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5773 break;
5d9a6330 5774 case PCIE_LNK_X16:
cd474ba0
AD
5775 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5776 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5777 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5778 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5779 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5780 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5781 break;
5d9a6330 5782 case PCIE_LNK_X12:
cd474ba0
AD
5783 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5784 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5785 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5786 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5787 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5788 break;
5d9a6330 5789 case PCIE_LNK_X8:
cd474ba0
AD
5790 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5791 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5792 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5793 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5794 break;
5d9a6330 5795 case PCIE_LNK_X4:
cd474ba0
AD
5796 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5797 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5798 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5799 break;
5d9a6330 5800 case PCIE_LNK_X2:
cd474ba0
AD
5801 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5802 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5803 break;
5d9a6330 5804 case PCIE_LNK_X1:
cd474ba0
AD
5805 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5806 break;
5807 default:
5808 break;
5809 }
d0dd7f0c
AD
5810 }
5811 }
5812}
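/*
 * Illustrative sketch only, not part of the upstream file: deriving the
 * highest platform-side PCIe gen encoded in the mask built above. The
 * CAIL_PCIE_LINK_SPEED_SUPPORT_GENx macros are the same ones used in the
 * function; the helper name is hypothetical.
 */
static int __maybe_unused amdgpu_max_platform_pcie_gen_example(struct amdgpu_device *adev)
{
	u32 mask = adev->pm.pcie_gen_mask;

	if (mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5)
		return 5;
	if (mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4)
		return 4;
	if (mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
		return 3;
	if (mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2)
		return 2;
	return 1;
}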
d38ceaf9 5813
08a2fd23
RE
5814/**
5815 * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
5816 *
5817 * @adev: amdgpu_device pointer
5818 * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
5819 *
5820 * Return true if @peer_adev can access (DMA) @adev through the PCIe
5821 * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
5822 * @peer_adev.
5823 */
5824bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
5825 struct amdgpu_device *peer_adev)
5826{
5827#ifdef CONFIG_HSA_AMD_P2P
5828 uint64_t address_mask = peer_adev->dev->dma_mask ?
5829 ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
5830 resource_size_t aper_limit =
5831 adev->gmc.aper_base + adev->gmc.aper_size - 1;
bb66ecbf
LL
5832 bool p2p_access =
5833 !adev->gmc.xgmi.connected_to_cpu &&
5834 !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
08a2fd23
RE
5835
5836 return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
5837 adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
5838 !(adev->gmc.aper_base & address_mask ||
5839 aper_limit & address_mask));
5840#else
5841 return false;
5842#endif
5843}
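/*
 * Worked example (illustrative, not part of the upstream file): with a
 * 32-bit peer DMA mask, address_mask = ~0xffffffffULL. A 256 MiB visible
 * BAR based at 0xe0000000 passes, because both aper_base and aper_limit
 * (0xefffffff) stay below 4 GiB and so have no bits in address_mask set.
 * A BAR based at 0x800000000 fails, because aper_base & address_mask is
 * already non-zero, so the peer could not address it through DMA.
 */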
5844
361dbd01
AD
5845int amdgpu_device_baco_enter(struct drm_device *dev)
5846{
1348969a 5847 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5848 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01 5849
6ab68650 5850 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
5851 return -ENOTSUPP;
5852
8ab0d6f0 5853 if (ras && adev->ras_enabled &&
acdae216 5854 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5855 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
5856
9530273e 5857 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
5858}
5859
5860int amdgpu_device_baco_exit(struct drm_device *dev)
5861{
1348969a 5862 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5863 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 5864 int ret = 0;
361dbd01 5865
6ab68650 5866 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
5867 return -ENOTSUPP;
5868
9530273e
EQ
5869 ret = amdgpu_dpm_baco_exit(adev);
5870 if (ret)
5871 return ret;
7a22677b 5872
8ab0d6f0 5873 if (ras && adev->ras_enabled &&
acdae216 5874 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5875 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
5876
1bece222
CL
5877 if (amdgpu_passthrough(adev) &&
5878 adev->nbio.funcs->clear_doorbell_interrupt)
5879 adev->nbio.funcs->clear_doorbell_interrupt(adev);
5880
7a22677b 5881 return 0;
361dbd01 5882}
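/*
 * Illustrative sketch only, not part of the upstream file: the expected
 * pairing of the BACO helpers above around a period where the GPU should be
 * powered down (BACO = Bus Active, Chip Off). Error handling is simplified
 * and the helper name is hypothetical.
 */
static int __maybe_unused amdgpu_baco_cycle_example(struct drm_device *dev)
{
	int r;

	r = amdgpu_device_baco_enter(dev);
	if (r)
		return r;

	/* ... the GPU now sits in BACO until we bring it back ... */

	return amdgpu_device_baco_exit(dev);
}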
c9a6b82f
AG
5883
5884/**
5885 * amdgpu_pci_error_detected - Called when a PCI error is detected.
5886 * @pdev: PCI device struct
5887 * @state: PCI channel state
5888 *
5889 * Description: Called when a PCI error is detected.
5890 *
5891 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
5892 */
5893pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
5894{
5895 struct drm_device *dev = pci_get_drvdata(pdev);
5896 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5897 int i;
c9a6b82f
AG
5898
5899 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
5900
6894305c
AG
5901 if (adev->gmc.xgmi.num_physical_nodes > 1) {
5902 DRM_WARN("No support for XGMI hive yet...");
5903 return PCI_ERS_RESULT_DISCONNECT;
5904 }
5905
e17e27f9
GC
5906 adev->pci_channel_state = state;
5907
c9a6b82f
AG
5908 switch (state) {
5909 case pci_channel_io_normal:
5910 return PCI_ERS_RESULT_CAN_RECOVER;
acd89fca 5911 /* Fatal error, prepare for slot reset */
8a11d283
TZ
5912 case pci_channel_io_frozen:
5913 /*
d0fb18b5 5914 * Locking adev->reset_domain->sem will prevent any external access
acd89fca
AG
5915 * to GPU during PCI error recovery
5916 */
3675c2f2 5917 amdgpu_device_lock_reset_domain(adev->reset_domain);
e923be99 5918 amdgpu_device_set_mp1_state(adev);
acd89fca
AG
5919
5920 /*
5921 * Block any work scheduling as we do for regular GPU reset
5922 * for the duration of the recovery
5923 */
5924 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5925 struct amdgpu_ring *ring = adev->rings[i];
5926
5927 if (!ring || !ring->sched.thread)
5928 continue;
5929
5930 drm_sched_stop(&ring->sched, NULL);
5931 }
8f8c80f4 5932 atomic_inc(&adev->gpu_reset_counter);
c9a6b82f
AG
5933 return PCI_ERS_RESULT_NEED_RESET;
5934 case pci_channel_io_perm_failure:
5935 /* Permanent error, prepare for device removal */
5936 return PCI_ERS_RESULT_DISCONNECT;
5937 }
5938
5939 return PCI_ERS_RESULT_NEED_RESET;
5940}
5941
5942/**
5943 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
5944 * @pdev: pointer to PCI device
5945 */
5946pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
5947{
5948
5949 DRM_INFO("PCI error: mmio enabled callback!!\n");
5950
5951 /* TODO - dump whatever for debugging purposes */
5952
5953 /* This called only if amdgpu_pci_error_detected returns
5954 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
5955 * works, no need to reset slot.
5956 */
5957
5958 return PCI_ERS_RESULT_RECOVERED;
5959}
5960
5961/**
5962 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
5963 * @pdev: PCI device struct
5964 *
5965 * Description: This routine is called by the pci error recovery
5966 * code after the PCI slot has been reset, just before we
5967 * should resume normal operations.
5968 */
5969pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
5970{
5971 struct drm_device *dev = pci_get_drvdata(pdev);
5972 struct amdgpu_device *adev = drm_to_adev(dev);
362c7b91 5973 int r, i;
04442bf7 5974 struct amdgpu_reset_context reset_context;
362c7b91 5975 u32 memsize;
7ac71382 5976 struct list_head device_list;
c9a6b82f
AG
5977
5978 DRM_INFO("PCI error: slot reset callback!!\n");
5979
04442bf7
LL
5980 memset(&reset_context, 0, sizeof(reset_context));
5981
7ac71382 5982 INIT_LIST_HEAD(&device_list);
655ce9cb 5983 list_add_tail(&adev->reset_list, &device_list);
7ac71382 5984
362c7b91
AG
5985 /* wait for asic to come out of reset */
5986 msleep(500);
5987
7ac71382 5988 /* Restore PCI confspace */
c1dd4aa6 5989 amdgpu_device_load_pci_state(pdev);
c9a6b82f 5990
362c7b91
AG
5991 /* confirm ASIC came out of reset */
5992 for (i = 0; i < adev->usec_timeout; i++) {
5993 memsize = amdgpu_asic_get_config_memsize(adev);
5994
5995 if (memsize != 0xffffffff)
5996 break;
5997 udelay(1);
5998 }
5999 if (memsize == 0xffffffff) {
6000 r = -ETIME;
6001 goto out;
6002 }
6003
04442bf7
LL
6004 reset_context.method = AMD_RESET_METHOD_NONE;
6005 reset_context.reset_req_dev = adev;
6006 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
6007 set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
6008
7afefb81 6009 adev->no_hw_access = true;
04442bf7 6010 r = amdgpu_device_pre_asic_reset(adev, &reset_context);
7afefb81 6011 adev->no_hw_access = false;
c9a6b82f
AG
6012 if (r)
6013 goto out;
6014
04442bf7 6015 r = amdgpu_do_asic_reset(&device_list, &reset_context);
c9a6b82f
AG
6016
6017out:
c9a6b82f 6018 if (!r) {
c1dd4aa6
AG
6019 if (amdgpu_device_cache_pci_state(adev->pdev))
6020 pci_restore_state(adev->pdev);
6021
c9a6b82f
AG
6022 DRM_INFO("PCIe error recovery succeeded\n");
6023 } else {
6024 DRM_ERROR("PCIe error recovery failed, err:%d", r);
e923be99
AG
6025 amdgpu_device_unset_mp1_state(adev);
6026 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f
AG
6027 }
6028
6029 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
6030}
6031
6032/**
6033 * amdgpu_pci_resume() - resume normal ops after PCI reset
6034 * @pdev: pointer to PCI device
6035 *
6036 * Called when the error recovery driver tells us that its
505199a3 6037 * OK to resume normal operation.
c9a6b82f
AG
6038 */
6039void amdgpu_pci_resume(struct pci_dev *pdev)
6040{
6041 struct drm_device *dev = pci_get_drvdata(pdev);
6042 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 6043 int i;
c9a6b82f 6044
c9a6b82f
AG
6045
6046 DRM_INFO("PCI error: resume callback!!\n");
acd89fca 6047
e17e27f9
GC
6048 /* Only continue execution for the case of pci_channel_io_frozen */
6049 if (adev->pci_channel_state != pci_channel_io_frozen)
6050 return;
6051
acd89fca
AG
6052 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6053 struct amdgpu_ring *ring = adev->rings[i];
6054
6055 if (!ring || !ring->sched.thread)
6056 continue;
6057
acd89fca
AG
6058 drm_sched_start(&ring->sched, true);
6059 }
6060
e923be99
AG
6061 amdgpu_device_unset_mp1_state(adev);
6062 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f 6063}
c1dd4aa6
AG
6064
6065bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
6066{
6067 struct drm_device *dev = pci_get_drvdata(pdev);
6068 struct amdgpu_device *adev = drm_to_adev(dev);
6069 int r;
6070
6071 r = pci_save_state(pdev);
6072 if (!r) {
6073 kfree(adev->pci_state);
6074
6075 adev->pci_state = pci_store_saved_state(pdev);
6076
6077 if (!adev->pci_state) {
6078 DRM_ERROR("Failed to store PCI saved state");
6079 return false;
6080 }
6081 } else {
6082 DRM_WARN("Failed to save PCI state, err:%d\n", r);
6083 return false;
6084 }
6085
6086 return true;
6087}
6088
6089bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
6090{
6091 struct drm_device *dev = pci_get_drvdata(pdev);
6092 struct amdgpu_device *adev = drm_to_adev(dev);
6093 int r;
6094
6095 if (!adev->pci_state)
6096 return false;
6097
6098 r = pci_load_saved_state(pdev, adev->pci_state);
6099
6100 if (!r) {
6101 pci_restore_state(pdev);
6102 } else {
6103 DRM_WARN("Failed to load PCI state, err:%d\n", r);
6104 return false;
6105 }
6106
6107 return true;
6108}
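/*
 * Illustrative sketch only, not part of the upstream file: the save/restore
 * pairing used around an ASIC reset in this file - cache the PCI config
 * space before the reset and replay it once the ASIC comes back. The helper
 * name is hypothetical.
 */
static int __maybe_unused amdgpu_pci_state_roundtrip_example(struct amdgpu_device *adev)
{
	if (!amdgpu_device_cache_pci_state(adev->pdev))
		return -EINVAL;

	/* ... perform the ASIC reset here ... */

	if (!amdgpu_device_load_pci_state(adev->pdev))
		return -EINVAL;

	return 0;
}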
6109
810085dd
EH
6110void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
6111 struct amdgpu_ring *ring)
6112{
6113#ifdef CONFIG_X86_64
b818a5d3 6114 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
6115 return;
6116#endif
6117 if (adev->gmc.xgmi.connected_to_cpu)
6118 return;
6119
6120 if (ring && ring->funcs->emit_hdp_flush)
6121 amdgpu_ring_emit_hdp_flush(ring);
6122 else
6123 amdgpu_asic_flush_hdp(adev, ring);
6124}
c1dd4aa6 6125
810085dd
EH
6126void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
6127 struct amdgpu_ring *ring)
6128{
6129#ifdef CONFIG_X86_64
b818a5d3 6130 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
6131 return;
6132#endif
6133 if (adev->gmc.xgmi.connected_to_cpu)
6134 return;
c1dd4aa6 6135
810085dd
EH
6136 amdgpu_asic_invalidate_hdp(adev, ring);
6137}
34f3a4a9 6138
89a7a870
AG
6139int amdgpu_in_reset(struct amdgpu_device *adev)
6140{
6141 return atomic_read(&adev->reset_domain->in_gpu_reset);
53a17b6b
TZ
6142}
6143
34f3a4a9
LY
6144/**
6145 * amdgpu_device_halt() - bring hardware to some kind of halt state
6146 *
6147 * @adev: amdgpu_device pointer
6148 *
6149 * Bring hardware to some kind of halt state so that no one can touch it
6150 * any more. It will help to maintain error context when error occurred.
6151 * Compare to a simple hang, the system will keep stable at least for SSH
6152 * access. Then it should be trivial to inspect the hardware state and
6153 * see what's going on. Implemented as following:
6154 *
6155 * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
6156 * clears all CPU mappings to device, disallows remappings through page faults
6157 * 2. amdgpu_irq_disable_all() disables all interrupts
6158 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
6159 * 4. set adev->no_hw_access to avoid potential crashes after setp 5
6160 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
6161 * 6. pci_disable_device() and pci_wait_for_pending_transaction()
6162 * flush any in flight DMA operations
6163 */
6164void amdgpu_device_halt(struct amdgpu_device *adev)
6165{
6166 struct pci_dev *pdev = adev->pdev;
e0f943b4 6167 struct drm_device *ddev = adev_to_drm(adev);
34f3a4a9 6168
2c1c7ba4 6169 amdgpu_xcp_dev_unplug(adev);
34f3a4a9
LY
6170 drm_dev_unplug(ddev);
6171
6172 amdgpu_irq_disable_all(adev);
6173
6174 amdgpu_fence_driver_hw_fini(adev);
6175
6176 adev->no_hw_access = true;
6177
6178 amdgpu_device_unmap_mmio(adev);
6179
6180 pci_disable_device(pdev);
6181 pci_wait_for_pending_transaction(pdev);
6182}
86700a40
XD
6183
6184u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
6185 u32 reg)
6186{
6187 unsigned long flags, address, data;
6188 u32 r;
6189
6190 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6191 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6192
6193 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6194 WREG32(address, reg * 4);
6195 (void)RREG32(address);
6196 r = RREG32(data);
6197 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6198 return r;
6199}
6200
6201void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
6202 u32 reg, u32 v)
6203{
6204 unsigned long flags, address, data;
6205
6206 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6207 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6208
6209 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6210 WREG32(address, reg * 4);
6211 (void)RREG32(address);
6212 WREG32(data, v);
6213 (void)RREG32(data);
6214 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6215}
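/*
 * Illustrative sketch only, not part of the upstream file: a read-modify-
 * write of a PCIe port register using the indirect index/data helpers
 * above. The helper name and the set_bits parameter are hypothetical.
 */
static void __maybe_unused amdgpu_pcie_port_rmw_example(struct amdgpu_device *adev,
							u32 reg, u32 set_bits)
{
	u32 v = amdgpu_device_pcie_port_rreg(adev, reg);

	v |= set_bits;
	amdgpu_device_pcie_port_wreg(adev, reg, v);
}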
68ce8b24
CK
6216
6217/**
6218 * amdgpu_device_switch_gang - switch to a new gang
6219 * @adev: amdgpu_device pointer
6220 * @gang: the gang to switch to
6221 *
6222 * Try to switch to a new gang.
6223 * Returns: NULL if we switched to the new gang or a reference to the current
6224 * gang leader.
6225 */
6226struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
6227 struct dma_fence *gang)
6228{
6229 struct dma_fence *old = NULL;
6230
6231 do {
6232 dma_fence_put(old);
6233 rcu_read_lock();
6234 old = dma_fence_get_rcu_safe(&adev->gang_submit);
6235 rcu_read_unlock();
6236
6237 if (old == gang)
6238 break;
6239
6240 if (!dma_fence_is_signaled(old))
6241 return old;
6242
6243 } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
6244 old, gang) != old);
6245
6246 dma_fence_put(old);
6247 return NULL;
6248}
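/*
 * Illustrative sketch only, not part of the upstream file: how a submission
 * path might use amdgpu_device_switch_gang(), waiting for the previous gang
 * leader whenever the switch cannot happen yet. Timeout and interrupt
 * handling are omitted; the helper name is hypothetical.
 */
static void __maybe_unused amdgpu_switch_gang_example(struct amdgpu_device *adev,
						      struct dma_fence *gang)
{
	struct dma_fence *old;

	while ((old = amdgpu_device_switch_gang(adev, gang))) {
		/* got a reference to the still-running gang leader */
		dma_fence_wait(old, false);
		dma_fence_put(old);
	}
}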
220c8cc8
AD
6249
6250bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6251{
6252 switch (adev->asic_type) {
6253#ifdef CONFIG_DRM_AMDGPU_SI
6254 case CHIP_HAINAN:
6255#endif
6256 case CHIP_TOPAZ:
6257 /* chips with no display hardware */
6258 return false;
6259#ifdef CONFIG_DRM_AMDGPU_SI
6260 case CHIP_TAHITI:
6261 case CHIP_PITCAIRN:
6262 case CHIP_VERDE:
6263 case CHIP_OLAND:
6264#endif
6265#ifdef CONFIG_DRM_AMDGPU_CIK
6266 case CHIP_BONAIRE:
6267 case CHIP_HAWAII:
6268 case CHIP_KAVERI:
6269 case CHIP_KABINI:
6270 case CHIP_MULLINS:
6271#endif
6272 case CHIP_TONGA:
6273 case CHIP_FIJI:
6274 case CHIP_POLARIS10:
6275 case CHIP_POLARIS11:
6276 case CHIP_POLARIS12:
6277 case CHIP_VEGAM:
6278 case CHIP_CARRIZO:
6279 case CHIP_STONEY:
6280 /* chips with display hardware */
6281 return true;
6282 default:
6283 /* IP discovery */
4e8303cf 6284 if (!amdgpu_ip_version(adev, DCE_HWIP, 0) ||
220c8cc8
AD
6285 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6286 return false;
6287 return true;
6288 }
6289}
81283fee
JZ
6290
6291uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
6292 uint32_t inst, uint32_t reg_addr, char reg_name[],
6293 uint32_t expected_value, uint32_t mask)
6294{
6295 uint32_t ret = 0;
6296 uint32_t old_ = 0;
6297 uint32_t tmp_ = RREG32(reg_addr);
6298 uint32_t loop = adev->usec_timeout;
6299
6300 while ((tmp_ & (mask)) != (expected_value)) {
6301 if (old_ != tmp_) {
6302 loop = adev->usec_timeout;
6303 old_ = tmp_;
6304 } else
6305 udelay(1);
6306 tmp_ = RREG32(reg_addr);
6307 loop--;
6308 if (!loop) {
6309 DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08xn",
6310 inst, reg_name, (uint32_t)expected_value,
6311 (uint32_t)(tmp_ & (mask)));
6312 ret = -ETIMEDOUT;
6313 break;
6314 }
6315 }
6316 return ret;
6317}
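/*
 * Illustrative sketch only, not part of the upstream file: polling a
 * register until a status bit is set, using amdgpu_device_wait_on_rreg()
 * above. The instance, register offset, name and bit mask shown here are
 * hypothetical placeholders, as is the helper itself.
 */
static int __maybe_unused amdgpu_wait_ready_example(struct amdgpu_device *adev)
{
	/* wait until bit 0 of the (hypothetical) register at 0x1234 is set */
	return amdgpu_device_wait_on_rreg(adev, 0, 0x1234, "STATUS",
					  0x1, 0x1);
}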