Revert "drm/amdkfd: Use partial migrations in GPU page faults"
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
d38ceaf9
AD
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
b1ddf548 28#include <linux/power_supply.h>
0875dc9e 29#include <linux/kthread.h>
fdf2f6c5 30#include <linux/module.h>
d38ceaf9
AD
31#include <linux/console.h>
32#include <linux/slab.h>
4a74c38c 33#include <linux/iommu.h>
901e2be2 34#include <linux/pci.h>
08a2fd23 35#include <linux/pci-p2pdma.h>
d37a3929 36#include <linux/apple-gmux.h>
fdf2f6c5 37
b7cdb41e 38#include <drm/drm_aperture.h>
4562236b 39#include <drm/drm_atomic_helper.h>
973ad627 40#include <drm/drm_crtc_helper.h>
45b64fd9 41#include <drm/drm_fb_helper.h>
fcd70cd3 42#include <drm/drm_probe_helper.h>
d38ceaf9
AD
43#include <drm/amdgpu_drm.h>
44#include <linux/vgaarb.h>
45#include <linux/vga_switcheroo.h>
46#include <linux/efi.h>
47#include "amdgpu.h"
f4b373f4 48#include "amdgpu_trace.h"
d38ceaf9
AD
49#include "amdgpu_i2c.h"
50#include "atom.h"
51#include "amdgpu_atombios.h"
a5bde2f9 52#include "amdgpu_atomfirmware.h"
d0dd7f0c 53#include "amd_pcie.h"
33f34802
KW
54#ifdef CONFIG_DRM_AMDGPU_SI
55#include "si.h"
56#endif
a2e73f56
AD
57#ifdef CONFIG_DRM_AMDGPU_CIK
58#include "cik.h"
59#endif
aaa36a97 60#include "vi.h"
460826e6 61#include "soc15.h"
0a5b8c7b 62#include "nv.h"
d38ceaf9 63#include "bif/bif_4_1_d.h"
bec86378 64#include <linux/firmware.h>
89041940 65#include "amdgpu_vf_error.h"
d38ceaf9 66
ba997709 67#include "amdgpu_amdkfd.h"
d2f52ac8 68#include "amdgpu_pm.h"
d38ceaf9 69
5183411b 70#include "amdgpu_xgmi.h"
c030f2e4 71#include "amdgpu_ras.h"
9c7c85f7 72#include "amdgpu_pmu.h"
bd607166 73#include "amdgpu_fru_eeprom.h"
04442bf7 74#include "amdgpu_reset.h"
5183411b 75
d5ea093e 76#include <linux/suspend.h>
c6a6e2db 77#include <drm/task_barrier.h>
3f12acc8 78#include <linux/pm_runtime.h>
d5ea093e 79
f89f8c6b
AG
80#include <drm/drm_drv.h>
81
3ad5dcfe
KHF
82#if IS_ENABLED(CONFIG_X86)
83#include <asm/intel-family.h>
84#endif
85
e2a75f88 86MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
3f76dced 87MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
2d2e5e7e 88MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
ad5a67a7 89MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
54c4d17e 90MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
65e60f6e 91MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
42b325e5 92MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
e2a75f88 93
2dc80b00 94#define AMDGPU_RESUME_MS 2000
7258fa31
SK
95#define AMDGPU_MAX_RETRY_LIMIT 2
96#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
2dc80b00 97
b7cdb41e
ML
98static const struct drm_driver amdgpu_kms_driver;
99
050091ab 100const char *amdgpu_asic_name[] = {
da69c161
KW
101 "TAHITI",
102 "PITCAIRN",
103 "VERDE",
104 "OLAND",
105 "HAINAN",
d38ceaf9
AD
106 "BONAIRE",
107 "KAVERI",
108 "KABINI",
109 "HAWAII",
110 "MULLINS",
111 "TOPAZ",
112 "TONGA",
48299f95 113 "FIJI",
d38ceaf9 114 "CARRIZO",
139f4917 115 "STONEY",
2cc0c0b5
FC
116 "POLARIS10",
117 "POLARIS11",
c4642a47 118 "POLARIS12",
48ff108d 119 "VEGAM",
d4196f01 120 "VEGA10",
8fab806a 121 "VEGA12",
956fcddc 122 "VEGA20",
2ca8a5d2 123 "RAVEN",
d6c3b24e 124 "ARCTURUS",
1eee4228 125 "RENOIR",
d46b417a 126 "ALDEBARAN",
852a6626 127 "NAVI10",
d0f56dc2 128 "CYAN_SKILLFISH",
87dbad02 129 "NAVI14",
9802f5d7 130 "NAVI12",
ccaf72d3 131 "SIENNA_CICHLID",
ddd8fbe7 132 "NAVY_FLOUNDER",
4f1e9a76 133 "VANGOGH",
a2468e04 134 "DIMGREY_CAVEFISH",
6f169591 135 "BEIGE_GOBY",
ee9236b7 136 "YELLOW_CARP",
3ae695d6 137 "IP DISCOVERY",
d38ceaf9
AD
138 "LAST",
139};
140
dcea6e65
KR
141/**
142 * DOC: pcie_replay_count
143 *
144 * The amdgpu driver provides a sysfs API for reporting the total number
145 * of PCIe replays (NAKs)
146 * The file pcie_replay_count is used for this and returns the total
147 * number of replays as a sum of the NAKs generated and NAKs received
148 */
149
150static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
151 struct device_attribute *attr, char *buf)
152{
153 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 154 struct amdgpu_device *adev = drm_to_adev(ddev);
dcea6e65
KR
155 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
156
36000c7a 157 return sysfs_emit(buf, "%llu\n", cnt);
dcea6e65
KR
158}
159
b8920e1e 160static DEVICE_ATTR(pcie_replay_count, 0444,
dcea6e65
KR
161 amdgpu_device_get_pcie_replay_count, NULL);
162
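/*
 * Usage sketch (illustrative only, sysfs path assumed): the attribute above can
 * be read from userspace, e.g.
 *
 *   cat /sys/class/drm/card0/device/pcie_replay_count
 *
 * which prints a single decimal count of PCIe replays (NAKs).
 */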
4798db85
LL
163/**
164 * DOC: board_info
165 *
166 * The amdgpu driver provides a sysfs API for giving board related information.
167 * It provides the form factor information in the format
168 *
169 * type : form factor
170 *
171 * Possible form factor values
172 *
173 * - "cem" - PCIE CEM card
174 * - "oam" - Open Compute Accelerator Module
175 * - "unknown" - Not known
176 *
177 */
178
76da73f0
LL
179static ssize_t amdgpu_device_get_board_info(struct device *dev,
180 struct device_attribute *attr,
181 char *buf)
182{
183 struct drm_device *ddev = dev_get_drvdata(dev);
184 struct amdgpu_device *adev = drm_to_adev(ddev);
185 enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM;
186 const char *pkg;
187
188 if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type)
189 pkg_type = adev->smuio.funcs->get_pkg_type(adev);
190
191 switch (pkg_type) {
192 case AMDGPU_PKG_TYPE_CEM:
193 pkg = "cem";
194 break;
195 case AMDGPU_PKG_TYPE_OAM:
196 pkg = "oam";
197 break;
198 default:
199 pkg = "unknown";
200 break;
201 }
202
203 return sysfs_emit(buf, "%s : %s\n", "type", pkg);
204}
205
206static DEVICE_ATTR(board_info, 0444, amdgpu_device_get_board_info, NULL);
207
208static struct attribute *amdgpu_board_attrs[] = {
209 &dev_attr_board_info.attr,
210 NULL,
211};
212
213static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj,
214 struct attribute *attr, int n)
215{
216 struct device *dev = kobj_to_dev(kobj);
217 struct drm_device *ddev = dev_get_drvdata(dev);
218 struct amdgpu_device *adev = drm_to_adev(ddev);
219
220 if (adev->flags & AMD_IS_APU)
221 return 0;
222
223 return attr->mode;
224}
225
226static const struct attribute_group amdgpu_board_attrs_group = {
227 .attrs = amdgpu_board_attrs,
228 .is_visible = amdgpu_board_attrs_is_visible
229};
230
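/*
 * Usage sketch (illustrative only, sysfs path assumed): reading the attribute
 * returns one "type : form factor" line, e.g.
 *
 *   cat /sys/class/drm/card0/device/board_info
 *   type : oam
 *
 * The attribute is hidden on APUs by amdgpu_board_attrs_is_visible() above.
 */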
5494d864
AD
231static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
232
bd607166 233
fd496ca8 234/**
b98c6299 235 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
fd496ca8
AD
236 *
237 * @dev: drm_device pointer
238 *
b98c6299 239 * Returns true if the device is a dGPU with ATPX power control,
fd496ca8
AD
240 * otherwise return false.
241 */
b98c6299 242bool amdgpu_device_supports_px(struct drm_device *dev)
fd496ca8
AD
243{
244 struct amdgpu_device *adev = drm_to_adev(dev);
245
b98c6299 246 if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
fd496ca8
AD
247 return true;
248 return false;
249}
250
e3ecdffa 251/**
0330b848 252 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
e3ecdffa
AD
253 *
254 * @dev: drm_device pointer
255 *
b98c6299 256 * Returns true if the device is a dGPU with ACPI power control,
e3ecdffa
AD
257 * otherwise return false.
258 */
31af062a 259bool amdgpu_device_supports_boco(struct drm_device *dev)
d38ceaf9 260{
1348969a 261 struct amdgpu_device *adev = drm_to_adev(dev);
d38ceaf9 262
b98c6299
AD
263 if (adev->has_pr3 ||
264 ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
d38ceaf9
AD
265 return true;
266 return false;
267}
268
a69cba42
AD
269/**
270 * amdgpu_device_supports_baco - Does the device support BACO
271 *
272 * @dev: drm_device pointer
273 *
274 * Returns true if the device supports BACO,
275 * otherwise return false.
276 */
277bool amdgpu_device_supports_baco(struct drm_device *dev)
278{
1348969a 279 struct amdgpu_device *adev = drm_to_adev(dev);
a69cba42
AD
280
281 return amdgpu_asic_supports_baco(adev);
282}
283
3fa8f89d
S
284/**
285 * amdgpu_device_supports_smart_shift - Is the device dGPU with
286 * smart shift support
287 *
288 * @dev: drm_device pointer
289 *
290 * Returns true if the device is a dGPU with Smart Shift support,
291 * otherwise returns false.
292 */
293bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
294{
295 return (amdgpu_device_supports_boco(dev) &&
296 amdgpu_acpi_is_power_shift_control_supported());
297}
298
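/*
 * Illustrative sketch only: one way a caller could pick a runtime power-off
 * strategy from the helpers above. The function name and ordering are an
 * assumption for demonstration, not the driver's actual runtime-PM policy.
 */
static inline const char *amdgpu_device_example_runtime_pm_mode(struct drm_device *dev)
{
	if (amdgpu_device_supports_px(dev))
		return "px";	/* ATPX dGPU power control */
	if (amdgpu_device_supports_boco(dev))
		return "boco";	/* ACPI power resources (PR3 / hybrid ATPX) */
	if (amdgpu_device_supports_baco(dev))
		return "baco";	/* bus active, chip off */
	return "none";
}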
6e3cd2a9
MCC
299/*
300 * VRAM access helper functions
301 */
302
e35e2b11 303/**
048af66b 304 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
e35e2b11
TY
305 *
306 * @adev: amdgpu_device pointer
307 * @pos: offset of the buffer in vram
308 * @buf: virtual address of the buffer in system memory
309 * @size: read/write size, sizeof(@buf) must be >= @size
310 * @write: true - write to vram, otherwise - read from vram
311 */
048af66b
KW
312void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
313 void *buf, size_t size, bool write)
e35e2b11 314{
e35e2b11 315 unsigned long flags;
048af66b
KW
316 uint32_t hi = ~0, tmp = 0;
317 uint32_t *data = buf;
ce05ac56 318 uint64_t last;
f89f8c6b 319 int idx;
ce05ac56 320
c58a863b 321 if (!drm_dev_enter(adev_to_drm(adev), &idx))
f89f8c6b 322 return;
9d11eb0d 323
048af66b
KW
324 BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
325
326 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
327 for (last = pos + size; pos < last; pos += 4) {
328 tmp = pos >> 31;
329
330 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
331 if (tmp != hi) {
332 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
333 hi = tmp;
334 }
335 if (write)
336 WREG32_NO_KIQ(mmMM_DATA, *data++);
337 else
338 *data++ = RREG32_NO_KIQ(mmMM_DATA);
339 }
340
341 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
342 drm_dev_exit(idx);
343}
344
345/**
bbe04dec 346 * amdgpu_device_aper_access - access vram by vram aperture
048af66b
KW
347 *
348 * @adev: amdgpu_device pointer
349 * @pos: offset of the buffer in vram
350 * @buf: virtual address of the buffer in system memory
351 * @size: read/write size, sizeof(@buf) must be >= @size
352 * @write: true - write to vram, otherwise - read from vram
353 *
354 * Returns the number of bytes that have been transferred.
355 */
356size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
357 void *buf, size_t size, bool write)
358{
9d11eb0d 359#ifdef CONFIG_64BIT
048af66b
KW
360 void __iomem *addr;
361 size_t count = 0;
362 uint64_t last;
363
364 if (!adev->mman.aper_base_kaddr)
365 return 0;
366
9d11eb0d
CK
367 last = min(pos + size, adev->gmc.visible_vram_size);
368 if (last > pos) {
048af66b
KW
369 addr = adev->mman.aper_base_kaddr + pos;
370 count = last - pos;
9d11eb0d
CK
371
372 if (write) {
373 memcpy_toio(addr, buf, count);
4c452b5c
SS
374 /* Make sure HDP write cache flush happens without any reordering
375 * after the system memory contents are sent over PCIe device
376 */
9d11eb0d 377 mb();
810085dd 378 amdgpu_device_flush_hdp(adev, NULL);
9d11eb0d 379 } else {
810085dd 380 amdgpu_device_invalidate_hdp(adev, NULL);
4c452b5c
SS
381 /* Make sure HDP read cache is invalidated before issuing a read
382 * to the PCIe device
383 */
9d11eb0d
CK
384 mb();
385 memcpy_fromio(buf, addr, count);
386 }
387
9d11eb0d 388 }
048af66b
KW
389
390 return count;
391#else
392 return 0;
9d11eb0d 393#endif
048af66b 394}
9d11eb0d 395
048af66b
KW
396/**
397 * amdgpu_device_vram_access - read/write a buffer in vram
398 *
399 * @adev: amdgpu_device pointer
400 * @pos: offset of the buffer in vram
401 * @buf: virtual address of the buffer in system memory
402 * @size: read/write size, sizeof(@buf) must be >= @size
403 * @write: true - write to vram, otherwise - read from vram
404 */
405void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
406 void *buf, size_t size, bool write)
407{
408 size_t count;
e35e2b11 409
048af66b
KW
410 /* try using the vram aperture to access vram first */
411 count = amdgpu_device_aper_access(adev, pos, buf, size, write);
412 size -= count;
413 if (size) {
414 /* use MM to access the rest of vram */
415 pos += count;
416 buf += count;
417 amdgpu_device_mm_access(adev, pos, buf, size, write);
e35e2b11
TY
418 }
419}
420
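/*
 * Illustrative sketch only: clearing a small, dword-aligned VRAM range with the
 * helper above. The wrapper name is hypothetical; amdgpu_device_vram_access()
 * itself falls back from the BAR aperture to MM_INDEX/MM_DATA as needed.
 */
static inline void amdgpu_device_example_clear_vram(struct amdgpu_device *adev,
						    loff_t pos, size_t size)
{
	uint32_t zero = 0;

	/* pos and size must be 4-byte aligned for the MM_INDEX/MM_DATA path */
	while (size >= sizeof(zero)) {
		amdgpu_device_vram_access(adev, pos, &zero, sizeof(zero), true);
		pos += sizeof(zero);
		size -= sizeof(zero);
	}
}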
d38ceaf9 421/*
f7ee1874 422 * register access helper functions.
d38ceaf9 423 */
56b53c0b
DL
424
425/* Check if hw access should be skipped because of hotplug or device error */
426bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
427{
7afefb81 428 if (adev->no_hw_access)
56b53c0b
DL
429 return true;
430
431#ifdef CONFIG_LOCKDEP
432 /*
433 * This is a bit complicated to understand, so worth a comment. What we assert
434 * here is that the GPU reset is not running on another thread in parallel.
435 *
436 * For this we trylock the read side of the reset semaphore, if that succeeds
437 * we know that the reset is not running in parallel.
438 *
439 * If the trylock fails we assert that we are either already holding the read
440 * side of the lock or are the reset thread itself and hold the write side of
441 * the lock.
442 */
443 if (in_task()) {
d0fb18b5
AG
444 if (down_read_trylock(&adev->reset_domain->sem))
445 up_read(&adev->reset_domain->sem);
56b53c0b 446 else
d0fb18b5 447 lockdep_assert_held(&adev->reset_domain->sem);
56b53c0b
DL
448 }
449#endif
450 return false;
451}
452
e3ecdffa 453/**
f7ee1874 454 * amdgpu_device_rreg - read a memory mapped IO or indirect register
e3ecdffa
AD
455 *
456 * @adev: amdgpu_device pointer
457 * @reg: dword aligned register offset
458 * @acc_flags: access flags which require special behavior
459 *
460 * Returns the 32 bit value from the offset specified.
461 */
f7ee1874
HZ
462uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
463 uint32_t reg, uint32_t acc_flags)
d38ceaf9 464{
f4b373f4
TSD
465 uint32_t ret;
466
56b53c0b 467 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
468 return 0;
469
f7ee1874
HZ
470 if ((reg * 4) < adev->rmmio_size) {
471 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
472 amdgpu_sriov_runtime(adev) &&
d0fb18b5 473 down_read_trylock(&adev->reset_domain->sem)) {
f7ee1874 474 ret = amdgpu_kiq_rreg(adev, reg);
d0fb18b5 475 up_read(&adev->reset_domain->sem);
f7ee1874
HZ
476 } else {
477 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
478 }
479 } else {
480 ret = adev->pcie_rreg(adev, reg * 4);
81202807 481 }
bc992ba5 482
f7ee1874 483 trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
e78b579d 484
f4b373f4 485 return ret;
d38ceaf9
AD
486}
487
421a2a30
ML
488/*
489 * MMIO register read with bytes helper functions
490 * @offset:bytes offset from MMIO start
b8920e1e 491 */
421a2a30 492
e3ecdffa
AD
493/**
494 * amdgpu_mm_rreg8 - read a memory mapped IO register
495 *
496 * @adev: amdgpu_device pointer
497 * @offset: byte aligned register offset
498 *
499 * Returns the 8 bit value from the offset specified.
500 */
7cbbc745
AG
501uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
502{
56b53c0b 503 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
504 return 0;
505
421a2a30
ML
506 if (offset < adev->rmmio_size)
507 return (readb(adev->rmmio + offset));
508 BUG();
509}
510
511/*
512 * MMIO register write with bytes helper functions
513 * @offset:bytes offset from MMIO start
514 * @value: the value want to be written to the register
b8920e1e
SS
515 */
516
e3ecdffa
AD
517/**
518 * amdgpu_mm_wreg8 - write a memory mapped IO register
519 *
520 * @adev: amdgpu_device pointer
521 * @offset: byte aligned register offset
522 * @value: 8 bit value to write
523 *
524 * Writes the value specified to the offset specified.
525 */
7cbbc745
AG
526void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
527{
56b53c0b 528 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
529 return;
530
421a2a30
ML
531 if (offset < adev->rmmio_size)
532 writeb(value, adev->rmmio + offset);
533 else
534 BUG();
535}
536
e3ecdffa 537/**
f7ee1874 538 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
e3ecdffa
AD
539 *
540 * @adev: amdgpu_device pointer
541 * @reg: dword aligned register offset
542 * @v: 32 bit value to write to the register
543 * @acc_flags: access flags which require special behavior
544 *
545 * Writes the value specified to the offset specified.
546 */
f7ee1874
HZ
547void amdgpu_device_wreg(struct amdgpu_device *adev,
548 uint32_t reg, uint32_t v,
549 uint32_t acc_flags)
d38ceaf9 550{
56b53c0b 551 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
552 return;
553
f7ee1874
HZ
554 if ((reg * 4) < adev->rmmio_size) {
555 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
556 amdgpu_sriov_runtime(adev) &&
d0fb18b5 557 down_read_trylock(&adev->reset_domain->sem)) {
f7ee1874 558 amdgpu_kiq_wreg(adev, reg, v);
d0fb18b5 559 up_read(&adev->reset_domain->sem);
f7ee1874
HZ
560 } else {
561 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
562 }
563 } else {
564 adev->pcie_wreg(adev, reg * 4, v);
81202807 565 }
bc992ba5 566
f7ee1874 567 trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
2e0cc4d4 568}
d38ceaf9 569
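/*
 * Illustrative sketch only: a read-modify-write built on the two helpers above.
 * The wrapper name is hypothetical; real code normally goes through the
 * RREG32()/WREG32() macros, which wrap these functions.
 */
static inline void amdgpu_device_example_rmw(struct amdgpu_device *adev,
					     uint32_t reg, uint32_t clr, uint32_t set)
{
	uint32_t tmp = amdgpu_device_rreg(adev, reg, 0);

	tmp &= ~clr;
	tmp |= set;
	amdgpu_device_wreg(adev, reg, tmp, 0);
}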
03f2abb0 570/**
4cc9f86f 571 * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
2e0cc4d4 572 *
71579346
RB
573 * @adev: amdgpu_device pointer
574 * @reg: mmio/rlc register
575 * @v: value to write
8057a9d6 576 * @xcc_id: xcc accelerated compute core id
71579346
RB
577 *
578 * this function is invoked only for the debugfs register access
03f2abb0 579 */
f7ee1874 580void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
8ed49dd1
VL
581 uint32_t reg, uint32_t v,
582 uint32_t xcc_id)
2e0cc4d4 583{
56b53c0b 584 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
585 return;
586
2e0cc4d4 587 if (amdgpu_sriov_fullaccess(adev) &&
f7ee1874
HZ
588 adev->gfx.rlc.funcs &&
589 adev->gfx.rlc.funcs->is_rlcg_access_range) {
2e0cc4d4 590 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
8ed49dd1 591 return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
4cc9f86f
TSD
592 } else if ((reg * 4) >= adev->rmmio_size) {
593 adev->pcie_wreg(adev, reg * 4, v);
f7ee1874
HZ
594 } else {
595 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
47ed4e1c 596 }
d38ceaf9
AD
597}
598
1bba3683
HZ
599/**
600 * amdgpu_device_indirect_rreg - read an indirect register
601 *
602 * @adev: amdgpu_device pointer
22f453fb 603 * @reg_addr: indirect register address to read from
1bba3683
HZ
604 *
605 * Returns the value of indirect register @reg_addr
606 */
607u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
1bba3683
HZ
608 u32 reg_addr)
609{
65ba96e9 610 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
611 void __iomem *pcie_index_offset;
612 void __iomem *pcie_data_offset;
65ba96e9
HZ
613 u32 r;
614
615 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
616 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1bba3683
HZ
617
618 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
619 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
620 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
621
622 writel(reg_addr, pcie_index_offset);
623 readl(pcie_index_offset);
624 r = readl(pcie_data_offset);
625 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
626
627 return r;
628}
629
0c552ed3
LM
630u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
631 u64 reg_addr)
632{
633 unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
634 u32 r;
635 void __iomem *pcie_index_offset;
636 void __iomem *pcie_index_hi_offset;
637 void __iomem *pcie_data_offset;
638
639 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
640 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
d57e24aa 641 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
0c552ed3
LM
642 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
643 else
644 pcie_index_hi = 0;
645
646 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
647 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
648 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
649 if (pcie_index_hi != 0)
650 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
651 pcie_index_hi * 4;
652
653 writel(reg_addr, pcie_index_offset);
654 readl(pcie_index_offset);
655 if (pcie_index_hi != 0) {
656 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
657 readl(pcie_index_hi_offset);
658 }
659 r = readl(pcie_data_offset);
660
661 /* clear the high bits */
662 if (pcie_index_hi != 0) {
663 writel(0, pcie_index_hi_offset);
664 readl(pcie_index_hi_offset);
665 }
666
667 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
668
669 return r;
670}
671
1bba3683
HZ
672/**
673 * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
674 *
675 * @adev: amdgpu_device pointer
22f453fb 676 * @reg_addr: indirect register address to read from
1bba3683
HZ
677 *
678 * Returns the value of indirect register @reg_addr
679 */
680u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
1bba3683
HZ
681 u32 reg_addr)
682{
65ba96e9 683 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
684 void __iomem *pcie_index_offset;
685 void __iomem *pcie_data_offset;
65ba96e9
HZ
686 u64 r;
687
688 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
689 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1bba3683
HZ
690
691 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
692 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
693 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
694
695 /* read low 32 bits */
696 writel(reg_addr, pcie_index_offset);
697 readl(pcie_index_offset);
698 r = readl(pcie_data_offset);
699 /* read high 32 bits */
700 writel(reg_addr + 4, pcie_index_offset);
701 readl(pcie_index_offset);
702 r |= ((u64)readl(pcie_data_offset) << 32);
703 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
704
705 return r;
706}
707
a76b2870
CL
708u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
709 u64 reg_addr)
710{
711 unsigned long flags, pcie_index, pcie_data;
712 unsigned long pcie_index_hi = 0;
713 void __iomem *pcie_index_offset;
714 void __iomem *pcie_index_hi_offset;
715 void __iomem *pcie_data_offset;
716 u64 r;
717
718 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
719 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
720 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
721 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
722
723 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
724 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
725 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
726 if (pcie_index_hi != 0)
727 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
728 pcie_index_hi * 4;
729
730 /* read low 32 bits */
731 writel(reg_addr, pcie_index_offset);
732 readl(pcie_index_offset);
733 if (pcie_index_hi != 0) {
734 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
735 readl(pcie_index_hi_offset);
736 }
737 r = readl(pcie_data_offset);
738 /* read high 32 bits */
739 writel(reg_addr + 4, pcie_index_offset);
740 readl(pcie_index_offset);
741 if (pcie_index_hi != 0) {
742 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
743 readl(pcie_index_hi_offset);
744 }
745 r |= ((u64)readl(pcie_data_offset) << 32);
746
747 /* clear the high bits */
748 if (pcie_index_hi != 0) {
749 writel(0, pcie_index_hi_offset);
750 readl(pcie_index_hi_offset);
751 }
752
753 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
754
755 return r;
756}
757
1bba3683
HZ
758/**
759 * amdgpu_device_indirect_wreg - write an indirect register address
760 *
761 * @adev: amdgpu_device pointer
1bba3683
HZ
762 * @reg_addr: indirect register offset
763 * @reg_data: indirect register data
764 *
765 */
766void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
1bba3683
HZ
767 u32 reg_addr, u32 reg_data)
768{
65ba96e9 769 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
770 void __iomem *pcie_index_offset;
771 void __iomem *pcie_data_offset;
772
65ba96e9
HZ
773 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
774 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
775
1bba3683
HZ
776 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
777 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
778 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
779
780 writel(reg_addr, pcie_index_offset);
781 readl(pcie_index_offset);
782 writel(reg_data, pcie_data_offset);
783 readl(pcie_data_offset);
784 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
785}
786
0c552ed3
LM
787void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
788 u64 reg_addr, u32 reg_data)
789{
790 unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
791 void __iomem *pcie_index_offset;
792 void __iomem *pcie_index_hi_offset;
793 void __iomem *pcie_data_offset;
794
795 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
796 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
d57e24aa 797 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
0c552ed3
LM
798 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
799 else
800 pcie_index_hi = 0;
801
802 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
803 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
804 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
805 if (pcie_index_hi != 0)
806 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
807 pcie_index_hi * 4;
808
809 writel(reg_addr, pcie_index_offset);
810 readl(pcie_index_offset);
811 if (pcie_index_hi != 0) {
812 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
813 readl(pcie_index_hi_offset);
814 }
815 writel(reg_data, pcie_data_offset);
816 readl(pcie_data_offset);
817
818 /* clear the high bits */
819 if (pcie_index_hi != 0) {
820 writel(0, pcie_index_hi_offset);
821 readl(pcie_index_hi_offset);
822 }
823
824 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
825}
826
1bba3683
HZ
827/**
828 * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
829 *
830 * @adev: amdgpu_device pointer
1bba3683
HZ
831 * @reg_addr: indirect register offset
832 * @reg_data: indirect register data
833 *
834 */
835void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
1bba3683
HZ
836 u32 reg_addr, u64 reg_data)
837{
65ba96e9 838 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
839 void __iomem *pcie_index_offset;
840 void __iomem *pcie_data_offset;
841
65ba96e9
HZ
842 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
843 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
844
1bba3683
HZ
845 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
846 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
847 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
848
849 /* write low 32 bits */
850 writel(reg_addr, pcie_index_offset);
851 readl(pcie_index_offset);
852 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
853 readl(pcie_data_offset);
854 /* write high 32 bits */
855 writel(reg_addr + 4, pcie_index_offset);
856 readl(pcie_index_offset);
857 writel((u32)(reg_data >> 32), pcie_data_offset);
858 readl(pcie_data_offset);
859 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
860}
861
a76b2870
CL
862void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
863 u64 reg_addr, u64 reg_data)
864{
865 unsigned long flags, pcie_index, pcie_data;
866 unsigned long pcie_index_hi = 0;
867 void __iomem *pcie_index_offset;
868 void __iomem *pcie_index_hi_offset;
869 void __iomem *pcie_data_offset;
870
871 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
872 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
873 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
874 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
875
876 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
877 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
878 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
879 if (pcie_index_hi != 0)
880 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
881 pcie_index_hi * 4;
882
883 /* write low 32 bits */
884 writel(reg_addr, pcie_index_offset);
885 readl(pcie_index_offset);
886 if (pcie_index_hi != 0) {
887 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
888 readl(pcie_index_hi_offset);
889 }
890 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
891 readl(pcie_data_offset);
892 /* write high 32 bits */
893 writel(reg_addr + 4, pcie_index_offset);
894 readl(pcie_index_offset);
895 if (pcie_index_hi != 0) {
896 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
897 readl(pcie_index_hi_offset);
898 }
899 writel((u32)(reg_data >> 32), pcie_data_offset);
900 readl(pcie_data_offset);
901
902 /* clear the high bits */
903 if (pcie_index_hi != 0) {
904 writel(0, pcie_index_hi_offset);
905 readl(pcie_index_hi_offset);
906 }
907
908 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
909}
910
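/*
 * Illustrative sketch only: SoC code typically plugs the indirect helpers above
 * into the per-device register callbacks during early init, roughly like this
 * (hypothetical init helper; the assignments mirror soc15/nv style code):
 */
static inline void amdgpu_device_example_set_indirect_cbs(struct amdgpu_device *adev)
{
	adev->pcie_rreg = &amdgpu_device_indirect_rreg;
	adev->pcie_wreg = &amdgpu_device_indirect_wreg;
	adev->pcie_rreg64 = &amdgpu_device_indirect_rreg64;
	adev->pcie_wreg64 = &amdgpu_device_indirect_wreg64;
}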
dabc114e
HZ
911/**
912 * amdgpu_device_get_rev_id - query device rev_id
913 *
914 * @adev: amdgpu_device pointer
915 *
916 * Return device rev_id
917 */
918u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
919{
920 return adev->nbio.funcs->get_rev_id(adev);
921}
922
d38ceaf9
AD
923/**
924 * amdgpu_invalid_rreg - dummy reg read function
925 *
982a820b 926 * @adev: amdgpu_device pointer
d38ceaf9
AD
927 * @reg: offset of register
928 *
929 * Dummy register read function. Used for register blocks
930 * that certain asics don't have (all asics).
931 * Returns the value in the register.
932 */
933static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
934{
935 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
936 BUG();
937 return 0;
938}
939
0c552ed3
LM
940static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
941{
942 DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
943 BUG();
944 return 0;
945}
946
d38ceaf9
AD
947/**
948 * amdgpu_invalid_wreg - dummy reg write function
949 *
982a820b 950 * @adev: amdgpu_device pointer
d38ceaf9
AD
951 * @reg: offset of register
952 * @v: value to write to the register
953 *
954 * Dummy register write function. Used for register blocks
955 * that certain asics don't have (all asics).
956 */
957static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
958{
959 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
960 reg, v);
961 BUG();
962}
963
0c552ed3
LM
964static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
965{
966 DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
967 reg, v);
968 BUG();
969}
970
4fa1c6a6
TZ
971/**
972 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
973 *
982a820b 974 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
975 * @reg: offset of register
976 *
977 * Dummy register read function. Used for register blocks
978 * that certain asics don't have (all asics).
979 * Returns the value in the register.
980 */
981static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
982{
983 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
984 BUG();
985 return 0;
986}
987
a76b2870
CL
988static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg)
989{
990 DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
991 BUG();
992 return 0;
993}
994
4fa1c6a6
TZ
995/**
996 * amdgpu_invalid_wreg64 - dummy reg write function
997 *
982a820b 998 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
999 * @reg: offset of register
1000 * @v: value to write to the register
1001 *
1002 * Dummy register write function. Used for register blocks
1003 * that certain asics don't have (all asics).
1004 */
1005static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
1006{
1007 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
1008 reg, v);
1009 BUG();
1010}
1011
a76b2870
CL
1012static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v)
1013{
1014 DRM_ERROR("Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n",
1015 reg, v);
1016 BUG();
1017}
1018
d38ceaf9
AD
1019/**
1020 * amdgpu_block_invalid_rreg - dummy reg read function
1021 *
982a820b 1022 * @adev: amdgpu_device pointer
d38ceaf9
AD
1023 * @block: offset of instance
1024 * @reg: offset of register
1025 *
1026 * Dummy register read function. Used for register blocks
1027 * that certain asics don't have (all asics).
1028 * Returns the value in the register.
1029 */
1030static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
1031 uint32_t block, uint32_t reg)
1032{
1033 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
1034 reg, block);
1035 BUG();
1036 return 0;
1037}
1038
1039/**
1040 * amdgpu_block_invalid_wreg - dummy reg write function
1041 *
982a820b 1042 * @adev: amdgpu_device pointer
d38ceaf9
AD
1043 * @block: offset of instance
1044 * @reg: offset of register
1045 * @v: value to write to the register
1046 *
1047 * Dummy register write function. Used for register blocks
1048 * that certain asics don't have (all asics).
1049 */
1050static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
1051 uint32_t block,
1052 uint32_t reg, uint32_t v)
1053{
1054 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
1055 reg, block, v);
1056 BUG();
1057}
1058
4d2997ab
AD
1059/**
1060 * amdgpu_device_asic_init - Wrapper for atom asic_init
1061 *
982a820b 1062 * @adev: amdgpu_device pointer
4d2997ab
AD
1063 *
1064 * Does any asic specific work and then calls atom asic init.
1065 */
1066static int amdgpu_device_asic_init(struct amdgpu_device *adev)
1067{
15c5c5f5
LL
1068 int ret;
1069
4d2997ab
AD
1070 amdgpu_asic_pre_asic_init(adev);
1071
4e8303cf
LL
1072 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
1073 amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
15c5c5f5
LL
1074 amdgpu_psp_wait_for_bootloader(adev);
1075 ret = amdgpu_atomfirmware_asic_init(adev, true);
1076 return ret;
1077 } else {
85d1bcc6 1078 return amdgpu_atom_asic_init(adev->mode_info.atom_context);
15c5c5f5
LL
1079 }
1080
1081 return 0;
4d2997ab
AD
1082}
1083
e3ecdffa 1084/**
7ccfd79f 1085 * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
e3ecdffa 1086 *
982a820b 1087 * @adev: amdgpu_device pointer
e3ecdffa
AD
1088 *
1089 * Allocates a scratch page of VRAM for use by various things in the
1090 * driver.
1091 */
7ccfd79f 1092static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
d38ceaf9 1093{
7ccfd79f
CK
1094 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
1095 AMDGPU_GEM_DOMAIN_VRAM |
1096 AMDGPU_GEM_DOMAIN_GTT,
1097 &adev->mem_scratch.robj,
1098 &adev->mem_scratch.gpu_addr,
1099 (void **)&adev->mem_scratch.ptr);
d38ceaf9
AD
1100}
1101
e3ecdffa 1102/**
7ccfd79f 1103 * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
e3ecdffa 1104 *
982a820b 1105 * @adev: amdgpu_device pointer
e3ecdffa
AD
1106 *
1107 * Frees the VRAM scratch page.
1108 */
7ccfd79f 1109static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 1110{
7ccfd79f 1111 amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
d38ceaf9
AD
1112}
1113
1114/**
9c3f2b54 1115 * amdgpu_device_program_register_sequence - program an array of registers.
d38ceaf9
AD
1116 *
1117 * @adev: amdgpu_device pointer
1118 * @registers: pointer to the register array
1119 * @array_size: size of the register array
1120 *
b8920e1e 1121 * Programs an array of registers with AND and OR masks.
d38ceaf9
AD
1122 * This is a helper for setting golden registers.
1123 */
9c3f2b54
AD
1124void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
1125 const u32 *registers,
1126 const u32 array_size)
d38ceaf9
AD
1127{
1128 u32 tmp, reg, and_mask, or_mask;
1129 int i;
1130
1131 if (array_size % 3)
1132 return;
1133
47fc644f 1134 for (i = 0; i < array_size; i += 3) {
d38ceaf9
AD
1135 reg = registers[i + 0];
1136 and_mask = registers[i + 1];
1137 or_mask = registers[i + 2];
1138
1139 if (and_mask == 0xffffffff) {
1140 tmp = or_mask;
1141 } else {
1142 tmp = RREG32(reg);
1143 tmp &= ~and_mask;
e0d07657
HZ
1144 if (adev->family >= AMDGPU_FAMILY_AI)
1145 tmp |= (or_mask & and_mask);
1146 else
1147 tmp |= or_mask;
d38ceaf9
AD
1148 }
1149 WREG32(reg, tmp);
1150 }
1151}
1152
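/*
 * Illustrative sketch only: the register array is consumed as
 * {reg, and_mask, or_mask} triplets. The offsets and masks below are made up
 * purely to show the layout.
 */
static const u32 amdgpu_example_golden_settings[] = {
	0x1234, 0xffffffff, 0x00000001,	/* and_mask == 0xffffffff: plain write */
	0x1238, 0x0000000f, 0x00000002,	/* read-modify-write of the low nibble */
};

static inline void amdgpu_device_example_program_golden(struct amdgpu_device *adev)
{
	amdgpu_device_program_register_sequence(adev, amdgpu_example_golden_settings,
						ARRAY_SIZE(amdgpu_example_golden_settings));
}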
e3ecdffa
AD
1153/**
1154 * amdgpu_device_pci_config_reset - reset the GPU
1155 *
1156 * @adev: amdgpu_device pointer
1157 *
1158 * Resets the GPU using the pci config reset sequence.
1159 * Only applicable to asics prior to vega10.
1160 */
8111c387 1161void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
1162{
1163 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
1164}
1165
af484df8
AD
1166/**
1167 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1168 *
1169 * @adev: amdgpu_device pointer
1170 *
1171 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1172 */
1173int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1174{
1175 return pci_reset_function(adev->pdev);
1176}
1177
d38ceaf9 1178/*
06ec9070 1179 * amdgpu_device_wb_*()
455a7bc2 1180 * Writeback is the method by which the GPU updates special pages in memory
ea81a173 1181 * with the status of certain GPU events (fences, ring pointers, etc.).
d38ceaf9
AD
1182 */
1183
1184/**
06ec9070 1185 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
1186 *
1187 * @adev: amdgpu_device pointer
1188 *
1189 * Disables Writeback and frees the Writeback memory (all asics).
1190 * Used at driver shutdown.
1191 */
06ec9070 1192static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1193{
1194 if (adev->wb.wb_obj) {
a76ed485
AD
1195 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1196 &adev->wb.gpu_addr,
1197 (void **)&adev->wb.wb);
d38ceaf9
AD
1198 adev->wb.wb_obj = NULL;
1199 }
1200}
1201
1202/**
03f2abb0 1203 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
d38ceaf9
AD
1204 *
1205 * @adev: amdgpu_device pointer
1206 *
455a7bc2 1207 * Initializes writeback and allocates writeback memory (all asics).
d38ceaf9
AD
1208 * Used at driver startup.
1209 * Returns 0 on success or a negative error code on failure.
1210 */
06ec9070 1211static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
1212{
1213 int r;
1214
1215 if (adev->wb.wb_obj == NULL) {
97407b63
AD
1216 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1217 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
1218 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1219 &adev->wb.wb_obj, &adev->wb.gpu_addr,
1220 (void **)&adev->wb.wb);
d38ceaf9
AD
1221 if (r) {
1222 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1223 return r;
1224 }
d38ceaf9
AD
1225
1226 adev->wb.num_wb = AMDGPU_MAX_WB;
1227 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1228
1229 /* clear wb memory */
73469585 1230 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
1231 }
1232
1233 return 0;
1234}
1235
1236/**
131b4b36 1237 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
1238 *
1239 * @adev: amdgpu_device pointer
1240 * @wb: wb index
1241 *
1242 * Allocate a wb slot for use by the driver (all asics).
1243 * Returns 0 on success or -EINVAL on failure.
1244 */
131b4b36 1245int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9
AD
1246{
1247 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 1248
97407b63 1249 if (offset < adev->wb.num_wb) {
7014285a 1250 __set_bit(offset, adev->wb.used);
63ae07ca 1251 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
1252 return 0;
1253 } else {
1254 return -EINVAL;
1255 }
1256}
1257
d38ceaf9 1258/**
131b4b36 1259 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
1260 *
1261 * @adev: amdgpu_device pointer
1262 * @wb: wb index
1263 *
1264 * Free a wb slot allocated for use by the driver (all asics)
1265 */
131b4b36 1266void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 1267{
73469585 1268 wb >>= 3;
d38ceaf9 1269 if (wb < adev->wb.num_wb)
73469585 1270 __clear_bit(wb, adev->wb.used);
d38ceaf9
AD
1271}
1272
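/*
 * Illustrative sketch only: typical lifetime of a writeback slot. The wrapper
 * name is hypothetical; rings do the equivalent of this when they set up their
 * rptr/wptr/fence writeback locations.
 */
static inline int amdgpu_device_example_wb_usage(struct amdgpu_device *adev)
{
	u32 wb;
	u64 gpu_addr;
	int r;

	r = amdgpu_device_wb_get(adev, &wb);	/* wb is returned as a dword offset */
	if (r)
		return r;

	adev->wb.wb[wb] = 0;			/* CPU view of the slot */
	gpu_addr = adev->wb.gpu_addr + wb * 4;	/* address the GPU writes to */
	(void)gpu_addr;

	amdgpu_device_wb_free(adev, wb);
	return 0;
}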
d6895ad3
CK
1273/**
1274 * amdgpu_device_resize_fb_bar - try to resize FB BAR
1275 *
1276 * @adev: amdgpu_device pointer
1277 *
1278 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1279 * to fail, but if any of the BARs is not accessible after the resize we abort
1280 * driver loading by returning -ENODEV.
1281 */
1282int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1283{
453f617a 1284 int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
31b8adab
CK
1285 struct pci_bus *root;
1286 struct resource *res;
b8920e1e 1287 unsigned int i;
d6895ad3
CK
1288 u16 cmd;
1289 int r;
1290
822130b5
AB
1291 if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1292 return 0;
1293
0c03b912 1294 /* Bypass for VF */
1295 if (amdgpu_sriov_vf(adev))
1296 return 0;
1297
b7221f2b
AD
1298 /* skip if the bios has already enabled large BAR */
1299 if (adev->gmc.real_vram_size &&
1300 (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1301 return 0;
1302
31b8adab
CK
1303 /* Check if the root BUS has 64bit memory resources */
1304 root = adev->pdev->bus;
1305 while (root->parent)
1306 root = root->parent;
1307
1308 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 1309 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
1310 res->start > 0x100000000ull)
1311 break;
1312 }
1313
1314 /* Trying to resize is pointless without a root hub window above 4GB */
1315 if (!res)
1316 return 0;
1317
453f617a
ND
1318 /* Limit the BAR size to what is available */
1319 rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1320 rbar_size);
1321
d6895ad3
CK
1322 /* Disable memory decoding while we change the BAR addresses and size */
1323 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1324 pci_write_config_word(adev->pdev, PCI_COMMAND,
1325 cmd & ~PCI_COMMAND_MEMORY);
1326
1327 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
43c064db 1328 amdgpu_doorbell_fini(adev);
d6895ad3
CK
1329 if (adev->asic_type >= CHIP_BONAIRE)
1330 pci_release_resource(adev->pdev, 2);
1331
1332 pci_release_resource(adev->pdev, 0);
1333
1334 r = pci_resize_resource(adev->pdev, 0, rbar_size);
1335 if (r == -ENOSPC)
1336 DRM_INFO("Not enough PCI address space for a large BAR.");
1337 else if (r && r != -ENOTSUPP)
1338 DRM_ERROR("Problem resizing BAR0 (%d).", r);
1339
1340 pci_assign_unassigned_bus_resources(adev->pdev->bus);
1341
1342 /* When the doorbell or fb BAR isn't available we have no chance of
1343 * using the device.
1344 */
43c064db 1345 r = amdgpu_doorbell_init(adev);
d6895ad3
CK
1346 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1347 return -ENODEV;
1348
1349 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1350
1351 return 0;
1352}
a05502e5 1353
9535a86a
SZ
1354static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
1355{
b8920e1e 1356 if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
9535a86a 1357 return false;
9535a86a
SZ
1358
1359 return true;
1360}
1361
d38ceaf9
AD
1362/*
1363 * GPU helpers function.
1364 */
1365/**
39c640c0 1366 * amdgpu_device_need_post - check if the hw need post or not
d38ceaf9
AD
1367 *
1368 * @adev: amdgpu_device pointer
1369 *
c836fec5
JQ
1370 * Check if the asic has been initialized (all asics) at driver startup
1371 * or post is needed if hw reset is performed.
1372 * Returns true if a post is needed or false if not.
d38ceaf9 1373 */
39c640c0 1374bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
1375{
1376 uint32_t reg;
1377
bec86378
ML
1378 if (amdgpu_sriov_vf(adev))
1379 return false;
1380
9535a86a
SZ
1381 if (!amdgpu_device_read_bios(adev))
1382 return false;
1383
bec86378 1384 if (amdgpu_passthrough(adev)) {
1da2c326
ML
1385 /* for FIJI: In the whole GPU pass-through virtualization case, after VM reboot
1386 * some old SMC firmware still needs the driver to do a vPost or the GPU will hang,
1387 * while SMC firmware versions above 22.15 don't have this flaw, so we force
1388 * vPost to be executed for SMC versions below 22.15
bec86378
ML
1389 */
1390 if (adev->asic_type == CHIP_FIJI) {
1391 int err;
1392 uint32_t fw_ver;
b8920e1e 1393
bec86378
ML
1394 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1395 /* force vPost if error occurred */
1396 if (err)
1397 return true;
1398
1399 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1da2c326
ML
1400 if (fw_ver < 0x00160e00)
1401 return true;
bec86378 1402 }
bec86378 1403 }
91fe77eb 1404
e3c1b071 1405 /* Don't post if we need to reset whole hive on init */
1406 if (adev->gmc.xgmi.pending_reset)
1407 return false;
1408
91fe77eb 1409 if (adev->has_hw_reset) {
1410 adev->has_hw_reset = false;
1411 return true;
1412 }
1413
1414 /* bios scratch used on CIK+ */
1415 if (adev->asic_type >= CHIP_BONAIRE)
1416 return amdgpu_atombios_scratch_need_asic_init(adev);
1417
1418 /* check MEM_SIZE for older asics */
1419 reg = amdgpu_asic_get_config_memsize(adev);
1420
1421 if ((reg != 0) && (reg != 0xffffffff))
1422 return false;
1423
1424 return true;
70e64c4d
ML
1425}
1426
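/*
 * Illustrative sketch only: how the check above is typically consumed during
 * early device init (hypothetical wrapper; the real call sites live in
 * amdgpu_device_init() and the reset paths).
 */
static inline int amdgpu_device_example_post_if_needed(struct amdgpu_device *adev)
{
	if (!amdgpu_device_need_post(adev))
		return 0;

	/* re-run atom asic_init to post the card */
	return amdgpu_device_asic_init(adev);
}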
bb0f8429
ML
1427/*
1428 * Check whether seamless boot is supported.
1429 *
7f4ce7b5
ML
1430 * So far we only support seamless boot on DCE 3.0 or later.
1431 * If users report that it works on older ASICS as well, we may
1432 * loosen this.
bb0f8429
ML
1433 */
1434bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
1435{
5dc270d3
ML
1436 switch (amdgpu_seamless) {
1437 case -1:
1438 break;
1439 case 1:
1440 return true;
1441 case 0:
1442 return false;
1443 default:
1444 DRM_ERROR("Invalid value for amdgpu.seamless: %d\n",
1445 amdgpu_seamless);
1446 return false;
1447 }
1448
3657a1d5
ML
1449 if (!(adev->flags & AMD_IS_APU))
1450 return false;
1451
5dc270d3
ML
1452 if (adev->mman.keep_stolen_vga_memory)
1453 return false;
1454
7f4ce7b5 1455 return adev->ip_versions[DCE_HWIP][0] >= IP_VERSION(3, 0, 0);
bb0f8429
ML
1456}
1457
5d1eb4c4
ML
1458/*
1459 * Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic
1460 * speed switching. Until we have confirmation from Intel that a specific host
1461 * supports it, it's safer that we keep it disabled for all.
1462 *
1463 * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1464 * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1465 */
1466bool amdgpu_device_pcie_dynamic_switching_supported(void)
1467{
1468#if IS_ENABLED(CONFIG_X86)
1469 struct cpuinfo_x86 *c = &cpu_data(0);
1470
1471 if (c->x86_vendor == X86_VENDOR_INTEL)
1472 return false;
1473#endif
1474 return true;
1475}
1476
0ab5d711
ML
1477/**
1478 * amdgpu_device_should_use_aspm - check if the device should program ASPM
1479 *
1480 * @adev: amdgpu_device pointer
1481 *
1482 * Confirm whether the module parameter and pcie bridge agree that ASPM should
1483 * be set for this device.
1484 *
1485 * Returns true if it should be used or false if not.
1486 */
1487bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1488{
1489 switch (amdgpu_aspm) {
1490 case -1:
1491 break;
1492 case 0:
1493 return false;
1494 case 1:
1495 return true;
1496 default:
1497 return false;
1498 }
1499 return pcie_aspm_enabled(adev->pdev);
1500}
1501
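/*
 * Illustrative sketch only: ASIC code gates its ASPM programming on the check
 * above, roughly like this (hypothetical wrapper; the nbio program_aspm
 * callback is how SoC code typically applies the setting).
 */
static inline void amdgpu_device_example_enable_aspm(struct amdgpu_device *adev)
{
	if (!amdgpu_device_should_use_aspm(adev))
		return;

	if (adev->nbio.funcs && adev->nbio.funcs->program_aspm)
		adev->nbio.funcs->program_aspm(adev);
}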
3ad5dcfe
KHF
1502bool amdgpu_device_aspm_support_quirk(void)
1503{
1504#if IS_ENABLED(CONFIG_X86)
1505 struct cpuinfo_x86 *c = &cpu_data(0);
1506
1507 return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
1508#else
1509 return true;
1510#endif
1511}
1512
d38ceaf9
AD
1513/* if we get transitioned to only one device, take VGA back */
1514/**
06ec9070 1515 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9 1516 *
bf44e8ce 1517 * @pdev: PCI device pointer
d38ceaf9
AD
1518 * @state: enable/disable vga decode
1519 *
1520 * Enable/disable vga decode (all asics).
1521 * Returns VGA resource flags.
1522 */
bf44e8ce
CH
1523static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1524 bool state)
d38ceaf9 1525{
bf44e8ce 1526 struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
b8920e1e 1527
d38ceaf9
AD
1528 amdgpu_asic_set_vga_state(adev, state);
1529 if (state)
1530 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1531 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1532 else
1533 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1534}
1535
e3ecdffa
AD
1536/**
1537 * amdgpu_device_check_block_size - validate the vm block size
1538 *
1539 * @adev: amdgpu_device pointer
1540 *
1541 * Validates the vm block size specified via module parameter.
1542 * The vm block size defines number of bits in page table versus page directory,
1543 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1544 * page table and the remaining bits are in the page directory.
1545 */
06ec9070 1546static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
1547{
1548 /* defines number of bits in page table versus page directory,
1549 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
b8920e1e
SS
1550 * page table and the remaining bits are in the page directory
1551 */
bab4fee7
JZ
1552 if (amdgpu_vm_block_size == -1)
1553 return;
a1adf8be 1554
bab4fee7 1555 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
1556 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1557 amdgpu_vm_block_size);
97489129 1558 amdgpu_vm_block_size = -1;
a1adf8be 1559 }
a1adf8be
CZ
1560}
1561
e3ecdffa
AD
1562/**
1563 * amdgpu_device_check_vm_size - validate the vm size
1564 *
1565 * @adev: amdgpu_device pointer
1566 *
1567 * Validates the vm size in GB specified via module parameter.
1568 * The VM size is the size of the GPU virtual memory space in GB.
1569 */
06ec9070 1570static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 1571{
64dab074
AD
1572 /* no need to check the default value */
1573 if (amdgpu_vm_size == -1)
1574 return;
1575
83ca145d
ZJ
1576 if (amdgpu_vm_size < 1) {
1577 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1578 amdgpu_vm_size);
f3368128 1579 amdgpu_vm_size = -1;
83ca145d 1580 }
83ca145d
ZJ
1581}
1582
7951e376
RZ
1583static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1584{
1585 struct sysinfo si;
a9d4fe2f 1586 bool is_os_64 = (sizeof(void *) == 8);
7951e376
RZ
1587 uint64_t total_memory;
1588 uint64_t dram_size_seven_GB = 0x1B8000000;
1589 uint64_t dram_size_three_GB = 0xB8000000;
1590
1591 if (amdgpu_smu_memory_pool_size == 0)
1592 return;
1593
1594 if (!is_os_64) {
1595 DRM_WARN("Not 64-bit OS, feature not supported\n");
1596 goto def_value;
1597 }
1598 si_meminfo(&si);
1599 total_memory = (uint64_t)si.totalram * si.mem_unit;
1600
1601 if ((amdgpu_smu_memory_pool_size == 1) ||
1602 (amdgpu_smu_memory_pool_size == 2)) {
1603 if (total_memory < dram_size_three_GB)
1604 goto def_value1;
1605 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1606 (amdgpu_smu_memory_pool_size == 8)) {
1607 if (total_memory < dram_size_seven_GB)
1608 goto def_value1;
1609 } else {
1610 DRM_WARN("Smu memory pool size not supported\n");
1611 goto def_value;
1612 }
1613 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1614
1615 return;
1616
1617def_value1:
1618 DRM_WARN("No enough system memory\n");
1619def_value:
1620 adev->pm.smu_prv_buffer_size = 0;
1621}
1622
9f6a7857
HR
1623static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1624{
1625 if (!(adev->flags & AMD_IS_APU) ||
1626 adev->asic_type < CHIP_RAVEN)
1627 return 0;
1628
1629 switch (adev->asic_type) {
1630 case CHIP_RAVEN:
1631 if (adev->pdev->device == 0x15dd)
1632 adev->apu_flags |= AMD_APU_IS_RAVEN;
1633 if (adev->pdev->device == 0x15d8)
1634 adev->apu_flags |= AMD_APU_IS_PICASSO;
1635 break;
1636 case CHIP_RENOIR:
1637 if ((adev->pdev->device == 0x1636) ||
1638 (adev->pdev->device == 0x164c))
1639 adev->apu_flags |= AMD_APU_IS_RENOIR;
1640 else
1641 adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1642 break;
1643 case CHIP_VANGOGH:
1644 adev->apu_flags |= AMD_APU_IS_VANGOGH;
1645 break;
1646 case CHIP_YELLOW_CARP:
1647 break;
d0f56dc2 1648 case CHIP_CYAN_SKILLFISH:
dfcc3e8c
AD
1649 if ((adev->pdev->device == 0x13FE) ||
1650 (adev->pdev->device == 0x143F))
d0f56dc2
TZ
1651 adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1652 break;
9f6a7857 1653 default:
4eaf21b7 1654 break;
9f6a7857
HR
1655 }
1656
1657 return 0;
1658}
1659
d38ceaf9 1660/**
06ec9070 1661 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
1662 *
1663 * @adev: amdgpu_device pointer
1664 *
1665 * Validates certain module parameters and updates
1666 * the associated values used by the driver (all asics).
1667 */
912dfc84 1668static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1669{
5b011235
CZ
1670 if (amdgpu_sched_jobs < 4) {
1671 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1672 amdgpu_sched_jobs);
1673 amdgpu_sched_jobs = 4;
47fc644f 1674 } else if (!is_power_of_2(amdgpu_sched_jobs)) {
5b011235
CZ
1675 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1676 amdgpu_sched_jobs);
1677 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1678 }
d38ceaf9 1679
83e74db6 1680 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
1681 /* gart size must be greater or equal to 32M */
1682 dev_warn(adev->dev, "gart size (%d) too small\n",
1683 amdgpu_gart_size);
83e74db6 1684 amdgpu_gart_size = -1;
d38ceaf9
AD
1685 }
1686
36d38372 1687 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1688 /* gtt size must be greater or equal to 32M */
36d38372
CK
1689 dev_warn(adev->dev, "gtt size (%d) too small\n",
1690 amdgpu_gtt_size);
1691 amdgpu_gtt_size = -1;
d38ceaf9
AD
1692 }
1693
d07f14be
RH
1694 /* valid range is between 4 and 9 inclusive */
1695 if (amdgpu_vm_fragment_size != -1 &&
1696 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1697 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1698 amdgpu_vm_fragment_size = -1;
1699 }
1700
5d5bd5e3
KW
1701 if (amdgpu_sched_hw_submission < 2) {
1702 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1703 amdgpu_sched_hw_submission);
1704 amdgpu_sched_hw_submission = 2;
1705 } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1706 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1707 amdgpu_sched_hw_submission);
1708 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1709 }
1710
2656fd23
AG
1711 if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1712 dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1713 amdgpu_reset_method = -1;
1714 }
1715
7951e376
RZ
1716 amdgpu_device_check_smu_prv_buffer_size(adev);
1717
06ec9070 1718 amdgpu_device_check_vm_size(adev);
d38ceaf9 1719
06ec9070 1720 amdgpu_device_check_block_size(adev);
6a7f76e7 1721
19aede77 1722 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84 1723
e3c00faa 1724 return 0;
d38ceaf9
AD
1725}
1726
1727/**
1728 * amdgpu_switcheroo_set_state - set switcheroo state
1729 *
1730 * @pdev: pci dev pointer
1694467b 1731 * @state: vga_switcheroo state
d38ceaf9 1732 *
12024b17 1733 * Callback for the switcheroo driver. Suspends or resumes
d38ceaf9
AD
1734 * the asics before or after it is powered up using ACPI methods.
1735 */
8aba21b7
LT
1736static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1737 enum vga_switcheroo_state state)
d38ceaf9
AD
1738{
1739 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 1740 int r;
d38ceaf9 1741
b98c6299 1742 if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
1743 return;
1744
1745 if (state == VGA_SWITCHEROO_ON) {
dd4fa6c1 1746 pr_info("switched on\n");
d38ceaf9
AD
1747 /* don't suspend or resume card normally */
1748 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1749
8f66090b
TZ
1750 pci_set_power_state(pdev, PCI_D0);
1751 amdgpu_device_load_pci_state(pdev);
1752 r = pci_enable_device(pdev);
de185019
AD
1753 if (r)
1754 DRM_WARN("pci_enable_device failed (%d)\n", r);
1755 amdgpu_device_resume(dev, true);
d38ceaf9 1756
d38ceaf9 1757 dev->switch_power_state = DRM_SWITCH_POWER_ON;
d38ceaf9 1758 } else {
dd4fa6c1 1759 pr_info("switched off\n");
d38ceaf9 1760 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
5095d541 1761 amdgpu_device_prepare(dev);
de185019 1762 amdgpu_device_suspend(dev, true);
8f66090b 1763 amdgpu_device_cache_pci_state(pdev);
de185019 1764 /* Shut down the device */
8f66090b
TZ
1765 pci_disable_device(pdev);
1766 pci_set_power_state(pdev, PCI_D3cold);
d38ceaf9
AD
1767 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1768 }
1769}
1770
1771/**
1772 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1773 *
1774 * @pdev: pci dev pointer
1775 *
1776 * Callback for the switcheroo driver. Checks if the switcheroo
1777 * state can be changed.
1778 * Returns true if the state can be changed, false if not.
1779 */
1780static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1781{
1782 struct drm_device *dev = pci_get_drvdata(pdev);
1783
b8920e1e 1784 /*
d38ceaf9
AD
1785 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1786 * locking inversion with the driver load path. And the access here is
1787 * completely racy anyway. So don't bother with locking for now.
1788 */
7e13ad89 1789 return atomic_read(&dev->open_count) == 0;
d38ceaf9
AD
1790}
1791
1792static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1793 .set_gpu_state = amdgpu_switcheroo_set_state,
1794 .reprobe = NULL,
1795 .can_switch = amdgpu_switcheroo_can_switch,
1796};
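/*
 * A minimal sketch of how these ops are typically wired up. The actual
 * registration happens later in this file during device init and is not
 * shown in this excerpt; the snippet below is illustrative only.
 *
 *	bool px = amdgpu_device_supports_px(adev_to_drm(adev));
 *
 *	if (px)
 *		vga_switcheroo_register_client(adev->pdev,
 *					       &amdgpu_switcheroo_ops, px);
 */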
1797
e3ecdffa
AD
1798/**
1799 * amdgpu_device_ip_set_clockgating_state - set the CG state
1800 *
87e3f136 1801 * @dev: amdgpu_device pointer
e3ecdffa
AD
1802 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1803 * @state: clockgating state (gate or ungate)
1804 *
1805 * Sets the requested clockgating state for all instances of
1806 * the hardware IP specified.
1807 * Returns the error code from the last instance.
1808 */
43fa561f 1809int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1810 enum amd_ip_block_type block_type,
1811 enum amd_clockgating_state state)
d38ceaf9 1812{
43fa561f 1813 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1814 int i, r = 0;
1815
1816 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1817 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1818 continue;
c722865a
RZ
1819 if (adev->ip_blocks[i].version->type != block_type)
1820 continue;
1821 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1822 continue;
1823 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1824 (void *)adev, state);
1825 if (r)
1826 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1827 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1828 }
1829 return r;
1830}
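/*
 * Minimal usage sketch (illustrative only): gate clockgating for all GFX
 * instances on a device. The powergating variant below is used the same way.
 *
 *	int r = amdgpu_device_ip_set_clockgating_state(adev,
 *						       AMD_IP_BLOCK_TYPE_GFX,
 *						       AMD_CG_STATE_GATE);
 *	if (r)
 *		dev_warn(adev->dev, "failed to gate GFX clocks (%d)\n", r);
 */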
1831
e3ecdffa
AD
1832/**
1833 * amdgpu_device_ip_set_powergating_state - set the PG state
1834 *
87e3f136 1835 * @dev: amdgpu_device pointer
e3ecdffa
AD
1836 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1837 * @state: powergating state (gate or ungate)
1838 *
1839 * Sets the requested powergating state for all instances of
1840 * the hardware IP specified.
1841 * Returns the error code from the last instance.
1842 */
43fa561f 1843int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1844 enum amd_ip_block_type block_type,
1845 enum amd_powergating_state state)
d38ceaf9 1846{
43fa561f 1847 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1848 int i, r = 0;
1849
1850 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1851 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1852 continue;
c722865a
RZ
1853 if (adev->ip_blocks[i].version->type != block_type)
1854 continue;
1855 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1856 continue;
1857 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1858 (void *)adev, state);
1859 if (r)
1860 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1861 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1862 }
1863 return r;
1864}
1865
e3ecdffa
AD
1866/**
1867 * amdgpu_device_ip_get_clockgating_state - get the CG state
1868 *
1869 * @adev: amdgpu_device pointer
1870 * @flags: clockgating feature flags
1871 *
1872 * Walks the list of IPs on the device and updates the clockgating
1873 * flags for each IP.
1874 * Updates @flags with the feature flags for each hardware IP where
1875 * clockgating is enabled.
1876 */
2990a1fc 1877void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
25faeddc 1878 u64 *flags)
6cb2d4e4
HR
1879{
1880 int i;
1881
1882 for (i = 0; i < adev->num_ip_blocks; i++) {
1883 if (!adev->ip_blocks[i].status.valid)
1884 continue;
1885 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1886 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1887 }
1888}
1889
e3ecdffa
AD
1890/**
1891 * amdgpu_device_ip_wait_for_idle - wait for idle
1892 *
1893 * @adev: amdgpu_device pointer
1894 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1895 *
1896 * Waits for the requested hardware IP to be idle.
1897 * Returns 0 for success or a negative error code on failure.
1898 */
2990a1fc
AD
1899int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1900 enum amd_ip_block_type block_type)
5dbbb60b
AD
1901{
1902 int i, r;
1903
1904 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1905 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1906 continue;
a1255107
AD
1907 if (adev->ip_blocks[i].version->type == block_type) {
1908 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1909 if (r)
1910 return r;
1911 break;
1912 }
1913 }
1914 return 0;
1915
1916}
1917
e3ecdffa
AD
1918/**
1919 * amdgpu_device_ip_is_idle - is the hardware IP idle
1920 *
1921 * @adev: amdgpu_device pointer
1922 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1923 *
1924 * Checks whether the hardware IP is idle or not.
1925 * Returns true if the IP is idle, false if not.
1926 */
2990a1fc
AD
1927bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1928 enum amd_ip_block_type block_type)
5dbbb60b
AD
1929{
1930 int i;
1931
1932 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1933 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1934 continue;
a1255107
AD
1935 if (adev->ip_blocks[i].version->type == block_type)
1936 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1937 }
1938 return true;
1939
1940}
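/*
 * Minimal usage sketch (illustrative only): check whether the GMC block is
 * idle and, if not, block until it becomes idle.
 *
 *	if (!amdgpu_device_ip_is_idle(adev, AMD_IP_BLOCK_TYPE_GMC)) {
 *		int r = amdgpu_device_ip_wait_for_idle(adev,
 *						       AMD_IP_BLOCK_TYPE_GMC);
 *		if (r)
 *			dev_err(adev->dev, "GMC did not go idle (%d)\n", r);
 *	}
 */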
1941
e3ecdffa
AD
1942/**
1943 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1944 *
1945 * @adev: amdgpu_device pointer
87e3f136 1946 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1947 *
1948 * Returns a pointer to the hardware IP block structure
1949 * if it exists for the asic, otherwise NULL.
1950 */
2990a1fc
AD
1951struct amdgpu_ip_block *
1952amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1953 enum amd_ip_block_type type)
d38ceaf9
AD
1954{
1955 int i;
1956
1957 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1958 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1959 return &adev->ip_blocks[i];
1960
1961 return NULL;
1962}
1963
1964/**
2990a1fc 1965 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1966 *
1967 * @adev: amdgpu_device pointer
5fc3aeeb 1968 * @type: enum amd_ip_block_type
d38ceaf9
AD
1969 * @major: major version
1970 * @minor: minor version
1971 *
1972 * return 0 if equal or greater
1973 * return 1 if smaller or the ip_block doesn't exist
1974 */
2990a1fc
AD
1975int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1976 enum amd_ip_block_type type,
1977 u32 major, u32 minor)
d38ceaf9 1978{
2990a1fc 1979 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1980
a1255107
AD
1981 if (ip_block && ((ip_block->version->major > major) ||
1982 ((ip_block->version->major == major) &&
1983 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1984 return 0;
1985
1986 return 1;
1987}
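/*
 * Minimal usage sketch (illustrative only): look up the GFX IP block and
 * check that it is at least version 9.0.
 *
 *	struct amdgpu_ip_block *gfx =
 *		amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
 *
 *	if (gfx && !amdgpu_device_ip_block_version_cmp(adev,
 *						       AMD_IP_BLOCK_TYPE_GFX,
 *						       9, 0))
 *		dev_info(adev->dev, "GFX IP is 9.0 or newer\n");
 */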
1988
a1255107 1989/**
2990a1fc 1990 * amdgpu_device_ip_block_add
a1255107
AD
1991 *
1992 * @adev: amdgpu_device pointer
1993 * @ip_block_version: pointer to the IP to add
1994 *
1995 * Adds the IP block driver information to the collection of IPs
1996 * on the asic.
1997 */
2990a1fc
AD
1998int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1999 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
2000{
2001 if (!ip_block_version)
2002 return -EINVAL;
2003
7bd939d0
LG
2004 switch (ip_block_version->type) {
2005 case AMD_IP_BLOCK_TYPE_VCN:
2006 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
2007 return 0;
2008 break;
2009 case AMD_IP_BLOCK_TYPE_JPEG:
2010 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
2011 return 0;
2012 break;
2013 default:
2014 break;
2015 }
2016
e966a725 2017 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
2018 ip_block_version->funcs->name);
2019
a1255107
AD
2020 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
2021
2022 return 0;
2023}
2024
e3ecdffa
AD
2025/**
2026 * amdgpu_device_enable_virtual_display - enable virtual display feature
2027 *
2028 * @adev: amdgpu_device pointer
2029 *
2030 * Enables the virtual display feature if the user has enabled it via
2031 * the module parameter virtual_display. This feature provides virtual
2032 * display hardware on headless boards or in virtualized environments.
2033 * This function parses and validates the configuration string specified by
2034 * the user and configures the virtual display configuration (number of
2035 * virtual connectors, crtcs, etc.) specified.
2036 */
483ef985 2037static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
2038{
2039 adev->enable_virtual_display = false;
2040
2041 if (amdgpu_virtual_display) {
8f66090b 2042 const char *pci_address_name = pci_name(adev->pdev);
0f66356d 2043 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
2044
2045 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
2046 pciaddstr_tmp = pciaddstr;
0f66356d
ED
2047 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
2048 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
2049 if (!strcmp("all", pciaddname)
2050 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
2051 long num_crtc;
2052 int res = -1;
2053
9accf2fd 2054 adev->enable_virtual_display = true;
0f66356d
ED
2055
2056 if (pciaddname_tmp)
2057 res = kstrtol(pciaddname_tmp, 10,
2058 &num_crtc);
2059
2060 if (!res) {
2061 if (num_crtc < 1)
2062 num_crtc = 1;
2063 if (num_crtc > 6)
2064 num_crtc = 6;
2065 adev->mode_info.num_crtc = num_crtc;
2066 } else {
2067 adev->mode_info.num_crtc = 1;
2068 }
9accf2fd
ED
2069 break;
2070 }
2071 }
2072
0f66356d
ED
2073 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
2074 amdgpu_virtual_display, pci_address_name,
2075 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
2076
2077 kfree(pciaddstr);
2078 }
2079}
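/*
 * Example of the module-parameter format parsed above (the PCI address below
 * is hypothetical): enable two virtual CRTCs on one specific device and one
 * virtual CRTC on every other device. Entries are separated by ';', and a PCI
 * address (or the keyword "all") is separated from the optional CRTC count
 * by ','.
 *
 *	amdgpu.virtual_display=0000:03:00.0,2;all,1
 */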
2080
25263da3
AD
2081void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
2082{
2083 if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
2084 adev->mode_info.num_crtc = 1;
2085 adev->enable_virtual_display = true;
2086 DRM_INFO("virtual_display:%d, num_crtc:%d\n",
2087 adev->enable_virtual_display, adev->mode_info.num_crtc);
2088 }
2089}
2090
e3ecdffa
AD
2091/**
2092 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
2093 *
2094 * @adev: amdgpu_device pointer
2095 *
2096 * Parses the asic configuration parameters specified in the gpu info
2097 * firmware and makes them available to the driver for use in configuring
2098 * the asic.
2099 * Returns 0 on success, -EINVAL on failure.
2100 */
e2a75f88
AD
2101static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
2102{
e2a75f88 2103 const char *chip_name;
c0a43457 2104 char fw_name[40];
e2a75f88
AD
2105 int err;
2106 const struct gpu_info_firmware_header_v1_0 *hdr;
2107
ab4fe3e1
HR
2108 adev->firmware.gpu_info_fw = NULL;
2109
72de33f8 2110 if (adev->mman.discovery_bin) {
cc375d8c
TY
2111 /*
2112 * FIXME: The bounding box is still needed by Navi12, so
e24d0e91 2113 * temporarily read it from gpu_info firmware. Should be dropped
cc375d8c
TY
2114 * when DAL no longer needs it.
2115 */
2116 if (adev->asic_type != CHIP_NAVI12)
2117 return 0;
258620d0
AD
2118 }
2119
e2a75f88 2120 switch (adev->asic_type) {
e2a75f88
AD
2121 default:
2122 return 0;
2123 case CHIP_VEGA10:
2124 chip_name = "vega10";
2125 break;
3f76dced
AD
2126 case CHIP_VEGA12:
2127 chip_name = "vega12";
2128 break;
2d2e5e7e 2129 case CHIP_RAVEN:
54f78a76 2130 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
54c4d17e 2131 chip_name = "raven2";
54f78a76 2132 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
741deade 2133 chip_name = "picasso";
54c4d17e
FX
2134 else
2135 chip_name = "raven";
2d2e5e7e 2136 break;
65e60f6e
LM
2137 case CHIP_ARCTURUS:
2138 chip_name = "arcturus";
2139 break;
42b325e5
XY
2140 case CHIP_NAVI12:
2141 chip_name = "navi12";
2142 break;
e2a75f88
AD
2143 }
2144
2145 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
b31d3063 2146 err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
e2a75f88
AD
2147 if (err) {
2148 dev_err(adev->dev,
b31d3063 2149 "Failed to get gpu_info firmware \"%s\"\n",
e2a75f88
AD
2150 fw_name);
2151 goto out;
2152 }
2153
ab4fe3e1 2154 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
2155 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
2156
2157 switch (hdr->version_major) {
2158 case 1:
2159 {
2160 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 2161 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
2162 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2163
cc375d8c
TY
2164 /*
2165 * Should be dropped when DAL no longer needs it.
2166 */
2167 if (adev->asic_type == CHIP_NAVI12)
ec51d3fa
XY
2168 goto parse_soc_bounding_box;
2169
b5ab16bf
AD
2170 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2171 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2172 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2173 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 2174 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
2175 le32_to_cpu(gpu_info_fw->gc_num_tccs);
2176 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2177 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2178 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2179 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 2180 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
2181 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2182 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
2183 adev->gfx.cu_info.max_waves_per_simd =
2184 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2185 adev->gfx.cu_info.max_scratch_slots_per_cu =
2186 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2187 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 2188 if (hdr->version_minor >= 1) {
35c2e910
HZ
2189 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2190 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2191 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2192 adev->gfx.config.num_sc_per_sh =
2193 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2194 adev->gfx.config.num_packer_per_sc =
2195 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2196 }
ec51d3fa
XY
2197
2198parse_soc_bounding_box:
ec51d3fa
XY
2199 /*
2200 * soc bounding box info is not integrated in the discovery table,
258620d0 2201 * we always need to parse it from gpu info firmware if needed.
ec51d3fa 2202 */
48321c3d
HW
2203 if (hdr->version_minor == 2) {
2204 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2205 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2206 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2207 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2208 }
e2a75f88
AD
2209 break;
2210 }
2211 default:
2212 dev_err(adev->dev,
2213 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2214 err = -EINVAL;
2215 goto out;
2216 }
2217out:
e2a75f88
AD
2218 return err;
2219}
2220
e3ecdffa
AD
2221/**
2222 * amdgpu_device_ip_early_init - run early init for hardware IPs
2223 *
2224 * @adev: amdgpu_device pointer
2225 *
2226 * Early initialization pass for hardware IPs. The hardware IPs that make
2227 * up each asic are discovered and each IP's early_init callback is run. This
2228 * is the first stage in initializing the asic.
2229 * Returns 0 on success, negative error code on failure.
2230 */
06ec9070 2231static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 2232{
901e2be2
AD
2233 struct drm_device *dev = adev_to_drm(adev);
2234 struct pci_dev *parent;
aaa36a97 2235 int i, r;
ced69502 2236 bool total;
d38ceaf9 2237
483ef985 2238 amdgpu_device_enable_virtual_display(adev);
a6be7570 2239
00a979f3 2240 if (amdgpu_sriov_vf(adev)) {
00a979f3 2241 r = amdgpu_virt_request_full_gpu(adev, true);
aaa36a97
AD
2242 if (r)
2243 return r;
00a979f3
WS
2244 }
2245
d38ceaf9 2246 switch (adev->asic_type) {
33f34802
KW
2247#ifdef CONFIG_DRM_AMDGPU_SI
2248 case CHIP_VERDE:
2249 case CHIP_TAHITI:
2250 case CHIP_PITCAIRN:
2251 case CHIP_OLAND:
2252 case CHIP_HAINAN:
295d0daf 2253 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
2254 r = si_set_ip_blocks(adev);
2255 if (r)
2256 return r;
2257 break;
2258#endif
a2e73f56
AD
2259#ifdef CONFIG_DRM_AMDGPU_CIK
2260 case CHIP_BONAIRE:
2261 case CHIP_HAWAII:
2262 case CHIP_KAVERI:
2263 case CHIP_KABINI:
2264 case CHIP_MULLINS:
e1ad2d53 2265 if (adev->flags & AMD_IS_APU)
a2e73f56 2266 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
2267 else
2268 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
2269
2270 r = cik_set_ip_blocks(adev);
2271 if (r)
2272 return r;
2273 break;
2274#endif
da87c30b
AD
2275 case CHIP_TOPAZ:
2276 case CHIP_TONGA:
2277 case CHIP_FIJI:
2278 case CHIP_POLARIS10:
2279 case CHIP_POLARIS11:
2280 case CHIP_POLARIS12:
2281 case CHIP_VEGAM:
2282 case CHIP_CARRIZO:
2283 case CHIP_STONEY:
2284 if (adev->flags & AMD_IS_APU)
2285 adev->family = AMDGPU_FAMILY_CZ;
2286 else
2287 adev->family = AMDGPU_FAMILY_VI;
2288
2289 r = vi_set_ip_blocks(adev);
2290 if (r)
2291 return r;
2292 break;
d38ceaf9 2293 default:
63352b7f
AD
2294 r = amdgpu_discovery_set_ip_blocks(adev);
2295 if (r)
2296 return r;
2297 break;
d38ceaf9
AD
2298 }
2299
901e2be2
AD
2300 if (amdgpu_has_atpx() &&
2301 (amdgpu_is_atpx_hybrid() ||
2302 amdgpu_has_atpx_dgpu_power_cntl()) &&
2303 ((adev->flags & AMD_IS_APU) == 0) &&
2304 !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
2305 adev->flags |= AMD_IS_PX;
2306
85ac2021 2307 if (!(adev->flags & AMD_IS_APU)) {
c4c8955b 2308 parent = pcie_find_root_port(adev->pdev);
85ac2021
AD
2309 adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2310 }
901e2be2 2311
1884734a 2312
3b94fb10 2313 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 2314 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 2315 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
4215a119
HC
2316 if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2317 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
00f54b97 2318
ced69502 2319 total = true;
d38ceaf9
AD
2320 for (i = 0; i < adev->num_ip_blocks; i++) {
2321 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
0c451baf 2322 DRM_WARN("disabled ip block: %d <%s>\n",
ed8cf00c 2323 i, adev->ip_blocks[i].version->funcs->name);
a1255107 2324 adev->ip_blocks[i].status.valid = false;
d38ceaf9 2325 } else {
a1255107
AD
2326 if (adev->ip_blocks[i].version->funcs->early_init) {
2327 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 2328 if (r == -ENOENT) {
a1255107 2329 adev->ip_blocks[i].status.valid = false;
2c1a2784 2330 } else if (r) {
a1255107
AD
2331 DRM_ERROR("early_init of IP block <%s> failed %d\n",
2332 adev->ip_blocks[i].version->funcs->name, r);
ced69502 2333 total = false;
2c1a2784 2334 } else {
a1255107 2335 adev->ip_blocks[i].status.valid = true;
2c1a2784 2336 }
974e6b64 2337 } else {
a1255107 2338 adev->ip_blocks[i].status.valid = true;
d38ceaf9 2339 }
d38ceaf9 2340 }
21a249ca
AD
2341 /* get the vbios after the asic_funcs are set up */
2342 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
2343 r = amdgpu_device_parse_gpu_info_fw(adev);
2344 if (r)
2345 return r;
2346
21a249ca 2347 /* Read BIOS */
9535a86a
SZ
2348 if (amdgpu_device_read_bios(adev)) {
2349 if (!amdgpu_get_bios(adev))
2350 return -EINVAL;
21a249ca 2351
9535a86a
SZ
2352 r = amdgpu_atombios_init(adev);
2353 if (r) {
2354 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2355 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2356 return r;
2357 }
21a249ca 2358 }
77eabc6f
PJZ
2359
2360 /*get pf2vf msg info at it's earliest time*/
2361 if (amdgpu_sriov_vf(adev))
2362 amdgpu_virt_init_data_exchange(adev);
2363
21a249ca 2364 }
d38ceaf9 2365 }
ced69502
ML
2366 if (!total)
2367 return -ENODEV;
d38ceaf9 2368
00fa4035 2369 amdgpu_amdkfd_device_probe(adev);
395d1fb9
NH
2370 adev->cg_flags &= amdgpu_cg_mask;
2371 adev->pg_flags &= amdgpu_pg_mask;
2372
d38ceaf9
AD
2373 return 0;
2374}
2375
0a4f2520
RZ
2376static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2377{
2378 int i, r;
2379
2380 for (i = 0; i < adev->num_ip_blocks; i++) {
2381 if (!adev->ip_blocks[i].status.sw)
2382 continue;
2383 if (adev->ip_blocks[i].status.hw)
2384 continue;
2385 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 2386 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
2387 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2388 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2389 if (r) {
2390 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2391 adev->ip_blocks[i].version->funcs->name, r);
2392 return r;
2393 }
2394 adev->ip_blocks[i].status.hw = true;
2395 }
2396 }
2397
2398 return 0;
2399}
2400
2401static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2402{
2403 int i, r;
2404
2405 for (i = 0; i < adev->num_ip_blocks; i++) {
2406 if (!adev->ip_blocks[i].status.sw)
2407 continue;
2408 if (adev->ip_blocks[i].status.hw)
2409 continue;
2410 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2411 if (r) {
2412 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2413 adev->ip_blocks[i].version->funcs->name, r);
2414 return r;
2415 }
2416 adev->ip_blocks[i].status.hw = true;
2417 }
2418
2419 return 0;
2420}
2421
7a3e0bb2
RZ
2422static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2423{
2424 int r = 0;
2425 int i;
80f41f84 2426 uint32_t smu_version;
7a3e0bb2
RZ
2427
2428 if (adev->asic_type >= CHIP_VEGA10) {
2429 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
2430 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2431 continue;
2432
e3c1b071 2433 if (!adev->ip_blocks[i].status.sw)
2434 continue;
2435
482f0e53
ML
2436 /* no need to do the fw loading again if already done*/
2437 if (adev->ip_blocks[i].status.hw == true)
2438 break;
2439
53b3f8f4 2440 if (amdgpu_in_reset(adev) || adev->in_suspend) {
482f0e53
ML
2441 r = adev->ip_blocks[i].version->funcs->resume(adev);
2442 if (r) {
2443 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 2444 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
2445 return r;
2446 }
2447 } else {
2448 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2449 if (r) {
2450 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2451 adev->ip_blocks[i].version->funcs->name, r);
2452 return r;
7a3e0bb2 2453 }
7a3e0bb2 2454 }
482f0e53
ML
2455
2456 adev->ip_blocks[i].status.hw = true;
2457 break;
7a3e0bb2
RZ
2458 }
2459 }
482f0e53 2460
8973d9ec
ED
2461 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2462 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 2463
80f41f84 2464 return r;
7a3e0bb2
RZ
2465}
2466
5fd8518d
AG
2467static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2468{
2469 long timeout;
2470 int r, i;
2471
2472 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2473 struct amdgpu_ring *ring = adev->rings[i];
2474
2475 /* No need to setup the GPU scheduler for rings that don't need it */
2476 if (!ring || ring->no_scheduler)
2477 continue;
2478
2479 switch (ring->funcs->type) {
2480 case AMDGPU_RING_TYPE_GFX:
2481 timeout = adev->gfx_timeout;
2482 break;
2483 case AMDGPU_RING_TYPE_COMPUTE:
2484 timeout = adev->compute_timeout;
2485 break;
2486 case AMDGPU_RING_TYPE_SDMA:
2487 timeout = adev->sdma_timeout;
2488 break;
2489 default:
2490 timeout = adev->video_timeout;
2491 break;
2492 }
2493
2494 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
11f25c84 2495 ring->num_hw_submission, 0,
8ab62eda
JG
2496 timeout, adev->reset_domain->wq,
2497 ring->sched_score, ring->name,
2498 adev->dev);
5fd8518d
AG
2499 if (r) {
2500 DRM_ERROR("Failed to create scheduler on ring %s.\n",
2501 ring->name);
2502 return r;
2503 }
2504 }
2505
d425c6f4
JZ
2506 amdgpu_xcp_update_partition_sched_list(adev);
2507
5fd8518d
AG
2508 return 0;
2509}
2510
2511
e3ecdffa
AD
2512/**
2513 * amdgpu_device_ip_init - run init for hardware IPs
2514 *
2515 * @adev: amdgpu_device pointer
2516 *
2517 * Main initialization pass for hardware IPs. The list of all the hardware
2518 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2519 * are run. sw_init initializes the software state associated with each IP
2520 * and hw_init initializes the hardware associated with each IP.
2521 * Returns 0 on success, negative error code on failure.
2522 */
06ec9070 2523static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
2524{
2525 int i, r;
2526
c030f2e4 2527 r = amdgpu_ras_init(adev);
2528 if (r)
2529 return r;
2530
d38ceaf9 2531 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2532 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2533 continue;
a1255107 2534 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 2535 if (r) {
a1255107
AD
2536 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2537 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 2538 goto init_failed;
2c1a2784 2539 }
a1255107 2540 adev->ip_blocks[i].status.sw = true;
bfca0289 2541
c1c39032
AD
2542 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2543 /* need to do common hw init early so everything is set up for gmc */
2544 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2545 if (r) {
2546 DRM_ERROR("hw_init %d failed %d\n", i, r);
2547 goto init_failed;
2548 }
2549 adev->ip_blocks[i].status.hw = true;
2550 } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2551 /* need to do gmc hw init early so we can allocate gpu mem */
892deb48
VS
2552 /* Try to reserve bad pages early */
2553 if (amdgpu_sriov_vf(adev))
2554 amdgpu_virt_exchange_data(adev);
2555
7ccfd79f 2556 r = amdgpu_device_mem_scratch_init(adev);
2c1a2784 2557 if (r) {
7ccfd79f 2558 DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
72d3f592 2559 goto init_failed;
2c1a2784 2560 }
a1255107 2561 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
2562 if (r) {
2563 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 2564 goto init_failed;
2c1a2784 2565 }
06ec9070 2566 r = amdgpu_device_wb_init(adev);
2c1a2784 2567 if (r) {
06ec9070 2568 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 2569 goto init_failed;
2c1a2784 2570 }
a1255107 2571 adev->ip_blocks[i].status.hw = true;
2493664f
ML
2572
2573 /* right after GMC hw init, we create CSA */
02ff519e 2574 if (adev->gfx.mcbp) {
1e256e27 2575 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
58ab2c08
CK
2576 AMDGPU_GEM_DOMAIN_VRAM |
2577 AMDGPU_GEM_DOMAIN_GTT,
2578 AMDGPU_CSA_SIZE);
2493664f
ML
2579 if (r) {
2580 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 2581 goto init_failed;
2493664f
ML
2582 }
2583 }
d38ceaf9
AD
2584 }
2585 }
2586
c9ffa427 2587 if (amdgpu_sriov_vf(adev))
22c16d25 2588 amdgpu_virt_init_data_exchange(adev);
c9ffa427 2589
533aed27
AG
2590 r = amdgpu_ib_pool_init(adev);
2591 if (r) {
2592 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2593 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2594 goto init_failed;
2595 }
2596
c8963ea4
RZ
2597 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2598 if (r)
72d3f592 2599 goto init_failed;
0a4f2520
RZ
2600
2601 r = amdgpu_device_ip_hw_init_phase1(adev);
2602 if (r)
72d3f592 2603 goto init_failed;
0a4f2520 2604
7a3e0bb2
RZ
2605 r = amdgpu_device_fw_loading(adev);
2606 if (r)
72d3f592 2607 goto init_failed;
7a3e0bb2 2608
0a4f2520
RZ
2609 r = amdgpu_device_ip_hw_init_phase2(adev);
2610 if (r)
72d3f592 2611 goto init_failed;
d38ceaf9 2612
121a2bc6
AG
2613 /*
2614 * retired pages will be loaded from eeprom and reserved here,
2615 * it should be called after amdgpu_device_ip_hw_init_phase2 since
2616 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2617 * for I2C communication, which is only true at this point.
b82e65a9
GC
2618 *
2619 * amdgpu_ras_recovery_init may fail, but the upper layer only cares about
2620 * failures caused by a bad gpu situation and stops the amdgpu init process
2621 * accordingly. For other failure cases, it still releases all the
2622 * resources and prints an error message, rather than returning a
2623 * negative value to the upper level.
121a2bc6
AG
2624 *
2625 * Note: theoretically, this should be called before all vram allocations
2626 * to protect retired page from abusing
2627 */
b82e65a9
GC
2628 r = amdgpu_ras_recovery_init(adev);
2629 if (r)
2630 goto init_failed;
121a2bc6 2631
cfbb6b00
AG
2632 /**
2633 * In case of XGMI grab extra reference for reset domain for this device
2634 */
a4c63caf 2635 if (adev->gmc.xgmi.num_physical_nodes > 1) {
cfbb6b00 2636 if (amdgpu_xgmi_add_device(adev) == 0) {
46c67660 2637 if (!amdgpu_sriov_vf(adev)) {
2efc30f0
VC
2638 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2639
dfd0287b
LH
2640 if (WARN_ON(!hive)) {
2641 r = -ENOENT;
2642 goto init_failed;
2643 }
2644
46c67660 2645 if (!hive->reset_domain ||
2646 !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2647 r = -ENOENT;
2648 amdgpu_put_xgmi_hive(hive);
2649 goto init_failed;
2650 }
2651
2652 /* Drop the early temporary reset domain we created for device */
2653 amdgpu_reset_put_reset_domain(adev->reset_domain);
2654 adev->reset_domain = hive->reset_domain;
9dfa4860 2655 amdgpu_put_xgmi_hive(hive);
cfbb6b00 2656 }
a4c63caf
AG
2657 }
2658 }
2659
5fd8518d
AG
2660 r = amdgpu_device_init_schedulers(adev);
2661 if (r)
2662 goto init_failed;
e3c1b071 2663
2664 /* Don't init kfd if the whole hive needs to be reset during init */
84b4dd3f
PY
2665 if (!adev->gmc.xgmi.pending_reset) {
2666 kgd2kfd_init_zone_device(adev);
e3c1b071 2667 amdgpu_amdkfd_device_init(adev);
84b4dd3f 2668 }
c6332b97 2669
bd607166
KR
2670 amdgpu_fru_get_product_info(adev);
2671
72d3f592 2672init_failed:
c6332b97 2673
72d3f592 2674 return r;
d38ceaf9
AD
2675}
2676
e3ecdffa
AD
2677/**
2678 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2679 *
2680 * @adev: amdgpu_device pointer
2681 *
2682 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2683 * this function before a GPU reset. If the value is retained after a
2685 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2685 */
06ec9070 2686static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2687{
2688 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2689}
2690
e3ecdffa
AD
2691/**
2692 * amdgpu_device_check_vram_lost - check if vram is valid
2693 *
2694 * @adev: amdgpu_device pointer
2695 *
2696 * Checks the reset magic value written to the gart pointer in VRAM.
2697 * The driver calls this after a GPU reset to see if the contents of
2698 * VRAM have been lost or not.
2699 * returns true if vram is lost, false if not.
2700 */
06ec9070 2701static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2702{
dadce777
EQ
2703 if (memcmp(adev->gart.ptr, adev->reset_magic,
2704 AMDGPU_RESET_MAGIC_NUM))
2705 return true;
2706
53b3f8f4 2707 if (!amdgpu_in_reset(adev))
dadce777
EQ
2708 return false;
2709
2710 /*
2711 * For all ASICs with baco/mode1 reset, the VRAM is
2712 * always assumed to be lost.
2713 */
2714 switch (amdgpu_asic_reset_method(adev)) {
2715 case AMD_RESET_METHOD_BACO:
2716 case AMD_RESET_METHOD_MODE1:
2717 return true;
2718 default:
2719 return false;
2720 }
0c49e0b8
CZ
2721}
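/*
 * A rough sketch of how the two helpers above pair up around a reset
 * (illustrative only; the real call sites are elsewhere in this file):
 *
 *	amdgpu_device_fill_reset_magic(adev);	written before a reset
 *	...					asic reset happens here
 *	if (amdgpu_device_check_vram_lost(adev))
 *		dev_info(adev->dev, "VRAM contents were lost\n");
 */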
2722
e3ecdffa 2723/**
1112a46b 2724 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2725 *
2726 * @adev: amdgpu_device pointer
b8b72130 2727 * @state: clockgating state (gate or ungate)
e3ecdffa 2728 *
e3ecdffa 2729 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2730 * set_clockgating_state callbacks are run.
2731 * Late initialization pass enabling clockgating for hardware IPs.
2732 * Fini or suspend, pass disabling clockgating for hardware IPs.
e3ecdffa
AD
2733 * Returns 0 on success, negative error code on failure.
2734 */
fdd34271 2735
5d89bb2d
LL
2736int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2737 enum amd_clockgating_state state)
d38ceaf9 2738{
1112a46b 2739 int i, j, r;
d38ceaf9 2740
4a2ba394
SL
2741 if (amdgpu_emu_mode == 1)
2742 return 0;
2743
1112a46b
RZ
2744 for (j = 0; j < adev->num_ip_blocks; j++) {
2745 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2746 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2747 continue;
47198eb7 2748 /* skip CG for GFX, SDMA on S0ix */
5d70a549 2749 if (adev->in_s0ix &&
47198eb7
AD
2750 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2751 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2752 continue;
4a446d55 2753 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2754 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2755 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2756 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2757 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2758 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2759 /* enable clockgating to save power */
a1255107 2760 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2761 state);
4a446d55
AD
2762 if (r) {
2763 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2764 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2765 return r;
2766 }
b0b00ff1 2767 }
d38ceaf9 2768 }
06b18f61 2769
c9f96fd5
RZ
2770 return 0;
2771}
2772
5d89bb2d
LL
2773int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2774 enum amd_powergating_state state)
c9f96fd5 2775{
1112a46b 2776 int i, j, r;
06b18f61 2777
c9f96fd5
RZ
2778 if (amdgpu_emu_mode == 1)
2779 return 0;
2780
1112a46b
RZ
2781 for (j = 0; j < adev->num_ip_blocks; j++) {
2782 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2783 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5 2784 continue;
47198eb7 2785 /* skip PG for GFX, SDMA on S0ix */
5d70a549 2786 if (adev->in_s0ix &&
47198eb7
AD
2787 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2788 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2789 continue;
c9f96fd5
RZ
2790 /* skip CG for VCE/UVD, it's handled specially */
2791 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2792 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2793 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2794 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2795 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2796 /* enable powergating to save power */
2797 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2798 state);
c9f96fd5
RZ
2799 if (r) {
2800 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2801 adev->ip_blocks[i].version->funcs->name, r);
2802 return r;
2803 }
2804 }
2805 }
2dc80b00
S
2806 return 0;
2807}
2808
beff74bc
AD
2809static int amdgpu_device_enable_mgpu_fan_boost(void)
2810{
2811 struct amdgpu_gpu_instance *gpu_ins;
2812 struct amdgpu_device *adev;
2813 int i, ret = 0;
2814
2815 mutex_lock(&mgpu_info.mutex);
2816
2817 /*
2818 * MGPU fan boost feature should be enabled
2819 * only when there are two or more dGPUs in
2820 * the system
2821 */
2822 if (mgpu_info.num_dgpu < 2)
2823 goto out;
2824
2825 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2826 gpu_ins = &(mgpu_info.gpu_ins[i]);
2827 adev = gpu_ins->adev;
2828 if (!(adev->flags & AMD_IS_APU) &&
f10bb940 2829 !gpu_ins->mgpu_fan_enabled) {
beff74bc
AD
2830 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2831 if (ret)
2832 break;
2833
2834 gpu_ins->mgpu_fan_enabled = 1;
2835 }
2836 }
2837
2838out:
2839 mutex_unlock(&mgpu_info.mutex);
2840
2841 return ret;
2842}
2843
e3ecdffa
AD
2844/**
2845 * amdgpu_device_ip_late_init - run late init for hardware IPs
2846 *
2847 * @adev: amdgpu_device pointer
2848 *
2849 * Late initialization pass for hardware IPs. The list of all the hardware
2850 * IPs that make up the asic is walked and the late_init callbacks are run.
2851 * late_init covers any special initialization that an IP requires
2852 * after all of the IPs have been initialized or something that needs to happen
2853 * late in the init process.
2854 * Returns 0 on success, negative error code on failure.
2855 */
06ec9070 2856static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2857{
60599a03 2858 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2859 int i = 0, r;
2860
2861 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2862 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2863 continue;
2864 if (adev->ip_blocks[i].version->funcs->late_init) {
2865 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2866 if (r) {
2867 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2868 adev->ip_blocks[i].version->funcs->name, r);
2869 return r;
2870 }
2dc80b00 2871 }
73f847db 2872 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2873 }
2874
867e24ca 2875 r = amdgpu_ras_late_init(adev);
2876 if (r) {
2877 DRM_ERROR("amdgpu_ras_late_init failed %d", r);
2878 return r;
2879 }
2880
a891d239
DL
2881 amdgpu_ras_set_error_query_ready(adev, true);
2882
1112a46b
RZ
2883 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2884 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2885
06ec9070 2886 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2887
beff74bc
AD
2888 r = amdgpu_device_enable_mgpu_fan_boost();
2889 if (r)
2890 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2891
4da8b639 2892 /* For passthrough configuration on arcturus and aldebaran, enable special SBR handling */
47fc644f
SS
2893 if (amdgpu_passthrough(adev) &&
2894 ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
2895 adev->asic_type == CHIP_ALDEBARAN))
bc143d8b 2896 amdgpu_dpm_handle_passthrough_sbr(adev, true);
60599a03
EQ
2897
2898 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2899 mutex_lock(&mgpu_info.mutex);
2900
2901 /*
2902 * Reset device p-state to low as this was booted with high.
2903 *
2904 * This should be performed only after all devices from the same
2905 * hive get initialized.
2906 *
2907 * However, it's unknown in advance how many devices are in the hive,
2908 * as they are counted one by one during device initialization.
2909 *
2910 * So, we wait for all XGMI interlinked devices initialized.
2911 * This may bring some delays as those devices may come from
2912 * different hives. But that should be OK.
2913 */
2914 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2915 for (i = 0; i < mgpu_info.num_gpu; i++) {
2916 gpu_instance = &(mgpu_info.gpu_ins[i]);
2917 if (gpu_instance->adev->flags & AMD_IS_APU)
2918 continue;
2919
d84a430d
JK
2920 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2921 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
2922 if (r) {
2923 DRM_ERROR("pstate setting failed (%d).\n", r);
2924 break;
2925 }
2926 }
2927 }
2928
2929 mutex_unlock(&mgpu_info.mutex);
2930 }
2931
d38ceaf9
AD
2932 return 0;
2933}
2934
613aa3ea
LY
2935/**
2936 * amdgpu_device_smu_fini_early - smu hw_fini wrapper
2937 *
2938 * @adev: amdgpu_device pointer
2939 *
2940 * For ASICs that need to disable SMC first
2941 */
2942static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
2943{
2944 int i, r;
2945
4e8303cf 2946 if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
613aa3ea
LY
2947 return;
2948
2949 for (i = 0; i < adev->num_ip_blocks; i++) {
2950 if (!adev->ip_blocks[i].status.hw)
2951 continue;
2952 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2953 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2954 /* XXX handle errors */
2955 if (r) {
2956 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2957 adev->ip_blocks[i].version->funcs->name, r);
2958 }
2959 adev->ip_blocks[i].status.hw = false;
2960 break;
2961 }
2962 }
2963}
2964
e9669fb7 2965static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
d38ceaf9
AD
2966{
2967 int i, r;
2968
e9669fb7
AG
2969 for (i = 0; i < adev->num_ip_blocks; i++) {
2970 if (!adev->ip_blocks[i].version->funcs->early_fini)
2971 continue;
5278a159 2972
e9669fb7
AG
2973 r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
2974 if (r) {
2975 DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
2976 adev->ip_blocks[i].version->funcs->name, r);
2977 }
2978 }
c030f2e4 2979
05df1f01 2980 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2981 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2982
7270e895
TY
2983 amdgpu_amdkfd_suspend(adev, false);
2984
613aa3ea
LY
2985 /* Workaround for ASICs that need to disable SMC first */
2986 amdgpu_device_smu_fini_early(adev);
3e96dbfd 2987
d38ceaf9 2988 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2989 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2990 continue;
8201a67a 2991
a1255107 2992 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2993 /* XXX handle errors */
2c1a2784 2994 if (r) {
a1255107
AD
2995 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2996 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2997 }
8201a67a 2998
a1255107 2999 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
3000 }
3001
6effad8a
GC
3002 if (amdgpu_sriov_vf(adev)) {
3003 if (amdgpu_virt_release_full_gpu(adev, false))
3004 DRM_ERROR("failed to release exclusive mode on fini\n");
3005 }
3006
e9669fb7
AG
3007 return 0;
3008}
3009
3010/**
3011 * amdgpu_device_ip_fini - run fini for hardware IPs
3012 *
3013 * @adev: amdgpu_device pointer
3014 *
3015 * Main teardown pass for hardware IPs. The list of all the hardware
3016 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
3017 * are run. hw_fini tears down the hardware associated with each IP
3018 * and sw_fini tears down any software state associated with each IP.
3019 * Returns 0 on success, negative error code on failure.
3020 */
3021static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
3022{
3023 int i, r;
3024
3025 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
3026 amdgpu_virt_release_ras_err_handler_data(adev);
3027
e9669fb7
AG
3028 if (adev->gmc.xgmi.num_physical_nodes > 1)
3029 amdgpu_xgmi_remove_device(adev);
3030
c004d44e 3031 amdgpu_amdkfd_device_fini_sw(adev);
9950cda2 3032
d38ceaf9 3033 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3034 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 3035 continue;
c12aba3a
ML
3036
3037 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 3038 amdgpu_ucode_free_bo(adev);
1e256e27 3039 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a 3040 amdgpu_device_wb_fini(adev);
7ccfd79f 3041 amdgpu_device_mem_scratch_fini(adev);
533aed27 3042 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
3043 }
3044
a1255107 3045 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 3046 /* XXX handle errors */
2c1a2784 3047 if (r) {
a1255107
AD
3048 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
3049 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 3050 }
a1255107
AD
3051 adev->ip_blocks[i].status.sw = false;
3052 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
3053 }
3054
a6dcfd9c 3055 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3056 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 3057 continue;
a1255107
AD
3058 if (adev->ip_blocks[i].version->funcs->late_fini)
3059 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
3060 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
3061 }
3062
c030f2e4 3063 amdgpu_ras_fini(adev);
3064
d38ceaf9
AD
3065 return 0;
3066}
3067
e3ecdffa 3068/**
beff74bc 3069 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 3070 *
1112a46b 3071 * @work: work_struct.
e3ecdffa 3072 */
beff74bc 3073static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
3074{
3075 struct amdgpu_device *adev =
beff74bc 3076 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
3077 int r;
3078
3079 r = amdgpu_ib_ring_tests(adev);
3080 if (r)
3081 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
3082}
3083
1e317b99
RZ
3084static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
3085{
3086 struct amdgpu_device *adev =
3087 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
3088
90a92662
MD
3089 WARN_ON_ONCE(adev->gfx.gfx_off_state);
3090 WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
3091
3092 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
3093 adev->gfx.gfx_off_state = true;
1e317b99
RZ
3094}
3095
e3ecdffa 3096/**
e7854a03 3097 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
3098 *
3099 * @adev: amdgpu_device pointer
3100 *
3101 * Main suspend function for hardware IPs. The list of all the hardware
3102 * IPs that make up the asic is walked, clockgating is disabled and the
3103 * suspend callbacks are run. suspend puts the hardware and software state
3104 * in each IP into a state suitable for suspend.
3105 * Returns 0 on success, negative error code on failure.
3106 */
e7854a03
AD
3107static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
3108{
3109 int i, r;
3110
50ec83f0
AD
3111 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3112 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 3113
b31d6ada
EQ
3114 /*
3115 * Per PMFW team's suggestion, driver needs to handle gfxoff
3116 * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
3117 * scenario. Add the missing df cstate disablement here.
3118 */
3119 if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
3120 dev_warn(adev->dev, "Failed to disallow df cstate");
3121
e7854a03
AD
3122 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3123 if (!adev->ip_blocks[i].status.valid)
3124 continue;
2b9f7848 3125
e7854a03 3126 /* displays are handled separately */
2b9f7848
ND
3127 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
3128 continue;
3129
3130 /* XXX handle errors */
3131 r = adev->ip_blocks[i].version->funcs->suspend(adev);
3132 /* XXX handle errors */
3133 if (r) {
3134 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3135 adev->ip_blocks[i].version->funcs->name, r);
3136 return r;
e7854a03 3137 }
2b9f7848
ND
3138
3139 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
3140 }
3141
e7854a03
AD
3142 return 0;
3143}
3144
3145/**
3146 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3147 *
3148 * @adev: amdgpu_device pointer
3149 *
3150 * Main suspend function for hardware IPs. The list of all the hardware
3151 * IPs that make up the asic is walked, clockgating is disabled and the
3152 * suspend callbacks are run. suspend puts the hardware and software state
3153 * in each IP into a state suitable for suspend.
3154 * Returns 0 on success, negative error code on failure.
3155 */
3156static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3157{
3158 int i, r;
3159
557f42a2 3160 if (adev->in_s0ix)
bc143d8b 3161 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
34416931 3162
d38ceaf9 3163 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3164 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 3165 continue;
e7854a03
AD
3166 /* displays are handled in phase1 */
3167 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3168 continue;
bff77e86
LM
3169 /* PSP lost connection when err_event_athub occurs */
3170 if (amdgpu_ras_intr_triggered() &&
3171 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3172 adev->ip_blocks[i].status.hw = false;
3173 continue;
3174 }
e3c1b071 3175
3176 /* skip unnecessary suspend if we do not initialize them yet */
3177 if (adev->gmc.xgmi.pending_reset &&
3178 !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3179 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
3180 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3181 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
3182 adev->ip_blocks[i].status.hw = false;
3183 continue;
3184 }
557f42a2 3185
afa6646b 3186 /* skip suspend of gfx/mes and psp for S0ix
32ff160d
AD
3187 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3188 * like at runtime. PSP is also part of the always on hardware
3189 * so no need to suspend it.
3190 */
557f42a2 3191 if (adev->in_s0ix &&
32ff160d 3192 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
afa6646b
AD
3193 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3194 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
557f42a2
AD
3195 continue;
3196
2a7798ea
AD
3197 /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3198 if (adev->in_s0ix &&
4e8303cf
LL
3199 (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
3200 IP_VERSION(5, 0, 0)) &&
3201 (adev->ip_blocks[i].version->type ==
3202 AMD_IP_BLOCK_TYPE_SDMA))
2a7798ea
AD
3203 continue;
3204
e11c7750
TH
3205 /* Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot.
3206 * These are in TMR, hence are expected to be reused by PSP-TOS to reload
3207 * from this location and RLC Autoload automatically also gets loaded
3208 * from here based on PMFW -> PSP message during re-init sequence.
3209 * Therefore, the psp suspend & resume should be skipped to avoid destroying
3210 * the TMR and reloading FWs again for IMU enabled APU ASICs.
3211 */
3212 if (amdgpu_in_reset(adev) &&
3213 (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3214 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3215 continue;
3216
d38ceaf9 3217 /* XXX handle errors */
a1255107 3218 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 3219 /* XXX handle errors */
2c1a2784 3220 if (r) {
a1255107
AD
3221 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3222 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 3223 }
876923fb 3224 adev->ip_blocks[i].status.hw = false;
a3a09142 3225 /* handle putting the SMC in the appropriate state */
47fc644f 3226 if (!amdgpu_sriov_vf(adev)) {
86b93fd6
JZ
3227 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3228 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3229 if (r) {
3230 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3231 adev->mp1_state, r);
3232 return r;
3233 }
a3a09142
AD
3234 }
3235 }
d38ceaf9
AD
3236 }
3237
3238 return 0;
3239}
3240
e7854a03
AD
3241/**
3242 * amdgpu_device_ip_suspend - run suspend for hardware IPs
3243 *
3244 * @adev: amdgpu_device pointer
3245 *
3246 * Main suspend function for hardware IPs. The list of all the hardware
3247 * IPs that make up the asic is walked, clockgating is disabled and the
3248 * suspend callbacks are run. suspend puts the hardware and software state
3249 * in each IP into a state suitable for suspend.
3250 * Returns 0 on success, negative error code on failure.
3251 */
3252int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3253{
3254 int r;
3255
3c73683c
JC
3256 if (amdgpu_sriov_vf(adev)) {
3257 amdgpu_virt_fini_data_exchange(adev);
e7819644 3258 amdgpu_virt_request_full_gpu(adev, false);
3c73683c 3259 }
e7819644 3260
e7854a03
AD
3261 r = amdgpu_device_ip_suspend_phase1(adev);
3262 if (r)
3263 return r;
3264 r = amdgpu_device_ip_suspend_phase2(adev);
3265
e7819644
YT
3266 if (amdgpu_sriov_vf(adev))
3267 amdgpu_virt_release_full_gpu(adev, false);
3268
e7854a03
AD
3269 return r;
3270}
3271
06ec9070 3272static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3273{
3274 int i, r;
3275
2cb681b6 3276 static enum amd_ip_block_type ip_order[] = {
2cb681b6 3277 AMD_IP_BLOCK_TYPE_COMMON,
c1c39032 3278 AMD_IP_BLOCK_TYPE_GMC,
39186aef 3279 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
3280 AMD_IP_BLOCK_TYPE_IH,
3281 };
a90ad3c2 3282
95ea3dbc 3283 for (i = 0; i < adev->num_ip_blocks; i++) {
2cb681b6
ML
3284 int j;
3285 struct amdgpu_ip_block *block;
a90ad3c2 3286
4cd2a96d
J
3287 block = &adev->ip_blocks[i];
3288 block->status.hw = false;
2cb681b6 3289
4cd2a96d 3290 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
2cb681b6 3291
4cd2a96d 3292 if (block->version->type != ip_order[j] ||
2cb681b6
ML
3293 !block->status.valid)
3294 continue;
3295
3296 r = block->version->funcs->hw_init(adev);
0aaeefcc 3297 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3298 if (r)
3299 return r;
482f0e53 3300 block->status.hw = true;
a90ad3c2
ML
3301 }
3302 }
3303
3304 return 0;
3305}
3306
06ec9070 3307static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3308{
3309 int i, r;
3310
2cb681b6
ML
3311 static enum amd_ip_block_type ip_order[] = {
3312 AMD_IP_BLOCK_TYPE_SMC,
3313 AMD_IP_BLOCK_TYPE_DCE,
3314 AMD_IP_BLOCK_TYPE_GFX,
3315 AMD_IP_BLOCK_TYPE_SDMA,
ec64350d 3316 AMD_IP_BLOCK_TYPE_MES,
257deb8c 3317 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07 3318 AMD_IP_BLOCK_TYPE_VCE,
d2cdc014
YZ
3319 AMD_IP_BLOCK_TYPE_VCN,
3320 AMD_IP_BLOCK_TYPE_JPEG
2cb681b6 3321 };
a90ad3c2 3322
2cb681b6
ML
3323 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3324 int j;
3325 struct amdgpu_ip_block *block;
a90ad3c2 3326
2cb681b6
ML
3327 for (j = 0; j < adev->num_ip_blocks; j++) {
3328 block = &adev->ip_blocks[j];
3329
3330 if (block->version->type != ip_order[i] ||
482f0e53
ML
3331 !block->status.valid ||
3332 block->status.hw)
2cb681b6
ML
3333 continue;
3334
895bd048
JZ
3335 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3336 r = block->version->funcs->resume(adev);
3337 else
3338 r = block->version->funcs->hw_init(adev);
3339
0aaeefcc 3340 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3341 if (r)
3342 return r;
482f0e53 3343 block->status.hw = true;
a90ad3c2
ML
3344 }
3345 }
3346
3347 return 0;
3348}
3349
e3ecdffa
AD
3350/**
3351 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3352 *
3353 * @adev: amdgpu_device pointer
3354 *
3355 * First resume function for hardware IPs. The list of all the hardware
3356 * IPs that make up the asic is walked and the resume callbacks are run for
3357 * COMMON, GMC, and IH. resume puts the hardware into a functional state
3358 * after a suspend and updates the software state as necessary. This
3359 * function is also used for restoring the GPU after a GPU reset.
3360 * Returns 0 on success, negative error code on failure.
3361 */
06ec9070 3362static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
3363{
3364 int i, r;
3365
a90ad3c2 3366 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3367 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 3368 continue;
a90ad3c2 3369 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3370 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
d7274ec7
BZ
3371 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3372 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
482f0e53 3373
fcf0649f
CZ
3374 r = adev->ip_blocks[i].version->funcs->resume(adev);
3375 if (r) {
3376 DRM_ERROR("resume of IP block <%s> failed %d\n",
3377 adev->ip_blocks[i].version->funcs->name, r);
3378 return r;
3379 }
482f0e53 3380 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
3381 }
3382 }
3383
3384 return 0;
3385}
3386
e3ecdffa
AD
3387/**
3388 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3389 *
3390 * @adev: amdgpu_device pointer
3391 *
3392 * Second resume function for hardware IPs. The list of all the hardware
3393 * IPs that make up the asic is walked and the resume callbacks are run for
3394 * all blocks except COMMON, GMC, IH, and PSP. resume puts the hardware into a
3395 * functional state after a suspend and updates the software state as
3396 * necessary. This function is also used for restoring the GPU after a GPU
3397 * reset.
3398 * Returns 0 on success, negative error code on failure.
3399 */
06ec9070 3400static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3401{
3402 int i, r;
3403
3404 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3405 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 3406 continue;
fcf0649f 3407 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3408 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
3409 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3410 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 3411 continue;
a1255107 3412 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 3413 if (r) {
a1255107
AD
3414 DRM_ERROR("resume of IP block <%s> failed %d\n",
3415 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 3416 return r;
2c1a2784 3417 }
482f0e53 3418 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
3419 }
3420
3421 return 0;
3422}
3423
e3ecdffa
AD
3424/**
3425 * amdgpu_device_ip_resume - run resume for hardware IPs
3426 *
3427 * @adev: amdgpu_device pointer
3428 *
3429 * Main resume function for hardware IPs. The hardware IPs
3430 * are split into two resume functions because they are
b8920e1e 3431 * also used in recovering from a GPU reset and some additional
e3ecdffa
AD
3432 * steps need to be taken between them. In this case (S3/S4) they are
3433 * run sequentially.
3434 * Returns 0 on success, negative error code on failure.
3435 */
06ec9070 3436static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
3437{
3438 int r;
3439
06ec9070 3440 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
3441 if (r)
3442 return r;
7a3e0bb2
RZ
3443
3444 r = amdgpu_device_fw_loading(adev);
3445 if (r)
3446 return r;
3447
06ec9070 3448 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
3449
3450 return r;
3451}
3452
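/*
 * Editor's illustrative sketch (not part of the driver): a standalone,
 * userspace C model of the two-phase resume ordering documented above.
 * Phase 1 brings up the blocks everything else depends on (COMMON, GMC,
 * IH), firmware loading happens in between, and phase 2 brings up the
 * rest.  The block names and list below are invented for the example.
 */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct ip_block_model { const char *type; bool hw; };

static bool model_is_phase1(const char *type)
{
	return !strcmp(type, "COMMON") || !strcmp(type, "GMC") ||
	       !strcmp(type, "IH");
}

static void model_resume_phase(struct ip_block_model *blocks, int n, bool phase1)
{
	for (int i = 0; i < n; i++) {
		if (blocks[i].hw || model_is_phase1(blocks[i].type) != phase1)
			continue;
		printf("phase %d: resuming %s\n", phase1 ? 1 : 2, blocks[i].type);
		blocks[i].hw = true;		/* mark the block as powered up */
	}
}

int main(void)
{
	struct ip_block_model blocks[] = {
		{ "COMMON", false }, { "GMC", false }, { "IH", false },
		{ "GFX", false }, { "SDMA", false }, { "VCN", false },
	};
	int n = sizeof(blocks) / sizeof(blocks[0]);

	model_resume_phase(blocks, n, true);	/* like ..._ip_resume_phase1() */
	printf("firmware loading happens here\n");
	model_resume_phase(blocks, n, false);	/* like ..._ip_resume_phase2() */
	return 0;
}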
e3ecdffa
AD
3453/**
3454 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3455 *
3456 * @adev: amdgpu_device pointer
3457 *
3458 * Query the VBIOS data tables to determine if the board supports SR-IOV.
3459 */
4e99a44e 3460static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 3461{
6867e1b5
ML
3462 if (amdgpu_sriov_vf(adev)) {
3463 if (adev->is_atom_fw) {
58ff791a 3464 if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
6867e1b5
ML
3465 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3466 } else {
3467 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3468 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3469 }
3470
3471 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3472 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 3473 }
048765ad
AR
3474}
3475
e3ecdffa
AD
3476/**
3477 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3478 *
3479 * @asic_type: AMD asic type
3480 *
3481 * Check if there is DC (new modesetting infrastructure) support for an asic.
3482 * Returns true if DC has support, false if not.
3483 */
4562236b
HW
3484bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3485{
3486 switch (asic_type) {
0637d417
AD
3487#ifdef CONFIG_DRM_AMDGPU_SI
3488 case CHIP_HAINAN:
3489#endif
3490 case CHIP_TOPAZ:
3491 /* chips with no display hardware */
3492 return false;
4562236b 3493#if defined(CONFIG_DRM_AMD_DC)
64200c46
MR
3494 case CHIP_TAHITI:
3495 case CHIP_PITCAIRN:
3496 case CHIP_VERDE:
3497 case CHIP_OLAND:
2d32ffd6
AD
3498 /*
3499 * We have systems in the wild with these ASICs that require
3500 * LVDS and VGA support which is not supported with DC.
3501 *
3502 * Fallback to the non-DC driver here by default so as not to
3503 * cause regressions.
3504 */
3505#if defined(CONFIG_DRM_AMD_DC_SI)
3506 return amdgpu_dc > 0;
3507#else
3508 return false;
64200c46 3509#endif
4562236b 3510 case CHIP_BONAIRE:
0d6fbccb 3511 case CHIP_KAVERI:
367e6687
AD
3512 case CHIP_KABINI:
3513 case CHIP_MULLINS:
d9fda248
HW
3514 /*
3515 * We have systems in the wild with these ASICs that require
b5a0168e 3516 * VGA support which is not supported with DC.
d9fda248
HW
3517 *
3518 * Fallback to the non-DC driver here by default so as not to
3519 * cause regressions.
3520 */
3521 return amdgpu_dc > 0;
f7f12b25 3522 default:
fd187853 3523 return amdgpu_dc != 0;
f7f12b25 3524#else
4562236b 3525 default:
93b09a9a 3526 if (amdgpu_dc > 0)
b8920e1e 3527 DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
4562236b 3528 return false;
f7f12b25 3529#endif
4562236b
HW
3530 }
3531}
3532
3533/**
3534 * amdgpu_device_has_dc_support - check if dc is supported
3535 *
982a820b 3536 * @adev: amdgpu_device pointer
4562236b
HW
3537 *
3538 * Returns true for supported, false for not supported
3539 */
3540bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3541{
25263da3 3542 if (adev->enable_virtual_display ||
abaf210c 3543 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
2555039d
XY
3544 return false;
3545
4562236b
HW
3546 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3547}
3548
d4535e2c
AG
3549static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3550{
3551 struct amdgpu_device *adev =
3552 container_of(__work, struct amdgpu_device, xgmi_reset_work);
d95e8e97 3553 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
d4535e2c 3554
c6a6e2db
AG
3555 /* It's a bug to not have a hive within this function */
3556 if (WARN_ON(!hive))
3557 return;
3558
3559 /*
3560 * Use task barrier to synchronize all xgmi reset works across the
3561 * hive. task_barrier_enter and task_barrier_exit will block
3562 * until all the threads running the xgmi reset works reach
3563 * those points. task_barrier_full will do both blocks.
3564 */
3565 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3566
3567 task_barrier_enter(&hive->tb);
4a580877 3568 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
c6a6e2db
AG
3569
3570 if (adev->asic_reset_res)
3571 goto fail;
3572
3573 task_barrier_exit(&hive->tb);
4a580877 3574 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
c6a6e2db
AG
3575
3576 if (adev->asic_reset_res)
3577 goto fail;
43c4d576 3578
21226f02 3579 amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB);
c6a6e2db
AG
3580 } else {
3581
3582 task_barrier_full(&hive->tb);
3583 adev->asic_reset_res = amdgpu_asic_reset(adev);
3584 }
ce316fa5 3585
c6a6e2db 3586fail:
d4535e2c 3587 if (adev->asic_reset_res)
fed184e9 3588 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
4a580877 3589 adev->asic_reset_res, adev_to_drm(adev)->unique);
d95e8e97 3590 amdgpu_put_xgmi_hive(hive);
d4535e2c
AG
3591}
3592
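/*
 * Editor's illustrative sketch (not part of the driver): a standalone
 * pthread model of the enter/exit barrier pattern used by the XGMI BACO
 * reset above.  Every node must finish "enter BACO" before any node
 * starts "exit BACO"; a barrier between the two steps provides that, in
 * the same spirit as task_barrier_enter()/task_barrier_exit().  The node
 * count and messages are invented for the example.  Build with -pthread.
 */
#include <pthread.h>
#include <stdio.h>

#define MODEL_NODES 4

static pthread_barrier_t model_tb;

static void *model_xgmi_reset_work(void *arg)
{
	long node = (long)arg;

	printf("node %ld: enter BACO\n", node);
	pthread_barrier_wait(&model_tb);	/* all nodes have entered */

	printf("node %ld: exit BACO\n", node);
	pthread_barrier_wait(&model_tb);	/* all nodes have exited */
	return NULL;
}

int main(void)
{
	pthread_t threads[MODEL_NODES];

	pthread_barrier_init(&model_tb, NULL, MODEL_NODES);
	for (long i = 0; i < MODEL_NODES; i++)
		pthread_create(&threads[i], NULL, model_xgmi_reset_work, (void *)i);
	for (int i = 0; i < MODEL_NODES; i++)
		pthread_join(threads[i], NULL);
	pthread_barrier_destroy(&model_tb);
	return 0;
}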
71f98027
AD
3593static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3594{
3595 char *input = amdgpu_lockup_timeout;
3596 char *timeout_setting = NULL;
3597 int index = 0;
3598 long timeout;
3599 int ret = 0;
3600
3601 /*
67387dfe
AD
3602 * By default the timeout for non-compute jobs is 10000
3603 * and 60000 for compute jobs.
71f98027 3604 * In SR-IOV or passthrough mode, the timeout for compute
b7b2a316 3605 * jobs is 60000 by default.
71f98027
AD
3606 */
3607 adev->gfx_timeout = msecs_to_jiffies(10000);
3608 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
9882e278
ED
3609 if (amdgpu_sriov_vf(adev))
3610 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3611 msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
71f98027 3612 else
67387dfe 3613 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027 3614
f440ff44 3615 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 3616 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 3617 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
3618 ret = kstrtol(timeout_setting, 0, &timeout);
3619 if (ret)
3620 return ret;
3621
3622 if (timeout == 0) {
3623 index++;
3624 continue;
3625 } else if (timeout < 0) {
3626 timeout = MAX_SCHEDULE_TIMEOUT;
127aedf9
CK
3627 dev_warn(adev->dev, "lockup timeout disabled");
3628 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
71f98027
AD
3629 } else {
3630 timeout = msecs_to_jiffies(timeout);
3631 }
3632
3633 switch (index++) {
3634 case 0:
3635 adev->gfx_timeout = timeout;
3636 break;
3637 case 1:
3638 adev->compute_timeout = timeout;
3639 break;
3640 case 2:
3641 adev->sdma_timeout = timeout;
3642 break;
3643 case 3:
3644 adev->video_timeout = timeout;
3645 break;
3646 default:
3647 break;
3648 }
3649 }
3650 /*
3651 * There is only one value specified and
3652 * it should apply to all non-compute jobs.
3653 */
bcccee89 3654 if (index == 1) {
71f98027 3655 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
3656 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3657 adev->compute_timeout = adev->gfx_timeout;
3658 }
71f98027
AD
3659 }
3660
3661 return ret;
3662}
d4535e2c 3663
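/*
 * Editor's illustrative sketch (not part of the driver): a standalone C
 * model of how the comma-separated amdgpu.lockup_timeout string parsed
 * above maps onto the gfx/compute/sdma/video timeouts.  Zero keeps the
 * default, a negative value disables the timeout, and a single value is
 * applied to all non-compute queues.  The defaults and the example
 * parameter string are simplified stand-ins.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	long timeout_ms[4] = { 10000, 60000, 10000, 10000 };
	const char *names[4] = { "gfx", "compute", "sdma", "video" };
	char input[] = "7000";			/* e.g. amdgpu.lockup_timeout=7000 */
	char *cursor = input, *tok;
	int index = 0;

	while ((tok = strsep(&cursor, ",")) && index < 4) {
		long val = strtol(tok, NULL, 0);

		if (val < 0)
			timeout_ms[index] = -1;	/* disabled, like MAX_SCHEDULE_TIMEOUT */
		else if (val > 0)
			timeout_ms[index] = val;
		index++;			/* zero keeps the default */
	}

	if (index == 1)		/* one value: apply it to every non-compute queue */
		timeout_ms[2] = timeout_ms[3] = timeout_ms[0];

	for (int i = 0; i < 4; i++)
		printf("%s timeout: %ld ms\n", names[i], timeout_ms[i]);
	return 0;
}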
4a74c38c
PY
3664/**
3665 * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3666 *
3667 * @adev: amdgpu_device pointer
3668 *
3669 * RAM is direct mapped to the GPU if the IOMMU is not enabled or is in passthrough mode
3670 */
3671static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3672{
3673 struct iommu_domain *domain;
3674
3675 domain = iommu_get_domain_for_dev(adev->dev);
3676 if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3677 adev->ram_is_direct_mapped = true;
3678}
3679
77f3a5cd 3680static const struct attribute *amdgpu_dev_attributes[] = {
77f3a5cd
ND
3681 &dev_attr_pcie_replay_count.attr,
3682 NULL
3683};
3684
02ff519e
AD
3685static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
3686{
3687 if (amdgpu_mcbp == 1)
3688 adev->gfx.mcbp = true;
1e9e15dc
JZ
3689 else if (amdgpu_mcbp == 0)
3690 adev->gfx.mcbp = false;
4e8303cf
LL
3691 else if ((amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 0, 0)) &&
3692 (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 0, 0)) &&
1e9e15dc 3693 adev->gfx.num_gfx_rings)
50a7c876
AD
3694 adev->gfx.mcbp = true;
3695
02ff519e
AD
3696 if (amdgpu_sriov_vf(adev))
3697 adev->gfx.mcbp = true;
3698
3699 if (adev->gfx.mcbp)
3700 DRM_INFO("MCBP is enabled\n");
3701}
3702
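/*
 * Editor's illustrative sketch (not part of the driver): a standalone C
 * model of the force-on / force-off / auto decision amdgpu_device_set_mcbp()
 * makes from the amdgpu.mcbp module parameter.  The GC major version check
 * and the SR-IOV override are simplified stand-ins for the real IP-version
 * and virtualization queries.
 */
#include <stdbool.h>
#include <stdio.h>

static bool model_decide_mcbp(int param, int gc_major, bool has_gfx_rings,
			      bool sriov)
{
	bool mcbp;

	if (param == 1)
		mcbp = true;			/* explicitly forced on */
	else if (param == 0)
		mcbp = false;			/* explicitly forced off */
	else					/* auto: gfx9 ASICs with gfx rings */
		mcbp = (gc_major == 9) && has_gfx_rings;

	if (sriov)				/* virtual functions always enable it */
		mcbp = true;

	return mcbp;
}

int main(void)
{
	printf("auto,  gfx9:  %d\n", model_decide_mcbp(-1, 9, true, false));
	printf("auto,  gfx10: %d\n", model_decide_mcbp(-1, 10, true, false));
	printf("forced off:   %d\n", model_decide_mcbp(0, 9, true, false));
	printf("SR-IOV:       %d\n", model_decide_mcbp(0, 10, false, true));
	return 0;
}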
d38ceaf9
AD
3703/**
3704 * amdgpu_device_init - initialize the driver
3705 *
3706 * @adev: amdgpu_device pointer
d38ceaf9
AD
3707 * @flags: driver flags
3708 *
3709 * Initializes the driver info and hw (all asics).
3710 * Returns 0 for success or an error on failure.
3711 * Called at driver startup.
3712 */
3713int amdgpu_device_init(struct amdgpu_device *adev,
d38ceaf9
AD
3714 uint32_t flags)
3715{
8aba21b7
LT
3716 struct drm_device *ddev = adev_to_drm(adev);
3717 struct pci_dev *pdev = adev->pdev;
d38ceaf9 3718 int r, i;
b98c6299 3719 bool px = false;
95844d20 3720 u32 max_MBps;
59e9fff1 3721 int tmp;
d38ceaf9
AD
3722
3723 adev->shutdown = false;
d38ceaf9 3724 adev->flags = flags;
4e66d7d2
YZ
3725
3726 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3727 adev->asic_type = amdgpu_force_asic_type;
3728 else
3729 adev->asic_type = flags & AMD_ASIC_MASK;
3730
d38ceaf9 3731 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 3732 if (amdgpu_emu_mode == 1)
8bdab6bb 3733 adev->usec_timeout *= 10;
770d13b1 3734 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
3735 adev->accel_working = false;
3736 adev->num_rings = 0;
68ce8b24 3737 RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
d38ceaf9
AD
3738 adev->mman.buffer_funcs = NULL;
3739 adev->mman.buffer_funcs_ring = NULL;
3740 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 3741 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 3742 adev->gmc.gmc_funcs = NULL;
7bd939d0 3743 adev->harvest_ip_mask = 0x0;
f54d1867 3744 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 3745 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
3746
3747 adev->smc_rreg = &amdgpu_invalid_rreg;
3748 adev->smc_wreg = &amdgpu_invalid_wreg;
3749 adev->pcie_rreg = &amdgpu_invalid_rreg;
3750 adev->pcie_wreg = &amdgpu_invalid_wreg;
0c552ed3
LM
3751 adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
3752 adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
36b9a952
HR
3753 adev->pciep_rreg = &amdgpu_invalid_rreg;
3754 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
3755 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3756 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
a76b2870
CL
3757 adev->pcie_rreg64_ext = &amdgpu_invalid_rreg64_ext;
3758 adev->pcie_wreg64_ext = &amdgpu_invalid_wreg64_ext;
d38ceaf9
AD
3759 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3760 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3761 adev->didt_rreg = &amdgpu_invalid_rreg;
3762 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
3763 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3764 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
3765 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3766 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3767
3e39ab90
AD
3768 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3769 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3770 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
3771
3772 /* mutex initialization are all done here so we
b8920e1e
SS
3773 * can recall function without having locking issues
3774 */
0e5ca0d1 3775 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
3776 mutex_init(&adev->pm.mutex);
3777 mutex_init(&adev->gfx.gpu_clock_mutex);
3778 mutex_init(&adev->srbm_mutex);
b8866c26 3779 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3780 mutex_init(&adev->gfx.gfx_off_mutex);
98a54e88 3781 mutex_init(&adev->gfx.partition_mutex);
d38ceaf9 3782 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3783 mutex_init(&adev->mn_lock);
e23b74aa 3784 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3785 hash_init(adev->mn_hash);
32eaeae0 3786 mutex_init(&adev->psp.mutex);
bd052211 3787 mutex_init(&adev->notifier_lock);
8cda7a4f 3788 mutex_init(&adev->pm.stable_pstate_ctx_lock);
f113cc32 3789 mutex_init(&adev->benchmark_mutex);
d38ceaf9 3790
ab3b9de6 3791 amdgpu_device_init_apu_flags(adev);
9f6a7857 3792
912dfc84
EQ
3793 r = amdgpu_device_check_arguments(adev);
3794 if (r)
3795 return r;
d38ceaf9 3796
d38ceaf9
AD
3797 spin_lock_init(&adev->mmio_idx_lock);
3798 spin_lock_init(&adev->smc_idx_lock);
3799 spin_lock_init(&adev->pcie_idx_lock);
3800 spin_lock_init(&adev->uvd_ctx_idx_lock);
3801 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3802 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3803 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3804 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3805 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3806
0c4e7fa5
CZ
3807 INIT_LIST_HEAD(&adev->shadow_list);
3808 mutex_init(&adev->shadow_list_lock);
3809
655ce9cb 3810 INIT_LIST_HEAD(&adev->reset_list);
3811
6492e1b0 3812 INIT_LIST_HEAD(&adev->ras_list);
3813
3e38b634
EQ
3814 INIT_LIST_HEAD(&adev->pm.od_kobj_list);
3815
beff74bc
AD
3816 INIT_DELAYED_WORK(&adev->delayed_init_work,
3817 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3818 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3819 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3820
d4535e2c
AG
3821 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3822
d23ee13f 3823 adev->gfx.gfx_off_req_count = 1;
0ad7347a
AA
3824 adev->gfx.gfx_off_residency = 0;
3825 adev->gfx.gfx_off_entrycount = 0;
b6e79d9a 3826 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3827
b265bdbd
EQ
3828 atomic_set(&adev->throttling_logging_enabled, 1);
3829 /*
3830 * If throttling continues, logging will be performed every minute
3831 * to avoid log flooding. "-1" is subtracted since the thermal
3832 * throttling interrupt comes every second. Thus, the total logging
3833 * interval is 59 seconds(retelimited printk interval) + 1(waiting
3834 * for throttling interrupt) = 60 seconds.
3835 */
3836 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3837 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3838
0fa49558
AX
3839 /* Registers mapping */
3840 /* TODO: block userspace mapping of io register */
da69c161
KW
3841 if (adev->asic_type >= CHIP_BONAIRE) {
3842 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3843 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3844 } else {
3845 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3846 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3847 }
d38ceaf9 3848
6c08e0ef
EQ
3849 for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3850 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3851
d38ceaf9 3852 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
b8920e1e 3853 if (!adev->rmmio)
d38ceaf9 3854 return -ENOMEM;
b8920e1e 3855
d38ceaf9 3856 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
b8920e1e 3857 DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size);
d38ceaf9 3858
436afdfa
PY
3859 /*
3860 * The reset domain needs to be present early, before the XGMI hive is
3861 * discovered (if any) and initialized, so the reset semaphore and in_gpu
3862 * reset flag can be used early during init and before calling RREG32.
3863 */
3864 adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3865 if (!adev->reset_domain)
3866 return -ENOMEM;
3867
3aa0115d
ML
3868 /* detect hw virtualization here */
3869 amdgpu_detect_virtualization(adev);
3870
04e85958
TL
3871 amdgpu_device_get_pcie_info(adev);
3872
dffa11b4
ML
3873 r = amdgpu_device_get_job_timeout_settings(adev);
3874 if (r) {
3875 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4ef87d8f 3876 return r;
a190d1c7
XY
3877 }
3878
d38ceaf9 3879 /* early init functions */
06ec9070 3880 r = amdgpu_device_ip_early_init(adev);
d38ceaf9 3881 if (r)
4ef87d8f 3882 return r;
d38ceaf9 3883
02ff519e
AD
3884 amdgpu_device_set_mcbp(adev);
3885
b7cdb41e
ML
3886 /* Get rid of things like offb */
3887 r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
3888 if (r)
3889 return r;
3890
4d33e704
SK
3891 /* Enable TMZ based on IP_VERSION */
3892 amdgpu_gmc_tmz_set(adev);
3893
957b0787 3894 amdgpu_gmc_noretry_set(adev);
4a0165f0
VS
3895 /* Need to get xgmi info early to decide the reset behavior*/
3896 if (adev->gmc.xgmi.supported) {
3897 r = adev->gfxhub.funcs->get_xgmi_info(adev);
3898 if (r)
3899 return r;
3900 }
3901
8e6d0b69 3902 /* enable PCIE atomic ops */
b4520bfd
GW
3903 if (amdgpu_sriov_vf(adev)) {
3904 if (adev->virt.fw_reserve.p_pf2vf)
3905 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
3906 adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
3907 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
0e768043
YZ
3908 /* APUs with gfx9 onwards don't rely on PCIe atomics; their
3909 * internal path natively supports atomics, so set have_atomics_support to true.
3910 */
b4520bfd 3911 } else if ((adev->flags & AMD_IS_APU) &&
4e8303cf
LL
3912 (amdgpu_ip_version(adev, GC_HWIP, 0) >
3913 IP_VERSION(9, 0, 0))) {
0e768043 3914 adev->have_atomics_support = true;
b4520bfd 3915 } else {
8e6d0b69 3916 adev->have_atomics_support =
3917 !pci_enable_atomic_ops_to_root(adev->pdev,
3918 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3919 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
b4520bfd
GW
3920 }
3921
8e6d0b69 3922 if (!adev->have_atomics_support)
3923 dev_info(adev->dev, "PCIE atomic ops is not supported\n");
3924
6585661d 3925 /* doorbell bar mapping and doorbell index init*/
43c064db 3926 amdgpu_doorbell_init(adev);
6585661d 3927
9475a943
SL
3928 if (amdgpu_emu_mode == 1) {
3929 /* post the asic on emulation mode */
3930 emu_soc_asic_init(adev);
bfca0289 3931 goto fence_driver_init;
9475a943 3932 }
bfca0289 3933
04442bf7
LL
3934 amdgpu_reset_init(adev);
3935
4e99a44e 3936 /* detect if we are with an SRIOV vbios */
b4520bfd
GW
3937 if (adev->bios)
3938 amdgpu_device_detect_sriov_bios(adev);
048765ad 3939
95e8e59e
AD
3940 /* check if we need to reset the asic
3941 * E.g., driver was not cleanly unloaded previously, etc.
3942 */
f14899fd 3943 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
e3c1b071 3944 if (adev->gmc.xgmi.num_physical_nodes) {
3945 dev_info(adev->dev, "Pending hive reset.\n");
3946 adev->gmc.xgmi.pending_reset = true;
3947 /* Only need to init the blocks necessary for SMU to handle the reset */
3948 for (i = 0; i < adev->num_ip_blocks; i++) {
3949 if (!adev->ip_blocks[i].status.valid)
3950 continue;
3951 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3952 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3953 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3954 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
751f43e7 3955 DRM_DEBUG("IP %s disabled for hw_init.\n",
e3c1b071 3956 adev->ip_blocks[i].version->funcs->name);
3957 adev->ip_blocks[i].status.hw = true;
3958 }
3959 }
3960 } else {
59e9fff1 3961 tmp = amdgpu_reset_method;
3962 /* It should do a default reset when loading or reloading the driver,
3963 * regardless of the module parameter reset_method.
3964 */
3965 amdgpu_reset_method = AMD_RESET_METHOD_NONE;
e3c1b071 3966 r = amdgpu_asic_reset(adev);
59e9fff1 3967 amdgpu_reset_method = tmp;
e3c1b071 3968 if (r) {
3969 dev_err(adev->dev, "asic reset on init failed\n");
3970 goto failed;
3971 }
95e8e59e
AD
3972 }
3973 }
3974
d38ceaf9 3975 /* Post card if necessary */
39c640c0 3976 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3977 if (!adev->bios) {
bec86378 3978 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3979 r = -EINVAL;
3980 goto failed;
d38ceaf9 3981 }
bec86378 3982 DRM_INFO("GPU posting now...\n");
4d2997ab 3983 r = amdgpu_device_asic_init(adev);
4e99a44e
ML
3984 if (r) {
3985 dev_err(adev->dev, "gpu post error!\n");
3986 goto failed;
3987 }
d38ceaf9
AD
3988 }
3989
9535a86a
SZ
3990 if (adev->bios) {
3991 if (adev->is_atom_fw) {
3992 /* Initialize clocks */
3993 r = amdgpu_atomfirmware_get_clock_info(adev);
3994 if (r) {
3995 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
3996 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3997 goto failed;
3998 }
3999 } else {
4000 /* Initialize clocks */
4001 r = amdgpu_atombios_get_clock_info(adev);
4002 if (r) {
4003 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
4004 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4005 goto failed;
4006 }
4007 /* init i2c buses */
4008 if (!amdgpu_device_has_dc_support(adev))
4009 amdgpu_atombios_i2c_init(adev);
a5bde2f9 4010 }
2c1a2784 4011 }
d38ceaf9 4012
bfca0289 4013fence_driver_init:
d38ceaf9 4014 /* Fence driver */
067f44c8 4015 r = amdgpu_fence_driver_sw_init(adev);
2c1a2784 4016 if (r) {
067f44c8 4017 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
e23b74aa 4018 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 4019 goto failed;
2c1a2784 4020 }
d38ceaf9
AD
4021
4022 /* init the mode config */
4a580877 4023 drm_mode_config_init(adev_to_drm(adev));
d38ceaf9 4024
06ec9070 4025 r = amdgpu_device_ip_init(adev);
d38ceaf9 4026 if (r) {
06ec9070 4027 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 4028 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
970fd197 4029 goto release_ras_con;
d38ceaf9
AD
4030 }
4031
8d35a259
LG
4032 amdgpu_fence_driver_hw_init(adev);
4033
d69b8971
YZ
4034 dev_info(adev->dev,
4035 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
4036 adev->gfx.config.max_shader_engines,
4037 adev->gfx.config.max_sh_per_se,
4038 adev->gfx.config.max_cu_per_sh,
4039 adev->gfx.cu_info.number);
4040
d38ceaf9
AD
4041 adev->accel_working = true;
4042
e59c0205
AX
4043 amdgpu_vm_check_compute_bug(adev);
4044
95844d20
MO
4045 /* Initialize the buffer migration limit. */
4046 if (amdgpu_moverate >= 0)
4047 max_MBps = amdgpu_moverate;
4048 else
4049 max_MBps = 8; /* Allow 8 MB/s. */
4050 /* Get a log2 for easy divisions. */
4051 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
4052
b0adca4d
EQ
4053 /*
4054 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
4055 * Otherwise the mgpu fan boost feature will be skipped because the
4056 * gpu instance count would be too low.
4057 */
4058 amdgpu_register_gpu_instance(adev);
4059
d38ceaf9
AD
4060 /* enable clockgating, etc. after ib tests, etc. since some blocks require
4061 * explicit gating rather than handling it automatically.
4062 */
e3c1b071 4063 if (!adev->gmc.xgmi.pending_reset) {
4064 r = amdgpu_device_ip_late_init(adev);
4065 if (r) {
4066 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
4067 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
970fd197 4068 goto release_ras_con;
e3c1b071 4069 }
4070 /* must succeed. */
4071 amdgpu_ras_resume(adev);
4072 queue_delayed_work(system_wq, &adev->delayed_init_work,
4073 msecs_to_jiffies(AMDGPU_RESUME_MS));
2c1a2784 4074 }
d38ceaf9 4075
38eecbe0
CL
4076 if (amdgpu_sriov_vf(adev)) {
4077 amdgpu_virt_release_full_gpu(adev, true);
2c738637 4078 flush_delayed_work(&adev->delayed_init_work);
38eecbe0 4079 }
2c738637 4080
90bcb9b5
EQ
4081 /*
4082 * Register these sysfs interfaces after `late_init`, since some of the
4083 * operations performed in `late_init` might affect how the sysfs
4084 * interfaces are created.
4085 */
4086 r = amdgpu_atombios_sysfs_init(adev);
4087 if (r)
4088 drm_err(&adev->ddev,
4089 "registering atombios sysfs failed (%d).\n", r);
4090
4091 r = amdgpu_pm_sysfs_init(adev);
4092 if (r)
4093 DRM_ERROR("registering pm sysfs failed (%d).\n", r);
4094
4095 r = amdgpu_ucode_sysfs_init(adev);
4096 if (r) {
4097 adev->ucode_sysfs_en = false;
4098 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
4099 } else
4100 adev->ucode_sysfs_en = true;
4101
77f3a5cd 4102 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
5aea5327 4103 if (r)
77f3a5cd 4104 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166 4105
76da73f0
LL
4106 r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
4107 if (r)
4108 dev_err(adev->dev,
4109 "Could not create amdgpu board attributes\n");
4110
7957ec80
LL
4111 amdgpu_fru_sysfs_init(adev);
4112
d155bef0
AB
4113 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4114 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
4115 if (r)
4116 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
4117
c1dd4aa6
AG
4118 /* Have stored pci confspace at hand for restore in sudden PCI error */
4119 if (amdgpu_device_cache_pci_state(adev->pdev))
4120 pci_restore_state(pdev);
4121
8c3dd61c
KHF
4122 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
4123 /* this will fail for cards that aren't VGA class devices, just
b8920e1e
SS
4124 * ignore it
4125 */
8c3dd61c 4126 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
bf44e8ce 4127 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
8c3dd61c 4128
d37a3929
OC
4129 px = amdgpu_device_supports_px(ddev);
4130
4131 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
4132 apple_gmux_detect(NULL, NULL)))
8c3dd61c
KHF
4133 vga_switcheroo_register_client(adev->pdev,
4134 &amdgpu_switcheroo_ops, px);
d37a3929
OC
4135
4136 if (px)
8c3dd61c 4137 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
8c3dd61c 4138
e3c1b071 4139 if (adev->gmc.xgmi.pending_reset)
4140 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
4141 msecs_to_jiffies(AMDGPU_RESUME_MS));
4142
4a74c38c
PY
4143 amdgpu_device_check_iommu_direct_map(adev);
4144
d38ceaf9 4145 return 0;
83ba126a 4146
970fd197 4147release_ras_con:
38eecbe0
CL
4148 if (amdgpu_sriov_vf(adev))
4149 amdgpu_virt_release_full_gpu(adev, true);
4150
4151 /* failed in exclusive mode due to timeout */
4152 if (amdgpu_sriov_vf(adev) &&
4153 !amdgpu_sriov_runtime(adev) &&
4154 amdgpu_virt_mmio_blocked(adev) &&
4155 !amdgpu_virt_wait_reset(adev)) {
4156 dev_err(adev->dev, "VF exclusive mode timeout\n");
4157 /* Don't send request since VF is inactive. */
4158 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4159 adev->virt.ops = NULL;
4160 r = -EAGAIN;
4161 }
970fd197
SY
4162 amdgpu_release_ras_context(adev);
4163
83ba126a 4164failed:
89041940 4165 amdgpu_vf_error_trans_all(adev);
8840a387 4166
83ba126a 4167 return r;
d38ceaf9
AD
4168}
4169
07775fc1
AG
4170static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
4171{
62d5f9f7 4172
07775fc1
AG
4173 /* Clear all CPU mappings pointing to this device */
4174 unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
4175
4176 /* Unmap all mapped bars - Doorbell, registers and VRAM */
43c064db 4177 amdgpu_doorbell_fini(adev);
07775fc1
AG
4178
4179 iounmap(adev->rmmio);
4180 adev->rmmio = NULL;
4181 if (adev->mman.aper_base_kaddr)
4182 iounmap(adev->mman.aper_base_kaddr);
4183 adev->mman.aper_base_kaddr = NULL;
4184
4185 /* Memory manager related */
a0ba1279 4186 if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
07775fc1
AG
4187 arch_phys_wc_del(adev->gmc.vram_mtrr);
4188 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4189 }
4190}
4191
d38ceaf9 4192/**
bbe04dec 4193 * amdgpu_device_fini_hw - tear down the driver
d38ceaf9
AD
4194 *
4195 * @adev: amdgpu_device pointer
4196 *
4197 * Tear down the driver info (all asics).
4198 * Called at driver shutdown.
4199 */
72c8c97b 4200void amdgpu_device_fini_hw(struct amdgpu_device *adev)
d38ceaf9 4201{
aac89168 4202 dev_info(adev->dev, "amdgpu: finishing device.\n");
9f875167 4203 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 4204 adev->shutdown = true;
9f875167 4205
752c683d
ML
4206 /* make sure the IB test has finished before entering exclusive mode
4207 * to avoid preempting the IB test
b8920e1e 4208 */
519b8b76 4209 if (amdgpu_sriov_vf(adev)) {
752c683d 4210 amdgpu_virt_request_full_gpu(adev, false);
519b8b76
BZ
4211 amdgpu_virt_fini_data_exchange(adev);
4212 }
752c683d 4213
e5b03032
ML
4214 /* disable all interrupts */
4215 amdgpu_irq_disable_all(adev);
47fc644f 4216 if (adev->mode_info.mode_config_initialized) {
1053b9c9 4217 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4a580877 4218 drm_helper_force_disable_all(adev_to_drm(adev));
ff97cba8 4219 else
4a580877 4220 drm_atomic_helper_shutdown(adev_to_drm(adev));
ff97cba8 4221 }
8d35a259 4222 amdgpu_fence_driver_hw_fini(adev);
72c8c97b 4223
cd3a8a59 4224 if (adev->mman.initialized)
9bff18d1 4225 drain_workqueue(adev->mman.bdev.wq);
98f56188 4226
53e9d836 4227 if (adev->pm.sysfs_initialized)
7c868b59 4228 amdgpu_pm_sysfs_fini(adev);
72c8c97b
AG
4229 if (adev->ucode_sysfs_en)
4230 amdgpu_ucode_sysfs_fini(adev);
4231 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
7957ec80 4232 amdgpu_fru_sysfs_fini(adev);
72c8c97b 4233
232d1d43
SY
4234 /* disable ras feature must before hw fini */
4235 amdgpu_ras_pre_fini(adev);
4236
e9669fb7 4237 amdgpu_device_ip_fini_early(adev);
d10d0daa 4238
a3848df6
YW
4239 amdgpu_irq_fini_hw(adev);
4240
b6fd6e0f
SK
4241 if (adev->mman.initialized)
4242 ttm_device_clear_dma_mappings(&adev->mman.bdev);
894c6890 4243
d10d0daa 4244 amdgpu_gart_dummy_page_fini(adev);
07775fc1 4245
39934d3e
VP
4246 if (drm_dev_is_unplugged(adev_to_drm(adev)))
4247 amdgpu_device_unmap_mmio(adev);
87172e89 4248
72c8c97b
AG
4249}
4250
4251void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4252{
62d5f9f7 4253 int idx;
d37a3929 4254 bool px;
62d5f9f7 4255
8d35a259 4256 amdgpu_fence_driver_sw_fini(adev);
a5c5d8d5 4257 amdgpu_device_ip_fini(adev);
b31d3063 4258 amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
d38ceaf9 4259 adev->accel_working = false;
68ce8b24 4260 dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
04442bf7
LL
4261
4262 amdgpu_reset_fini(adev);
4263
d38ceaf9 4264 /* free i2c buses */
4562236b
HW
4265 if (!amdgpu_device_has_dc_support(adev))
4266 amdgpu_i2c_fini(adev);
bfca0289
SL
4267
4268 if (amdgpu_emu_mode != 1)
4269 amdgpu_atombios_fini(adev);
4270
d38ceaf9
AD
4271 kfree(adev->bios);
4272 adev->bios = NULL;
d37a3929 4273
8a2b5139
LL
4274 kfree(adev->fru_info);
4275 adev->fru_info = NULL;
4276
d37a3929
OC
4277 px = amdgpu_device_supports_px(adev_to_drm(adev));
4278
4279 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
4280 apple_gmux_detect(NULL, NULL)))
84c8b22e 4281 vga_switcheroo_unregister_client(adev->pdev);
d37a3929
OC
4282
4283 if (px)
83ba126a 4284 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d37a3929 4285
38d6be81 4286 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
b8779475 4287 vga_client_unregister(adev->pdev);
e9bc1bf7 4288
62d5f9f7
LS
4289 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4290
4291 iounmap(adev->rmmio);
4292 adev->rmmio = NULL;
43c064db 4293 amdgpu_doorbell_fini(adev);
62d5f9f7
LS
4294 drm_dev_exit(idx);
4295 }
4296
d155bef0
AB
4297 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4298 amdgpu_pmu_fini(adev);
72de33f8 4299 if (adev->mman.discovery_bin)
a190d1c7 4300 amdgpu_discovery_fini(adev);
72c8c97b 4301
cfbb6b00
AG
4302 amdgpu_reset_put_reset_domain(adev->reset_domain);
4303 adev->reset_domain = NULL;
4304
72c8c97b
AG
4305 kfree(adev->pci_state);
4306
d38ceaf9
AD
4307}
4308
58144d28
ND
4309/**
4310 * amdgpu_device_evict_resources - evict device resources
4311 * @adev: amdgpu device object
4312 *
4313 * Evicts all ttm device resources(vram BOs, gart table) from the lru list
4314 * of the vram memory type. Mainly used for evicting device resources
4315 * at suspend time.
4316 *
4317 */
7863c155 4318static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
58144d28 4319{
7863c155
ML
4320 int ret;
4321
e53d9665
ML
4322 /* No need to evict vram on APUs for suspend to ram or s2idle */
4323 if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
7863c155 4324 return 0;
58144d28 4325
7863c155
ML
4326 ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4327 if (ret)
58144d28 4328 DRM_WARN("evicting device resources failed\n");
7863c155 4329 return ret;
58144d28 4330}
d38ceaf9
AD
4331
4332/*
4333 * Suspend & resume.
4334 */
5095d541
ML
4335/**
4336 * amdgpu_device_prepare - prepare for device suspend
4337 *
4338 * @dev: drm dev pointer
4339 *
4340 * Prepare to put the hw in the suspend state (all asics).
4341 * Returns 0 for success or an error on failure.
4342 * Called at driver suspend.
4343 */
4344int amdgpu_device_prepare(struct drm_device *dev)
4345{
4346 struct amdgpu_device *adev = drm_to_adev(dev);
cb11ca32 4347 int i, r;
5095d541
ML
4348
4349 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4350 return 0;
4351
4352 /* Evict the majority of BOs before starting suspend sequence */
4353 r = amdgpu_device_evict_resources(adev);
4354 if (r)
4355 return r;
4356
cb11ca32
ML
4357 for (i = 0; i < adev->num_ip_blocks; i++) {
4358 if (!adev->ip_blocks[i].status.valid)
4359 continue;
4360 if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
4361 continue;
4362 r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev);
4363 if (r)
4364 return r;
4365 }
4366
5095d541
ML
4367 return 0;
4368}
4369
d38ceaf9 4370/**
810ddc3a 4371 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 4372 *
87e3f136 4373 * @dev: drm dev pointer
87e3f136 4374 * @fbcon: notify the fbdev of suspend
d38ceaf9
AD
4375 *
4376 * Puts the hw in the suspend state (all asics).
4377 * Returns 0 for success or an error on failure.
4378 * Called at driver suspend.
4379 */
de185019 4380int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9 4381{
a2e15b0e 4382 struct amdgpu_device *adev = drm_to_adev(dev);
d7274ec7 4383 int r = 0;
d38ceaf9 4384
d38ceaf9
AD
4385 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4386 return 0;
4387
44779b43 4388 adev->in_suspend = true;
3fa8f89d 4389
d7274ec7
BZ
4390 if (amdgpu_sriov_vf(adev)) {
4391 amdgpu_virt_fini_data_exchange(adev);
4392 r = amdgpu_virt_request_full_gpu(adev, false);
4393 if (r)
4394 return r;
4395 }
4396
3fa8f89d
S
4397 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4398 DRM_WARN("smart shift update failed\n");
4399
5f818173 4400 if (fbcon)
087451f3 4401 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
5f818173 4402
beff74bc 4403 cancel_delayed_work_sync(&adev->delayed_init_work);
0dee7263 4404 flush_delayed_work(&adev->gfx.gfx_off_delay_work);
a5459475 4405
5e6932fe 4406 amdgpu_ras_suspend(adev);
4407
2196927b 4408 amdgpu_device_ip_suspend_phase1(adev);
fe1053b7 4409
c004d44e 4410 if (!adev->in_s0ix)
5d3a2d95 4411 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
94fa5660 4412
7863c155
ML
4413 r = amdgpu_device_evict_resources(adev);
4414 if (r)
4415 return r;
d38ceaf9 4416
8d35a259 4417 amdgpu_fence_driver_hw_fini(adev);
d38ceaf9 4418
2196927b 4419 amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 4420
d7274ec7
BZ
4421 if (amdgpu_sriov_vf(adev))
4422 amdgpu_virt_release_full_gpu(adev, false);
4423
d38ceaf9
AD
4424 return 0;
4425}
4426
4427/**
810ddc3a 4428 * amdgpu_device_resume - initiate device resume
d38ceaf9 4429 *
87e3f136 4430 * @dev: drm dev pointer
87e3f136 4431 * @fbcon: notify the fbdev of resume
d38ceaf9
AD
4432 *
4433 * Bring the hw back to operating state (all asics).
4434 * Returns 0 for success or an error on failure.
4435 * Called at driver resume.
4436 */
de185019 4437int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9 4438{
1348969a 4439 struct amdgpu_device *adev = drm_to_adev(dev);
03161a6e 4440 int r = 0;
d38ceaf9 4441
d7274ec7
BZ
4442 if (amdgpu_sriov_vf(adev)) {
4443 r = amdgpu_virt_request_full_gpu(adev, true);
4444 if (r)
4445 return r;
4446 }
4447
d38ceaf9
AD
4448 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4449 return 0;
4450
62498733 4451 if (adev->in_s0ix)
bc143d8b 4452 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
628c36d7 4453
d38ceaf9 4454 /* post card */
39c640c0 4455 if (amdgpu_device_need_post(adev)) {
4d2997ab 4456 r = amdgpu_device_asic_init(adev);
74b0b157 4457 if (r)
aac89168 4458 dev_err(adev->dev, "amdgpu asic init failed\n");
74b0b157 4459 }
d38ceaf9 4460
06ec9070 4461 r = amdgpu_device_ip_resume(adev);
d7274ec7 4462
e6707218 4463 if (r) {
aac89168 4464 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
3c22c1ea 4465 goto exit;
e6707218 4466 }
8d35a259 4467 amdgpu_fence_driver_hw_init(adev);
5ceb54c6 4468
06ec9070 4469 r = amdgpu_device_ip_late_init(adev);
03161a6e 4470 if (r)
3c22c1ea 4471 goto exit;
d38ceaf9 4472
beff74bc
AD
4473 queue_delayed_work(system_wq, &adev->delayed_init_work,
4474 msecs_to_jiffies(AMDGPU_RESUME_MS));
4475
c004d44e 4476 if (!adev->in_s0ix) {
5d3a2d95
AD
4477 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4478 if (r)
3c22c1ea 4479 goto exit;
5d3a2d95 4480 }
756e6880 4481
3c22c1ea
SF
4482exit:
4483 if (amdgpu_sriov_vf(adev)) {
4484 amdgpu_virt_init_data_exchange(adev);
4485 amdgpu_virt_release_full_gpu(adev, true);
4486 }
4487
4488 if (r)
4489 return r;
4490
96a5d8d4 4491 /* Make sure IB tests flushed */
beff74bc 4492 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 4493
a2e15b0e 4494 if (fbcon)
087451f3 4495 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
d38ceaf9 4496
5e6932fe 4497 amdgpu_ras_resume(adev);
4498
d09ef243
AD
4499 if (adev->mode_info.num_crtc) {
4500 /*
4501 * Most of the connector probing functions try to acquire runtime pm
4502 * refs to ensure that the GPU is powered on when connector polling is
4503 * performed. Since we're calling this from a runtime PM callback,
4504 * trying to acquire rpm refs will cause us to deadlock.
4505 *
4506 * Since we're guaranteed to be holding the rpm lock, it's safe to
4507 * temporarily disable the rpm helpers so this doesn't deadlock us.
4508 */
23a1a9e5 4509#ifdef CONFIG_PM
d09ef243 4510 dev->dev->power.disable_depth++;
23a1a9e5 4511#endif
d09ef243
AD
4512 if (!adev->dc_enabled)
4513 drm_helper_hpd_irq_event(dev);
4514 else
4515 drm_kms_helper_hotplug_event(dev);
23a1a9e5 4516#ifdef CONFIG_PM
d09ef243 4517 dev->dev->power.disable_depth--;
23a1a9e5 4518#endif
d09ef243 4519 }
44779b43
RZ
4520 adev->in_suspend = false;
4521
dc907c9d
JX
4522 if (adev->enable_mes)
4523 amdgpu_mes_self_test(adev);
4524
3fa8f89d
S
4525 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4526 DRM_WARN("smart shift update failed\n");
4527
4d3b9ae5 4528 return 0;
d38ceaf9
AD
4529}
4530
e3ecdffa
AD
4531/**
4532 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4533 *
4534 * @adev: amdgpu_device pointer
4535 *
4536 * The list of all the hardware IPs that make up the asic is walked and
4537 * the check_soft_reset callbacks are run. check_soft_reset determines
4538 * if the asic is still hung or not.
4539 * Returns true if any of the IPs are still in a hung state, false if not.
4540 */
06ec9070 4541static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
4542{
4543 int i;
4544 bool asic_hang = false;
4545
f993d628
ML
4546 if (amdgpu_sriov_vf(adev))
4547 return true;
4548
8bc04c29
AD
4549 if (amdgpu_asic_need_full_reset(adev))
4550 return true;
4551
63fbf42f 4552 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4553 if (!adev->ip_blocks[i].status.valid)
63fbf42f 4554 continue;
a1255107
AD
4555 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4556 adev->ip_blocks[i].status.hang =
4557 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4558 if (adev->ip_blocks[i].status.hang) {
aac89168 4559 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
4560 asic_hang = true;
4561 }
4562 }
4563 return asic_hang;
4564}
4565
e3ecdffa
AD
4566/**
4567 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4568 *
4569 * @adev: amdgpu_device pointer
4570 *
4571 * The list of all the hardware IPs that make up the asic is walked and the
4572 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
4573 * handles any IP specific hardware or software state changes that are
4574 * necessary for a soft reset to succeed.
4575 * Returns 0 on success, negative error code on failure.
4576 */
06ec9070 4577static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
4578{
4579 int i, r = 0;
4580
4581 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4582 if (!adev->ip_blocks[i].status.valid)
d31a501e 4583 continue;
a1255107
AD
4584 if (adev->ip_blocks[i].status.hang &&
4585 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4586 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
4587 if (r)
4588 return r;
4589 }
4590 }
4591
4592 return 0;
4593}
4594
e3ecdffa
AD
4595/**
4596 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4597 *
4598 * @adev: amdgpu_device pointer
4599 *
4600 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
4601 * reset is necessary to recover.
4602 * Returns true if a full asic reset is required, false if not.
4603 */
06ec9070 4604static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 4605{
da146d3b
AD
4606 int i;
4607
8bc04c29
AD
4608 if (amdgpu_asic_need_full_reset(adev))
4609 return true;
4610
da146d3b 4611 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4612 if (!adev->ip_blocks[i].status.valid)
da146d3b 4613 continue;
a1255107
AD
4614 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4615 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4616 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
4617 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4618 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 4619 if (adev->ip_blocks[i].status.hang) {
aac89168 4620 dev_info(adev->dev, "Some block need full reset!\n");
da146d3b
AD
4621 return true;
4622 }
4623 }
35d782fe
CZ
4624 }
4625 return false;
4626}
4627
e3ecdffa
AD
4628/**
4629 * amdgpu_device_ip_soft_reset - do a soft reset
4630 *
4631 * @adev: amdgpu_device pointer
4632 *
4633 * The list of all the hardware IPs that make up the asic is walked and the
4634 * soft_reset callbacks are run if the block is hung. soft_reset handles any
4635 * IP specific hardware or software state changes that are necessary to soft
4636 * reset the IP.
4637 * Returns 0 on success, negative error code on failure.
4638 */
06ec9070 4639static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4640{
4641 int i, r = 0;
4642
4643 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4644 if (!adev->ip_blocks[i].status.valid)
35d782fe 4645 continue;
a1255107
AD
4646 if (adev->ip_blocks[i].status.hang &&
4647 adev->ip_blocks[i].version->funcs->soft_reset) {
4648 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
4649 if (r)
4650 return r;
4651 }
4652 }
4653
4654 return 0;
4655}
4656
e3ecdffa
AD
4657/**
4658 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4659 *
4660 * @adev: amdgpu_device pointer
4661 *
4662 * The list of all the hardware IPs that make up the asic is walked and the
4663 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
4664 * handles any IP specific hardware or software state changes that are
4665 * necessary after the IP has been soft reset.
4666 * Returns 0 on success, negative error code on failure.
4667 */
06ec9070 4668static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4669{
4670 int i, r = 0;
4671
4672 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4673 if (!adev->ip_blocks[i].status.valid)
35d782fe 4674 continue;
a1255107
AD
4675 if (adev->ip_blocks[i].status.hang &&
4676 adev->ip_blocks[i].version->funcs->post_soft_reset)
4677 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
4678 if (r)
4679 return r;
4680 }
4681
4682 return 0;
4683}
4684
e3ecdffa 4685/**
c33adbc7 4686 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
4687 *
4688 * @adev: amdgpu_device pointer
4689 *
4690 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
4691 * restore things like GPUVM page tables after a GPU reset where
4692 * the contents of VRAM might be lost.
403009bf
CK
4693 *
4694 * Returns:
4695 * 0 on success, negative error code on failure.
e3ecdffa 4696 */
c33adbc7 4697static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 4698{
c41d1cf6 4699 struct dma_fence *fence = NULL, *next = NULL;
403009bf 4700 struct amdgpu_bo *shadow;
e18aaea7 4701 struct amdgpu_bo_vm *vmbo;
403009bf 4702 long r = 1, tmo;
c41d1cf6
ML
4703
4704 if (amdgpu_sriov_runtime(adev))
b045d3af 4705 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
4706 else
4707 tmo = msecs_to_jiffies(100);
4708
aac89168 4709 dev_info(adev->dev, "recover vram bo from shadow start\n");
c41d1cf6 4710 mutex_lock(&adev->shadow_list_lock);
e18aaea7 4711 list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4994d1f0
LC
4712 /* If vm is compute context or adev is APU, shadow will be NULL */
4713 if (!vmbo->shadow)
4714 continue;
4715 shadow = vmbo->shadow;
4716
403009bf 4717 /* No need to recover an evicted BO */
d3116756
CK
4718 if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4719 shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4720 shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
403009bf
CK
4721 continue;
4722
4723 r = amdgpu_bo_restore_shadow(shadow, &next);
4724 if (r)
4725 break;
4726
c41d1cf6 4727 if (fence) {
1712fb1a 4728 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
4729 dma_fence_put(fence);
4730 fence = next;
1712fb1a 4731 if (tmo == 0) {
4732 r = -ETIMEDOUT;
c41d1cf6 4733 break;
1712fb1a 4734 } else if (tmo < 0) {
4735 r = tmo;
4736 break;
4737 }
403009bf
CK
4738 } else {
4739 fence = next;
c41d1cf6 4740 }
c41d1cf6
ML
4741 }
4742 mutex_unlock(&adev->shadow_list_lock);
4743
403009bf
CK
4744 if (fence)
4745 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
4746 dma_fence_put(fence);
4747
1712fb1a 4748 if (r < 0 || tmo <= 0) {
aac89168 4749 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
4750 return -EIO;
4751 }
c41d1cf6 4752
aac89168 4753 dev_info(adev->dev, "recover vram bo from shadow done\n");
403009bf 4754 return 0;
c41d1cf6
ML
4755}
4756
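/*
 * Editor's illustrative sketch (not part of the driver): a standalone C
 * model of the shrinking-timeout pattern used by the VRAM recovery loop
 * above, where each wait returns the time left and that remainder becomes
 * the budget for the next wait, so the whole restore pass shares one
 * deadline.  The per-buffer restore times are invented for the example.
 */
#include <stdio.h>

/* pretend to wait on a restore fence; returns time left, 0 on timeout */
static long model_wait(long needed_ms, long budget_ms)
{
	return needed_ms >= budget_ms ? 0 : budget_ms - needed_ms;
}

int main(void)
{
	long restore_ms[] = { 30, 25, 60 };	/* simulated per-buffer restore times */
	long budget = 100;			/* shared deadline, like tmo */
	int n = sizeof(restore_ms) / sizeof(restore_ms[0]);

	for (int i = 0; i < n; i++) {
		budget = model_wait(restore_ms[i], budget);
		if (budget == 0) {
			printf("restore %d timed out\n", i);
			return 1;		/* like returning -ETIMEDOUT */
		}
		printf("restore %d done, %ld ms of budget left\n", i, budget);
	}
	printf("all restores finished\n");
	return 0;
}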
a90ad3c2 4757
e3ecdffa 4758/**
06ec9070 4759 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e 4760 *
982a820b 4761 * @adev: amdgpu_device pointer
87e3f136 4762 * @from_hypervisor: request from hypervisor
5740682e
ML
4763 *
4764 * Do a VF FLR and reinitialize the ASIC.
3f48c681 4765 * Returns 0 on success, otherwise a failure code.
e3ecdffa
AD
4766 */
4767static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4768 bool from_hypervisor)
5740682e
ML
4769{
4770 int r;
a5f67c93 4771 struct amdgpu_hive_info *hive = NULL;
7258fa31 4772 int retry_limit = 0;
5740682e 4773
7258fa31 4774retry:
c004d44e 4775 amdgpu_amdkfd_pre_reset(adev);
428890a3 4776
5740682e
ML
4777 if (from_hypervisor)
4778 r = amdgpu_virt_request_full_gpu(adev, true);
4779 else
4780 r = amdgpu_virt_reset_gpu(adev);
4781 if (r)
4782 return r;
f734b213 4783 amdgpu_irq_gpu_reset_resume_helper(adev);
a90ad3c2 4784
83f24a8f
HC
4785 /* some sw clean up VF needs to do before recover */
4786 amdgpu_virt_post_reset(adev);
4787
a90ad3c2 4788 /* Resume IP prior to SMC */
06ec9070 4789 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
4790 if (r)
4791 goto error;
a90ad3c2 4792
c9ffa427 4793 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 4794
7a3e0bb2
RZ
4795 r = amdgpu_device_fw_loading(adev);
4796 if (r)
4797 return r;
4798
a90ad3c2 4799 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 4800 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
4801 if (r)
4802 goto error;
a90ad3c2 4803
a5f67c93
ZL
4804 hive = amdgpu_get_xgmi_hive(adev);
4805 /* Update PSP FW topology after reset */
4806 if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4807 r = amdgpu_xgmi_update_topology(hive, adev);
4808
4809 if (hive)
4810 amdgpu_put_xgmi_hive(hive);
4811
4812 if (!r) {
a5f67c93 4813 r = amdgpu_ib_ring_tests(adev);
9c12f5cd 4814
c004d44e 4815 amdgpu_amdkfd_post_reset(adev);
a5f67c93 4816 }
a90ad3c2 4817
abc34253 4818error:
c41d1cf6 4819 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 4820 amdgpu_inc_vram_lost(adev);
c33adbc7 4821 r = amdgpu_device_recover_vram(adev);
a90ad3c2 4822 }
437f3e0b 4823 amdgpu_virt_release_full_gpu(adev, true);
a90ad3c2 4824
7258fa31
SK
4825 if (AMDGPU_RETRY_SRIOV_RESET(r)) {
4826 if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
4827 retry_limit++;
4828 goto retry;
4829 } else
4830 DRM_ERROR("GPU reset retry is beyond the retry limit\n");
4831 }
4832
a90ad3c2
ML
4833 return r;
4834}
4835
9a1cddd6 4836/**
4837 * amdgpu_device_has_job_running - check if there is any job in the pending list
4838 *
982a820b 4839 * @adev: amdgpu_device pointer
9a1cddd6 4840 *
4841 * check if there is any job in the pending list
4842 */
4843bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4844{
4845 int i;
4846 struct drm_sched_job *job;
4847
4848 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4849 struct amdgpu_ring *ring = adev->rings[i];
4850
4851 if (!ring || !ring->sched.thread)
4852 continue;
4853
4854 spin_lock(&ring->sched.job_list_lock);
6efa4b46
LT
4855 job = list_first_entry_or_null(&ring->sched.pending_list,
4856 struct drm_sched_job, list);
9a1cddd6 4857 spin_unlock(&ring->sched.job_list_lock);
4858 if (job)
4859 return true;
4860 }
4861 return false;
4862}
4863
12938fad
CK
4864/**
4865 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4866 *
982a820b 4867 * @adev: amdgpu_device pointer
12938fad
CK
4868 *
4869 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4870 * a hung GPU.
4871 */
4872bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4873{
12938fad 4874
3ba7b418
AG
4875 if (amdgpu_gpu_recovery == 0)
4876 goto disabled;
4877
1a11a65d
YC
4878 /* Skip soft reset check in fatal error mode */
4879 if (!amdgpu_ras_is_poison_mode_supported(adev))
4880 return true;
4881
3ba7b418
AG
4882 if (amdgpu_sriov_vf(adev))
4883 return true;
4884
4885 if (amdgpu_gpu_recovery == -1) {
4886 switch (adev->asic_type) {
b3523c45
AD
4887#ifdef CONFIG_DRM_AMDGPU_SI
4888 case CHIP_VERDE:
4889 case CHIP_TAHITI:
4890 case CHIP_PITCAIRN:
4891 case CHIP_OLAND:
4892 case CHIP_HAINAN:
4893#endif
4894#ifdef CONFIG_DRM_AMDGPU_CIK
4895 case CHIP_KAVERI:
4896 case CHIP_KABINI:
4897 case CHIP_MULLINS:
4898#endif
4899 case CHIP_CARRIZO:
4900 case CHIP_STONEY:
4901 case CHIP_CYAN_SKILLFISH:
3ba7b418 4902 goto disabled;
b3523c45
AD
4903 default:
4904 break;
3ba7b418 4905 }
12938fad
CK
4906 }
4907
4908 return true;
3ba7b418
AG
4909
4910disabled:
aac89168 4911 dev_info(adev->dev, "GPU recovery disabled.\n");
3ba7b418 4912 return false;
12938fad
CK
4913}
4914
5c03e584
FX
4915int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
4916{
47fc644f
SS
4917 u32 i;
4918 int ret = 0;
5c03e584 4919
47fc644f 4920 amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5c03e584 4921
47fc644f 4922 dev_info(adev->dev, "GPU mode1 reset\n");
5c03e584 4923
47fc644f
SS
4924 /* disable BM */
4925 pci_clear_master(adev->pdev);
5c03e584 4926
47fc644f 4927 amdgpu_device_cache_pci_state(adev->pdev);
5c03e584 4928
47fc644f
SS
4929 if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
4930 dev_info(adev->dev, "GPU smu mode1 reset\n");
4931 ret = amdgpu_dpm_mode1_reset(adev);
4932 } else {
4933 dev_info(adev->dev, "GPU psp mode1 reset\n");
4934 ret = psp_gpu_reset(adev);
4935 }
5c03e584 4936
47fc644f 4937 if (ret)
2c0f880a 4938 goto mode1_reset_failed;
5c03e584 4939
47fc644f 4940 amdgpu_device_load_pci_state(adev->pdev);
15c5c5f5
LL
4941 ret = amdgpu_psp_wait_for_bootloader(adev);
4942 if (ret)
2c0f880a 4943 goto mode1_reset_failed;
5c03e584 4944
47fc644f
SS
4945 /* wait for asic to come out of reset */
4946 for (i = 0; i < adev->usec_timeout; i++) {
4947 u32 memsize = adev->nbio.funcs->get_memsize(adev);
5c03e584 4948
47fc644f
SS
4949 if (memsize != 0xffffffff)
4950 break;
4951 udelay(1);
4952 }
5c03e584 4953
2c0f880a
HZ
4954 if (i >= adev->usec_timeout) {
4955 ret = -ETIMEDOUT;
4956 goto mode1_reset_failed;
4957 }
4958
47fc644f 4959 amdgpu_atombios_scratch_regs_engine_hung(adev, false);
15c5c5f5 4960
2c0f880a
HZ
4961 return 0;
4962
4963mode1_reset_failed:
4964 dev_err(adev->dev, "GPU mode1 reset failed\n");
47fc644f 4965 return ret;
5c03e584 4966}
5c6dd71e 4967
e3c1b071 4968int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
04442bf7 4969 struct amdgpu_reset_context *reset_context)
26bc5340 4970{
5c1e6fa4 4971 int i, r = 0;
04442bf7
LL
4972 struct amdgpu_job *job = NULL;
4973 bool need_full_reset =
4974 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4975
4976 if (reset_context->reset_req_dev == adev)
4977 job = reset_context->job;
71182665 4978
b602ca5f
TZ
4979 if (amdgpu_sriov_vf(adev)) {
4980 /* stop the data exchange thread */
4981 amdgpu_virt_fini_data_exchange(adev);
4982 }
4983
9e225fb9
AG
4984 amdgpu_fence_driver_isr_toggle(adev, true);
4985
71182665 4986 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
4987 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4988 struct amdgpu_ring *ring = adev->rings[i];
4989
51687759 4990 if (!ring || !ring->sched.thread)
0875dc9e 4991 continue;
5740682e 4992
b8920e1e
SS
4993 /* Clear job fence from fence drv to avoid force_completion
4994 * leave NULL and vm flush fence in fence drv
4995 */
5c1e6fa4 4996 amdgpu_fence_driver_clear_job_fences(ring);
c530b02f 4997
2f9d4084
ML
4998 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
4999 amdgpu_fence_driver_force_completion(ring);
0875dc9e 5000 }
d38ceaf9 5001
9e225fb9
AG
5002 amdgpu_fence_driver_isr_toggle(adev, false);
5003
ff99849b 5004 if (job && job->vm)
222b5f04
AG
5005 drm_sched_increase_karma(&job->base);
5006
04442bf7 5007 r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
404b277b 5008 /* If reset handler not implemented, continue; otherwise return */
b8920e1e 5009 if (r == -EOPNOTSUPP)
404b277b
LL
5010 r = 0;
5011 else
04442bf7
LL
5012 return r;
5013
1d721ed6 5014 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
5015 if (!amdgpu_sriov_vf(adev)) {
5016
5017 if (!need_full_reset)
5018 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
5019
360cd081
LG
5020 if (!need_full_reset && amdgpu_gpu_recovery &&
5021 amdgpu_device_ip_check_soft_reset(adev)) {
26bc5340
AG
5022 amdgpu_device_ip_pre_soft_reset(adev);
5023 r = amdgpu_device_ip_soft_reset(adev);
5024 amdgpu_device_ip_post_soft_reset(adev);
5025 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 5026 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
26bc5340
AG
5027 need_full_reset = true;
5028 }
5029 }
5030
5031 if (need_full_reset)
5032 r = amdgpu_device_ip_suspend(adev);
04442bf7
LL
5033 if (need_full_reset)
5034 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5035 else
5036 clear_bit(AMDGPU_NEED_FULL_RESET,
5037 &reset_context->flags);
26bc5340
AG
5038 }
5039
5040 return r;
5041}
5042
15fd09a0
SA
5043static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
5044{
15fd09a0
SA
5045 int i;
5046
38a15ad9 5047 lockdep_assert_held(&adev->reset_domain->sem);
15fd09a0 5048
2d6a2a28
AA
5049 for (i = 0; i < adev->reset_info.num_regs; i++) {
5050 adev->reset_info.reset_dump_reg_value[i] =
5051 RREG32(adev->reset_info.reset_dump_reg_list[i]);
5052
5053 trace_amdgpu_reset_reg_dumps(adev->reset_info.reset_dump_reg_list[i],
5054 adev->reset_info.reset_dump_reg_value[i]);
15fd09a0
SA
5055 }
5056
5057 return 0;
5058}
5059
04442bf7
LL
5060int amdgpu_do_asic_reset(struct list_head *device_list_handle,
5061 struct amdgpu_reset_context *reset_context)
26bc5340
AG
5062{
5063 struct amdgpu_device *tmp_adev = NULL;
04442bf7 5064 bool need_full_reset, skip_hw_reset, vram_lost = false;
26bc5340 5065 int r = 0;
f5c7e779 5066 bool gpu_reset_for_dev_remove = 0;
26bc5340 5067
04442bf7
LL
5068 /* Try reset handler method first */
5069 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5070 reset_list);
15fd09a0 5071 amdgpu_reset_reg_dumps(tmp_adev);
0a83bb35
LL
5072
5073 reset_context->reset_device_list = device_list_handle;
04442bf7 5074 r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
404b277b 5075 /* If reset handler not implemented, continue; otherwise return */
b8920e1e 5076 if (r == -EOPNOTSUPP)
404b277b
LL
5077 r = 0;
5078 else
04442bf7
LL
5079 return r;
5080
5081 /* Reset handler not implemented, use the default method */
5082 need_full_reset =
5083 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5084 skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
5085
f5c7e779
YC
5086 gpu_reset_for_dev_remove =
5087 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5088 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5089
26bc5340 5090 /*
655ce9cb 5091 * ASIC reset has to be done on all XGMI hive nodes ASAP
26bc5340
AG
 5092 * to allow proper link negotiation in FW (within 1 sec)
5093 */
7ac71382 5094 if (!skip_hw_reset && need_full_reset) {
655ce9cb 5095 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
041a62bc 5096 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 5097 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
e3c1b071 5098 tmp_adev->gmc.xgmi.pending_reset = false;
c96cf282 5099 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
5100 r = -EALREADY;
5101 } else
5102 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 5103
041a62bc 5104 if (r) {
aac89168 5105 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4a580877 5106 r, adev_to_drm(tmp_adev)->unique);
19349072 5107 goto out;
ce316fa5
LM
5108 }
5109 }
5110
041a62bc
AG
5111 /* For XGMI wait for all resets to complete before proceed */
5112 if (!r) {
655ce9cb 5113 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
ce316fa5
LM
5114 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5115 flush_work(&tmp_adev->xgmi_reset_work);
5116 r = tmp_adev->asic_reset_res;
5117 if (r)
5118 break;
ce316fa5
LM
5119 }
5120 }
5121 }
ce316fa5 5122 }
26bc5340 5123
43c4d576 5124 if (!r && amdgpu_ras_intr_triggered()) {
655ce9cb 5125 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
21226f02 5126 amdgpu_ras_reset_error_count(tmp_adev, AMDGPU_RAS_BLOCK__MMHUB);
43c4d576
JC
5127 }
5128
00eaa571 5129 amdgpu_ras_intr_cleared();
43c4d576 5130 }
00eaa571 5131
f5c7e779
YC
5132 /* Since the mode1 reset affects base ip blocks, the
5133 * phase1 ip blocks need to be resumed. Otherwise there
5134 * will be a BIOS signature error and the psp bootloader
5135 * can't load kdb on the next amdgpu install.
5136 */
5137 if (gpu_reset_for_dev_remove) {
5138 list_for_each_entry(tmp_adev, device_list_handle, reset_list)
5139 amdgpu_device_ip_resume_phase1(tmp_adev);
5140
5141 goto end;
5142 }
5143
655ce9cb 5144 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
26bc5340
AG
5145 if (need_full_reset) {
5146 /* post card */
e3c1b071 5147 r = amdgpu_device_asic_init(tmp_adev);
5148 if (r) {
aac89168 5149 dev_warn(tmp_adev->dev, "asic atom init failed!");
e3c1b071 5150 } else {
26bc5340 5151 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
9cec53c1 5152
26bc5340
AG
5153 r = amdgpu_device_ip_resume_phase1(tmp_adev);
5154 if (r)
5155 goto out;
5156
5157 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
a7691785
AA
5158
5159 amdgpu_coredump(tmp_adev, vram_lost, reset_context);
5160
26bc5340 5161 if (vram_lost) {
77e7f829 5162 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 5163 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
5164 }
5165
26bc5340
AG
5166 r = amdgpu_device_fw_loading(tmp_adev);
5167 if (r)
5168 return r;
5169
c45e38f2
LL
5170 r = amdgpu_xcp_restore_partition_mode(
5171 tmp_adev->xcp_mgr);
5172 if (r)
5173 goto out;
5174
26bc5340
AG
5175 r = amdgpu_device_ip_resume_phase2(tmp_adev);
5176 if (r)
5177 goto out;
5178
5179 if (vram_lost)
5180 amdgpu_device_fill_reset_magic(tmp_adev);
5181
fdafb359
EQ
5182 /*
 5183 * Add this ASIC back as tracked since the reset has
 5184 * already completed successfully.
5185 */
5186 amdgpu_register_gpu_instance(tmp_adev);
5187
04442bf7
LL
5188 if (!reset_context->hive &&
5189 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
e3c1b071 5190 amdgpu_xgmi_add_device(tmp_adev);
5191
7c04ca50 5192 r = amdgpu_device_ip_late_init(tmp_adev);
5193 if (r)
5194 goto out;
5195
087451f3 5196 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
565d1941 5197
e8fbaf03
GC
5198 /*
 5199 * The GPU enters a bad state once the number of faulty
 5200 * pages detected by ECC reaches the threshold, and RAS
 5201 * recovery is scheduled next. So add one check here to
 5202 * abort recovery if the bad page threshold has indeed
 5203 * been exceeded, and remind the user to either retire
 5204 * this GPU or set a bigger bad_page_threshold value,
 5205 * so that this can be worked around the next time the
 5206 * driver is probed.
5207 */
11003c68 5208 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
e8fbaf03
GC
5209 /* must succeed. */
5210 amdgpu_ras_resume(tmp_adev);
5211 } else {
5212 r = -EINVAL;
5213 goto out;
5214 }
e79a04d5 5215
26bc5340 5216 /* Update PSP FW topology after reset */
04442bf7
LL
5217 if (reset_context->hive &&
5218 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5219 r = amdgpu_xgmi_update_topology(
5220 reset_context->hive, tmp_adev);
26bc5340
AG
5221 }
5222 }
5223
26bc5340
AG
5224out:
5225 if (!r) {
5226 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5227 r = amdgpu_ib_ring_tests(tmp_adev);
5228 if (r) {
5229 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
26bc5340
AG
5230 need_full_reset = true;
5231 r = -EAGAIN;
5232 goto end;
5233 }
5234 }
5235
5236 if (!r)
5237 r = amdgpu_device_recover_vram(tmp_adev);
5238 else
5239 tmp_adev->asic_reset_res = r;
5240 }
5241
5242end:
04442bf7
LL
5243 if (need_full_reset)
5244 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5245 else
5246 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340
AG
5247 return r;
5248}
5249
e923be99 5250static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
26bc5340 5251{
5740682e 5252
a3a09142
AD
5253 switch (amdgpu_asic_reset_method(adev)) {
5254 case AMD_RESET_METHOD_MODE1:
5255 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5256 break;
5257 case AMD_RESET_METHOD_MODE2:
5258 adev->mp1_state = PP_MP1_STATE_RESET;
5259 break;
5260 default:
5261 adev->mp1_state = PP_MP1_STATE_NONE;
5262 break;
5263 }
26bc5340 5264}
d38ceaf9 5265
e923be99 5266static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
26bc5340 5267{
89041940 5268 amdgpu_vf_error_trans_all(adev);
a3a09142 5269 adev->mp1_state = PP_MP1_STATE_NONE;
91fb309d
HC
5270}
5271
3f12acc8
EQ
5272static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5273{
5274 struct pci_dev *p = NULL;
5275
5276 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5277 adev->pdev->bus->number, 1);
5278 if (p) {
5279 pm_runtime_enable(&(p->dev));
5280 pm_runtime_resume(&(p->dev));
5281 }
b85e285e
YY
5282
5283 pci_dev_put(p);
3f12acc8
EQ
5284}
5285
5286static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5287{
5288 enum amd_reset_method reset_method;
5289 struct pci_dev *p = NULL;
5290 u64 expires;
5291
5292 /*
5293 * For now, only BACO and mode1 reset are confirmed
 5294 * to suffer the audio issue if not properly suspended.
5295 */
5296 reset_method = amdgpu_asic_reset_method(adev);
5297 if ((reset_method != AMD_RESET_METHOD_BACO) &&
5298 (reset_method != AMD_RESET_METHOD_MODE1))
5299 return -EINVAL;
5300
5301 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5302 adev->pdev->bus->number, 1);
5303 if (!p)
5304 return -ENODEV;
5305
5306 expires = pm_runtime_autosuspend_expiration(&(p->dev));
5307 if (!expires)
5308 /*
5309 * If we cannot get the audio device autosuspend delay,
 5310 * a fixed 4s interval will be used. Since 3s is the
 5311 * audio controller's default autosuspend delay setting,
 5312 * the 4s used here is guaranteed to cover it.
5313 */
54b7feb9 5314 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
5315
5316 while (!pm_runtime_status_suspended(&(p->dev))) {
5317 if (!pm_runtime_suspend(&(p->dev)))
5318 break;
5319
5320 if (expires < ktime_get_mono_fast_ns()) {
5321 dev_warn(adev->dev, "failed to suspend display audio\n");
b85e285e 5322 pci_dev_put(p);
3f12acc8
EQ
5323 /* TODO: abort the succeeding gpu reset? */
5324 return -ETIMEDOUT;
5325 }
5326 }
5327
5328 pm_runtime_disable(&(p->dev));
5329
b85e285e 5330 pci_dev_put(p);
3f12acc8
EQ
5331 return 0;
5332}
5333
d193b12b 5334static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
247c7b0d
AG
5335{
5336 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5337
5338#if defined(CONFIG_DEBUG_FS)
5339 if (!amdgpu_sriov_vf(adev))
5340 cancel_work(&adev->reset_work);
5341#endif
5342
5343 if (adev->kfd.dev)
5344 cancel_work(&adev->kfd.reset_work);
5345
5346 if (amdgpu_sriov_vf(adev))
5347 cancel_work(&adev->virt.flr_work);
5348
5349 if (con && adev->ras_enabled)
5350 cancel_work(&con->recovery_work);
5351
5352}
5353
26bc5340 5354/**
6e9c65f7 5355 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
26bc5340 5356 *
982a820b 5357 * @adev: amdgpu_device pointer
26bc5340 5358 * @job: which job trigger hang
80bd2de1 5359 * @reset_context: amdgpu reset context pointer
26bc5340
AG
5360 *
5361 * Attempt to reset the GPU if it has hung (all asics).
 5362 * Attempt to do soft-reset or full-reset and reinitialize the ASIC.
5363 * Returns 0 for success or an error on failure.
5364 */
5365
cf727044 5366int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
f1549c09
LG
5367 struct amdgpu_job *job,
5368 struct amdgpu_reset_context *reset_context)
26bc5340 5369{
1d721ed6 5370 struct list_head device_list, *device_list_handle = NULL;
7dd8c205 5371 bool job_signaled = false;
26bc5340 5372 struct amdgpu_hive_info *hive = NULL;
26bc5340 5373 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 5374 int i, r = 0;
bb5c7235 5375 bool need_emergency_restart = false;
3f12acc8 5376 bool audio_suspended = false;
f5c7e779
YC
5377 bool gpu_reset_for_dev_remove = false;
5378
5379 gpu_reset_for_dev_remove =
5380 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5381 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340 5382
6e3cd2a9 5383 /*
bb5c7235
WS
5384 * Special case: RAS triggered and full reset isn't supported
5385 */
5386 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5387
d5ea093e
AG
5388 /*
5389 * Flush RAM to disk so that after reboot
 5390 * the user can read the log and see why the system rebooted.
5391 */
80285ae1
SY
5392 if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
5393 amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
5394 DRM_WARN("Emergency reboot.");
5395
5396 ksys_sync_helper();
5397 emergency_restart();
5398 }
5399
b823821f 5400 dev_info(adev->dev, "GPU %s begin!\n",
bb5c7235 5401 need_emergency_restart ? "jobs stop":"reset");
26bc5340 5402
175ac6ec
ZL
5403 if (!amdgpu_sriov_vf(adev))
5404 hive = amdgpu_get_xgmi_hive(adev);
681260df 5405 if (hive)
53b3f8f4 5406 mutex_lock(&hive->hive_lock);
26bc5340 5407
f1549c09
LG
5408 reset_context->job = job;
5409 reset_context->hive = hive;
9e94d22c
EQ
5410 /*
5411 * Build list of devices to reset.
 5412 * In case we are in XGMI hive mode, re-sort the device list
5413 * to put adev in the 1st position.
5414 */
5415 INIT_LIST_HEAD(&device_list);
175ac6ec 5416 if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
83d29a5f 5417 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
655ce9cb 5418 list_add_tail(&tmp_adev->reset_list, &device_list);
83d29a5f
YC
5419 if (gpu_reset_for_dev_remove && adev->shutdown)
5420 tmp_adev->shutdown = true;
5421 }
655ce9cb 5422 if (!list_is_first(&adev->reset_list, &device_list))
5423 list_rotate_to_front(&adev->reset_list, &device_list);
5424 device_list_handle = &device_list;
26bc5340 5425 } else {
655ce9cb 5426 list_add_tail(&adev->reset_list, &device_list);
26bc5340
AG
5427 device_list_handle = &device_list;
5428 }
5429
e923be99
AG
5430 /* We need to lock reset domain only once both for XGMI and single device */
5431 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5432 reset_list);
3675c2f2 5433 amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
e923be99 5434
1d721ed6 5435 /* block all schedulers and reset given job's ring */
655ce9cb 5436 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f287a3c5 5437
e923be99 5438 amdgpu_device_set_mp1_state(tmp_adev);
f287a3c5 5439
3f12acc8
EQ
5440 /*
5441 * Try to put the audio codec into suspend state
 5442 * before the gpu reset starts.
 5443 *
 5444 * The power domain of the graphics device is
 5445 * shared with the AZ power domain. Without this,
 5446 * we may change the audio hardware from behind
 5447 * the audio driver's back, which would trigger
 5448 * audio codec errors.
5449 */
5450 if (!amdgpu_device_suspend_display_audio(tmp_adev))
5451 audio_suspended = true;
5452
9e94d22c
EQ
5453 amdgpu_ras_set_error_query_ready(tmp_adev, false);
5454
52fb44cf
EQ
5455 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5456
c004d44e 5457 if (!amdgpu_sriov_vf(tmp_adev))
428890a3 5458 amdgpu_amdkfd_pre_reset(tmp_adev);
9e94d22c 5459
12ffa55d
AG
5460 /*
 5461 * Mark these ASICs to be reset as untracked first,
 5462 * and add them back after the reset completes.
5463 */
5464 amdgpu_unregister_gpu_instance(tmp_adev);
5465
163d4cd2 5466 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
565d1941 5467
f1c1314b 5468 /* disable ras on ALL IPs */
bb5c7235 5469 if (!need_emergency_restart &&
b823821f 5470 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 5471 amdgpu_ras_suspend(tmp_adev);
5472
1d721ed6
AG
5473 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5474 struct amdgpu_ring *ring = tmp_adev->rings[i];
5475
5476 if (!ring || !ring->sched.thread)
5477 continue;
5478
0b2d2c2e 5479 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 5480
bb5c7235 5481 if (need_emergency_restart)
7c6e68c7 5482 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6 5483 }
8f8c80f4 5484 atomic_inc(&tmp_adev->gpu_reset_counter);
1d721ed6
AG
5485 }
5486
bb5c7235 5487 if (need_emergency_restart)
7c6e68c7
AG
5488 goto skip_sched_resume;
5489
1d721ed6
AG
5490 /*
5491 * Must check guilty signal here since after this point all old
5492 * HW fences are force signaled.
5493 *
5494 * job->base holds a reference to parent fence
5495 */
f6a3f660 5496 if (job && dma_fence_is_signaled(&job->hw_fence)) {
1d721ed6 5497 job_signaled = true;
1d721ed6
AG
5498 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5499 goto skip_hw_reset;
5500 }
5501
26bc5340 5502retry: /* Rest of adevs pre asic reset from XGMI hive. */
655ce9cb 5503 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f5c7e779
YC
5504 if (gpu_reset_for_dev_remove) {
 5505 /* Workaround for ASICs that need to disable SMC first */
5506 amdgpu_device_smu_fini_early(tmp_adev);
5507 }
f1549c09 5508 r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
26bc5340
AG
 5509 /* TODO: Should we stop? */
5510 if (r) {
aac89168 5511 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
4a580877 5512 r, adev_to_drm(tmp_adev)->unique);
26bc5340
AG
5513 tmp_adev->asic_reset_res = r;
5514 }
247c7b0d
AG
5515
5516 /*
5517 * Drop all pending non scheduler resets. Scheduler resets
5518 * were already dropped during drm_sched_stop
5519 */
d193b12b 5520 amdgpu_device_stop_pending_resets(tmp_adev);
26bc5340
AG
5521 }
5522
5523 /* Actual ASIC resets if needed.*/
4f30d920 5524 /* Host driver will handle XGMI hive reset for SRIOV */
26bc5340
AG
5525 if (amdgpu_sriov_vf(adev)) {
5526 r = amdgpu_device_reset_sriov(adev, job ? false : true);
5527 if (r)
5528 adev->asic_reset_res = r;
950d6425 5529
28606c4e 5530 /* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */
4e8303cf
LL
5531 if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
5532 IP_VERSION(9, 4, 2) ||
5533 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
950d6425 5534 amdgpu_ras_resume(adev);
26bc5340 5535 } else {
f1549c09 5536 r = amdgpu_do_asic_reset(device_list_handle, reset_context);
b98a1648 5537 if (r && r == -EAGAIN)
26bc5340 5538 goto retry;
f5c7e779
YC
5539
5540 if (!r && gpu_reset_for_dev_remove)
5541 goto recover_end;
26bc5340
AG
5542 }
5543
1d721ed6
AG
5544skip_hw_reset:
5545
26bc5340 5546 /* Post ASIC reset for all devs .*/
655ce9cb 5547 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
7c6e68c7 5548
1d721ed6
AG
5549 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5550 struct amdgpu_ring *ring = tmp_adev->rings[i];
5551
5552 if (!ring || !ring->sched.thread)
5553 continue;
5554
6868a2c4 5555 drm_sched_start(&ring->sched, true);
1d721ed6
AG
5556 }
5557
4e8303cf
LL
5558 if (adev->enable_mes &&
5559 amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(11, 0, 3))
ed67f729
JX
5560 amdgpu_mes_self_test(tmp_adev);
5561
b8920e1e 5562 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
4a580877 5563 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
1d721ed6 5564
7258fa31
SK
5565 if (tmp_adev->asic_reset_res)
5566 r = tmp_adev->asic_reset_res;
5567
1d721ed6 5568 tmp_adev->asic_reset_res = 0;
26bc5340
AG
5569
5570 if (r) {
 5571 /* bad news, how do we tell userspace? */
12ffa55d 5572 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
5573 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5574 } else {
12ffa55d 5575 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
3fa8f89d
S
5576 if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5577 DRM_WARN("smart shift update failed\n");
26bc5340 5578 }
7c6e68c7 5579 }
26bc5340 5580
7c6e68c7 5581skip_sched_resume:
655ce9cb 5582 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
428890a3 5583 /* unlock kfd: SRIOV would do it separately */
c004d44e 5584 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
428890a3 5585 amdgpu_amdkfd_post_reset(tmp_adev);
8e2712e7 5586
 5587 /* kfd_post_reset will do nothing if the kfd device is not initialized;
 5588 * bring up kfd here if it was not initialized before the reset.
5589 */
5590 if (!adev->kfd.init_complete)
5591 amdgpu_amdkfd_device_init(adev);
5592
3f12acc8
EQ
5593 if (audio_suspended)
5594 amdgpu_device_resume_display_audio(tmp_adev);
e923be99
AG
5595
5596 amdgpu_device_unset_mp1_state(tmp_adev);
d293470e
YC
5597
5598 amdgpu_ras_set_error_query_ready(tmp_adev, true);
26bc5340
AG
5599 }
5600
f5c7e779 5601recover_end:
e923be99
AG
5602 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5603 reset_list);
5604 amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5605
9e94d22c 5606 if (hive) {
9e94d22c 5607 mutex_unlock(&hive->hive_lock);
d95e8e97 5608 amdgpu_put_xgmi_hive(hive);
9e94d22c 5609 }
26bc5340 5610
f287a3c5 5611 if (r)
26bc5340 5612 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
ab9a0b1f
AG
5613
5614 atomic_set(&adev->reset_domain->reset_res, r);
d38ceaf9
AD
5615 return r;
5616}
5617
e3ecdffa
AD
5618/**
 5619 * amdgpu_device_get_pcie_info - fetch PCIe info about the PCIe slot
5620 *
5621 * @adev: amdgpu_device pointer
5622 *
 5623 * Fetches and stores in the driver the PCIe capabilities (gen speed
5624 * and lanes) of the slot the device is in. Handles APUs and
5625 * virtualized environments where PCIE config space may not be available.
5626 */
5494d864 5627static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 5628{
5d9a6330 5629 struct pci_dev *pdev;
c5313457
HK
5630 enum pci_bus_speed speed_cap, platform_speed_cap;
5631 enum pcie_link_width platform_link_width;
d0dd7f0c 5632
cd474ba0
AD
5633 if (amdgpu_pcie_gen_cap)
5634 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 5635
cd474ba0
AD
5636 if (amdgpu_pcie_lane_cap)
5637 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 5638
cd474ba0 5639 /* covers APUs as well */
04e85958 5640 if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
cd474ba0
AD
5641 if (adev->pm.pcie_gen_mask == 0)
5642 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5643 if (adev->pm.pcie_mlw_mask == 0)
5644 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 5645 return;
cd474ba0 5646 }
d0dd7f0c 5647
c5313457
HK
5648 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5649 return;
5650
dbaa922b
AD
5651 pcie_bandwidth_available(adev->pdev, NULL,
5652 &platform_speed_cap, &platform_link_width);
c5313457 5653
cd474ba0 5654 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
5655 /* asic caps */
5656 pdev = adev->pdev;
5657 speed_cap = pcie_get_speed_cap(pdev);
5658 if (speed_cap == PCI_SPEED_UNKNOWN) {
5659 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
5660 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5661 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 5662 } else {
2b3a1f51
FX
5663 if (speed_cap == PCIE_SPEED_32_0GT)
5664 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5665 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5666 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5667 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5668 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5669 else if (speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5670 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5671 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5672 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5673 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5674 else if (speed_cap == PCIE_SPEED_8_0GT)
5675 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5676 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5677 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5678 else if (speed_cap == PCIE_SPEED_5_0GT)
5679 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5680 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5681 else
5682 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5683 }
5684 /* platform caps */
c5313457 5685 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
5686 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5687 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5688 } else {
2b3a1f51
FX
5689 if (platform_speed_cap == PCIE_SPEED_32_0GT)
5690 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5691 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5692 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5693 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5694 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5695 else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5696 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5697 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5698 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5699 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 5700 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
5701 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5702 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5703 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 5704 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
5705 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5706 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5707 else
5708 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5709
cd474ba0
AD
5710 }
5711 }
5712 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 5713 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
5714 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5715 } else {
c5313457 5716 switch (platform_link_width) {
5d9a6330 5717 case PCIE_LNK_X32:
cd474ba0
AD
5718 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5719 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5720 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5721 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5722 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5723 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5724 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5725 break;
5d9a6330 5726 case PCIE_LNK_X16:
cd474ba0
AD
5727 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5728 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5729 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5730 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5731 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5732 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5733 break;
5d9a6330 5734 case PCIE_LNK_X12:
cd474ba0
AD
5735 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5736 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5737 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5738 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5739 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5740 break;
5d9a6330 5741 case PCIE_LNK_X8:
cd474ba0
AD
5742 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5743 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5744 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5745 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5746 break;
5d9a6330 5747 case PCIE_LNK_X4:
cd474ba0
AD
5748 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5749 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5750 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5751 break;
5d9a6330 5752 case PCIE_LNK_X2:
cd474ba0
AD
5753 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5754 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5755 break;
5d9a6330 5756 case PCIE_LNK_X1:
cd474ba0
AD
5757 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5758 break;
5759 default:
5760 break;
5761 }
d0dd7f0c
AD
5762 }
5763 }
5764}
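/*
 * Illustrative example (derived from the code above, with assumed slot
 * capabilities): for an ASIC and a platform that both report
 * PCIE_SPEED_16_0GT over a PCIE_LNK_X16 link, the function leaves
 * adev->pm.pcie_gen_mask with the GEN1-GEN4 bits of both the
 * CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_* and CAIL_PCIE_LINK_SPEED_SUPPORT_*
 * families set, and adev->pm.pcie_mlw_mask with the X16 down to X1
 * CAIL_PCIE_LINK_WIDTH_SUPPORT_* bits set.
 */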
d38ceaf9 5765
08a2fd23
RE
5766/**
5767 * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
5768 *
5769 * @adev: amdgpu_device pointer
5770 * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
5771 *
5772 * Return true if @peer_adev can access (DMA) @adev through the PCIe
5773 * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
5774 * @peer_adev.
5775 */
5776bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
5777 struct amdgpu_device *peer_adev)
5778{
5779#ifdef CONFIG_HSA_AMD_P2P
5780 uint64_t address_mask = peer_adev->dev->dma_mask ?
5781 ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
5782 resource_size_t aper_limit =
5783 adev->gmc.aper_base + adev->gmc.aper_size - 1;
bb66ecbf
LL
5784 bool p2p_access =
5785 !adev->gmc.xgmi.connected_to_cpu &&
5786 !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
08a2fd23
RE
5787
5788 return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
5789 adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
5790 !(adev->gmc.aper_base & address_mask ||
5791 aper_limit & address_mask));
5792#else
5793 return false;
5794#endif
5795}
5796
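/*
 * Worked example (hypothetical numbers, for illustration only): with a
 * 44-bit peer DMA mask, address_mask is ~((1ULL << 44) - 1).  An adev
 * whose visible VRAM size equals its real VRAM size ("large BAR") and
 * whose aperture base and limit both lie below 1ULL << 44 passes the
 * address check, so the function returns true provided the pcie_p2p
 * module parameter is enabled, the device is not CPU-connected via XGMI,
 * and pci_p2pdma_distance() reports a usable P2P path.
 */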
361dbd01
AD
5797int amdgpu_device_baco_enter(struct drm_device *dev)
5798{
1348969a 5799 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5800 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01 5801
6ab68650 5802 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
5803 return -ENOTSUPP;
5804
8ab0d6f0 5805 if (ras && adev->ras_enabled &&
acdae216 5806 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5807 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
5808
9530273e 5809 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
5810}
5811
5812int amdgpu_device_baco_exit(struct drm_device *dev)
5813{
1348969a 5814 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5815 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 5816 int ret = 0;
361dbd01 5817
6ab68650 5818 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
5819 return -ENOTSUPP;
5820
9530273e
EQ
5821 ret = amdgpu_dpm_baco_exit(adev);
5822 if (ret)
5823 return ret;
7a22677b 5824
8ab0d6f0 5825 if (ras && adev->ras_enabled &&
acdae216 5826 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5827 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
5828
1bece222
CL
5829 if (amdgpu_passthrough(adev) &&
5830 adev->nbio.funcs->clear_doorbell_interrupt)
5831 adev->nbio.funcs->clear_doorbell_interrupt(adev);
5832
7a22677b 5833 return 0;
361dbd01 5834}
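/*
 * Hypothetical caller sketch (illustration only, not the driver's actual
 * runtime-PM path): bracketing a low-power window with the two helpers
 * above.
 *
 *	r = amdgpu_device_baco_enter(ddev);
 *	if (r)
 *		return r;
 *	// ... device sits in BACO until something needs it again ...
 *	r = amdgpu_device_baco_exit(ddev);
 */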
c9a6b82f
AG
5835
5836/**
5837 * amdgpu_pci_error_detected - Called when a PCI error is detected.
5838 * @pdev: PCI device struct
5839 * @state: PCI channel state
5840 *
5841 * Description: Called when a PCI error is detected.
5842 *
5843 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
5844 */
5845pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
5846{
5847 struct drm_device *dev = pci_get_drvdata(pdev);
5848 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5849 int i;
c9a6b82f
AG
5850
5851 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
5852
6894305c
AG
5853 if (adev->gmc.xgmi.num_physical_nodes > 1) {
5854 DRM_WARN("No support for XGMI hive yet...");
5855 return PCI_ERS_RESULT_DISCONNECT;
5856 }
5857
e17e27f9
GC
5858 adev->pci_channel_state = state;
5859
c9a6b82f
AG
5860 switch (state) {
5861 case pci_channel_io_normal:
5862 return PCI_ERS_RESULT_CAN_RECOVER;
acd89fca 5863 /* Fatal error, prepare for slot reset */
8a11d283
TZ
5864 case pci_channel_io_frozen:
5865 /*
d0fb18b5 5866 * Locking adev->reset_domain->sem will prevent any external access
acd89fca
AG
5867 * to GPU during PCI error recovery
5868 */
3675c2f2 5869 amdgpu_device_lock_reset_domain(adev->reset_domain);
e923be99 5870 amdgpu_device_set_mp1_state(adev);
acd89fca
AG
5871
5872 /*
5873 * Block any work scheduling as we do for regular GPU reset
5874 * for the duration of the recovery
5875 */
5876 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5877 struct amdgpu_ring *ring = adev->rings[i];
5878
5879 if (!ring || !ring->sched.thread)
5880 continue;
5881
5882 drm_sched_stop(&ring->sched, NULL);
5883 }
8f8c80f4 5884 atomic_inc(&adev->gpu_reset_counter);
c9a6b82f
AG
5885 return PCI_ERS_RESULT_NEED_RESET;
5886 case pci_channel_io_perm_failure:
5887 /* Permanent error, prepare for device removal */
5888 return PCI_ERS_RESULT_DISCONNECT;
5889 }
5890
5891 return PCI_ERS_RESULT_NEED_RESET;
5892}
5893
5894/**
5895 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
5896 * @pdev: pointer to PCI device
5897 */
5898pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
5899{
5900
5901 DRM_INFO("PCI error: mmio enabled callback!!\n");
5902
5903 /* TODO - dump whatever for debugging purposes */
5904
5905 /* This called only if amdgpu_pci_error_detected returns
5906 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
5907 * works, no need to reset slot.
5908 */
5909
5910 return PCI_ERS_RESULT_RECOVERED;
5911}
5912
5913/**
5914 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
5915 * @pdev: PCI device struct
5916 *
5917 * Description: This routine is called by the pci error recovery
5918 * code after the PCI slot has been reset, just before we
5919 * should resume normal operations.
5920 */
5921pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
5922{
5923 struct drm_device *dev = pci_get_drvdata(pdev);
5924 struct amdgpu_device *adev = drm_to_adev(dev);
362c7b91 5925 int r, i;
04442bf7 5926 struct amdgpu_reset_context reset_context;
362c7b91 5927 u32 memsize;
7ac71382 5928 struct list_head device_list;
c9a6b82f
AG
5929
5930 DRM_INFO("PCI error: slot reset callback!!\n");
5931
04442bf7
LL
5932 memset(&reset_context, 0, sizeof(reset_context));
5933
7ac71382 5934 INIT_LIST_HEAD(&device_list);
655ce9cb 5935 list_add_tail(&adev->reset_list, &device_list);
7ac71382 5936
362c7b91
AG
5937 /* wait for asic to come out of reset */
5938 msleep(500);
5939
7ac71382 5940 /* Restore PCI confspace */
c1dd4aa6 5941 amdgpu_device_load_pci_state(pdev);
c9a6b82f 5942
362c7b91
AG
5943 /* confirm ASIC came out of reset */
5944 for (i = 0; i < adev->usec_timeout; i++) {
5945 memsize = amdgpu_asic_get_config_memsize(adev);
5946
5947 if (memsize != 0xffffffff)
5948 break;
5949 udelay(1);
5950 }
5951 if (memsize == 0xffffffff) {
5952 r = -ETIME;
5953 goto out;
5954 }
5955
04442bf7
LL
5956 reset_context.method = AMD_RESET_METHOD_NONE;
5957 reset_context.reset_req_dev = adev;
5958 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
5959 set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
5960
7afefb81 5961 adev->no_hw_access = true;
04442bf7 5962 r = amdgpu_device_pre_asic_reset(adev, &reset_context);
7afefb81 5963 adev->no_hw_access = false;
c9a6b82f
AG
5964 if (r)
5965 goto out;
5966
04442bf7 5967 r = amdgpu_do_asic_reset(&device_list, &reset_context);
c9a6b82f
AG
5968
5969out:
c9a6b82f 5970 if (!r) {
c1dd4aa6
AG
5971 if (amdgpu_device_cache_pci_state(adev->pdev))
5972 pci_restore_state(adev->pdev);
5973
c9a6b82f
AG
5974 DRM_INFO("PCIe error recovery succeeded\n");
5975 } else {
5976 DRM_ERROR("PCIe error recovery failed, err:%d", r);
e923be99
AG
5977 amdgpu_device_unset_mp1_state(adev);
5978 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f
AG
5979 }
5980
5981 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
5982}
5983
5984/**
5985 * amdgpu_pci_resume() - resume normal ops after PCI reset
5986 * @pdev: pointer to PCI device
5987 *
 5988 * Called when the error recovery driver tells us that it's
505199a3 5989 * OK to resume normal operation.
c9a6b82f
AG
5990 */
5991void amdgpu_pci_resume(struct pci_dev *pdev)
5992{
5993 struct drm_device *dev = pci_get_drvdata(pdev);
5994 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5995 int i;
c9a6b82f 5996
c9a6b82f
AG
5997
5998 DRM_INFO("PCI error: resume callback!!\n");
acd89fca 5999
e17e27f9
GC
6000 /* Only continue execution for the case of pci_channel_io_frozen */
6001 if (adev->pci_channel_state != pci_channel_io_frozen)
6002 return;
6003
acd89fca
AG
6004 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6005 struct amdgpu_ring *ring = adev->rings[i];
6006
6007 if (!ring || !ring->sched.thread)
6008 continue;
6009
acd89fca
AG
6010 drm_sched_start(&ring->sched, true);
6011 }
6012
e923be99
AG
6013 amdgpu_device_unset_mp1_state(adev);
6014 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f 6015}
c1dd4aa6
AG
6016
6017bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
6018{
6019 struct drm_device *dev = pci_get_drvdata(pdev);
6020 struct amdgpu_device *adev = drm_to_adev(dev);
6021 int r;
6022
6023 r = pci_save_state(pdev);
6024 if (!r) {
6025 kfree(adev->pci_state);
6026
6027 adev->pci_state = pci_store_saved_state(pdev);
6028
6029 if (!adev->pci_state) {
6030 DRM_ERROR("Failed to store PCI saved state");
6031 return false;
6032 }
6033 } else {
6034 DRM_WARN("Failed to save PCI state, err:%d\n", r);
6035 return false;
6036 }
6037
6038 return true;
6039}
6040
6041bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
6042{
6043 struct drm_device *dev = pci_get_drvdata(pdev);
6044 struct amdgpu_device *adev = drm_to_adev(dev);
6045 int r;
6046
6047 if (!adev->pci_state)
6048 return false;
6049
6050 r = pci_load_saved_state(pdev, adev->pci_state);
6051
6052 if (!r) {
6053 pci_restore_state(pdev);
6054 } else {
6055 DRM_WARN("Failed to load PCI state, err:%d\n", r);
6056 return false;
6057 }
6058
6059 return true;
6060}
6061
810085dd
EH
6062void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
6063 struct amdgpu_ring *ring)
6064{
6065#ifdef CONFIG_X86_64
b818a5d3 6066 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
6067 return;
6068#endif
6069 if (adev->gmc.xgmi.connected_to_cpu)
6070 return;
6071
6072 if (ring && ring->funcs->emit_hdp_flush)
6073 amdgpu_ring_emit_hdp_flush(ring);
6074 else
6075 amdgpu_asic_flush_hdp(adev, ring);
6076}
c1dd4aa6 6077
810085dd
EH
6078void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
6079 struct amdgpu_ring *ring)
6080{
6081#ifdef CONFIG_X86_64
b818a5d3 6082 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
6083 return;
6084#endif
6085 if (adev->gmc.xgmi.connected_to_cpu)
6086 return;
c1dd4aa6 6087
810085dd
EH
6088 amdgpu_asic_invalidate_hdp(adev, ring);
6089}
34f3a4a9 6090
89a7a870
AG
6091int amdgpu_in_reset(struct amdgpu_device *adev)
6092{
6093 return atomic_read(&adev->reset_domain->in_gpu_reset);
53a17b6b
TZ
6094}
6095
34f3a4a9
LY
6096/**
6097 * amdgpu_device_halt() - bring hardware to some kind of halt state
6098 *
6099 * @adev: amdgpu_device pointer
6100 *
 6101 * Bring the hardware to some kind of halt state so that no one can touch it
 6102 * any more. This helps preserve the error context when an error occurs.
 6103 * Compared to a simple hang, the system will stay stable at least for SSH
 6104 * access. It should then be trivial to inspect the hardware state and
 6105 * see what's going on. Implemented as follows:
6106 *
6107 * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
6108 * clears all CPU mappings to device, disallows remappings through page faults
6109 * 2. amdgpu_irq_disable_all() disables all interrupts
6110 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
 6111 * 4. set adev->no_hw_access to avoid potential crashes after step 5
6112 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
6113 * 6. pci_disable_device() and pci_wait_for_pending_transaction()
6114 * flush any in flight DMA operations
6115 */
6116void amdgpu_device_halt(struct amdgpu_device *adev)
6117{
6118 struct pci_dev *pdev = adev->pdev;
e0f943b4 6119 struct drm_device *ddev = adev_to_drm(adev);
34f3a4a9 6120
2c1c7ba4 6121 amdgpu_xcp_dev_unplug(adev);
34f3a4a9
LY
6122 drm_dev_unplug(ddev);
6123
6124 amdgpu_irq_disable_all(adev);
6125
6126 amdgpu_fence_driver_hw_fini(adev);
6127
6128 adev->no_hw_access = true;
6129
6130 amdgpu_device_unmap_mmio(adev);
6131
6132 pci_disable_device(pdev);
6133 pci_wait_for_pending_transaction(pdev);
6134}
86700a40
XD
6135
6136u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
6137 u32 reg)
6138{
6139 unsigned long flags, address, data;
6140 u32 r;
6141
6142 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6143 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6144
6145 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6146 WREG32(address, reg * 4);
6147 (void)RREG32(address);
6148 r = RREG32(data);
6149 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6150 return r;
6151}
6152
6153void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
6154 u32 reg, u32 v)
6155{
6156 unsigned long flags, address, data;
6157
6158 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6159 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6160
6161 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6162 WREG32(address, reg * 4);
6163 (void)RREG32(address);
6164 WREG32(data, v);
6165 (void)RREG32(data);
6166 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6167}
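/*
 * Usage sketch (hypothetical register and bit names): a read-modify-write
 * of an indexed PCIe port register through the two helpers above.
 *
 *	u32 v = amdgpu_device_pcie_port_rreg(adev, FOO_PORT_REG);
 *
 *	v |= FOO_PORT_ENABLE_BIT;
 *	amdgpu_device_pcie_port_wreg(adev, FOO_PORT_REG, v);
 */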
68ce8b24
CK
6168
6169/**
6170 * amdgpu_device_switch_gang - switch to a new gang
6171 * @adev: amdgpu_device pointer
6172 * @gang: the gang to switch to
6173 *
6174 * Try to switch to a new gang.
6175 * Returns: NULL if we switched to the new gang or a reference to the current
6176 * gang leader.
6177 */
6178struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
6179 struct dma_fence *gang)
6180{
6181 struct dma_fence *old = NULL;
6182
6183 do {
6184 dma_fence_put(old);
6185 rcu_read_lock();
6186 old = dma_fence_get_rcu_safe(&adev->gang_submit);
6187 rcu_read_unlock();
6188
6189 if (old == gang)
6190 break;
6191
6192 if (!dma_fence_is_signaled(old))
6193 return old;
6194
6195 } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
6196 old, gang) != old);
6197
6198 dma_fence_put(old);
6199 return NULL;
6200}
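/*
 * Hypothetical caller sketch (illustration only): switching submission to
 * a new gang leader.  If an older, still-running gang is returned, the
 * caller has to order its work after that fence (or wait) and try again.
 *
 *	struct dma_fence *old = amdgpu_device_switch_gang(adev, new_gang);
 *
 *	if (old) {
 *		dma_fence_wait(old, false);
 *		dma_fence_put(old);
 *		// retry amdgpu_device_switch_gang() afterwards
 *	}
 */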
220c8cc8
AD
6201
6202bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6203{
6204 switch (adev->asic_type) {
6205#ifdef CONFIG_DRM_AMDGPU_SI
6206 case CHIP_HAINAN:
6207#endif
6208 case CHIP_TOPAZ:
6209 /* chips with no display hardware */
6210 return false;
6211#ifdef CONFIG_DRM_AMDGPU_SI
6212 case CHIP_TAHITI:
6213 case CHIP_PITCAIRN:
6214 case CHIP_VERDE:
6215 case CHIP_OLAND:
6216#endif
6217#ifdef CONFIG_DRM_AMDGPU_CIK
6218 case CHIP_BONAIRE:
6219 case CHIP_HAWAII:
6220 case CHIP_KAVERI:
6221 case CHIP_KABINI:
6222 case CHIP_MULLINS:
6223#endif
6224 case CHIP_TONGA:
6225 case CHIP_FIJI:
6226 case CHIP_POLARIS10:
6227 case CHIP_POLARIS11:
6228 case CHIP_POLARIS12:
6229 case CHIP_VEGAM:
6230 case CHIP_CARRIZO:
6231 case CHIP_STONEY:
6232 /* chips with display hardware */
6233 return true;
6234 default:
6235 /* IP discovery */
4e8303cf 6236 if (!amdgpu_ip_version(adev, DCE_HWIP, 0) ||
220c8cc8
AD
6237 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6238 return false;
6239 return true;
6240 }
6241}
81283fee
JZ
6242
6243uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
6244 uint32_t inst, uint32_t reg_addr, char reg_name[],
6245 uint32_t expected_value, uint32_t mask)
6246{
6247 uint32_t ret = 0;
6248 uint32_t old_ = 0;
6249 uint32_t tmp_ = RREG32(reg_addr);
6250 uint32_t loop = adev->usec_timeout;
6251
6252 while ((tmp_ & (mask)) != (expected_value)) {
6253 if (old_ != tmp_) {
6254 loop = adev->usec_timeout;
6255 old_ = tmp_;
6256 } else
6257 udelay(1);
6258 tmp_ = RREG32(reg_addr);
6259 loop--;
6260 if (!loop) {
 6261 DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08x\n",
6262 inst, reg_name, (uint32_t)expected_value,
6263 (uint32_t)(tmp_ & (mask)));
6264 ret = -ETIMEDOUT;
6265 break;
6266 }
6267 }
6268 return ret;
6269}
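/*
 * Usage sketch (hypothetical register, bit and caller): poll a status
 * register until a ready bit is set, giving up after the usual
 * adev->usec_timeout window.
 *
 *	r = amdgpu_device_wait_on_rreg(adev, 0, status_reg_offset,
 *				       "STATUS_REG", STATUS_READY_BIT,
 *				       STATUS_READY_BIT);
 *	if (r)
 *		return r;	// timed out waiting for the ready bit
 */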