drm/amd: Move AMD_IS_APU check for ASPM into top level function
[linux-2.6-block.git] drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
d38ceaf9
AD
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
b1ddf548 28#include <linux/power_supply.h>
0875dc9e 29#include <linux/kthread.h>
fdf2f6c5 30#include <linux/module.h>
d38ceaf9
AD
31#include <linux/console.h>
32#include <linux/slab.h>
4a74c38c 33#include <linux/iommu.h>
901e2be2 34#include <linux/pci.h>
08a2fd23 35#include <linux/pci-p2pdma.h>
d37a3929 36#include <linux/apple-gmux.h>
fdf2f6c5 37
b7cdb41e 38#include <drm/drm_aperture.h>
4562236b 39#include <drm/drm_atomic_helper.h>
973ad627 40#include <drm/drm_crtc_helper.h>
45b64fd9 41#include <drm/drm_fb_helper.h>
fcd70cd3 42#include <drm/drm_probe_helper.h>
d38ceaf9
AD
43#include <drm/amdgpu_drm.h>
44#include <linux/vgaarb.h>
45#include <linux/vga_switcheroo.h>
46#include <linux/efi.h>
47#include "amdgpu.h"
f4b373f4 48#include "amdgpu_trace.h"
d38ceaf9
AD
49#include "amdgpu_i2c.h"
50#include "atom.h"
51#include "amdgpu_atombios.h"
a5bde2f9 52#include "amdgpu_atomfirmware.h"
d0dd7f0c 53#include "amd_pcie.h"
33f34802
KW
54#ifdef CONFIG_DRM_AMDGPU_SI
55#include "si.h"
56#endif
a2e73f56
AD
57#ifdef CONFIG_DRM_AMDGPU_CIK
58#include "cik.h"
59#endif
aaa36a97 60#include "vi.h"
460826e6 61#include "soc15.h"
0a5b8c7b 62#include "nv.h"
d38ceaf9 63#include "bif/bif_4_1_d.h"
bec86378 64#include <linux/firmware.h>
89041940 65#include "amdgpu_vf_error.h"
d38ceaf9 66
ba997709 67#include "amdgpu_amdkfd.h"
d2f52ac8 68#include "amdgpu_pm.h"
d38ceaf9 69
5183411b 70#include "amdgpu_xgmi.h"
c030f2e4 71#include "amdgpu_ras.h"
9c7c85f7 72#include "amdgpu_pmu.h"
bd607166 73#include "amdgpu_fru_eeprom.h"
04442bf7 74#include "amdgpu_reset.h"
5183411b 75
d5ea093e 76#include <linux/suspend.h>
c6a6e2db 77#include <drm/task_barrier.h>
3f12acc8 78#include <linux/pm_runtime.h>
d5ea093e 79
f89f8c6b
AG
80#include <drm/drm_drv.h>
81
3ad5dcfe
KHF
82#if IS_ENABLED(CONFIG_X86)
83#include <asm/intel-family.h>
84#endif
85
e2a75f88 86MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
3f76dced 87MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
2d2e5e7e 88MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
ad5a67a7 89MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
54c4d17e 90MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
65e60f6e 91MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
42b325e5 92MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
e2a75f88 93
2dc80b00 94#define AMDGPU_RESUME_MS 2000
7258fa31
SK
95#define AMDGPU_MAX_RETRY_LIMIT 2
96#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
2dc80b00 97
b7cdb41e
ML
98static const struct drm_driver amdgpu_kms_driver;
99
050091ab 100const char *amdgpu_asic_name[] = {
da69c161
KW
101 "TAHITI",
102 "PITCAIRN",
103 "VERDE",
104 "OLAND",
105 "HAINAN",
d38ceaf9
AD
106 "BONAIRE",
107 "KAVERI",
108 "KABINI",
109 "HAWAII",
110 "MULLINS",
111 "TOPAZ",
112 "TONGA",
48299f95 113 "FIJI",
d38ceaf9 114 "CARRIZO",
139f4917 115 "STONEY",
2cc0c0b5
FC
116 "POLARIS10",
117 "POLARIS11",
c4642a47 118 "POLARIS12",
48ff108d 119 "VEGAM",
d4196f01 120 "VEGA10",
8fab806a 121 "VEGA12",
956fcddc 122 "VEGA20",
2ca8a5d2 123 "RAVEN",
d6c3b24e 124 "ARCTURUS",
1eee4228 125 "RENOIR",
d46b417a 126 "ALDEBARAN",
852a6626 127 "NAVI10",
d0f56dc2 128 "CYAN_SKILLFISH",
87dbad02 129 "NAVI14",
9802f5d7 130 "NAVI12",
ccaf72d3 131 "SIENNA_CICHLID",
ddd8fbe7 132 "NAVY_FLOUNDER",
4f1e9a76 133 "VANGOGH",
a2468e04 134 "DIMGREY_CAVEFISH",
6f169591 135 "BEIGE_GOBY",
ee9236b7 136 "YELLOW_CARP",
3ae695d6 137 "IP DISCOVERY",
d38ceaf9
AD
138 "LAST",
139};
140
dcea6e65
KR
141/**
142 * DOC: pcie_replay_count
143 *
144 * The amdgpu driver provides a sysfs API for reporting the total number
145 * of PCIe replays (NAKs).
146 * The file pcie_replay_count is used for this and returns the total
147 * number of replays as a sum of the NAKs generated and NAKs received
148 */
149
150static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
151 struct device_attribute *attr, char *buf)
152{
153 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 154 struct amdgpu_device *adev = drm_to_adev(ddev);
dcea6e65
KR
155 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
156
36000c7a 157 return sysfs_emit(buf, "%llu\n", cnt);
dcea6e65
KR
158}
159
b8920e1e 160static DEVICE_ATTR(pcie_replay_count, 0444,
dcea6e65
KR
161 amdgpu_device_get_pcie_replay_count, NULL);
162
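/*
 * Illustrative sketch, not part of the driver: one common way a device
 * attribute like the one above gets exposed is by attaching it to the
 * struct device during init.  The function name below is hypothetical.
 */
static int amdgpu_example_expose_replay_count(struct amdgpu_device *adev)
{
	return device_create_file(adev->dev, &dev_attr_pcie_replay_count);
}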
4798db85
LL
163/**
164 * DOC: board_info
165 *
166 * The amdgpu driver provides a sysfs API for giving board related information.
167 * It provides the form factor information in the format
168 *
169 * type : form factor
170 *
171 * Possible form factor values
172 *
173 * - "cem" - PCIE CEM card
174 * - "oam" - Open Compute Accelerator Module
175 * - "unknown" - Not known
176 *
177 */
178
76da73f0
LL
179static ssize_t amdgpu_device_get_board_info(struct device *dev,
180 struct device_attribute *attr,
181 char *buf)
182{
183 struct drm_device *ddev = dev_get_drvdata(dev);
184 struct amdgpu_device *adev = drm_to_adev(ddev);
185 enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM;
186 const char *pkg;
187
188 if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type)
189 pkg_type = adev->smuio.funcs->get_pkg_type(adev);
190
191 switch (pkg_type) {
192 case AMDGPU_PKG_TYPE_CEM:
193 pkg = "cem";
194 break;
195 case AMDGPU_PKG_TYPE_OAM:
196 pkg = "oam";
197 break;
198 default:
199 pkg = "unknown";
200 break;
201 }
202
203 return sysfs_emit(buf, "%s : %s\n", "type", pkg);
204}
205
206static DEVICE_ATTR(board_info, 0444, amdgpu_device_get_board_info, NULL);
207
208static struct attribute *amdgpu_board_attrs[] = {
209 &dev_attr_board_info.attr,
210 NULL,
211};
212
213static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj,
214 struct attribute *attr, int n)
215{
216 struct device *dev = kobj_to_dev(kobj);
217 struct drm_device *ddev = dev_get_drvdata(dev);
218 struct amdgpu_device *adev = drm_to_adev(ddev);
219
220 if (adev->flags & AMD_IS_APU)
221 return 0;
222
223 return attr->mode;
224}
225
226static const struct attribute_group amdgpu_board_attrs_group = {
227 .attrs = amdgpu_board_attrs,
228 .is_visible = amdgpu_board_attrs_is_visible
229};
230
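/*
 * Illustrative sketch, not part of the driver: an attribute group with an
 * .is_visible hook, like amdgpu_board_attrs_group above, is typically
 * registered once against the device; the helper name is hypothetical.
 */
static int amdgpu_example_register_board_attrs(struct amdgpu_device *adev)
{
	return devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
}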
5494d864
AD
231static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
232
bd607166 233
fd496ca8 234/**
b98c6299 235 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
fd496ca8
AD
236 *
237 * @dev: drm_device pointer
238 *
b98c6299 239 * Returns true if the device is a dGPU with ATPX power control,
fd496ca8
AD
240 * otherwise return false.
241 */
b98c6299 242bool amdgpu_device_supports_px(struct drm_device *dev)
fd496ca8
AD
243{
244 struct amdgpu_device *adev = drm_to_adev(dev);
245
b98c6299 246 if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
fd496ca8
AD
247 return true;
248 return false;
249}
250
e3ecdffa 251/**
0330b848 252 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
e3ecdffa
AD
253 *
254 * @dev: drm_device pointer
255 *
b98c6299 256 * Returns true if the device is a dGPU with ACPI power control,
e3ecdffa
AD
257 * otherwise return false.
258 */
31af062a 259bool amdgpu_device_supports_boco(struct drm_device *dev)
d38ceaf9 260{
1348969a 261 struct amdgpu_device *adev = drm_to_adev(dev);
d38ceaf9 262
b98c6299
AD
263 if (adev->has_pr3 ||
264 ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
d38ceaf9
AD
265 return true;
266 return false;
267}
268
a69cba42
AD
269/**
270 * amdgpu_device_supports_baco - Does the device support BACO
271 *
272 * @dev: drm_device pointer
273 *
274 * Returns true if the device supports BACO,
275 * otherwise return false.
276 */
277bool amdgpu_device_supports_baco(struct drm_device *dev)
278{
1348969a 279 struct amdgpu_device *adev = drm_to_adev(dev);
a69cba42
AD
280
281 return amdgpu_asic_supports_baco(adev);
282}
283
3fa8f89d
S
284/**
285 * amdgpu_device_supports_smart_shift - Is the device dGPU with
286 * smart shift support
287 *
288 * @dev: drm_device pointer
289 *
290 * Returns true if the device is a dGPU with Smart Shift support,
291 * otherwise returns false.
292 */
293bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
294{
295 return (amdgpu_device_supports_boco(dev) &&
296 amdgpu_acpi_is_power_shift_control_supported());
297}
298
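/*
 * Illustrative sketch, not part of the driver: how a caller might pick a
 * runtime power-off mechanism from the helpers above.  The function and the
 * return codes are hypothetical; the real selection lives in the KMS load path.
 */
static int amdgpu_example_pick_runpm_mode(struct drm_device *dev)
{
	if (amdgpu_device_supports_px(dev))
		return 1;	/* ATPX-based power control */
	if (amdgpu_device_supports_boco(dev))
		return 2;	/* ACPI power resources (_PR3) */
	if (amdgpu_device_supports_baco(dev))
		return 3;	/* Bus Active, Chip Off */
	return 0;		/* no runtime power-off support */
}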
6e3cd2a9
MCC
299/*
300 * VRAM access helper functions
301 */
302
e35e2b11 303/**
048af66b 304 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
e35e2b11
TY
305 *
306 * @adev: amdgpu_device pointer
307 * @pos: offset of the buffer in vram
308 * @buf: virtual address of the buffer in system memory
309 * @size: read/write size, @buf must be at least @size bytes
310 * @write: true - write to vram, otherwise - read from vram
311 */
048af66b
KW
312void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
313 void *buf, size_t size, bool write)
e35e2b11 314{
e35e2b11 315 unsigned long flags;
048af66b
KW
316 uint32_t hi = ~0, tmp = 0;
317 uint32_t *data = buf;
ce05ac56 318 uint64_t last;
f89f8c6b 319 int idx;
ce05ac56 320
c58a863b 321 if (!drm_dev_enter(adev_to_drm(adev), &idx))
f89f8c6b 322 return;
9d11eb0d 323
048af66b
KW
324 BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
325
326 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
327 for (last = pos + size; pos < last; pos += 4) {
328 tmp = pos >> 31;
329
330 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
331 if (tmp != hi) {
332 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
333 hi = tmp;
334 }
335 if (write)
336 WREG32_NO_KIQ(mmMM_DATA, *data++);
337 else
338 *data++ = RREG32_NO_KIQ(mmMM_DATA);
339 }
340
341 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
342 drm_dev_exit(idx);
343}
344
345/**
bbe04dec 346 * amdgpu_device_aper_access - access vram by vram aperture
048af66b
KW
347 *
348 * @adev: amdgpu_device pointer
349 * @pos: offset of the buffer in vram
350 * @buf: virtual address of the buffer in system memory
351 * @size: read/write size, @buf must be at least @size bytes
352 * @write: true - write to vram, otherwise - read from vram
353 *
354 * The return value means how many bytes have been transferred.
355 */
356size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
357 void *buf, size_t size, bool write)
358{
9d11eb0d 359#ifdef CONFIG_64BIT
048af66b
KW
360 void __iomem *addr;
361 size_t count = 0;
362 uint64_t last;
363
364 if (!adev->mman.aper_base_kaddr)
365 return 0;
366
9d11eb0d
CK
367 last = min(pos + size, adev->gmc.visible_vram_size);
368 if (last > pos) {
048af66b
KW
369 addr = adev->mman.aper_base_kaddr + pos;
370 count = last - pos;
9d11eb0d
CK
371
372 if (write) {
373 memcpy_toio(addr, buf, count);
4c452b5c
SS
374 /* Make sure HDP write cache flush happens without any reordering
375 * after the system memory contents are sent over PCIe device
376 */
9d11eb0d 377 mb();
810085dd 378 amdgpu_device_flush_hdp(adev, NULL);
9d11eb0d 379 } else {
810085dd 380 amdgpu_device_invalidate_hdp(adev, NULL);
4c452b5c
SS
381 /* Make sure HDP read cache is invalidated before issuing a read
382 * to the PCIe device
383 */
9d11eb0d
CK
384 mb();
385 memcpy_fromio(buf, addr, count);
386 }
387
9d11eb0d 388 }
048af66b
KW
389
390 return count;
391#else
392 return 0;
9d11eb0d 393#endif
048af66b 394}
9d11eb0d 395
048af66b
KW
396/**
397 * amdgpu_device_vram_access - read/write a buffer in vram
398 *
399 * @adev: amdgpu_device pointer
400 * @pos: offset of the buffer in vram
401 * @buf: virtual address of the buffer in system memory
402 * @size: read/write size, @buf must be at least @size bytes
403 * @write: true - write to vram, otherwise - read from vram
404 */
405void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
406 void *buf, size_t size, bool write)
407{
408 size_t count;
e35e2b11 409
048af66b
KW
410 /* try using the vram aperture to access vram first */
411 count = amdgpu_device_aper_access(adev, pos, buf, size, write);
412 size -= count;
413 if (size) {
414 /* use MM_INDEX/MM_DATA to access the rest of vram */
415 pos += count;
416 buf += count;
417 amdgpu_device_mm_access(adev, pos, buf, size, write);
e35e2b11
TY
418 }
419}
420
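/*
 * Illustrative sketch, not part of the driver: write a small buffer into
 * VRAM through the generic helper above and read it back.  The 0x1000
 * offset is a placeholder; a real caller would target memory it owns, and
 * sizes/offsets must stay 4-byte aligned for the MM_INDEX/MM_DATA fallback.
 */
static void amdgpu_example_vram_roundtrip(struct amdgpu_device *adev)
{
	u32 pattern[4] = { 0xdeadbeef, 0xcafebabe, 0x12345678, 0 };
	u32 check[4] = { 0 };

	amdgpu_device_vram_access(adev, 0x1000, pattern, sizeof(pattern), true);
	amdgpu_device_vram_access(adev, 0x1000, check, sizeof(check), false);
}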
d38ceaf9 421/*
f7ee1874 422 * register access helper functions.
d38ceaf9 423 */
56b53c0b
DL
424
425/* Check if hw access should be skipped because of hotplug or device error */
426bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
427{
7afefb81 428 if (adev->no_hw_access)
56b53c0b
DL
429 return true;
430
431#ifdef CONFIG_LOCKDEP
432 /*
433 * This is a bit complicated to understand, so worth a comment. What we assert
434 * here is that the GPU reset is not running on another thread in parallel.
435 *
436 * For this we trylock the read side of the reset semaphore, if that succeeds
437 * we know that the reset is not running in parallel.
438 *
439 * If the trylock fails we assert that we are either already holding the read
440 * side of the lock or are the reset thread itself and hold the write side of
441 * the lock.
442 */
443 if (in_task()) {
d0fb18b5
AG
444 if (down_read_trylock(&adev->reset_domain->sem))
445 up_read(&adev->reset_domain->sem);
56b53c0b 446 else
d0fb18b5 447 lockdep_assert_held(&adev->reset_domain->sem);
56b53c0b
DL
448 }
449#endif
450 return false;
451}
452
e3ecdffa 453/**
f7ee1874 454 * amdgpu_device_rreg - read a memory mapped IO or indirect register
e3ecdffa
AD
455 *
456 * @adev: amdgpu_device pointer
457 * @reg: dword aligned register offset
458 * @acc_flags: access flags which require special behavior
459 *
460 * Returns the 32 bit value from the offset specified.
461 */
f7ee1874
HZ
462uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
463 uint32_t reg, uint32_t acc_flags)
d38ceaf9 464{
f4b373f4
TSD
465 uint32_t ret;
466
56b53c0b 467 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
468 return 0;
469
f7ee1874
HZ
470 if ((reg * 4) < adev->rmmio_size) {
471 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
472 amdgpu_sriov_runtime(adev) &&
d0fb18b5 473 down_read_trylock(&adev->reset_domain->sem)) {
f7ee1874 474 ret = amdgpu_kiq_rreg(adev, reg);
d0fb18b5 475 up_read(&adev->reset_domain->sem);
f7ee1874
HZ
476 } else {
477 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
478 }
479 } else {
480 ret = adev->pcie_rreg(adev, reg * 4);
81202807 481 }
bc992ba5 482
f7ee1874 483 trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
e78b579d 484
f4b373f4 485 return ret;
d38ceaf9
AD
486}
487
421a2a30
ML
488/*
489 * MMIO register read with bytes helper functions
490 * @offset: byte offset from MMIO start
b8920e1e 491 */
421a2a30 492
e3ecdffa
AD
493/**
494 * amdgpu_mm_rreg8 - read a memory mapped IO register
495 *
496 * @adev: amdgpu_device pointer
497 * @offset: byte aligned register offset
498 *
499 * Returns the 8 bit value from the offset specified.
500 */
7cbbc745
AG
501uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
502{
56b53c0b 503 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
504 return 0;
505
421a2a30
ML
506 if (offset < adev->rmmio_size)
507 return (readb(adev->rmmio + offset));
508 BUG();
509}
510
511/*
512 * MMIO register write with bytes helper functions
513 * @offset: byte offset from MMIO start
514 * @value: the value to be written to the register
b8920e1e
SS
515 */
516
e3ecdffa
AD
517/**
518 * amdgpu_mm_wreg8 - write a memory mapped IO register
519 *
520 * @adev: amdgpu_device pointer
521 * @offset: byte aligned register offset
522 * @value: 8 bit value to write
523 *
524 * Writes the value specified to the offset specified.
525 */
7cbbc745
AG
526void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
527{
56b53c0b 528 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
529 return;
530
421a2a30
ML
531 if (offset < adev->rmmio_size)
532 writeb(value, adev->rmmio + offset);
533 else
534 BUG();
535}
536
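/*
 * Illustrative sketch, not part of the driver: byte-wide MMIO access using
 * the two helpers above.  The offset is a placeholder.
 */
static void amdgpu_example_toggle_byte(struct amdgpu_device *adev, u32 offset)
{
	u8 v = amdgpu_mm_rreg8(adev, offset);

	amdgpu_mm_wreg8(adev, offset, v ^ 0x1);
}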
e3ecdffa 537/**
f7ee1874 538 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
e3ecdffa
AD
539 *
540 * @adev: amdgpu_device pointer
541 * @reg: dword aligned register offset
542 * @v: 32 bit value to write to the register
543 * @acc_flags: access flags which require special behavior
544 *
545 * Writes the value specified to the offset specified.
546 */
f7ee1874
HZ
547void amdgpu_device_wreg(struct amdgpu_device *adev,
548 uint32_t reg, uint32_t v,
549 uint32_t acc_flags)
d38ceaf9 550{
56b53c0b 551 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
552 return;
553
f7ee1874
HZ
554 if ((reg * 4) < adev->rmmio_size) {
555 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
556 amdgpu_sriov_runtime(adev) &&
d0fb18b5 557 down_read_trylock(&adev->reset_domain->sem)) {
f7ee1874 558 amdgpu_kiq_wreg(adev, reg, v);
d0fb18b5 559 up_read(&adev->reset_domain->sem);
f7ee1874
HZ
560 } else {
561 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
562 }
563 } else {
564 adev->pcie_wreg(adev, reg * 4, v);
81202807 565 }
bc992ba5 566
f7ee1874 567 trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
2e0cc4d4 568}
d38ceaf9 569
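/*
 * Illustrative sketch, not part of the driver: a read-modify-write of a
 * dword-aligned register through amdgpu_device_rreg()/amdgpu_device_wreg().
 * The register offset, field mask and value are placeholders; acc_flags of 0
 * lets the helpers route the access through KIQ under SR-IOV when needed.
 */
static void amdgpu_example_rmw(struct amdgpu_device *adev, u32 reg)
{
	u32 val;

	val = amdgpu_device_rreg(adev, reg, 0);
	val &= ~0xffu;		/* clear a hypothetical field */
	val |= 0x1;		/* program a hypothetical value */
	amdgpu_device_wreg(adev, reg, val, 0);
}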
03f2abb0 570/**
4cc9f86f 571 * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
2e0cc4d4 572 *
71579346
RB
573 * @adev: amdgpu_device pointer
574 * @reg: mmio/rlc register
575 * @v: value to write
8057a9d6 576 * @xcc_id: xcc accelerated compute core id
71579346
RB
577 *
578 * this function is invoked only for the debugfs register access
03f2abb0 579 */
f7ee1874 580void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
8ed49dd1
VL
581 uint32_t reg, uint32_t v,
582 uint32_t xcc_id)
2e0cc4d4 583{
56b53c0b 584 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
585 return;
586
2e0cc4d4 587 if (amdgpu_sriov_fullaccess(adev) &&
f7ee1874
HZ
588 adev->gfx.rlc.funcs &&
589 adev->gfx.rlc.funcs->is_rlcg_access_range) {
2e0cc4d4 590 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
8ed49dd1 591 return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
4cc9f86f
TSD
592 } else if ((reg * 4) >= adev->rmmio_size) {
593 adev->pcie_wreg(adev, reg * 4, v);
f7ee1874
HZ
594 } else {
595 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
47ed4e1c 596 }
d38ceaf9
AD
597}
598
1bba3683
HZ
599/**
600 * amdgpu_device_indirect_rreg - read an indirect register
601 *
602 * @adev: amdgpu_device pointer
22f453fb 603 * @reg_addr: indirect register address to read from
1bba3683
HZ
604 *
605 * Returns the value of indirect register @reg_addr
606 */
607u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
1bba3683
HZ
608 u32 reg_addr)
609{
65ba96e9 610 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
611 void __iomem *pcie_index_offset;
612 void __iomem *pcie_data_offset;
65ba96e9
HZ
613 u32 r;
614
615 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
616 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1bba3683
HZ
617
618 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
619 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
620 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
621
622 writel(reg_addr, pcie_index_offset);
623 readl(pcie_index_offset);
624 r = readl(pcie_data_offset);
625 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
626
627 return r;
628}
629
0c552ed3
LM
630u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
631 u64 reg_addr)
632{
633 unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
634 u32 r;
635 void __iomem *pcie_index_offset;
636 void __iomem *pcie_index_hi_offset;
637 void __iomem *pcie_data_offset;
638
639 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
640 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
d57e24aa 641 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
0c552ed3
LM
642 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
643 else
644 pcie_index_hi = 0;
645
646 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
647 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
648 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
649 if (pcie_index_hi != 0)
650 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
651 pcie_index_hi * 4;
652
653 writel(reg_addr, pcie_index_offset);
654 readl(pcie_index_offset);
655 if (pcie_index_hi != 0) {
656 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
657 readl(pcie_index_hi_offset);
658 }
659 r = readl(pcie_data_offset);
660
661 /* clear the high bits */
662 if (pcie_index_hi != 0) {
663 writel(0, pcie_index_hi_offset);
664 readl(pcie_index_hi_offset);
665 }
666
667 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
668
669 return r;
670}
671
1bba3683
HZ
672/**
673 * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
674 *
675 * @adev: amdgpu_device pointer
22f453fb 676 * @reg_addr: indirect register address to read from
1bba3683
HZ
677 *
678 * Returns the value of indirect register @reg_addr
679 */
680u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
1bba3683
HZ
681 u32 reg_addr)
682{
65ba96e9 683 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
684 void __iomem *pcie_index_offset;
685 void __iomem *pcie_data_offset;
65ba96e9
HZ
686 u64 r;
687
688 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
689 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1bba3683
HZ
690
691 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
692 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
693 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
694
695 /* read low 32 bits */
696 writel(reg_addr, pcie_index_offset);
697 readl(pcie_index_offset);
698 r = readl(pcie_data_offset);
699 /* read high 32 bits */
700 writel(reg_addr + 4, pcie_index_offset);
701 readl(pcie_index_offset);
702 r |= ((u64)readl(pcie_data_offset) << 32);
703 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
704
705 return r;
706}
707
a76b2870
CL
708u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
709 u64 reg_addr)
710{
711 unsigned long flags, pcie_index, pcie_data;
712 unsigned long pcie_index_hi = 0;
713 void __iomem *pcie_index_offset;
714 void __iomem *pcie_index_hi_offset;
715 void __iomem *pcie_data_offset;
716 u64 r;
717
718 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
719 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
720 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
721 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
722
723 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
724 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
725 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
726 if (pcie_index_hi != 0)
727 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
728 pcie_index_hi * 4;
729
730 /* read low 32 bits */
731 writel(reg_addr, pcie_index_offset);
732 readl(pcie_index_offset);
733 if (pcie_index_hi != 0) {
734 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
735 readl(pcie_index_hi_offset);
736 }
737 r = readl(pcie_data_offset);
738 /* read high 32 bits */
739 writel(reg_addr + 4, pcie_index_offset);
740 readl(pcie_index_offset);
741 if (pcie_index_hi != 0) {
742 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
743 readl(pcie_index_hi_offset);
744 }
745 r |= ((u64)readl(pcie_data_offset) << 32);
746
747 /* clear the high bits */
748 if (pcie_index_hi != 0) {
749 writel(0, pcie_index_hi_offset);
750 readl(pcie_index_hi_offset);
751 }
752
753 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
754
755 return r;
756}
757
1bba3683
HZ
758/**
759 * amdgpu_device_indirect_wreg - write an indirect register address
760 *
761 * @adev: amdgpu_device pointer
1bba3683
HZ
762 * @reg_addr: indirect register offset
763 * @reg_data: indirect register data
764 *
765 */
766void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
1bba3683
HZ
767 u32 reg_addr, u32 reg_data)
768{
65ba96e9 769 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
770 void __iomem *pcie_index_offset;
771 void __iomem *pcie_data_offset;
772
65ba96e9
HZ
773 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
774 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
775
1bba3683
HZ
776 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
777 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
778 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
779
780 writel(reg_addr, pcie_index_offset);
781 readl(pcie_index_offset);
782 writel(reg_data, pcie_data_offset);
783 readl(pcie_data_offset);
784 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
785}
786
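/*
 * Illustrative sketch, not part of the driver: SoC-specific code typically
 * points adev->pcie_rreg/adev->pcie_wreg at thin wrappers around the
 * indirect helpers above, roughly like the (made-up) wrappers below.
 */
static u32 example_pcie_rreg(struct amdgpu_device *adev, u32 reg)
{
	return amdgpu_device_indirect_rreg(adev, reg);
}

static void example_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
	amdgpu_device_indirect_wreg(adev, reg, v);
}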
0c552ed3
LM
787void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
788 u64 reg_addr, u32 reg_data)
789{
790 unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
791 void __iomem *pcie_index_offset;
792 void __iomem *pcie_index_hi_offset;
793 void __iomem *pcie_data_offset;
794
795 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
796 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
d57e24aa 797 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
0c552ed3
LM
798 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
799 else
800 pcie_index_hi = 0;
801
802 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
803 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
804 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
805 if (pcie_index_hi != 0)
806 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
807 pcie_index_hi * 4;
808
809 writel(reg_addr, pcie_index_offset);
810 readl(pcie_index_offset);
811 if (pcie_index_hi != 0) {
812 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
813 readl(pcie_index_hi_offset);
814 }
815 writel(reg_data, pcie_data_offset);
816 readl(pcie_data_offset);
817
818 /* clear the high bits */
819 if (pcie_index_hi != 0) {
820 writel(0, pcie_index_hi_offset);
821 readl(pcie_index_hi_offset);
822 }
823
824 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
825}
826
1bba3683
HZ
827/**
828 * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
829 *
830 * @adev: amdgpu_device pointer
1bba3683
HZ
831 * @reg_addr: indirect register offset
832 * @reg_data: indirect register data
833 *
834 */
835void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
1bba3683
HZ
836 u32 reg_addr, u64 reg_data)
837{
65ba96e9 838 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
839 void __iomem *pcie_index_offset;
840 void __iomem *pcie_data_offset;
841
65ba96e9
HZ
842 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
843 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
844
1bba3683
HZ
845 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
846 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
847 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
848
849 /* write low 32 bits */
850 writel(reg_addr, pcie_index_offset);
851 readl(pcie_index_offset);
852 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
853 readl(pcie_data_offset);
854 /* write high 32 bits */
855 writel(reg_addr + 4, pcie_index_offset);
856 readl(pcie_index_offset);
857 writel((u32)(reg_data >> 32), pcie_data_offset);
858 readl(pcie_data_offset);
859 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
860}
861
a76b2870
CL
862void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
863 u64 reg_addr, u64 reg_data)
864{
865 unsigned long flags, pcie_index, pcie_data;
866 unsigned long pcie_index_hi = 0;
867 void __iomem *pcie_index_offset;
868 void __iomem *pcie_index_hi_offset;
869 void __iomem *pcie_data_offset;
870
871 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
872 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
873 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
874 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
875
876 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
877 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
878 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
879 if (pcie_index_hi != 0)
880 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
881 pcie_index_hi * 4;
882
883 /* write low 32 bits */
884 writel(reg_addr, pcie_index_offset);
885 readl(pcie_index_offset);
886 if (pcie_index_hi != 0) {
887 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
888 readl(pcie_index_hi_offset);
889 }
890 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
891 readl(pcie_data_offset);
892 /* write high 32 bits */
893 writel(reg_addr + 4, pcie_index_offset);
894 readl(pcie_index_offset);
895 if (pcie_index_hi != 0) {
896 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
897 readl(pcie_index_hi_offset);
898 }
899 writel((u32)(reg_data >> 32), pcie_data_offset);
900 readl(pcie_data_offset);
901
902 /* clear the high bits */
903 if (pcie_index_hi != 0) {
904 writel(0, pcie_index_hi_offset);
905 readl(pcie_index_hi_offset);
906 }
907
908 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
909}
910
dabc114e
HZ
911/**
912 * amdgpu_device_get_rev_id - query device rev_id
913 *
914 * @adev: amdgpu_device pointer
915 *
916 * Return device rev_id
917 */
918u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
919{
920 return adev->nbio.funcs->get_rev_id(adev);
921}
922
d38ceaf9
AD
923/**
924 * amdgpu_invalid_rreg - dummy reg read function
925 *
982a820b 926 * @adev: amdgpu_device pointer
d38ceaf9
AD
927 * @reg: offset of register
928 *
929 * Dummy register read function. Used for register blocks
930 * that certain asics don't have (all asics).
931 * Returns the value in the register.
932 */
933static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
934{
935 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
936 BUG();
937 return 0;
938}
939
0c552ed3
LM
940static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
941{
942 DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
943 BUG();
944 return 0;
945}
946
d38ceaf9
AD
947/**
948 * amdgpu_invalid_wreg - dummy reg write function
949 *
982a820b 950 * @adev: amdgpu_device pointer
d38ceaf9
AD
951 * @reg: offset of register
952 * @v: value to write to the register
953 *
954 * Dummy register write function. Used for register blocks
955 * that certain asics don't have (all asics).
956 */
957static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
958{
959 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
960 reg, v);
961 BUG();
962}
963
0c552ed3
LM
964static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
965{
966 DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
967 reg, v);
968 BUG();
969}
970
4fa1c6a6
TZ
971/**
972 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
973 *
982a820b 974 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
975 * @reg: offset of register
976 *
977 * Dummy register read function. Used for register blocks
978 * that certain asics don't have (all asics).
979 * Returns the value in the register.
980 */
981static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
982{
983 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
984 BUG();
985 return 0;
986}
987
a76b2870
CL
988static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg)
989{
990 DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
991 BUG();
992 return 0;
993}
994
4fa1c6a6
TZ
995/**
996 * amdgpu_invalid_wreg64 - dummy reg write function
997 *
982a820b 998 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
999 * @reg: offset of register
1000 * @v: value to write to the register
1001 *
1002 * Dummy register write function. Used for register blocks
1003 * that certain asics don't have (all asics).
1004 */
1005static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
1006{
1007 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
1008 reg, v);
1009 BUG();
1010}
1011
a76b2870
CL
1012static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v)
1013{
1014 DRM_ERROR("Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n",
1015 reg, v);
1016 BUG();
1017}
1018
d38ceaf9
AD
1019/**
1020 * amdgpu_block_invalid_rreg - dummy reg read function
1021 *
982a820b 1022 * @adev: amdgpu_device pointer
d38ceaf9
AD
1023 * @block: offset of instance
1024 * @reg: offset of register
1025 *
1026 * Dummy register read function. Used for register blocks
1027 * that certain asics don't have (all asics).
1028 * Returns the value in the register.
1029 */
1030static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
1031 uint32_t block, uint32_t reg)
1032{
1033 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
1034 reg, block);
1035 BUG();
1036 return 0;
1037}
1038
1039/**
1040 * amdgpu_block_invalid_wreg - dummy reg write function
1041 *
982a820b 1042 * @adev: amdgpu_device pointer
d38ceaf9
AD
1043 * @block: offset of instance
1044 * @reg: offset of register
1045 * @v: value to write to the register
1046 *
1047 * Dummy register write function. Used for register blocks
1048 * that certain asics don't have (all asics).
1049 */
1050static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
1051 uint32_t block,
1052 uint32_t reg, uint32_t v)
1053{
1054 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
1055 reg, block, v);
1056 BUG();
1057}
1058
4d2997ab
AD
1059/**
1060 * amdgpu_device_asic_init - Wrapper for atom asic_init
1061 *
982a820b 1062 * @adev: amdgpu_device pointer
4d2997ab
AD
1063 *
1064 * Does any asic specific work and then calls atom asic init.
1065 */
1066static int amdgpu_device_asic_init(struct amdgpu_device *adev)
1067{
15c5c5f5
LL
1068 int ret;
1069
4d2997ab
AD
1070 amdgpu_asic_pre_asic_init(adev);
1071
4e8303cf
LL
1072 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
1073 amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
15c5c5f5
LL
1074 amdgpu_psp_wait_for_bootloader(adev);
1075 ret = amdgpu_atomfirmware_asic_init(adev, true);
1076 return ret;
1077 } else {
85d1bcc6 1078 return amdgpu_atom_asic_init(adev->mode_info.atom_context);
15c5c5f5
LL
1079 }
1080
1081 return 0;
4d2997ab
AD
1082}
1083
e3ecdffa 1084/**
7ccfd79f 1085 * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
e3ecdffa 1086 *
982a820b 1087 * @adev: amdgpu_device pointer
e3ecdffa
AD
1088 *
1089 * Allocates a scratch page of VRAM for use by various things in the
1090 * driver.
1091 */
7ccfd79f 1092static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
d38ceaf9 1093{
7ccfd79f
CK
1094 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
1095 AMDGPU_GEM_DOMAIN_VRAM |
1096 AMDGPU_GEM_DOMAIN_GTT,
1097 &adev->mem_scratch.robj,
1098 &adev->mem_scratch.gpu_addr,
1099 (void **)&adev->mem_scratch.ptr);
d38ceaf9
AD
1100}
1101
e3ecdffa 1102/**
7ccfd79f 1103 * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
e3ecdffa 1104 *
982a820b 1105 * @adev: amdgpu_device pointer
e3ecdffa
AD
1106 *
1107 * Frees the VRAM scratch page.
1108 */
7ccfd79f 1109static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 1110{
7ccfd79f 1111 amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
d38ceaf9
AD
1112}
1113
1114/**
9c3f2b54 1115 * amdgpu_device_program_register_sequence - program an array of registers.
d38ceaf9
AD
1116 *
1117 * @adev: amdgpu_device pointer
1118 * @registers: pointer to the register array
1119 * @array_size: size of the register array
1120 *
b8920e1e 1121 * Programs an array of registers with AND and OR masks.
d38ceaf9
AD
1122 * This is a helper for setting golden registers.
1123 */
9c3f2b54
AD
1124void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
1125 const u32 *registers,
1126 const u32 array_size)
d38ceaf9
AD
1127{
1128 u32 tmp, reg, and_mask, or_mask;
1129 int i;
1130
1131 if (array_size % 3)
1132 return;
1133
47fc644f 1134 for (i = 0; i < array_size; i += 3) {
d38ceaf9
AD
1135 reg = registers[i + 0];
1136 and_mask = registers[i + 1];
1137 or_mask = registers[i + 2];
1138
1139 if (and_mask == 0xffffffff) {
1140 tmp = or_mask;
1141 } else {
1142 tmp = RREG32(reg);
1143 tmp &= ~and_mask;
e0d07657
HZ
1144 if (adev->family >= AMDGPU_FAMILY_AI)
1145 tmp |= (or_mask & and_mask);
1146 else
1147 tmp |= or_mask;
d38ceaf9
AD
1148 }
1149 WREG32(reg, tmp);
1150 }
1151}
1152
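/*
 * Illustrative sketch, not part of the driver: a golden-register table for
 * amdgpu_device_program_register_sequence() is a flat array of
 * {offset, and_mask, or_mask} triples.  The offsets and masks below are
 * placeholders.
 */
static const u32 example_golden_settings[] = {
	/* offset, and_mask,   or_mask */
	0x1234,    0xffffffff, 0x00000001,	/* full overwrite */
	0x1238,    0x0000ff00, 0x00002a00,	/* read-modify-write of one field */
};

/*
 * amdgpu_device_program_register_sequence(adev, example_golden_settings,
 *					    ARRAY_SIZE(example_golden_settings));
 */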
e3ecdffa
AD
1153/**
1154 * amdgpu_device_pci_config_reset - reset the GPU
1155 *
1156 * @adev: amdgpu_device pointer
1157 *
1158 * Resets the GPU using the pci config reset sequence.
1159 * Only applicable to asics prior to vega10.
1160 */
8111c387 1161void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
1162{
1163 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
1164}
1165
af484df8
AD
1166/**
1167 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1168 *
1169 * @adev: amdgpu_device pointer
1170 *
1171 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1172 */
1173int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1174{
1175 return pci_reset_function(adev->pdev);
1176}
1177
d38ceaf9 1178/*
06ec9070 1179 * amdgpu_device_wb_*()
455a7bc2 1180 * Writeback is the method by which the GPU updates special pages in memory
ea81a173 1181 * with the status of certain GPU events (fences, ring pointers,etc.).
d38ceaf9
AD
1182 */
1183
1184/**
06ec9070 1185 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
1186 *
1187 * @adev: amdgpu_device pointer
1188 *
1189 * Disables Writeback and frees the Writeback memory (all asics).
1190 * Used at driver shutdown.
1191 */
06ec9070 1192static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1193{
1194 if (adev->wb.wb_obj) {
a76ed485
AD
1195 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1196 &adev->wb.gpu_addr,
1197 (void **)&adev->wb.wb);
d38ceaf9
AD
1198 adev->wb.wb_obj = NULL;
1199 }
1200}
1201
1202/**
03f2abb0 1203 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
d38ceaf9
AD
1204 *
1205 * @adev: amdgpu_device pointer
1206 *
455a7bc2 1207 * Initializes writeback and allocates writeback memory (all asics).
d38ceaf9
AD
1208 * Used at driver startup.
1209 * Returns 0 on success or an -error on failure.
1210 */
06ec9070 1211static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
1212{
1213 int r;
1214
1215 if (adev->wb.wb_obj == NULL) {
97407b63
AD
1216 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1217 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
1218 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1219 &adev->wb.wb_obj, &adev->wb.gpu_addr,
1220 (void **)&adev->wb.wb);
d38ceaf9
AD
1221 if (r) {
1222 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1223 return r;
1224 }
d38ceaf9
AD
1225
1226 adev->wb.num_wb = AMDGPU_MAX_WB;
1227 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1228
1229 /* clear wb memory */
73469585 1230 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
1231 }
1232
1233 return 0;
1234}
1235
1236/**
131b4b36 1237 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
1238 *
1239 * @adev: amdgpu_device pointer
1240 * @wb: wb index
1241 *
1242 * Allocate a wb slot for use by the driver (all asics).
1243 * Returns 0 on success or -EINVAL on failure.
1244 */
131b4b36 1245int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9
AD
1246{
1247 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 1248
97407b63 1249 if (offset < adev->wb.num_wb) {
7014285a 1250 __set_bit(offset, adev->wb.used);
63ae07ca 1251 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
1252 return 0;
1253 } else {
1254 return -EINVAL;
1255 }
1256}
1257
d38ceaf9 1258/**
131b4b36 1259 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
1260 *
1261 * @adev: amdgpu_device pointer
1262 * @wb: wb index
1263 *
1264 * Free a wb slot allocated for use by the driver (all asics)
1265 */
131b4b36 1266void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 1267{
73469585 1268 wb >>= 3;
d38ceaf9 1269 if (wb < adev->wb.num_wb)
73469585 1270 __clear_bit(wb, adev->wb.used);
d38ceaf9
AD
1271}
1272
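/*
 * Illustrative sketch, not part of the driver: allocate a writeback slot,
 * touch it through the CPU mapping, then release it.  Error handling is
 * trimmed to the essentials and the function name is hypothetical.
 */
static int amdgpu_example_use_wb(struct amdgpu_device *adev)
{
	u32 wb;
	int r;

	r = amdgpu_device_wb_get(adev, &wb);
	if (r)
		return r;

	/* wb is already a dword offset into the writeback page */
	adev->wb.wb[wb] = 0;
	/* the GPU address of the slot would be adev->wb.gpu_addr + wb * 4 */

	amdgpu_device_wb_free(adev, wb);
	return 0;
}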
d6895ad3
CK
1273/**
1274 * amdgpu_device_resize_fb_bar - try to resize FB BAR
1275 *
1276 * @adev: amdgpu_device pointer
1277 *
1278 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1279 * to fail, but if any of the BARs is not accessible after the size we abort
1280 * driver loading by returning -ENODEV.
1281 */
1282int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1283{
453f617a 1284 int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
31b8adab
CK
1285 struct pci_bus *root;
1286 struct resource *res;
b8920e1e 1287 unsigned int i;
d6895ad3
CK
1288 u16 cmd;
1289 int r;
1290
822130b5
AB
1291 if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1292 return 0;
1293
0c03b912 1294 /* Bypass for VF */
1295 if (amdgpu_sriov_vf(adev))
1296 return 0;
1297
b7221f2b
AD
1298 /* skip if the bios has already enabled large BAR */
1299 if (adev->gmc.real_vram_size &&
1300 (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1301 return 0;
1302
31b8adab
CK
1303 /* Check if the root BUS has 64bit memory resources */
1304 root = adev->pdev->bus;
1305 while (root->parent)
1306 root = root->parent;
1307
1308 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 1309 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
1310 res->start > 0x100000000ull)
1311 break;
1312 }
1313
1314 /* Trying to resize is pointless without a root hub window above 4GB */
1315 if (!res)
1316 return 0;
1317
453f617a
ND
1318 /* Limit the BAR size to what is available */
1319 rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1320 rbar_size);
1321
d6895ad3
CK
1322 /* Disable memory decoding while we change the BAR addresses and size */
1323 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1324 pci_write_config_word(adev->pdev, PCI_COMMAND,
1325 cmd & ~PCI_COMMAND_MEMORY);
1326
1327 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
43c064db 1328 amdgpu_doorbell_fini(adev);
d6895ad3
CK
1329 if (adev->asic_type >= CHIP_BONAIRE)
1330 pci_release_resource(adev->pdev, 2);
1331
1332 pci_release_resource(adev->pdev, 0);
1333
1334 r = pci_resize_resource(adev->pdev, 0, rbar_size);
1335 if (r == -ENOSPC)
1336 DRM_INFO("Not enough PCI address space for a large BAR.");
1337 else if (r && r != -ENOTSUPP)
1338 DRM_ERROR("Problem resizing BAR0 (%d).", r);
1339
1340 pci_assign_unassigned_bus_resources(adev->pdev->bus);
1341
1342 /* When the doorbell or fb BAR isn't available we have no chance of
1343 * using the device.
1344 */
43c064db 1345 r = amdgpu_doorbell_init(adev);
d6895ad3
CK
1346 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1347 return -ENODEV;
1348
1349 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1350
1351 return 0;
1352}
a05502e5 1353
9535a86a
SZ
1354static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
1355{
b8920e1e 1356 if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
9535a86a 1357 return false;
9535a86a
SZ
1358
1359 return true;
1360}
1361
d38ceaf9
AD
1362/*
1363 * GPU helpers function.
1364 */
1365/**
39c640c0 1366 * amdgpu_device_need_post - check if the hw need post or not
d38ceaf9
AD
1367 *
1368 * @adev: amdgpu_device pointer
1369 *
c836fec5
JQ
1370 * Check if the asic has been initialized (all asics) at driver startup
1371 * or post is needed if hw reset is performed.
1372 * Returns true if need or false if not.
d38ceaf9 1373 */
39c640c0 1374bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
1375{
1376 uint32_t reg;
1377
bec86378
ML
1378 if (amdgpu_sriov_vf(adev))
1379 return false;
1380
9535a86a
SZ
1381 if (!amdgpu_device_read_bios(adev))
1382 return false;
1383
bec86378 1384 if (amdgpu_passthrough(adev)) {
1da2c326
ML
1385 /* for FIJI: in the whole-GPU pass-through virtualization case, after a VM reboot
1386 * some old SMC firmware still needs the driver to do vPost, otherwise the GPU hangs.
1387 * SMC firmware versions above 22.15 don't have this flaw, so force
1388 * vPost for SMC versions below 22.15
bec86378
ML
1389 */
1390 if (adev->asic_type == CHIP_FIJI) {
1391 int err;
1392 uint32_t fw_ver;
b8920e1e 1393
bec86378
ML
1394 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1395 /* force vPost if error occured */
1396 if (err)
1397 return true;
1398
1399 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1da2c326
ML
1400 if (fw_ver < 0x00160e00)
1401 return true;
bec86378 1402 }
bec86378 1403 }
91fe77eb 1404
e3c1b071 1405 /* Don't post if we need to reset whole hive on init */
1406 if (adev->gmc.xgmi.pending_reset)
1407 return false;
1408
91fe77eb 1409 if (adev->has_hw_reset) {
1410 adev->has_hw_reset = false;
1411 return true;
1412 }
1413
1414 /* bios scratch used on CIK+ */
1415 if (adev->asic_type >= CHIP_BONAIRE)
1416 return amdgpu_atombios_scratch_need_asic_init(adev);
1417
1418 /* check MEM_SIZE for older asics */
1419 reg = amdgpu_asic_get_config_memsize(adev);
1420
1421 if ((reg != 0) && (reg != 0xffffffff))
1422 return false;
1423
1424 return true;
70e64c4d
ML
1425}
1426
bb0f8429
ML
1427/*
1428 * Check whether seamless boot is supported.
1429 *
7f4ce7b5
ML
1430 * So far we only support seamless boot on DCE 3.0 or later.
1431 * If users report that it works on older ASICS as well, we may
1432 * loosen this.
bb0f8429
ML
1433 */
1434bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
1435{
5dc270d3
ML
1436 switch (amdgpu_seamless) {
1437 case -1:
1438 break;
1439 case 1:
1440 return true;
1441 case 0:
1442 return false;
1443 default:
1444 DRM_ERROR("Invalid value for amdgpu.seamless: %d\n",
1445 amdgpu_seamless);
1446 return false;
1447 }
1448
3657a1d5
ML
1449 if (!(adev->flags & AMD_IS_APU))
1450 return false;
1451
5dc270d3
ML
1452 if (adev->mman.keep_stolen_vga_memory)
1453 return false;
1454
7f4ce7b5 1455 return adev->ip_versions[DCE_HWIP][0] >= IP_VERSION(3, 0, 0);
bb0f8429
ML
1456}
1457
5d1eb4c4
ML
1458/*
1459 * Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic
1460 * speed switching. Until we have confirmation from Intel that a specific host
1461 * supports it, it's safer that we keep it disabled for all.
1462 *
1463 * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1464 * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1465 */
1466bool amdgpu_device_pcie_dynamic_switching_supported(void)
1467{
1468#if IS_ENABLED(CONFIG_X86)
1469 struct cpuinfo_x86 *c = &cpu_data(0);
1470
1471 if (c->x86_vendor == X86_VENDOR_INTEL)
1472 return false;
1473#endif
1474 return true;
1475}
1476
0ab5d711
ML
1477/**
1478 * amdgpu_device_should_use_aspm - check if the device should program ASPM
1479 *
1480 * @adev: amdgpu_device pointer
1481 *
1482 * Confirm whether the module parameter and pcie bridge agree that ASPM should
1483 * be set for this device.
1484 *
1485 * Returns true if it should be used or false if not.
1486 */
1487bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1488{
1489 switch (amdgpu_aspm) {
1490 case -1:
1491 break;
1492 case 0:
1493 return false;
1494 case 1:
1495 return true;
1496 default:
1497 return false;
1498 }
1a6513de
ML
1499 if (adev->flags & AMD_IS_APU)
1500 return false;
0ab5d711
ML
1501 return pcie_aspm_enabled(adev->pdev);
1502}
1503
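/*
 * Illustrative sketch, not part of the driver: with the AMD_IS_APU check
 * folded into amdgpu_device_should_use_aspm() (the change in this commit),
 * per-ASIC code only needs this single query before touching ASPM.  The
 * function below and its body are placeholders for an IP-specific routine.
 */
static void example_nbio_program_aspm(struct amdgpu_device *adev)
{
	if (!amdgpu_device_should_use_aspm(adev))
		return;

	/* IP-specific ASPM programming would go here */
}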
3ad5dcfe
KHF
1504bool amdgpu_device_aspm_support_quirk(void)
1505{
1506#if IS_ENABLED(CONFIG_X86)
1507 struct cpuinfo_x86 *c = &cpu_data(0);
1508
1509 return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
1510#else
1511 return true;
1512#endif
1513}
1514
d38ceaf9
AD
1515/* if we get transitioned to only one device, take VGA back */
1516/**
06ec9070 1517 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9 1518 *
bf44e8ce 1519 * @pdev: PCI device pointer
d38ceaf9
AD
1520 * @state: enable/disable vga decode
1521 *
1522 * Enable/disable vga decode (all asics).
1523 * Returns VGA resource flags.
1524 */
bf44e8ce
CH
1525static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1526 bool state)
d38ceaf9 1527{
bf44e8ce 1528 struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
b8920e1e 1529
d38ceaf9
AD
1530 amdgpu_asic_set_vga_state(adev, state);
1531 if (state)
1532 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1533 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1534 else
1535 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1536}
1537
e3ecdffa
AD
1538/**
1539 * amdgpu_device_check_block_size - validate the vm block size
1540 *
1541 * @adev: amdgpu_device pointer
1542 *
1543 * Validates the vm block size specified via module parameter.
1544 * The vm block size defines number of bits in page table versus page directory,
1545 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1546 * page table and the remaining bits are in the page directory.
1547 */
06ec9070 1548static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
1549{
1550 /* defines number of bits in page table versus page directory,
1551 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
b8920e1e
SS
1552 * page table and the remaining bits are in the page directory
1553 */
bab4fee7
JZ
1554 if (amdgpu_vm_block_size == -1)
1555 return;
a1adf8be 1556
bab4fee7 1557 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
1558 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1559 amdgpu_vm_block_size);
97489129 1560 amdgpu_vm_block_size = -1;
a1adf8be 1561 }
a1adf8be
CZ
1562}
1563
e3ecdffa
AD
1564/**
1565 * amdgpu_device_check_vm_size - validate the vm size
1566 *
1567 * @adev: amdgpu_device pointer
1568 *
1569 * Validates the vm size in GB specified via module parameter.
1570 * The VM size is the size of the GPU virtual memory space in GB.
1571 */
06ec9070 1572static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 1573{
64dab074
AD
1574 /* no need to check the default value */
1575 if (amdgpu_vm_size == -1)
1576 return;
1577
83ca145d
ZJ
1578 if (amdgpu_vm_size < 1) {
1579 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1580 amdgpu_vm_size);
f3368128 1581 amdgpu_vm_size = -1;
83ca145d 1582 }
83ca145d
ZJ
1583}
1584
7951e376
RZ
1585static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1586{
1587 struct sysinfo si;
a9d4fe2f 1588 bool is_os_64 = (sizeof(void *) == 8);
7951e376
RZ
1589 uint64_t total_memory;
1590 uint64_t dram_size_seven_GB = 0x1B8000000;
1591 uint64_t dram_size_three_GB = 0xB8000000;
1592
1593 if (amdgpu_smu_memory_pool_size == 0)
1594 return;
1595
1596 if (!is_os_64) {
1597 DRM_WARN("Not 64-bit OS, feature not supported\n");
1598 goto def_value;
1599 }
1600 si_meminfo(&si);
1601 total_memory = (uint64_t)si.totalram * si.mem_unit;
1602
1603 if ((amdgpu_smu_memory_pool_size == 1) ||
1604 (amdgpu_smu_memory_pool_size == 2)) {
1605 if (total_memory < dram_size_three_GB)
1606 goto def_value1;
1607 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1608 (amdgpu_smu_memory_pool_size == 8)) {
1609 if (total_memory < dram_size_seven_GB)
1610 goto def_value1;
1611 } else {
1612 DRM_WARN("Smu memory pool size not supported\n");
1613 goto def_value;
1614 }
1615 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1616
1617 return;
1618
1619def_value1:
1620 DRM_WARN("No enough system memory\n");
1621def_value:
1622 adev->pm.smu_prv_buffer_size = 0;
1623}
1624
9f6a7857
HR
1625static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1626{
1627 if (!(adev->flags & AMD_IS_APU) ||
1628 adev->asic_type < CHIP_RAVEN)
1629 return 0;
1630
1631 switch (adev->asic_type) {
1632 case CHIP_RAVEN:
1633 if (adev->pdev->device == 0x15dd)
1634 adev->apu_flags |= AMD_APU_IS_RAVEN;
1635 if (adev->pdev->device == 0x15d8)
1636 adev->apu_flags |= AMD_APU_IS_PICASSO;
1637 break;
1638 case CHIP_RENOIR:
1639 if ((adev->pdev->device == 0x1636) ||
1640 (adev->pdev->device == 0x164c))
1641 adev->apu_flags |= AMD_APU_IS_RENOIR;
1642 else
1643 adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1644 break;
1645 case CHIP_VANGOGH:
1646 adev->apu_flags |= AMD_APU_IS_VANGOGH;
1647 break;
1648 case CHIP_YELLOW_CARP:
1649 break;
d0f56dc2 1650 case CHIP_CYAN_SKILLFISH:
dfcc3e8c
AD
1651 if ((adev->pdev->device == 0x13FE) ||
1652 (adev->pdev->device == 0x143F))
d0f56dc2
TZ
1653 adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1654 break;
9f6a7857 1655 default:
4eaf21b7 1656 break;
9f6a7857
HR
1657 }
1658
1659 return 0;
1660}
1661
d38ceaf9 1662/**
06ec9070 1663 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
1664 *
1665 * @adev: amdgpu_device pointer
1666 *
1667 * Validates certain module parameters and updates
1668 * the associated values used by the driver (all asics).
1669 */
912dfc84 1670static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1671{
5b011235
CZ
1672 if (amdgpu_sched_jobs < 4) {
1673 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1674 amdgpu_sched_jobs);
1675 amdgpu_sched_jobs = 4;
47fc644f 1676 } else if (!is_power_of_2(amdgpu_sched_jobs)) {
5b011235
CZ
1677 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1678 amdgpu_sched_jobs);
1679 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1680 }
d38ceaf9 1681
83e74db6 1682 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
1683 /* gart size must be greater or equal to 32M */
1684 dev_warn(adev->dev, "gart size (%d) too small\n",
1685 amdgpu_gart_size);
83e74db6 1686 amdgpu_gart_size = -1;
d38ceaf9
AD
1687 }
1688
36d38372 1689 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1690 /* gtt size must be greater or equal to 32M */
36d38372
CK
1691 dev_warn(adev->dev, "gtt size (%d) too small\n",
1692 amdgpu_gtt_size);
1693 amdgpu_gtt_size = -1;
d38ceaf9
AD
1694 }
1695
d07f14be
RH
1696 /* valid range is between 4 and 9 inclusive */
1697 if (amdgpu_vm_fragment_size != -1 &&
1698 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1699 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1700 amdgpu_vm_fragment_size = -1;
1701 }
1702
5d5bd5e3
KW
1703 if (amdgpu_sched_hw_submission < 2) {
1704 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1705 amdgpu_sched_hw_submission);
1706 amdgpu_sched_hw_submission = 2;
1707 } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1708 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1709 amdgpu_sched_hw_submission);
1710 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1711 }
1712
2656fd23
AG
1713 if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1714 dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1715 amdgpu_reset_method = -1;
1716 }
1717
7951e376
RZ
1718 amdgpu_device_check_smu_prv_buffer_size(adev);
1719
06ec9070 1720 amdgpu_device_check_vm_size(adev);
d38ceaf9 1721
06ec9070 1722 amdgpu_device_check_block_size(adev);
6a7f76e7 1723
19aede77 1724 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84 1725
e3c00faa 1726 return 0;
d38ceaf9
AD
1727}
1728
1729/**
1730 * amdgpu_switcheroo_set_state - set switcheroo state
1731 *
1732 * @pdev: pci dev pointer
1694467b 1733 * @state: vga_switcheroo state
d38ceaf9 1734 *
12024b17 1735 * Callback for the switcheroo driver. Suspends or resumes
d38ceaf9
AD
1736 * the asics before or after it is powered up using ACPI methods.
1737 */
8aba21b7
LT
1738static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1739 enum vga_switcheroo_state state)
d38ceaf9
AD
1740{
1741 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 1742 int r;
d38ceaf9 1743
b98c6299 1744 if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
1745 return;
1746
1747 if (state == VGA_SWITCHEROO_ON) {
dd4fa6c1 1748 pr_info("switched on\n");
d38ceaf9
AD
1749 /* don't suspend or resume card normally */
1750 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1751
8f66090b
TZ
1752 pci_set_power_state(pdev, PCI_D0);
1753 amdgpu_device_load_pci_state(pdev);
1754 r = pci_enable_device(pdev);
de185019
AD
1755 if (r)
1756 DRM_WARN("pci_enable_device failed (%d)\n", r);
1757 amdgpu_device_resume(dev, true);
d38ceaf9 1758
d38ceaf9 1759 dev->switch_power_state = DRM_SWITCH_POWER_ON;
d38ceaf9 1760 } else {
dd4fa6c1 1761 pr_info("switched off\n");
d38ceaf9 1762 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
5095d541 1763 amdgpu_device_prepare(dev);
de185019 1764 amdgpu_device_suspend(dev, true);
8f66090b 1765 amdgpu_device_cache_pci_state(pdev);
de185019 1766 /* Shut down the device */
8f66090b
TZ
1767 pci_disable_device(pdev);
1768 pci_set_power_state(pdev, PCI_D3cold);
d38ceaf9
AD
1769 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1770 }
1771}
1772
1773/**
1774 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1775 *
1776 * @pdev: pci dev pointer
1777 *
1778 * Callback for the switcheroo driver. Checks if the switcheroo
1779 * state can be changed.
1780 * Returns true if the state can be changed, false if not.
1781 */
1782static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1783{
1784 struct drm_device *dev = pci_get_drvdata(pdev);
1785
b8920e1e 1786 /*
d38ceaf9
AD
1787 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1788 * locking inversion with the driver load path. And the access here is
1789 * completely racy anyway. So don't bother with locking for now.
1790 */
7e13ad89 1791 return atomic_read(&dev->open_count) == 0;
d38ceaf9
AD
1792}
1793
1794static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1795 .set_gpu_state = amdgpu_switcheroo_set_state,
1796 .reprobe = NULL,
1797 .can_switch = amdgpu_switcheroo_can_switch,
1798};
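/*
 * Sketch only: these ops are handed to the vga_switcheroo core during
 * device bring-up, along the lines of the call below; "px" stands for
 * whether the board supports PX power control and is illustrative here.
 *
 *	vga_switcheroo_register_client(adev->pdev,
 *				       &amdgpu_switcheroo_ops, px);
 */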
1799
e3ecdffa
AD
1800/**
1801 * amdgpu_device_ip_set_clockgating_state - set the CG state
1802 *
87e3f136 1803 * @dev: amdgpu_device pointer
e3ecdffa
AD
1804 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1805 * @state: clockgating state (gate or ungate)
1806 *
1807 * Sets the requested clockgating state for all instances of
1808 * the hardware IP specified.
1809 * Returns the error code from the last instance.
1810 */
43fa561f 1811int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1812 enum amd_ip_block_type block_type,
1813 enum amd_clockgating_state state)
d38ceaf9 1814{
43fa561f 1815 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1816 int i, r = 0;
1817
1818 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1819 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1820 continue;
c722865a
RZ
1821 if (adev->ip_blocks[i].version->type != block_type)
1822 continue;
1823 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1824 continue;
1825 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1826 (void *)adev, state);
1827 if (r)
1828 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1829 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1830 }
1831 return r;
1832}
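/*
 * Illustrative call only: a caller gates clockgating for every instance
 * of one IP type in a single call, e.g. gating GFX:
 *
 *	amdgpu_device_ip_set_clockgating_state(adev,
 *					       AMD_IP_BLOCK_TYPE_GFX,
 *					       AMD_CG_STATE_GATE);
 */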
1833
e3ecdffa
AD
1834/**
1835 * amdgpu_device_ip_set_powergating_state - set the PG state
1836 *
87e3f136 1837 * @dev: amdgpu_device pointer
e3ecdffa
AD
1838 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1839 * @state: powergating state (gate or ungate)
1840 *
1841 * Sets the requested powergating state for all instances of
1842 * the hardware IP specified.
1843 * Returns the error code from the last instance.
1844 */
43fa561f 1845int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1846 enum amd_ip_block_type block_type,
1847 enum amd_powergating_state state)
d38ceaf9 1848{
43fa561f 1849 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1850 int i, r = 0;
1851
1852 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1853 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1854 continue;
c722865a
RZ
1855 if (adev->ip_blocks[i].version->type != block_type)
1856 continue;
1857 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1858 continue;
1859 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1860 (void *)adev, state);
1861 if (r)
1862 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1863 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1864 }
1865 return r;
1866}
1867
e3ecdffa
AD
1868/**
1869 * amdgpu_device_ip_get_clockgating_state - get the CG state
1870 *
1871 * @adev: amdgpu_device pointer
1872 * @flags: clockgating feature flags
1873 *
1874 * Walks the list of IPs on the device and updates the clockgating
1875 * flags for each IP.
1876 * Updates @flags with the feature flags for each hardware IP where
1877 * clockgating is enabled.
1878 */
2990a1fc 1879void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
25faeddc 1880 u64 *flags)
6cb2d4e4
HR
1881{
1882 int i;
1883
1884 for (i = 0; i < adev->num_ip_blocks; i++) {
1885 if (!adev->ip_blocks[i].status.valid)
1886 continue;
1887 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1888 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1889 }
1890}
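/*
 * Minimal usage sketch: callers collect the clockgating feature flags of
 * all IPs into one mask, e.g.:
 *
 *	u64 flags = 0;
 *
 *	amdgpu_device_ip_get_clockgating_state(adev, &flags);
 */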
1891
e3ecdffa
AD
1892/**
1893 * amdgpu_device_ip_wait_for_idle - wait for idle
1894 *
1895 * @adev: amdgpu_device pointer
1896 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1897 *
1898 * Waits for the requested hardware IP to be idle.
1899 * Returns 0 for success or a negative error code on failure.
1900 */
2990a1fc
AD
1901int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1902 enum amd_ip_block_type block_type)
5dbbb60b
AD
1903{
1904 int i, r;
1905
1906 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1907 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1908 continue;
a1255107
AD
1909 if (adev->ip_blocks[i].version->type == block_type) {
1910 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1911 if (r)
1912 return r;
1913 break;
1914 }
1915 }
1916 return 0;
1917
1918}
1919
e3ecdffa
AD
1920/**
1921 * amdgpu_device_ip_is_idle - is the hardware IP idle
1922 *
1923 * @adev: amdgpu_device pointer
1924 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1925 *
1926 * Check if the hardware IP is idle or not.
1927 * Returns true if the IP is idle, false if not.
1928 */
2990a1fc
AD
1929bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1930 enum amd_ip_block_type block_type)
5dbbb60b
AD
1931{
1932 int i;
1933
1934 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1935 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1936 continue;
a1255107
AD
1937 if (adev->ip_blocks[i].version->type == block_type)
1938 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1939 }
1940 return true;
1941
1942}
1943
e3ecdffa
AD
1944/**
1945 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1946 *
1947 * @adev: amdgpu_device pointer
87e3f136 1948 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1949 *
1950 * Returns a pointer to the hardware IP block structure
1951 * if it exists for the asic, otherwise NULL.
1952 */
2990a1fc
AD
1953struct amdgpu_ip_block *
1954amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1955 enum amd_ip_block_type type)
d38ceaf9
AD
1956{
1957 int i;
1958
1959 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1960 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1961 return &adev->ip_blocks[i];
1962
1963 return NULL;
1964}
1965
1966/**
2990a1fc 1967 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1968 *
1969 * @adev: amdgpu_device pointer
5fc3aeeb 1970 * @type: enum amd_ip_block_type
d38ceaf9
AD
1971 * @major: major version
1972 * @minor: minor version
1973 *
1974 * Return 0 if the IP block's version is equal to or greater than the
1975 * given version, 1 if it is smaller or the ip_block doesn't exist.
1976 */
2990a1fc
AD
1977int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1978 enum amd_ip_block_type type,
1979 u32 major, u32 minor)
d38ceaf9 1980{
2990a1fc 1981 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1982
a1255107
AD
1983 if (ip_block && ((ip_block->version->major > major) ||
1984 ((ip_block->version->major == major) &&
1985 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1986 return 0;
1987
1988 return 1;
1989}
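/*
 * Illustrative use: a return value of 0 means the block is at least the
 * requested version, so a caller could test for GMC 8.1 or newer as
 *
 *	if (!amdgpu_device_ip_block_version_cmp(adev,
 *						AMD_IP_BLOCK_TYPE_GMC, 8, 1))
 *		take_newer_gmc_path();
 *
 * where take_newer_gmc_path() is a made-up placeholder.
 */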
1990
a1255107 1991/**
2990a1fc 1992 * amdgpu_device_ip_block_add
a1255107
AD
1993 *
1994 * @adev: amdgpu_device pointer
1995 * @ip_block_version: pointer to the IP to add
1996 *
1997 * Adds the IP block driver information to the collection of IPs
1998 * on the asic.
1999 */
2990a1fc
AD
2000int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
2001 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
2002{
2003 if (!ip_block_version)
2004 return -EINVAL;
2005
7bd939d0
LG
2006 switch (ip_block_version->type) {
2007 case AMD_IP_BLOCK_TYPE_VCN:
2008 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
2009 return 0;
2010 break;
2011 case AMD_IP_BLOCK_TYPE_JPEG:
2012 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
2013 return 0;
2014 break;
2015 default:
2016 break;
2017 }
2018
e966a725 2019 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
2020 ip_block_version->funcs->name);
2021
a1255107
AD
2022 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
2023
2024 return 0;
2025}
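/*
 * The per-ASIC set_ip_blocks() helpers call this once per IP block in
 * initialization order; a sketch based on the VI code path:
 *
 *	r = amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
 *	if (r)
 *		return r;
 */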
2026
e3ecdffa
AD
2027/**
2028 * amdgpu_device_enable_virtual_display - enable virtual display feature
2029 *
2030 * @adev: amdgpu_device pointer
2031 *
2032 * Enables the virtual display feature if the user has enabled it via
2033 * the module parameter virtual_display. This feature provides a virtual
2034 * display hardware on headless boards or in virtualized environments.
2035 * This function parses and validates the configuration string specified by
2036 * the user and configures the virtual display configuration (number of
2037 * virtual connectors, crtcs, etc.) specified.
2038 */
483ef985 2039static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
2040{
2041 adev->enable_virtual_display = false;
2042
2043 if (amdgpu_virtual_display) {
8f66090b 2044 const char *pci_address_name = pci_name(adev->pdev);
0f66356d 2045 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
2046
2047 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
2048 pciaddstr_tmp = pciaddstr;
0f66356d
ED
2049 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
2050 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
2051 if (!strcmp("all", pciaddname)
2052 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
2053 long num_crtc;
2054 int res = -1;
2055
9accf2fd 2056 adev->enable_virtual_display = true;
0f66356d
ED
2057
2058 if (pciaddname_tmp)
2059 res = kstrtol(pciaddname_tmp, 10,
2060 &num_crtc);
2061
2062 if (!res) {
2063 if (num_crtc < 1)
2064 num_crtc = 1;
2065 if (num_crtc > 6)
2066 num_crtc = 6;
2067 adev->mode_info.num_crtc = num_crtc;
2068 } else {
2069 adev->mode_info.num_crtc = 1;
2070 }
9accf2fd
ED
2071 break;
2072 }
2073 }
2074
0f66356d
ED
2075 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
2076 amdgpu_virtual_display, pci_address_name,
2077 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
2078
2079 kfree(pciaddstr);
2080 }
2081}
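/*
 * The virtual_display string is a semicolon separated list of PCI
 * addresses, each optionally followed by a crtc count; for example
 * (illustrative values):
 *
 *	amdgpu.virtual_display=0000:01:00.0,2
 *
 * enables two virtual crtcs on that device, while "all,1" enables one
 * virtual crtc on every amdgpu device.
 */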
2082
25263da3
AD
2083void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
2084{
2085 if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
2086 adev->mode_info.num_crtc = 1;
2087 adev->enable_virtual_display = true;
2088 DRM_INFO("virtual_display:%d, num_crtc:%d\n",
2089 adev->enable_virtual_display, adev->mode_info.num_crtc);
2090 }
2091}
2092
e3ecdffa
AD
2093/**
2094 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
2095 *
2096 * @adev: amdgpu_device pointer
2097 *
2098 * Parses the asic configuration parameters specified in the gpu info
2099 * firmware and makes them available to the driver for use in configuring
2100 * the asic.
2101 * Returns 0 on success, -EINVAL on failure.
2102 */
e2a75f88
AD
2103static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
2104{
e2a75f88 2105 const char *chip_name;
c0a43457 2106 char fw_name[40];
e2a75f88
AD
2107 int err;
2108 const struct gpu_info_firmware_header_v1_0 *hdr;
2109
ab4fe3e1
HR
2110 adev->firmware.gpu_info_fw = NULL;
2111
72de33f8 2112 if (adev->mman.discovery_bin) {
cc375d8c
TY
2113 /*
2114 * FIXME: The bounding box is still needed by Navi12, so
e24d0e91 2115 * temporarily read it from gpu_info firmware. Should be dropped
cc375d8c
TY
2116 * when DAL no longer needs it.
2117 */
2118 if (adev->asic_type != CHIP_NAVI12)
2119 return 0;
258620d0
AD
2120 }
2121
e2a75f88 2122 switch (adev->asic_type) {
e2a75f88
AD
2123 default:
2124 return 0;
2125 case CHIP_VEGA10:
2126 chip_name = "vega10";
2127 break;
3f76dced
AD
2128 case CHIP_VEGA12:
2129 chip_name = "vega12";
2130 break;
2d2e5e7e 2131 case CHIP_RAVEN:
54f78a76 2132 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
54c4d17e 2133 chip_name = "raven2";
54f78a76 2134 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
741deade 2135 chip_name = "picasso";
54c4d17e
FX
2136 else
2137 chip_name = "raven";
2d2e5e7e 2138 break;
65e60f6e
LM
2139 case CHIP_ARCTURUS:
2140 chip_name = "arcturus";
2141 break;
42b325e5
XY
2142 case CHIP_NAVI12:
2143 chip_name = "navi12";
2144 break;
e2a75f88
AD
2145 }
2146
2147 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
b31d3063 2148 err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
e2a75f88
AD
2149 if (err) {
2150 dev_err(adev->dev,
b31d3063 2151 "Failed to get gpu_info firmware \"%s\"\n",
e2a75f88
AD
2152 fw_name);
2153 goto out;
2154 }
2155
ab4fe3e1 2156 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
2157 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
2158
2159 switch (hdr->version_major) {
2160 case 1:
2161 {
2162 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 2163 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
2164 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2165
cc375d8c
TY
2166 /*
2167 * Should be dropped when DAL no longer needs it.
2168 */
2169 if (adev->asic_type == CHIP_NAVI12)
ec51d3fa
XY
2170 goto parse_soc_bounding_box;
2171
b5ab16bf
AD
2172 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2173 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2174 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2175 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 2176 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
2177 le32_to_cpu(gpu_info_fw->gc_num_tccs);
2178 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2179 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2180 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2181 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 2182 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
2183 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2184 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
2185 adev->gfx.cu_info.max_waves_per_simd =
2186 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2187 adev->gfx.cu_info.max_scratch_slots_per_cu =
2188 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2189 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 2190 if (hdr->version_minor >= 1) {
35c2e910
HZ
2191 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2192 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2193 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2194 adev->gfx.config.num_sc_per_sh =
2195 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2196 adev->gfx.config.num_packer_per_sc =
2197 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2198 }
ec51d3fa
XY
2199
2200parse_soc_bounding_box:
ec51d3fa
XY
2201 /*
2202 * soc bounding box info is not integrated in the discovery table,
258620d0 2203 * we always need to parse it from gpu info firmware if needed.
ec51d3fa 2204 */
48321c3d
HW
2205 if (hdr->version_minor == 2) {
2206 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2207 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2208 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2209 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2210 }
e2a75f88
AD
2211 break;
2212 }
2213 default:
2214 dev_err(adev->dev,
2215 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2216 err = -EINVAL;
2217 goto out;
2218 }
2219out:
e2a75f88
AD
2220 return err;
2221}
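/*
 * The firmware file name is built from the chip name selected above;
 * for CHIP_VEGA10, for instance, the snprintf() in this function
 * produces "amdgpu/vega10_gpu_info.bin".
 */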
2222
e3ecdffa
AD
2223/**
2224 * amdgpu_device_ip_early_init - run early init for hardware IPs
2225 *
2226 * @adev: amdgpu_device pointer
2227 *
2228 * Early initialization pass for hardware IPs. The hardware IPs that make
2229 * up each asic are discovered and each IP's early_init callback is run. This
2230 * is the first stage in initializing the asic.
2231 * Returns 0 on success, negative error code on failure.
2232 */
06ec9070 2233static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 2234{
901e2be2
AD
2235 struct drm_device *dev = adev_to_drm(adev);
2236 struct pci_dev *parent;
aaa36a97 2237 int i, r;
ced69502 2238 bool total;
d38ceaf9 2239
483ef985 2240 amdgpu_device_enable_virtual_display(adev);
a6be7570 2241
00a979f3 2242 if (amdgpu_sriov_vf(adev)) {
00a979f3 2243 r = amdgpu_virt_request_full_gpu(adev, true);
aaa36a97
AD
2244 if (r)
2245 return r;
00a979f3
WS
2246 }
2247
d38ceaf9 2248 switch (adev->asic_type) {
33f34802
KW
2249#ifdef CONFIG_DRM_AMDGPU_SI
2250 case CHIP_VERDE:
2251 case CHIP_TAHITI:
2252 case CHIP_PITCAIRN:
2253 case CHIP_OLAND:
2254 case CHIP_HAINAN:
295d0daf 2255 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
2256 r = si_set_ip_blocks(adev);
2257 if (r)
2258 return r;
2259 break;
2260#endif
a2e73f56
AD
2261#ifdef CONFIG_DRM_AMDGPU_CIK
2262 case CHIP_BONAIRE:
2263 case CHIP_HAWAII:
2264 case CHIP_KAVERI:
2265 case CHIP_KABINI:
2266 case CHIP_MULLINS:
e1ad2d53 2267 if (adev->flags & AMD_IS_APU)
a2e73f56 2268 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
2269 else
2270 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
2271
2272 r = cik_set_ip_blocks(adev);
2273 if (r)
2274 return r;
2275 break;
2276#endif
da87c30b
AD
2277 case CHIP_TOPAZ:
2278 case CHIP_TONGA:
2279 case CHIP_FIJI:
2280 case CHIP_POLARIS10:
2281 case CHIP_POLARIS11:
2282 case CHIP_POLARIS12:
2283 case CHIP_VEGAM:
2284 case CHIP_CARRIZO:
2285 case CHIP_STONEY:
2286 if (adev->flags & AMD_IS_APU)
2287 adev->family = AMDGPU_FAMILY_CZ;
2288 else
2289 adev->family = AMDGPU_FAMILY_VI;
2290
2291 r = vi_set_ip_blocks(adev);
2292 if (r)
2293 return r;
2294 break;
d38ceaf9 2295 default:
63352b7f
AD
2296 r = amdgpu_discovery_set_ip_blocks(adev);
2297 if (r)
2298 return r;
2299 break;
d38ceaf9
AD
2300 }
2301
901e2be2
AD
2302 if (amdgpu_has_atpx() &&
2303 (amdgpu_is_atpx_hybrid() ||
2304 amdgpu_has_atpx_dgpu_power_cntl()) &&
2305 ((adev->flags & AMD_IS_APU) == 0) &&
2306 !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
2307 adev->flags |= AMD_IS_PX;
2308
85ac2021 2309 if (!(adev->flags & AMD_IS_APU)) {
c4c8955b 2310 parent = pcie_find_root_port(adev->pdev);
85ac2021
AD
2311 adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2312 }
901e2be2 2313
1884734a 2314
3b94fb10 2315 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 2316 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 2317 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
4215a119
HC
2318 if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2319 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
fbf1035b
ML
2320 if (!amdgpu_device_pcie_dynamic_switching_supported())
2321 adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
00f54b97 2322
ced69502 2323 total = true;
d38ceaf9
AD
2324 for (i = 0; i < adev->num_ip_blocks; i++) {
2325 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
0c451baf 2326 DRM_WARN("disabled ip block: %d <%s>\n",
ed8cf00c 2327 i, adev->ip_blocks[i].version->funcs->name);
a1255107 2328 adev->ip_blocks[i].status.valid = false;
d38ceaf9 2329 } else {
a1255107
AD
2330 if (adev->ip_blocks[i].version->funcs->early_init) {
2331 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 2332 if (r == -ENOENT) {
a1255107 2333 adev->ip_blocks[i].status.valid = false;
2c1a2784 2334 } else if (r) {
a1255107
AD
2335 DRM_ERROR("early_init of IP block <%s> failed %d\n",
2336 adev->ip_blocks[i].version->funcs->name, r);
ced69502 2337 total = false;
2c1a2784 2338 } else {
a1255107 2339 adev->ip_blocks[i].status.valid = true;
2c1a2784 2340 }
974e6b64 2341 } else {
a1255107 2342 adev->ip_blocks[i].status.valid = true;
d38ceaf9 2343 }
d38ceaf9 2344 }
21a249ca
AD
2345 /* get the vbios after the asic_funcs are set up */
2346 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
2347 r = amdgpu_device_parse_gpu_info_fw(adev);
2348 if (r)
2349 return r;
2350
21a249ca 2351 /* Read BIOS */
9535a86a
SZ
2352 if (amdgpu_device_read_bios(adev)) {
2353 if (!amdgpu_get_bios(adev))
2354 return -EINVAL;
21a249ca 2355
9535a86a
SZ
2356 r = amdgpu_atombios_init(adev);
2357 if (r) {
2358 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2359 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2360 return r;
2361 }
21a249ca 2362 }
77eabc6f
PJZ
2363
2364 /* get pf2vf msg info at its earliest time */
2365 if (amdgpu_sriov_vf(adev))
2366 amdgpu_virt_init_data_exchange(adev);
2367
21a249ca 2368 }
d38ceaf9 2369 }
ced69502
ML
2370 if (!total)
2371 return -ENODEV;
d38ceaf9 2372
00fa4035 2373 amdgpu_amdkfd_device_probe(adev);
395d1fb9
NH
2374 adev->cg_flags &= amdgpu_cg_mask;
2375 adev->pg_flags &= amdgpu_pg_mask;
2376
d38ceaf9
AD
2377 return 0;
2378}
2379
0a4f2520
RZ
2380static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2381{
2382 int i, r;
2383
2384 for (i = 0; i < adev->num_ip_blocks; i++) {
2385 if (!adev->ip_blocks[i].status.sw)
2386 continue;
2387 if (adev->ip_blocks[i].status.hw)
2388 continue;
2389 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 2390 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
2391 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2392 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2393 if (r) {
2394 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2395 adev->ip_blocks[i].version->funcs->name, r);
2396 return r;
2397 }
2398 adev->ip_blocks[i].status.hw = true;
2399 }
2400 }
2401
2402 return 0;
2403}
2404
2405static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2406{
2407 int i, r;
2408
2409 for (i = 0; i < adev->num_ip_blocks; i++) {
2410 if (!adev->ip_blocks[i].status.sw)
2411 continue;
2412 if (adev->ip_blocks[i].status.hw)
2413 continue;
2414 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2415 if (r) {
2416 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2417 adev->ip_blocks[i].version->funcs->name, r);
2418 return r;
2419 }
2420 adev->ip_blocks[i].status.hw = true;
2421 }
2422
2423 return 0;
2424}
2425
7a3e0bb2
RZ
2426static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2427{
2428 int r = 0;
2429 int i;
80f41f84 2430 uint32_t smu_version;
7a3e0bb2
RZ
2431
2432 if (adev->asic_type >= CHIP_VEGA10) {
2433 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
2434 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2435 continue;
2436
e3c1b071 2437 if (!adev->ip_blocks[i].status.sw)
2438 continue;
2439
482f0e53
ML
2440 /* no need to do the fw loading again if already done*/
2441 if (adev->ip_blocks[i].status.hw == true)
2442 break;
2443
53b3f8f4 2444 if (amdgpu_in_reset(adev) || adev->in_suspend) {
482f0e53
ML
2445 r = adev->ip_blocks[i].version->funcs->resume(adev);
2446 if (r) {
2447 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 2448 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
2449 return r;
2450 }
2451 } else {
2452 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2453 if (r) {
2454 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2455 adev->ip_blocks[i].version->funcs->name, r);
2456 return r;
7a3e0bb2 2457 }
7a3e0bb2 2458 }
482f0e53
ML
2459
2460 adev->ip_blocks[i].status.hw = true;
2461 break;
7a3e0bb2
RZ
2462 }
2463 }
482f0e53 2464
8973d9ec
ED
2465 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2466 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 2467
80f41f84 2468 return r;
7a3e0bb2
RZ
2469}
2470
5fd8518d
AG
2471static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2472{
2473 long timeout;
2474 int r, i;
2475
2476 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2477 struct amdgpu_ring *ring = adev->rings[i];
2478
2479 /* No need to setup the GPU scheduler for rings that don't need it */
2480 if (!ring || ring->no_scheduler)
2481 continue;
2482
2483 switch (ring->funcs->type) {
2484 case AMDGPU_RING_TYPE_GFX:
2485 timeout = adev->gfx_timeout;
2486 break;
2487 case AMDGPU_RING_TYPE_COMPUTE:
2488 timeout = adev->compute_timeout;
2489 break;
2490 case AMDGPU_RING_TYPE_SDMA:
2491 timeout = adev->sdma_timeout;
2492 break;
2493 default:
2494 timeout = adev->video_timeout;
2495 break;
2496 }
2497
2498 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
11f25c84 2499 ring->num_hw_submission, 0,
8ab62eda
JG
2500 timeout, adev->reset_domain->wq,
2501 ring->sched_score, ring->name,
2502 adev->dev);
5fd8518d
AG
2503 if (r) {
2504 DRM_ERROR("Failed to create scheduler on ring %s.\n",
2505 ring->name);
2506 return r;
2507 }
2508 }
2509
d425c6f4
JZ
2510 amdgpu_xcp_update_partition_sched_list(adev);
2511
5fd8518d
AG
2512 return 0;
2513}
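/*
 * The per-ring timeout chosen above (gfx_timeout, compute_timeout,
 * sdma_timeout or video_timeout) is derived from the lockup_timeout
 * module parameter and is passed to drm_sched_init() as that ring's
 * job hang limit.
 */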
2514
2515
e3ecdffa
AD
2516/**
2517 * amdgpu_device_ip_init - run init for hardware IPs
2518 *
2519 * @adev: amdgpu_device pointer
2520 *
2521 * Main initialization pass for hardware IPs. The list of all the hardware
2522 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2523 * are run. sw_init initializes the software state associated with each IP
2524 * and hw_init initializes the hardware associated with each IP.
2525 * Returns 0 on success, negative error code on failure.
2526 */
06ec9070 2527static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
2528{
2529 int i, r;
2530
c030f2e4 2531 r = amdgpu_ras_init(adev);
2532 if (r)
2533 return r;
2534
d38ceaf9 2535 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2536 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2537 continue;
a1255107 2538 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 2539 if (r) {
a1255107
AD
2540 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2541 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 2542 goto init_failed;
2c1a2784 2543 }
a1255107 2544 adev->ip_blocks[i].status.sw = true;
bfca0289 2545
c1c39032
AD
2546 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2547 /* need to do common hw init early so everything is set up for gmc */
2548 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2549 if (r) {
2550 DRM_ERROR("hw_init %d failed %d\n", i, r);
2551 goto init_failed;
2552 }
2553 adev->ip_blocks[i].status.hw = true;
2554 } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2555 /* need to do gmc hw init early so we can allocate gpu mem */
892deb48
VS
2556 /* Try to reserve bad pages early */
2557 if (amdgpu_sriov_vf(adev))
2558 amdgpu_virt_exchange_data(adev);
2559
7ccfd79f 2560 r = amdgpu_device_mem_scratch_init(adev);
2c1a2784 2561 if (r) {
7ccfd79f 2562 DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
72d3f592 2563 goto init_failed;
2c1a2784 2564 }
a1255107 2565 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
2566 if (r) {
2567 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 2568 goto init_failed;
2c1a2784 2569 }
06ec9070 2570 r = amdgpu_device_wb_init(adev);
2c1a2784 2571 if (r) {
06ec9070 2572 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 2573 goto init_failed;
2c1a2784 2574 }
a1255107 2575 adev->ip_blocks[i].status.hw = true;
2493664f
ML
2576
2577 /* right after GMC hw init, we create CSA */
02ff519e 2578 if (adev->gfx.mcbp) {
1e256e27 2579 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
58ab2c08
CK
2580 AMDGPU_GEM_DOMAIN_VRAM |
2581 AMDGPU_GEM_DOMAIN_GTT,
2582 AMDGPU_CSA_SIZE);
2493664f
ML
2583 if (r) {
2584 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 2585 goto init_failed;
2493664f
ML
2586 }
2587 }
d38ceaf9
AD
2588 }
2589 }
2590
c9ffa427 2591 if (amdgpu_sriov_vf(adev))
22c16d25 2592 amdgpu_virt_init_data_exchange(adev);
c9ffa427 2593
533aed27
AG
2594 r = amdgpu_ib_pool_init(adev);
2595 if (r) {
2596 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2597 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2598 goto init_failed;
2599 }
2600
c8963ea4
RZ
2601 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2602 if (r)
72d3f592 2603 goto init_failed;
0a4f2520
RZ
2604
2605 r = amdgpu_device_ip_hw_init_phase1(adev);
2606 if (r)
72d3f592 2607 goto init_failed;
0a4f2520 2608
7a3e0bb2
RZ
2609 r = amdgpu_device_fw_loading(adev);
2610 if (r)
72d3f592 2611 goto init_failed;
7a3e0bb2 2612
0a4f2520
RZ
2613 r = amdgpu_device_ip_hw_init_phase2(adev);
2614 if (r)
72d3f592 2615 goto init_failed;
d38ceaf9 2616
121a2bc6
AG
2617 /*
2618 * retired pages will be loaded from eeprom and reserved here,
2619 * it should be called after amdgpu_device_ip_hw_init_phase2 since
2620 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2621 * for I2C communication, which is only true at this point.
b82e65a9
GC
2622 *
2623 * amdgpu_ras_recovery_init may fail, but the upper layers only care
2624 * about failures caused by a bad gpu situation and stop the amdgpu
2625 * init process accordingly. For other failure cases, it will still
2626 * release all the resources and print an error message, rather than
2627 * returning a negative value to the upper level.
121a2bc6
AG
2628 *
2629 * Note: theoretically, this should be called before all vram allocations
2630 * to protect retired pages from being abused.
2631 */
b82e65a9
GC
2632 r = amdgpu_ras_recovery_init(adev);
2633 if (r)
2634 goto init_failed;
121a2bc6 2635
cfbb6b00
AG
2636 /**
2637 * In case of XGMI grab extra reference for reset domain for this device
2638 */
a4c63caf 2639 if (adev->gmc.xgmi.num_physical_nodes > 1) {
cfbb6b00 2640 if (amdgpu_xgmi_add_device(adev) == 0) {
46c67660 2641 if (!amdgpu_sriov_vf(adev)) {
2efc30f0
VC
2642 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2643
dfd0287b
LH
2644 if (WARN_ON(!hive)) {
2645 r = -ENOENT;
2646 goto init_failed;
2647 }
2648
46c67660 2649 if (!hive->reset_domain ||
2650 !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2651 r = -ENOENT;
2652 amdgpu_put_xgmi_hive(hive);
2653 goto init_failed;
2654 }
2655
2656 /* Drop the early temporary reset domain we created for device */
2657 amdgpu_reset_put_reset_domain(adev->reset_domain);
2658 adev->reset_domain = hive->reset_domain;
9dfa4860 2659 amdgpu_put_xgmi_hive(hive);
cfbb6b00 2660 }
a4c63caf
AG
2661 }
2662 }
2663
5fd8518d
AG
2664 r = amdgpu_device_init_schedulers(adev);
2665 if (r)
2666 goto init_failed;
e3c1b071 2667
2668 /* Don't init kfd if whole hive need to be reset during init */
84b4dd3f
PY
2669 if (!adev->gmc.xgmi.pending_reset) {
2670 kgd2kfd_init_zone_device(adev);
e3c1b071 2671 amdgpu_amdkfd_device_init(adev);
84b4dd3f 2672 }
c6332b97 2673
bd607166
KR
2674 amdgpu_fru_get_product_info(adev);
2675
72d3f592 2676init_failed:
c6332b97 2677
72d3f592 2678 return r;
d38ceaf9
AD
2679}
2680
e3ecdffa
AD
2681/**
2682 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2683 *
2684 * @adev: amdgpu_device pointer
2685 *
2686 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2687 * this function before a GPU reset. If the value is retained after a
2688 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2689 */
06ec9070 2690static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2691{
2692 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2693}
2694
e3ecdffa
AD
2695/**
2696 * amdgpu_device_check_vram_lost - check if vram is valid
2697 *
2698 * @adev: amdgpu_device pointer
2699 *
2700 * Checks the reset magic value written to the gart pointer in VRAM.
2701 * The driver calls this after a GPU reset to see if the contents of
2702 * VRAM have been lost or not.
2703 * returns true if vram is lost, false if not.
2704 */
06ec9070 2705static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2706{
dadce777
EQ
2707 if (memcmp(adev->gart.ptr, adev->reset_magic,
2708 AMDGPU_RESET_MAGIC_NUM))
2709 return true;
2710
53b3f8f4 2711 if (!amdgpu_in_reset(adev))
dadce777
EQ
2712 return false;
2713
2714 /*
2715 * For all ASICs with baco/mode1 reset, the VRAM is
2716 * always assumed to be lost.
2717 */
2718 switch (amdgpu_asic_reset_method(adev)) {
2719 case AMD_RESET_METHOD_BACO:
2720 case AMD_RESET_METHOD_MODE1:
2721 return true;
2722 default:
2723 return false;
2724 }
0c49e0b8
CZ
2725}
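/*
 * Sketch of typical use in the reset path ("tmp_adev" is an illustrative
 * variable name, not from this function):
 *
 *	if (amdgpu_device_check_vram_lost(tmp_adev))
 *		DRM_INFO("VRAM is lost due to GPU reset!\n");
 */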
2726
e3ecdffa 2727/**
1112a46b 2728 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2729 *
2730 * @adev: amdgpu_device pointer
b8b72130 2731 * @state: clockgating state (gate or ungate)
e3ecdffa 2732 *
e3ecdffa 2733 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2734 * set_clockgating_state callbacks are run.
2735 * The late initialization pass enables clockgating for hardware IPs.
2736 * The fini or suspend pass disables clockgating for hardware IPs.
e3ecdffa
AD
2737 * Returns 0 on success, negative error code on failure.
2738 */
fdd34271 2739
5d89bb2d
LL
2740int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2741 enum amd_clockgating_state state)
d38ceaf9 2742{
1112a46b 2743 int i, j, r;
d38ceaf9 2744
4a2ba394
SL
2745 if (amdgpu_emu_mode == 1)
2746 return 0;
2747
1112a46b
RZ
2748 for (j = 0; j < adev->num_ip_blocks; j++) {
2749 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2750 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2751 continue;
47198eb7 2752 /* skip CG for GFX, SDMA on S0ix */
5d70a549 2753 if (adev->in_s0ix &&
47198eb7
AD
2754 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2755 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2756 continue;
4a446d55 2757 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2758 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2759 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2760 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2761 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2762 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2763 /* enable clockgating to save power */
a1255107 2764 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2765 state);
4a446d55
AD
2766 if (r) {
2767 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2768 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2769 return r;
2770 }
b0b00ff1 2771 }
d38ceaf9 2772 }
06b18f61 2773
c9f96fd5
RZ
2774 return 0;
2775}
2776
5d89bb2d
LL
2777int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2778 enum amd_powergating_state state)
c9f96fd5 2779{
1112a46b 2780 int i, j, r;
06b18f61 2781
c9f96fd5
RZ
2782 if (amdgpu_emu_mode == 1)
2783 return 0;
2784
1112a46b
RZ
2785 for (j = 0; j < adev->num_ip_blocks; j++) {
2786 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2787 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5 2788 continue;
47198eb7 2789 /* skip PG for GFX, SDMA on S0ix */
5d70a549 2790 if (adev->in_s0ix &&
47198eb7
AD
2791 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2792 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2793 continue;
c9f96fd5
RZ
2794 /* skip CG for VCE/UVD, it's handled specially */
2795 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2796 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2797 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2798 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2799 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2800 /* enable powergating to save power */
2801 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2802 state);
c9f96fd5
RZ
2803 if (r) {
2804 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2805 adev->ip_blocks[i].version->funcs->name, r);
2806 return r;
2807 }
2808 }
2809 }
2dc80b00
S
2810 return 0;
2811}
2812
beff74bc
AD
2813static int amdgpu_device_enable_mgpu_fan_boost(void)
2814{
2815 struct amdgpu_gpu_instance *gpu_ins;
2816 struct amdgpu_device *adev;
2817 int i, ret = 0;
2818
2819 mutex_lock(&mgpu_info.mutex);
2820
2821 /*
2822 * MGPU fan boost feature should be enabled
2823 * only when there are two or more dGPUs in
2824 * the system
2825 */
2826 if (mgpu_info.num_dgpu < 2)
2827 goto out;
2828
2829 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2830 gpu_ins = &(mgpu_info.gpu_ins[i]);
2831 adev = gpu_ins->adev;
2832 if (!(adev->flags & AMD_IS_APU) &&
f10bb940 2833 !gpu_ins->mgpu_fan_enabled) {
beff74bc
AD
2834 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2835 if (ret)
2836 break;
2837
2838 gpu_ins->mgpu_fan_enabled = 1;
2839 }
2840 }
2841
2842out:
2843 mutex_unlock(&mgpu_info.mutex);
2844
2845 return ret;
2846}
2847
e3ecdffa
AD
2848/**
2849 * amdgpu_device_ip_late_init - run late init for hardware IPs
2850 *
2851 * @adev: amdgpu_device pointer
2852 *
2853 * Late initialization pass for hardware IPs. The list of all the hardware
2854 * IPs that make up the asic is walked and the late_init callbacks are run.
2855 * late_init covers any special initialization that an IP requires
2856 * after all of them have been initialized or something that needs to happen
2857 * late in the init process.
2858 * Returns 0 on success, negative error code on failure.
2859 */
06ec9070 2860static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2861{
60599a03 2862 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2863 int i = 0, r;
2864
2865 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2866 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2867 continue;
2868 if (adev->ip_blocks[i].version->funcs->late_init) {
2869 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2870 if (r) {
2871 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2872 adev->ip_blocks[i].version->funcs->name, r);
2873 return r;
2874 }
2dc80b00 2875 }
73f847db 2876 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2877 }
2878
867e24ca 2879 r = amdgpu_ras_late_init(adev);
2880 if (r) {
2881 DRM_ERROR("amdgpu_ras_late_init failed %d", r);
2882 return r;
2883 }
2884
a891d239
DL
2885 amdgpu_ras_set_error_query_ready(adev, true);
2886
1112a46b
RZ
2887 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2888 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2889
06ec9070 2890 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2891
beff74bc
AD
2892 r = amdgpu_device_enable_mgpu_fan_boost();
2893 if (r)
2894 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2895
4da8b639 2896 /* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */
47fc644f
SS
2897 if (amdgpu_passthrough(adev) &&
2898 ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
2899 adev->asic_type == CHIP_ALDEBARAN))
bc143d8b 2900 amdgpu_dpm_handle_passthrough_sbr(adev, true);
60599a03
EQ
2901
2902 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2903 mutex_lock(&mgpu_info.mutex);
2904
2905 /*
2906 * Reset device p-state to low as this was booted with high.
2907 *
2908 * This should be performed only after all devices from the same
2909 * hive get initialized.
2910 *
2911 * However, it's unknown how many devices are in the hive in advance,
2912 * as this is counted one by one during device initialization.
2913 *
2914 * So, we wait for all XGMI interlinked devices initialized.
2915 * This may bring some delays as those devices may come from
2916 * different hives. But that should be OK.
2917 */
2918 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2919 for (i = 0; i < mgpu_info.num_gpu; i++) {
2920 gpu_instance = &(mgpu_info.gpu_ins[i]);
2921 if (gpu_instance->adev->flags & AMD_IS_APU)
2922 continue;
2923
d84a430d
JK
2924 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2925 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
2926 if (r) {
2927 DRM_ERROR("pstate setting failed (%d).\n", r);
2928 break;
2929 }
2930 }
2931 }
2932
2933 mutex_unlock(&mgpu_info.mutex);
2934 }
2935
d38ceaf9
AD
2936 return 0;
2937}
2938
613aa3ea
LY
2939/**
2940 * amdgpu_device_smu_fini_early - smu hw_fini wrapper
2941 *
2942 * @adev: amdgpu_device pointer
2943 *
2944 * For ASICs that need to disable SMC first
2945 */
2946static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
2947{
2948 int i, r;
2949
4e8303cf 2950 if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
613aa3ea
LY
2951 return;
2952
2953 for (i = 0; i < adev->num_ip_blocks; i++) {
2954 if (!adev->ip_blocks[i].status.hw)
2955 continue;
2956 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2957 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2958 /* XXX handle errors */
2959 if (r) {
2960 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2961 adev->ip_blocks[i].version->funcs->name, r);
2962 }
2963 adev->ip_blocks[i].status.hw = false;
2964 break;
2965 }
2966 }
2967}
2968
e9669fb7 2969static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
d38ceaf9
AD
2970{
2971 int i, r;
2972
e9669fb7
AG
2973 for (i = 0; i < adev->num_ip_blocks; i++) {
2974 if (!adev->ip_blocks[i].version->funcs->early_fini)
2975 continue;
5278a159 2976
e9669fb7
AG
2977 r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
2978 if (r) {
2979 DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
2980 adev->ip_blocks[i].version->funcs->name, r);
2981 }
2982 }
c030f2e4 2983
05df1f01 2984 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2985 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2986
7270e895
TY
2987 amdgpu_amdkfd_suspend(adev, false);
2988
613aa3ea
LY
2989 /* Workaround for ASICs that need to disable SMC first */
2990 amdgpu_device_smu_fini_early(adev);
3e96dbfd 2991
d38ceaf9 2992 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2993 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2994 continue;
8201a67a 2995
a1255107 2996 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2997 /* XXX handle errors */
2c1a2784 2998 if (r) {
a1255107
AD
2999 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
3000 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 3001 }
8201a67a 3002
a1255107 3003 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
3004 }
3005
6effad8a
GC
3006 if (amdgpu_sriov_vf(adev)) {
3007 if (amdgpu_virt_release_full_gpu(adev, false))
3008 DRM_ERROR("failed to release exclusive mode on fini\n");
3009 }
3010
e9669fb7
AG
3011 return 0;
3012}
3013
3014/**
3015 * amdgpu_device_ip_fini - run fini for hardware IPs
3016 *
3017 * @adev: amdgpu_device pointer
3018 *
3019 * Main teardown pass for hardware IPs. The list of all the hardware
3020 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
3021 * are run. hw_fini tears down the hardware associated with each IP
3022 * and sw_fini tears down any software state associated with each IP.
3023 * Returns 0 on success, negative error code on failure.
3024 */
3025static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
3026{
3027 int i, r;
3028
3029 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
3030 amdgpu_virt_release_ras_err_handler_data(adev);
3031
e9669fb7
AG
3032 if (adev->gmc.xgmi.num_physical_nodes > 1)
3033 amdgpu_xgmi_remove_device(adev);
3034
c004d44e 3035 amdgpu_amdkfd_device_fini_sw(adev);
9950cda2 3036
d38ceaf9 3037 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3038 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 3039 continue;
c12aba3a
ML
3040
3041 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 3042 amdgpu_ucode_free_bo(adev);
1e256e27 3043 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a 3044 amdgpu_device_wb_fini(adev);
7ccfd79f 3045 amdgpu_device_mem_scratch_fini(adev);
533aed27 3046 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
3047 }
3048
a1255107 3049 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 3050 /* XXX handle errors */
2c1a2784 3051 if (r) {
a1255107
AD
3052 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
3053 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 3054 }
a1255107
AD
3055 adev->ip_blocks[i].status.sw = false;
3056 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
3057 }
3058
a6dcfd9c 3059 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3060 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 3061 continue;
a1255107
AD
3062 if (adev->ip_blocks[i].version->funcs->late_fini)
3063 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
3064 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
3065 }
3066
c030f2e4 3067 amdgpu_ras_fini(adev);
3068
d38ceaf9
AD
3069 return 0;
3070}
3071
e3ecdffa 3072/**
beff74bc 3073 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 3074 *
1112a46b 3075 * @work: work_struct.
e3ecdffa 3076 */
beff74bc 3077static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
3078{
3079 struct amdgpu_device *adev =
beff74bc 3080 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
3081 int r;
3082
3083 r = amdgpu_ib_ring_tests(adev);
3084 if (r)
3085 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
3086}
3087
1e317b99
RZ
3088static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
3089{
3090 struct amdgpu_device *adev =
3091 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
3092
90a92662
MD
3093 WARN_ON_ONCE(adev->gfx.gfx_off_state);
3094 WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
3095
3096 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
3097 adev->gfx.gfx_off_state = true;
1e317b99
RZ
3098}
3099
e3ecdffa 3100/**
e7854a03 3101 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
3102 *
3103 * @adev: amdgpu_device pointer
3104 *
3105 * Main suspend function for hardware IPs. The list of all the hardware
3106 * IPs that make up the asic is walked, clockgating is disabled and the
3107 * suspend callbacks are run. suspend puts the hardware and software state
3108 * in each IP into a state suitable for suspend.
3109 * Returns 0 on success, negative error code on failure.
3110 */
e7854a03
AD
3111static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
3112{
3113 int i, r;
3114
50ec83f0
AD
3115 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3116 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 3117
b31d6ada
EQ
3118 /*
3119 * Per PMFW team's suggestion, driver needs to handle gfxoff
3120 * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
3121 * scenario. Add the missing df cstate disablement here.
3122 */
3123 if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
3124 dev_warn(adev->dev, "Failed to disallow df cstate");
3125
e7854a03
AD
3126 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3127 if (!adev->ip_blocks[i].status.valid)
3128 continue;
2b9f7848 3129
e7854a03 3130 /* displays are handled separately */
2b9f7848
ND
3131 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
3132 continue;
3133
3134 /* XXX handle errors */
3135 r = adev->ip_blocks[i].version->funcs->suspend(adev);
3136 /* XXX handle errors */
3137 if (r) {
3138 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3139 adev->ip_blocks[i].version->funcs->name, r);
3140 return r;
e7854a03 3141 }
2b9f7848
ND
3142
3143 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
3144 }
3145
e7854a03
AD
3146 return 0;
3147}
3148
3149/**
3150 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3151 *
3152 * @adev: amdgpu_device pointer
3153 *
3154 * Main suspend function for hardware IPs. The list of all the hardware
3155 * IPs that make up the asic is walked, clockgating is disabled and the
3156 * suspend callbacks are run. suspend puts the hardware and software state
3157 * in each IP into a state suitable for suspend.
3158 * Returns 0 on success, negative error code on failure.
3159 */
3160static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3161{
3162 int i, r;
3163
557f42a2 3164 if (adev->in_s0ix)
bc143d8b 3165 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
34416931 3166
d38ceaf9 3167 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3168 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 3169 continue;
e7854a03
AD
3170 /* displays are handled in phase1 */
3171 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3172 continue;
bff77e86
LM
3173 /* PSP lost connection when err_event_athub occurs */
3174 if (amdgpu_ras_intr_triggered() &&
3175 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3176 adev->ip_blocks[i].status.hw = false;
3177 continue;
3178 }
e3c1b071 3179
3180 /* skip unnecessary suspend if we do not initialize them yet */
3181 if (adev->gmc.xgmi.pending_reset &&
3182 !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3183 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
3184 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3185 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
3186 adev->ip_blocks[i].status.hw = false;
3187 continue;
3188 }
557f42a2 3189
afa6646b 3190 /* skip suspend of gfx/mes and psp for S0ix
32ff160d
AD
3191 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3192 * like at runtime. PSP is also part of the always on hardware
3193 * so no need to suspend it.
3194 */
557f42a2 3195 if (adev->in_s0ix &&
32ff160d 3196 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
afa6646b
AD
3197 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3198 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
557f42a2
AD
3199 continue;
3200
2a7798ea
AD
3201 /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3202 if (adev->in_s0ix &&
4e8303cf
LL
3203 (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
3204 IP_VERSION(5, 0, 0)) &&
3205 (adev->ip_blocks[i].version->type ==
3206 AMD_IP_BLOCK_TYPE_SDMA))
2a7798ea
AD
3207 continue;
3208
e11c7750
TH
3209 /* Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot.
3210 * These are in TMR, hence are expected to be reused by PSP-TOS to reload
3211 * from this location and RLC Autoload automatically also gets loaded
3212 * from here based on PMFW -> PSP message during re-init sequence.
3213 * Therefore, the psp suspend & resume should be skipped to avoid
3214 * destroying the TMR and reloading FWs again for IMU enabled APU ASICs.
3215 */
3216 if (amdgpu_in_reset(adev) &&
3217 (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3218 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3219 continue;
3220
d38ceaf9 3221 /* XXX handle errors */
a1255107 3222 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 3223 /* XXX handle errors */
2c1a2784 3224 if (r) {
a1255107
AD
3225 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3226 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 3227 }
876923fb 3228 adev->ip_blocks[i].status.hw = false;
a3a09142 3229 /* handle putting the SMC in the appropriate state */
47fc644f 3230 if (!amdgpu_sriov_vf(adev)) {
86b93fd6
JZ
3231 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3232 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3233 if (r) {
3234 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3235 adev->mp1_state, r);
3236 return r;
3237 }
a3a09142
AD
3238 }
3239 }
d38ceaf9
AD
3240 }
3241
3242 return 0;
3243}
3244
e7854a03
AD
3245/**
3246 * amdgpu_device_ip_suspend - run suspend for hardware IPs
3247 *
3248 * @adev: amdgpu_device pointer
3249 *
3250 * Main suspend function for hardware IPs. The list of all the hardware
3251 * IPs that make up the asic is walked, clockgating is disabled and the
3252 * suspend callbacks are run. suspend puts the hardware and software state
3253 * in each IP into a state suitable for suspend.
3254 * Returns 0 on success, negative error code on failure.
3255 */
3256int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3257{
3258 int r;
3259
3c73683c
JC
3260 if (amdgpu_sriov_vf(adev)) {
3261 amdgpu_virt_fini_data_exchange(adev);
e7819644 3262 amdgpu_virt_request_full_gpu(adev, false);
3c73683c 3263 }
e7819644 3264
e7854a03
AD
3265 r = amdgpu_device_ip_suspend_phase1(adev);
3266 if (r)
3267 return r;
3268 r = amdgpu_device_ip_suspend_phase2(adev);
3269
e7819644
YT
3270 if (amdgpu_sriov_vf(adev))
3271 amdgpu_virt_release_full_gpu(adev, false);
3272
e7854a03
AD
3273 return r;
3274}
3275
06ec9070 3276static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3277{
3278 int i, r;
3279
2cb681b6 3280 static enum amd_ip_block_type ip_order[] = {
2cb681b6 3281 AMD_IP_BLOCK_TYPE_COMMON,
c1c39032 3282 AMD_IP_BLOCK_TYPE_GMC,
39186aef 3283 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
3284 AMD_IP_BLOCK_TYPE_IH,
3285 };
a90ad3c2 3286
95ea3dbc 3287 for (i = 0; i < adev->num_ip_blocks; i++) {
2cb681b6
ML
3288 int j;
3289 struct amdgpu_ip_block *block;
a90ad3c2 3290
4cd2a96d
J
3291 block = &adev->ip_blocks[i];
3292 block->status.hw = false;
2cb681b6 3293
4cd2a96d 3294 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
2cb681b6 3295
4cd2a96d 3296 if (block->version->type != ip_order[j] ||
2cb681b6
ML
3297 !block->status.valid)
3298 continue;
3299
3300 r = block->version->funcs->hw_init(adev);
0aaeefcc 3301 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3302 if (r)
3303 return r;
482f0e53 3304 block->status.hw = true;
a90ad3c2
ML
3305 }
3306 }
3307
3308 return 0;
3309}
3310
06ec9070 3311static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3312{
3313 int i, r;
3314
2cb681b6
ML
3315 static enum amd_ip_block_type ip_order[] = {
3316 AMD_IP_BLOCK_TYPE_SMC,
3317 AMD_IP_BLOCK_TYPE_DCE,
3318 AMD_IP_BLOCK_TYPE_GFX,
3319 AMD_IP_BLOCK_TYPE_SDMA,
ec64350d 3320 AMD_IP_BLOCK_TYPE_MES,
257deb8c 3321 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07 3322 AMD_IP_BLOCK_TYPE_VCE,
d2cdc014
YZ
3323 AMD_IP_BLOCK_TYPE_VCN,
3324 AMD_IP_BLOCK_TYPE_JPEG
2cb681b6 3325 };
a90ad3c2 3326
2cb681b6
ML
3327 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3328 int j;
3329 struct amdgpu_ip_block *block;
a90ad3c2 3330
2cb681b6
ML
3331 for (j = 0; j < adev->num_ip_blocks; j++) {
3332 block = &adev->ip_blocks[j];
3333
3334 if (block->version->type != ip_order[i] ||
482f0e53
ML
3335 !block->status.valid ||
3336 block->status.hw)
2cb681b6
ML
3337 continue;
3338
895bd048
JZ
3339 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3340 r = block->version->funcs->resume(adev);
3341 else
3342 r = block->version->funcs->hw_init(adev);
3343
0aaeefcc 3344 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3345 if (r)
3346 return r;
482f0e53 3347 block->status.hw = true;
a90ad3c2
ML
3348 }
3349 }
3350
3351 return 0;
3352}
3353
e3ecdffa
AD
3354/**
3355 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3356 *
3357 * @adev: amdgpu_device pointer
3358 *
3359 * First resume function for hardware IPs. The list of all the hardware
3360 * IPs that make up the asic is walked and the resume callbacks are run for
3361 * COMMON, GMC, and IH. resume puts the hardware into a functional state
3362 * after a suspend and updates the software state as necessary. This
3363 * function is also used for restoring the GPU after a GPU reset.
3364 * Returns 0 on success, negative error code on failure.
3365 */
06ec9070 3366static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
3367{
3368 int i, r;
3369
a90ad3c2 3370 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3371 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 3372 continue;
a90ad3c2 3373 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3374 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
d7274ec7
BZ
3375 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3376 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
482f0e53 3377
fcf0649f
CZ
3378 r = adev->ip_blocks[i].version->funcs->resume(adev);
3379 if (r) {
3380 DRM_ERROR("resume of IP block <%s> failed %d\n",
3381 adev->ip_blocks[i].version->funcs->name, r);
3382 return r;
3383 }
482f0e53 3384 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
3385 }
3386 }
3387
3388 return 0;
3389}
3390
e3ecdffa
AD
3391/**
3392 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3393 *
3394 * @adev: amdgpu_device pointer
3395 *
3396 * Second resume function for hardware IPs. The list of all the hardware
3397 * IPs that make up the asic is walked and the resume callbacks are run for
3398 * all blocks except COMMON, GMC, IH and PSP. resume puts the hardware into a
3399 * functional state after a suspend and updates the software state as
3400 * necessary. This function is also used for restoring the GPU after a GPU
3401 * reset.
3402 * Returns 0 on success, negative error code on failure.
3403 */
06ec9070 3404static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3405{
3406 int i, r;
3407
3408 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3409 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 3410 continue;
fcf0649f 3411 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3412 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
3413 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3414 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 3415 continue;
a1255107 3416 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 3417 if (r) {
a1255107
AD
3418 DRM_ERROR("resume of IP block <%s> failed %d\n",
3419 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 3420 return r;
2c1a2784 3421 }
482f0e53 3422 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
3423 }
3424
3425 return 0;
3426}
3427
e3ecdffa
AD
3428/**
3429 * amdgpu_device_ip_resume - run resume for hardware IPs
3430 *
3431 * @adev: amdgpu_device pointer
3432 *
3433 * Main resume function for hardware IPs. The hardware IPs
3434 * are split into two resume functions because they are
b8920e1e 3435 * also used in recovering from a GPU reset and some additional
e3ecdffa
AD
3436 * steps need to be taken between them. In this case (S3/S4) they are
3437 * run sequentially.
3438 * Returns 0 on success, negative error code on failure.
3439 */
06ec9070 3440static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
3441{
3442 int r;
3443
06ec9070 3444 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
3445 if (r)
3446 return r;
7a3e0bb2
RZ
3447
3448 r = amdgpu_device_fw_loading(adev);
3449 if (r)
3450 return r;
3451
06ec9070 3452 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
3453
3454 return r;
3455}
3456
e3ecdffa
AD
3457/**
3458 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3459 *
3460 * @adev: amdgpu_device pointer
3461 *
3462 * Query the VBIOS data tables to determine if the board supports SR-IOV.
3463 */
4e99a44e 3464static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 3465{
6867e1b5
ML
3466 if (amdgpu_sriov_vf(adev)) {
3467 if (adev->is_atom_fw) {
58ff791a 3468 if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
6867e1b5
ML
3469 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3470 } else {
3471 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3472 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3473 }
3474
3475 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3476 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 3477 }
048765ad
AR
3478}
3479
e3ecdffa
AD
3480/**
3481 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3482 *
3483 * @asic_type: AMD asic type
3484 *
3485 * Check if there is DC (new modesetting infrastructure) support for an asic.
3486 * Returns true if DC has support, false if not.
3487 */
4562236b
HW
3488bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3489{
3490 switch (asic_type) {
0637d417
AD
3491#ifdef CONFIG_DRM_AMDGPU_SI
3492 case CHIP_HAINAN:
3493#endif
3494 case CHIP_TOPAZ:
3495 /* chips with no display hardware */
3496 return false;
4562236b 3497#if defined(CONFIG_DRM_AMD_DC)
64200c46
MR
3498 case CHIP_TAHITI:
3499 case CHIP_PITCAIRN:
3500 case CHIP_VERDE:
3501 case CHIP_OLAND:
2d32ffd6
AD
3502 /*
3503 * We have systems in the wild with these ASICs that require
3504 * LVDS and VGA support which is not supported with DC.
3505 *
3506 * Fallback to the non-DC driver here by default so as not to
3507 * cause regressions.
3508 */
3509#if defined(CONFIG_DRM_AMD_DC_SI)
3510 return amdgpu_dc > 0;
3511#else
3512 return false;
64200c46 3513#endif
4562236b 3514 case CHIP_BONAIRE:
0d6fbccb 3515 case CHIP_KAVERI:
367e6687
AD
3516 case CHIP_KABINI:
3517 case CHIP_MULLINS:
d9fda248
HW
3518 /*
3519 * We have systems in the wild with these ASICs that require
b5a0168e 3520 * VGA support which is not supported with DC.
d9fda248
HW
3521 *
3522 * Fallback to the non-DC driver here by default so as not to
3523 * cause regressions.
3524 */
3525 return amdgpu_dc > 0;
f7f12b25 3526 default:
fd187853 3527 return amdgpu_dc != 0;
f7f12b25 3528#else
4562236b 3529 default:
93b09a9a 3530 if (amdgpu_dc > 0)
b8920e1e 3531 DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
4562236b 3532 return false;
f7f12b25 3533#endif
4562236b
HW
3534 }
3535}
3536
3537/**
3538 * amdgpu_device_has_dc_support - check if dc is supported
3539 *
982a820b 3540 * @adev: amdgpu_device pointer
4562236b
HW
3541 *
3542 * Returns true for supported, false for not supported
3543 */
3544bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3545{
25263da3 3546 if (adev->enable_virtual_display ||
abaf210c 3547 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
2555039d
XY
3548 return false;
3549
4562236b
HW
3550 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3551}
3552
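/**
 * amdgpu_device_xgmi_reset_func - reset work handler for one XGMI hive member
 *
 * @__work: the xgmi_reset_work item embedded in struct amdgpu_device
 *
 * Runs the per-device part of an XGMI hive wide reset. For BACO capable
 * devices the hive task barrier is used so that all nodes enter and exit
 * BACO together; otherwise a full ASIC reset is performed after the barrier.
 * The result is stored in adev->asic_reset_res for the caller to inspect.
 */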
d4535e2c
AG
3553static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3554{
3555 struct amdgpu_device *adev =
3556 container_of(__work, struct amdgpu_device, xgmi_reset_work);
d95e8e97 3557 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
d4535e2c 3558
c6a6e2db
AG
3559 /* It's a bug to not have a hive within this function */
3560 if (WARN_ON(!hive))
3561 return;
3562
3563 /*
3564 * Use task barrier to synchronize all xgmi reset works across the
3565 * hive. task_barrier_enter and task_barrier_exit will block
3566 * until all the threads running the xgmi reset works reach
3567 * those points. task_barrier_full will do both blocks.
3568 */
3569 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3570
3571 task_barrier_enter(&hive->tb);
4a580877 3572 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
c6a6e2db
AG
3573
3574 if (adev->asic_reset_res)
3575 goto fail;
3576
3577 task_barrier_exit(&hive->tb);
4a580877 3578 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
c6a6e2db
AG
3579
3580 if (adev->asic_reset_res)
3581 goto fail;
43c4d576 3582
21226f02 3583 amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB);
c6a6e2db
AG
3584 } else {
3585
3586 task_barrier_full(&hive->tb);
3587 adev->asic_reset_res = amdgpu_asic_reset(adev);
3588 }
ce316fa5 3589
c6a6e2db 3590fail:
d4535e2c 3591 if (adev->asic_reset_res)
fed184e9 3592 DRM_WARN("ASIC reset failed with error %d for drm dev %s",
4a580877 3593 adev->asic_reset_res, adev_to_drm(adev)->unique);
d95e8e97 3594 amdgpu_put_xgmi_hive(hive);
d4535e2c
AG
3595}
3596
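/**
 * amdgpu_device_get_job_timeout_settings - parse the lockup_timeout parameter
 *
 * @adev: amdgpu_device pointer
 *
 * Parses the comma separated amdgpu.lockup_timeout module parameter into the
 * per-engine job timeouts. The values are taken in the order gfx, compute,
 * sdma, video; 0 keeps the default and a negative value disables the timeout.
 * If only one value is given it applies to all non-compute queues (and to
 * compute as well for SR-IOV and passthrough). For example (illustrative):
 *   amdgpu.lockup_timeout=10000,60000,10000,10000
 * would set a 10 second timeout for gfx, sdma and video and 60 seconds for
 * compute.
 * Returns 0 on success, or the kstrtol() error code on a malformed value.
 */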
71f98027
AD
3597static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3598{
3599 char *input = amdgpu_lockup_timeout;
3600 char *timeout_setting = NULL;
3601 int index = 0;
3602 long timeout;
3603 int ret = 0;
3604
3605 /*
67387dfe
AD
3606 * By default timeout for non compute jobs is 10000
3607 * and 60000 for compute jobs.
71f98027 3608 * In SR-IOV or passthrough mode, the timeout for compute
b7b2a316 3609 * jobs is 60000 by default.
71f98027
AD
3610 */
3611 adev->gfx_timeout = msecs_to_jiffies(10000);
3612 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
9882e278
ED
3613 if (amdgpu_sriov_vf(adev))
3614 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3615 msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
71f98027 3616 else
67387dfe 3617 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027 3618
f440ff44 3619 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 3620 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 3621 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
3622 ret = kstrtol(timeout_setting, 0, &timeout);
3623 if (ret)
3624 return ret;
3625
3626 if (timeout == 0) {
3627 index++;
3628 continue;
3629 } else if (timeout < 0) {
3630 timeout = MAX_SCHEDULE_TIMEOUT;
127aedf9
CK
3631 dev_warn(adev->dev, "lockup timeout disabled");
3632 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
71f98027
AD
3633 } else {
3634 timeout = msecs_to_jiffies(timeout);
3635 }
3636
3637 switch (index++) {
3638 case 0:
3639 adev->gfx_timeout = timeout;
3640 break;
3641 case 1:
3642 adev->compute_timeout = timeout;
3643 break;
3644 case 2:
3645 adev->sdma_timeout = timeout;
3646 break;
3647 case 3:
3648 adev->video_timeout = timeout;
3649 break;
3650 default:
3651 break;
3652 }
3653 }
3654 /*
3655 * There is only one value specified and
3656 * it should apply to all non-compute jobs.
3657 */
bcccee89 3658 if (index == 1) {
71f98027 3659 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
3660 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3661 adev->compute_timeout = adev->gfx_timeout;
3662 }
71f98027
AD
3663 }
3664
3665 return ret;
3666}
d4535e2c 3667
4a74c38c
PY
3668/**
3669 * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3670 *
3671 * @adev: amdgpu_device pointer
3672 *
3673 * RAM is direct mapped to the GPU if the IOMMU is not enabled or is in passthrough mode
3674 */
3675static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3676{
3677 struct iommu_domain *domain;
3678
3679 domain = iommu_get_domain_for_dev(adev->dev);
3680 if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3681 adev->ram_is_direct_mapped = true;
3682}
3683
77f3a5cd 3684static const struct attribute *amdgpu_dev_attributes[] = {
77f3a5cd
ND
3685 &dev_attr_pcie_replay_count.attr,
3686 NULL
3687};
3688
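/**
 * amdgpu_device_set_mcbp - configure mid-command buffer preemption
 *
 * @adev: amdgpu_device pointer
 *
 * Honours the amdgpu.mcbp module parameter when it is explicitly 0 or 1,
 * otherwise enables MCBP by default on GFX 9.x parts that have graphics
 * rings, and always enables it under SR-IOV.
 */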
02ff519e
AD
3689static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
3690{
3691 if (amdgpu_mcbp == 1)
3692 adev->gfx.mcbp = true;
1e9e15dc
JZ
3693 else if (amdgpu_mcbp == 0)
3694 adev->gfx.mcbp = false;
4e8303cf
LL
3695 else if ((amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 0, 0)) &&
3696 (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 0, 0)) &&
1e9e15dc 3697 adev->gfx.num_gfx_rings)
50a7c876
AD
3698 adev->gfx.mcbp = true;
3699
02ff519e
AD
3700 if (amdgpu_sriov_vf(adev))
3701 adev->gfx.mcbp = true;
3702
3703 if (adev->gfx.mcbp)
3704 DRM_INFO("MCBP is enabled\n");
3705}
3706
d38ceaf9
AD
3707/**
3708 * amdgpu_device_init - initialize the driver
3709 *
3710 * @adev: amdgpu_device pointer
d38ceaf9
AD
3711 * @flags: driver flags
3712 *
3713 * Initializes the driver info and hw (all asics).
3714 * Returns 0 for success or an error on failure.
3715 * Called at driver startup.
3716 */
3717int amdgpu_device_init(struct amdgpu_device *adev,
d38ceaf9
AD
3718 uint32_t flags)
3719{
8aba21b7
LT
3720 struct drm_device *ddev = adev_to_drm(adev);
3721 struct pci_dev *pdev = adev->pdev;
d38ceaf9 3722 int r, i;
b98c6299 3723 bool px = false;
95844d20 3724 u32 max_MBps;
59e9fff1 3725 int tmp;
d38ceaf9
AD
3726
3727 adev->shutdown = false;
d38ceaf9 3728 adev->flags = flags;
4e66d7d2
YZ
3729
3730 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3731 adev->asic_type = amdgpu_force_asic_type;
3732 else
3733 adev->asic_type = flags & AMD_ASIC_MASK;
3734
d38ceaf9 3735 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 3736 if (amdgpu_emu_mode == 1)
8bdab6bb 3737 adev->usec_timeout *= 10;
770d13b1 3738 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
3739 adev->accel_working = false;
3740 adev->num_rings = 0;
68ce8b24 3741 RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
d38ceaf9
AD
3742 adev->mman.buffer_funcs = NULL;
3743 adev->mman.buffer_funcs_ring = NULL;
3744 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 3745 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 3746 adev->gmc.gmc_funcs = NULL;
7bd939d0 3747 adev->harvest_ip_mask = 0x0;
f54d1867 3748 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 3749 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
3750
3751 adev->smc_rreg = &amdgpu_invalid_rreg;
3752 adev->smc_wreg = &amdgpu_invalid_wreg;
3753 adev->pcie_rreg = &amdgpu_invalid_rreg;
3754 adev->pcie_wreg = &amdgpu_invalid_wreg;
0c552ed3
LM
3755 adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
3756 adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
36b9a952
HR
3757 adev->pciep_rreg = &amdgpu_invalid_rreg;
3758 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
3759 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3760 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
a76b2870
CL
3761 adev->pcie_rreg64_ext = &amdgpu_invalid_rreg64_ext;
3762 adev->pcie_wreg64_ext = &amdgpu_invalid_wreg64_ext;
d38ceaf9
AD
3763 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3764 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3765 adev->didt_rreg = &amdgpu_invalid_rreg;
3766 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
3767 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3768 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
3769 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3770 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3771
3e39ab90
AD
3772 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3773 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3774 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
3775
3776 /* mutex initialization is all done here so we
b8920e1e
SS
3777 * can recall functions without running into locking issues
3778 */
0e5ca0d1 3779 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
3780 mutex_init(&adev->pm.mutex);
3781 mutex_init(&adev->gfx.gpu_clock_mutex);
3782 mutex_init(&adev->srbm_mutex);
b8866c26 3783 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3784 mutex_init(&adev->gfx.gfx_off_mutex);
98a54e88 3785 mutex_init(&adev->gfx.partition_mutex);
d38ceaf9 3786 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3787 mutex_init(&adev->mn_lock);
e23b74aa 3788 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3789 hash_init(adev->mn_hash);
32eaeae0 3790 mutex_init(&adev->psp.mutex);
bd052211 3791 mutex_init(&adev->notifier_lock);
8cda7a4f 3792 mutex_init(&adev->pm.stable_pstate_ctx_lock);
f113cc32 3793 mutex_init(&adev->benchmark_mutex);
d38ceaf9 3794
ab3b9de6 3795 amdgpu_device_init_apu_flags(adev);
9f6a7857 3796
912dfc84
EQ
3797 r = amdgpu_device_check_arguments(adev);
3798 if (r)
3799 return r;
d38ceaf9 3800
d38ceaf9
AD
3801 spin_lock_init(&adev->mmio_idx_lock);
3802 spin_lock_init(&adev->smc_idx_lock);
3803 spin_lock_init(&adev->pcie_idx_lock);
3804 spin_lock_init(&adev->uvd_ctx_idx_lock);
3805 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3806 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3807 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3808 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3809 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3810
0c4e7fa5
CZ
3811 INIT_LIST_HEAD(&adev->shadow_list);
3812 mutex_init(&adev->shadow_list_lock);
3813
655ce9cb 3814 INIT_LIST_HEAD(&adev->reset_list);
3815
6492e1b0 3816 INIT_LIST_HEAD(&adev->ras_list);
3817
3e38b634
EQ
3818 INIT_LIST_HEAD(&adev->pm.od_kobj_list);
3819
beff74bc
AD
3820 INIT_DELAYED_WORK(&adev->delayed_init_work,
3821 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3822 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3823 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3824
d4535e2c
AG
3825 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3826
d23ee13f 3827 adev->gfx.gfx_off_req_count = 1;
0ad7347a
AA
3828 adev->gfx.gfx_off_residency = 0;
3829 adev->gfx.gfx_off_entrycount = 0;
b6e79d9a 3830 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3831
b265bdbd
EQ
3832 atomic_set(&adev->throttling_logging_enabled, 1);
3833 /*
3834 * If throttling continues, logging will be performed every minute
3835 * to avoid log flooding. "-1" is subtracted since the thermal
3836 * throttling interrupt comes every second. Thus, the total logging
3837 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
3838 * for throttling interrupt) = 60 seconds.
3839 */
3840 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3841 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3842
0fa49558
AX
3843 /* Registers mapping */
3844 /* TODO: block userspace mapping of io register */
da69c161
KW
3845 if (adev->asic_type >= CHIP_BONAIRE) {
3846 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3847 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3848 } else {
3849 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3850 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3851 }
d38ceaf9 3852
6c08e0ef
EQ
3853 for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3854 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3855
d38ceaf9 3856 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
b8920e1e 3857 if (!adev->rmmio)
d38ceaf9 3858 return -ENOMEM;
b8920e1e 3859
d38ceaf9 3860 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
b8920e1e 3861 DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size);
d38ceaf9 3862
436afdfa
PY
3863 /*
3864 * Reset domain needs to be present early, before the XGMI hive is discovered
3865 * (if any) and initialized, to use the reset sem and in_gpu reset flag
3866 * early on during init and before calling RREG32.
3867 */
3868 adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3869 if (!adev->reset_domain)
3870 return -ENOMEM;
3871
3aa0115d
ML
3872 /* detect hw virtualization here */
3873 amdgpu_detect_virtualization(adev);
3874
04e85958
TL
3875 amdgpu_device_get_pcie_info(adev);
3876
dffa11b4
ML
3877 r = amdgpu_device_get_job_timeout_settings(adev);
3878 if (r) {
3879 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4ef87d8f 3880 return r;
a190d1c7
XY
3881 }
3882
d38ceaf9 3883 /* early init functions */
06ec9070 3884 r = amdgpu_device_ip_early_init(adev);
d38ceaf9 3885 if (r)
4ef87d8f 3886 return r;
d38ceaf9 3887
02ff519e
AD
3888 amdgpu_device_set_mcbp(adev);
3889
b7cdb41e
ML
3890 /* Get rid of things like offb */
3891 r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
3892 if (r)
3893 return r;
3894
4d33e704
SK
3895 /* Enable TMZ based on IP_VERSION */
3896 amdgpu_gmc_tmz_set(adev);
3897
957b0787 3898 amdgpu_gmc_noretry_set(adev);
4a0165f0
VS
3899 /* Need to get xgmi info early to decide the reset behavior */
3900 if (adev->gmc.xgmi.supported) {
3901 r = adev->gfxhub.funcs->get_xgmi_info(adev);
3902 if (r)
3903 return r;
3904 }
3905
8e6d0b69 3906 /* enable PCIE atomic ops */
b4520bfd
GW
3907 if (amdgpu_sriov_vf(adev)) {
3908 if (adev->virt.fw_reserve.p_pf2vf)
3909 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
3910 adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
3911 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
0e768043
YZ
3912 /* APUs with gfx9 onwards don't rely on PCIe atomics; the internal
3913 * path natively supports atomics, so set have_atomics_support to true.
3914 */
b4520bfd 3915 } else if ((adev->flags & AMD_IS_APU) &&
4e8303cf
LL
3916 (amdgpu_ip_version(adev, GC_HWIP, 0) >
3917 IP_VERSION(9, 0, 0))) {
0e768043 3918 adev->have_atomics_support = true;
b4520bfd 3919 } else {
8e6d0b69 3920 adev->have_atomics_support =
3921 !pci_enable_atomic_ops_to_root(adev->pdev,
3922 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3923 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
b4520bfd
GW
3924 }
3925
8e6d0b69 3926 if (!adev->have_atomics_support)
3927 dev_info(adev->dev, "PCIE atomic ops is not supported\n");
3928
6585661d 3929 /* doorbell bar mapping and doorbell index init*/
43c064db 3930 amdgpu_doorbell_init(adev);
6585661d 3931
9475a943
SL
3932 if (amdgpu_emu_mode == 1) {
3933 /* post the asic on emulation mode */
3934 emu_soc_asic_init(adev);
bfca0289 3935 goto fence_driver_init;
9475a943 3936 }
bfca0289 3937
04442bf7
LL
3938 amdgpu_reset_init(adev);
3939
4e99a44e 3940 /* detect if we are running with an SR-IOV vBIOS */
b4520bfd
GW
3941 if (adev->bios)
3942 amdgpu_device_detect_sriov_bios(adev);
048765ad 3943
95e8e59e
AD
3944 /* check if we need to reset the asic
3945 * E.g., driver was not cleanly unloaded previously, etc.
3946 */
f14899fd 3947 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
e3c1b071 3948 if (adev->gmc.xgmi.num_physical_nodes) {
3949 dev_info(adev->dev, "Pending hive reset.\n");
3950 adev->gmc.xgmi.pending_reset = true;
3951 /* Only need to init necessary block for SMU to handle the reset */
3952 for (i = 0; i < adev->num_ip_blocks; i++) {
3953 if (!adev->ip_blocks[i].status.valid)
3954 continue;
3955 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3956 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3957 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3958 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
751f43e7 3959 DRM_DEBUG("IP %s disabled for hw_init.\n",
e3c1b071 3960 adev->ip_blocks[i].version->funcs->name);
3961 adev->ip_blocks[i].status.hw = true;
3962 }
3963 }
3964 } else {
59e9fff1 3965 tmp = amdgpu_reset_method;
3966 /* It should do a default reset when loading or reloading the driver,
3967 * regardless of the module parameter reset_method.
3968 */
3969 amdgpu_reset_method = AMD_RESET_METHOD_NONE;
e3c1b071 3970 r = amdgpu_asic_reset(adev);
59e9fff1 3971 amdgpu_reset_method = tmp;
e3c1b071 3972 if (r) {
3973 dev_err(adev->dev, "asic reset on init failed\n");
3974 goto failed;
3975 }
95e8e59e
AD
3976 }
3977 }
3978
d38ceaf9 3979 /* Post card if necessary */
39c640c0 3980 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3981 if (!adev->bios) {
bec86378 3982 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3983 r = -EINVAL;
3984 goto failed;
d38ceaf9 3985 }
bec86378 3986 DRM_INFO("GPU posting now...\n");
4d2997ab 3987 r = amdgpu_device_asic_init(adev);
4e99a44e
ML
3988 if (r) {
3989 dev_err(adev->dev, "gpu post error!\n");
3990 goto failed;
3991 }
d38ceaf9
AD
3992 }
3993
9535a86a
SZ
3994 if (adev->bios) {
3995 if (adev->is_atom_fw) {
3996 /* Initialize clocks */
3997 r = amdgpu_atomfirmware_get_clock_info(adev);
3998 if (r) {
3999 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
4000 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4001 goto failed;
4002 }
4003 } else {
4004 /* Initialize clocks */
4005 r = amdgpu_atombios_get_clock_info(adev);
4006 if (r) {
4007 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
4008 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4009 goto failed;
4010 }
4011 /* init i2c buses */
4012 if (!amdgpu_device_has_dc_support(adev))
4013 amdgpu_atombios_i2c_init(adev);
a5bde2f9 4014 }
2c1a2784 4015 }
d38ceaf9 4016
bfca0289 4017fence_driver_init:
d38ceaf9 4018 /* Fence driver */
067f44c8 4019 r = amdgpu_fence_driver_sw_init(adev);
2c1a2784 4020 if (r) {
067f44c8 4021 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
e23b74aa 4022 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 4023 goto failed;
2c1a2784 4024 }
d38ceaf9
AD
4025
4026 /* init the mode config */
4a580877 4027 drm_mode_config_init(adev_to_drm(adev));
d38ceaf9 4028
06ec9070 4029 r = amdgpu_device_ip_init(adev);
d38ceaf9 4030 if (r) {
06ec9070 4031 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 4032 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
970fd197 4033 goto release_ras_con;
d38ceaf9
AD
4034 }
4035
8d35a259
LG
4036 amdgpu_fence_driver_hw_init(adev);
4037
d69b8971
YZ
4038 dev_info(adev->dev,
4039 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
4040 adev->gfx.config.max_shader_engines,
4041 adev->gfx.config.max_sh_per_se,
4042 adev->gfx.config.max_cu_per_sh,
4043 adev->gfx.cu_info.number);
4044
d38ceaf9
AD
4045 adev->accel_working = true;
4046
e59c0205
AX
4047 amdgpu_vm_check_compute_bug(adev);
4048
95844d20
MO
4049 /* Initialize the buffer migration limit. */
4050 if (amdgpu_moverate >= 0)
4051 max_MBps = amdgpu_moverate;
4052 else
4053 max_MBps = 8; /* Allow 8 MB/s. */
4054 /* Get a log2 for easy divisions. */
4055 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
4056
b0adca4d
EQ
4057 /*
4058 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
4059 * Otherwise the mgpu fan boost feature will be skipped because this
4060 * gpu instance would not have been counted yet.
4061 */
4062 amdgpu_register_gpu_instance(adev);
4063
d38ceaf9
AD
4064 /* enable clockgating, etc. after ib tests, etc. since some blocks require
4065 * explicit gating rather than handling it automatically.
4066 */
e3c1b071 4067 if (!adev->gmc.xgmi.pending_reset) {
4068 r = amdgpu_device_ip_late_init(adev);
4069 if (r) {
4070 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
4071 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
970fd197 4072 goto release_ras_con;
e3c1b071 4073 }
4074 /* must succeed. */
4075 amdgpu_ras_resume(adev);
4076 queue_delayed_work(system_wq, &adev->delayed_init_work,
4077 msecs_to_jiffies(AMDGPU_RESUME_MS));
2c1a2784 4078 }
d38ceaf9 4079
38eecbe0
CL
4080 if (amdgpu_sriov_vf(adev)) {
4081 amdgpu_virt_release_full_gpu(adev, true);
2c738637 4082 flush_delayed_work(&adev->delayed_init_work);
38eecbe0 4083 }
2c738637 4084
90bcb9b5
EQ
4085 /*
4086 * Place the sysfs registration after `late_init`, since some of the
4087 * operations performed in `late_init` might affect the creation of
4088 * the sysfs interfaces.
4089 */
4090 r = amdgpu_atombios_sysfs_init(adev);
4091 if (r)
4092 drm_err(&adev->ddev,
4093 "registering atombios sysfs failed (%d).\n", r);
4094
4095 r = amdgpu_pm_sysfs_init(adev);
4096 if (r)
4097 DRM_ERROR("registering pm sysfs failed (%d).\n", r);
4098
4099 r = amdgpu_ucode_sysfs_init(adev);
4100 if (r) {
4101 adev->ucode_sysfs_en = false;
4102 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
4103 } else
4104 adev->ucode_sysfs_en = true;
4105
77f3a5cd 4106 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
5aea5327 4107 if (r)
77f3a5cd 4108 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166 4109
76da73f0
LL
4110 r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
4111 if (r)
4112 dev_err(adev->dev,
4113 "Could not create amdgpu board attributes\n");
4114
7957ec80
LL
4115 amdgpu_fru_sysfs_init(adev);
4116
d155bef0
AB
4117 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4118 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
4119 if (r)
4120 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
4121
c1dd4aa6
AG
4122 /* Have stored pci confspace at hand for restore in sudden PCI error */
4123 if (amdgpu_device_cache_pci_state(adev->pdev))
4124 pci_restore_state(pdev);
4125
8c3dd61c
KHF
4126 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
4127 /* this will fail for cards that aren't VGA class devices, just
b8920e1e
SS
4128 * ignore it
4129 */
8c3dd61c 4130 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
bf44e8ce 4131 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
8c3dd61c 4132
d37a3929
OC
4133 px = amdgpu_device_supports_px(ddev);
4134
4135 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
4136 apple_gmux_detect(NULL, NULL)))
8c3dd61c
KHF
4137 vga_switcheroo_register_client(adev->pdev,
4138 &amdgpu_switcheroo_ops, px);
d37a3929
OC
4139
4140 if (px)
8c3dd61c 4141 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
8c3dd61c 4142
e3c1b071 4143 if (adev->gmc.xgmi.pending_reset)
4144 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
4145 msecs_to_jiffies(AMDGPU_RESUME_MS));
4146
4a74c38c
PY
4147 amdgpu_device_check_iommu_direct_map(adev);
4148
d38ceaf9 4149 return 0;
83ba126a 4150
970fd197 4151release_ras_con:
38eecbe0
CL
4152 if (amdgpu_sriov_vf(adev))
4153 amdgpu_virt_release_full_gpu(adev, true);
4154
4155 /* failed in exclusive mode due to timeout */
4156 if (amdgpu_sriov_vf(adev) &&
4157 !amdgpu_sriov_runtime(adev) &&
4158 amdgpu_virt_mmio_blocked(adev) &&
4159 !amdgpu_virt_wait_reset(adev)) {
4160 dev_err(adev->dev, "VF exclusive mode timeout\n");
4161 /* Don't send request since VF is inactive. */
4162 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4163 adev->virt.ops = NULL;
4164 r = -EAGAIN;
4165 }
970fd197
SY
4166 amdgpu_release_ras_context(adev);
4167
83ba126a 4168failed:
89041940 4169 amdgpu_vf_error_trans_all(adev);
8840a387 4170
83ba126a 4171 return r;
d38ceaf9
AD
4172}
4173
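/**
 * amdgpu_device_unmap_mmio - tear down all CPU mappings of the device
 *
 * @adev: amdgpu_device pointer
 *
 * Invalidates userspace mappings pointing at this device and unmaps the
 * doorbell, register and VRAM apertures. Called when the underlying PCI
 * device has been unplugged, so no stale CPU access can reach the hardware.
 */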
07775fc1
AG
4174static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
4175{
62d5f9f7 4176
07775fc1
AG
4177 /* Clear all CPU mappings pointing to this device */
4178 unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
4179
4180 /* Unmap all mapped bars - Doorbell, registers and VRAM */
43c064db 4181 amdgpu_doorbell_fini(adev);
07775fc1
AG
4182
4183 iounmap(adev->rmmio);
4184 adev->rmmio = NULL;
4185 if (adev->mman.aper_base_kaddr)
4186 iounmap(adev->mman.aper_base_kaddr);
4187 adev->mman.aper_base_kaddr = NULL;
4188
4189 /* Memory manager related */
a0ba1279 4190 if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
07775fc1
AG
4191 arch_phys_wc_del(adev->gmc.vram_mtrr);
4192 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4193 }
4194}
4195
d38ceaf9 4196/**
bbe04dec 4197 * amdgpu_device_fini_hw - tear down the driver
d38ceaf9
AD
4198 *
4199 * @adev: amdgpu_device pointer
4200 *
4201 * Tear down the driver info (all asics).
4202 * Called at driver shutdown.
4203 */
72c8c97b 4204void amdgpu_device_fini_hw(struct amdgpu_device *adev)
d38ceaf9 4205{
aac89168 4206 dev_info(adev->dev, "amdgpu: finishing device.\n");
9f875167 4207 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 4208 adev->shutdown = true;
9f875167 4209
752c683d
ML
4210 /* make sure IB tests have finished before entering exclusive mode
4211 * to avoid preemption during the IB tests
b8920e1e 4212 */
519b8b76 4213 if (amdgpu_sriov_vf(adev)) {
752c683d 4214 amdgpu_virt_request_full_gpu(adev, false);
519b8b76
BZ
4215 amdgpu_virt_fini_data_exchange(adev);
4216 }
752c683d 4217
e5b03032
ML
4218 /* disable all interrupts */
4219 amdgpu_irq_disable_all(adev);
47fc644f 4220 if (adev->mode_info.mode_config_initialized) {
1053b9c9 4221 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4a580877 4222 drm_helper_force_disable_all(adev_to_drm(adev));
ff97cba8 4223 else
4a580877 4224 drm_atomic_helper_shutdown(adev_to_drm(adev));
ff97cba8 4225 }
8d35a259 4226 amdgpu_fence_driver_hw_fini(adev);
72c8c97b 4227
cd3a8a59 4228 if (adev->mman.initialized)
9bff18d1 4229 drain_workqueue(adev->mman.bdev.wq);
98f56188 4230
53e9d836 4231 if (adev->pm.sysfs_initialized)
7c868b59 4232 amdgpu_pm_sysfs_fini(adev);
72c8c97b
AG
4233 if (adev->ucode_sysfs_en)
4234 amdgpu_ucode_sysfs_fini(adev);
4235 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
7957ec80 4236 amdgpu_fru_sysfs_fini(adev);
72c8c97b 4237
232d1d43
SY
4238 /* disable ras feature must before hw fini */
4239 amdgpu_ras_pre_fini(adev);
4240
e9669fb7 4241 amdgpu_device_ip_fini_early(adev);
d10d0daa 4242
a3848df6
YW
4243 amdgpu_irq_fini_hw(adev);
4244
b6fd6e0f
SK
4245 if (adev->mman.initialized)
4246 ttm_device_clear_dma_mappings(&adev->mman.bdev);
894c6890 4247
d10d0daa 4248 amdgpu_gart_dummy_page_fini(adev);
07775fc1 4249
39934d3e
VP
4250 if (drm_dev_is_unplugged(adev_to_drm(adev)))
4251 amdgpu_device_unmap_mmio(adev);
87172e89 4252
72c8c97b
AG
4253}
4254
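/**
 * amdgpu_device_fini_sw - tear down the driver software state
 *
 * @adev: amdgpu_device pointer
 *
 * Second stage of driver teardown. Releases the remaining software state
 * (fence driver, IP blocks, i2c buses, BIOS copy, reset domain, cached PCI
 * state) after amdgpu_device_fini_hw() has already quiesced the hardware.
 */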
4255void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4256{
62d5f9f7 4257 int idx;
d37a3929 4258 bool px;
62d5f9f7 4259
8d35a259 4260 amdgpu_fence_driver_sw_fini(adev);
a5c5d8d5 4261 amdgpu_device_ip_fini(adev);
b31d3063 4262 amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
d38ceaf9 4263 adev->accel_working = false;
68ce8b24 4264 dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
04442bf7
LL
4265
4266 amdgpu_reset_fini(adev);
4267
d38ceaf9 4268 /* free i2c buses */
4562236b
HW
4269 if (!amdgpu_device_has_dc_support(adev))
4270 amdgpu_i2c_fini(adev);
bfca0289
SL
4271
4272 if (amdgpu_emu_mode != 1)
4273 amdgpu_atombios_fini(adev);
4274
d38ceaf9
AD
4275 kfree(adev->bios);
4276 adev->bios = NULL;
d37a3929 4277
8a2b5139
LL
4278 kfree(adev->fru_info);
4279 adev->fru_info = NULL;
4280
d37a3929
OC
4281 px = amdgpu_device_supports_px(adev_to_drm(adev));
4282
4283 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
4284 apple_gmux_detect(NULL, NULL)))
84c8b22e 4285 vga_switcheroo_unregister_client(adev->pdev);
d37a3929
OC
4286
4287 if (px)
83ba126a 4288 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d37a3929 4289
38d6be81 4290 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
b8779475 4291 vga_client_unregister(adev->pdev);
e9bc1bf7 4292
62d5f9f7
LS
4293 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4294
4295 iounmap(adev->rmmio);
4296 adev->rmmio = NULL;
43c064db 4297 amdgpu_doorbell_fini(adev);
62d5f9f7
LS
4298 drm_dev_exit(idx);
4299 }
4300
d155bef0
AB
4301 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4302 amdgpu_pmu_fini(adev);
72de33f8 4303 if (adev->mman.discovery_bin)
a190d1c7 4304 amdgpu_discovery_fini(adev);
72c8c97b 4305
cfbb6b00
AG
4306 amdgpu_reset_put_reset_domain(adev->reset_domain);
4307 adev->reset_domain = NULL;
4308
72c8c97b
AG
4309 kfree(adev->pci_state);
4310
d38ceaf9
AD
4311}
4312
58144d28
ND
4313/**
4314 * amdgpu_device_evict_resources - evict device resources
4315 * @adev: amdgpu device object
4316 *
4317 * Evicts all ttm device resources (vram BOs, gart table) from the lru list
4318 * of the vram memory type. Mainly used for evicting device resources
4319 * at suspend time.
4320 *
4321 */
7863c155 4322static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
58144d28 4323{
7863c155
ML
4324 int ret;
4325
e53d9665
ML
4326 /* No need to evict vram on APUs for suspend to ram or s2idle */
4327 if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
7863c155 4328 return 0;
58144d28 4329
7863c155
ML
4330 ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4331 if (ret)
58144d28 4332 DRM_WARN("evicting device resources failed\n");
7863c155 4333 return ret;
58144d28 4334}
d38ceaf9
AD
4335
4336/*
4337 * Suspend & resume.
4338 */
5095d541
ML
4339/**
4340 * amdgpu_device_prepare - prepare for device suspend
4341 *
4342 * @dev: drm dev pointer
4343 *
4344 * Prepare to put the hw in the suspend state (all asics).
4345 * Returns 0 for success or an error on failure.
4346 * Called at driver suspend.
4347 */
4348int amdgpu_device_prepare(struct drm_device *dev)
4349{
4350 struct amdgpu_device *adev = drm_to_adev(dev);
cb11ca32 4351 int i, r;
5095d541
ML
4352
4353 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4354 return 0;
4355
4356 /* Evict the majority of BOs before starting suspend sequence */
4357 r = amdgpu_device_evict_resources(adev);
4358 if (r)
4359 return r;
4360
cb11ca32
ML
4361 for (i = 0; i < adev->num_ip_blocks; i++) {
4362 if (!adev->ip_blocks[i].status.valid)
4363 continue;
4364 if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
4365 continue;
4366 r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev);
4367 if (r)
4368 return r;
4369 }
4370
5095d541
ML
4371 return 0;
4372}
4373
d38ceaf9 4374/**
810ddc3a 4375 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 4376 *
87e3f136 4377 * @dev: drm dev pointer
87e3f136 4378 * @fbcon : notify the fbdev of suspend
d38ceaf9
AD
4379 *
4380 * Puts the hw in the suspend state (all asics).
4381 * Returns 0 for success or an error on failure.
4382 * Called at driver suspend.
4383 */
de185019 4384int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9 4385{
a2e15b0e 4386 struct amdgpu_device *adev = drm_to_adev(dev);
d7274ec7 4387 int r = 0;
d38ceaf9 4388
d38ceaf9
AD
4389 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4390 return 0;
4391
44779b43 4392 adev->in_suspend = true;
3fa8f89d 4393
d7274ec7
BZ
4394 if (amdgpu_sriov_vf(adev)) {
4395 amdgpu_virt_fini_data_exchange(adev);
4396 r = amdgpu_virt_request_full_gpu(adev, false);
4397 if (r)
4398 return r;
4399 }
4400
3fa8f89d
S
4401 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4402 DRM_WARN("smart shift update failed\n");
4403
5f818173 4404 if (fbcon)
087451f3 4405 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
5f818173 4406
beff74bc 4407 cancel_delayed_work_sync(&adev->delayed_init_work);
0dee7263 4408 flush_delayed_work(&adev->gfx.gfx_off_delay_work);
a5459475 4409
5e6932fe 4410 amdgpu_ras_suspend(adev);
4411
2196927b 4412 amdgpu_device_ip_suspend_phase1(adev);
fe1053b7 4413
c004d44e 4414 if (!adev->in_s0ix)
5d3a2d95 4415 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
94fa5660 4416
7863c155
ML
4417 r = amdgpu_device_evict_resources(adev);
4418 if (r)
4419 return r;
d38ceaf9 4420
8d35a259 4421 amdgpu_fence_driver_hw_fini(adev);
d38ceaf9 4422
2196927b 4423 amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 4424
d7274ec7
BZ
4425 if (amdgpu_sriov_vf(adev))
4426 amdgpu_virt_release_full_gpu(adev, false);
4427
d38ceaf9
AD
4428 return 0;
4429}
4430
4431/**
810ddc3a 4432 * amdgpu_device_resume - initiate device resume
d38ceaf9 4433 *
87e3f136 4434 * @dev: drm dev pointer
87e3f136 4435 * @fbcon : notify the fbdev of resume
d38ceaf9
AD
4436 *
4437 * Bring the hw back to operating state (all asics).
4438 * Returns 0 for success or an error on failure.
4439 * Called at driver resume.
4440 */
de185019 4441int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9 4442{
1348969a 4443 struct amdgpu_device *adev = drm_to_adev(dev);
03161a6e 4444 int r = 0;
d38ceaf9 4445
d7274ec7
BZ
4446 if (amdgpu_sriov_vf(adev)) {
4447 r = amdgpu_virt_request_full_gpu(adev, true);
4448 if (r)
4449 return r;
4450 }
4451
d38ceaf9
AD
4452 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4453 return 0;
4454
62498733 4455 if (adev->in_s0ix)
bc143d8b 4456 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
628c36d7 4457
d38ceaf9 4458 /* post card */
39c640c0 4459 if (amdgpu_device_need_post(adev)) {
4d2997ab 4460 r = amdgpu_device_asic_init(adev);
74b0b157 4461 if (r)
aac89168 4462 dev_err(adev->dev, "amdgpu asic init failed\n");
74b0b157 4463 }
d38ceaf9 4464
06ec9070 4465 r = amdgpu_device_ip_resume(adev);
d7274ec7 4466
e6707218 4467 if (r) {
aac89168 4468 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
3c22c1ea 4469 goto exit;
e6707218 4470 }
8d35a259 4471 amdgpu_fence_driver_hw_init(adev);
5ceb54c6 4472
06ec9070 4473 r = amdgpu_device_ip_late_init(adev);
03161a6e 4474 if (r)
3c22c1ea 4475 goto exit;
d38ceaf9 4476
beff74bc
AD
4477 queue_delayed_work(system_wq, &adev->delayed_init_work,
4478 msecs_to_jiffies(AMDGPU_RESUME_MS));
4479
c004d44e 4480 if (!adev->in_s0ix) {
5d3a2d95
AD
4481 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4482 if (r)
3c22c1ea 4483 goto exit;
5d3a2d95 4484 }
756e6880 4485
3c22c1ea
SF
4486exit:
4487 if (amdgpu_sriov_vf(adev)) {
4488 amdgpu_virt_init_data_exchange(adev);
4489 amdgpu_virt_release_full_gpu(adev, true);
4490 }
4491
4492 if (r)
4493 return r;
4494
96a5d8d4 4495 /* Make sure IB tests flushed */
beff74bc 4496 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 4497
a2e15b0e 4498 if (fbcon)
087451f3 4499 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
d38ceaf9 4500
5e6932fe 4501 amdgpu_ras_resume(adev);
4502
d09ef243
AD
4503 if (adev->mode_info.num_crtc) {
4504 /*
4505 * Most of the connector probing functions try to acquire runtime pm
4506 * refs to ensure that the GPU is powered on when connector polling is
4507 * performed. Since we're calling this from a runtime PM callback,
4508 * trying to acquire rpm refs will cause us to deadlock.
4509 *
4510 * Since we're guaranteed to be holding the rpm lock, it's safe to
4511 * temporarily disable the rpm helpers so this doesn't deadlock us.
4512 */
23a1a9e5 4513#ifdef CONFIG_PM
d09ef243 4514 dev->dev->power.disable_depth++;
23a1a9e5 4515#endif
d09ef243
AD
4516 if (!adev->dc_enabled)
4517 drm_helper_hpd_irq_event(dev);
4518 else
4519 drm_kms_helper_hotplug_event(dev);
23a1a9e5 4520#ifdef CONFIG_PM
d09ef243 4521 dev->dev->power.disable_depth--;
23a1a9e5 4522#endif
d09ef243 4523 }
44779b43
RZ
4524 adev->in_suspend = false;
4525
dc907c9d
JX
4526 if (adev->enable_mes)
4527 amdgpu_mes_self_test(adev);
4528
3fa8f89d
S
4529 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4530 DRM_WARN("smart shift update failed\n");
4531
4d3b9ae5 4532 return 0;
d38ceaf9
AD
4533}
4534
e3ecdffa
AD
4535/**
4536 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4537 *
4538 * @adev: amdgpu_device pointer
4539 *
4540 * The list of all the hardware IPs that make up the asic is walked and
4541 * the check_soft_reset callbacks are run. check_soft_reset determines
4542 * if the asic is still hung or not.
4543 * Returns true if any of the IPs are still in a hung state, false if not.
4544 */
06ec9070 4545static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
4546{
4547 int i;
4548 bool asic_hang = false;
4549
f993d628
ML
4550 if (amdgpu_sriov_vf(adev))
4551 return true;
4552
8bc04c29
AD
4553 if (amdgpu_asic_need_full_reset(adev))
4554 return true;
4555
63fbf42f 4556 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4557 if (!adev->ip_blocks[i].status.valid)
63fbf42f 4558 continue;
a1255107
AD
4559 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4560 adev->ip_blocks[i].status.hang =
4561 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4562 if (adev->ip_blocks[i].status.hang) {
aac89168 4563 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
4564 asic_hang = true;
4565 }
4566 }
4567 return asic_hang;
4568}
4569
e3ecdffa
AD
4570/**
4571 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4572 *
4573 * @adev: amdgpu_device pointer
4574 *
4575 * The list of all the hardware IPs that make up the asic is walked and the
4576 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
4577 * handles any IP specific hardware or software state changes that are
4578 * necessary for a soft reset to succeed.
4579 * Returns 0 on success, negative error code on failure.
4580 */
06ec9070 4581static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
4582{
4583 int i, r = 0;
4584
4585 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4586 if (!adev->ip_blocks[i].status.valid)
d31a501e 4587 continue;
a1255107
AD
4588 if (adev->ip_blocks[i].status.hang &&
4589 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4590 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
4591 if (r)
4592 return r;
4593 }
4594 }
4595
4596 return 0;
4597}
4598
e3ecdffa
AD
4599/**
4600 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4601 *
4602 * @adev: amdgpu_device pointer
4603 *
4604 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
4605 * reset is necessary to recover.
4606 * Returns true if a full asic reset is required, false if not.
4607 */
06ec9070 4608static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 4609{
da146d3b
AD
4610 int i;
4611
8bc04c29
AD
4612 if (amdgpu_asic_need_full_reset(adev))
4613 return true;
4614
da146d3b 4615 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4616 if (!adev->ip_blocks[i].status.valid)
da146d3b 4617 continue;
a1255107
AD
4618 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4619 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4620 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
4621 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4622 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 4623 if (adev->ip_blocks[i].status.hang) {
aac89168 4624 dev_info(adev->dev, "Some blocks need full reset!\n");
da146d3b
AD
4625 return true;
4626 }
4627 }
35d782fe
CZ
4628 }
4629 return false;
4630}
4631
e3ecdffa
AD
4632/**
4633 * amdgpu_device_ip_soft_reset - do a soft reset
4634 *
4635 * @adev: amdgpu_device pointer
4636 *
4637 * The list of all the hardware IPs that make up the asic is walked and the
4638 * soft_reset callbacks are run if the block is hung. soft_reset handles any
4639 * IP specific hardware or software state changes that are necessary to soft
4640 * reset the IP.
4641 * Returns 0 on success, negative error code on failure.
4642 */
06ec9070 4643static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4644{
4645 int i, r = 0;
4646
4647 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4648 if (!adev->ip_blocks[i].status.valid)
35d782fe 4649 continue;
a1255107
AD
4650 if (adev->ip_blocks[i].status.hang &&
4651 adev->ip_blocks[i].version->funcs->soft_reset) {
4652 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
4653 if (r)
4654 return r;
4655 }
4656 }
4657
4658 return 0;
4659}
4660
e3ecdffa
AD
4661/**
4662 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4663 *
4664 * @adev: amdgpu_device pointer
4665 *
4666 * The list of all the hardware IPs that make up the asic is walked and the
4667 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
4668 * handles any IP specific hardware or software state changes that are
4669 * necessary after the IP has been soft reset.
4670 * Returns 0 on success, negative error code on failure.
4671 */
06ec9070 4672static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4673{
4674 int i, r = 0;
4675
4676 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4677 if (!adev->ip_blocks[i].status.valid)
35d782fe 4678 continue;
a1255107
AD
4679 if (adev->ip_blocks[i].status.hang &&
4680 adev->ip_blocks[i].version->funcs->post_soft_reset)
4681 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
4682 if (r)
4683 return r;
4684 }
4685
4686 return 0;
4687}
4688
e3ecdffa 4689/**
c33adbc7 4690 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
4691 *
4692 * @adev: amdgpu_device pointer
4693 *
4694 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
4695 * restore things like GPUVM page tables after a GPU reset where
4696 * the contents of VRAM might be lost.
403009bf
CK
4697 *
4698 * Returns:
4699 * 0 on success, negative error code on failure.
e3ecdffa 4700 */
c33adbc7 4701static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 4702{
c41d1cf6 4703 struct dma_fence *fence = NULL, *next = NULL;
403009bf 4704 struct amdgpu_bo *shadow;
e18aaea7 4705 struct amdgpu_bo_vm *vmbo;
403009bf 4706 long r = 1, tmo;
c41d1cf6
ML
4707
4708 if (amdgpu_sriov_runtime(adev))
b045d3af 4709 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
4710 else
4711 tmo = msecs_to_jiffies(100);
4712
aac89168 4713 dev_info(adev->dev, "recover vram bo from shadow start\n");
c41d1cf6 4714 mutex_lock(&adev->shadow_list_lock);
e18aaea7 4715 list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4994d1f0
LC
4716 /* If vm is compute context or adev is APU, shadow will be NULL */
4717 if (!vmbo->shadow)
4718 continue;
4719 shadow = vmbo->shadow;
4720
403009bf 4721 /* No need to recover an evicted BO */
d3116756
CK
4722 if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4723 shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4724 shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
403009bf
CK
4725 continue;
4726
4727 r = amdgpu_bo_restore_shadow(shadow, &next);
4728 if (r)
4729 break;
4730
c41d1cf6 4731 if (fence) {
1712fb1a 4732 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
4733 dma_fence_put(fence);
4734 fence = next;
1712fb1a 4735 if (tmo == 0) {
4736 r = -ETIMEDOUT;
c41d1cf6 4737 break;
1712fb1a 4738 } else if (tmo < 0) {
4739 r = tmo;
4740 break;
4741 }
403009bf
CK
4742 } else {
4743 fence = next;
c41d1cf6 4744 }
c41d1cf6
ML
4745 }
4746 mutex_unlock(&adev->shadow_list_lock);
4747
403009bf
CK
4748 if (fence)
4749 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
4750 dma_fence_put(fence);
4751
1712fb1a 4752 if (r < 0 || tmo <= 0) {
aac89168 4753 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
4754 return -EIO;
4755 }
c41d1cf6 4756
aac89168 4757 dev_info(adev->dev, "recover vram bo from shadow done\n");
403009bf 4758 return 0;
c41d1cf6
ML
4759}
4760
a90ad3c2 4761
e3ecdffa 4762/**
06ec9070 4763 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e 4764 *
982a820b 4765 * @adev: amdgpu_device pointer
87e3f136 4766 * @from_hypervisor: request from hypervisor
5740682e
ML
4767 *
4768 * Do a VF FLR and reinitialize the ASIC.
3f48c681 4769 * Returns 0 on success, negative error code on failure.
e3ecdffa
AD
4770 */
4771static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4772 bool from_hypervisor)
5740682e
ML
4773{
4774 int r;
a5f67c93 4775 struct amdgpu_hive_info *hive = NULL;
7258fa31 4776 int retry_limit = 0;
5740682e 4777
7258fa31 4778retry:
c004d44e 4779 amdgpu_amdkfd_pre_reset(adev);
428890a3 4780
5740682e
ML
4781 if (from_hypervisor)
4782 r = amdgpu_virt_request_full_gpu(adev, true);
4783 else
4784 r = amdgpu_virt_reset_gpu(adev);
4785 if (r)
4786 return r;
f734b213 4787 amdgpu_irq_gpu_reset_resume_helper(adev);
a90ad3c2 4788
83f24a8f
HC
4789 /* some sw clean up VF needs to do before recover */
4790 amdgpu_virt_post_reset(adev);
4791
a90ad3c2 4792 /* Resume IP prior to SMC */
06ec9070 4793 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
4794 if (r)
4795 goto error;
a90ad3c2 4796
c9ffa427 4797 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 4798
7a3e0bb2
RZ
4799 r = amdgpu_device_fw_loading(adev);
4800 if (r)
4801 return r;
4802
a90ad3c2 4803 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 4804 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
4805 if (r)
4806 goto error;
a90ad3c2 4807
a5f67c93
ZL
4808 hive = amdgpu_get_xgmi_hive(adev);
4809 /* Update PSP FW topology after reset */
4810 if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4811 r = amdgpu_xgmi_update_topology(hive, adev);
4812
4813 if (hive)
4814 amdgpu_put_xgmi_hive(hive);
4815
4816 if (!r) {
a5f67c93 4817 r = amdgpu_ib_ring_tests(adev);
9c12f5cd 4818
c004d44e 4819 amdgpu_amdkfd_post_reset(adev);
a5f67c93 4820 }
a90ad3c2 4821
abc34253 4822error:
c41d1cf6 4823 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 4824 amdgpu_inc_vram_lost(adev);
c33adbc7 4825 r = amdgpu_device_recover_vram(adev);
a90ad3c2 4826 }
437f3e0b 4827 amdgpu_virt_release_full_gpu(adev, true);
a90ad3c2 4828
7258fa31
SK
4829 if (AMDGPU_RETRY_SRIOV_RESET(r)) {
4830 if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
4831 retry_limit++;
4832 goto retry;
4833 } else
4834 DRM_ERROR("GPU reset retry is beyond the retry limit\n");
4835 }
4836
a90ad3c2
ML
4837 return r;
4838}
4839
9a1cddd6 4840/**
4841 * amdgpu_device_has_job_running - check if there is any job in mirror list
4842 *
982a820b 4843 * @adev: amdgpu_device pointer
9a1cddd6 4844 *
4845 * Check if there is any job in the scheduler pending list of any ring.
4846 */
4847bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4848{
4849 int i;
4850 struct drm_sched_job *job;
4851
4852 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4853 struct amdgpu_ring *ring = adev->rings[i];
4854
4855 if (!ring || !ring->sched.thread)
4856 continue;
4857
4858 spin_lock(&ring->sched.job_list_lock);
6efa4b46
LT
4859 job = list_first_entry_or_null(&ring->sched.pending_list,
4860 struct drm_sched_job, list);
9a1cddd6 4861 spin_unlock(&ring->sched.job_list_lock);
4862 if (job)
4863 return true;
4864 }
4865 return false;
4866}
4867
12938fad
CK
4868/**
4869 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4870 *
982a820b 4871 * @adev: amdgpu_device pointer
12938fad
CK
4872 *
4873 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4874 * a hung GPU.
4875 */
4876bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4877{
12938fad 4878
3ba7b418
AG
4879 if (amdgpu_gpu_recovery == 0)
4880 goto disabled;
4881
1a11a65d
YC
4882 /* Skip soft reset check in fatal error mode */
4883 if (!amdgpu_ras_is_poison_mode_supported(adev))
4884 return true;
4885
3ba7b418
AG
4886 if (amdgpu_sriov_vf(adev))
4887 return true;
4888
4889 if (amdgpu_gpu_recovery == -1) {
4890 switch (adev->asic_type) {
b3523c45
AD
4891#ifdef CONFIG_DRM_AMDGPU_SI
4892 case CHIP_VERDE:
4893 case CHIP_TAHITI:
4894 case CHIP_PITCAIRN:
4895 case CHIP_OLAND:
4896 case CHIP_HAINAN:
4897#endif
4898#ifdef CONFIG_DRM_AMDGPU_CIK
4899 case CHIP_KAVERI:
4900 case CHIP_KABINI:
4901 case CHIP_MULLINS:
4902#endif
4903 case CHIP_CARRIZO:
4904 case CHIP_STONEY:
4905 case CHIP_CYAN_SKILLFISH:
3ba7b418 4906 goto disabled;
b3523c45
AD
4907 default:
4908 break;
3ba7b418 4909 }
12938fad
CK
4910 }
4911
4912 return true;
3ba7b418
AG
4913
4914disabled:
aac89168 4915 dev_info(adev->dev, "GPU recovery disabled.\n");
3ba7b418 4916 return false;
12938fad
CK
4917}
4918
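/**
 * amdgpu_device_mode1_reset - perform a full chip (mode1) reset
 *
 * @adev: amdgpu_device pointer
 *
 * Disables bus mastering, caches the PCI config space and then triggers a
 * mode1 reset through the SMU when supported, falling back to the PSP
 * otherwise. After the reset the PCI state is restored and the function
 * polls the memsize register until the ASIC responds again.
 * Returns 0 on success, negative error code on failure or timeout.
 */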
5c03e584
FX
4919int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
4920{
47fc644f
SS
4921 u32 i;
4922 int ret = 0;
5c03e584 4923
47fc644f 4924 amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5c03e584 4925
47fc644f 4926 dev_info(adev->dev, "GPU mode1 reset\n");
5c03e584 4927
47fc644f
SS
4928 /* disable BM */
4929 pci_clear_master(adev->pdev);
5c03e584 4930
47fc644f 4931 amdgpu_device_cache_pci_state(adev->pdev);
5c03e584 4932
47fc644f
SS
4933 if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
4934 dev_info(adev->dev, "GPU smu mode1 reset\n");
4935 ret = amdgpu_dpm_mode1_reset(adev);
4936 } else {
4937 dev_info(adev->dev, "GPU psp mode1 reset\n");
4938 ret = psp_gpu_reset(adev);
4939 }
5c03e584 4940
47fc644f 4941 if (ret)
2c0f880a 4942 goto mode1_reset_failed;
5c03e584 4943
47fc644f 4944 amdgpu_device_load_pci_state(adev->pdev);
15c5c5f5
LL
4945 ret = amdgpu_psp_wait_for_bootloader(adev);
4946 if (ret)
2c0f880a 4947 goto mode1_reset_failed;
5c03e584 4948
47fc644f
SS
4949 /* wait for asic to come out of reset */
4950 for (i = 0; i < adev->usec_timeout; i++) {
4951 u32 memsize = adev->nbio.funcs->get_memsize(adev);
5c03e584 4952
47fc644f
SS
4953 if (memsize != 0xffffffff)
4954 break;
4955 udelay(1);
4956 }
5c03e584 4957
2c0f880a
HZ
4958 if (i >= adev->usec_timeout) {
4959 ret = -ETIMEDOUT;
4960 goto mode1_reset_failed;
4961 }
4962
47fc644f 4963 amdgpu_atombios_scratch_regs_engine_hung(adev, false);
15c5c5f5 4964
2c0f880a
HZ
4965 return 0;
4966
4967mode1_reset_failed:
4968 dev_err(adev->dev, "GPU mode1 reset failed\n");
47fc644f 4969 return ret;
5c03e584 4970}
5c6dd71e 4971
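/**
 * amdgpu_device_pre_asic_reset - prepare a device for reset
 *
 * @adev: amdgpu_device pointer
 * @reset_context: common reset context, carries the offending job and flags
 *
 * Stops SR-IOV data exchange, force-completes the fences of all rings and,
 * on bare metal, first attempts a soft reset of the hung IP blocks. When a
 * soft reset is not possible, the IPs are suspended and the
 * AMDGPU_NEED_FULL_RESET flag is set in @reset_context for the caller.
 * Returns 0 on success, negative error code on failure.
 */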
e3c1b071 4972int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
04442bf7 4973 struct amdgpu_reset_context *reset_context)
26bc5340 4974{
5c1e6fa4 4975 int i, r = 0;
04442bf7
LL
4976 struct amdgpu_job *job = NULL;
4977 bool need_full_reset =
4978 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4979
4980 if (reset_context->reset_req_dev == adev)
4981 job = reset_context->job;
71182665 4982
b602ca5f
TZ
4983 if (amdgpu_sriov_vf(adev)) {
4984 /* stop the data exchange thread */
4985 amdgpu_virt_fini_data_exchange(adev);
4986 }
4987
9e225fb9
AG
4988 amdgpu_fence_driver_isr_toggle(adev, true);
4989
71182665 4990 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
4991 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4992 struct amdgpu_ring *ring = adev->rings[i];
4993
51687759 4994 if (!ring || !ring->sched.thread)
0875dc9e 4995 continue;
5740682e 4996
b8920e1e
SS
4997 /* Clear job fences from fence drv to avoid force_completion
4998 * leaving NULL and vm flush fences in fence drv
4999 */
5c1e6fa4 5000 amdgpu_fence_driver_clear_job_fences(ring);
c530b02f 5001
2f9d4084
ML
5002 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
5003 amdgpu_fence_driver_force_completion(ring);
0875dc9e 5004 }
d38ceaf9 5005
9e225fb9
AG
5006 amdgpu_fence_driver_isr_toggle(adev, false);
5007
ff99849b 5008 if (job && job->vm)
222b5f04
AG
5009 drm_sched_increase_karma(&job->base);
5010
04442bf7 5011 r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
404b277b 5012 /* If reset handler not implemented, continue; otherwise return */
b8920e1e 5013 if (r == -EOPNOTSUPP)
404b277b
LL
5014 r = 0;
5015 else
04442bf7
LL
5016 return r;
5017
1d721ed6 5018 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
5019 if (!amdgpu_sriov_vf(adev)) {
5020
5021 if (!need_full_reset)
5022 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
5023
360cd081
LG
5024 if (!need_full_reset && amdgpu_gpu_recovery &&
5025 amdgpu_device_ip_check_soft_reset(adev)) {
26bc5340
AG
5026 amdgpu_device_ip_pre_soft_reset(adev);
5027 r = amdgpu_device_ip_soft_reset(adev);
5028 amdgpu_device_ip_post_soft_reset(adev);
5029 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 5030				dev_info(adev->dev, "soft reset failed, will fall back to full reset!\n");
26bc5340
AG
5031 need_full_reset = true;
5032 }
5033 }
5034
5035 if (need_full_reset)
5036 r = amdgpu_device_ip_suspend(adev);
04442bf7
LL
5037 if (need_full_reset)
5038 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5039 else
5040 clear_bit(AMDGPU_NEED_FULL_RESET,
5041 &reset_context->flags);
26bc5340
AG
5042 }
5043
5044 return r;
5045}
5046
15fd09a0
SA
5047static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
5048{
15fd09a0
SA
5049 int i;
5050
38a15ad9 5051 lockdep_assert_held(&adev->reset_domain->sem);
15fd09a0 5052
2d6a2a28
AA
5053 for (i = 0; i < adev->reset_info.num_regs; i++) {
5054 adev->reset_info.reset_dump_reg_value[i] =
5055 RREG32(adev->reset_info.reset_dump_reg_list[i]);
5056
5057 trace_amdgpu_reset_reg_dumps(adev->reset_info.reset_dump_reg_list[i],
5058 adev->reset_info.reset_dump_reg_value[i]);
15fd09a0
SA
5059 }
5060
5061 return 0;
5062}
5063
04442bf7
LL
5064int amdgpu_do_asic_reset(struct list_head *device_list_handle,
5065 struct amdgpu_reset_context *reset_context)
26bc5340
AG
5066{
5067 struct amdgpu_device *tmp_adev = NULL;
04442bf7 5068 bool need_full_reset, skip_hw_reset, vram_lost = false;
26bc5340 5069 int r = 0;
f5c7e779 5070 bool gpu_reset_for_dev_remove = 0;
26bc5340 5071
04442bf7
LL
5072 /* Try reset handler method first */
5073 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5074 reset_list);
15fd09a0 5075 amdgpu_reset_reg_dumps(tmp_adev);
0a83bb35
LL
5076
5077 reset_context->reset_device_list = device_list_handle;
04442bf7 5078 r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
404b277b 5079 /* If reset handler not implemented, continue; otherwise return */
b8920e1e 5080 if (r == -EOPNOTSUPP)
404b277b
LL
5081 r = 0;
5082 else
04442bf7
LL
5083 return r;
5084
5085 /* Reset handler not implemented, use the default method */
5086 need_full_reset =
5087 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5088 skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
5089
f5c7e779
YC
5090 gpu_reset_for_dev_remove =
5091 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5092 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5093
26bc5340 5094 /*
655ce9cb 5095 * ASIC reset has to be done on all XGMI hive nodes ASAP
26bc5340
AG
 5096	 * to allow proper link negotiation in FW (within 1 sec)
5097 */
7ac71382 5098 if (!skip_hw_reset && need_full_reset) {
655ce9cb 5099 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
041a62bc 5100 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 5101 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
e3c1b071 5102 tmp_adev->gmc.xgmi.pending_reset = false;
c96cf282 5103 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
5104 r = -EALREADY;
5105 } else
5106 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 5107
041a62bc 5108 if (r) {
aac89168 5109 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4a580877 5110 r, adev_to_drm(tmp_adev)->unique);
19349072 5111 goto out;
ce316fa5
LM
5112 }
5113 }
5114
041a62bc
AG
5115 /* For XGMI wait for all resets to complete before proceed */
5116 if (!r) {
655ce9cb 5117 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
ce316fa5
LM
5118 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5119 flush_work(&tmp_adev->xgmi_reset_work);
5120 r = tmp_adev->asic_reset_res;
5121 if (r)
5122 break;
ce316fa5
LM
5123 }
5124 }
5125 }
ce316fa5 5126 }
26bc5340 5127
43c4d576 5128 if (!r && amdgpu_ras_intr_triggered()) {
655ce9cb 5129 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
21226f02 5130 amdgpu_ras_reset_error_count(tmp_adev, AMDGPU_RAS_BLOCK__MMHUB);
43c4d576
JC
5131 }
5132
00eaa571 5133 amdgpu_ras_intr_cleared();
43c4d576 5134 }
00eaa571 5135
f5c7e779
YC
5136 /* Since the mode1 reset affects base ip blocks, the
5137 * phase1 ip blocks need to be resumed. Otherwise there
5138 * will be a BIOS signature error and the psp bootloader
5139 * can't load kdb on the next amdgpu install.
5140 */
5141 if (gpu_reset_for_dev_remove) {
5142 list_for_each_entry(tmp_adev, device_list_handle, reset_list)
5143 amdgpu_device_ip_resume_phase1(tmp_adev);
5144
5145 goto end;
5146 }
5147
655ce9cb 5148 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
26bc5340
AG
5149 if (need_full_reset) {
5150 /* post card */
e3c1b071 5151 r = amdgpu_device_asic_init(tmp_adev);
5152 if (r) {
aac89168 5153 dev_warn(tmp_adev->dev, "asic atom init failed!");
e3c1b071 5154 } else {
26bc5340 5155 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
9cec53c1 5156
26bc5340
AG
5157 r = amdgpu_device_ip_resume_phase1(tmp_adev);
5158 if (r)
5159 goto out;
5160
5161 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
a7691785
AA
5162
5163 amdgpu_coredump(tmp_adev, vram_lost, reset_context);
5164
26bc5340 5165 if (vram_lost) {
77e7f829 5166 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 5167 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
5168 }
5169
26bc5340
AG
5170 r = amdgpu_device_fw_loading(tmp_adev);
5171 if (r)
5172 return r;
5173
c45e38f2
LL
5174 r = amdgpu_xcp_restore_partition_mode(
5175 tmp_adev->xcp_mgr);
5176 if (r)
5177 goto out;
5178
26bc5340
AG
5179 r = amdgpu_device_ip_resume_phase2(tmp_adev);
5180 if (r)
5181 goto out;
5182
5183 if (vram_lost)
5184 amdgpu_device_fill_reset_magic(tmp_adev);
5185
fdafb359
EQ
5186 /*
5187 * Add this ASIC as tracked as reset was already
5188 * complete successfully.
5189 */
5190 amdgpu_register_gpu_instance(tmp_adev);
5191
04442bf7
LL
5192 if (!reset_context->hive &&
5193 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
e3c1b071 5194 amdgpu_xgmi_add_device(tmp_adev);
5195
7c04ca50 5196 r = amdgpu_device_ip_late_init(tmp_adev);
5197 if (r)
5198 goto out;
5199
087451f3 5200 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
565d1941 5201
e8fbaf03
GC
5202 /*
 5203				 * The GPU enters a bad state once the number of faulty
 5204				 * pages reported by ECC reaches the threshold, and RAS
 5205				 * recovery is scheduled next. So add one check here to
 5206				 * break recovery if the bad page threshold is indeed
 5207				 * exceeded, and remind the user to retire this GPU or
 5208				 * set a bigger bad_page_threshold value to fix this
 5209				 * when probing the driver again.
 5210				 *
5211 */
11003c68 5212 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
e8fbaf03
GC
5213 /* must succeed. */
5214 amdgpu_ras_resume(tmp_adev);
5215 } else {
5216 r = -EINVAL;
5217 goto out;
5218 }
e79a04d5 5219
26bc5340 5220 /* Update PSP FW topology after reset */
04442bf7
LL
5221 if (reset_context->hive &&
5222 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5223 r = amdgpu_xgmi_update_topology(
5224 reset_context->hive, tmp_adev);
26bc5340
AG
5225 }
5226 }
5227
26bc5340
AG
5228out:
5229 if (!r) {
5230 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5231 r = amdgpu_ib_ring_tests(tmp_adev);
5232 if (r) {
5233 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
26bc5340
AG
5234 need_full_reset = true;
5235 r = -EAGAIN;
5236 goto end;
5237 }
5238 }
5239
5240 if (!r)
5241 r = amdgpu_device_recover_vram(tmp_adev);
5242 else
5243 tmp_adev->asic_reset_res = r;
5244 }
5245
5246end:
04442bf7
LL
5247 if (need_full_reset)
5248 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5249 else
5250 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340
AG
5251 return r;
5252}
5253
e923be99 5254static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
26bc5340 5255{
5740682e 5256
a3a09142
AD
5257 switch (amdgpu_asic_reset_method(adev)) {
5258 case AMD_RESET_METHOD_MODE1:
5259 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5260 break;
5261 case AMD_RESET_METHOD_MODE2:
5262 adev->mp1_state = PP_MP1_STATE_RESET;
5263 break;
5264 default:
5265 adev->mp1_state = PP_MP1_STATE_NONE;
5266 break;
5267 }
26bc5340 5268}
d38ceaf9 5269
e923be99 5270static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
26bc5340 5271{
89041940 5272 amdgpu_vf_error_trans_all(adev);
a3a09142 5273 adev->mp1_state = PP_MP1_STATE_NONE;
91fb309d
HC
5274}
5275
3f12acc8
EQ
5276static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5277{
5278 struct pci_dev *p = NULL;
5279
5280 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5281 adev->pdev->bus->number, 1);
5282 if (p) {
5283 pm_runtime_enable(&(p->dev));
5284 pm_runtime_resume(&(p->dev));
5285 }
b85e285e
YY
5286
5287 pci_dev_put(p);
3f12acc8
EQ
5288}
5289
5290static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5291{
5292 enum amd_reset_method reset_method;
5293 struct pci_dev *p = NULL;
5294 u64 expires;
5295
5296 /*
5297 * For now, only BACO and mode1 reset are confirmed
 5298	 * to suffer the audio issue if the audio device is not properly suspended.
5299 */
5300 reset_method = amdgpu_asic_reset_method(adev);
5301 if ((reset_method != AMD_RESET_METHOD_BACO) &&
5302 (reset_method != AMD_RESET_METHOD_MODE1))
5303 return -EINVAL;
5304
5305 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5306 adev->pdev->bus->number, 1);
5307 if (!p)
5308 return -ENODEV;
5309
5310 expires = pm_runtime_autosuspend_expiration(&(p->dev));
5311 if (!expires)
5312 /*
 5313		/*
 5314		 * If we cannot get the audio device autosuspend delay,
 5315		 * a fixed 4S interval will be used. The audio controller's
 5316		 * default autosuspend delay is 3S, so 4S is guaranteed to cover it.
5317 */
54b7feb9 5318 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
5319
5320 while (!pm_runtime_status_suspended(&(p->dev))) {
5321 if (!pm_runtime_suspend(&(p->dev)))
5322 break;
5323
5324 if (expires < ktime_get_mono_fast_ns()) {
5325 dev_warn(adev->dev, "failed to suspend display audio\n");
b85e285e 5326 pci_dev_put(p);
3f12acc8
EQ
5327 /* TODO: abort the succeeding gpu reset? */
5328 return -ETIMEDOUT;
5329 }
5330 }
5331
5332 pm_runtime_disable(&(p->dev));
5333
b85e285e 5334 pci_dev_put(p);
3f12acc8
EQ
5335 return 0;
5336}
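/*
 * Illustrative pairing (sketch): the reset path below suspends the audio
 * function before touching the GPU and resumes it once recovery is done:
 *
 *	if (!amdgpu_device_suspend_display_audio(tmp_adev))
 *		audio_suspended = true;
 *	...
 *	if (audio_suspended)
 *		amdgpu_device_resume_display_audio(tmp_adev);
 */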
5337
d193b12b 5338static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
247c7b0d
AG
5339{
5340 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5341
5342#if defined(CONFIG_DEBUG_FS)
5343 if (!amdgpu_sriov_vf(adev))
5344 cancel_work(&adev->reset_work);
5345#endif
5346
5347 if (adev->kfd.dev)
5348 cancel_work(&adev->kfd.reset_work);
5349
5350 if (amdgpu_sriov_vf(adev))
5351 cancel_work(&adev->virt.flr_work);
5352
5353 if (con && adev->ras_enabled)
5354 cancel_work(&con->recovery_work);
5355
5356}
5357
26bc5340 5358/**
6e9c65f7 5359 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
26bc5340 5360 *
982a820b 5361 * @adev: amdgpu_device pointer
26bc5340 5362 * @job: which job triggered the hang
80bd2de1 5363 * @reset_context: amdgpu reset context pointer
26bc5340
AG
5364 *
 5365 * Attempt to reset the GPU if it has hung (all ASICs).
 5366 * Attempt to do a soft reset or full reset and reinitialize the ASIC.
5367 * Returns 0 for success or an error on failure.
5368 */
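/*
 * Typical invocation (illustrative sketch, mirroring the reset-context setup
 * used by the PCI slot-reset path later in this file; no new fields implied):
 *
 *	struct amdgpu_reset_context reset_context;
 *
 *	memset(&reset_context, 0, sizeof(reset_context));
 *	reset_context.method = AMD_RESET_METHOD_NONE;
 *	reset_context.reset_req_dev = adev;
 *	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
 *
 *	r = amdgpu_device_gpu_recover(adev, job, &reset_context);
 */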
5369
cf727044 5370int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
f1549c09
LG
5371 struct amdgpu_job *job,
5372 struct amdgpu_reset_context *reset_context)
26bc5340 5373{
1d721ed6 5374 struct list_head device_list, *device_list_handle = NULL;
7dd8c205 5375 bool job_signaled = false;
26bc5340 5376 struct amdgpu_hive_info *hive = NULL;
26bc5340 5377 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 5378 int i, r = 0;
bb5c7235 5379 bool need_emergency_restart = false;
3f12acc8 5380 bool audio_suspended = false;
f5c7e779
YC
5381 bool gpu_reset_for_dev_remove = false;
5382
5383 gpu_reset_for_dev_remove =
5384 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5385 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340 5386
6e3cd2a9 5387 /*
bb5c7235
WS
5388 * Special case: RAS triggered and full reset isn't supported
5389 */
5390 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5391
d5ea093e
AG
5392 /*
5393 * Flush RAM to disk so that after reboot
 5394	 * the user can read the log and see why the system rebooted.
5395 */
80285ae1
SY
5396 if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
5397 amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
5398 DRM_WARN("Emergency reboot.");
5399
5400 ksys_sync_helper();
5401 emergency_restart();
5402 }
5403
b823821f 5404 dev_info(adev->dev, "GPU %s begin!\n",
bb5c7235 5405 need_emergency_restart ? "jobs stop":"reset");
26bc5340 5406
175ac6ec
ZL
5407 if (!amdgpu_sriov_vf(adev))
5408 hive = amdgpu_get_xgmi_hive(adev);
681260df 5409 if (hive)
53b3f8f4 5410 mutex_lock(&hive->hive_lock);
26bc5340 5411
f1549c09
LG
5412 reset_context->job = job;
5413 reset_context->hive = hive;
9e94d22c
EQ
5414 /*
5415 * Build list of devices to reset.
5416 * In case we are in XGMI hive mode, resort the device list
5417 * to put adev in the 1st position.
5418 */
5419 INIT_LIST_HEAD(&device_list);
175ac6ec 5420 if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
83d29a5f 5421 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
655ce9cb 5422 list_add_tail(&tmp_adev->reset_list, &device_list);
83d29a5f
YC
5423 if (gpu_reset_for_dev_remove && adev->shutdown)
5424 tmp_adev->shutdown = true;
5425 }
655ce9cb 5426 if (!list_is_first(&adev->reset_list, &device_list))
5427 list_rotate_to_front(&adev->reset_list, &device_list);
5428 device_list_handle = &device_list;
26bc5340 5429 } else {
655ce9cb 5430 list_add_tail(&adev->reset_list, &device_list);
26bc5340
AG
5431 device_list_handle = &device_list;
5432 }
5433
e923be99
AG
5434 /* We need to lock reset domain only once both for XGMI and single device */
5435 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5436 reset_list);
3675c2f2 5437 amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
e923be99 5438
1d721ed6 5439 /* block all schedulers and reset given job's ring */
655ce9cb 5440 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f287a3c5 5441
e923be99 5442 amdgpu_device_set_mp1_state(tmp_adev);
f287a3c5 5443
3f12acc8
EQ
5444 /*
 5446		 * Try to put the audio codec into suspend state
 5447		 * before the GPU reset starts.
 5448		 *
 5449		 * The power domain of the graphics device is
 5450		 * shared with the AZ power domain. Without this,
 5451		 * we may change the audio hardware from behind
 5452		 * the audio driver's back, which will trigger
 5453		 * audio codec errors.
5453 */
5454 if (!amdgpu_device_suspend_display_audio(tmp_adev))
5455 audio_suspended = true;
5456
9e94d22c
EQ
5457 amdgpu_ras_set_error_query_ready(tmp_adev, false);
5458
52fb44cf
EQ
5459 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5460
c004d44e 5461 if (!amdgpu_sriov_vf(tmp_adev))
428890a3 5462 amdgpu_amdkfd_pre_reset(tmp_adev);
9e94d22c 5463
12ffa55d
AG
5464 /*
 5465		 * Mark these ASICs to be reset as untracked first,
 5466		 * and add them back after reset completes
5467 */
5468 amdgpu_unregister_gpu_instance(tmp_adev);
5469
163d4cd2 5470 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
565d1941 5471
f1c1314b 5472 /* disable ras on ALL IPs */
bb5c7235 5473 if (!need_emergency_restart &&
b823821f 5474 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 5475 amdgpu_ras_suspend(tmp_adev);
5476
1d721ed6
AG
5477 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5478 struct amdgpu_ring *ring = tmp_adev->rings[i];
5479
5480 if (!ring || !ring->sched.thread)
5481 continue;
5482
0b2d2c2e 5483 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 5484
bb5c7235 5485 if (need_emergency_restart)
7c6e68c7 5486 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6 5487 }
8f8c80f4 5488 atomic_inc(&tmp_adev->gpu_reset_counter);
1d721ed6
AG
5489 }
5490
bb5c7235 5491 if (need_emergency_restart)
7c6e68c7
AG
5492 goto skip_sched_resume;
5493
1d721ed6
AG
5494 /*
5495 * Must check guilty signal here since after this point all old
5496 * HW fences are force signaled.
5497 *
5498 * job->base holds a reference to parent fence
5499 */
f6a3f660 5500 if (job && dma_fence_is_signaled(&job->hw_fence)) {
1d721ed6 5501 job_signaled = true;
1d721ed6
AG
5502 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5503 goto skip_hw_reset;
5504 }
5505
26bc5340 5506retry: /* Rest of adevs pre asic reset from XGMI hive. */
655ce9cb 5507 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f5c7e779
YC
5508 if (gpu_reset_for_dev_remove) {
 5509			/* Workaround for ASICs that need to disable SMC first */
5510 amdgpu_device_smu_fini_early(tmp_adev);
5511 }
f1549c09 5512 r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
26bc5340
AG
5513 /*TODO Should we stop ?*/
5514 if (r) {
aac89168 5515 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
4a580877 5516 r, adev_to_drm(tmp_adev)->unique);
26bc5340
AG
5517 tmp_adev->asic_reset_res = r;
5518 }
247c7b0d
AG
5519
5520 /*
5521 * Drop all pending non scheduler resets. Scheduler resets
5522 * were already dropped during drm_sched_stop
5523 */
d193b12b 5524 amdgpu_device_stop_pending_resets(tmp_adev);
26bc5340
AG
5525 }
5526
5527 /* Actual ASIC resets if needed.*/
4f30d920 5528 /* Host driver will handle XGMI hive reset for SRIOV */
26bc5340
AG
5529 if (amdgpu_sriov_vf(adev)) {
5530 r = amdgpu_device_reset_sriov(adev, job ? false : true);
5531 if (r)
5532 adev->asic_reset_res = r;
950d6425 5533
28606c4e 5534 /* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */
4e8303cf
LL
5535 if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
5536 IP_VERSION(9, 4, 2) ||
5537 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
950d6425 5538 amdgpu_ras_resume(adev);
26bc5340 5539 } else {
f1549c09 5540 r = amdgpu_do_asic_reset(device_list_handle, reset_context);
b98a1648 5541 if (r && r == -EAGAIN)
26bc5340 5542 goto retry;
f5c7e779
YC
5543
5544 if (!r && gpu_reset_for_dev_remove)
5545 goto recover_end;
26bc5340
AG
5546 }
5547
1d721ed6
AG
5548skip_hw_reset:
5549
26bc5340 5550 /* Post ASIC reset for all devs .*/
655ce9cb 5551 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
7c6e68c7 5552
1d721ed6
AG
5553 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5554 struct amdgpu_ring *ring = tmp_adev->rings[i];
5555
5556 if (!ring || !ring->sched.thread)
5557 continue;
5558
6868a2c4 5559 drm_sched_start(&ring->sched, true);
1d721ed6
AG
5560 }
5561
4e8303cf
LL
5562 if (adev->enable_mes &&
5563 amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(11, 0, 3))
ed67f729
JX
5564 amdgpu_mes_self_test(tmp_adev);
5565
b8920e1e 5566 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
4a580877 5567 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
1d721ed6 5568
7258fa31
SK
5569 if (tmp_adev->asic_reset_res)
5570 r = tmp_adev->asic_reset_res;
5571
1d721ed6 5572 tmp_adev->asic_reset_res = 0;
26bc5340
AG
5573
5574 if (r) {
5575 /* bad news, how to tell it to userspace ? */
12ffa55d 5576 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
5577 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5578 } else {
12ffa55d 5579 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
3fa8f89d
S
5580 if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5581 DRM_WARN("smart shift update failed\n");
26bc5340 5582 }
7c6e68c7 5583 }
26bc5340 5584
7c6e68c7 5585skip_sched_resume:
655ce9cb 5586 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
428890a3 5587 /* unlock kfd: SRIOV would do it separately */
c004d44e 5588 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
428890a3 5589 amdgpu_amdkfd_post_reset(tmp_adev);
8e2712e7 5590
5591 /* kfd_post_reset will do nothing if kfd device is not initialized,
 5592		 * need to bring up kfd here if it was not initialized before
5593 */
5594 if (!adev->kfd.init_complete)
5595 amdgpu_amdkfd_device_init(adev);
5596
3f12acc8
EQ
5597 if (audio_suspended)
5598 amdgpu_device_resume_display_audio(tmp_adev);
e923be99
AG
5599
5600 amdgpu_device_unset_mp1_state(tmp_adev);
d293470e
YC
5601
5602 amdgpu_ras_set_error_query_ready(tmp_adev, true);
26bc5340
AG
5603 }
5604
f5c7e779 5605recover_end:
e923be99
AG
5606 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5607 reset_list);
5608 amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5609
9e94d22c 5610 if (hive) {
9e94d22c 5611 mutex_unlock(&hive->hive_lock);
d95e8e97 5612 amdgpu_put_xgmi_hive(hive);
9e94d22c 5613 }
26bc5340 5614
f287a3c5 5615 if (r)
26bc5340 5616 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
ab9a0b1f
AG
5617
5618 atomic_set(&adev->reset_domain->reset_res, r);
d38ceaf9
AD
5619 return r;
5620}
5621
e3ecdffa
AD
5622/**
 5623 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
5624 *
5625 * @adev: amdgpu_device pointer
5626 *
 5627 * Fetches and stores in the driver the PCIE capabilities (gen speed
5628 * and lanes) of the slot the device is in. Handles APUs and
5629 * virtualized environments where PCIE config space may not be available.
5630 */
5494d864 5631static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 5632{
5d9a6330 5633 struct pci_dev *pdev;
c5313457
HK
5634 enum pci_bus_speed speed_cap, platform_speed_cap;
5635 enum pcie_link_width platform_link_width;
d0dd7f0c 5636
cd474ba0
AD
5637 if (amdgpu_pcie_gen_cap)
5638 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 5639
cd474ba0
AD
5640 if (amdgpu_pcie_lane_cap)
5641 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 5642
cd474ba0 5643 /* covers APUs as well */
04e85958 5644 if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
cd474ba0
AD
5645 if (adev->pm.pcie_gen_mask == 0)
5646 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5647 if (adev->pm.pcie_mlw_mask == 0)
5648 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 5649 return;
cd474ba0 5650 }
d0dd7f0c 5651
c5313457
HK
5652 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5653 return;
5654
dbaa922b
AD
5655 pcie_bandwidth_available(adev->pdev, NULL,
5656 &platform_speed_cap, &platform_link_width);
c5313457 5657
cd474ba0 5658 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
5659 /* asic caps */
5660 pdev = adev->pdev;
5661 speed_cap = pcie_get_speed_cap(pdev);
5662 if (speed_cap == PCI_SPEED_UNKNOWN) {
5663 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
5664 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5665 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 5666 } else {
2b3a1f51
FX
5667 if (speed_cap == PCIE_SPEED_32_0GT)
5668 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5669 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5670 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5671 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5672 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5673 else if (speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5674 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5675 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5676 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5677 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5678 else if (speed_cap == PCIE_SPEED_8_0GT)
5679 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5680 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5681 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5682 else if (speed_cap == PCIE_SPEED_5_0GT)
5683 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5684 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5685 else
5686 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5687 }
5688 /* platform caps */
c5313457 5689 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
5690 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5691 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5692 } else {
2b3a1f51
FX
5693 if (platform_speed_cap == PCIE_SPEED_32_0GT)
5694 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5695 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5696 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5697 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5698 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5699 else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5700 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5701 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5702 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5703 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 5704 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
5705 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5706 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5707 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 5708 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
5709 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5710 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5711 else
5712 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5713
cd474ba0
AD
5714 }
5715 }
5716 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 5717 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
5718 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5719 } else {
c5313457 5720 switch (platform_link_width) {
5d9a6330 5721 case PCIE_LNK_X32:
cd474ba0
AD
5722 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5723 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5724 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5725 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5726 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5727 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5728 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5729 break;
5d9a6330 5730 case PCIE_LNK_X16:
cd474ba0
AD
5731 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5732 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5733 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5734 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5735 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5736 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5737 break;
5d9a6330 5738 case PCIE_LNK_X12:
cd474ba0
AD
5739 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5740 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5741 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5742 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5743 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5744 break;
5d9a6330 5745 case PCIE_LNK_X8:
cd474ba0
AD
5746 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5747 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5748 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5749 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5750 break;
5d9a6330 5751 case PCIE_LNK_X4:
cd474ba0
AD
5752 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5753 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5754 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5755 break;
5d9a6330 5756 case PCIE_LNK_X2:
cd474ba0
AD
5757 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5758 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5759 break;
5d9a6330 5760 case PCIE_LNK_X1:
cd474ba0
AD
5761 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5762 break;
5763 default:
5764 break;
5765 }
d0dd7f0c
AD
5766 }
5767 }
5768}
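/*
 * Worked example (illustrative): an ASIC whose link caps out at
 * PCIE_SPEED_8_0GT sitting in a Gen3 x8 slot ends up with
 *
 *	pcie_gen_mask = CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1..GEN3 |
 *			CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1..GEN3
 *	pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | X4 | X2 | X1
 *
 * i.e. every speed/width up to and including the common maximum is
 * advertised, unless amdgpu_pcie_gen_cap/amdgpu_pcie_lane_cap override it.
 */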
d38ceaf9 5769
08a2fd23
RE
5770/**
5771 * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
5772 *
5773 * @adev: amdgpu_device pointer
5774 * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
5775 *
5776 * Return true if @peer_adev can access (DMA) @adev through the PCIe
5777 * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
5778 * @peer_adev.
5779 */
5780bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
5781 struct amdgpu_device *peer_adev)
5782{
5783#ifdef CONFIG_HSA_AMD_P2P
5784 uint64_t address_mask = peer_adev->dev->dma_mask ?
5785 ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
5786 resource_size_t aper_limit =
5787 adev->gmc.aper_base + adev->gmc.aper_size - 1;
bb66ecbf
LL
5788 bool p2p_access =
5789 !adev->gmc.xgmi.connected_to_cpu &&
5790 !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
08a2fd23
RE
5791
5792 return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
5793 adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
5794 !(adev->gmc.aper_base & address_mask ||
5795 aper_limit & address_mask));
5796#else
5797 return false;
5798#endif
5799}
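/*
 * Worked example (illustrative): if @peer_adev has a 44-bit DMA mask,
 * address_mask is ~((1ULL << 44) - 1), so the check passes only when both
 * adev->gmc.aper_base and aper_limit lie below 1 << 44 and the whole of
 * VRAM is CPU-visible (real_vram_size == visible_vram_size), in addition
 * to the pci_p2pdma_distance() and XGMI checks above.
 */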
5800
361dbd01
AD
5801int amdgpu_device_baco_enter(struct drm_device *dev)
5802{
1348969a 5803 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5804 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01 5805
6ab68650 5806 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
5807 return -ENOTSUPP;
5808
8ab0d6f0 5809 if (ras && adev->ras_enabled &&
acdae216 5810 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5811 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
5812
9530273e 5813 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
5814}
5815
5816int amdgpu_device_baco_exit(struct drm_device *dev)
5817{
1348969a 5818 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5819 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 5820 int ret = 0;
361dbd01 5821
6ab68650 5822 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
5823 return -ENOTSUPP;
5824
9530273e
EQ
5825 ret = amdgpu_dpm_baco_exit(adev);
5826 if (ret)
5827 return ret;
7a22677b 5828
8ab0d6f0 5829 if (ras && adev->ras_enabled &&
acdae216 5830 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5831 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
5832
1bece222
CL
5833 if (amdgpu_passthrough(adev) &&
5834 adev->nbio.funcs->clear_doorbell_interrupt)
5835 adev->nbio.funcs->clear_doorbell_interrupt(adev);
5836
7a22677b 5837 return 0;
361dbd01 5838}
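/*
 * Illustrative usage (sketch): callers bracket a low-power window with
 * these helpers and must cope with -ENOTSUPP on boards without BACO:
 *
 *	r = amdgpu_device_baco_enter(dev);
 *	if (r)
 *		return r;
 *	...
 *	r = amdgpu_device_baco_exit(dev);
 */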
c9a6b82f
AG
5839
5840/**
5841 * amdgpu_pci_error_detected - Called when a PCI error is detected.
5842 * @pdev: PCI device struct
5843 * @state: PCI channel state
5844 *
5845 * Description: Called when a PCI error is detected.
5846 *
5847 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
5848 */
5849pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
5850{
5851 struct drm_device *dev = pci_get_drvdata(pdev);
5852 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5853 int i;
c9a6b82f
AG
5854
5855 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
5856
6894305c
AG
5857 if (adev->gmc.xgmi.num_physical_nodes > 1) {
5858 DRM_WARN("No support for XGMI hive yet...");
5859 return PCI_ERS_RESULT_DISCONNECT;
5860 }
5861
e17e27f9
GC
5862 adev->pci_channel_state = state;
5863
c9a6b82f
AG
5864 switch (state) {
5865 case pci_channel_io_normal:
5866 return PCI_ERS_RESULT_CAN_RECOVER;
acd89fca 5867 /* Fatal error, prepare for slot reset */
8a11d283
TZ
5868 case pci_channel_io_frozen:
5869 /*
d0fb18b5 5870 * Locking adev->reset_domain->sem will prevent any external access
acd89fca
AG
5871 * to GPU during PCI error recovery
5872 */
3675c2f2 5873 amdgpu_device_lock_reset_domain(adev->reset_domain);
e923be99 5874 amdgpu_device_set_mp1_state(adev);
acd89fca
AG
5875
5876 /*
5877 * Block any work scheduling as we do for regular GPU reset
5878 * for the duration of the recovery
5879 */
5880 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5881 struct amdgpu_ring *ring = adev->rings[i];
5882
5883 if (!ring || !ring->sched.thread)
5884 continue;
5885
5886 drm_sched_stop(&ring->sched, NULL);
5887 }
8f8c80f4 5888 atomic_inc(&adev->gpu_reset_counter);
c9a6b82f
AG
5889 return PCI_ERS_RESULT_NEED_RESET;
5890 case pci_channel_io_perm_failure:
5891 /* Permanent error, prepare for device removal */
5892 return PCI_ERS_RESULT_DISCONNECT;
5893 }
5894
5895 return PCI_ERS_RESULT_NEED_RESET;
5896}
5897
5898/**
5899 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
5900 * @pdev: pointer to PCI device
5901 */
5902pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
5903{
5904
5905 DRM_INFO("PCI error: mmio enabled callback!!\n");
5906
5907 /* TODO - dump whatever for debugging purposes */
5908
 5909	/* This is called only if amdgpu_pci_error_detected returns
5910 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
5911 * works, no need to reset slot.
5912 */
5913
5914 return PCI_ERS_RESULT_RECOVERED;
5915}
5916
5917/**
5918 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
5919 * @pdev: PCI device struct
5920 *
5921 * Description: This routine is called by the pci error recovery
5922 * code after the PCI slot has been reset, just before we
5923 * should resume normal operations.
5924 */
5925pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
5926{
5927 struct drm_device *dev = pci_get_drvdata(pdev);
5928 struct amdgpu_device *adev = drm_to_adev(dev);
362c7b91 5929 int r, i;
04442bf7 5930 struct amdgpu_reset_context reset_context;
362c7b91 5931 u32 memsize;
7ac71382 5932 struct list_head device_list;
c9a6b82f
AG
5933
5934 DRM_INFO("PCI error: slot reset callback!!\n");
5935
04442bf7
LL
5936 memset(&reset_context, 0, sizeof(reset_context));
5937
7ac71382 5938 INIT_LIST_HEAD(&device_list);
655ce9cb 5939 list_add_tail(&adev->reset_list, &device_list);
7ac71382 5940
362c7b91
AG
5941 /* wait for asic to come out of reset */
5942 msleep(500);
5943
7ac71382 5944 /* Restore PCI confspace */
c1dd4aa6 5945 amdgpu_device_load_pci_state(pdev);
c9a6b82f 5946
362c7b91
AG
5947 /* confirm ASIC came out of reset */
5948 for (i = 0; i < adev->usec_timeout; i++) {
5949 memsize = amdgpu_asic_get_config_memsize(adev);
5950
5951 if (memsize != 0xffffffff)
5952 break;
5953 udelay(1);
5954 }
5955 if (memsize == 0xffffffff) {
5956 r = -ETIME;
5957 goto out;
5958 }
5959
04442bf7
LL
5960 reset_context.method = AMD_RESET_METHOD_NONE;
5961 reset_context.reset_req_dev = adev;
5962 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
5963 set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
5964
7afefb81 5965 adev->no_hw_access = true;
04442bf7 5966 r = amdgpu_device_pre_asic_reset(adev, &reset_context);
7afefb81 5967 adev->no_hw_access = false;
c9a6b82f
AG
5968 if (r)
5969 goto out;
5970
04442bf7 5971 r = amdgpu_do_asic_reset(&device_list, &reset_context);
c9a6b82f
AG
5972
5973out:
c9a6b82f 5974 if (!r) {
c1dd4aa6
AG
5975 if (amdgpu_device_cache_pci_state(adev->pdev))
5976 pci_restore_state(adev->pdev);
5977
c9a6b82f
AG
5978 DRM_INFO("PCIe error recovery succeeded\n");
5979 } else {
5980 DRM_ERROR("PCIe error recovery failed, err:%d", r);
e923be99
AG
5981 amdgpu_device_unset_mp1_state(adev);
5982 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f
AG
5983 }
5984
5985 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
5986}
5987
5988/**
5989 * amdgpu_pci_resume() - resume normal ops after PCI reset
5990 * @pdev: pointer to PCI device
5991 *
 5992 * Called when the error recovery driver tells us that it's
505199a3 5993 * OK to resume normal operation.
c9a6b82f
AG
5994 */
5995void amdgpu_pci_resume(struct pci_dev *pdev)
5996{
5997 struct drm_device *dev = pci_get_drvdata(pdev);
5998 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5999 int i;
c9a6b82f 6000
c9a6b82f
AG
6001
6002 DRM_INFO("PCI error: resume callback!!\n");
acd89fca 6003
e17e27f9
GC
6004 /* Only continue execution for the case of pci_channel_io_frozen */
6005 if (adev->pci_channel_state != pci_channel_io_frozen)
6006 return;
6007
acd89fca
AG
6008 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6009 struct amdgpu_ring *ring = adev->rings[i];
6010
6011 if (!ring || !ring->sched.thread)
6012 continue;
6013
acd89fca
AG
6014 drm_sched_start(&ring->sched, true);
6015 }
6016
e923be99
AG
6017 amdgpu_device_unset_mp1_state(adev);
6018 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f 6019}
c1dd4aa6
AG
6020
6021bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
6022{
6023 struct drm_device *dev = pci_get_drvdata(pdev);
6024 struct amdgpu_device *adev = drm_to_adev(dev);
6025 int r;
6026
6027 r = pci_save_state(pdev);
6028 if (!r) {
6029 kfree(adev->pci_state);
6030
6031 adev->pci_state = pci_store_saved_state(pdev);
6032
6033 if (!adev->pci_state) {
6034 DRM_ERROR("Failed to store PCI saved state");
6035 return false;
6036 }
6037 } else {
6038 DRM_WARN("Failed to save PCI state, err:%d\n", r);
6039 return false;
6040 }
6041
6042 return true;
6043}
6044
6045bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
6046{
6047 struct drm_device *dev = pci_get_drvdata(pdev);
6048 struct amdgpu_device *adev = drm_to_adev(dev);
6049 int r;
6050
6051 if (!adev->pci_state)
6052 return false;
6053
6054 r = pci_load_saved_state(pdev, adev->pci_state);
6055
6056 if (!r) {
6057 pci_restore_state(pdev);
6058 } else {
6059 DRM_WARN("Failed to load PCI state, err:%d\n", r);
6060 return false;
6061 }
6062
6063 return true;
6064}
6065
810085dd
EH
6066void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
6067 struct amdgpu_ring *ring)
6068{
6069#ifdef CONFIG_X86_64
b818a5d3 6070 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
6071 return;
6072#endif
6073 if (adev->gmc.xgmi.connected_to_cpu)
6074 return;
6075
6076 if (ring && ring->funcs->emit_hdp_flush)
6077 amdgpu_ring_emit_hdp_flush(ring);
6078 else
6079 amdgpu_asic_flush_hdp(adev, ring);
6080}
c1dd4aa6 6081
810085dd
EH
6082void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
6083 struct amdgpu_ring *ring)
6084{
6085#ifdef CONFIG_X86_64
b818a5d3 6086 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
6087 return;
6088#endif
6089 if (adev->gmc.xgmi.connected_to_cpu)
6090 return;
c1dd4aa6 6091
810085dd
EH
6092 amdgpu_asic_invalidate_hdp(adev, ring);
6093}
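/*
 * Illustrative usage (sketch): amdgpu_device_flush_hdp() emits the flush as
 * a ring packet when a ring with emit_hdp_flush is passed and falls back to
 * the asic-level register write otherwise; both helpers return early for
 * bare-metal APUs (on X86_64) and for GPUs whose memory is connected to the
 * CPU:
 *
 *	amdgpu_device_flush_hdp(adev, ring);	// within a submission
 *	amdgpu_device_flush_hdp(adev, NULL);	// CPU-initiated access
 */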
34f3a4a9 6094
89a7a870
AG
6095int amdgpu_in_reset(struct amdgpu_device *adev)
6096{
6097 return atomic_read(&adev->reset_domain->in_gpu_reset);
53a17b6b
TZ
6098}
6099
34f3a4a9
LY
6100/**
6101 * amdgpu_device_halt() - bring hardware to some kind of halt state
6102 *
6103 * @adev: amdgpu_device pointer
6104 *
6105 * Bring hardware to some kind of halt state so that no one can touch it
6106 * any more. It will help to maintain error context when error occurred.
6107 * Compare to a simple hang, the system will keep stable at least for SSH
6108 * access. Then it should be trivial to inspect the hardware state and
6109 * see what's going on. Implemented as following:
6110 *
6111 * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
6112 * clears all CPU mappings to device, disallows remappings through page faults
6113 * 2. amdgpu_irq_disable_all() disables all interrupts
6114 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
 6115 * 4. set adev->no_hw_access to avoid potential crashes after step 5
6116 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
6117 * 6. pci_disable_device() and pci_wait_for_pending_transaction()
6118 * flush any in flight DMA operations
6119 */
6120void amdgpu_device_halt(struct amdgpu_device *adev)
6121{
6122 struct pci_dev *pdev = adev->pdev;
e0f943b4 6123 struct drm_device *ddev = adev_to_drm(adev);
34f3a4a9 6124
2c1c7ba4 6125 amdgpu_xcp_dev_unplug(adev);
34f3a4a9
LY
6126 drm_dev_unplug(ddev);
6127
6128 amdgpu_irq_disable_all(adev);
6129
6130 amdgpu_fence_driver_hw_fini(adev);
6131
6132 adev->no_hw_access = true;
6133
6134 amdgpu_device_unmap_mmio(adev);
6135
6136 pci_disable_device(pdev);
6137 pci_wait_for_pending_transaction(pdev);
6138}
86700a40
XD
6139
6140u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
6141 u32 reg)
6142{
6143 unsigned long flags, address, data;
6144 u32 r;
6145
6146 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6147 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6148
6149 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6150 WREG32(address, reg * 4);
6151 (void)RREG32(address);
6152 r = RREG32(data);
6153 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6154 return r;
6155}
6156
6157void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
6158 u32 reg, u32 v)
6159{
6160 unsigned long flags, address, data;
6161
6162 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6163 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6164
6165 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6166 WREG32(address, reg * 4);
6167 (void)RREG32(address);
6168 WREG32(data, v);
6169 (void)RREG32(data);
6170 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6171}
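/*
 * Illustrative read-modify-write (sketch; 'reg' and 'SOME_BIT' are
 * hypothetical): both helpers go through the NBIO index/data pair under
 * pcie_idx_lock, so a caller only needs:
 *
 *	u32 v = amdgpu_device_pcie_port_rreg(adev, reg);
 *
 *	v |= SOME_BIT;
 *	amdgpu_device_pcie_port_wreg(adev, reg, v);
 */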
68ce8b24
CK
6172
6173/**
6174 * amdgpu_device_switch_gang - switch to a new gang
6175 * @adev: amdgpu_device pointer
6176 * @gang: the gang to switch to
6177 *
6178 * Try to switch to a new gang.
6179 * Returns: NULL if we switched to the new gang or a reference to the current
6180 * gang leader.
6181 */
6182struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
6183 struct dma_fence *gang)
6184{
6185 struct dma_fence *old = NULL;
6186
6187 do {
6188 dma_fence_put(old);
6189 rcu_read_lock();
6190 old = dma_fence_get_rcu_safe(&adev->gang_submit);
6191 rcu_read_unlock();
6192
6193 if (old == gang)
6194 break;
6195
6196 if (!dma_fence_is_signaled(old))
6197 return old;
6198
6199 } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
6200 old, gang) != old);
6201
6202 dma_fence_put(old);
6203 return NULL;
6204}
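/*
 * Illustrative usage (sketch): a submission path keeps waiting on the old
 * gang leader until amdgpu_device_switch_gang() returns NULL:
 *
 *	struct dma_fence *old;
 *
 *	while ((old = amdgpu_device_switch_gang(adev, gang))) {
 *		dma_fence_wait(old, false);	// old leader still running
 *		dma_fence_put(old);
 *	}
 */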
220c8cc8
AD
6205
6206bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6207{
6208 switch (adev->asic_type) {
6209#ifdef CONFIG_DRM_AMDGPU_SI
6210 case CHIP_HAINAN:
6211#endif
6212 case CHIP_TOPAZ:
6213 /* chips with no display hardware */
6214 return false;
6215#ifdef CONFIG_DRM_AMDGPU_SI
6216 case CHIP_TAHITI:
6217 case CHIP_PITCAIRN:
6218 case CHIP_VERDE:
6219 case CHIP_OLAND:
6220#endif
6221#ifdef CONFIG_DRM_AMDGPU_CIK
6222 case CHIP_BONAIRE:
6223 case CHIP_HAWAII:
6224 case CHIP_KAVERI:
6225 case CHIP_KABINI:
6226 case CHIP_MULLINS:
6227#endif
6228 case CHIP_TONGA:
6229 case CHIP_FIJI:
6230 case CHIP_POLARIS10:
6231 case CHIP_POLARIS11:
6232 case CHIP_POLARIS12:
6233 case CHIP_VEGAM:
6234 case CHIP_CARRIZO:
6235 case CHIP_STONEY:
6236 /* chips with display hardware */
6237 return true;
6238 default:
6239 /* IP discovery */
4e8303cf 6240 if (!amdgpu_ip_version(adev, DCE_HWIP, 0) ||
220c8cc8
AD
6241 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6242 return false;
6243 return true;
6244 }
6245}
81283fee
JZ
6246
6247uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
6248 uint32_t inst, uint32_t reg_addr, char reg_name[],
6249 uint32_t expected_value, uint32_t mask)
6250{
6251 uint32_t ret = 0;
6252 uint32_t old_ = 0;
6253 uint32_t tmp_ = RREG32(reg_addr);
6254 uint32_t loop = adev->usec_timeout;
6255
6256 while ((tmp_ & (mask)) != (expected_value)) {
6257 if (old_ != tmp_) {
6258 loop = adev->usec_timeout;
6259 old_ = tmp_;
6260 } else
6261 udelay(1);
6262 tmp_ = RREG32(reg_addr);
6263 loop--;
6264 if (!loop) {
 6265			DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08x\n",
6266 inst, reg_name, (uint32_t)expected_value,
6267 (uint32_t)(tmp_ & (mask)));
6268 ret = -ETIMEDOUT;
6269 break;
6270 }
6271 }
6272 return ret;
6273}
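/*
 * Illustrative usage (sketch; register name, offset and mask are
 * hypothetical): poll until STATUS reports ready or adev->usec_timeout hits:
 *
 *	r = amdgpu_device_wait_on_rreg(adev, 0, status_reg, "STATUS",
 *				       STATUS_READY, STATUS_READY_MASK);
 *	if (r)
 *		dev_warn(adev->dev, "block did not become ready\n");
 */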