drm/amd: Disable PP_PCIE_DPM_MASK when dynamic speed switching not supported
d38ceaf9
AD
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
b1ddf548 28#include <linux/power_supply.h>
0875dc9e 29#include <linux/kthread.h>
fdf2f6c5 30#include <linux/module.h>
d38ceaf9
AD
31#include <linux/console.h>
32#include <linux/slab.h>
4a74c38c 33#include <linux/iommu.h>
901e2be2 34#include <linux/pci.h>
08a2fd23 35#include <linux/pci-p2pdma.h>
d37a3929 36#include <linux/apple-gmux.h>
fdf2f6c5 37
b7cdb41e 38#include <drm/drm_aperture.h>
4562236b 39#include <drm/drm_atomic_helper.h>
973ad627 40#include <drm/drm_crtc_helper.h>
45b64fd9 41#include <drm/drm_fb_helper.h>
fcd70cd3 42#include <drm/drm_probe_helper.h>
d38ceaf9
AD
43#include <drm/amdgpu_drm.h>
44#include <linux/vgaarb.h>
45#include <linux/vga_switcheroo.h>
46#include <linux/efi.h>
47#include "amdgpu.h"
f4b373f4 48#include "amdgpu_trace.h"
d38ceaf9
AD
49#include "amdgpu_i2c.h"
50#include "atom.h"
51#include "amdgpu_atombios.h"
a5bde2f9 52#include "amdgpu_atomfirmware.h"
d0dd7f0c 53#include "amd_pcie.h"
33f34802
KW
54#ifdef CONFIG_DRM_AMDGPU_SI
55#include "si.h"
56#endif
a2e73f56
AD
57#ifdef CONFIG_DRM_AMDGPU_CIK
58#include "cik.h"
59#endif
aaa36a97 60#include "vi.h"
460826e6 61#include "soc15.h"
0a5b8c7b 62#include "nv.h"
d38ceaf9 63#include "bif/bif_4_1_d.h"
bec86378 64#include <linux/firmware.h>
89041940 65#include "amdgpu_vf_error.h"
d38ceaf9 66
ba997709 67#include "amdgpu_amdkfd.h"
d2f52ac8 68#include "amdgpu_pm.h"
d38ceaf9 69
5183411b 70#include "amdgpu_xgmi.h"
c030f2e4 71#include "amdgpu_ras.h"
9c7c85f7 72#include "amdgpu_pmu.h"
bd607166 73#include "amdgpu_fru_eeprom.h"
04442bf7 74#include "amdgpu_reset.h"
5183411b 75
d5ea093e 76#include <linux/suspend.h>
c6a6e2db 77#include <drm/task_barrier.h>
3f12acc8 78#include <linux/pm_runtime.h>
d5ea093e 79
f89f8c6b
AG
80#include <drm/drm_drv.h>
81
3ad5dcfe
KHF
82#if IS_ENABLED(CONFIG_X86)
83#include <asm/intel-family.h>
84#endif
85
e2a75f88 86MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
3f76dced 87MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
2d2e5e7e 88MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
ad5a67a7 89MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
54c4d17e 90MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
65e60f6e 91MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
42b325e5 92MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
e2a75f88 93
2dc80b00 94#define AMDGPU_RESUME_MS 2000
7258fa31
SK
95#define AMDGPU_MAX_RETRY_LIMIT 2
96#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
2dc80b00 97
b7cdb41e
ML
98static const struct drm_driver amdgpu_kms_driver;
99
050091ab 100const char *amdgpu_asic_name[] = {
da69c161
KW
101 "TAHITI",
102 "PITCAIRN",
103 "VERDE",
104 "OLAND",
105 "HAINAN",
d38ceaf9
AD
106 "BONAIRE",
107 "KAVERI",
108 "KABINI",
109 "HAWAII",
110 "MULLINS",
111 "TOPAZ",
112 "TONGA",
48299f95 113 "FIJI",
d38ceaf9 114 "CARRIZO",
139f4917 115 "STONEY",
2cc0c0b5
FC
116 "POLARIS10",
117 "POLARIS11",
c4642a47 118 "POLARIS12",
48ff108d 119 "VEGAM",
d4196f01 120 "VEGA10",
8fab806a 121 "VEGA12",
956fcddc 122 "VEGA20",
2ca8a5d2 123 "RAVEN",
d6c3b24e 124 "ARCTURUS",
1eee4228 125 "RENOIR",
d46b417a 126 "ALDEBARAN",
852a6626 127 "NAVI10",
d0f56dc2 128 "CYAN_SKILLFISH",
87dbad02 129 "NAVI14",
9802f5d7 130 "NAVI12",
ccaf72d3 131 "SIENNA_CICHLID",
ddd8fbe7 132 "NAVY_FLOUNDER",
4f1e9a76 133 "VANGOGH",
a2468e04 134 "DIMGREY_CAVEFISH",
6f169591 135 "BEIGE_GOBY",
ee9236b7 136 "YELLOW_CARP",
3ae695d6 137 "IP DISCOVERY",
d38ceaf9
AD
138 "LAST",
139};
140
dcea6e65
KR
141/**
142 * DOC: pcie_replay_count
143 *
144 * The amdgpu driver provides a sysfs API for reporting the total number
145 * of PCIe replays (NAKs)
146 * The file pcie_replay_count is used for this and returns the total
147 * number of replays as a sum of the NAKs generated and NAKs received
148 */
149
150static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
151 struct device_attribute *attr, char *buf)
152{
153 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 154 struct amdgpu_device *adev = drm_to_adev(ddev);
dcea6e65
KR
155 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
156
36000c7a 157 return sysfs_emit(buf, "%llu\n", cnt);
dcea6e65
KR
158}
159
b8920e1e 160static DEVICE_ATTR(pcie_replay_count, 0444,
dcea6e65
KR
161 amdgpu_device_get_pcie_replay_count, NULL);
162
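/*
 * Illustrative usage (not part of the driver): the attribute is read from
 * user space via sysfs; the exact path depends on the card index, e.g.:
 *
 *   $ cat /sys/class/drm/card0/device/pcie_replay_count
 *   0
 */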
4798db85
LL
163/**
164 * DOC: board_info
165 *
166 * The amdgpu driver provides a sysfs API for giving board related information.
167 * It provides the form factor information in the format
168 *
169 * type : form factor
170 *
171 * Possible form factor values
172 *
173 * - "cem" - PCIE CEM card
174 * - "oam" - Open Compute Accelerator Module
175 * - "unknown" - Not known
176 *
177 */
178
76da73f0
LL
179static ssize_t amdgpu_device_get_board_info(struct device *dev,
180 struct device_attribute *attr,
181 char *buf)
182{
183 struct drm_device *ddev = dev_get_drvdata(dev);
184 struct amdgpu_device *adev = drm_to_adev(ddev);
185 enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM;
186 const char *pkg;
187
188 if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type)
189 pkg_type = adev->smuio.funcs->get_pkg_type(adev);
190
191 switch (pkg_type) {
192 case AMDGPU_PKG_TYPE_CEM:
193 pkg = "cem";
194 break;
195 case AMDGPU_PKG_TYPE_OAM:
196 pkg = "oam";
197 break;
198 default:
199 pkg = "unknown";
200 break;
201 }
202
203 return sysfs_emit(buf, "%s : %s\n", "type", pkg);
204}
205
206static DEVICE_ATTR(board_info, 0444, amdgpu_device_get_board_info, NULL);
207
208static struct attribute *amdgpu_board_attrs[] = {
209 &dev_attr_board_info.attr,
210 NULL,
211};
212
213static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj,
214 struct attribute *attr, int n)
215{
216 struct device *dev = kobj_to_dev(kobj);
217 struct drm_device *ddev = dev_get_drvdata(dev);
218 struct amdgpu_device *adev = drm_to_adev(ddev);
219
220 if (adev->flags & AMD_IS_APU)
221 return 0;
222
223 return attr->mode;
224}
225
226static const struct attribute_group amdgpu_board_attrs_group = {
227 .attrs = amdgpu_board_attrs,
228 .is_visible = amdgpu_board_attrs_is_visible
229};
230
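/*
 * Illustrative usage (not part of the driver): on dGPUs the board_info
 * attribute reports the form factor; the sysfs path below is an example
 * and depends on the card index:
 *
 *   $ cat /sys/class/drm/card0/device/board_info
 *   type : cem
 */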
5494d864
AD
231static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
232
bd607166 233
fd496ca8 234/**
b98c6299 235 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
fd496ca8
AD
236 *
237 * @dev: drm_device pointer
238 *
b98c6299 239 * Returns true if the device is a dGPU with ATPX power control,
fd496ca8
AD
240 * otherwise return false.
241 */
b98c6299 242bool amdgpu_device_supports_px(struct drm_device *dev)
fd496ca8
AD
243{
244 struct amdgpu_device *adev = drm_to_adev(dev);
245
b98c6299 246 if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
fd496ca8
AD
247 return true;
248 return false;
249}
250
e3ecdffa 251/**
0330b848 252 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
e3ecdffa
AD
253 *
254 * @dev: drm_device pointer
255 *
b98c6299 256 * Returns true if the device is a dGPU with ACPI power control,
e3ecdffa
AD
257 * otherwise return false.
258 */
31af062a 259bool amdgpu_device_supports_boco(struct drm_device *dev)
d38ceaf9 260{
1348969a 261 struct amdgpu_device *adev = drm_to_adev(dev);
d38ceaf9 262
b98c6299
AD
263 if (adev->has_pr3 ||
264 ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
d38ceaf9
AD
265 return true;
266 return false;
267}
268
a69cba42
AD
269/**
270 * amdgpu_device_supports_baco - Does the device support BACO
271 *
272 * @dev: drm_device pointer
273 *
274 * Returns true if the device supports BACO,
275 * otherwise return false.
276 */
277bool amdgpu_device_supports_baco(struct drm_device *dev)
278{
1348969a 279 struct amdgpu_device *adev = drm_to_adev(dev);
a69cba42
AD
280
281 return amdgpu_asic_supports_baco(adev);
282}
283
3fa8f89d
S
284/**
285 * amdgpu_device_supports_smart_shift - Is the device dGPU with
286 * smart shift support
287 *
288 * @dev: drm_device pointer
289 *
290 * Returns true if the device is a dGPU with Smart Shift support,
291 * otherwise returns false.
292 */
293bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
294{
295 return (amdgpu_device_supports_boco(dev) &&
296 amdgpu_acpi_is_power_shift_control_supported());
297}
298
6e3cd2a9
MCC
299/*
300 * VRAM access helper functions
301 */
302
e35e2b11 303/**
048af66b 304 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
e35e2b11
TY
305 *
306 * @adev: amdgpu_device pointer
307 * @pos: offset of the buffer in vram
308 * @buf: virtual address of the buffer in system memory
309 * @size: read/write size; @buf must be at least @size bytes
310 * @write: true - write to vram, otherwise - read from vram
311 */
048af66b
KW
312void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
313 void *buf, size_t size, bool write)
e35e2b11 314{
e35e2b11 315 unsigned long flags;
048af66b
KW
316 uint32_t hi = ~0, tmp = 0;
317 uint32_t *data = buf;
ce05ac56 318 uint64_t last;
f89f8c6b 319 int idx;
ce05ac56 320
c58a863b 321 if (!drm_dev_enter(adev_to_drm(adev), &idx))
f89f8c6b 322 return;
9d11eb0d 323
048af66b
KW
324 BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
325
326 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
327 for (last = pos + size; pos < last; pos += 4) {
328 tmp = pos >> 31;
329
330 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
331 if (tmp != hi) {
332 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
333 hi = tmp;
334 }
335 if (write)
336 WREG32_NO_KIQ(mmMM_DATA, *data++);
337 else
338 *data++ = RREG32_NO_KIQ(mmMM_DATA);
339 }
340
341 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
342 drm_dev_exit(idx);
343}
344
345/**
bbe04dec 346 * amdgpu_device_aper_access - access vram by vram aperture
048af66b
KW
347 *
348 * @adev: amdgpu_device pointer
349 * @pos: offset of the buffer in vram
350 * @buf: virtual address of the buffer in system memory
351 * @size: read/write size; @buf must be at least @size bytes
352 * @write: true - write to vram, otherwise - read from vram
353 *
354 * Returns the number of bytes transferred.
355 */
356size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
357 void *buf, size_t size, bool write)
358{
9d11eb0d 359#ifdef CONFIG_64BIT
048af66b
KW
360 void __iomem *addr;
361 size_t count = 0;
362 uint64_t last;
363
364 if (!adev->mman.aper_base_kaddr)
365 return 0;
366
9d11eb0d
CK
367 last = min(pos + size, adev->gmc.visible_vram_size);
368 if (last > pos) {
048af66b
KW
369 addr = adev->mman.aper_base_kaddr + pos;
370 count = last - pos;
9d11eb0d
CK
371
372 if (write) {
373 memcpy_toio(addr, buf, count);
4c452b5c
SS
374 /* Make sure HDP write cache flush happens without any reordering
375 * after the system memory contents are sent over PCIe device
376 */
9d11eb0d 377 mb();
810085dd 378 amdgpu_device_flush_hdp(adev, NULL);
9d11eb0d 379 } else {
810085dd 380 amdgpu_device_invalidate_hdp(adev, NULL);
4c452b5c
SS
381 /* Make sure HDP read cache is invalidated before issuing a read
382 * to the PCIe device
383 */
9d11eb0d
CK
384 mb();
385 memcpy_fromio(buf, addr, count);
386 }
387
9d11eb0d 388 }
048af66b
KW
389
390 return count;
391#else
392 return 0;
9d11eb0d 393#endif
048af66b 394}
9d11eb0d 395
048af66b
KW
396/**
397 * amdgpu_device_vram_access - read/write a buffer in vram
398 *
399 * @adev: amdgpu_device pointer
400 * @pos: offset of the buffer in vram
401 * @buf: virtual address of the buffer in system memory
402 * @size: read/write size; @buf must be at least @size bytes
403 * @write: true - write to vram, otherwise - read from vram
404 */
405void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
406 void *buf, size_t size, bool write)
407{
408 size_t count;
e35e2b11 409
048af66b
KW
410 /* try using the vram aperture to access vram first */
411 count = amdgpu_device_aper_access(adev, pos, buf, size, write);
412 size -= count;
413 if (size) {
414 /* use MM to access the rest of vram */
415 pos += count;
416 buf += count;
417 amdgpu_device_mm_access(adev, pos, buf, size, write);
e35e2b11
TY
418 }
419}
420
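/*
 * Minimal usage sketch (illustrative only): write a dword at an arbitrary
 * VRAM offset and read it back. Offset and value are made-up examples and
 * must be 4-byte aligned for the MM_INDEX/MM_DATA fallback path.
 *
 *   uint32_t val = 0xdeadbeef, readback = 0;
 *
 *   amdgpu_device_vram_access(adev, 0x1000, &val, sizeof(val), true);
 *   amdgpu_device_vram_access(adev, 0x1000, &readback, sizeof(readback), false);
 *   WARN_ON(readback != val);
 */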
d38ceaf9 421/*
f7ee1874 422 * register access helper functions.
d38ceaf9 423 */
56b53c0b
DL
424
425/* Check if hw access should be skipped because of hotplug or device error */
426bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
427{
7afefb81 428 if (adev->no_hw_access)
56b53c0b
DL
429 return true;
430
431#ifdef CONFIG_LOCKDEP
432 /*
433 * This is a bit complicated to understand, so worth a comment. What we assert
434 * here is that the GPU reset is not running on another thread in parallel.
435 *
436 * For this we trylock the read side of the reset semaphore, if that succeeds
437 * we know that the reset is not running in parallel.
438 *
439 * If the trylock fails we assert that we are either already holding the read
440 * side of the lock or are the reset thread itself and hold the write side of
441 * the lock.
442 */
443 if (in_task()) {
d0fb18b5
AG
444 if (down_read_trylock(&adev->reset_domain->sem))
445 up_read(&adev->reset_domain->sem);
56b53c0b 446 else
d0fb18b5 447 lockdep_assert_held(&adev->reset_domain->sem);
56b53c0b
DL
448 }
449#endif
450 return false;
451}
452
e3ecdffa 453/**
f7ee1874 454 * amdgpu_device_rreg - read a memory mapped IO or indirect register
e3ecdffa
AD
455 *
456 * @adev: amdgpu_device pointer
457 * @reg: dword aligned register offset
458 * @acc_flags: access flags which require special behavior
459 *
460 * Returns the 32 bit value from the offset specified.
461 */
f7ee1874
HZ
462uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
463 uint32_t reg, uint32_t acc_flags)
d38ceaf9 464{
f4b373f4
TSD
465 uint32_t ret;
466
56b53c0b 467 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
468 return 0;
469
f7ee1874
HZ
470 if ((reg * 4) < adev->rmmio_size) {
471 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
472 amdgpu_sriov_runtime(adev) &&
d0fb18b5 473 down_read_trylock(&adev->reset_domain->sem)) {
f7ee1874 474 ret = amdgpu_kiq_rreg(adev, reg);
d0fb18b5 475 up_read(&adev->reset_domain->sem);
f7ee1874
HZ
476 } else {
477 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
478 }
479 } else {
480 ret = adev->pcie_rreg(adev, reg * 4);
81202807 481 }
bc992ba5 482
f7ee1874 483 trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
e78b579d 484
f4b373f4 485 return ret;
d38ceaf9
AD
486}
487
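/*
 * Illustrative note (not part of the original file): driver code normally
 * goes through the RREG32()/WREG32() style macros rather than calling these
 * helpers directly; the register offset below is a made-up example.
 *
 *   u32 tmp = RREG32(0x1234);
 *   WREG32(0x1234, tmp | 0x1);
 */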
421a2a30
ML
488/*
489 * MMIO register read with byte offset helper function
490 * @offset: byte offset from MMIO start
b8920e1e 491 */
421a2a30 492
e3ecdffa
AD
493/**
494 * amdgpu_mm_rreg8 - read a memory mapped IO register
495 *
496 * @adev: amdgpu_device pointer
497 * @offset: byte aligned register offset
498 *
499 * Returns the 8 bit value from the offset specified.
500 */
7cbbc745
AG
501uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
502{
56b53c0b 503 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
504 return 0;
505
421a2a30
ML
506 if (offset < adev->rmmio_size)
507 return (readb(adev->rmmio + offset));
508 BUG();
509}
510
511/*
512 * MMIO register write with byte offset helper function
513 * @offset: byte offset from MMIO start
514 * @value: the value to be written to the register
b8920e1e
SS
515 */
516
e3ecdffa
AD
517/**
518 * amdgpu_mm_wreg8 - write a memory mapped IO register
519 *
520 * @adev: amdgpu_device pointer
521 * @offset: byte aligned register offset
522 * @value: 8 bit value to write
523 *
524 * Writes the value specified to the offset specified.
525 */
7cbbc745
AG
526void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
527{
56b53c0b 528 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
529 return;
530
421a2a30
ML
531 if (offset < adev->rmmio_size)
532 writeb(value, adev->rmmio + offset);
533 else
534 BUG();
535}
536
e3ecdffa 537/**
f7ee1874 538 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
e3ecdffa
AD
539 *
540 * @adev: amdgpu_device pointer
541 * @reg: dword aligned register offset
542 * @v: 32 bit value to write to the register
543 * @acc_flags: access flags which require special behavior
544 *
545 * Writes the value specified to the offset specified.
546 */
f7ee1874
HZ
547void amdgpu_device_wreg(struct amdgpu_device *adev,
548 uint32_t reg, uint32_t v,
549 uint32_t acc_flags)
d38ceaf9 550{
56b53c0b 551 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
552 return;
553
f7ee1874
HZ
554 if ((reg * 4) < adev->rmmio_size) {
555 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
556 amdgpu_sriov_runtime(adev) &&
d0fb18b5 557 down_read_trylock(&adev->reset_domain->sem)) {
f7ee1874 558 amdgpu_kiq_wreg(adev, reg, v);
d0fb18b5 559 up_read(&adev->reset_domain->sem);
f7ee1874
HZ
560 } else {
561 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
562 }
563 } else {
564 adev->pcie_wreg(adev, reg * 4, v);
81202807 565 }
bc992ba5 566
f7ee1874 567 trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
2e0cc4d4 568}
d38ceaf9 569
03f2abb0 570/**
4cc9f86f 571 * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
2e0cc4d4 572 *
71579346
RB
573 * @adev: amdgpu_device pointer
574 * @reg: mmio/rlc register
575 * @v: value to write
8057a9d6 576 * @xcc_id: xcc accelerated compute core id
71579346
RB
577 *
578 * this function is invoked only for the debugfs register access
03f2abb0 579 */
f7ee1874 580void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
8ed49dd1
VL
581 uint32_t reg, uint32_t v,
582 uint32_t xcc_id)
2e0cc4d4 583{
56b53c0b 584 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
585 return;
586
2e0cc4d4 587 if (amdgpu_sriov_fullaccess(adev) &&
f7ee1874
HZ
588 adev->gfx.rlc.funcs &&
589 adev->gfx.rlc.funcs->is_rlcg_access_range) {
2e0cc4d4 590 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
8ed49dd1 591 return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
4cc9f86f
TSD
592 } else if ((reg * 4) >= adev->rmmio_size) {
593 adev->pcie_wreg(adev, reg * 4, v);
f7ee1874
HZ
594 } else {
595 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
47ed4e1c 596 }
d38ceaf9
AD
597}
598
1bba3683
HZ
599/**
600 * amdgpu_device_indirect_rreg - read an indirect register
601 *
602 * @adev: amdgpu_device pointer
22f453fb 603 * @reg_addr: indirect register address to read from
1bba3683
HZ
604 *
605 * Returns the value of indirect register @reg_addr
606 */
607u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
1bba3683
HZ
608 u32 reg_addr)
609{
65ba96e9 610 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
611 void __iomem *pcie_index_offset;
612 void __iomem *pcie_data_offset;
65ba96e9
HZ
613 u32 r;
614
615 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
616 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1bba3683
HZ
617
618 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
619 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
620 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
621
622 writel(reg_addr, pcie_index_offset);
623 readl(pcie_index_offset);
624 r = readl(pcie_data_offset);
625 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
626
627 return r;
628}
629
0c552ed3
LM
630u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
631 u64 reg_addr)
632{
633 unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
634 u32 r;
635 void __iomem *pcie_index_offset;
636 void __iomem *pcie_index_hi_offset;
637 void __iomem *pcie_data_offset;
638
639 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
640 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
d57e24aa 641 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
0c552ed3
LM
642 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
643 else
644 pcie_index_hi = 0;
645
646 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
647 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
648 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
649 if (pcie_index_hi != 0)
650 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
651 pcie_index_hi * 4;
652
653 writel(reg_addr, pcie_index_offset);
654 readl(pcie_index_offset);
655 if (pcie_index_hi != 0) {
656 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
657 readl(pcie_index_hi_offset);
658 }
659 r = readl(pcie_data_offset);
660
661 /* clear the high bits */
662 if (pcie_index_hi != 0) {
663 writel(0, pcie_index_hi_offset);
664 readl(pcie_index_hi_offset);
665 }
666
667 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
668
669 return r;
670}
671
1bba3683
HZ
672/**
673 * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
674 *
675 * @adev: amdgpu_device pointer
22f453fb 676 * @reg_addr: indirect register address to read from
1bba3683
HZ
677 *
678 * Returns the value of indirect register @reg_addr
679 */
680u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
1bba3683
HZ
681 u32 reg_addr)
682{
65ba96e9 683 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
684 void __iomem *pcie_index_offset;
685 void __iomem *pcie_data_offset;
65ba96e9
HZ
686 u64 r;
687
688 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
689 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1bba3683
HZ
690
691 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
692 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
693 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
694
695 /* read low 32 bits */
696 writel(reg_addr, pcie_index_offset);
697 readl(pcie_index_offset);
698 r = readl(pcie_data_offset);
699 /* read high 32 bits */
700 writel(reg_addr + 4, pcie_index_offset);
701 readl(pcie_index_offset);
702 r |= ((u64)readl(pcie_data_offset) << 32);
703 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
704
705 return r;
706}
707
a76b2870
CL
708u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
709 u64 reg_addr)
710{
711 unsigned long flags, pcie_index, pcie_data;
712 unsigned long pcie_index_hi = 0;
713 void __iomem *pcie_index_offset;
714 void __iomem *pcie_index_hi_offset;
715 void __iomem *pcie_data_offset;
716 u64 r;
717
718 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
719 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
720 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
721 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
722
723 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
724 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
725 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
726 if (pcie_index_hi != 0)
727 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
728 pcie_index_hi * 4;
729
730 /* read low 32 bits */
731 writel(reg_addr, pcie_index_offset);
732 readl(pcie_index_offset);
733 if (pcie_index_hi != 0) {
734 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
735 readl(pcie_index_hi_offset);
736 }
737 r = readl(pcie_data_offset);
738 /* read high 32 bits */
739 writel(reg_addr + 4, pcie_index_offset);
740 readl(pcie_index_offset);
741 if (pcie_index_hi != 0) {
742 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
743 readl(pcie_index_hi_offset);
744 }
745 r |= ((u64)readl(pcie_data_offset) << 32);
746
747 /* clear the high bits */
748 if (pcie_index_hi != 0) {
749 writel(0, pcie_index_hi_offset);
750 readl(pcie_index_hi_offset);
751 }
752
753 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
754
755 return r;
756}
757
1bba3683
HZ
758/**
759 * amdgpu_device_indirect_wreg - write an indirect register address
760 *
761 * @adev: amdgpu_device pointer
1bba3683
HZ
762 * @reg_addr: indirect register offset
763 * @reg_data: indirect register data
764 *
765 */
766void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
1bba3683
HZ
767 u32 reg_addr, u32 reg_data)
768{
65ba96e9 769 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
770 void __iomem *pcie_index_offset;
771 void __iomem *pcie_data_offset;
772
65ba96e9
HZ
773 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
774 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
775
1bba3683
HZ
776 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
777 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
778 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
779
780 writel(reg_addr, pcie_index_offset);
781 readl(pcie_index_offset);
782 writel(reg_data, pcie_data_offset);
783 readl(pcie_data_offset);
784 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
785}
786
0c552ed3
LM
787void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
788 u64 reg_addr, u32 reg_data)
789{
790 unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
791 void __iomem *pcie_index_offset;
792 void __iomem *pcie_index_hi_offset;
793 void __iomem *pcie_data_offset;
794
795 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
796 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
d57e24aa 797 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
0c552ed3
LM
798 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
799 else
800 pcie_index_hi = 0;
801
802 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
803 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
804 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
805 if (pcie_index_hi != 0)
806 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
807 pcie_index_hi * 4;
808
809 writel(reg_addr, pcie_index_offset);
810 readl(pcie_index_offset);
811 if (pcie_index_hi != 0) {
812 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
813 readl(pcie_index_hi_offset);
814 }
815 writel(reg_data, pcie_data_offset);
816 readl(pcie_data_offset);
817
818 /* clear the high bits */
819 if (pcie_index_hi != 0) {
820 writel(0, pcie_index_hi_offset);
821 readl(pcie_index_hi_offset);
822 }
823
824 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
825}
826
1bba3683
HZ
827/**
828 * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
829 *
830 * @adev: amdgpu_device pointer
1bba3683
HZ
831 * @reg_addr: indirect register offset
832 * @reg_data: indirect register data
833 *
834 */
835void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
1bba3683
HZ
836 u32 reg_addr, u64 reg_data)
837{
65ba96e9 838 unsigned long flags, pcie_index, pcie_data;
1bba3683
HZ
839 void __iomem *pcie_index_offset;
840 void __iomem *pcie_data_offset;
841
65ba96e9
HZ
842 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
843 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
844
1bba3683
HZ
845 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
846 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
847 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
848
849 /* write low 32 bits */
850 writel(reg_addr, pcie_index_offset);
851 readl(pcie_index_offset);
852 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
853 readl(pcie_data_offset);
854 /* write high 32 bits */
855 writel(reg_addr + 4, pcie_index_offset);
856 readl(pcie_index_offset);
857 writel((u32)(reg_data >> 32), pcie_data_offset);
858 readl(pcie_data_offset);
859 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
860}
861
a76b2870
CL
862void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
863 u64 reg_addr, u64 reg_data)
864{
865 unsigned long flags, pcie_index, pcie_data;
866 unsigned long pcie_index_hi = 0;
867 void __iomem *pcie_index_offset;
868 void __iomem *pcie_index_hi_offset;
869 void __iomem *pcie_data_offset;
870
871 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
872 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
873 if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
874 pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
875
876 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
877 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
878 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
879 if (pcie_index_hi != 0)
880 pcie_index_hi_offset = (void __iomem *)adev->rmmio +
881 pcie_index_hi * 4;
882
883 /* write low 32 bits */
884 writel(reg_addr, pcie_index_offset);
885 readl(pcie_index_offset);
886 if (pcie_index_hi != 0) {
887 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
888 readl(pcie_index_hi_offset);
889 }
890 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
891 readl(pcie_data_offset);
892 /* write high 32 bits */
893 writel(reg_addr + 4, pcie_index_offset);
894 readl(pcie_index_offset);
895 if (pcie_index_hi != 0) {
896 writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
897 readl(pcie_index_hi_offset);
898 }
899 writel((u32)(reg_data >> 32), pcie_data_offset);
900 readl(pcie_data_offset);
901
902 /* clear the high bits */
903 if (pcie_index_hi != 0) {
904 writel(0, pcie_index_hi_offset);
905 readl(pcie_index_hi_offset);
906 }
907
908 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
909}
910
dabc114e
HZ
911/**
912 * amdgpu_device_get_rev_id - query device rev_id
913 *
914 * @adev: amdgpu_device pointer
915 *
916 * Return device rev_id
917 */
918u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
919{
920 return adev->nbio.funcs->get_rev_id(adev);
921}
922
d38ceaf9
AD
923/**
924 * amdgpu_invalid_rreg - dummy reg read function
925 *
982a820b 926 * @adev: amdgpu_device pointer
d38ceaf9
AD
927 * @reg: offset of register
928 *
929 * Dummy register read function. Used for register blocks
930 * that certain asics don't have (all asics).
931 * Returns the value in the register.
932 */
933static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
934{
935 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
936 BUG();
937 return 0;
938}
939
0c552ed3
LM
940static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
941{
942 DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
943 BUG();
944 return 0;
945}
946
d38ceaf9
AD
947/**
948 * amdgpu_invalid_wreg - dummy reg write function
949 *
982a820b 950 * @adev: amdgpu_device pointer
d38ceaf9
AD
951 * @reg: offset of register
952 * @v: value to write to the register
953 *
954 * Dummy register write function. Used for register blocks
955 * that certain asics don't have (all asics).
956 */
957static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
958{
959 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
960 reg, v);
961 BUG();
962}
963
0c552ed3
LM
964static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
965{
966 DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
967 reg, v);
968 BUG();
969}
970
4fa1c6a6
TZ
971/**
972 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
973 *
982a820b 974 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
975 * @reg: offset of register
976 *
977 * Dummy register read function. Used for register blocks
978 * that certain asics don't have (all asics).
979 * Returns the value in the register.
980 */
981static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
982{
983 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
984 BUG();
985 return 0;
986}
987
a76b2870
CL
988static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg)
989{
990 DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
991 BUG();
992 return 0;
993}
994
4fa1c6a6
TZ
995/**
996 * amdgpu_invalid_wreg64 - dummy reg write function
997 *
982a820b 998 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
999 * @reg: offset of register
1000 * @v: value to write to the register
1001 *
1002 * Dummy register write function. Used for register blocks
1003 * that certain asics don't have (all asics).
1004 */
1005static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
1006{
1007 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
1008 reg, v);
1009 BUG();
1010}
1011
a76b2870
CL
1012static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v)
1013{
1014 DRM_ERROR("Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n",
1015 reg, v);
1016 BUG();
1017}
1018
d38ceaf9
AD
1019/**
1020 * amdgpu_block_invalid_rreg - dummy reg read function
1021 *
982a820b 1022 * @adev: amdgpu_device pointer
d38ceaf9
AD
1023 * @block: offset of instance
1024 * @reg: offset of register
1025 *
1026 * Dummy register read function. Used for register blocks
1027 * that certain asics don't have (all asics).
1028 * Returns the value in the register.
1029 */
1030static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
1031 uint32_t block, uint32_t reg)
1032{
1033 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
1034 reg, block);
1035 BUG();
1036 return 0;
1037}
1038
1039/**
1040 * amdgpu_block_invalid_wreg - dummy reg write function
1041 *
982a820b 1042 * @adev: amdgpu_device pointer
d38ceaf9
AD
1043 * @block: offset of instance
1044 * @reg: offset of register
1045 * @v: value to write to the register
1046 *
1047 * Dummy register write function. Used for register blocks
1048 * that certain asics don't have (all asics).
1049 */
1050static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
1051 uint32_t block,
1052 uint32_t reg, uint32_t v)
1053{
1054 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
1055 reg, block, v);
1056 BUG();
1057}
1058
4d2997ab
AD
1059/**
1060 * amdgpu_device_asic_init - Wrapper for atom asic_init
1061 *
982a820b 1062 * @adev: amdgpu_device pointer
4d2997ab
AD
1063 *
1064 * Does any asic specific work and then calls atom asic init.
1065 */
1066static int amdgpu_device_asic_init(struct amdgpu_device *adev)
1067{
15c5c5f5
LL
1068 int ret;
1069
4d2997ab
AD
1070 amdgpu_asic_pre_asic_init(adev);
1071
4e8303cf
LL
1072 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
1073 amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
15c5c5f5
LL
1074 amdgpu_psp_wait_for_bootloader(adev);
1075 ret = amdgpu_atomfirmware_asic_init(adev, true);
1076 return ret;
1077 } else {
85d1bcc6 1078 return amdgpu_atom_asic_init(adev->mode_info.atom_context);
15c5c5f5
LL
1079 }
1080
1081 return 0;
4d2997ab
AD
1082}
1083
e3ecdffa 1084/**
7ccfd79f 1085 * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
e3ecdffa 1086 *
982a820b 1087 * @adev: amdgpu_device pointer
e3ecdffa
AD
1088 *
1089 * Allocates a scratch page of VRAM for use by various things in the
1090 * driver.
1091 */
7ccfd79f 1092static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
d38ceaf9 1093{
7ccfd79f
CK
1094 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
1095 AMDGPU_GEM_DOMAIN_VRAM |
1096 AMDGPU_GEM_DOMAIN_GTT,
1097 &adev->mem_scratch.robj,
1098 &adev->mem_scratch.gpu_addr,
1099 (void **)&adev->mem_scratch.ptr);
d38ceaf9
AD
1100}
1101
e3ecdffa 1102/**
7ccfd79f 1103 * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
e3ecdffa 1104 *
982a820b 1105 * @adev: amdgpu_device pointer
e3ecdffa
AD
1106 *
1107 * Frees the VRAM scratch page.
1108 */
7ccfd79f 1109static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 1110{
7ccfd79f 1111 amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
d38ceaf9
AD
1112}
1113
1114/**
9c3f2b54 1115 * amdgpu_device_program_register_sequence - program an array of registers.
d38ceaf9
AD
1116 *
1117 * @adev: amdgpu_device pointer
1118 * @registers: pointer to the register array
1119 * @array_size: size of the register array
1120 *
b8920e1e 1121 * Programs an array of registers with AND/OR masks.
d38ceaf9
AD
1122 * This is a helper for setting golden registers.
1123 */
9c3f2b54
AD
1124void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
1125 const u32 *registers,
1126 const u32 array_size)
d38ceaf9
AD
1127{
1128 u32 tmp, reg, and_mask, or_mask;
1129 int i;
1130
1131 if (array_size % 3)
1132 return;
1133
47fc644f 1134 for (i = 0; i < array_size; i += 3) {
d38ceaf9
AD
1135 reg = registers[i + 0];
1136 and_mask = registers[i + 1];
1137 or_mask = registers[i + 2];
1138
1139 if (and_mask == 0xffffffff) {
1140 tmp = or_mask;
1141 } else {
1142 tmp = RREG32(reg);
1143 tmp &= ~and_mask;
e0d07657
HZ
1144 if (adev->family >= AMDGPU_FAMILY_AI)
1145 tmp |= (or_mask & and_mask);
1146 else
1147 tmp |= or_mask;
d38ceaf9
AD
1148 }
1149 WREG32(reg, tmp);
1150 }
1151}
1152
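/*
 * Illustrative golden-register table (offsets and masks are made up): each
 * entry is an {offset, and_mask, or_mask} triple, so the array length must
 * be a multiple of three.
 *
 *   static const u32 example_golden_regs[] = {
 *           0x1234, 0xffffffff, 0x00000001,
 *           0x5678, 0x0000000f, 0x00000002,
 *   };
 *
 *   amdgpu_device_program_register_sequence(adev, example_golden_regs,
 *                                           ARRAY_SIZE(example_golden_regs));
 */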
e3ecdffa
AD
1153/**
1154 * amdgpu_device_pci_config_reset - reset the GPU
1155 *
1156 * @adev: amdgpu_device pointer
1157 *
1158 * Resets the GPU using the pci config reset sequence.
1159 * Only applicable to asics prior to vega10.
1160 */
8111c387 1161void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
1162{
1163 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
1164}
1165
af484df8
AD
1166/**
1167 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1168 *
1169 * @adev: amdgpu_device pointer
1170 *
1171 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1172 */
1173int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1174{
1175 return pci_reset_function(adev->pdev);
1176}
1177
d38ceaf9 1178/*
06ec9070 1179 * amdgpu_device_wb_*()
455a7bc2 1180 * Writeback is the method by which the GPU updates special pages in memory
ea81a173 1181 * with the status of certain GPU events (fences, ring pointers,etc.).
d38ceaf9
AD
1182 */
1183
1184/**
06ec9070 1185 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
1186 *
1187 * @adev: amdgpu_device pointer
1188 *
1189 * Disables Writeback and frees the Writeback memory (all asics).
1190 * Used at driver shutdown.
1191 */
06ec9070 1192static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1193{
1194 if (adev->wb.wb_obj) {
a76ed485
AD
1195 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1196 &adev->wb.gpu_addr,
1197 (void **)&adev->wb.wb);
d38ceaf9
AD
1198 adev->wb.wb_obj = NULL;
1199 }
1200}
1201
1202/**
03f2abb0 1203 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
d38ceaf9
AD
1204 *
1205 * @adev: amdgpu_device pointer
1206 *
455a7bc2 1207 * Initializes writeback and allocates writeback memory (all asics).
d38ceaf9
AD
1208 * Used at driver startup.
1209 * Returns 0 on success or a negative error code on failure.
1210 */
06ec9070 1211static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
1212{
1213 int r;
1214
1215 if (adev->wb.wb_obj == NULL) {
97407b63
AD
1216 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1217 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
1218 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1219 &adev->wb.wb_obj, &adev->wb.gpu_addr,
1220 (void **)&adev->wb.wb);
d38ceaf9
AD
1221 if (r) {
1222 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1223 return r;
1224 }
d38ceaf9
AD
1225
1226 adev->wb.num_wb = AMDGPU_MAX_WB;
1227 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1228
1229 /* clear wb memory */
73469585 1230 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
1231 }
1232
1233 return 0;
1234}
1235
1236/**
131b4b36 1237 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
1238 *
1239 * @adev: amdgpu_device pointer
1240 * @wb: wb index
1241 *
1242 * Allocate a wb slot for use by the driver (all asics).
1243 * Returns 0 on success or -EINVAL on failure.
1244 */
131b4b36 1245int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9
AD
1246{
1247 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 1248
97407b63 1249 if (offset < adev->wb.num_wb) {
7014285a 1250 __set_bit(offset, adev->wb.used);
63ae07ca 1251 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
1252 return 0;
1253 } else {
1254 return -EINVAL;
1255 }
1256}
1257
d38ceaf9 1258/**
131b4b36 1259 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
1260 *
1261 * @adev: amdgpu_device pointer
1262 * @wb: wb index
1263 *
1264 * Free a wb slot allocated for use by the driver (all asics)
1265 */
131b4b36 1266void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 1267{
73469585 1268 wb >>= 3;
d38ceaf9 1269 if (wb < adev->wb.num_wb)
73469585 1270 __clear_bit(wb, adev->wb.used);
d38ceaf9
AD
1271}
1272
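/*
 * Minimal usage sketch (illustrative only): a ring or IP block typically
 * grabs a writeback slot, derives the CPU and GPU addresses from the dword
 * offset, and frees the slot again on teardown.
 *
 *   u32 wb;
 *
 *   if (!amdgpu_device_wb_get(adev, &wb)) {
 *           volatile uint32_t *cpu_addr = &adev->wb.wb[wb];
 *           uint64_t gpu_addr = adev->wb.gpu_addr + (wb * 4);
 *
 *           *cpu_addr = 0;       // hardware later writes status to gpu_addr
 *           amdgpu_device_wb_free(adev, wb);
 *   }
 */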
d6895ad3
CK
1273/**
1274 * amdgpu_device_resize_fb_bar - try to resize FB BAR
1275 *
1276 * @adev: amdgpu_device pointer
1277 *
1278 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1279 * to fail, but if any of the BARs is not accessible after the size we abort
1280 * driver loading by returning -ENODEV.
1281 */
1282int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1283{
453f617a 1284 int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
31b8adab
CK
1285 struct pci_bus *root;
1286 struct resource *res;
b8920e1e 1287 unsigned int i;
d6895ad3
CK
1288 u16 cmd;
1289 int r;
1290
822130b5
AB
1291 if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1292 return 0;
1293
0c03b912 1294 /* Bypass for VF */
1295 if (amdgpu_sriov_vf(adev))
1296 return 0;
1297
b7221f2b
AD
1298 /* skip if the bios has already enabled large BAR */
1299 if (adev->gmc.real_vram_size &&
1300 (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1301 return 0;
1302
31b8adab
CK
1303 /* Check if the root BUS has 64bit memory resources */
1304 root = adev->pdev->bus;
1305 while (root->parent)
1306 root = root->parent;
1307
1308 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 1309 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
1310 res->start > 0x100000000ull)
1311 break;
1312 }
1313
1314 /* Trying to resize is pointless without a root hub window above 4GB */
1315 if (!res)
1316 return 0;
1317
453f617a
ND
1318 /* Limit the BAR size to what is available */
1319 rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1320 rbar_size);
1321
d6895ad3
CK
1322 /* Disable memory decoding while we change the BAR addresses and size */
1323 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1324 pci_write_config_word(adev->pdev, PCI_COMMAND,
1325 cmd & ~PCI_COMMAND_MEMORY);
1326
1327 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
43c064db 1328 amdgpu_doorbell_fini(adev);
d6895ad3
CK
1329 if (adev->asic_type >= CHIP_BONAIRE)
1330 pci_release_resource(adev->pdev, 2);
1331
1332 pci_release_resource(adev->pdev, 0);
1333
1334 r = pci_resize_resource(adev->pdev, 0, rbar_size);
1335 if (r == -ENOSPC)
1336 DRM_INFO("Not enough PCI address space for a large BAR.");
1337 else if (r && r != -ENOTSUPP)
1338 DRM_ERROR("Problem resizing BAR0 (%d).", r);
1339
1340 pci_assign_unassigned_bus_resources(adev->pdev->bus);
1341
1342 /* When the doorbell or fb BAR isn't available we have no chance of
1343 * using the device.
1344 */
43c064db 1345 r = amdgpu_doorbell_init(adev);
d6895ad3
CK
1346 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1347 return -ENODEV;
1348
1349 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1350
1351 return 0;
1352}
a05502e5 1353
9535a86a
SZ
1354static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
1355{
b8920e1e 1356 if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
9535a86a 1357 return false;
9535a86a
SZ
1358
1359 return true;
1360}
1361
d38ceaf9
AD
1362/*
1363 * GPU helpers function.
1364 */
1365/**
39c640c0 1366 * amdgpu_device_need_post - check if the hw need post or not
d38ceaf9
AD
1367 *
1368 * @adev: amdgpu_device pointer
1369 *
c836fec5
JQ
1370 * Check if the asic has been initialized (all asics) at driver startup
1371 * or post is needed if hw reset is performed.
1372 * Returns true if post is needed or false if not.
d38ceaf9 1373 */
39c640c0 1374bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
1375{
1376 uint32_t reg;
1377
bec86378
ML
1378 if (amdgpu_sriov_vf(adev))
1379 return false;
1380
9535a86a
SZ
1381 if (!amdgpu_device_read_bios(adev))
1382 return false;
1383
bec86378 1384 if (amdgpu_passthrough(adev)) {
1da2c326
ML
1385 /* for FIJI: in the whole-GPU pass-through virtualization case, after a VM
1386 * reboot some old SMC firmware still needs the driver to do vPost, otherwise
1387 * the GPU hangs. SMC firmware versions above 22.15 don't have this flaw, so
1388 * we force vPost to be executed for SMC versions below 22.15
bec86378
ML
1389 */
1390 if (adev->asic_type == CHIP_FIJI) {
1391 int err;
1392 uint32_t fw_ver;
b8920e1e 1393
bec86378
ML
1394 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1395 /* force vPost if an error occurred */
1396 if (err)
1397 return true;
1398
1399 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1da2c326
ML
1400 if (fw_ver < 0x00160e00)
1401 return true;
bec86378 1402 }
bec86378 1403 }
91fe77eb 1404
e3c1b071 1405 /* Don't post if we need to reset whole hive on init */
1406 if (adev->gmc.xgmi.pending_reset)
1407 return false;
1408
91fe77eb 1409 if (adev->has_hw_reset) {
1410 adev->has_hw_reset = false;
1411 return true;
1412 }
1413
1414 /* bios scratch used on CIK+ */
1415 if (adev->asic_type >= CHIP_BONAIRE)
1416 return amdgpu_atombios_scratch_need_asic_init(adev);
1417
1418 /* check MEM_SIZE for older asics */
1419 reg = amdgpu_asic_get_config_memsize(adev);
1420
1421 if ((reg != 0) && (reg != 0xffffffff))
1422 return false;
1423
1424 return true;
70e64c4d
ML
1425}
1426
bb0f8429
ML
1427/*
1428 * Check whether seamless boot is supported.
1429 *
7f4ce7b5
ML
1430 * So far we only support seamless boot on DCE 3.0 or later.
1431 * If users report that it works on older ASICS as well, we may
1432 * loosen this.
bb0f8429
ML
1433 */
1434bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
1435{
5dc270d3
ML
1436 switch (amdgpu_seamless) {
1437 case -1:
1438 break;
1439 case 1:
1440 return true;
1441 case 0:
1442 return false;
1443 default:
1444 DRM_ERROR("Invalid value for amdgpu.seamless: %d\n",
1445 amdgpu_seamless);
1446 return false;
1447 }
1448
3657a1d5
ML
1449 if (!(adev->flags & AMD_IS_APU))
1450 return false;
1451
5dc270d3
ML
1452 if (adev->mman.keep_stolen_vga_memory)
1453 return false;
1454
7f4ce7b5 1455 return adev->ip_versions[DCE_HWIP][0] >= IP_VERSION(3, 0, 0);
bb0f8429
ML
1456}
1457
5d1eb4c4
ML
1458/*
1459 * Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic
1460 * speed switching. Until we have confirmation from Intel that a specific host
1461 * supports it, it's safer that we keep it disabled for all.
1462 *
1463 * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1464 * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1465 */
1466bool amdgpu_device_pcie_dynamic_switching_supported(void)
1467{
1468#if IS_ENABLED(CONFIG_X86)
1469 struct cpuinfo_x86 *c = &cpu_data(0);
1470
1471 if (c->x86_vendor == X86_VENDOR_INTEL)
1472 return false;
1473#endif
1474 return true;
1475}
1476
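/*
 * Illustrative caller sketch (not part of this file): power-management code
 * can use this helper to mask out PCIe DPM when the host does not support
 * dynamic speed switching, roughly along the lines of:
 *
 *   if (!amdgpu_device_pcie_dynamic_switching_supported())
 *           adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
 */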
0ab5d711
ML
1477/**
1478 * amdgpu_device_should_use_aspm - check if the device should program ASPM
1479 *
1480 * @adev: amdgpu_device pointer
1481 *
1482 * Confirm whether the module parameter and pcie bridge agree that ASPM should
1483 * be set for this device.
1484 *
1485 * Returns true if it should be used or false if not.
1486 */
1487bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1488{
1489 switch (amdgpu_aspm) {
1490 case -1:
1491 break;
1492 case 0:
1493 return false;
1494 case 1:
1495 return true;
1496 default:
1497 return false;
1498 }
1499 return pcie_aspm_enabled(adev->pdev);
1500}
1501
3ad5dcfe
KHF
1502bool amdgpu_device_aspm_support_quirk(void)
1503{
1504#if IS_ENABLED(CONFIG_X86)
1505 struct cpuinfo_x86 *c = &cpu_data(0);
1506
1507 return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
1508#else
1509 return true;
1510#endif
1511}
1512
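/*
 * Illustrative caller sketch (not part of this file): ASIC-specific ASPM
 * programming can bail out early when either the module parameter/bridge
 * check or the host quirk says ASPM should not be used, e.g.:
 *
 *   if (!amdgpu_device_should_use_aspm(adev) ||
 *       !amdgpu_device_aspm_support_quirk())
 *           return;
 */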
d38ceaf9
AD
1513/* if we get transitioned to only one device, take VGA back */
1514/**
06ec9070 1515 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9 1516 *
bf44e8ce 1517 * @pdev: PCI device pointer
d38ceaf9
AD
1518 * @state: enable/disable vga decode
1519 *
1520 * Enable/disable vga decode (all asics).
1521 * Returns VGA resource flags.
1522 */
bf44e8ce
CH
1523static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1524 bool state)
d38ceaf9 1525{
bf44e8ce 1526 struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
b8920e1e 1527
d38ceaf9
AD
1528 amdgpu_asic_set_vga_state(adev, state);
1529 if (state)
1530 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1531 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1532 else
1533 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1534}
1535
e3ecdffa
AD
1536/**
1537 * amdgpu_device_check_block_size - validate the vm block size
1538 *
1539 * @adev: amdgpu_device pointer
1540 *
1541 * Validates the vm block size specified via module parameter.
1542 * The vm block size defines number of bits in page table versus page directory,
1543 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1544 * page table and the remaining bits are in the page directory.
1545 */
06ec9070 1546static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
1547{
1548 /* defines number of bits in page table versus page directory,
1549 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
b8920e1e
SS
1550 * page table and the remaining bits are in the page directory
1551 */
bab4fee7
JZ
1552 if (amdgpu_vm_block_size == -1)
1553 return;
a1adf8be 1554
bab4fee7 1555 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
1556 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1557 amdgpu_vm_block_size);
97489129 1558 amdgpu_vm_block_size = -1;
a1adf8be 1559 }
a1adf8be
CZ
1560}
1561
e3ecdffa
AD
1562/**
1563 * amdgpu_device_check_vm_size - validate the vm size
1564 *
1565 * @adev: amdgpu_device pointer
1566 *
1567 * Validates the vm size in GB specified via module parameter.
1568 * The VM size is the size of the GPU virtual memory space in GB.
1569 */
06ec9070 1570static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 1571{
64dab074
AD
1572 /* no need to check the default value */
1573 if (amdgpu_vm_size == -1)
1574 return;
1575
83ca145d
ZJ
1576 if (amdgpu_vm_size < 1) {
1577 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1578 amdgpu_vm_size);
f3368128 1579 amdgpu_vm_size = -1;
83ca145d 1580 }
83ca145d
ZJ
1581}
1582
7951e376
RZ
1583static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1584{
1585 struct sysinfo si;
a9d4fe2f 1586 bool is_os_64 = (sizeof(void *) == 8);
7951e376
RZ
1587 uint64_t total_memory;
1588 uint64_t dram_size_seven_GB = 0x1B8000000;
1589 uint64_t dram_size_three_GB = 0xB8000000;
1590
1591 if (amdgpu_smu_memory_pool_size == 0)
1592 return;
1593
1594 if (!is_os_64) {
1595 DRM_WARN("Not 64-bit OS, feature not supported\n");
1596 goto def_value;
1597 }
1598 si_meminfo(&si);
1599 total_memory = (uint64_t)si.totalram * si.mem_unit;
1600
1601 if ((amdgpu_smu_memory_pool_size == 1) ||
1602 (amdgpu_smu_memory_pool_size == 2)) {
1603 if (total_memory < dram_size_three_GB)
1604 goto def_value1;
1605 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1606 (amdgpu_smu_memory_pool_size == 8)) {
1607 if (total_memory < dram_size_seven_GB)
1608 goto def_value1;
1609 } else {
1610 DRM_WARN("Smu memory pool size not supported\n");
1611 goto def_value;
1612 }
1613 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1614
1615 return;
1616
1617def_value1:
1618 DRM_WARN("No enough system memory\n");
1619def_value:
1620 adev->pm.smu_prv_buffer_size = 0;
1621}
1622
9f6a7857
HR
1623static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1624{
1625 if (!(adev->flags & AMD_IS_APU) ||
1626 adev->asic_type < CHIP_RAVEN)
1627 return 0;
1628
1629 switch (adev->asic_type) {
1630 case CHIP_RAVEN:
1631 if (adev->pdev->device == 0x15dd)
1632 adev->apu_flags |= AMD_APU_IS_RAVEN;
1633 if (adev->pdev->device == 0x15d8)
1634 adev->apu_flags |= AMD_APU_IS_PICASSO;
1635 break;
1636 case CHIP_RENOIR:
1637 if ((adev->pdev->device == 0x1636) ||
1638 (adev->pdev->device == 0x164c))
1639 adev->apu_flags |= AMD_APU_IS_RENOIR;
1640 else
1641 adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1642 break;
1643 case CHIP_VANGOGH:
1644 adev->apu_flags |= AMD_APU_IS_VANGOGH;
1645 break;
1646 case CHIP_YELLOW_CARP:
1647 break;
d0f56dc2 1648 case CHIP_CYAN_SKILLFISH:
dfcc3e8c
AD
1649 if ((adev->pdev->device == 0x13FE) ||
1650 (adev->pdev->device == 0x143F))
d0f56dc2
TZ
1651 adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1652 break;
9f6a7857 1653 default:
4eaf21b7 1654 break;
9f6a7857
HR
1655 }
1656
1657 return 0;
1658}
1659
d38ceaf9 1660/**
06ec9070 1661 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
1662 *
1663 * @adev: amdgpu_device pointer
1664 *
1665 * Validates certain module parameters and updates
1666 * the associated values used by the driver (all asics).
1667 */
912dfc84 1668static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1669{
5b011235
CZ
1670 if (amdgpu_sched_jobs < 4) {
1671 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1672 amdgpu_sched_jobs);
1673 amdgpu_sched_jobs = 4;
47fc644f 1674 } else if (!is_power_of_2(amdgpu_sched_jobs)) {
5b011235
CZ
1675 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1676 amdgpu_sched_jobs);
1677 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1678 }
d38ceaf9 1679
83e74db6 1680 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
1681 /* gart size must be greater or equal to 32M */
1682 dev_warn(adev->dev, "gart size (%d) too small\n",
1683 amdgpu_gart_size);
83e74db6 1684 amdgpu_gart_size = -1;
d38ceaf9
AD
1685 }
1686
36d38372 1687 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1688 /* gtt size must be greater or equal to 32M */
36d38372
CK
1689 dev_warn(adev->dev, "gtt size (%d) too small\n",
1690 amdgpu_gtt_size);
1691 amdgpu_gtt_size = -1;
d38ceaf9
AD
1692 }
1693
d07f14be
RH
1694 /* valid range is between 4 and 9 inclusive */
1695 if (amdgpu_vm_fragment_size != -1 &&
1696 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1697 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1698 amdgpu_vm_fragment_size = -1;
1699 }
1700
5d5bd5e3
KW
1701 if (amdgpu_sched_hw_submission < 2) {
1702 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1703 amdgpu_sched_hw_submission);
1704 amdgpu_sched_hw_submission = 2;
1705 } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1706 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1707 amdgpu_sched_hw_submission);
1708 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1709 }
1710
2656fd23
AG
1711 if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1712 dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1713 amdgpu_reset_method = -1;
1714 }
1715
7951e376
RZ
1716 amdgpu_device_check_smu_prv_buffer_size(adev);
1717
06ec9070 1718 amdgpu_device_check_vm_size(adev);
d38ceaf9 1719
06ec9070 1720 amdgpu_device_check_block_size(adev);
6a7f76e7 1721
19aede77 1722 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84 1723
e3c00faa 1724 return 0;
d38ceaf9
AD
1725}
1726
1727/**
1728 * amdgpu_switcheroo_set_state - set switcheroo state
1729 *
1730 * @pdev: pci dev pointer
1694467b 1731 * @state: vga_switcheroo state
d38ceaf9 1732 *
12024b17 1733 * Callback for the switcheroo driver. Suspends or resumes
d38ceaf9
AD
1734 * the asics before or after it is powered up using ACPI methods.
1735 */
8aba21b7
LT
1736static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1737 enum vga_switcheroo_state state)
d38ceaf9
AD
1738{
1739 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 1740 int r;
d38ceaf9 1741
b98c6299 1742 if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
1743 return;
1744
1745 if (state == VGA_SWITCHEROO_ON) {
dd4fa6c1 1746 pr_info("switched on\n");
d38ceaf9
AD
1747 /* don't suspend or resume card normally */
1748 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1749
8f66090b
TZ
1750 pci_set_power_state(pdev, PCI_D0);
1751 amdgpu_device_load_pci_state(pdev);
1752 r = pci_enable_device(pdev);
de185019
AD
1753 if (r)
1754 DRM_WARN("pci_enable_device failed (%d)\n", r);
1755 amdgpu_device_resume(dev, true);
d38ceaf9 1756
d38ceaf9 1757 dev->switch_power_state = DRM_SWITCH_POWER_ON;
d38ceaf9 1758 } else {
dd4fa6c1 1759 pr_info("switched off\n");
d38ceaf9 1760 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
5095d541 1761 amdgpu_device_prepare(dev);
de185019 1762 amdgpu_device_suspend(dev, true);
8f66090b 1763 amdgpu_device_cache_pci_state(pdev);
de185019 1764 /* Shut down the device */
8f66090b
TZ
1765 pci_disable_device(pdev);
1766 pci_set_power_state(pdev, PCI_D3cold);
d38ceaf9
AD
1767 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1768 }
1769}
1770
1771/**
1772 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1773 *
1774 * @pdev: pci dev pointer
1775 *
 1776 * Callback for the switcheroo driver. Checks if the switcheroo
1777 * state can be changed.
1778 * Returns true if the state can be changed, false if not.
1779 */
1780static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1781{
1782 struct drm_device *dev = pci_get_drvdata(pdev);
1783
b8920e1e 1784 /*
d38ceaf9
AD
1785 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1786 * locking inversion with the driver load path. And the access here is
1787 * completely racy anyway. So don't bother with locking for now.
1788 */
7e13ad89 1789 return atomic_read(&dev->open_count) == 0;
d38ceaf9
AD
1790}
1791
1792static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1793 .set_gpu_state = amdgpu_switcheroo_set_state,
1794 .reprobe = NULL,
1795 .can_switch = amdgpu_switcheroo_can_switch,
1796};
1797
e3ecdffa
AD
1798/**
1799 * amdgpu_device_ip_set_clockgating_state - set the CG state
1800 *
87e3f136 1801 * @dev: amdgpu_device pointer
e3ecdffa
AD
1802 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1803 * @state: clockgating state (gate or ungate)
1804 *
1805 * Sets the requested clockgating state for all instances of
1806 * the hardware IP specified.
1807 * Returns the error code from the last instance.
1808 */
43fa561f 1809int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1810 enum amd_ip_block_type block_type,
1811 enum amd_clockgating_state state)
d38ceaf9 1812{
43fa561f 1813 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1814 int i, r = 0;
1815
1816 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1817 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1818 continue;
c722865a
RZ
1819 if (adev->ip_blocks[i].version->type != block_type)
1820 continue;
1821 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1822 continue;
1823 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1824 (void *)adev, state);
1825 if (r)
1826 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1827 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1828 }
1829 return r;
1830}
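/*
 * Illustrative usage (a sketch, not part of the original file): a caller
 * that wants to gate clockgating on every GFX instance could do:
 *
 *   r = amdgpu_device_ip_set_clockgating_state(adev,
 *                                              AMD_IP_BLOCK_TYPE_GFX,
 *                                              AMD_CG_STATE_GATE);
 *   if (r)
 *       DRM_ERROR("failed to gate GFX clockgating (%d)\n", r);
 */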
1831
e3ecdffa
AD
1832/**
1833 * amdgpu_device_ip_set_powergating_state - set the PG state
1834 *
87e3f136 1835 * @dev: amdgpu_device pointer
e3ecdffa
AD
1836 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1837 * @state: powergating state (gate or ungate)
1838 *
1839 * Sets the requested powergating state for all instances of
1840 * the hardware IP specified.
1841 * Returns the error code from the last instance.
1842 */
43fa561f 1843int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1844 enum amd_ip_block_type block_type,
1845 enum amd_powergating_state state)
d38ceaf9 1846{
43fa561f 1847 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1848 int i, r = 0;
1849
1850 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1851 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1852 continue;
c722865a
RZ
1853 if (adev->ip_blocks[i].version->type != block_type)
1854 continue;
1855 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1856 continue;
1857 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1858 (void *)adev, state);
1859 if (r)
1860 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1861 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1862 }
1863 return r;
1864}
1865
e3ecdffa
AD
1866/**
1867 * amdgpu_device_ip_get_clockgating_state - get the CG state
1868 *
1869 * @adev: amdgpu_device pointer
1870 * @flags: clockgating feature flags
1871 *
1872 * Walks the list of IPs on the device and updates the clockgating
1873 * flags for each IP.
1874 * Updates @flags with the feature flags for each hardware IP where
1875 * clockgating is enabled.
1876 */
2990a1fc 1877void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
25faeddc 1878 u64 *flags)
6cb2d4e4
HR
1879{
1880 int i;
1881
1882 for (i = 0; i < adev->num_ip_blocks; i++) {
1883 if (!adev->ip_blocks[i].status.valid)
1884 continue;
1885 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1886 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1887 }
1888}
1889
e3ecdffa
AD
1890/**
1891 * amdgpu_device_ip_wait_for_idle - wait for idle
1892 *
1893 * @adev: amdgpu_device pointer
1894 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1895 *
 1896 * Waits for the requested hardware IP to be idle.
1897 * Returns 0 for success or a negative error code on failure.
1898 */
2990a1fc
AD
1899int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1900 enum amd_ip_block_type block_type)
5dbbb60b
AD
1901{
1902 int i, r;
1903
1904 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1905 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1906 continue;
a1255107
AD
1907 if (adev->ip_blocks[i].version->type == block_type) {
1908 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1909 if (r)
1910 return r;
1911 break;
1912 }
1913 }
1914 return 0;
1915
1916}
1917
e3ecdffa
AD
1918/**
1919 * amdgpu_device_ip_is_idle - is the hardware IP idle
1920 *
1921 * @adev: amdgpu_device pointer
1922 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1923 *
1924 * Check if the hardware IP is idle or not.
 1925 * Returns true if the IP is idle, false if not.
1926 */
2990a1fc
AD
1927bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1928 enum amd_ip_block_type block_type)
5dbbb60b
AD
1929{
1930 int i;
1931
1932 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1933 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1934 continue;
a1255107
AD
1935 if (adev->ip_blocks[i].version->type == block_type)
1936 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1937 }
1938 return true;
1939
1940}
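/*
 * Illustrative usage of the idle helpers (a sketch, not from the original
 * file): check whether a block is idle and, if not, wait for it, e.g. for
 * the GMC block:
 *
 *   if (!amdgpu_device_ip_is_idle(adev, AMD_IP_BLOCK_TYPE_GMC)) {
 *       r = amdgpu_device_ip_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GMC);
 *       if (r)
 *           return r;
 *   }
 */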
1941
e3ecdffa
AD
1942/**
1943 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1944 *
1945 * @adev: amdgpu_device pointer
87e3f136 1946 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1947 *
1948 * Returns a pointer to the hardware IP block structure
1949 * if it exists for the asic, otherwise NULL.
1950 */
2990a1fc
AD
1951struct amdgpu_ip_block *
1952amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1953 enum amd_ip_block_type type)
d38ceaf9
AD
1954{
1955 int i;
1956
1957 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1958 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1959 return &adev->ip_blocks[i];
1960
1961 return NULL;
1962}
1963
1964/**
2990a1fc 1965 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1966 *
1967 * @adev: amdgpu_device pointer
5fc3aeeb 1968 * @type: enum amd_ip_block_type
d38ceaf9
AD
1969 * @major: major version
1970 * @minor: minor version
1971 *
1972 * return 0 if equal or greater
1973 * return 1 if smaller or the ip_block doesn't exist
1974 */
2990a1fc
AD
1975int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1976 enum amd_ip_block_type type,
1977 u32 major, u32 minor)
d38ceaf9 1978{
2990a1fc 1979 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1980
a1255107
AD
1981 if (ip_block && ((ip_block->version->major > major) ||
1982 ((ip_block->version->major == major) &&
1983 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1984 return 0;
1985
1986 return 1;
1987}
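/*
 * Illustrative usage (a sketch): callers typically use the lookup and
 * compare helpers above to branch on the version of an IP block, e.g. to
 * require GFX 8.1 or newer and to print the detected version:
 *
 *   struct amdgpu_ip_block *gfx;
 *
 *   if (amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GFX,
 *                                          8, 1) == 0) {
 *       gfx = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
 *       if (gfx)
 *           DRM_INFO("GFX IP v%u.%u\n", gfx->version->major,
 *                    gfx->version->minor);
 *   }
 */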
1988
a1255107 1989/**
2990a1fc 1990 * amdgpu_device_ip_block_add
a1255107
AD
1991 *
1992 * @adev: amdgpu_device pointer
1993 * @ip_block_version: pointer to the IP to add
1994 *
1995 * Adds the IP block driver information to the collection of IPs
1996 * on the asic.
1997 */
2990a1fc
AD
1998int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1999 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
2000{
2001 if (!ip_block_version)
2002 return -EINVAL;
2003
7bd939d0
LG
2004 switch (ip_block_version->type) {
2005 case AMD_IP_BLOCK_TYPE_VCN:
2006 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
2007 return 0;
2008 break;
2009 case AMD_IP_BLOCK_TYPE_JPEG:
2010 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
2011 return 0;
2012 break;
2013 default:
2014 break;
2015 }
2016
e966a725 2017 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
2018 ip_block_version->funcs->name);
2019
a1255107
AD
2020 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
2021
2022 return 0;
2023}
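/*
 * Illustrative usage (a sketch; the ip block structure name below is
 * hypothetical): ASIC setup code registers its IP blocks in initialization
 * order, passing a constant amdgpu_ip_block_version for each one:
 *
 *   r = amdgpu_device_ip_block_add(adev, &example_gfx_v9_0_ip_block);
 *   if (r)
 *       return r;
 */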
2024
e3ecdffa
AD
2025/**
2026 * amdgpu_device_enable_virtual_display - enable virtual display feature
2027 *
2028 * @adev: amdgpu_device pointer
2029 *
 2030 * Enables the virtual display feature if the user has enabled it via
2031 * the module parameter virtual_display. This feature provides a virtual
2032 * display hardware on headless boards or in virtualized environments.
2033 * This function parses and validates the configuration string specified by
 2034 * the user and configures the virtual display configuration (number of
2035 * virtual connectors, crtcs, etc.) specified.
2036 */
483ef985 2037static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
2038{
2039 adev->enable_virtual_display = false;
2040
2041 if (amdgpu_virtual_display) {
8f66090b 2042 const char *pci_address_name = pci_name(adev->pdev);
0f66356d 2043 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
2044
2045 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
2046 pciaddstr_tmp = pciaddstr;
0f66356d
ED
2047 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
2048 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
2049 if (!strcmp("all", pciaddname)
2050 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
2051 long num_crtc;
2052 int res = -1;
2053
9accf2fd 2054 adev->enable_virtual_display = true;
0f66356d
ED
2055
2056 if (pciaddname_tmp)
2057 res = kstrtol(pciaddname_tmp, 10,
2058 &num_crtc);
2059
2060 if (!res) {
2061 if (num_crtc < 1)
2062 num_crtc = 1;
2063 if (num_crtc > 6)
2064 num_crtc = 6;
2065 adev->mode_info.num_crtc = num_crtc;
2066 } else {
2067 adev->mode_info.num_crtc = 1;
2068 }
9accf2fd
ED
2069 break;
2070 }
2071 }
2072
0f66356d
ED
2073 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
2074 amdgpu_virtual_display, pci_address_name,
2075 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
2076
2077 kfree(pciaddstr);
2078 }
2079}
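/*
 * Illustrative module parameter format (derived from the parser above; the
 * PCI address is hypothetical): entries are separated by ';', each entry is
 * "<pci address>,<num_crtc>", and "all" matches every device, e.g.:
 *
 *   amdgpu.virtual_display=0000:03:00.0,2
 *   amdgpu.virtual_display=all,1
 */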
2080
25263da3
AD
2081void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
2082{
2083 if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
2084 adev->mode_info.num_crtc = 1;
2085 adev->enable_virtual_display = true;
2086 DRM_INFO("virtual_display:%d, num_crtc:%d\n",
2087 adev->enable_virtual_display, adev->mode_info.num_crtc);
2088 }
2089}
2090
e3ecdffa
AD
2091/**
2092 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
2093 *
2094 * @adev: amdgpu_device pointer
2095 *
2096 * Parses the asic configuration parameters specified in the gpu info
 2097 * firmware and makes them available to the driver for use in configuring
2098 * the asic.
2099 * Returns 0 on success, -EINVAL on failure.
2100 */
e2a75f88
AD
2101static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
2102{
e2a75f88 2103 const char *chip_name;
c0a43457 2104 char fw_name[40];
e2a75f88
AD
2105 int err;
2106 const struct gpu_info_firmware_header_v1_0 *hdr;
2107
ab4fe3e1
HR
2108 adev->firmware.gpu_info_fw = NULL;
2109
72de33f8 2110 if (adev->mman.discovery_bin) {
cc375d8c
TY
2111 /*
2112 * FIXME: The bounding box is still needed by Navi12, so
e24d0e91 2113 * temporarily read it from gpu_info firmware. Should be dropped
cc375d8c
TY
2114 * when DAL no longer needs it.
2115 */
2116 if (adev->asic_type != CHIP_NAVI12)
2117 return 0;
258620d0
AD
2118 }
2119
e2a75f88 2120 switch (adev->asic_type) {
e2a75f88
AD
2121 default:
2122 return 0;
2123 case CHIP_VEGA10:
2124 chip_name = "vega10";
2125 break;
3f76dced
AD
2126 case CHIP_VEGA12:
2127 chip_name = "vega12";
2128 break;
2d2e5e7e 2129 case CHIP_RAVEN:
54f78a76 2130 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
54c4d17e 2131 chip_name = "raven2";
54f78a76 2132 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
741deade 2133 chip_name = "picasso";
54c4d17e
FX
2134 else
2135 chip_name = "raven";
2d2e5e7e 2136 break;
65e60f6e
LM
2137 case CHIP_ARCTURUS:
2138 chip_name = "arcturus";
2139 break;
42b325e5
XY
2140 case CHIP_NAVI12:
2141 chip_name = "navi12";
2142 break;
e2a75f88
AD
2143 }
2144
2145 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
b31d3063 2146 err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
e2a75f88
AD
2147 if (err) {
2148 dev_err(adev->dev,
b31d3063 2149 "Failed to get gpu_info firmware \"%s\"\n",
e2a75f88
AD
2150 fw_name);
2151 goto out;
2152 }
2153
ab4fe3e1 2154 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
2155 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
2156
2157 switch (hdr->version_major) {
2158 case 1:
2159 {
2160 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 2161 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
2162 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2163
cc375d8c
TY
2164 /*
 2165 * Should be dropped when DAL no longer needs it.
2166 */
2167 if (adev->asic_type == CHIP_NAVI12)
ec51d3fa
XY
2168 goto parse_soc_bounding_box;
2169
b5ab16bf
AD
2170 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2171 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2172 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2173 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 2174 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
2175 le32_to_cpu(gpu_info_fw->gc_num_tccs);
2176 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2177 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2178 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2179 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 2180 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
2181 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2182 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
2183 adev->gfx.cu_info.max_waves_per_simd =
2184 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2185 adev->gfx.cu_info.max_scratch_slots_per_cu =
2186 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2187 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 2188 if (hdr->version_minor >= 1) {
35c2e910
HZ
2189 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2190 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2191 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2192 adev->gfx.config.num_sc_per_sh =
2193 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2194 adev->gfx.config.num_packer_per_sc =
2195 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2196 }
ec51d3fa
XY
2197
2198parse_soc_bounding_box:
ec51d3fa
XY
2199 /*
 2200 * soc bounding box info is not integrated into the discovery table,
258620d0 2201 * we always need to parse it from gpu info firmware if needed.
ec51d3fa 2202 */
48321c3d
HW
2203 if (hdr->version_minor == 2) {
2204 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2205 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2206 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2207 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2208 }
e2a75f88
AD
2209 break;
2210 }
2211 default:
2212 dev_err(adev->dev,
2213 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2214 err = -EINVAL;
2215 goto out;
2216 }
2217out:
e2a75f88
AD
2218 return err;
2219}
2220
e3ecdffa
AD
2221/**
2222 * amdgpu_device_ip_early_init - run early init for hardware IPs
2223 *
2224 * @adev: amdgpu_device pointer
2225 *
2226 * Early initialization pass for hardware IPs. The hardware IPs that make
 2227 * up each asic are discovered and each IP's early_init callback is run. This
2228 * is the first stage in initializing the asic.
2229 * Returns 0 on success, negative error code on failure.
2230 */
06ec9070 2231static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 2232{
901e2be2
AD
2233 struct drm_device *dev = adev_to_drm(adev);
2234 struct pci_dev *parent;
aaa36a97 2235 int i, r;
ced69502 2236 bool total;
d38ceaf9 2237
483ef985 2238 amdgpu_device_enable_virtual_display(adev);
a6be7570 2239
00a979f3 2240 if (amdgpu_sriov_vf(adev)) {
00a979f3 2241 r = amdgpu_virt_request_full_gpu(adev, true);
aaa36a97
AD
2242 if (r)
2243 return r;
00a979f3
WS
2244 }
2245
d38ceaf9 2246 switch (adev->asic_type) {
33f34802
KW
2247#ifdef CONFIG_DRM_AMDGPU_SI
2248 case CHIP_VERDE:
2249 case CHIP_TAHITI:
2250 case CHIP_PITCAIRN:
2251 case CHIP_OLAND:
2252 case CHIP_HAINAN:
295d0daf 2253 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
2254 r = si_set_ip_blocks(adev);
2255 if (r)
2256 return r;
2257 break;
2258#endif
a2e73f56
AD
2259#ifdef CONFIG_DRM_AMDGPU_CIK
2260 case CHIP_BONAIRE:
2261 case CHIP_HAWAII:
2262 case CHIP_KAVERI:
2263 case CHIP_KABINI:
2264 case CHIP_MULLINS:
e1ad2d53 2265 if (adev->flags & AMD_IS_APU)
a2e73f56 2266 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
2267 else
2268 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
2269
2270 r = cik_set_ip_blocks(adev);
2271 if (r)
2272 return r;
2273 break;
2274#endif
da87c30b
AD
2275 case CHIP_TOPAZ:
2276 case CHIP_TONGA:
2277 case CHIP_FIJI:
2278 case CHIP_POLARIS10:
2279 case CHIP_POLARIS11:
2280 case CHIP_POLARIS12:
2281 case CHIP_VEGAM:
2282 case CHIP_CARRIZO:
2283 case CHIP_STONEY:
2284 if (adev->flags & AMD_IS_APU)
2285 adev->family = AMDGPU_FAMILY_CZ;
2286 else
2287 adev->family = AMDGPU_FAMILY_VI;
2288
2289 r = vi_set_ip_blocks(adev);
2290 if (r)
2291 return r;
2292 break;
d38ceaf9 2293 default:
63352b7f
AD
2294 r = amdgpu_discovery_set_ip_blocks(adev);
2295 if (r)
2296 return r;
2297 break;
d38ceaf9
AD
2298 }
2299
901e2be2
AD
2300 if (amdgpu_has_atpx() &&
2301 (amdgpu_is_atpx_hybrid() ||
2302 amdgpu_has_atpx_dgpu_power_cntl()) &&
2303 ((adev->flags & AMD_IS_APU) == 0) &&
2304 !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
2305 adev->flags |= AMD_IS_PX;
2306
85ac2021 2307 if (!(adev->flags & AMD_IS_APU)) {
c4c8955b 2308 parent = pcie_find_root_port(adev->pdev);
85ac2021
AD
2309 adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2310 }
901e2be2 2311
1884734a 2312
3b94fb10 2313 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 2314 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 2315 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
4215a119
HC
2316 if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2317 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
fbf1035b
ML
2318 if (!amdgpu_device_pcie_dynamic_switching_supported())
2319 adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
00f54b97 2320
ced69502 2321 total = true;
d38ceaf9
AD
2322 for (i = 0; i < adev->num_ip_blocks; i++) {
2323 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
0c451baf 2324 DRM_WARN("disabled ip block: %d <%s>\n",
ed8cf00c 2325 i, adev->ip_blocks[i].version->funcs->name);
a1255107 2326 adev->ip_blocks[i].status.valid = false;
d38ceaf9 2327 } else {
a1255107
AD
2328 if (adev->ip_blocks[i].version->funcs->early_init) {
2329 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 2330 if (r == -ENOENT) {
a1255107 2331 adev->ip_blocks[i].status.valid = false;
2c1a2784 2332 } else if (r) {
a1255107
AD
2333 DRM_ERROR("early_init of IP block <%s> failed %d\n",
2334 adev->ip_blocks[i].version->funcs->name, r);
ced69502 2335 total = false;
2c1a2784 2336 } else {
a1255107 2337 adev->ip_blocks[i].status.valid = true;
2c1a2784 2338 }
974e6b64 2339 } else {
a1255107 2340 adev->ip_blocks[i].status.valid = true;
d38ceaf9 2341 }
d38ceaf9 2342 }
21a249ca
AD
2343 /* get the vbios after the asic_funcs are set up */
2344 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
2345 r = amdgpu_device_parse_gpu_info_fw(adev);
2346 if (r)
2347 return r;
2348
21a249ca 2349 /* Read BIOS */
9535a86a
SZ
2350 if (amdgpu_device_read_bios(adev)) {
2351 if (!amdgpu_get_bios(adev))
2352 return -EINVAL;
21a249ca 2353
9535a86a
SZ
2354 r = amdgpu_atombios_init(adev);
2355 if (r) {
2356 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2357 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2358 return r;
2359 }
21a249ca 2360 }
77eabc6f
PJZ
2361
2362 /*get pf2vf msg info at it's earliest time*/
2363 if (amdgpu_sriov_vf(adev))
2364 amdgpu_virt_init_data_exchange(adev);
2365
21a249ca 2366 }
d38ceaf9 2367 }
ced69502
ML
2368 if (!total)
2369 return -ENODEV;
d38ceaf9 2370
00fa4035 2371 amdgpu_amdkfd_device_probe(adev);
395d1fb9
NH
2372 adev->cg_flags &= amdgpu_cg_mask;
2373 adev->pg_flags &= amdgpu_pg_mask;
2374
d38ceaf9
AD
2375 return 0;
2376}
2377
0a4f2520
RZ
2378static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2379{
2380 int i, r;
2381
2382 for (i = 0; i < adev->num_ip_blocks; i++) {
2383 if (!adev->ip_blocks[i].status.sw)
2384 continue;
2385 if (adev->ip_blocks[i].status.hw)
2386 continue;
2387 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 2388 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
2389 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2390 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2391 if (r) {
2392 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2393 adev->ip_blocks[i].version->funcs->name, r);
2394 return r;
2395 }
2396 adev->ip_blocks[i].status.hw = true;
2397 }
2398 }
2399
2400 return 0;
2401}
2402
2403static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2404{
2405 int i, r;
2406
2407 for (i = 0; i < adev->num_ip_blocks; i++) {
2408 if (!adev->ip_blocks[i].status.sw)
2409 continue;
2410 if (adev->ip_blocks[i].status.hw)
2411 continue;
2412 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2413 if (r) {
2414 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2415 adev->ip_blocks[i].version->funcs->name, r);
2416 return r;
2417 }
2418 adev->ip_blocks[i].status.hw = true;
2419 }
2420
2421 return 0;
2422}
2423
7a3e0bb2
RZ
2424static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2425{
2426 int r = 0;
2427 int i;
80f41f84 2428 uint32_t smu_version;
7a3e0bb2
RZ
2429
2430 if (adev->asic_type >= CHIP_VEGA10) {
2431 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
2432 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2433 continue;
2434
e3c1b071 2435 if (!adev->ip_blocks[i].status.sw)
2436 continue;
2437
482f0e53
ML
2438 /* no need to do the fw loading again if already done*/
2439 if (adev->ip_blocks[i].status.hw == true)
2440 break;
2441
53b3f8f4 2442 if (amdgpu_in_reset(adev) || adev->in_suspend) {
482f0e53
ML
2443 r = adev->ip_blocks[i].version->funcs->resume(adev);
2444 if (r) {
2445 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 2446 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
2447 return r;
2448 }
2449 } else {
2450 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2451 if (r) {
2452 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2453 adev->ip_blocks[i].version->funcs->name, r);
2454 return r;
7a3e0bb2 2455 }
7a3e0bb2 2456 }
482f0e53
ML
2457
2458 adev->ip_blocks[i].status.hw = true;
2459 break;
7a3e0bb2
RZ
2460 }
2461 }
482f0e53 2462
8973d9ec
ED
2463 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2464 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 2465
80f41f84 2466 return r;
7a3e0bb2
RZ
2467}
2468
5fd8518d
AG
2469static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2470{
2471 long timeout;
2472 int r, i;
2473
2474 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2475 struct amdgpu_ring *ring = adev->rings[i];
2476
2477 /* No need to setup the GPU scheduler for rings that don't need it */
2478 if (!ring || ring->no_scheduler)
2479 continue;
2480
2481 switch (ring->funcs->type) {
2482 case AMDGPU_RING_TYPE_GFX:
2483 timeout = adev->gfx_timeout;
2484 break;
2485 case AMDGPU_RING_TYPE_COMPUTE:
2486 timeout = adev->compute_timeout;
2487 break;
2488 case AMDGPU_RING_TYPE_SDMA:
2489 timeout = adev->sdma_timeout;
2490 break;
2491 default:
2492 timeout = adev->video_timeout;
2493 break;
2494 }
2495
2496 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
11f25c84 2497 ring->num_hw_submission, 0,
8ab62eda
JG
2498 timeout, adev->reset_domain->wq,
2499 ring->sched_score, ring->name,
2500 adev->dev);
5fd8518d
AG
2501 if (r) {
2502 DRM_ERROR("Failed to create scheduler on ring %s.\n",
2503 ring->name);
2504 return r;
2505 }
2506 }
2507
d425c6f4
JZ
2508 amdgpu_xcp_update_partition_sched_list(adev);
2509
5fd8518d
AG
2510 return 0;
2511}
2512
2513
e3ecdffa
AD
2514/**
2515 * amdgpu_device_ip_init - run init for hardware IPs
2516 *
2517 * @adev: amdgpu_device pointer
2518 *
2519 * Main initialization pass for hardware IPs. The list of all the hardware
2520 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2521 * are run. sw_init initializes the software state associated with each IP
2522 * and hw_init initializes the hardware associated with each IP.
2523 * Returns 0 on success, negative error code on failure.
2524 */
06ec9070 2525static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
2526{
2527 int i, r;
2528
c030f2e4 2529 r = amdgpu_ras_init(adev);
2530 if (r)
2531 return r;
2532
d38ceaf9 2533 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2534 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2535 continue;
a1255107 2536 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 2537 if (r) {
a1255107
AD
2538 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2539 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 2540 goto init_failed;
2c1a2784 2541 }
a1255107 2542 adev->ip_blocks[i].status.sw = true;
bfca0289 2543
c1c39032
AD
2544 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2545 /* need to do common hw init early so everything is set up for gmc */
2546 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2547 if (r) {
2548 DRM_ERROR("hw_init %d failed %d\n", i, r);
2549 goto init_failed;
2550 }
2551 adev->ip_blocks[i].status.hw = true;
2552 } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2553 /* need to do gmc hw init early so we can allocate gpu mem */
892deb48
VS
2554 /* Try to reserve bad pages early */
2555 if (amdgpu_sriov_vf(adev))
2556 amdgpu_virt_exchange_data(adev);
2557
7ccfd79f 2558 r = amdgpu_device_mem_scratch_init(adev);
2c1a2784 2559 if (r) {
7ccfd79f 2560 DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
72d3f592 2561 goto init_failed;
2c1a2784 2562 }
a1255107 2563 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
2564 if (r) {
2565 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 2566 goto init_failed;
2c1a2784 2567 }
06ec9070 2568 r = amdgpu_device_wb_init(adev);
2c1a2784 2569 if (r) {
06ec9070 2570 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 2571 goto init_failed;
2c1a2784 2572 }
a1255107 2573 adev->ip_blocks[i].status.hw = true;
2493664f
ML
2574
2575 /* right after GMC hw init, we create CSA */
02ff519e 2576 if (adev->gfx.mcbp) {
1e256e27 2577 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
58ab2c08
CK
2578 AMDGPU_GEM_DOMAIN_VRAM |
2579 AMDGPU_GEM_DOMAIN_GTT,
2580 AMDGPU_CSA_SIZE);
2493664f
ML
2581 if (r) {
2582 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 2583 goto init_failed;
2493664f
ML
2584 }
2585 }
d38ceaf9
AD
2586 }
2587 }
2588
c9ffa427 2589 if (amdgpu_sriov_vf(adev))
22c16d25 2590 amdgpu_virt_init_data_exchange(adev);
c9ffa427 2591
533aed27
AG
2592 r = amdgpu_ib_pool_init(adev);
2593 if (r) {
2594 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2595 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2596 goto init_failed;
2597 }
2598
c8963ea4
RZ
2599 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2600 if (r)
72d3f592 2601 goto init_failed;
0a4f2520
RZ
2602
2603 r = amdgpu_device_ip_hw_init_phase1(adev);
2604 if (r)
72d3f592 2605 goto init_failed;
0a4f2520 2606
7a3e0bb2
RZ
2607 r = amdgpu_device_fw_loading(adev);
2608 if (r)
72d3f592 2609 goto init_failed;
7a3e0bb2 2610
0a4f2520
RZ
2611 r = amdgpu_device_ip_hw_init_phase2(adev);
2612 if (r)
72d3f592 2613 goto init_failed;
d38ceaf9 2614
121a2bc6
AG
2615 /*
2616 * retired pages will be loaded from eeprom and reserved here,
2617 * it should be called after amdgpu_device_ip_hw_init_phase2 since
2618 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2619 * for I2C communication which only true at this point.
b82e65a9
GC
2620 *
2621 * amdgpu_ras_recovery_init may fail, but the upper only cares the
2622 * failure from bad gpu situation and stop amdgpu init process
2623 * accordingly. For other failed cases, it will still release all
2624 * the resource and print error message, rather than returning one
2625 * negative value to upper level.
121a2bc6
AG
2626 *
2627 * Note: theoretically, this should be called before all vram allocations
2628 * to protect retired page from abusing
2629 */
b82e65a9
GC
2630 r = amdgpu_ras_recovery_init(adev);
2631 if (r)
2632 goto init_failed;
121a2bc6 2633
cfbb6b00
AG
2634 /**
2635 * In case of XGMI grab extra reference for reset domain for this device
2636 */
a4c63caf 2637 if (adev->gmc.xgmi.num_physical_nodes > 1) {
cfbb6b00 2638 if (amdgpu_xgmi_add_device(adev) == 0) {
46c67660 2639 if (!amdgpu_sriov_vf(adev)) {
2efc30f0
VC
2640 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2641
dfd0287b
LH
2642 if (WARN_ON(!hive)) {
2643 r = -ENOENT;
2644 goto init_failed;
2645 }
2646
46c67660 2647 if (!hive->reset_domain ||
2648 !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2649 r = -ENOENT;
2650 amdgpu_put_xgmi_hive(hive);
2651 goto init_failed;
2652 }
2653
2654 /* Drop the early temporary reset domain we created for device */
2655 amdgpu_reset_put_reset_domain(adev->reset_domain);
2656 adev->reset_domain = hive->reset_domain;
9dfa4860 2657 amdgpu_put_xgmi_hive(hive);
cfbb6b00 2658 }
a4c63caf
AG
2659 }
2660 }
2661
5fd8518d
AG
2662 r = amdgpu_device_init_schedulers(adev);
2663 if (r)
2664 goto init_failed;
e3c1b071 2665
2666 /* Don't init kfd if whole hive need to be reset during init */
84b4dd3f
PY
2667 if (!adev->gmc.xgmi.pending_reset) {
2668 kgd2kfd_init_zone_device(adev);
e3c1b071 2669 amdgpu_amdkfd_device_init(adev);
84b4dd3f 2670 }
c6332b97 2671
bd607166
KR
2672 amdgpu_fru_get_product_info(adev);
2673
72d3f592 2674init_failed:
c6332b97 2675
72d3f592 2676 return r;
d38ceaf9
AD
2677}
2678
e3ecdffa
AD
2679/**
2680 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2681 *
2682 * @adev: amdgpu_device pointer
2683 *
2684 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2685 * this function before a GPU reset. If the value is retained after a
 2686 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2687 */
06ec9070 2688static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2689{
2690 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2691}
2692
e3ecdffa
AD
2693/**
2694 * amdgpu_device_check_vram_lost - check if vram is valid
2695 *
2696 * @adev: amdgpu_device pointer
2697 *
2698 * Checks the reset magic value written to the gart pointer in VRAM.
2699 * The driver calls this after a GPU reset to see if the contents of
 2700 * VRAM were lost or not.
2701 * returns true if vram is lost, false if not.
2702 */
06ec9070 2703static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2704{
dadce777
EQ
2705 if (memcmp(adev->gart.ptr, adev->reset_magic,
2706 AMDGPU_RESET_MAGIC_NUM))
2707 return true;
2708
53b3f8f4 2709 if (!amdgpu_in_reset(adev))
dadce777
EQ
2710 return false;
2711
2712 /*
2713 * For all ASICs with baco/mode1 reset, the VRAM is
2714 * always assumed to be lost.
2715 */
2716 switch (amdgpu_asic_reset_method(adev)) {
2717 case AMD_RESET_METHOD_BACO:
2718 case AMD_RESET_METHOD_MODE1:
2719 return true;
2720 default:
2721 return false;
2722 }
0c49e0b8
CZ
2723}
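/*
 * Sketch of how the reset paths are expected to pair these helpers
 * (illustrative, not part of the original file):
 *
 *   amdgpu_device_fill_reset_magic(adev);          // before the ASIC reset
 *   // ... perform the BACO/mode1 reset ...
 *   vram_lost = amdgpu_device_check_vram_lost(adev);
 *   if (vram_lost)
 *       // ... re-validate or restore VRAM contents ...
 */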
2724
e3ecdffa 2725/**
1112a46b 2726 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2727 *
2728 * @adev: amdgpu_device pointer
b8b72130 2729 * @state: clockgating state (gate or ungate)
e3ecdffa 2730 *
e3ecdffa 2731 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2732 * set_clockgating_state callbacks are run.
 2733 * During late init this pass enables clockgating for hardware IPs;
 2734 * on fini or suspend it disables clockgating for hardware IPs.
e3ecdffa
AD
2735 * Returns 0 on success, negative error code on failure.
2736 */
fdd34271 2737
5d89bb2d
LL
2738int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2739 enum amd_clockgating_state state)
d38ceaf9 2740{
1112a46b 2741 int i, j, r;
d38ceaf9 2742
4a2ba394
SL
2743 if (amdgpu_emu_mode == 1)
2744 return 0;
2745
1112a46b
RZ
2746 for (j = 0; j < adev->num_ip_blocks; j++) {
2747 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2748 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2749 continue;
47198eb7 2750 /* skip CG for GFX, SDMA on S0ix */
5d70a549 2751 if (adev->in_s0ix &&
47198eb7
AD
2752 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2753 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2754 continue;
4a446d55 2755 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2756 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2757 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2758 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2759 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2760 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2761 /* enable clockgating to save power */
a1255107 2762 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2763 state);
4a446d55
AD
2764 if (r) {
2765 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2766 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2767 return r;
2768 }
b0b00ff1 2769 }
d38ceaf9 2770 }
06b18f61 2771
c9f96fd5
RZ
2772 return 0;
2773}
2774
5d89bb2d
LL
2775int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2776 enum amd_powergating_state state)
c9f96fd5 2777{
1112a46b 2778 int i, j, r;
06b18f61 2779
c9f96fd5
RZ
2780 if (amdgpu_emu_mode == 1)
2781 return 0;
2782
1112a46b
RZ
2783 for (j = 0; j < adev->num_ip_blocks; j++) {
2784 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2785 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5 2786 continue;
47198eb7 2787 /* skip PG for GFX, SDMA on S0ix */
5d70a549 2788 if (adev->in_s0ix &&
47198eb7
AD
2789 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2790 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2791 continue;
c9f96fd5
RZ
2792 /* skip CG for VCE/UVD, it's handled specially */
2793 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2794 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2795 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2796 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2797 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2798 /* enable powergating to save power */
2799 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2800 state);
c9f96fd5
RZ
2801 if (r) {
2802 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2803 adev->ip_blocks[i].version->funcs->name, r);
2804 return r;
2805 }
2806 }
2807 }
2dc80b00
S
2808 return 0;
2809}
2810
beff74bc
AD
2811static int amdgpu_device_enable_mgpu_fan_boost(void)
2812{
2813 struct amdgpu_gpu_instance *gpu_ins;
2814 struct amdgpu_device *adev;
2815 int i, ret = 0;
2816
2817 mutex_lock(&mgpu_info.mutex);
2818
2819 /*
2820 * MGPU fan boost feature should be enabled
2821 * only when there are two or more dGPUs in
2822 * the system
2823 */
2824 if (mgpu_info.num_dgpu < 2)
2825 goto out;
2826
2827 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2828 gpu_ins = &(mgpu_info.gpu_ins[i]);
2829 adev = gpu_ins->adev;
2830 if (!(adev->flags & AMD_IS_APU) &&
f10bb940 2831 !gpu_ins->mgpu_fan_enabled) {
beff74bc
AD
2832 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2833 if (ret)
2834 break;
2835
2836 gpu_ins->mgpu_fan_enabled = 1;
2837 }
2838 }
2839
2840out:
2841 mutex_unlock(&mgpu_info.mutex);
2842
2843 return ret;
2844}
2845
e3ecdffa
AD
2846/**
2847 * amdgpu_device_ip_late_init - run late init for hardware IPs
2848 *
2849 * @adev: amdgpu_device pointer
2850 *
2851 * Late initialization pass for hardware IPs. The list of all the hardware
2852 * IPs that make up the asic is walked and the late_init callbacks are run.
2853 * late_init covers any special initialization that an IP requires
 2854 * after all of them have been initialized or something that needs to happen
2855 * late in the init process.
2856 * Returns 0 on success, negative error code on failure.
2857 */
06ec9070 2858static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2859{
60599a03 2860 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2861 int i = 0, r;
2862
2863 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2864 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2865 continue;
2866 if (adev->ip_blocks[i].version->funcs->late_init) {
2867 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2868 if (r) {
2869 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2870 adev->ip_blocks[i].version->funcs->name, r);
2871 return r;
2872 }
2dc80b00 2873 }
73f847db 2874 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2875 }
2876
867e24ca 2877 r = amdgpu_ras_late_init(adev);
2878 if (r) {
2879 DRM_ERROR("amdgpu_ras_late_init failed %d", r);
2880 return r;
2881 }
2882
a891d239
DL
2883 amdgpu_ras_set_error_query_ready(adev, true);
2884
1112a46b
RZ
2885 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2886 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2887
06ec9070 2888 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2889
beff74bc
AD
2890 r = amdgpu_device_enable_mgpu_fan_boost();
2891 if (r)
2892 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2893
4da8b639 2894 /* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */
47fc644f
SS
2895 if (amdgpu_passthrough(adev) &&
2896 ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
2897 adev->asic_type == CHIP_ALDEBARAN))
bc143d8b 2898 amdgpu_dpm_handle_passthrough_sbr(adev, true);
60599a03
EQ
2899
2900 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2901 mutex_lock(&mgpu_info.mutex);
2902
2903 /*
2904 * Reset device p-state to low as this was booted with high.
2905 *
2906 * This should be performed only after all devices from the same
2907 * hive get initialized.
2908 *
 2909 * However, the number of devices in the hive is not known in advance,
 2910 * as it is counted one by one during device initialization.
2911 *
2912 * So, we wait for all XGMI interlinked devices initialized.
2913 * This may bring some delays as those devices may come from
2914 * different hives. But that should be OK.
2915 */
2916 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2917 for (i = 0; i < mgpu_info.num_gpu; i++) {
2918 gpu_instance = &(mgpu_info.gpu_ins[i]);
2919 if (gpu_instance->adev->flags & AMD_IS_APU)
2920 continue;
2921
d84a430d
JK
2922 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2923 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
2924 if (r) {
2925 DRM_ERROR("pstate setting failed (%d).\n", r);
2926 break;
2927 }
2928 }
2929 }
2930
2931 mutex_unlock(&mgpu_info.mutex);
2932 }
2933
d38ceaf9
AD
2934 return 0;
2935}
2936
613aa3ea
LY
2937/**
2938 * amdgpu_device_smu_fini_early - smu hw_fini wrapper
2939 *
2940 * @adev: amdgpu_device pointer
2941 *
2942 * For ASICs need to disable SMC first
2943 */
2944static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
2945{
2946 int i, r;
2947
4e8303cf 2948 if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
613aa3ea
LY
2949 return;
2950
2951 for (i = 0; i < adev->num_ip_blocks; i++) {
2952 if (!adev->ip_blocks[i].status.hw)
2953 continue;
2954 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2955 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2956 /* XXX handle errors */
2957 if (r) {
2958 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2959 adev->ip_blocks[i].version->funcs->name, r);
2960 }
2961 adev->ip_blocks[i].status.hw = false;
2962 break;
2963 }
2964 }
2965}
2966
e9669fb7 2967static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
d38ceaf9
AD
2968{
2969 int i, r;
2970
e9669fb7
AG
2971 for (i = 0; i < adev->num_ip_blocks; i++) {
2972 if (!adev->ip_blocks[i].version->funcs->early_fini)
2973 continue;
5278a159 2974
e9669fb7
AG
2975 r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
2976 if (r) {
2977 DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
2978 adev->ip_blocks[i].version->funcs->name, r);
2979 }
2980 }
c030f2e4 2981
05df1f01 2982 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2983 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2984
7270e895
TY
2985 amdgpu_amdkfd_suspend(adev, false);
2986
613aa3ea
LY
 2987 /* Workaround for ASICs that need to disable SMC first */
2988 amdgpu_device_smu_fini_early(adev);
3e96dbfd 2989
d38ceaf9 2990 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2991 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2992 continue;
8201a67a 2993
a1255107 2994 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2995 /* XXX handle errors */
2c1a2784 2996 if (r) {
a1255107
AD
2997 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2998 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2999 }
8201a67a 3000
a1255107 3001 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
3002 }
3003
6effad8a
GC
3004 if (amdgpu_sriov_vf(adev)) {
3005 if (amdgpu_virt_release_full_gpu(adev, false))
3006 DRM_ERROR("failed to release exclusive mode on fini\n");
3007 }
3008
e9669fb7
AG
3009 return 0;
3010}
3011
3012/**
3013 * amdgpu_device_ip_fini - run fini for hardware IPs
3014 *
3015 * @adev: amdgpu_device pointer
3016 *
3017 * Main teardown pass for hardware IPs. The list of all the hardware
3018 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
3019 * are run. hw_fini tears down the hardware associated with each IP
3020 * and sw_fini tears down any software state associated with each IP.
3021 * Returns 0 on success, negative error code on failure.
3022 */
3023static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
3024{
3025 int i, r;
3026
3027 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
3028 amdgpu_virt_release_ras_err_handler_data(adev);
3029
e9669fb7
AG
3030 if (adev->gmc.xgmi.num_physical_nodes > 1)
3031 amdgpu_xgmi_remove_device(adev);
3032
c004d44e 3033 amdgpu_amdkfd_device_fini_sw(adev);
9950cda2 3034
d38ceaf9 3035 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3036 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 3037 continue;
c12aba3a
ML
3038
3039 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 3040 amdgpu_ucode_free_bo(adev);
1e256e27 3041 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a 3042 amdgpu_device_wb_fini(adev);
7ccfd79f 3043 amdgpu_device_mem_scratch_fini(adev);
533aed27 3044 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
3045 }
3046
a1255107 3047 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 3048 /* XXX handle errors */
2c1a2784 3049 if (r) {
a1255107
AD
3050 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
3051 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 3052 }
a1255107
AD
3053 adev->ip_blocks[i].status.sw = false;
3054 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
3055 }
3056
a6dcfd9c 3057 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3058 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 3059 continue;
a1255107
AD
3060 if (adev->ip_blocks[i].version->funcs->late_fini)
3061 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
3062 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
3063 }
3064
c030f2e4 3065 amdgpu_ras_fini(adev);
3066
d38ceaf9
AD
3067 return 0;
3068}
3069
e3ecdffa 3070/**
beff74bc 3071 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 3072 *
1112a46b 3073 * @work: work_struct.
e3ecdffa 3074 */
beff74bc 3075static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
3076{
3077 struct amdgpu_device *adev =
beff74bc 3078 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
3079 int r;
3080
3081 r = amdgpu_ib_ring_tests(adev);
3082 if (r)
3083 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
3084}
3085
1e317b99
RZ
3086static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
3087{
3088 struct amdgpu_device *adev =
3089 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
3090
90a92662
MD
3091 WARN_ON_ONCE(adev->gfx.gfx_off_state);
3092 WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
3093
3094 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
3095 adev->gfx.gfx_off_state = true;
1e317b99
RZ
3096}
3097
e3ecdffa 3098/**
e7854a03 3099 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
3100 *
3101 * @adev: amdgpu_device pointer
3102 *
3103 * Main suspend function for hardware IPs. The list of all the hardware
3104 * IPs that make up the asic is walked, clockgating is disabled and the
3105 * suspend callbacks are run. suspend puts the hardware and software state
3106 * in each IP into a state suitable for suspend.
3107 * Returns 0 on success, negative error code on failure.
3108 */
e7854a03
AD
3109static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
3110{
3111 int i, r;
3112
50ec83f0
AD
3113 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3114 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 3115
b31d6ada
EQ
3116 /*
3117 * Per PMFW team's suggestion, driver needs to handle gfxoff
3118 * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
3119 * scenario. Add the missing df cstate disablement here.
3120 */
3121 if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
3122 dev_warn(adev->dev, "Failed to disallow df cstate");
3123
e7854a03
AD
3124 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3125 if (!adev->ip_blocks[i].status.valid)
3126 continue;
2b9f7848 3127
e7854a03 3128 /* displays are handled separately */
2b9f7848
ND
3129 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
3130 continue;
3131
3132 /* XXX handle errors */
3133 r = adev->ip_blocks[i].version->funcs->suspend(adev);
3134 /* XXX handle errors */
3135 if (r) {
3136 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3137 adev->ip_blocks[i].version->funcs->name, r);
3138 return r;
e7854a03 3139 }
2b9f7848
ND
3140
3141 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
3142 }
3143
e7854a03
AD
3144 return 0;
3145}
3146
3147/**
3148 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3149 *
3150 * @adev: amdgpu_device pointer
3151 *
3152 * Main suspend function for hardware IPs. The list of all the hardware
3153 * IPs that make up the asic is walked, clockgating is disabled and the
3154 * suspend callbacks are run. suspend puts the hardware and software state
3155 * in each IP into a state suitable for suspend.
3156 * Returns 0 on success, negative error code on failure.
3157 */
3158static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3159{
3160 int i, r;
3161
557f42a2 3162 if (adev->in_s0ix)
bc143d8b 3163 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
34416931 3164
d38ceaf9 3165 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3166 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 3167 continue;
e7854a03
AD
3168 /* displays are handled in phase1 */
3169 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3170 continue;
bff77e86
LM
3171 /* PSP lost connection when err_event_athub occurs */
3172 if (amdgpu_ras_intr_triggered() &&
3173 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3174 adev->ip_blocks[i].status.hw = false;
3175 continue;
3176 }
e3c1b071 3177
3178 /* skip unnecessary suspend if we do not initialize them yet */
3179 if (adev->gmc.xgmi.pending_reset &&
3180 !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3181 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
3182 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3183 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
3184 adev->ip_blocks[i].status.hw = false;
3185 continue;
3186 }
557f42a2 3187
afa6646b 3188 /* skip suspend of gfx/mes and psp for S0ix
32ff160d
AD
3189 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3190 * like at runtime. PSP is also part of the always on hardware
3191 * so no need to suspend it.
3192 */
557f42a2 3193 if (adev->in_s0ix &&
32ff160d 3194 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
afa6646b
AD
3195 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3196 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
557f42a2
AD
3197 continue;
3198
2a7798ea
AD
3199 /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3200 if (adev->in_s0ix &&
4e8303cf
LL
3201 (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
3202 IP_VERSION(5, 0, 0)) &&
3203 (adev->ip_blocks[i].version->type ==
3204 AMD_IP_BLOCK_TYPE_SDMA))
2a7798ea
AD
3205 continue;
3206
e11c7750
TH
3207 /* Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot.
3208 * These are in TMR, hence are expected to be reused by PSP-TOS to reload
3209 * from this location and RLC Autoload automatically also gets loaded
3210 * from here based on PMFW -> PSP message during re-init sequence.
3211 * Therefore, the psp suspend & resume should be skipped to avoid destroy
3212 * the TMR and reload FWs again for IMU enabled APU ASICs.
3213 */
3214 if (amdgpu_in_reset(adev) &&
3215 (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3216 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3217 continue;
3218
d38ceaf9 3219 /* XXX handle errors */
a1255107 3220 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 3221 /* XXX handle errors */
2c1a2784 3222 if (r) {
a1255107
AD
3223 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3224 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 3225 }
876923fb 3226 adev->ip_blocks[i].status.hw = false;
a3a09142 3227 /* handle putting the SMC in the appropriate state */
47fc644f 3228 if (!amdgpu_sriov_vf(adev)) {
86b93fd6
JZ
3229 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3230 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3231 if (r) {
3232 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3233 adev->mp1_state, r);
3234 return r;
3235 }
a3a09142
AD
3236 }
3237 }
d38ceaf9
AD
3238 }
3239
3240 return 0;
3241}
3242
e7854a03
AD
3243/**
3244 * amdgpu_device_ip_suspend - run suspend for hardware IPs
3245 *
3246 * @adev: amdgpu_device pointer
3247 *
3248 * Main suspend function for hardware IPs. The list of all the hardware
3249 * IPs that make up the asic is walked, clockgating is disabled and the
3250 * suspend callbacks are run. suspend puts the hardware and software state
3251 * in each IP into a state suitable for suspend.
3252 * Returns 0 on success, negative error code on failure.
3253 */
3254int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3255{
3256 int r;
3257
3c73683c
JC
3258 if (amdgpu_sriov_vf(adev)) {
3259 amdgpu_virt_fini_data_exchange(adev);
e7819644 3260 amdgpu_virt_request_full_gpu(adev, false);
3c73683c 3261 }
e7819644 3262
e7854a03
AD
3263 r = amdgpu_device_ip_suspend_phase1(adev);
3264 if (r)
3265 return r;
3266 r = amdgpu_device_ip_suspend_phase2(adev);
3267
e7819644
YT
3268 if (amdgpu_sriov_vf(adev))
3269 amdgpu_virt_release_full_gpu(adev, false);
3270
e7854a03
AD
3271 return r;
3272}
3273
06ec9070 3274static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3275{
3276 int i, r;
3277
2cb681b6 3278 static enum amd_ip_block_type ip_order[] = {
2cb681b6 3279 AMD_IP_BLOCK_TYPE_COMMON,
c1c39032 3280 AMD_IP_BLOCK_TYPE_GMC,
39186aef 3281 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
3282 AMD_IP_BLOCK_TYPE_IH,
3283 };
a90ad3c2 3284
95ea3dbc 3285 for (i = 0; i < adev->num_ip_blocks; i++) {
2cb681b6
ML
3286 int j;
3287 struct amdgpu_ip_block *block;
a90ad3c2 3288
4cd2a96d
J
3289 block = &adev->ip_blocks[i];
3290 block->status.hw = false;
2cb681b6 3291
4cd2a96d 3292 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
2cb681b6 3293
4cd2a96d 3294 if (block->version->type != ip_order[j] ||
2cb681b6
ML
3295 !block->status.valid)
3296 continue;
3297
3298 r = block->version->funcs->hw_init(adev);
0aaeefcc 3299 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3300 if (r)
3301 return r;
482f0e53 3302 block->status.hw = true;
a90ad3c2
ML
3303 }
3304 }
3305
3306 return 0;
3307}
3308
06ec9070 3309static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3310{
3311 int i, r;
3312
2cb681b6
ML
3313 static enum amd_ip_block_type ip_order[] = {
3314 AMD_IP_BLOCK_TYPE_SMC,
3315 AMD_IP_BLOCK_TYPE_DCE,
3316 AMD_IP_BLOCK_TYPE_GFX,
3317 AMD_IP_BLOCK_TYPE_SDMA,
ec64350d 3318 AMD_IP_BLOCK_TYPE_MES,
257deb8c 3319 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07 3320 AMD_IP_BLOCK_TYPE_VCE,
d2cdc014
YZ
3321 AMD_IP_BLOCK_TYPE_VCN,
3322 AMD_IP_BLOCK_TYPE_JPEG
2cb681b6 3323 };
a90ad3c2 3324
2cb681b6
ML
3325 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3326 int j;
3327 struct amdgpu_ip_block *block;
a90ad3c2 3328
2cb681b6
ML
3329 for (j = 0; j < adev->num_ip_blocks; j++) {
3330 block = &adev->ip_blocks[j];
3331
3332 if (block->version->type != ip_order[i] ||
482f0e53
ML
3333 !block->status.valid ||
3334 block->status.hw)
2cb681b6
ML
3335 continue;
3336
895bd048
JZ
3337 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3338 r = block->version->funcs->resume(adev);
3339 else
3340 r = block->version->funcs->hw_init(adev);
3341
0aaeefcc 3342 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3343 if (r)
3344 return r;
482f0e53 3345 block->status.hw = true;
a90ad3c2
ML
3346 }
3347 }
3348
3349 return 0;
3350}
3351
e3ecdffa
AD
3352/**
3353 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3354 *
3355 * @adev: amdgpu_device pointer
3356 *
3357 * First resume function for hardware IPs. The list of all the hardware
3358 * IPs that make up the asic is walked and the resume callbacks are run for
3359 * COMMON, GMC, and IH. resume puts the hardware into a functional state
3360 * after a suspend and updates the software state as necessary. This
3361 * function is also used for restoring the GPU after a GPU reset.
3362 * Returns 0 on success, negative error code on failure.
3363 */
06ec9070 3364static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
3365{
3366 int i, r;
3367
a90ad3c2 3368 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3369 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 3370 continue;
a90ad3c2 3371 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3372 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
d7274ec7
BZ
3373 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3374 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
482f0e53 3375
fcf0649f
CZ
3376 r = adev->ip_blocks[i].version->funcs->resume(adev);
3377 if (r) {
3378 DRM_ERROR("resume of IP block <%s> failed %d\n",
3379 adev->ip_blocks[i].version->funcs->name, r);
3380 return r;
3381 }
482f0e53 3382 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
3383 }
3384 }
3385
3386 return 0;
3387}
3388
e3ecdffa
AD
3389/**
3390 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3391 *
3392 * @adev: amdgpu_device pointer
3393 *
3394 * Second resume function for hardware IPs. The list of all the hardware
3395 * IPs that make up the asic is walked and the resume callbacks are run for
3396 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
3397 * functional state after a suspend and updates the software state as
3398 * necessary. This function is also used for restoring the GPU after a GPU
3399 * reset.
3400 * Returns 0 on success, negative error code on failure.
3401 */
06ec9070 3402static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3403{
3404 int i, r;
3405
3406 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3407 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 3408 continue;
fcf0649f 3409 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3410 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
3411 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3412 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 3413 continue;
a1255107 3414 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 3415 if (r) {
a1255107
AD
3416 DRM_ERROR("resume of IP block <%s> failed %d\n",
3417 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 3418 return r;
2c1a2784 3419 }
482f0e53 3420 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
3421 }
3422
3423 return 0;
3424}
3425
e3ecdffa
AD
3426/**
3427 * amdgpu_device_ip_resume - run resume for hardware IPs
3428 *
3429 * @adev: amdgpu_device pointer
3430 *
3431 * Main resume function for hardware IPs. The hardware IPs
3432 * are split into two resume functions because they are
b8920e1e 3433 * also used in recovering from a GPU reset and some additional
e3ecdffa
AD
3434 * steps need to be taken between them. In this case (S3/S4) they are
3435 * run sequentially.
3436 * Returns 0 on success, negative error code on failure.
3437 */
06ec9070 3438static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
3439{
3440 int r;
3441
06ec9070 3442 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
3443 if (r)
3444 return r;
7a3e0bb2
RZ
3445
3446 r = amdgpu_device_fw_loading(adev);
3447 if (r)
3448 return r;
3449
06ec9070 3450 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
3451
3452 return r;
3453}
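/*
 * Illustrative ordering sketch (not part of the driver source, derived only
 * from the helpers above): the sequence amdgpu_device_ip_resume() runs for
 * S3/S4 resume and GPU-reset recovery.
 *
 *   amdgpu_device_ip_resume_phase1(adev);  // COMMON, GMC, IH (+ PSP on SR-IOV)
 *   amdgpu_device_fw_loading(adev);        // firmware loading step between the phases
 *   amdgpu_device_ip_resume_phase2(adev);  // every remaining IP block
 */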
3454
e3ecdffa
AD
3455/**
3456 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3457 *
3458 * @adev: amdgpu_device pointer
3459 *
3460 * Query the VBIOS data tables to determine if the board supports SR-IOV.
3461 */
4e99a44e 3462static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 3463{
6867e1b5
ML
3464 if (amdgpu_sriov_vf(adev)) {
3465 if (adev->is_atom_fw) {
58ff791a 3466 if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
6867e1b5
ML
3467 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3468 } else {
3469 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3470 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3471 }
3472
3473 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3474 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 3475 }
048765ad
AR
3476}
3477
e3ecdffa
AD
3478/**
3479 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3480 *
3481 * @asic_type: AMD asic type
3482 *
3483 * Check if there is DC (new modesetting infrastructure) support for an asic.
3484 * returns true if DC has support, false if not.
3485 */
4562236b
HW
3486bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3487{
3488 switch (asic_type) {
0637d417
AD
3489#ifdef CONFIG_DRM_AMDGPU_SI
3490 case CHIP_HAINAN:
3491#endif
3492 case CHIP_TOPAZ:
3493 /* chips with no display hardware */
3494 return false;
4562236b 3495#if defined(CONFIG_DRM_AMD_DC)
64200c46
MR
3496 case CHIP_TAHITI:
3497 case CHIP_PITCAIRN:
3498 case CHIP_VERDE:
3499 case CHIP_OLAND:
2d32ffd6
AD
3500 /*
3501 * We have systems in the wild with these ASICs that require
3502 * LVDS and VGA support which is not supported with DC.
3503 *
3504 * Fallback to the non-DC driver here by default so as not to
3505 * cause regressions.
3506 */
3507#if defined(CONFIG_DRM_AMD_DC_SI)
3508 return amdgpu_dc > 0;
3509#else
3510 return false;
64200c46 3511#endif
4562236b 3512 case CHIP_BONAIRE:
0d6fbccb 3513 case CHIP_KAVERI:
367e6687
AD
3514 case CHIP_KABINI:
3515 case CHIP_MULLINS:
d9fda248
HW
3516 /*
3517 * We have systems in the wild with these ASICs that require
b5a0168e 3518 * VGA support which is not supported with DC.
d9fda248
HW
3519 *
3520 * Fallback to the non-DC driver here by default so as not to
3521 * cause regressions.
3522 */
3523 return amdgpu_dc > 0;
f7f12b25 3524 default:
fd187853 3525 return amdgpu_dc != 0;
f7f12b25 3526#else
4562236b 3527 default:
93b09a9a 3528 if (amdgpu_dc > 0)
b8920e1e 3529 DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
4562236b 3530 return false;
f7f12b25 3531#endif
4562236b
HW
3532 }
3533}
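/*
 * Illustrative summary (derived only from the switch above, not from extra
 * documentation): how the amdgpu.dc module parameter interacts with the ASIC
 * type when deciding whether the DC display path is used.
 *
 *   chips with no display hardware (e.g. Hainan, Topaz)     -> never use DC
 *   SI/CIK parts that may need LVDS/VGA (Tahiti .. Mullins) -> DC only when amdgpu.dc=1
 *   all other ASICs built with CONFIG_DRM_AMD_DC            -> DC unless amdgpu.dc=0
 */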
3534
3535/**
3536 * amdgpu_device_has_dc_support - check if dc is supported
3537 *
982a820b 3538 * @adev: amdgpu_device pointer
4562236b
HW
3539 *
3540 * Returns true for supported, false for not supported
3541 */
3542bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3543{
25263da3 3544 if (adev->enable_virtual_display ||
abaf210c 3545 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
2555039d
XY
3546 return false;
3547
4562236b
HW
3548 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3549}
3550
d4535e2c
AG
3551static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3552{
3553 struct amdgpu_device *adev =
3554 container_of(__work, struct amdgpu_device, xgmi_reset_work);
d95e8e97 3555 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
d4535e2c 3556
c6a6e2db
AG
3557 /* It's a bug to not have a hive within this function */
3558 if (WARN_ON(!hive))
3559 return;
3560
3561 /*
3562 * Use task barrier to synchronize all xgmi reset works across the
3563 * hive. task_barrier_enter and task_barrier_exit will block
3564 * until all the threads running the xgmi reset works reach
3565 * those points. task_barrier_full will do both blocks.
3566 */
3567 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3568
3569 task_barrier_enter(&hive->tb);
4a580877 3570 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
c6a6e2db
AG
3571
3572 if (adev->asic_reset_res)
3573 goto fail;
3574
3575 task_barrier_exit(&hive->tb);
4a580877 3576 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
c6a6e2db
AG
3577
3578 if (adev->asic_reset_res)
3579 goto fail;
43c4d576 3580
21226f02 3581 amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB);
c6a6e2db
AG
3582 } else {
3583
3584 task_barrier_full(&hive->tb);
3585 adev->asic_reset_res = amdgpu_asic_reset(adev);
3586 }
ce316fa5 3587
c6a6e2db 3588fail:
d4535e2c 3589 if (adev->asic_reset_res)
fed184e9 3590 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
4a580877 3591 adev->asic_reset_res, adev_to_drm(adev)->unique);
d95e8e97 3592 amdgpu_put_xgmi_hive(hive);
d4535e2c
AG
3593}
3594
71f98027
AD
3595static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3596{
3597 char *input = amdgpu_lockup_timeout;
3598 char *timeout_setting = NULL;
3599 int index = 0;
3600 long timeout;
3601 int ret = 0;
3602
3603 /*
67387dfe
AD
3604 * By default the timeout for non-compute jobs is 10000
3605 * and 60000 for compute jobs.
71f98027 3606 * In SR-IOV or passthrough mode, the timeout for compute
b7b2a316 3607 * jobs is 60000 by default.
71f98027
AD
3608 */
3609 adev->gfx_timeout = msecs_to_jiffies(10000);
3610 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
9882e278
ED
3611 if (amdgpu_sriov_vf(adev))
3612 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3613 msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
71f98027 3614 else
67387dfe 3615 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027 3616
f440ff44 3617 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 3618 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 3619 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
3620 ret = kstrtol(timeout_setting, 0, &timeout);
3621 if (ret)
3622 return ret;
3623
3624 if (timeout == 0) {
3625 index++;
3626 continue;
3627 } else if (timeout < 0) {
3628 timeout = MAX_SCHEDULE_TIMEOUT;
127aedf9
CK
3629 dev_warn(adev->dev, "lockup timeout disabled");
3630 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
71f98027
AD
3631 } else {
3632 timeout = msecs_to_jiffies(timeout);
3633 }
3634
3635 switch (index++) {
3636 case 0:
3637 adev->gfx_timeout = timeout;
3638 break;
3639 case 1:
3640 adev->compute_timeout = timeout;
3641 break;
3642 case 2:
3643 adev->sdma_timeout = timeout;
3644 break;
3645 case 3:
3646 adev->video_timeout = timeout;
3647 break;
3648 default:
3649 break;
3650 }
3651 }
3652 /*
3653 * There is only one value specified and
3654 * it should apply to all non-compute jobs.
3655 */
bcccee89 3656 if (index == 1) {
71f98027 3657 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
3658 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3659 adev->compute_timeout = adev->gfx_timeout;
3660 }
71f98027
AD
3661 }
3662
3663 return ret;
3664}
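/*
 * Illustrative example (not part of the driver source): how the
 * comma-separated amdgpu.lockup_timeout module parameter maps onto the
 * per-engine timeouts parsed above. Values are in milliseconds; 0 keeps the
 * default for that slot and a negative value disables the timeout.
 *
 *   modprobe amdgpu lockup_timeout=10000,60000,10000,10000
 *     -> adev->gfx_timeout     = msecs_to_jiffies(10000)
 *     -> adev->compute_timeout = msecs_to_jiffies(60000)
 *     -> adev->sdma_timeout    = msecs_to_jiffies(10000)
 *     -> adev->video_timeout   = msecs_to_jiffies(10000)
 *
 *   modprobe amdgpu lockup_timeout=5000
 *     -> a single value applies to all non-compute engines; compute keeps
 *        its default unless running under SR-IOV or passthrough.
 */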
d4535e2c 3665
4a74c38c
PY
3666/**
3667 * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3668 *
3669 * @adev: amdgpu_device pointer
3670 *
3671 * RAM is direct mapped to the GPU if the IOMMU is not enabled or is in passthrough mode
3672 */
3673static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3674{
3675 struct iommu_domain *domain;
3676
3677 domain = iommu_get_domain_for_dev(adev->dev);
3678 if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3679 adev->ram_is_direct_mapped = true;
3680}
3681
77f3a5cd 3682static const struct attribute *amdgpu_dev_attributes[] = {
77f3a5cd
ND
3683 &dev_attr_pcie_replay_count.attr,
3684 NULL
3685};
3686
02ff519e
AD
3687static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
3688{
3689 if (amdgpu_mcbp == 1)
3690 adev->gfx.mcbp = true;
1e9e15dc
JZ
3691 else if (amdgpu_mcbp == 0)
3692 adev->gfx.mcbp = false;
4e8303cf
LL
3693 else if ((amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 0, 0)) &&
3694 (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 0, 0)) &&
1e9e15dc 3695 adev->gfx.num_gfx_rings)
50a7c876
AD
3696 adev->gfx.mcbp = true;
3697
02ff519e
AD
3698 if (amdgpu_sriov_vf(adev))
3699 adev->gfx.mcbp = true;
3700
3701 if (adev->gfx.mcbp)
3702 DRM_INFO("MCBP is enabled\n");
3703}
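/*
 * Illustrative summary (derived only from amdgpu_device_set_mcbp() above):
 * how the amdgpu.mcbp module parameter and the ASIC decide whether
 * mid-command-buffer preemption is enabled.
 *
 *   amdgpu_mcbp == 1       -> MCBP forced on
 *   amdgpu_mcbp == 0       -> MCBP forced off
 *   any other value (auto) -> on for GC 9.x parts that have gfx rings
 *   SR-IOV VF              -> always on, regardless of the above
 */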
3704
d38ceaf9
AD
3705/**
3706 * amdgpu_device_init - initialize the driver
3707 *
3708 * @adev: amdgpu_device pointer
d38ceaf9
AD
3709 * @flags: driver flags
3710 *
3711 * Initializes the driver info and hw (all asics).
3712 * Returns 0 for success or an error on failure.
3713 * Called at driver startup.
3714 */
3715int amdgpu_device_init(struct amdgpu_device *adev,
d38ceaf9
AD
3716 uint32_t flags)
3717{
8aba21b7
LT
3718 struct drm_device *ddev = adev_to_drm(adev);
3719 struct pci_dev *pdev = adev->pdev;
d38ceaf9 3720 int r, i;
b98c6299 3721 bool px = false;
95844d20 3722 u32 max_MBps;
59e9fff1 3723 int tmp;
d38ceaf9
AD
3724
3725 adev->shutdown = false;
d38ceaf9 3726 adev->flags = flags;
4e66d7d2
YZ
3727
3728 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3729 adev->asic_type = amdgpu_force_asic_type;
3730 else
3731 adev->asic_type = flags & AMD_ASIC_MASK;
3732
d38ceaf9 3733 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 3734 if (amdgpu_emu_mode == 1)
8bdab6bb 3735 adev->usec_timeout *= 10;
770d13b1 3736 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
3737 adev->accel_working = false;
3738 adev->num_rings = 0;
68ce8b24 3739 RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
d38ceaf9
AD
3740 adev->mman.buffer_funcs = NULL;
3741 adev->mman.buffer_funcs_ring = NULL;
3742 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 3743 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 3744 adev->gmc.gmc_funcs = NULL;
7bd939d0 3745 adev->harvest_ip_mask = 0x0;
f54d1867 3746 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 3747 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
3748
3749 adev->smc_rreg = &amdgpu_invalid_rreg;
3750 adev->smc_wreg = &amdgpu_invalid_wreg;
3751 adev->pcie_rreg = &amdgpu_invalid_rreg;
3752 adev->pcie_wreg = &amdgpu_invalid_wreg;
0c552ed3
LM
3753 adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
3754 adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
36b9a952
HR
3755 adev->pciep_rreg = &amdgpu_invalid_rreg;
3756 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
3757 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3758 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
a76b2870
CL
3759 adev->pcie_rreg64_ext = &amdgpu_invalid_rreg64_ext;
3760 adev->pcie_wreg64_ext = &amdgpu_invalid_wreg64_ext;
d38ceaf9
AD
3761 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3762 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3763 adev->didt_rreg = &amdgpu_invalid_rreg;
3764 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
3765 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3766 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
3767 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3768 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3769
3e39ab90
AD
3770 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3771 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3772 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
3773
3774 /* mutex initializations are all done here so we
b8920e1e
SS
3775 * can recall functions without having locking issues
3776 */
0e5ca0d1 3777 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
3778 mutex_init(&adev->pm.mutex);
3779 mutex_init(&adev->gfx.gpu_clock_mutex);
3780 mutex_init(&adev->srbm_mutex);
b8866c26 3781 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3782 mutex_init(&adev->gfx.gfx_off_mutex);
98a54e88 3783 mutex_init(&adev->gfx.partition_mutex);
d38ceaf9 3784 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3785 mutex_init(&adev->mn_lock);
e23b74aa 3786 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3787 hash_init(adev->mn_hash);
32eaeae0 3788 mutex_init(&adev->psp.mutex);
bd052211 3789 mutex_init(&adev->notifier_lock);
8cda7a4f 3790 mutex_init(&adev->pm.stable_pstate_ctx_lock);
f113cc32 3791 mutex_init(&adev->benchmark_mutex);
d38ceaf9 3792
ab3b9de6 3793 amdgpu_device_init_apu_flags(adev);
9f6a7857 3794
912dfc84
EQ
3795 r = amdgpu_device_check_arguments(adev);
3796 if (r)
3797 return r;
d38ceaf9 3798
d38ceaf9
AD
3799 spin_lock_init(&adev->mmio_idx_lock);
3800 spin_lock_init(&adev->smc_idx_lock);
3801 spin_lock_init(&adev->pcie_idx_lock);
3802 spin_lock_init(&adev->uvd_ctx_idx_lock);
3803 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3804 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3805 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3806 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3807 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3808
0c4e7fa5
CZ
3809 INIT_LIST_HEAD(&adev->shadow_list);
3810 mutex_init(&adev->shadow_list_lock);
3811
655ce9cb 3812 INIT_LIST_HEAD(&adev->reset_list);
3813
6492e1b0 3814 INIT_LIST_HEAD(&adev->ras_list);
3815
3e38b634
EQ
3816 INIT_LIST_HEAD(&adev->pm.od_kobj_list);
3817
beff74bc
AD
3818 INIT_DELAYED_WORK(&adev->delayed_init_work,
3819 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3820 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3821 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3822
d4535e2c
AG
3823 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3824
d23ee13f 3825 adev->gfx.gfx_off_req_count = 1;
0ad7347a
AA
3826 adev->gfx.gfx_off_residency = 0;
3827 adev->gfx.gfx_off_entrycount = 0;
b6e79d9a 3828 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3829
b265bdbd
EQ
3830 atomic_set(&adev->throttling_logging_enabled, 1);
3831 /*
3832 * If throttling continues, logging will be performed every minute
3833 * to avoid log flooding. "-1" is subtracted since the thermal
3834 * throttling interrupt comes every second. Thus, the total logging
3835 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
3836 * for throttling interrupt) = 60 seconds.
3837 */
3838 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3839 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3840
0fa49558
AX
3841 /* Registers mapping */
3842 /* TODO: block userspace mapping of io register */
da69c161
KW
3843 if (adev->asic_type >= CHIP_BONAIRE) {
3844 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3845 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3846 } else {
3847 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3848 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3849 }
d38ceaf9 3850
6c08e0ef
EQ
3851 for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3852 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3853
d38ceaf9 3854 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
b8920e1e 3855 if (!adev->rmmio)
d38ceaf9 3856 return -ENOMEM;
b8920e1e 3857
d38ceaf9 3858 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
b8920e1e 3859 DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size);
d38ceaf9 3860
436afdfa
PY
3861 /*
3862 * Reset domain needs to be present early, before the XGMI hive (if any)
3863 * is discovered and initialized, to use the reset sem and in_gpu reset flag
3864 * early on during init and before calling to RREG32.
3865 */
3866 adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3867 if (!adev->reset_domain)
3868 return -ENOMEM;
3869
3aa0115d
ML
3870 /* detect hw virtualization here */
3871 amdgpu_detect_virtualization(adev);
3872
04e85958
TL
3873 amdgpu_device_get_pcie_info(adev);
3874
dffa11b4
ML
3875 r = amdgpu_device_get_job_timeout_settings(adev);
3876 if (r) {
3877 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4ef87d8f 3878 return r;
a190d1c7
XY
3879 }
3880
d38ceaf9 3881 /* early init functions */
06ec9070 3882 r = amdgpu_device_ip_early_init(adev);
d38ceaf9 3883 if (r)
4ef87d8f 3884 return r;
d38ceaf9 3885
02ff519e
AD
3886 amdgpu_device_set_mcbp(adev);
3887
b7cdb41e
ML
3888 /* Get rid of things like offb */
3889 r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
3890 if (r)
3891 return r;
3892
4d33e704
SK
3893 /* Enable TMZ based on IP_VERSION */
3894 amdgpu_gmc_tmz_set(adev);
3895
957b0787 3896 amdgpu_gmc_noretry_set(adev);
4a0165f0
VS
3897 /* Need to get xgmi info early to decide the reset behavior */
3898 if (adev->gmc.xgmi.supported) {
3899 r = adev->gfxhub.funcs->get_xgmi_info(adev);
3900 if (r)
3901 return r;
3902 }
3903
8e6d0b69 3904 /* enable PCIE atomic ops */
b4520bfd
GW
3905 if (amdgpu_sriov_vf(adev)) {
3906 if (adev->virt.fw_reserve.p_pf2vf)
3907 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
3908 adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
3909 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
0e768043
YZ
3910 /* APUs with gfx9 onwards don't rely on PCIe atomics; their
3911 * internal path natively supports atomics, so set have_atomics_support to true.
3912 */
b4520bfd 3913 } else if ((adev->flags & AMD_IS_APU) &&
4e8303cf
LL
3914 (amdgpu_ip_version(adev, GC_HWIP, 0) >
3915 IP_VERSION(9, 0, 0))) {
0e768043 3916 adev->have_atomics_support = true;
b4520bfd 3917 } else {
8e6d0b69 3918 adev->have_atomics_support =
3919 !pci_enable_atomic_ops_to_root(adev->pdev,
3920 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3921 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
b4520bfd
GW
3922 }
3923
8e6d0b69 3924 if (!adev->have_atomics_support)
3925 dev_info(adev->dev, "PCIE atomic ops is not supported\n");
3926
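/*
 * Illustrative summary (derived only from the checks above): how
 * adev->have_atomics_support is decided.
 *
 *   SR-IOV VF                      -> taken from the pf2vf exchange flags, if present
 *   APU with GC IP newer than 9.0  -> assumed supported (internal fabric, not PCIe)
 *   everything else                -> decided by pci_enable_atomic_ops_to_root()
 *                                     with COMP32 | COMP64
 */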
6585661d 3927 /* doorbell bar mapping and doorbell index init */
43c064db 3928 amdgpu_doorbell_init(adev);
6585661d 3929
9475a943
SL
3930 if (amdgpu_emu_mode == 1) {
3931 /* post the asic in emulation mode */
3932 emu_soc_asic_init(adev);
bfca0289 3933 goto fence_driver_init;
9475a943 3934 }
bfca0289 3935
04442bf7
LL
3936 amdgpu_reset_init(adev);
3937
4e99a44e 3938 /* detect if we are with an SRIOV vbios */
b4520bfd
GW
3939 if (adev->bios)
3940 amdgpu_device_detect_sriov_bios(adev);
048765ad 3941
95e8e59e
AD
3942 /* check if we need to reset the asic
3943 * E.g., driver was not cleanly unloaded previously, etc.
3944 */
f14899fd 3945 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
e3c1b071 3946 if (adev->gmc.xgmi.num_physical_nodes) {
3947 dev_info(adev->dev, "Pending hive reset.\n");
3948 adev->gmc.xgmi.pending_reset = true;
3949 /* Only need to init necessary block for SMU to handle the reset */
3950 for (i = 0; i < adev->num_ip_blocks; i++) {
3951 if (!adev->ip_blocks[i].status.valid)
3952 continue;
3953 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3954 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3955 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3956 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
751f43e7 3957 DRM_DEBUG("IP %s disabled for hw_init.\n",
e3c1b071 3958 adev->ip_blocks[i].version->funcs->name);
3959 adev->ip_blocks[i].status.hw = true;
3960 }
3961 }
3962 } else {
59e9fff1 3963 tmp = amdgpu_reset_method;
3964 /* It should do a default reset when loading or reloading the driver,
3965 * regardless of the module parameter reset_method.
3966 */
3967 amdgpu_reset_method = AMD_RESET_METHOD_NONE;
e3c1b071 3968 r = amdgpu_asic_reset(adev);
59e9fff1 3969 amdgpu_reset_method = tmp;
e3c1b071 3970 if (r) {
3971 dev_err(adev->dev, "asic reset on init failed\n");
3972 goto failed;
3973 }
95e8e59e
AD
3974 }
3975 }
3976
d38ceaf9 3977 /* Post card if necessary */
39c640c0 3978 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3979 if (!adev->bios) {
bec86378 3980 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3981 r = -EINVAL;
3982 goto failed;
d38ceaf9 3983 }
bec86378 3984 DRM_INFO("GPU posting now...\n");
4d2997ab 3985 r = amdgpu_device_asic_init(adev);
4e99a44e
ML
3986 if (r) {
3987 dev_err(adev->dev, "gpu post error!\n");
3988 goto failed;
3989 }
d38ceaf9
AD
3990 }
3991
9535a86a
SZ
3992 if (adev->bios) {
3993 if (adev->is_atom_fw) {
3994 /* Initialize clocks */
3995 r = amdgpu_atomfirmware_get_clock_info(adev);
3996 if (r) {
3997 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
3998 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3999 goto failed;
4000 }
4001 } else {
4002 /* Initialize clocks */
4003 r = amdgpu_atombios_get_clock_info(adev);
4004 if (r) {
4005 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
4006 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4007 goto failed;
4008 }
4009 /* init i2c buses */
4010 if (!amdgpu_device_has_dc_support(adev))
4011 amdgpu_atombios_i2c_init(adev);
a5bde2f9 4012 }
2c1a2784 4013 }
d38ceaf9 4014
bfca0289 4015fence_driver_init:
d38ceaf9 4016 /* Fence driver */
067f44c8 4017 r = amdgpu_fence_driver_sw_init(adev);
2c1a2784 4018 if (r) {
067f44c8 4019 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
e23b74aa 4020 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 4021 goto failed;
2c1a2784 4022 }
d38ceaf9
AD
4023
4024 /* init the mode config */
4a580877 4025 drm_mode_config_init(adev_to_drm(adev));
d38ceaf9 4026
06ec9070 4027 r = amdgpu_device_ip_init(adev);
d38ceaf9 4028 if (r) {
06ec9070 4029 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 4030 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
970fd197 4031 goto release_ras_con;
d38ceaf9
AD
4032 }
4033
8d35a259
LG
4034 amdgpu_fence_driver_hw_init(adev);
4035
d69b8971
YZ
4036 dev_info(adev->dev,
4037 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
4038 adev->gfx.config.max_shader_engines,
4039 adev->gfx.config.max_sh_per_se,
4040 adev->gfx.config.max_cu_per_sh,
4041 adev->gfx.cu_info.number);
4042
d38ceaf9
AD
4043 adev->accel_working = true;
4044
e59c0205
AX
4045 amdgpu_vm_check_compute_bug(adev);
4046
95844d20
MO
4047 /* Initialize the buffer migration limit. */
4048 if (amdgpu_moverate >= 0)
4049 max_MBps = amdgpu_moverate;
4050 else
4051 max_MBps = 8; /* Allow 8 MB/s. */
4052 /* Get a log2 for easy divisions. */
4053 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
4054
b0adca4d
EQ
4055 /*
4056 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
4057 * Otherwise the mgpu fan boost feature will be skipped because the
4058 * gpu instance count would be too low.
4059 */
4060 amdgpu_register_gpu_instance(adev);
4061
d38ceaf9
AD
4062 /* enable clockgating, etc. after ib tests, etc. since some blocks require
4063 * explicit gating rather than handling it automatically.
4064 */
e3c1b071 4065 if (!adev->gmc.xgmi.pending_reset) {
4066 r = amdgpu_device_ip_late_init(adev);
4067 if (r) {
4068 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
4069 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
970fd197 4070 goto release_ras_con;
e3c1b071 4071 }
4072 /* must succeed. */
4073 amdgpu_ras_resume(adev);
4074 queue_delayed_work(system_wq, &adev->delayed_init_work,
4075 msecs_to_jiffies(AMDGPU_RESUME_MS));
2c1a2784 4076 }
d38ceaf9 4077
38eecbe0
CL
4078 if (amdgpu_sriov_vf(adev)) {
4079 amdgpu_virt_release_full_gpu(adev, true);
2c738637 4080 flush_delayed_work(&adev->delayed_init_work);
38eecbe0 4081 }
2c738637 4082
90bcb9b5
EQ
4083 /*
4084 * Register these sysfs interfaces after `late_init`, as some of the
4085 * operations performed in `late_init` can affect how the sysfs
4086 * interfaces are created.
4087 */
4088 r = amdgpu_atombios_sysfs_init(adev);
4089 if (r)
4090 drm_err(&adev->ddev,
4091 "registering atombios sysfs failed (%d).\n", r);
4092
4093 r = amdgpu_pm_sysfs_init(adev);
4094 if (r)
4095 DRM_ERROR("registering pm sysfs failed (%d).\n", r);
4096
4097 r = amdgpu_ucode_sysfs_init(adev);
4098 if (r) {
4099 adev->ucode_sysfs_en = false;
4100 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
4101 } else
4102 adev->ucode_sysfs_en = true;
4103
77f3a5cd 4104 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
5aea5327 4105 if (r)
77f3a5cd 4106 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166 4107
76da73f0
LL
4108 r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
4109 if (r)
4110 dev_err(adev->dev,
4111 "Could not create amdgpu board attributes\n");
4112
7957ec80
LL
4113 amdgpu_fru_sysfs_init(adev);
4114
d155bef0
AB
4115 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4116 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
4117 if (r)
4118 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
4119
c1dd4aa6
AG
4120 /* Have stored pci confspace at hand for restore in sudden PCI error */
4121 if (amdgpu_device_cache_pci_state(adev->pdev))
4122 pci_restore_state(pdev);
4123
8c3dd61c
KHF
4124 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
4125 /* this will fail for cards that aren't VGA class devices, just
b8920e1e
SS
4126 * ignore it
4127 */
8c3dd61c 4128 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
bf44e8ce 4129 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
8c3dd61c 4130
d37a3929
OC
4131 px = amdgpu_device_supports_px(ddev);
4132
4133 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
4134 apple_gmux_detect(NULL, NULL)))
8c3dd61c
KHF
4135 vga_switcheroo_register_client(adev->pdev,
4136 &amdgpu_switcheroo_ops, px);
d37a3929
OC
4137
4138 if (px)
8c3dd61c 4139 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
8c3dd61c 4140
e3c1b071 4141 if (adev->gmc.xgmi.pending_reset)
4142 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
4143 msecs_to_jiffies(AMDGPU_RESUME_MS));
4144
4a74c38c
PY
4145 amdgpu_device_check_iommu_direct_map(adev);
4146
d38ceaf9 4147 return 0;
83ba126a 4148
970fd197 4149release_ras_con:
38eecbe0
CL
4150 if (amdgpu_sriov_vf(adev))
4151 amdgpu_virt_release_full_gpu(adev, true);
4152
4153 /* failed in exclusive mode due to timeout */
4154 if (amdgpu_sriov_vf(adev) &&
4155 !amdgpu_sriov_runtime(adev) &&
4156 amdgpu_virt_mmio_blocked(adev) &&
4157 !amdgpu_virt_wait_reset(adev)) {
4158 dev_err(adev->dev, "VF exclusive mode timeout\n");
4159 /* Don't send request since VF is inactive. */
4160 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4161 adev->virt.ops = NULL;
4162 r = -EAGAIN;
4163 }
970fd197
SY
4164 amdgpu_release_ras_context(adev);
4165
83ba126a 4166failed:
89041940 4167 amdgpu_vf_error_trans_all(adev);
8840a387 4168
83ba126a 4169 return r;
d38ceaf9
AD
4170}
4171
07775fc1
AG
4172static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
4173{
62d5f9f7 4174
07775fc1
AG
4175 /* Clear all CPU mappings pointing to this device */
4176 unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
4177
4178 /* Unmap all mapped bars - Doorbell, registers and VRAM */
43c064db 4179 amdgpu_doorbell_fini(adev);
07775fc1
AG
4180
4181 iounmap(adev->rmmio);
4182 adev->rmmio = NULL;
4183 if (adev->mman.aper_base_kaddr)
4184 iounmap(adev->mman.aper_base_kaddr);
4185 adev->mman.aper_base_kaddr = NULL;
4186
4187 /* Memory manager related */
a0ba1279 4188 if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
07775fc1
AG
4189 arch_phys_wc_del(adev->gmc.vram_mtrr);
4190 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4191 }
4192}
4193
d38ceaf9 4194/**
bbe04dec 4195 * amdgpu_device_fini_hw - tear down the driver
d38ceaf9
AD
4196 *
4197 * @adev: amdgpu_device pointer
4198 *
4199 * Tear down the driver info (all asics).
4200 * Called at driver shutdown.
4201 */
72c8c97b 4202void amdgpu_device_fini_hw(struct amdgpu_device *adev)
d38ceaf9 4203{
aac89168 4204 dev_info(adev->dev, "amdgpu: finishing device.\n");
9f875167 4205 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 4206 adev->shutdown = true;
9f875167 4207
752c683d
ML
4208 /* make sure IB tests are finished before entering exclusive mode
4209 * to avoid preempting a running IB test
b8920e1e 4210 */
519b8b76 4211 if (amdgpu_sriov_vf(adev)) {
752c683d 4212 amdgpu_virt_request_full_gpu(adev, false);
519b8b76
BZ
4213 amdgpu_virt_fini_data_exchange(adev);
4214 }
752c683d 4215
e5b03032
ML
4216 /* disable all interrupts */
4217 amdgpu_irq_disable_all(adev);
47fc644f 4218 if (adev->mode_info.mode_config_initialized) {
1053b9c9 4219 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4a580877 4220 drm_helper_force_disable_all(adev_to_drm(adev));
ff97cba8 4221 else
4a580877 4222 drm_atomic_helper_shutdown(adev_to_drm(adev));
ff97cba8 4223 }
8d35a259 4224 amdgpu_fence_driver_hw_fini(adev);
72c8c97b 4225
cd3a8a59 4226 if (adev->mman.initialized)
9bff18d1 4227 drain_workqueue(adev->mman.bdev.wq);
98f56188 4228
53e9d836 4229 if (adev->pm.sysfs_initialized)
7c868b59 4230 amdgpu_pm_sysfs_fini(adev);
72c8c97b
AG
4231 if (adev->ucode_sysfs_en)
4232 amdgpu_ucode_sysfs_fini(adev);
4233 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
7957ec80 4234 amdgpu_fru_sysfs_fini(adev);
72c8c97b 4235
232d1d43
SY
4236 /* disable ras feature must before hw fini */
4237 amdgpu_ras_pre_fini(adev);
4238
e9669fb7 4239 amdgpu_device_ip_fini_early(adev);
d10d0daa 4240
a3848df6
YW
4241 amdgpu_irq_fini_hw(adev);
4242
b6fd6e0f
SK
4243 if (adev->mman.initialized)
4244 ttm_device_clear_dma_mappings(&adev->mman.bdev);
894c6890 4245
d10d0daa 4246 amdgpu_gart_dummy_page_fini(adev);
07775fc1 4247
39934d3e
VP
4248 if (drm_dev_is_unplugged(adev_to_drm(adev)))
4249 amdgpu_device_unmap_mmio(adev);
87172e89 4250
72c8c97b
AG
4251}
4252
4253void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4254{
62d5f9f7 4255 int idx;
d37a3929 4256 bool px;
62d5f9f7 4257
8d35a259 4258 amdgpu_fence_driver_sw_fini(adev);
a5c5d8d5 4259 amdgpu_device_ip_fini(adev);
b31d3063 4260 amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
d38ceaf9 4261 adev->accel_working = false;
68ce8b24 4262 dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
04442bf7
LL
4263
4264 amdgpu_reset_fini(adev);
4265
d38ceaf9 4266 /* free i2c buses */
4562236b
HW
4267 if (!amdgpu_device_has_dc_support(adev))
4268 amdgpu_i2c_fini(adev);
bfca0289
SL
4269
4270 if (amdgpu_emu_mode != 1)
4271 amdgpu_atombios_fini(adev);
4272
d38ceaf9
AD
4273 kfree(adev->bios);
4274 adev->bios = NULL;
d37a3929 4275
8a2b5139
LL
4276 kfree(adev->fru_info);
4277 adev->fru_info = NULL;
4278
d37a3929
OC
4279 px = amdgpu_device_supports_px(adev_to_drm(adev));
4280
4281 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
4282 apple_gmux_detect(NULL, NULL)))
84c8b22e 4283 vga_switcheroo_unregister_client(adev->pdev);
d37a3929
OC
4284
4285 if (px)
83ba126a 4286 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d37a3929 4287
38d6be81 4288 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
b8779475 4289 vga_client_unregister(adev->pdev);
e9bc1bf7 4290
62d5f9f7
LS
4291 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4292
4293 iounmap(adev->rmmio);
4294 adev->rmmio = NULL;
43c064db 4295 amdgpu_doorbell_fini(adev);
62d5f9f7
LS
4296 drm_dev_exit(idx);
4297 }
4298
d155bef0
AB
4299 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4300 amdgpu_pmu_fini(adev);
72de33f8 4301 if (adev->mman.discovery_bin)
a190d1c7 4302 amdgpu_discovery_fini(adev);
72c8c97b 4303
cfbb6b00
AG
4304 amdgpu_reset_put_reset_domain(adev->reset_domain);
4305 adev->reset_domain = NULL;
4306
72c8c97b
AG
4307 kfree(adev->pci_state);
4308
d38ceaf9
AD
4309}
4310
58144d28
ND
4311/**
4312 * amdgpu_device_evict_resources - evict device resources
4313 * @adev: amdgpu device object
4314 *
4315 * Evicts all ttm device resources (vram BOs, gart table) from the lru list
4316 * of the vram memory type. Mainly used for evicting device resources
4317 * at suspend time.
4318 *
4319 */
7863c155 4320static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
58144d28 4321{
7863c155
ML
4322 int ret;
4323
e53d9665
ML
4324 /* No need to evict vram on APUs for suspend to ram or s2idle */
4325 if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
7863c155 4326 return 0;
58144d28 4327
7863c155
ML
4328 ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4329 if (ret)
58144d28 4330 DRM_WARN("evicting device resources failed\n");
7863c155 4331 return ret;
58144d28 4332}
d38ceaf9
AD
4333
4334/*
4335 * Suspend & resume.
4336 */
5095d541
ML
4337/**
4338 * amdgpu_device_prepare - prepare for device suspend
4339 *
4340 * @dev: drm dev pointer
4341 *
4342 * Prepare to put the hw in the suspend state (all asics).
4343 * Returns 0 for success or an error on failure.
4344 * Called at driver suspend.
4345 */
4346int amdgpu_device_prepare(struct drm_device *dev)
4347{
4348 struct amdgpu_device *adev = drm_to_adev(dev);
cb11ca32 4349 int i, r;
5095d541
ML
4350
4351 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4352 return 0;
4353
4354 /* Evict the majority of BOs before starting suspend sequence */
4355 r = amdgpu_device_evict_resources(adev);
4356 if (r)
4357 return r;
4358
cb11ca32
ML
4359 for (i = 0; i < adev->num_ip_blocks; i++) {
4360 if (!adev->ip_blocks[i].status.valid)
4361 continue;
4362 if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
4363 continue;
4364 r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev);
4365 if (r)
4366 return r;
4367 }
4368
5095d541
ML
4369 return 0;
4370}
4371
d38ceaf9 4372/**
810ddc3a 4373 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 4374 *
87e3f136 4375 * @dev: drm dev pointer
87e3f136 4376 * @fbcon : notify the fbdev of suspend
d38ceaf9
AD
4377 *
4378 * Puts the hw in the suspend state (all asics).
4379 * Returns 0 for success or an error on failure.
4380 * Called at driver suspend.
4381 */
de185019 4382int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9 4383{
a2e15b0e 4384 struct amdgpu_device *adev = drm_to_adev(dev);
d7274ec7 4385 int r = 0;
d38ceaf9 4386
d38ceaf9
AD
4387 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4388 return 0;
4389
44779b43 4390 adev->in_suspend = true;
3fa8f89d 4391
d7274ec7
BZ
4392 if (amdgpu_sriov_vf(adev)) {
4393 amdgpu_virt_fini_data_exchange(adev);
4394 r = amdgpu_virt_request_full_gpu(adev, false);
4395 if (r)
4396 return r;
4397 }
4398
3fa8f89d
S
4399 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4400 DRM_WARN("smart shift update failed\n");
4401
5f818173 4402 if (fbcon)
087451f3 4403 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
5f818173 4404
beff74bc 4405 cancel_delayed_work_sync(&adev->delayed_init_work);
0dee7263 4406 flush_delayed_work(&adev->gfx.gfx_off_delay_work);
a5459475 4407
5e6932fe 4408 amdgpu_ras_suspend(adev);
4409
2196927b 4410 amdgpu_device_ip_suspend_phase1(adev);
fe1053b7 4411
c004d44e 4412 if (!adev->in_s0ix)
5d3a2d95 4413 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
94fa5660 4414
7863c155
ML
4415 r = amdgpu_device_evict_resources(adev);
4416 if (r)
4417 return r;
d38ceaf9 4418
8d35a259 4419 amdgpu_fence_driver_hw_fini(adev);
d38ceaf9 4420
2196927b 4421 amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 4422
d7274ec7
BZ
4423 if (amdgpu_sriov_vf(adev))
4424 amdgpu_virt_release_full_gpu(adev, false);
4425
d38ceaf9
AD
4426 return 0;
4427}
4428
4429/**
810ddc3a 4430 * amdgpu_device_resume - initiate device resume
d38ceaf9 4431 *
87e3f136 4432 * @dev: drm dev pointer
87e3f136 4433 * @fbcon : notify the fbdev of resume
d38ceaf9
AD
4434 *
4435 * Bring the hw back to operating state (all asics).
4436 * Returns 0 for success or an error on failure.
4437 * Called at driver resume.
4438 */
de185019 4439int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9 4440{
1348969a 4441 struct amdgpu_device *adev = drm_to_adev(dev);
03161a6e 4442 int r = 0;
d38ceaf9 4443
d7274ec7
BZ
4444 if (amdgpu_sriov_vf(adev)) {
4445 r = amdgpu_virt_request_full_gpu(adev, true);
4446 if (r)
4447 return r;
4448 }
4449
d38ceaf9
AD
4450 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4451 return 0;
4452
62498733 4453 if (adev->in_s0ix)
bc143d8b 4454 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
628c36d7 4455
d38ceaf9 4456 /* post card */
39c640c0 4457 if (amdgpu_device_need_post(adev)) {
4d2997ab 4458 r = amdgpu_device_asic_init(adev);
74b0b157 4459 if (r)
aac89168 4460 dev_err(adev->dev, "amdgpu asic init failed\n");
74b0b157 4461 }
d38ceaf9 4462
06ec9070 4463 r = amdgpu_device_ip_resume(adev);
d7274ec7 4464
e6707218 4465 if (r) {
aac89168 4466 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
3c22c1ea 4467 goto exit;
e6707218 4468 }
8d35a259 4469 amdgpu_fence_driver_hw_init(adev);
5ceb54c6 4470
06ec9070 4471 r = amdgpu_device_ip_late_init(adev);
03161a6e 4472 if (r)
3c22c1ea 4473 goto exit;
d38ceaf9 4474
beff74bc
AD
4475 queue_delayed_work(system_wq, &adev->delayed_init_work,
4476 msecs_to_jiffies(AMDGPU_RESUME_MS));
4477
c004d44e 4478 if (!adev->in_s0ix) {
5d3a2d95
AD
4479 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4480 if (r)
3c22c1ea 4481 goto exit;
5d3a2d95 4482 }
756e6880 4483
3c22c1ea
SF
4484exit:
4485 if (amdgpu_sriov_vf(adev)) {
4486 amdgpu_virt_init_data_exchange(adev);
4487 amdgpu_virt_release_full_gpu(adev, true);
4488 }
4489
4490 if (r)
4491 return r;
4492
96a5d8d4 4493 /* Make sure IB tests flushed */
beff74bc 4494 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 4495
a2e15b0e 4496 if (fbcon)
087451f3 4497 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
d38ceaf9 4498
5e6932fe 4499 amdgpu_ras_resume(adev);
4500
d09ef243
AD
4501 if (adev->mode_info.num_crtc) {
4502 /*
4503 * Most of the connector probing functions try to acquire runtime pm
4504 * refs to ensure that the GPU is powered on when connector polling is
4505 * performed. Since we're calling this from a runtime PM callback,
4506 * trying to acquire rpm refs will cause us to deadlock.
4507 *
4508 * Since we're guaranteed to be holding the rpm lock, it's safe to
4509 * temporarily disable the rpm helpers so this doesn't deadlock us.
4510 */
23a1a9e5 4511#ifdef CONFIG_PM
d09ef243 4512 dev->dev->power.disable_depth++;
23a1a9e5 4513#endif
d09ef243
AD
4514 if (!adev->dc_enabled)
4515 drm_helper_hpd_irq_event(dev);
4516 else
4517 drm_kms_helper_hotplug_event(dev);
23a1a9e5 4518#ifdef CONFIG_PM
d09ef243 4519 dev->dev->power.disable_depth--;
23a1a9e5 4520#endif
d09ef243 4521 }
44779b43
RZ
4522 adev->in_suspend = false;
4523
dc907c9d
JX
4524 if (adev->enable_mes)
4525 amdgpu_mes_self_test(adev);
4526
3fa8f89d
S
4527 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4528 DRM_WARN("smart shift update failed\n");
4529
4d3b9ae5 4530 return 0;
d38ceaf9
AD
4531}
4532
e3ecdffa
AD
4533/**
4534 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4535 *
4536 * @adev: amdgpu_device pointer
4537 *
4538 * The list of all the hardware IPs that make up the asic is walked and
4539 * the check_soft_reset callbacks are run. check_soft_reset determines
4540 * if the asic is still hung or not.
4541 * Returns true if any of the IPs are still in a hung state, false if not.
4542 */
06ec9070 4543static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
4544{
4545 int i;
4546 bool asic_hang = false;
4547
f993d628
ML
4548 if (amdgpu_sriov_vf(adev))
4549 return true;
4550
8bc04c29
AD
4551 if (amdgpu_asic_need_full_reset(adev))
4552 return true;
4553
63fbf42f 4554 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4555 if (!adev->ip_blocks[i].status.valid)
63fbf42f 4556 continue;
a1255107
AD
4557 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4558 adev->ip_blocks[i].status.hang =
4559 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4560 if (adev->ip_blocks[i].status.hang) {
aac89168 4561 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
4562 asic_hang = true;
4563 }
4564 }
4565 return asic_hang;
4566}
4567
e3ecdffa
AD
4568/**
4569 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4570 *
4571 * @adev: amdgpu_device pointer
4572 *
4573 * The list of all the hardware IPs that make up the asic is walked and the
4574 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
4575 * handles any IP specific hardware or software state changes that are
4576 * necessary for a soft reset to succeed.
4577 * Returns 0 on success, negative error code on failure.
4578 */
06ec9070 4579static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
4580{
4581 int i, r = 0;
4582
4583 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4584 if (!adev->ip_blocks[i].status.valid)
d31a501e 4585 continue;
a1255107
AD
4586 if (adev->ip_blocks[i].status.hang &&
4587 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4588 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
4589 if (r)
4590 return r;
4591 }
4592 }
4593
4594 return 0;
4595}
4596
e3ecdffa
AD
4597/**
4598 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4599 *
4600 * @adev: amdgpu_device pointer
4601 *
4602 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
4603 * reset is necessary to recover.
4604 * Returns true if a full asic reset is required, false if not.
4605 */
06ec9070 4606static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 4607{
da146d3b
AD
4608 int i;
4609
8bc04c29
AD
4610 if (amdgpu_asic_need_full_reset(adev))
4611 return true;
4612
da146d3b 4613 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4614 if (!adev->ip_blocks[i].status.valid)
da146d3b 4615 continue;
a1255107
AD
4616 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4617 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4618 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
4619 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4620 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 4621 if (adev->ip_blocks[i].status.hang) {
aac89168 4622 dev_info(adev->dev, "Some block need full reset!\n");
da146d3b
AD
4623 return true;
4624 }
4625 }
35d782fe
CZ
4626 }
4627 return false;
4628}
4629
e3ecdffa
AD
4630/**
4631 * amdgpu_device_ip_soft_reset - do a soft reset
4632 *
4633 * @adev: amdgpu_device pointer
4634 *
4635 * The list of all the hardware IPs that make up the asic is walked and the
4636 * soft_reset callbacks are run if the block is hung. soft_reset handles any
4637 * IP specific hardware or software state changes that are necessary to soft
4638 * reset the IP.
4639 * Returns 0 on success, negative error code on failure.
4640 */
06ec9070 4641static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4642{
4643 int i, r = 0;
4644
4645 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4646 if (!adev->ip_blocks[i].status.valid)
35d782fe 4647 continue;
a1255107
AD
4648 if (adev->ip_blocks[i].status.hang &&
4649 adev->ip_blocks[i].version->funcs->soft_reset) {
4650 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
4651 if (r)
4652 return r;
4653 }
4654 }
4655
4656 return 0;
4657}
4658
e3ecdffa
AD
4659/**
4660 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4661 *
4662 * @adev: amdgpu_device pointer
4663 *
4664 * The list of all the hardware IPs that make up the asic is walked and the
4665 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
4666 * handles any IP specific hardware or software state changes that are
4667 * necessary after the IP has been soft reset.
4668 * Returns 0 on success, negative error code on failure.
4669 */
06ec9070 4670static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4671{
4672 int i, r = 0;
4673
4674 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4675 if (!adev->ip_blocks[i].status.valid)
35d782fe 4676 continue;
a1255107
AD
4677 if (adev->ip_blocks[i].status.hang &&
4678 adev->ip_blocks[i].version->funcs->post_soft_reset)
4679 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
4680 if (r)
4681 return r;
4682 }
4683
4684 return 0;
4685}
4686
e3ecdffa 4687/**
c33adbc7 4688 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
4689 *
4690 * @adev: amdgpu_device pointer
4691 *
4692 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
4693 * restore things like GPUVM page tables after a GPU reset where
4694 * the contents of VRAM might be lost.
403009bf
CK
4695 *
4696 * Returns:
4697 * 0 on success, negative error code on failure.
e3ecdffa 4698 */
c33adbc7 4699static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 4700{
c41d1cf6 4701 struct dma_fence *fence = NULL, *next = NULL;
403009bf 4702 struct amdgpu_bo *shadow;
e18aaea7 4703 struct amdgpu_bo_vm *vmbo;
403009bf 4704 long r = 1, tmo;
c41d1cf6
ML
4705
4706 if (amdgpu_sriov_runtime(adev))
b045d3af 4707 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
4708 else
4709 tmo = msecs_to_jiffies(100);
4710
aac89168 4711 dev_info(adev->dev, "recover vram bo from shadow start\n");
c41d1cf6 4712 mutex_lock(&adev->shadow_list_lock);
e18aaea7 4713 list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4994d1f0
LC
4714 /* If vm is compute context or adev is APU, shadow will be NULL */
4715 if (!vmbo->shadow)
4716 continue;
4717 shadow = vmbo->shadow;
4718
403009bf 4719 /* No need to recover an evicted BO */
d3116756
CK
4720 if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4721 shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4722 shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
403009bf
CK
4723 continue;
4724
4725 r = amdgpu_bo_restore_shadow(shadow, &next);
4726 if (r)
4727 break;
4728
c41d1cf6 4729 if (fence) {
1712fb1a 4730 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
4731 dma_fence_put(fence);
4732 fence = next;
1712fb1a 4733 if (tmo == 0) {
4734 r = -ETIMEDOUT;
c41d1cf6 4735 break;
1712fb1a 4736 } else if (tmo < 0) {
4737 r = tmo;
4738 break;
4739 }
403009bf
CK
4740 } else {
4741 fence = next;
c41d1cf6 4742 }
c41d1cf6
ML
4743 }
4744 mutex_unlock(&adev->shadow_list_lock);
4745
403009bf
CK
4746 if (fence)
4747 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
4748 dma_fence_put(fence);
4749
1712fb1a 4750 if (r < 0 || tmo <= 0) {
aac89168 4751 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
4752 return -EIO;
4753 }
c41d1cf6 4754
aac89168 4755 dev_info(adev->dev, "recover vram bo from shadow done\n");
403009bf 4756 return 0;
c41d1cf6
ML
4757}
4758
a90ad3c2 4759
e3ecdffa 4760/**
06ec9070 4761 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e 4762 *
982a820b 4763 * @adev: amdgpu_device pointer
87e3f136 4764 * @from_hypervisor: request from hypervisor
5740682e
ML
4765 *
4766 * do VF FLR and reinitialize Asic
3f48c681 4767 * return 0 means succeeded otherwise failed
e3ecdffa
AD
4768 */
4769static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4770 bool from_hypervisor)
5740682e
ML
4771{
4772 int r;
a5f67c93 4773 struct amdgpu_hive_info *hive = NULL;
7258fa31 4774 int retry_limit = 0;
5740682e 4775
7258fa31 4776retry:
c004d44e 4777 amdgpu_amdkfd_pre_reset(adev);
428890a3 4778
5740682e
ML
4779 if (from_hypervisor)
4780 r = amdgpu_virt_request_full_gpu(adev, true);
4781 else
4782 r = amdgpu_virt_reset_gpu(adev);
4783 if (r)
4784 return r;
f734b213 4785 amdgpu_irq_gpu_reset_resume_helper(adev);
a90ad3c2 4786
83f24a8f
HC
4787 /* some sw clean up VF needs to do before recover */
4788 amdgpu_virt_post_reset(adev);
4789
a90ad3c2 4790 /* Resume IP prior to SMC */
06ec9070 4791 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
4792 if (r)
4793 goto error;
a90ad3c2 4794
c9ffa427 4795 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 4796
7a3e0bb2
RZ
4797 r = amdgpu_device_fw_loading(adev);
4798 if (r)
4799 return r;
4800
a90ad3c2 4801 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 4802 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
4803 if (r)
4804 goto error;
a90ad3c2 4805
a5f67c93
ZL
4806 hive = amdgpu_get_xgmi_hive(adev);
4807 /* Update PSP FW topology after reset */
4808 if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4809 r = amdgpu_xgmi_update_topology(hive, adev);
4810
4811 if (hive)
4812 amdgpu_put_xgmi_hive(hive);
4813
4814 if (!r) {
a5f67c93 4815 r = amdgpu_ib_ring_tests(adev);
9c12f5cd 4816
c004d44e 4817 amdgpu_amdkfd_post_reset(adev);
a5f67c93 4818 }
a90ad3c2 4819
abc34253 4820error:
c41d1cf6 4821 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 4822 amdgpu_inc_vram_lost(adev);
c33adbc7 4823 r = amdgpu_device_recover_vram(adev);
a90ad3c2 4824 }
437f3e0b 4825 amdgpu_virt_release_full_gpu(adev, true);
a90ad3c2 4826
7258fa31
SK
4827 if (AMDGPU_RETRY_SRIOV_RESET(r)) {
4828 if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
4829 retry_limit++;
4830 goto retry;
4831 } else
4832 DRM_ERROR("GPU reset retry is beyond the retry limit\n");
4833 }
4834
a90ad3c2
ML
4835 return r;
4836}
4837
9a1cddd6 4838/**
4839 * amdgpu_device_has_job_running - check if there is any job in mirror list
4840 *
982a820b 4841 * @adev: amdgpu_device pointer
9a1cddd6 4842 *
4843 * check if there is any job in mirror list
4844 */
4845bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4846{
4847 int i;
4848 struct drm_sched_job *job;
4849
4850 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4851 struct amdgpu_ring *ring = adev->rings[i];
4852
4853 if (!ring || !ring->sched.thread)
4854 continue;
4855
4856 spin_lock(&ring->sched.job_list_lock);
6efa4b46
LT
4857 job = list_first_entry_or_null(&ring->sched.pending_list,
4858 struct drm_sched_job, list);
9a1cddd6 4859 spin_unlock(&ring->sched.job_list_lock);
4860 if (job)
4861 return true;
4862 }
4863 return false;
4864}
4865
12938fad
CK
4866/**
4867 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4868 *
982a820b 4869 * @adev: amdgpu_device pointer
12938fad
CK
4870 *
4871 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4872 * a hung GPU.
4873 */
4874bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4875{
12938fad 4876
3ba7b418
AG
4877 if (amdgpu_gpu_recovery == 0)
4878 goto disabled;
4879
1a11a65d
YC
4880 /* Skip soft reset check in fatal error mode */
4881 if (!amdgpu_ras_is_poison_mode_supported(adev))
4882 return true;
4883
3ba7b418
AG
4884 if (amdgpu_sriov_vf(adev))
4885 return true;
4886
4887 if (amdgpu_gpu_recovery == -1) {
4888 switch (adev->asic_type) {
b3523c45
AD
4889#ifdef CONFIG_DRM_AMDGPU_SI
4890 case CHIP_VERDE:
4891 case CHIP_TAHITI:
4892 case CHIP_PITCAIRN:
4893 case CHIP_OLAND:
4894 case CHIP_HAINAN:
4895#endif
4896#ifdef CONFIG_DRM_AMDGPU_CIK
4897 case CHIP_KAVERI:
4898 case CHIP_KABINI:
4899 case CHIP_MULLINS:
4900#endif
4901 case CHIP_CARRIZO:
4902 case CHIP_STONEY:
4903 case CHIP_CYAN_SKILLFISH:
3ba7b418 4904 goto disabled;
b3523c45
AD
4905 default:
4906 break;
3ba7b418 4907 }
12938fad
CK
4908 }
4909
4910 return true;
3ba7b418
AG
4911
4912disabled:
aac89168 4913 dev_info(adev->dev, "GPU recovery disabled.\n");
3ba7b418 4914 return false;
12938fad
CK
4915}
4916
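/*
 * Illustrative summary (derived only from amdgpu_device_should_recover_gpu()
 * above): how the amdgpu.gpu_recovery module parameter is interpreted.
 *
 *   gpu_recovery=0   -> recovery disabled
 *   gpu_recovery=1   -> recovery enabled
 *   gpu_recovery=-1  -> auto: enabled, except on the legacy SI/CIK and small
 *                       APU parts listed in the switch above
 *
 *   SR-IOV VFs and the RAS fatal-error (non-poison) path skip the legacy-ASIC
 *   check and always allow recovery unless gpu_recovery=0.
 */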
5c03e584
FX
4917int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
4918{
47fc644f
SS
4919 u32 i;
4920 int ret = 0;
5c03e584 4921
47fc644f 4922 amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5c03e584 4923
47fc644f 4924 dev_info(adev->dev, "GPU mode1 reset\n");
5c03e584 4925
47fc644f
SS
4926 /* disable BM */
4927 pci_clear_master(adev->pdev);
5c03e584 4928
47fc644f 4929 amdgpu_device_cache_pci_state(adev->pdev);
5c03e584 4930
47fc644f
SS
4931 if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
4932 dev_info(adev->dev, "GPU smu mode1 reset\n");
4933 ret = amdgpu_dpm_mode1_reset(adev);
4934 } else {
4935 dev_info(adev->dev, "GPU psp mode1 reset\n");
4936 ret = psp_gpu_reset(adev);
4937 }
5c03e584 4938
47fc644f 4939 if (ret)
2c0f880a 4940 goto mode1_reset_failed;
5c03e584 4941
47fc644f 4942 amdgpu_device_load_pci_state(adev->pdev);
15c5c5f5
LL
4943 ret = amdgpu_psp_wait_for_bootloader(adev);
4944 if (ret)
2c0f880a 4945 goto mode1_reset_failed;
5c03e584 4946
47fc644f
SS
4947 /* wait for asic to come out of reset */
4948 for (i = 0; i < adev->usec_timeout; i++) {
4949 u32 memsize = adev->nbio.funcs->get_memsize(adev);
5c03e584 4950
47fc644f
SS
4951 if (memsize != 0xffffffff)
4952 break;
4953 udelay(1);
4954 }
5c03e584 4955
2c0f880a
HZ
4956 if (i >= adev->usec_timeout) {
4957 ret = -ETIMEDOUT;
4958 goto mode1_reset_failed;
4959 }
4960
47fc644f 4961 amdgpu_atombios_scratch_regs_engine_hung(adev, false);
15c5c5f5 4962
2c0f880a
HZ
4963 return 0;
4964
4965mode1_reset_failed:
4966 dev_err(adev->dev, "GPU mode1 reset failed\n");
47fc644f 4967 return ret;
5c03e584 4968}
5c6dd71e 4969
e3c1b071 4970int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
04442bf7 4971 struct amdgpu_reset_context *reset_context)
26bc5340 4972{
5c1e6fa4 4973 int i, r = 0;
04442bf7
LL
4974 struct amdgpu_job *job = NULL;
4975 bool need_full_reset =
4976 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4977
4978 if (reset_context->reset_req_dev == adev)
4979 job = reset_context->job;
71182665 4980
b602ca5f
TZ
4981 if (amdgpu_sriov_vf(adev)) {
4982 /* stop the data exchange thread */
4983 amdgpu_virt_fini_data_exchange(adev);
4984 }
4985
9e225fb9
AG
4986 amdgpu_fence_driver_isr_toggle(adev, true);
4987
71182665 4988 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
4989 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4990 struct amdgpu_ring *ring = adev->rings[i];
4991
51687759 4992 if (!ring || !ring->sched.thread)
0875dc9e 4993 continue;
5740682e 4994
b8920e1e
SS
4995 /* Clear the job fences from the fence driver to avoid force_completion
4996 * leaving NULL and vm flush fences in the fence driver
4997 */
5c1e6fa4 4998 amdgpu_fence_driver_clear_job_fences(ring);
c530b02f 4999
2f9d4084
ML
5000 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
5001 amdgpu_fence_driver_force_completion(ring);
0875dc9e 5002 }
d38ceaf9 5003
9e225fb9
AG
5004 amdgpu_fence_driver_isr_toggle(adev, false);
5005
ff99849b 5006 if (job && job->vm)
222b5f04
AG
5007 drm_sched_increase_karma(&job->base);
5008
04442bf7 5009 r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
404b277b 5010 /* If reset handler not implemented, continue; otherwise return */
b8920e1e 5011 if (r == -EOPNOTSUPP)
404b277b
LL
5012 r = 0;
5013 else
04442bf7
LL
5014 return r;
5015
1d721ed6 5016 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
5017 if (!amdgpu_sriov_vf(adev)) {
5018
5019 if (!need_full_reset)
5020 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
5021
360cd081
LG
5022 if (!need_full_reset && amdgpu_gpu_recovery &&
5023 amdgpu_device_ip_check_soft_reset(adev)) {
26bc5340
AG
5024 amdgpu_device_ip_pre_soft_reset(adev);
5025 r = amdgpu_device_ip_soft_reset(adev);
5026 amdgpu_device_ip_post_soft_reset(adev);
5027 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 5028 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
26bc5340
AG
5029 need_full_reset = true;
5030 }
5031 }
5032
5033 if (need_full_reset)
5034 r = amdgpu_device_ip_suspend(adev);
04442bf7
LL
5035 if (need_full_reset)
5036 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5037 else
5038 clear_bit(AMDGPU_NEED_FULL_RESET,
5039 &reset_context->flags);
26bc5340
AG
5040 }
5041
5042 return r;
5043}
5044
15fd09a0
SA
5045static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
5046{
15fd09a0
SA
5047 int i;
5048
38a15ad9 5049 lockdep_assert_held(&adev->reset_domain->sem);
15fd09a0 5050
2d6a2a28
AA
5051 for (i = 0; i < adev->reset_info.num_regs; i++) {
5052 adev->reset_info.reset_dump_reg_value[i] =
5053 RREG32(adev->reset_info.reset_dump_reg_list[i]);
5054
5055 trace_amdgpu_reset_reg_dumps(adev->reset_info.reset_dump_reg_list[i],
5056 adev->reset_info.reset_dump_reg_value[i]);
15fd09a0
SA
5057 }
5058
5059 return 0;
5060}
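
/*
 * adev->reset_info.reset_dump_reg_list is a small, user-supplied list of
 * registers (typically populated from user space, e.g. via debugfs; the
 * exact interface is not shown here) whose values are captured right
 * before the reset so they can be inspected afterwards.
 */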
5061
04442bf7
LL
5062int amdgpu_do_asic_reset(struct list_head *device_list_handle,
5063 struct amdgpu_reset_context *reset_context)
26bc5340
AG
5064{
5065 struct amdgpu_device *tmp_adev = NULL;
04442bf7 5066 bool need_full_reset, skip_hw_reset, vram_lost = false;
26bc5340 5067 int r = 0;
f5c7e779 5068 bool gpu_reset_for_dev_remove = 0;
26bc5340 5069
04442bf7
LL
5070 /* Try reset handler method first */
5071 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5072 reset_list);
15fd09a0 5073 amdgpu_reset_reg_dumps(tmp_adev);
0a83bb35
LL
5074
5075 reset_context->reset_device_list = device_list_handle;
04442bf7 5076 r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
404b277b 5077 /* If reset handler not implemented, continue; otherwise return */
b8920e1e 5078 if (r == -EOPNOTSUPP)
404b277b
LL
5079 r = 0;
5080 else
04442bf7
LL
5081 return r;
5082
5083 /* Reset handler not implemented, use the default method */
5084 need_full_reset =
5085 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5086 skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
5087
f5c7e779
YC
5088 gpu_reset_for_dev_remove =
5089 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5090 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5091
26bc5340 5092 /*
655ce9cb 5093 * ASIC reset has to be done on all XGMI hive nodes ASAP
26bc5340
AG
 5094 * to allow proper link negotiation in FW (within 1 sec)
5095 */
7ac71382 5096 if (!skip_hw_reset && need_full_reset) {
655ce9cb 5097 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
041a62bc 5098 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 5099 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
e3c1b071 5100 tmp_adev->gmc.xgmi.pending_reset = false;
c96cf282 5101 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
5102 r = -EALREADY;
5103 } else
5104 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 5105
041a62bc 5106 if (r) {
aac89168 5107 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4a580877 5108 r, adev_to_drm(tmp_adev)->unique);
19349072 5109 goto out;
ce316fa5
LM
5110 }
5111 }
5112
041a62bc
AG
5113 /* For XGMI wait for all resets to complete before proceed */
5114 if (!r) {
655ce9cb 5115 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
ce316fa5
LM
5116 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5117 flush_work(&tmp_adev->xgmi_reset_work);
5118 r = tmp_adev->asic_reset_res;
5119 if (r)
5120 break;
ce316fa5
LM
5121 }
5122 }
5123 }
ce316fa5 5124 }
26bc5340 5125
43c4d576 5126 if (!r && amdgpu_ras_intr_triggered()) {
655ce9cb 5127 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
21226f02 5128 amdgpu_ras_reset_error_count(tmp_adev, AMDGPU_RAS_BLOCK__MMHUB);
43c4d576
JC
5129 }
5130
00eaa571 5131 amdgpu_ras_intr_cleared();
43c4d576 5132 }
00eaa571 5133
f5c7e779
YC
 5134 /* Since the mode1 reset affects base ip blocks, the
 5135 * phase1 ip blocks need to be resumed. Otherwise there
 5136 * will be a BIOS signature error and the psp bootloader
 5137 * can't load kdb the next time the driver is probed.
 5138 */
5139 if (gpu_reset_for_dev_remove) {
5140 list_for_each_entry(tmp_adev, device_list_handle, reset_list)
5141 amdgpu_device_ip_resume_phase1(tmp_adev);
5142
5143 goto end;
5144 }
5145
655ce9cb 5146 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
26bc5340
AG
5147 if (need_full_reset) {
5148 /* post card */
e3c1b071 5149 r = amdgpu_device_asic_init(tmp_adev);
5150 if (r) {
aac89168 5151 dev_warn(tmp_adev->dev, "asic atom init failed!");
e3c1b071 5152 } else {
26bc5340 5153 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
9cec53c1 5154
26bc5340
AG
5155 r = amdgpu_device_ip_resume_phase1(tmp_adev);
5156 if (r)
5157 goto out;
5158
5159 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
a7691785
AA
5160
5161 amdgpu_coredump(tmp_adev, vram_lost, reset_context);
5162
26bc5340 5163 if (vram_lost) {
77e7f829 5164 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 5165 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
5166 }
5167
26bc5340
AG
5168 r = amdgpu_device_fw_loading(tmp_adev);
5169 if (r)
5170 return r;
5171
c45e38f2
LL
5172 r = amdgpu_xcp_restore_partition_mode(
5173 tmp_adev->xcp_mgr);
5174 if (r)
5175 goto out;
5176
26bc5340
AG
5177 r = amdgpu_device_ip_resume_phase2(tmp_adev);
5178 if (r)
5179 goto out;
5180
5181 if (vram_lost)
5182 amdgpu_device_fill_reset_magic(tmp_adev);
5183
fdafb359
EQ
5184 /*
 5185 * Add this ASIC back as tracked since the reset already
 5186 * completed successfully.
5187 */
5188 amdgpu_register_gpu_instance(tmp_adev);
5189
04442bf7
LL
5190 if (!reset_context->hive &&
5191 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
e3c1b071 5192 amdgpu_xgmi_add_device(tmp_adev);
5193
7c04ca50 5194 r = amdgpu_device_ip_late_init(tmp_adev);
5195 if (r)
5196 goto out;
5197
087451f3 5198 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
565d1941 5199
e8fbaf03
GC
 5200 /*
 5201 * The GPU enters a bad state once the number of faulty pages
 5202 * retired by ECC reaches the threshold, and RAS recovery is
 5203 * scheduled next. Add a check here to abort recovery if the
 5204 * bad page threshold has indeed been exceeded, and remind the
 5205 * user to retire this GPU or to set a bigger bad_page_threshold
 5206 * value before probing the driver again.
 5207 */
11003c68 5210 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
e8fbaf03
GC
5211 /* must succeed. */
5212 amdgpu_ras_resume(tmp_adev);
5213 } else {
5214 r = -EINVAL;
5215 goto out;
5216 }
e79a04d5 5217
26bc5340 5218 /* Update PSP FW topology after reset */
04442bf7
LL
5219 if (reset_context->hive &&
5220 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5221 r = amdgpu_xgmi_update_topology(
5222 reset_context->hive, tmp_adev);
26bc5340
AG
5223 }
5224 }
5225
26bc5340
AG
5226out:
5227 if (!r) {
5228 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5229 r = amdgpu_ib_ring_tests(tmp_adev);
5230 if (r) {
5231 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
26bc5340
AG
5232 need_full_reset = true;
5233 r = -EAGAIN;
5234 goto end;
5235 }
5236 }
5237
5238 if (!r)
5239 r = amdgpu_device_recover_vram(tmp_adev);
5240 else
5241 tmp_adev->asic_reset_res = r;
5242 }
5243
5244end:
04442bf7
LL
5245 if (need_full_reset)
5246 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5247 else
5248 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340
AG
5249 return r;
5250}
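
/*
 * Rough flow of amdgpu_do_asic_reset(): offer the reset to an ASIC-specific
 * handler first; if none exists, fall back to the generic path: reset every
 * device on the list (in parallel for XGMI hives), re-post the vBIOS, resume
 * IP blocks in two phases with firmware loading in between, restore the XGMI
 * topology and RAS state, and finish with IB ring tests. Returning -EAGAIN
 * asks the caller to retry with a full reset.
 */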
5251
e923be99 5252static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
26bc5340 5253{
5740682e 5254
a3a09142
AD
5255 switch (amdgpu_asic_reset_method(adev)) {
5256 case AMD_RESET_METHOD_MODE1:
5257 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5258 break;
5259 case AMD_RESET_METHOD_MODE2:
5260 adev->mp1_state = PP_MP1_STATE_RESET;
5261 break;
5262 default:
5263 adev->mp1_state = PP_MP1_STATE_NONE;
5264 break;
5265 }
26bc5340 5266}
d38ceaf9 5267
e923be99 5268static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
26bc5340 5269{
89041940 5270 amdgpu_vf_error_trans_all(adev);
a3a09142 5271 adev->mp1_state = PP_MP1_STATE_NONE;
91fb309d
HC
5272}
5273
3f12acc8
EQ
5274static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5275{
5276 struct pci_dev *p = NULL;
5277
5278 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5279 adev->pdev->bus->number, 1);
5280 if (p) {
5281 pm_runtime_enable(&(p->dev));
5282 pm_runtime_resume(&(p->dev));
5283 }
b85e285e
YY
5284
5285 pci_dev_put(p);
3f12acc8
EQ
5286}
5287
5288static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5289{
5290 enum amd_reset_method reset_method;
5291 struct pci_dev *p = NULL;
5292 u64 expires;
5293
 5294 /*
 5295 * For now, only BACO and mode1 reset are confirmed to
 5296 * suffer the audio issue if the codec is not properly suspended.
 5297 */
5298 reset_method = amdgpu_asic_reset_method(adev);
5299 if ((reset_method != AMD_RESET_METHOD_BACO) &&
5300 (reset_method != AMD_RESET_METHOD_MODE1))
5301 return -EINVAL;
5302
5303 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5304 adev->pdev->bus->number, 1);
5305 if (!p)
5306 return -ENODEV;
5307
5308 expires = pm_runtime_autosuspend_expiration(&(p->dev));
5309 if (!expires)
 5310 /*
 5311 * If we cannot get the audio device's autosuspend delay,
 5312 * use a fixed 4s interval. The audio controller's default
 5313 * autosuspend delay is 3s, so 4s is guaranteed to cover it.
 5314 */
54b7feb9 5316 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
5317
5318 while (!pm_runtime_status_suspended(&(p->dev))) {
5319 if (!pm_runtime_suspend(&(p->dev)))
5320 break;
5321
5322 if (expires < ktime_get_mono_fast_ns()) {
5323 dev_warn(adev->dev, "failed to suspend display audio\n");
b85e285e 5324 pci_dev_put(p);
3f12acc8
EQ
5325 /* TODO: abort the succeeding gpu reset? */
5326 return -ETIMEDOUT;
5327 }
5328 }
5329
5330 pm_runtime_disable(&(p->dev));
5331
b85e285e 5332 pci_dev_put(p);
3f12acc8
EQ
5333 return 0;
5334}
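
/*
 * The HDMI/DP audio codec is function 1 of the GPU's PCI device, which is
 * why pci_get_domain_bus_and_slot() is called with devfn 1 on the GPU's
 * bus. Runtime PM is used to force the codec into runtime suspend before
 * the reset; amdgpu_device_resume_display_audio() re-enables it afterwards.
 */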
5335
d193b12b 5336static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
247c7b0d
AG
5337{
5338 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5339
5340#if defined(CONFIG_DEBUG_FS)
5341 if (!amdgpu_sriov_vf(adev))
5342 cancel_work(&adev->reset_work);
5343#endif
5344
5345 if (adev->kfd.dev)
5346 cancel_work(&adev->kfd.reset_work);
5347
5348 if (amdgpu_sriov_vf(adev))
5349 cancel_work(&adev->virt.flr_work);
5350
5351 if (con && adev->ras_enabled)
5352 cancel_work(&con->recovery_work);
5353
5354}
5355
26bc5340 5356/**
6e9c65f7 5357 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
26bc5340 5358 *
982a820b 5359 * @adev: amdgpu_device pointer
26bc5340 5360 * @job: which job trigger hang
80bd2de1 5361 * @reset_context: amdgpu reset context pointer
26bc5340
AG
5362 *
 5363 * Attempt to reset the GPU if it has hung (all ASICs).
 5364 * Attempt to do a soft reset or full reset and reinitialize the ASIC.
5365 * Returns 0 for success or an error on failure.
5366 */
5367
cf727044 5368int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
f1549c09
LG
5369 struct amdgpu_job *job,
5370 struct amdgpu_reset_context *reset_context)
26bc5340 5371{
1d721ed6 5372 struct list_head device_list, *device_list_handle = NULL;
7dd8c205 5373 bool job_signaled = false;
26bc5340 5374 struct amdgpu_hive_info *hive = NULL;
26bc5340 5375 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 5376 int i, r = 0;
bb5c7235 5377 bool need_emergency_restart = false;
3f12acc8 5378 bool audio_suspended = false;
f5c7e779
YC
5379 bool gpu_reset_for_dev_remove = false;
5380
5381 gpu_reset_for_dev_remove =
5382 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5383 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340 5384
6e3cd2a9 5385 /*
bb5c7235
WS
5386 * Special case: RAS triggered and full reset isn't supported
5387 */
5388 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5389
d5ea093e
AG
5390 /*
5391 * Flush RAM to disk so that after reboot
5392 * the user can read log and see why the system rebooted.
5393 */
80285ae1
SY
5394 if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
5395 amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
5396 DRM_WARN("Emergency reboot.");
5397
5398 ksys_sync_helper();
5399 emergency_restart();
5400 }
5401
b823821f 5402 dev_info(adev->dev, "GPU %s begin!\n",
bb5c7235 5403 need_emergency_restart ? "jobs stop":"reset");
26bc5340 5404
175ac6ec
ZL
5405 if (!amdgpu_sriov_vf(adev))
5406 hive = amdgpu_get_xgmi_hive(adev);
681260df 5407 if (hive)
53b3f8f4 5408 mutex_lock(&hive->hive_lock);
26bc5340 5409
f1549c09
LG
5410 reset_context->job = job;
5411 reset_context->hive = hive;
9e94d22c
EQ
5412 /*
5413 * Build list of devices to reset.
5414 * In case we are in XGMI hive mode, resort the device list
5415 * to put adev in the 1st position.
5416 */
5417 INIT_LIST_HEAD(&device_list);
175ac6ec 5418 if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
83d29a5f 5419 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
655ce9cb 5420 list_add_tail(&tmp_adev->reset_list, &device_list);
83d29a5f
YC
5421 if (gpu_reset_for_dev_remove && adev->shutdown)
5422 tmp_adev->shutdown = true;
5423 }
655ce9cb 5424 if (!list_is_first(&adev->reset_list, &device_list))
5425 list_rotate_to_front(&adev->reset_list, &device_list);
5426 device_list_handle = &device_list;
26bc5340 5427 } else {
655ce9cb 5428 list_add_tail(&adev->reset_list, &device_list);
26bc5340
AG
5429 device_list_handle = &device_list;
5430 }
5431
e923be99
AG
5432 /* We need to lock reset domain only once both for XGMI and single device */
5433 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5434 reset_list);
3675c2f2 5435 amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
e923be99 5436
1d721ed6 5437 /* block all schedulers and reset given job's ring */
655ce9cb 5438 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f287a3c5 5439
e923be99 5440 amdgpu_device_set_mp1_state(tmp_adev);
f287a3c5 5441
3f12acc8
EQ
 5442 /*
 5443 * Try to put the audio codec into the suspend state
 5444 * before the gpu reset starts.
 5445 *
 5446 * The power domain of the graphics device is shared
 5447 * with the AZ (audio) power domain. Without this, we
 5448 * may change the audio hardware from behind the audio
 5449 * driver's back, which will trigger audio codec errors.
 5450 */
5452 if (!amdgpu_device_suspend_display_audio(tmp_adev))
5453 audio_suspended = true;
5454
9e94d22c
EQ
5455 amdgpu_ras_set_error_query_ready(tmp_adev, false);
5456
52fb44cf
EQ
5457 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5458
c004d44e 5459 if (!amdgpu_sriov_vf(tmp_adev))
428890a3 5460 amdgpu_amdkfd_pre_reset(tmp_adev);
9e94d22c 5461
12ffa55d
AG
5462 /*
 5463 * Mark these ASICs to be reset as untracked first,
 5464 * and add them back after the reset completes.
5465 */
5466 amdgpu_unregister_gpu_instance(tmp_adev);
5467
163d4cd2 5468 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
565d1941 5469
f1c1314b 5470 /* disable ras on ALL IPs */
bb5c7235 5471 if (!need_emergency_restart &&
b823821f 5472 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 5473 amdgpu_ras_suspend(tmp_adev);
5474
1d721ed6
AG
5475 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5476 struct amdgpu_ring *ring = tmp_adev->rings[i];
5477
5478 if (!ring || !ring->sched.thread)
5479 continue;
5480
0b2d2c2e 5481 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 5482
bb5c7235 5483 if (need_emergency_restart)
7c6e68c7 5484 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6 5485 }
8f8c80f4 5486 atomic_inc(&tmp_adev->gpu_reset_counter);
1d721ed6
AG
5487 }
5488
bb5c7235 5489 if (need_emergency_restart)
7c6e68c7
AG
5490 goto skip_sched_resume;
5491
1d721ed6
AG
5492 /*
5493 * Must check guilty signal here since after this point all old
5494 * HW fences are force signaled.
5495 *
5496 * job->base holds a reference to parent fence
5497 */
f6a3f660 5498 if (job && dma_fence_is_signaled(&job->hw_fence)) {
1d721ed6 5499 job_signaled = true;
1d721ed6
AG
5500 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5501 goto skip_hw_reset;
5502 }
5503
26bc5340 5504retry: /* Pre asic reset for the rest of the adevs in the XGMI hive. */
655ce9cb 5505 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f5c7e779
YC
5506 if (gpu_reset_for_dev_remove) {
 5507 /* Workaround for ASICs that need to disable SMC first */
5508 amdgpu_device_smu_fini_early(tmp_adev);
5509 }
f1549c09 5510 r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
26bc5340
AG
5511 /*TODO Should we stop ?*/
5512 if (r) {
aac89168 5513 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
4a580877 5514 r, adev_to_drm(tmp_adev)->unique);
26bc5340
AG
5515 tmp_adev->asic_reset_res = r;
5516 }
247c7b0d
AG
5517
5518 /*
 5519 * Drop all pending non-scheduler resets. Scheduler resets
 5520 * were already dropped during drm_sched_stop.
5521 */
d193b12b 5522 amdgpu_device_stop_pending_resets(tmp_adev);
26bc5340
AG
5523 }
5524
5525 /* Actual ASIC resets if needed.*/
4f30d920 5526 /* Host driver will handle XGMI hive reset for SRIOV */
26bc5340
AG
5527 if (amdgpu_sriov_vf(adev)) {
5528 r = amdgpu_device_reset_sriov(adev, job ? false : true);
5529 if (r)
5530 adev->asic_reset_res = r;
950d6425 5531
28606c4e 5532 /* Aldebaran and gfx_11_0_3 support RAS in SRIOV, so RAS needs to be resumed during reset */
4e8303cf
LL
5533 if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
5534 IP_VERSION(9, 4, 2) ||
5535 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
950d6425 5536 amdgpu_ras_resume(adev);
26bc5340 5537 } else {
f1549c09 5538 r = amdgpu_do_asic_reset(device_list_handle, reset_context);
b98a1648 5539 if (r && r == -EAGAIN)
26bc5340 5540 goto retry;
f5c7e779
YC
5541
5542 if (!r && gpu_reset_for_dev_remove)
5543 goto recover_end;
26bc5340
AG
5544 }
5545
1d721ed6
AG
5546skip_hw_reset:
5547
26bc5340 5548 /* Post ASIC reset for all devs .*/
655ce9cb 5549 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
7c6e68c7 5550
1d721ed6
AG
5551 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5552 struct amdgpu_ring *ring = tmp_adev->rings[i];
5553
5554 if (!ring || !ring->sched.thread)
5555 continue;
5556
6868a2c4 5557 drm_sched_start(&ring->sched, true);
1d721ed6
AG
5558 }
5559
4e8303cf
LL
5560 if (adev->enable_mes &&
5561 amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(11, 0, 3))
ed67f729
JX
5562 amdgpu_mes_self_test(tmp_adev);
5563
b8920e1e 5564 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
4a580877 5565 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
1d721ed6 5566
7258fa31
SK
5567 if (tmp_adev->asic_reset_res)
5568 r = tmp_adev->asic_reset_res;
5569
1d721ed6 5570 tmp_adev->asic_reset_res = 0;
26bc5340
AG
5571
5572 if (r) {
5573 /* bad news, how to tell it to userspace ? */
12ffa55d 5574 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
5575 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5576 } else {
12ffa55d 5577 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
3fa8f89d
S
5578 if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5579 DRM_WARN("smart shift update failed\n");
26bc5340 5580 }
7c6e68c7 5581 }
26bc5340 5582
7c6e68c7 5583skip_sched_resume:
655ce9cb 5584 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
428890a3 5585 /* unlock kfd: SRIOV would do it separately */
c004d44e 5586 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
428890a3 5587 amdgpu_amdkfd_post_reset(tmp_adev);
8e2712e7 5588
 5589 /* kfd_post_reset will do nothing if the kfd device is not initialized,
 5590 * so bring up kfd here if it was not initialized before
5591 */
5592 if (!adev->kfd.init_complete)
5593 amdgpu_amdkfd_device_init(adev);
5594
3f12acc8
EQ
5595 if (audio_suspended)
5596 amdgpu_device_resume_display_audio(tmp_adev);
e923be99
AG
5597
5598 amdgpu_device_unset_mp1_state(tmp_adev);
d293470e
YC
5599
5600 amdgpu_ras_set_error_query_ready(tmp_adev, true);
26bc5340
AG
5601 }
5602
f5c7e779 5603recover_end:
e923be99
AG
5604 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5605 reset_list);
5606 amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5607
9e94d22c 5608 if (hive) {
9e94d22c 5609 mutex_unlock(&hive->hive_lock);
d95e8e97 5610 amdgpu_put_xgmi_hive(hive);
9e94d22c 5611 }
26bc5340 5612
f287a3c5 5613 if (r)
26bc5340 5614 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
ab9a0b1f
AG
5615
5616 atomic_set(&adev->reset_domain->reset_res, r);
d38ceaf9
AD
5617 return r;
5618}
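
/*
 * Illustrative sketch (not part of the driver): roughly how a hang handler,
 * e.g. the job timeout callback, might drive amdgpu_device_gpu_recover().
 * The exact call site and flag handling in amdgpu_job.c may differ; treat
 * this only as an illustration of the reset_context contract used above.
 */
#if 0
static void example_recover_from_timeout(struct amdgpu_ring *ring,
					 struct amdgpu_job *job)
{
	struct amdgpu_reset_context reset_context;

	memset(&reset_context, 0, sizeof(reset_context));
	reset_context.method = AMD_RESET_METHOD_NONE;	/* let the ASIC pick */
	reset_context.reset_req_dev = ring->adev;
	clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);

	amdgpu_device_gpu_recover(ring->adev, job, &reset_context);
}
#endif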
5619
e3ecdffa
AD
5620/**
 5621 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
5622 *
5623 * @adev: amdgpu_device pointer
5624 *
 5625 * Fetches and stores in the driver the PCIE capabilities (gen speed
5626 * and lanes) of the slot the device is in. Handles APUs and
5627 * virtualized environments where PCIE config space may not be available.
5628 */
5494d864 5629static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 5630{
5d9a6330 5631 struct pci_dev *pdev;
c5313457
HK
5632 enum pci_bus_speed speed_cap, platform_speed_cap;
5633 enum pcie_link_width platform_link_width;
d0dd7f0c 5634
cd474ba0
AD
5635 if (amdgpu_pcie_gen_cap)
5636 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 5637
cd474ba0
AD
5638 if (amdgpu_pcie_lane_cap)
5639 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 5640
cd474ba0 5641 /* covers APUs as well */
04e85958 5642 if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
cd474ba0
AD
5643 if (adev->pm.pcie_gen_mask == 0)
5644 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5645 if (adev->pm.pcie_mlw_mask == 0)
5646 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 5647 return;
cd474ba0 5648 }
d0dd7f0c 5649
c5313457
HK
5650 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5651 return;
5652
dbaa922b
AD
5653 pcie_bandwidth_available(adev->pdev, NULL,
5654 &platform_speed_cap, &platform_link_width);
c5313457 5655
cd474ba0 5656 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
5657 /* asic caps */
5658 pdev = adev->pdev;
5659 speed_cap = pcie_get_speed_cap(pdev);
5660 if (speed_cap == PCI_SPEED_UNKNOWN) {
5661 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
5662 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5663 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 5664 } else {
2b3a1f51
FX
5665 if (speed_cap == PCIE_SPEED_32_0GT)
5666 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5667 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5668 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5669 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5670 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5671 else if (speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5672 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5673 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5674 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5675 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5676 else if (speed_cap == PCIE_SPEED_8_0GT)
5677 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5678 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5679 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5680 else if (speed_cap == PCIE_SPEED_5_0GT)
5681 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5682 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5683 else
5684 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5685 }
5686 /* platform caps */
c5313457 5687 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
5688 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5689 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5690 } else {
2b3a1f51
FX
5691 if (platform_speed_cap == PCIE_SPEED_32_0GT)
5692 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5693 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5694 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5695 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5696 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5697 else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5698 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5699 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5700 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5701 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 5702 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
5703 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5704 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5705 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 5706 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
5707 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5708 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5709 else
5710 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5711
cd474ba0
AD
5712 }
5713 }
5714 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 5715 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
5716 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5717 } else {
c5313457 5718 switch (platform_link_width) {
5d9a6330 5719 case PCIE_LNK_X32:
cd474ba0
AD
5720 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5721 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5722 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5723 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5724 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5725 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5726 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5727 break;
5d9a6330 5728 case PCIE_LNK_X16:
cd474ba0
AD
5729 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5730 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5731 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5732 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5733 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5734 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5735 break;
5d9a6330 5736 case PCIE_LNK_X12:
cd474ba0
AD
5737 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5738 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5739 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5740 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5741 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5742 break;
5d9a6330 5743 case PCIE_LNK_X8:
cd474ba0
AD
5744 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5745 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5746 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5747 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5748 break;
5d9a6330 5749 case PCIE_LNK_X4:
cd474ba0
AD
5750 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5751 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5752 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5753 break;
5d9a6330 5754 case PCIE_LNK_X2:
cd474ba0
AD
5755 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5756 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5757 break;
5d9a6330 5758 case PCIE_LNK_X1:
cd474ba0
AD
5759 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5760 break;
5761 default:
5762 break;
5763 }
d0dd7f0c
AD
5764 }
5765 }
5766}
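
/*
 * The computed pcie_gen_mask/pcie_mlw_mask combine the ASIC's own
 * capabilities with what the platform (bridge) advertises; a Gen3 x8 slot,
 * for example, ends up with GEN1|GEN2|GEN3 and X1..X8 support bits set.
 * Both masks can be overridden with the amdgpu.pcie_gen_cap and
 * amdgpu.pcie_lane_cap module parameters, which short-circuit the
 * detection at the top of this function.
 */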
d38ceaf9 5767
08a2fd23
RE
5768/**
5769 * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
5770 *
5771 * @adev: amdgpu_device pointer
5772 * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
5773 *
5774 * Return true if @peer_adev can access (DMA) @adev through the PCIe
5775 * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
5776 * @peer_adev.
5777 */
5778bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
5779 struct amdgpu_device *peer_adev)
5780{
5781#ifdef CONFIG_HSA_AMD_P2P
5782 uint64_t address_mask = peer_adev->dev->dma_mask ?
5783 ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
5784 resource_size_t aper_limit =
5785 adev->gmc.aper_base + adev->gmc.aper_size - 1;
bb66ecbf
LL
5786 bool p2p_access =
5787 !adev->gmc.xgmi.connected_to_cpu &&
5788 !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
08a2fd23
RE
5789
5790 return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
5791 adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
5792 !(adev->gmc.aper_base & address_mask ||
5793 aper_limit & address_mask));
5794#else
5795 return false;
5796#endif
5797}
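
/*
 * "Peer accessible" here means the whole of @adev's VRAM is CPU-visible
 * (large BAR, real_vram_size == visible_vram_size), the BAR aperture fits
 * inside @peer_adev's DMA mask, and pci_p2pdma_distance() confirms the
 * topology allows P2P DMA between the two devices.
 */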
5798
361dbd01
AD
5799int amdgpu_device_baco_enter(struct drm_device *dev)
5800{
1348969a 5801 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5802 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01 5803
6ab68650 5804 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
5805 return -ENOTSUPP;
5806
8ab0d6f0 5807 if (ras && adev->ras_enabled &&
acdae216 5808 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5809 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
5810
9530273e 5811 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
5812}
5813
5814int amdgpu_device_baco_exit(struct drm_device *dev)
5815{
1348969a 5816 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5817 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 5818 int ret = 0;
361dbd01 5819
6ab68650 5820 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
5821 return -ENOTSUPP;
5822
9530273e
EQ
5823 ret = amdgpu_dpm_baco_exit(adev);
5824 if (ret)
5825 return ret;
7a22677b 5826
8ab0d6f0 5827 if (ras && adev->ras_enabled &&
acdae216 5828 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5829 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
5830
1bece222
CL
5831 if (amdgpu_passthrough(adev) &&
5832 adev->nbio.funcs->clear_doorbell_interrupt)
5833 adev->nbio.funcs->clear_doorbell_interrupt(adev);
5834
7a22677b 5835 return 0;
361dbd01 5836}
c9a6b82f
AG
5837
5838/**
5839 * amdgpu_pci_error_detected - Called when a PCI error is detected.
5840 * @pdev: PCI device struct
5841 * @state: PCI channel state
5842 *
5843 * Description: Called when a PCI error is detected.
5844 *
5845 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
5846 */
5847pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
5848{
5849 struct drm_device *dev = pci_get_drvdata(pdev);
5850 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5851 int i;
c9a6b82f
AG
5852
5853 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
5854
6894305c
AG
5855 if (adev->gmc.xgmi.num_physical_nodes > 1) {
5856 DRM_WARN("No support for XGMI hive yet...");
5857 return PCI_ERS_RESULT_DISCONNECT;
5858 }
5859
e17e27f9
GC
5860 adev->pci_channel_state = state;
5861
c9a6b82f
AG
5862 switch (state) {
5863 case pci_channel_io_normal:
5864 return PCI_ERS_RESULT_CAN_RECOVER;
acd89fca 5865 /* Fatal error, prepare for slot reset */
8a11d283
TZ
5866 case pci_channel_io_frozen:
5867 /*
d0fb18b5 5868 * Locking adev->reset_domain->sem will prevent any external access
acd89fca
AG
5869 * to GPU during PCI error recovery
5870 */
3675c2f2 5871 amdgpu_device_lock_reset_domain(adev->reset_domain);
e923be99 5872 amdgpu_device_set_mp1_state(adev);
acd89fca
AG
5873
5874 /*
5875 * Block any work scheduling as we do for regular GPU reset
5876 * for the duration of the recovery
5877 */
5878 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5879 struct amdgpu_ring *ring = adev->rings[i];
5880
5881 if (!ring || !ring->sched.thread)
5882 continue;
5883
5884 drm_sched_stop(&ring->sched, NULL);
5885 }
8f8c80f4 5886 atomic_inc(&adev->gpu_reset_counter);
c9a6b82f
AG
5887 return PCI_ERS_RESULT_NEED_RESET;
5888 case pci_channel_io_perm_failure:
5889 /* Permanent error, prepare for device removal */
5890 return PCI_ERS_RESULT_DISCONNECT;
5891 }
5892
5893 return PCI_ERS_RESULT_NEED_RESET;
5894}
5895
5896/**
5897 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
5898 * @pdev: pointer to PCI device
5899 */
5900pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
5901{
5902
5903 DRM_INFO("PCI error: mmio enabled callback!!\n");
5904
5905 /* TODO - dump whatever for debugging purposes */
5906
 5907 /* This is called only if amdgpu_pci_error_detected returns
5908 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
5909 * works, no need to reset slot.
5910 */
5911
5912 return PCI_ERS_RESULT_RECOVERED;
5913}
5914
5915/**
5916 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
5917 * @pdev: PCI device struct
5918 *
5919 * Description: This routine is called by the pci error recovery
5920 * code after the PCI slot has been reset, just before we
5921 * should resume normal operations.
5922 */
5923pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
5924{
5925 struct drm_device *dev = pci_get_drvdata(pdev);
5926 struct amdgpu_device *adev = drm_to_adev(dev);
362c7b91 5927 int r, i;
04442bf7 5928 struct amdgpu_reset_context reset_context;
362c7b91 5929 u32 memsize;
7ac71382 5930 struct list_head device_list;
c9a6b82f
AG
5931
5932 DRM_INFO("PCI error: slot reset callback!!\n");
5933
04442bf7
LL
5934 memset(&reset_context, 0, sizeof(reset_context));
5935
7ac71382 5936 INIT_LIST_HEAD(&device_list);
655ce9cb 5937 list_add_tail(&adev->reset_list, &device_list);
7ac71382 5938
362c7b91
AG
5939 /* wait for asic to come out of reset */
5940 msleep(500);
5941
7ac71382 5942 /* Restore PCI confspace */
c1dd4aa6 5943 amdgpu_device_load_pci_state(pdev);
c9a6b82f 5944
362c7b91
AG
5945 /* confirm ASIC came out of reset */
5946 for (i = 0; i < adev->usec_timeout; i++) {
5947 memsize = amdgpu_asic_get_config_memsize(adev);
5948
5949 if (memsize != 0xffffffff)
5950 break;
5951 udelay(1);
5952 }
5953 if (memsize == 0xffffffff) {
5954 r = -ETIME;
5955 goto out;
5956 }
5957
04442bf7
LL
5958 reset_context.method = AMD_RESET_METHOD_NONE;
5959 reset_context.reset_req_dev = adev;
5960 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
5961 set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
5962
7afefb81 5963 adev->no_hw_access = true;
04442bf7 5964 r = amdgpu_device_pre_asic_reset(adev, &reset_context);
7afefb81 5965 adev->no_hw_access = false;
c9a6b82f
AG
5966 if (r)
5967 goto out;
5968
04442bf7 5969 r = amdgpu_do_asic_reset(&device_list, &reset_context);
c9a6b82f
AG
5970
5971out:
c9a6b82f 5972 if (!r) {
c1dd4aa6
AG
5973 if (amdgpu_device_cache_pci_state(adev->pdev))
5974 pci_restore_state(adev->pdev);
5975
c9a6b82f
AG
5976 DRM_INFO("PCIe error recovery succeeded\n");
5977 } else {
5978 DRM_ERROR("PCIe error recovery failed, err:%d", r);
e923be99
AG
5979 amdgpu_device_unset_mp1_state(adev);
5980 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f
AG
5981 }
5982
5983 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
5984}
5985
5986/**
5987 * amdgpu_pci_resume() - resume normal ops after PCI reset
5988 * @pdev: pointer to PCI device
5989 *
 5990 * Called when the error recovery driver tells us that it's
505199a3 5991 * OK to resume normal operation.
c9a6b82f
AG
5992 */
5993void amdgpu_pci_resume(struct pci_dev *pdev)
5994{
5995 struct drm_device *dev = pci_get_drvdata(pdev);
5996 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5997 int i;
c9a6b82f 5998
c9a6b82f
AG
5999
6000 DRM_INFO("PCI error: resume callback!!\n");
acd89fca 6001
e17e27f9
GC
6002 /* Only continue execution for the case of pci_channel_io_frozen */
6003 if (adev->pci_channel_state != pci_channel_io_frozen)
6004 return;
6005
acd89fca
AG
6006 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6007 struct amdgpu_ring *ring = adev->rings[i];
6008
6009 if (!ring || !ring->sched.thread)
6010 continue;
6011
acd89fca
AG
6012 drm_sched_start(&ring->sched, true);
6013 }
6014
e923be99
AG
6015 amdgpu_device_unset_mp1_state(adev);
6016 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f 6017}
c1dd4aa6
AG
6018
6019bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
6020{
6021 struct drm_device *dev = pci_get_drvdata(pdev);
6022 struct amdgpu_device *adev = drm_to_adev(dev);
6023 int r;
6024
6025 r = pci_save_state(pdev);
6026 if (!r) {
6027 kfree(adev->pci_state);
6028
6029 adev->pci_state = pci_store_saved_state(pdev);
6030
6031 if (!adev->pci_state) {
6032 DRM_ERROR("Failed to store PCI saved state");
6033 return false;
6034 }
6035 } else {
6036 DRM_WARN("Failed to save PCI state, err:%d\n", r);
6037 return false;
6038 }
6039
6040 return true;
6041}
6042
6043bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
6044{
6045 struct drm_device *dev = pci_get_drvdata(pdev);
6046 struct amdgpu_device *adev = drm_to_adev(dev);
6047 int r;
6048
6049 if (!adev->pci_state)
6050 return false;
6051
6052 r = pci_load_saved_state(pdev, adev->pci_state);
6053
6054 if (!r) {
6055 pci_restore_state(pdev);
6056 } else {
6057 DRM_WARN("Failed to load PCI state, err:%d\n", r);
6058 return false;
6059 }
6060
6061 return true;
6062}
6063
810085dd
EH
6064void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
6065 struct amdgpu_ring *ring)
6066{
6067#ifdef CONFIG_X86_64
b818a5d3 6068 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
6069 return;
6070#endif
6071 if (adev->gmc.xgmi.connected_to_cpu)
6072 return;
6073
6074 if (ring && ring->funcs->emit_hdp_flush)
6075 amdgpu_ring_emit_hdp_flush(ring);
6076 else
6077 amdgpu_asic_flush_hdp(adev, ring);
6078}
c1dd4aa6 6079
810085dd
EH
6080void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
6081 struct amdgpu_ring *ring)
6082{
6083#ifdef CONFIG_X86_64
b818a5d3 6084 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
6085 return;
6086#endif
6087 if (adev->gmc.xgmi.connected_to_cpu)
6088 return;
c1dd4aa6 6089
810085dd
EH
6090 amdgpu_asic_invalidate_hdp(adev, ring);
6091}
34f3a4a9 6092
89a7a870
AG
6093int amdgpu_in_reset(struct amdgpu_device *adev)
6094{
6095 return atomic_read(&adev->reset_domain->in_gpu_reset);
53a17b6b
TZ
6096}
6097
34f3a4a9
LY
6098/**
6099 * amdgpu_device_halt() - bring hardware to some kind of halt state
6100 *
6101 * @adev: amdgpu_device pointer
6102 *
 6103 * Bring hardware to some kind of halt state so that no one can touch it
 6104 * any more. This helps to maintain the error context when an error occurs.
 6105 * Compared to a simple hang, the system stays stable at least for SSH
 6106 * access, so it should be trivial to inspect the hardware state and
 6107 * see what's going on. Implemented as follows:
6108 *
6109 * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
6110 * clears all CPU mappings to device, disallows remappings through page faults
6111 * 2. amdgpu_irq_disable_all() disables all interrupts
6112 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
 6113 * 4. set adev->no_hw_access to avoid potential crashes after step 5
6114 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
6115 * 6. pci_disable_device() and pci_wait_for_pending_transaction()
6116 * flush any in flight DMA operations
6117 */
6118void amdgpu_device_halt(struct amdgpu_device *adev)
6119{
6120 struct pci_dev *pdev = adev->pdev;
e0f943b4 6121 struct drm_device *ddev = adev_to_drm(adev);
34f3a4a9 6122
2c1c7ba4 6123 amdgpu_xcp_dev_unplug(adev);
34f3a4a9
LY
6124 drm_dev_unplug(ddev);
6125
6126 amdgpu_irq_disable_all(adev);
6127
6128 amdgpu_fence_driver_hw_fini(adev);
6129
6130 adev->no_hw_access = true;
6131
6132 amdgpu_device_unmap_mmio(adev);
6133
6134 pci_disable_device(pdev);
6135 pci_wait_for_pending_transaction(pdev);
6136}
86700a40
XD
6137
6138u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
6139 u32 reg)
6140{
6141 unsigned long flags, address, data;
6142 u32 r;
6143
6144 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6145 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6146
6147 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6148 WREG32(address, reg * 4);
6149 (void)RREG32(address);
6150 r = RREG32(data);
6151 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6152 return r;
6153}
6154
6155void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
6156 u32 reg, u32 v)
6157{
6158 unsigned long flags, address, data;
6159
6160 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6161 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6162
6163 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6164 WREG32(address, reg * 4);
6165 (void)RREG32(address);
6166 WREG32(data, v);
6167 (void)RREG32(data);
6168 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6169}
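
/*
 * These helpers use the index/data register pair exposed by the NBIO block:
 * the PCIe port register number is written to the index register and the
 * payload is accessed through the data register, all under pcie_idx_lock.
 * The (void)RREG32() reads after each write make sure the posted writes
 * have landed before the next access.
 */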
68ce8b24
CK
6170
6171/**
6172 * amdgpu_device_switch_gang - switch to a new gang
6173 * @adev: amdgpu_device pointer
6174 * @gang: the gang to switch to
6175 *
6176 * Try to switch to a new gang.
6177 * Returns: NULL if we switched to the new gang or a reference to the current
6178 * gang leader.
6179 */
6180struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
6181 struct dma_fence *gang)
6182{
6183 struct dma_fence *old = NULL;
6184
6185 do {
6186 dma_fence_put(old);
6187 rcu_read_lock();
6188 old = dma_fence_get_rcu_safe(&adev->gang_submit);
6189 rcu_read_unlock();
6190
6191 if (old == gang)
6192 break;
6193
6194 if (!dma_fence_is_signaled(old))
6195 return old;
6196
6197 } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
6198 old, gang) != old);
6199
6200 dma_fence_put(old);
6201 return NULL;
6202}
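
/*
 * adev->gang_submit is updated lock-free with cmpxchg(). A NULL return
 * means the switch to the new gang succeeded; otherwise the caller gets a
 * reference to the still-unsignaled current gang leader and is expected to
 * wait on (and put) that fence before trying again.
 */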
220c8cc8
AD
6203
6204bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6205{
6206 switch (adev->asic_type) {
6207#ifdef CONFIG_DRM_AMDGPU_SI
6208 case CHIP_HAINAN:
6209#endif
6210 case CHIP_TOPAZ:
6211 /* chips with no display hardware */
6212 return false;
6213#ifdef CONFIG_DRM_AMDGPU_SI
6214 case CHIP_TAHITI:
6215 case CHIP_PITCAIRN:
6216 case CHIP_VERDE:
6217 case CHIP_OLAND:
6218#endif
6219#ifdef CONFIG_DRM_AMDGPU_CIK
6220 case CHIP_BONAIRE:
6221 case CHIP_HAWAII:
6222 case CHIP_KAVERI:
6223 case CHIP_KABINI:
6224 case CHIP_MULLINS:
6225#endif
6226 case CHIP_TONGA:
6227 case CHIP_FIJI:
6228 case CHIP_POLARIS10:
6229 case CHIP_POLARIS11:
6230 case CHIP_POLARIS12:
6231 case CHIP_VEGAM:
6232 case CHIP_CARRIZO:
6233 case CHIP_STONEY:
6234 /* chips with display hardware */
6235 return true;
6236 default:
6237 /* IP discovery */
4e8303cf 6238 if (!amdgpu_ip_version(adev, DCE_HWIP, 0) ||
220c8cc8
AD
6239 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6240 return false;
6241 return true;
6242 }
6243}
81283fee
JZ
6244
6245uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
6246 uint32_t inst, uint32_t reg_addr, char reg_name[],
6247 uint32_t expected_value, uint32_t mask)
6248{
6249 uint32_t ret = 0;
6250 uint32_t old_ = 0;
6251 uint32_t tmp_ = RREG32(reg_addr);
6252 uint32_t loop = adev->usec_timeout;
6253
6254 while ((tmp_ & (mask)) != (expected_value)) {
6255 if (old_ != tmp_) {
6256 loop = adev->usec_timeout;
6257 old_ = tmp_;
6258 } else
6259 udelay(1);
6260 tmp_ = RREG32(reg_addr);
6261 loop--;
6262 if (!loop) {
 6263 DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08x\n",
6264 inst, reg_name, (uint32_t)expected_value,
6265 (uint32_t)(tmp_ & (mask)));
6266 ret = -ETIMEDOUT;
6267 break;
6268 }
6269 }
6270 return ret;
6271}
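
/*
 * Note on the wait loop above: the timeout restarts whenever the register
 * value changes, so adev->usec_timeout bounds how long the value may stay
 * unchanged without matching, not the total wait time. On timeout the last
 * masked value is reported and -ETIMEDOUT is returned.
 */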