/* drivers/gpu/drm/amd/amdgpu/amdgpu_device.c */
/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/console.h>
#include <linux/slab.h>
#include <linux/iommu.h>
#include <linux/pci.h>
#include <linux/devcoredump.h>
#include <generated/utsrelease.h>
#include <linux/pci-p2pdma.h>
#include <linux/apple-gmux.h>

#include <drm/drm_aperture.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_fb_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "nv.h"
#include "bif/bif_4_1_d.h"
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
#include "amdgpu_pmu.h"
#include "amdgpu_fru_eeprom.h"
#include "amdgpu_reset.h"

#include <linux/suspend.h>
#include <drm/task_barrier.h>
#include <linux/pm_runtime.h>

#include <drm/drm_drv.h>

MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");

#define AMDGPU_RESUME_MS		2000
#define AMDGPU_MAX_RETRY_LIMIT		2
#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)

static const struct drm_driver amdgpu_kms_driver;

const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"ARCTURUS",
	"RENOIR",
	"ALDEBARAN",
	"NAVI10",
	"CYAN_SKILLFISH",
	"NAVI14",
	"NAVI12",
	"SIENNA_CICHLID",
	"NAVY_FLOUNDER",
	"VANGOGH",
	"DIMGREY_CAVEFISH",
	"BEIGE_GOBY",
	"YELLOW_CARP",
	"IP DISCOVERY",
	"LAST",
};

/**
 * DOC: pcie_replay_count
 *
 * The amdgpu driver provides a sysfs API for reporting the total number
 * of PCIe replays (NAKs)
 * The file pcie_replay_count is used for this and returns the total
 * number of replays as a sum of the NAKs generated and NAKs received
 */

static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);
	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);

	return sysfs_emit(buf, "%llu\n", cnt);
}

static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
		amdgpu_device_get_pcie_replay_count, NULL);

static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);

/**
 * DOC: product_name
 *
 * The amdgpu driver provides a sysfs API for reporting the product name
 * for the device
 * The file product_name is used for this and returns the product name
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_product_name(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);

	return sysfs_emit(buf, "%s\n", adev->product_name);
}

static DEVICE_ATTR(product_name, S_IRUGO,
		amdgpu_device_get_product_name, NULL);

/**
 * DOC: product_number
 *
 * The amdgpu driver provides a sysfs API for reporting the part number
 * for the device
 * The file product_number is used for this and returns the part number
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_product_number(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);

	return sysfs_emit(buf, "%s\n", adev->product_number);
}

static DEVICE_ATTR(product_number, S_IRUGO,
		amdgpu_device_get_product_number, NULL);

/**
 * DOC: serial_number
 *
 * The amdgpu driver provides a sysfs API for reporting the serial number
 * for the device
 * The file serial_number is used for this and returns the serial number
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_serial_number(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);

	return sysfs_emit(buf, "%s\n", adev->serial);
}

static DEVICE_ATTR(serial_number, S_IRUGO,
		amdgpu_device_get_serial_number, NULL);

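/*
 * Editorial note (illustrative, not defined by this file): the DEVICE_ATTR()
 * entries above, and pcie_replay_count earlier, are attached to the device
 * that owns them, so on a typical system they show up as readable files in
 * the PCI device's sysfs directory, e.g.
 *
 *	/sys/bus/pci/devices/<domain:bus:dev.fn>/product_name
 *	/sys/bus/pci/devices/<domain:bus:dev.fn>/serial_number
 *	/sys/bus/pci/devices/<domain:bus:dev.fn>/pcie_replay_count
 *
 * The exact path is an assumption about a typical setup.
 */
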
/**
 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with ATPX power control,
 * otherwise return false.
 */
bool amdgpu_device_supports_px(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);

	if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
		return true;
	return false;
}

/**
 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with ACPI power control,
 * otherwise return false.
 */
bool amdgpu_device_supports_boco(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);

	if (adev->has_pr3 ||
	    ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
		return true;
	return false;
}

/**
 * amdgpu_device_supports_baco - Does the device support BACO
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device supports BACO,
 * otherwise return false.
 */
bool amdgpu_device_supports_baco(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);

	return amdgpu_asic_supports_baco(adev);
}

/**
 * amdgpu_device_supports_smart_shift - Is the device dGPU with
 * smart shift support
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with Smart Shift support,
 * otherwise returns false.
 */
bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
{
	return (amdgpu_device_supports_boco(dev) &&
		amdgpu_acpi_is_power_shift_control_supported());
}

/*
 * VRAM access helper functions
 */

/**
 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size, sizeof(@buf) must be > @size
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
			     void *buf, size_t size, bool write)
{
	unsigned long flags;
	uint32_t hi = ~0, tmp = 0;
	uint32_t *data = buf;
	uint64_t last;
	int idx;

	if (!drm_dev_enter(adev_to_drm(adev), &idx))
		return;

	BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));

	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
	for (last = pos + size; pos < last; pos += 4) {
		tmp = pos >> 31;

		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
		if (tmp != hi) {
			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
			hi = tmp;
		}
		if (write)
			WREG32_NO_KIQ(mmMM_DATA, *data++);
		else
			*data++ = RREG32_NO_KIQ(mmMM_DATA);
	}

	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	drm_dev_exit(idx);
}

/**
 * amdgpu_device_aper_access - access vram by vram aperture
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size, sizeof(@buf) must be > @size
 * @write: true - write to vram, otherwise - read from vram
 *
 * The return value means how many bytes have been transferred.
 */
size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
				 void *buf, size_t size, bool write)
{
#ifdef CONFIG_64BIT
	void __iomem *addr;
	size_t count = 0;
	uint64_t last;

	if (!adev->mman.aper_base_kaddr)
		return 0;

	last = min(pos + size, adev->gmc.visible_vram_size);
	if (last > pos) {
		addr = adev->mman.aper_base_kaddr + pos;
		count = last - pos;

		if (write) {
			memcpy_toio(addr, buf, count);
			mb();
			amdgpu_device_flush_hdp(adev, NULL);
		} else {
			amdgpu_device_invalidate_hdp(adev, NULL);
			mb();
			memcpy_fromio(buf, addr, count);
		}

	}

	return count;
#else
	return 0;
#endif
}

/**
 * amdgpu_device_vram_access - read/write a buffer in vram
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size, sizeof(@buf) must be > @size
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
			       void *buf, size_t size, bool write)
{
	size_t count;

	/* try using the VRAM aperture to access VRAM first */
	count = amdgpu_device_aper_access(adev, pos, buf, size, write);
	size -= count;
	if (size) {
		/* use MM to access the rest of VRAM */
		pos += count;
		buf += count;
		amdgpu_device_mm_access(adev, pos, buf, size, write);
	}
}

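/*
 * Usage sketch (illustrative only): copying a few dwords out of VRAM with the
 * helper above. The offset and buffer are hypothetical; real callers include
 * the debugfs VRAM interface and RAS error dumping.
 *
 *	uint32_t data[4];
 *
 *	amdgpu_device_vram_access(adev, 0x0, data, sizeof(data), false);
 */
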
/*
 * register access helper functions.
 */

/* Check if hw access should be skipped because of hotplug or device error */
bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
{
	if (adev->no_hw_access)
		return true;

#ifdef CONFIG_LOCKDEP
	/*
	 * This is a bit complicated to understand, so worth a comment. What we assert
	 * here is that the GPU reset is not running on another thread in parallel.
	 *
	 * For this we trylock the read side of the reset semaphore, if that succeeds
	 * we know that the reset is not running in parallel.
	 *
	 * If the trylock fails we assert that we are either already holding the read
	 * side of the lock or are the reset thread itself and hold the write side of
	 * the lock.
	 */
	if (in_task()) {
		if (down_read_trylock(&adev->reset_domain->sem))
			up_read(&adev->reset_domain->sem);
		else
			lockdep_assert_held(&adev->reset_domain->sem);
	}
#endif
	return false;
}

/**
 * amdgpu_device_rreg - read a memory mapped IO or indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
			    uint32_t reg, uint32_t acc_flags)
{
	uint32_t ret;

	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	if ((reg * 4) < adev->rmmio_size) {
		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
		    amdgpu_sriov_runtime(adev) &&
		    down_read_trylock(&adev->reset_domain->sem)) {
			ret = amdgpu_kiq_rreg(adev, reg);
			up_read(&adev->reset_domain->sem);
		} else {
			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
		}
	} else {
		ret = adev->pcie_rreg(adev, reg * 4);
	}

	trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);

	return ret;
}

/*
 * MMIO register read with bytes helper functions
 * @offset: byte offset from MMIO start
 */

/**
 * amdgpu_mm_rreg8 - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 *
 * Returns the 8 bit value from the offset specified.
 */
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
{
	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	if (offset < adev->rmmio_size)
		return (readb(adev->rmmio + offset));
	BUG();
}

/*
 * MMIO register write with bytes helper functions
 * @offset: byte offset from MMIO start
 * @value: the value to be written to the register
 */

/**
 * amdgpu_mm_wreg8 - write a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
{
	if (amdgpu_device_skip_hw_access(adev))
		return;

	if (offset < adev->rmmio_size)
		writeb(value, adev->rmmio + offset);
	else
		BUG();
}

/**
 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_device_wreg(struct amdgpu_device *adev,
			uint32_t reg, uint32_t v,
			uint32_t acc_flags)
{
	if (amdgpu_device_skip_hw_access(adev))
		return;

	if ((reg * 4) < adev->rmmio_size) {
		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
		    amdgpu_sriov_runtime(adev) &&
		    down_read_trylock(&adev->reset_domain->sem)) {
			amdgpu_kiq_wreg(adev, reg, v);
			up_read(&adev->reset_domain->sem);
		} else {
			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
		}
	} else {
		adev->pcie_wreg(adev, reg * 4, v);
	}

	trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
}

/**
 * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
 *
 * @adev: amdgpu_device pointer
 * @reg: mmio/rlc register
 * @v: value to write
 *
 * this function is invoked only for the debugfs register access
 */
void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
			     uint32_t reg, uint32_t v)
{
	if (amdgpu_device_skip_hw_access(adev))
		return;

	if (amdgpu_sriov_fullaccess(adev) &&
	    adev->gfx.rlc.funcs &&
	    adev->gfx.rlc.funcs->is_rlcg_access_range) {
		if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
			return amdgpu_sriov_wreg(adev, reg, v, 0, 0);
	} else if ((reg * 4) >= adev->rmmio_size) {
		adev->pcie_wreg(adev, reg * 4, v);
	} else {
		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
	}
}

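/*
 * Usage sketch (illustrative): most of the driver does not call
 * amdgpu_device_rreg()/amdgpu_device_wreg() directly but uses the RREG32()/
 * WREG32() macro family from amdgpu.h, which expands to these helpers. A
 * hypothetical read-modify-write would look like:
 *
 *	uint32_t tmp = RREG32(reg_offset);
 *
 *	tmp |= some_mask;
 *	WREG32(reg_offset, tmp);
 *
 * reg_offset and some_mask are placeholders, not real register definitions.
 */
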
/**
 * amdgpu_mm_rdoorbell - read a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
{
	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	if (index < adev->doorbell.num_doorbells) {
		return readl(adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell - write a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
{
	if (amdgpu_device_skip_hw_access(adev))
		return;

	if (index < adev->doorbell.num_doorbells) {
		writel(v, adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

/**
 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
{
	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	if (index < adev->doorbell.num_doorbells) {
		return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
{
	if (amdgpu_device_skip_hw_access(adev))
		return;

	if (index < adev->doorbell.num_doorbells) {
		atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

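/*
 * Usage sketch (illustrative): ring code normally goes through the
 * RDOORBELL32()/WDOORBELL32()/WDOORBELL64() macros from amdgpu.h, which wrap
 * the helpers above, e.g. to kick a ring's write pointer:
 *
 *	WDOORBELL64(ring->doorbell_index, ring->wptr);
 *
 * The exact macro used depends on the ASIC generation and ring type.
 */
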
/**
 * amdgpu_device_indirect_rreg - read an indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register address to read from
 *
 * Returns the value of indirect register @reg_addr
 */
u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
				u32 reg_addr)
{
	unsigned long flags, pcie_index, pcie_data;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;
	u32 r;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	r = readl(pcie_data_offset);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return r;
}

/**
 * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register address to read from
 *
 * Returns the value of indirect register @reg_addr
 */
u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
				  u32 reg_addr)
{
	unsigned long flags, pcie_index, pcie_data;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;
	u64 r;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	/* read low 32 bits */
	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	r = readl(pcie_data_offset);
	/* read high 32 bits */
	writel(reg_addr + 4, pcie_index_offset);
	readl(pcie_index_offset);
	r |= ((u64)readl(pcie_data_offset) << 32);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return r;
}

/**
 * amdgpu_device_indirect_wreg - write an indirect register address
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register offset
 * @reg_data: indirect register data
 *
 */
void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
				 u32 reg_addr, u32 reg_data)
{
	unsigned long flags, pcie_index, pcie_data;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	writel(reg_data, pcie_data_offset);
	readl(pcie_data_offset);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

/**
 * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register offset
 * @reg_data: indirect register data
 *
 */
void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
				   u32 reg_addr, u64 reg_data)
{
	unsigned long flags, pcie_index, pcie_data;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	/* write low 32 bits */
	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
	readl(pcie_data_offset);
	/* write high 32 bits */
	writel(reg_addr + 4, pcie_index_offset);
	readl(pcie_index_offset);
	writel((u32)(reg_data >> 32), pcie_data_offset);
	readl(pcie_data_offset);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

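/*
 * Note (assumption about usage, not defined in this file): these indirect
 * helpers are what the SoC specific code typically plugs into
 * adev->pcie_rreg/pcie_wreg and their 64-bit variants, which is how
 * amdgpu_device_rreg()/amdgpu_device_wreg() reach register offsets that lie
 * beyond the MMIO BAR.
 */
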
/**
 * amdgpu_device_get_rev_id - query device rev_id
 *
 * @adev: amdgpu_device pointer
 *
 * Return device rev_id
 */
u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
{
	return adev->nbio.funcs->get_rev_id(adev);
}

/**
 * amdgpu_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg64 - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
{
	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_block_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu_device pointer
 * @block: offset of instance
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
					  uint32_t block, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
		  reg, block);
	BUG();
	return 0;
}

/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
				      uint32_t block,
				      uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
		  reg, block, v);
	BUG();
}

/**
 * amdgpu_device_asic_init - Wrapper for atom asic_init
 *
 * @adev: amdgpu_device pointer
 *
 * Does any asic specific work and then calls atom asic init.
 */
static int amdgpu_device_asic_init(struct amdgpu_device *adev)
{
	amdgpu_asic_pre_asic_init(adev);

	if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0))
		return amdgpu_atomfirmware_asic_init(adev, true);
	else
		return amdgpu_atom_asic_init(adev->mode_info.atom_context);
}

/**
 * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
 *
 * @adev: amdgpu_device pointer
 *
 * Allocates a scratch page of VRAM for use by various things in the
 * driver.
 */
static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
{
	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
				       AMDGPU_GEM_DOMAIN_VRAM |
				       AMDGPU_GEM_DOMAIN_GTT,
				       &adev->mem_scratch.robj,
				       &adev->mem_scratch.gpu_addr,
				       (void **)&adev->mem_scratch.ptr);
}

/**
 * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
 *
 * @adev: amdgpu_device pointer
 *
 * Frees the VRAM scratch page.
 */
static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
}

/**
 * amdgpu_device_program_register_sequence - program an array of registers.
 *
 * @adev: amdgpu_device pointer
 * @registers: pointer to the register array
 * @array_size: size of the register array
 *
 * Programs an array of registers with and/or masks.
 * This is a helper for setting golden registers.
 */
void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
					     const u32 *registers,
					     const u32 array_size)
{
	u32 tmp, reg, and_mask, or_mask;
	int i;

	if (array_size % 3)
		return;

	for (i = 0; i < array_size; i += 3) {
		reg = registers[i + 0];
		and_mask = registers[i + 1];
		or_mask = registers[i + 2];

		if (and_mask == 0xffffffff) {
			tmp = or_mask;
		} else {
			tmp = RREG32(reg);
			tmp &= ~and_mask;
			if (adev->family >= AMDGPU_FAMILY_AI)
				tmp |= (or_mask & and_mask);
			else
				tmp |= or_mask;
		}
		WREG32(reg, tmp);
	}
}

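/*
 * Example (illustrative): a golden register list is a flat array of
 * { register, and_mask, or_mask } triples. The offsets and masks below are
 * placeholders, not real golden settings.
 *
 *	static const u32 example_golden_settings[] = {
 *		0x1234, 0xffffffff, 0x00000001,
 *		0x5678, 0x0000ff00, 0x00002a00,
 *	};
 *
 *	amdgpu_device_program_register_sequence(adev, example_golden_settings,
 *						ARRAY_SIZE(example_golden_settings));
 */
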
/**
 * amdgpu_device_pci_config_reset - reset the GPU
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using the pci config reset sequence.
 * Only applicable to asics prior to vega10.
 */
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
{
	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
}

/**
 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
 */
int amdgpu_device_pci_reset(struct amdgpu_device *adev)
{
	return pci_reset_function(adev->pdev);
}

/*
 * GPU doorbell aperture helper functions.
 */
/**
 * amdgpu_device_doorbell_init - Init doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Init doorbell driver information (CIK)
 * Returns 0 on success, error on failure.
 */
static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
{

	/* No doorbell on SI hardware generation */
	if (adev->asic_type < CHIP_BONAIRE) {
		adev->doorbell.base = 0;
		adev->doorbell.size = 0;
		adev->doorbell.num_doorbells = 0;
		adev->doorbell.ptr = NULL;
		return 0;
	}

	if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
		return -EINVAL;

	amdgpu_asic_init_doorbell_index(adev);

	/* doorbell bar mapping */
	adev->doorbell.base = pci_resource_start(adev->pdev, 2);
	adev->doorbell.size = pci_resource_len(adev->pdev, 2);

	if (adev->enable_mes) {
		adev->doorbell.num_doorbells =
			adev->doorbell.size / sizeof(u32);
	} else {
		adev->doorbell.num_doorbells =
			min_t(u32, adev->doorbell.size / sizeof(u32),
			      adev->doorbell_index.max_assignment + 1);
		if (adev->doorbell.num_doorbells == 0)
			return -EINVAL;

		/* For Vega, reserve and map two pages on doorbell BAR since SDMA
		 * paging queue doorbell use the second page. The
		 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
		 * doorbells are in the first page. So with paging queue enabled,
		 * the max num_doorbells should + 1 page (0x400 in dword)
		 */
		if (adev->asic_type >= CHIP_VEGA10)
			adev->doorbell.num_doorbells += 0x400;
	}

	adev->doorbell.ptr = ioremap(adev->doorbell.base,
				     adev->doorbell.num_doorbells *
				     sizeof(u32));
	if (adev->doorbell.ptr == NULL)
		return -ENOMEM;

	return 0;
}

/**
 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Tear down doorbell driver information (CIK)
 */
static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
{
	iounmap(adev->doorbell.ptr);
	adev->doorbell.ptr = NULL;
}

/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */

/**
 * amdgpu_device_wb_fini - Disable Writeback and free memory
 *
 * @adev: amdgpu_device pointer
 *
 * Disables Writeback and frees the Writeback memory (all asics).
 * Used at driver shutdown.
 */
static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
{
	if (adev->wb.wb_obj) {
		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
				      &adev->wb.gpu_addr,
				      (void **)&adev->wb.wb);
		adev->wb.wb_obj = NULL;
	}
}

/**
 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
 *
 * @adev: amdgpu_device pointer
 *
 * Initializes writeback and allocates writeback memory (all asics).
 * Used at driver startup.
 * Returns 0 on success or an -error on failure.
 */
static int amdgpu_device_wb_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->wb.wb_obj == NULL) {
		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
					    (void **)&adev->wb.wb);
		if (r) {
			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
			return r;
		}

		adev->wb.num_wb = AMDGPU_MAX_WB;
		memset(&adev->wb.used, 0, sizeof(adev->wb.used));

		/* clear wb memory */
		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
	}

	return 0;
}

/**
 * amdgpu_device_wb_get - Allocate a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Allocate a wb slot for use by the driver (all asics).
 * Returns 0 on success or -EINVAL on failure.
 */
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
{
	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);

	if (offset < adev->wb.num_wb) {
		__set_bit(offset, adev->wb.used);
		*wb = offset << 3; /* convert to dw offset */
		return 0;
	} else {
		return -EINVAL;
	}
}

/**
 * amdgpu_device_wb_free - Free a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Free a wb slot allocated for use by the driver (all asics)
 */
void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
{
	wb >>= 3;
	if (wb < adev->wb.num_wb)
		__clear_bit(wb, adev->wb.used);
}

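/*
 * Usage sketch (illustrative): how a ring or IP block typically consumes a
 * writeback slot. Variable names are placeholders; real users are e.g. the
 * rptr/wptr and fence writeback paths in the ring code.
 *
 *	u32 wb;
 *	u64 wb_gpu_addr;
 *
 *	if (amdgpu_device_wb_get(adev, &wb))
 *		return -EINVAL;
 *	wb_gpu_addr = adev->wb.gpu_addr + (wb * 4);
 *	// GPU writes status to wb_gpu_addr; CPU reads it via adev->wb.wb[wb]
 *	amdgpu_device_wb_free(adev, wb);
 */
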
/**
 * amdgpu_device_resize_fb_bar - try to resize FB BAR
 *
 * @adev: amdgpu_device pointer
 *
 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 * to fail, but if any of the BARs is not accessible after the size we abort
 * driver loading by returning -ENODEV.
 */
int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
{
	int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
	struct pci_bus *root;
	struct resource *res;
	unsigned i;
	u16 cmd;
	int r;

	/* Bypass for VF */
	if (amdgpu_sriov_vf(adev))
		return 0;

	/* skip if the bios has already enabled large BAR */
	if (adev->gmc.real_vram_size &&
	    (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
		return 0;

	/* Check if the root BUS has 64bit memory resources */
	root = adev->pdev->bus;
	while (root->parent)
		root = root->parent;

	pci_bus_for_each_resource(root, res, i) {
		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
		    res->start > 0x100000000ull)
			break;
	}

	/* Trying to resize is pointless without a root hub window above 4GB */
	if (!res)
		return 0;

	/* Limit the BAR size to what is available */
	rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
			rbar_size);

	/* Disable memory decoding while we change the BAR addresses and size */
	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
	pci_write_config_word(adev->pdev, PCI_COMMAND,
			      cmd & ~PCI_COMMAND_MEMORY);

	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
	amdgpu_device_doorbell_fini(adev);
	if (adev->asic_type >= CHIP_BONAIRE)
		pci_release_resource(adev->pdev, 2);

	pci_release_resource(adev->pdev, 0);

	r = pci_resize_resource(adev->pdev, 0, rbar_size);
	if (r == -ENOSPC)
		DRM_INFO("Not enough PCI address space for a large BAR.");
	else if (r && r != -ENOTSUPP)
		DRM_ERROR("Problem resizing BAR0 (%d).", r);

	pci_assign_unassigned_bus_resources(adev->pdev->bus);

	/* When the doorbell or fb BAR isn't available we have no chance of
	 * using the device.
	 */
	r = amdgpu_device_doorbell_init(adev);
	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
		return -ENODEV;

	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);

	return 0;
}

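/*
 * Worked example (illustrative): pci_rebar_bytes_to_size() encodes BAR sizes
 * as log2(bytes) - 20, so a 256MB BAR is size 8 and an 8GB BAR is size 13.
 * For a board with 8GB of VRAM the code above therefore requests rbar_size 13,
 * clamped to the largest size reported by pci_rebar_get_possible_sizes().
 */
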
/*
 * GPU helper functions.
 */
/**
 * amdgpu_device_need_post - check if the hw need post or not
 *
 * @adev: amdgpu_device pointer
 *
 * Check if the asic has been initialized (all asics) at driver startup
 * or post is needed if hw reset is performed.
 * Returns true if need or false if not.
 */
bool amdgpu_device_need_post(struct amdgpu_device *adev)
{
	uint32_t reg;

	if (amdgpu_sriov_vf(adev))
		return false;

	if (amdgpu_passthrough(adev)) {
		/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
		 * some old smc fw still need driver do vPost otherwise gpu hang, while
		 * those smc fw version above 22.15 doesn't have this flaw, so we force
		 * vpost executed for smc version below 22.15
		 */
		if (adev->asic_type == CHIP_FIJI) {
			int err;
			uint32_t fw_ver;

			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
			/* force vPost if error occurred */
			if (err)
				return true;

			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
			if (fw_ver < 0x00160e00)
				return true;
		}
	}

	/* Don't post if we need to reset whole hive on init */
	if (adev->gmc.xgmi.pending_reset)
		return false;

	if (adev->has_hw_reset) {
		adev->has_hw_reset = false;
		return true;
	}

	/* bios scratch used on CIK+ */
	if (adev->asic_type >= CHIP_BONAIRE)
		return amdgpu_atombios_scratch_need_asic_init(adev);

	/* check MEM_SIZE for older asics */
	reg = amdgpu_asic_get_config_memsize(adev);

	if ((reg != 0) && (reg != 0xffffffff))
		return false;

	return true;
}

/**
 * amdgpu_device_should_use_aspm - check if the device should program ASPM
 *
 * @adev: amdgpu_device pointer
 *
 * Confirm whether the module parameter and pcie bridge agree that ASPM should
 * be set for this device.
 *
 * Returns true if it should be used or false if not.
 */
bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
{
	switch (amdgpu_aspm) {
	case -1:
		break;
	case 0:
		return false;
	case 1:
		return true;
	default:
		return false;
	}
	return pcie_aspm_enabled(adev->pdev);
}

/* if we get transitioned to only one device, take VGA back */
/**
 * amdgpu_device_vga_set_decode - enable/disable vga decode
 *
 * @pdev: PCI device pointer
 * @state: enable/disable vga decode
 *
 * Enable/disable vga decode (all asics).
 * Returns VGA resource flags.
 */
static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
						 bool state)
{
	struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));

	amdgpu_asic_set_vga_state(adev, state);
	if (state)
		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
	else
		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
}

/**
 * amdgpu_device_check_block_size - validate the vm block size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm block size specified via module parameter.
 * The vm block size defines number of bits in page table versus page directory,
 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
 * page table and the remaining bits are in the page directory.
 */
static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
{
	/* defines number of bits in page table versus page directory,
	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
	 * page table and the remaining bits are in the page directory */
	if (amdgpu_vm_block_size == -1)
		return;

	if (amdgpu_vm_block_size < 9) {
		dev_warn(adev->dev, "VM page table size (%d) too small\n",
			 amdgpu_vm_block_size);
		amdgpu_vm_block_size = -1;
	}
}

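/*
 * Worked example (illustrative): with 4KB pages there are 12 offset bits, so
 * a vm_block_size of 9 gives 2^9 PTEs per page table and each page table then
 * covers 2^(12 + 9) = 2MB of GPU virtual address space; the remaining address
 * bits are resolved through the page directory levels.
 */
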
/**
 * amdgpu_device_check_vm_size - validate the vm size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm size in GB specified via module parameter.
 * The VM size is the size of the GPU virtual memory space in GB.
 */
static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
{
	/* no need to check the default value */
	if (amdgpu_vm_size == -1)
		return;

	if (amdgpu_vm_size < 1) {
		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
			 amdgpu_vm_size);
		amdgpu_vm_size = -1;
	}
}

static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
{
	struct sysinfo si;
	bool is_os_64 = (sizeof(void *) == 8);
	uint64_t total_memory;
	uint64_t dram_size_seven_GB = 0x1B8000000;
	uint64_t dram_size_three_GB = 0xB8000000;

	if (amdgpu_smu_memory_pool_size == 0)
		return;

	if (!is_os_64) {
		DRM_WARN("Not 64-bit OS, feature not supported\n");
		goto def_value;
	}
	si_meminfo(&si);
	total_memory = (uint64_t)si.totalram * si.mem_unit;

	if ((amdgpu_smu_memory_pool_size == 1) ||
	    (amdgpu_smu_memory_pool_size == 2)) {
		if (total_memory < dram_size_three_GB)
			goto def_value1;
	} else if ((amdgpu_smu_memory_pool_size == 4) ||
		   (amdgpu_smu_memory_pool_size == 8)) {
		if (total_memory < dram_size_seven_GB)
			goto def_value1;
	} else {
		DRM_WARN("Smu memory pool size not supported\n");
		goto def_value;
	}
	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;

	return;

def_value1:
	DRM_WARN("No enough system memory\n");
def_value:
	adev->pm.smu_prv_buffer_size = 0;
}

static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
{
	if (!(adev->flags & AMD_IS_APU) ||
	    adev->asic_type < CHIP_RAVEN)
		return 0;

	switch (adev->asic_type) {
	case CHIP_RAVEN:
		if (adev->pdev->device == 0x15dd)
			adev->apu_flags |= AMD_APU_IS_RAVEN;
		if (adev->pdev->device == 0x15d8)
			adev->apu_flags |= AMD_APU_IS_PICASSO;
		break;
	case CHIP_RENOIR:
		if ((adev->pdev->device == 0x1636) ||
		    (adev->pdev->device == 0x164c))
			adev->apu_flags |= AMD_APU_IS_RENOIR;
		else
			adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
		break;
	case CHIP_VANGOGH:
		adev->apu_flags |= AMD_APU_IS_VANGOGH;
		break;
	case CHIP_YELLOW_CARP:
		break;
	case CHIP_CYAN_SKILLFISH:
		if ((adev->pdev->device == 0x13FE) ||
		    (adev->pdev->device == 0x143F))
			adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
		break;
	default:
		break;
	}

	return 0;
}

/**
 * amdgpu_device_check_arguments - validate module params
 *
 * @adev: amdgpu_device pointer
 *
 * Validates certain module parameters and updates
 * the associated values used by the driver (all asics).
 */
static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
{
	if (amdgpu_sched_jobs < 4) {
		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = 4;
	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
	}

	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
		/* gart size must be greater or equal to 32M */
		dev_warn(adev->dev, "gart size (%d) too small\n",
			 amdgpu_gart_size);
		amdgpu_gart_size = -1;
	}

	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
		/* gtt size must be greater or equal to 32M */
		dev_warn(adev->dev, "gtt size (%d) too small\n",
			 amdgpu_gtt_size);
		amdgpu_gtt_size = -1;
	}

	/* valid range is between 4 and 9 inclusive */
	if (amdgpu_vm_fragment_size != -1 &&
	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
		dev_warn(adev->dev, "valid range is between 4 and 9\n");
		amdgpu_vm_fragment_size = -1;
	}

	if (amdgpu_sched_hw_submission < 2) {
		dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
			 amdgpu_sched_hw_submission);
		amdgpu_sched_hw_submission = 2;
	} else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
		dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
			 amdgpu_sched_hw_submission);
		amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
	}

	if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
		dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
		amdgpu_reset_method = -1;
	}

	amdgpu_device_check_smu_prv_buffer_size(adev);

	amdgpu_device_check_vm_size(adev);

	amdgpu_device_check_block_size(adev);

	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);

	return 0;
}

/**
 * amdgpu_switcheroo_set_state - set switcheroo state
 *
 * @pdev: pci dev pointer
 * @state: vga_switcheroo state
 *
 * Callback for the switcheroo driver. Suspends or resumes
 * the asics before or after it is powered up using ACPI methods.
 */
static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
					enum vga_switcheroo_state state)
{
	struct drm_device *dev = pci_get_drvdata(pdev);
	int r;

	if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
		return;

	if (state == VGA_SWITCHEROO_ON) {
		pr_info("switched on\n");
		/* don't suspend or resume card normally */
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;

		pci_set_power_state(pdev, PCI_D0);
		amdgpu_device_load_pci_state(pdev);
		r = pci_enable_device(pdev);
		if (r)
			DRM_WARN("pci_enable_device failed (%d)\n", r);
		amdgpu_device_resume(dev, true);

		dev->switch_power_state = DRM_SWITCH_POWER_ON;
	} else {
		pr_info("switched off\n");
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
		amdgpu_device_suspend(dev, true);
		amdgpu_device_cache_pci_state(pdev);
		/* Shut down the device */
		pci_disable_device(pdev);
		pci_set_power_state(pdev, PCI_D3cold);
		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
	}
}

/**
 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
 *
 * @pdev: pci dev pointer
 *
 * Callback for the switcheroo driver. Check if the switcheroo
 * state can be changed.
 * Returns true if the state can be changed, false if not.
 */
static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);

	/*
	 * FIXME: open_count is protected by drm_global_mutex but that would lead to
	 * locking inversion with the driver load path. And the access here is
	 * completely racy anyway. So don't bother with locking for now.
	 */
	return atomic_read(&dev->open_count) == 0;
}

static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
	.set_gpu_state = amdgpu_switcheroo_set_state,
	.reprobe = NULL,
	.can_switch = amdgpu_switcheroo_can_switch,
};

/**
 * amdgpu_device_ip_set_clockgating_state - set the CG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: clockgating state (gate or ungate)
 *
 * Sets the requested clockgating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_clockgating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

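/*
 * Usage sketch (illustrative): gating the GFX block's clocks through the
 * helper above. AMD_IP_BLOCK_TYPE_GFX and AMD_CG_STATE_GATE are existing enum
 * values from amd_shared.h; error handling is omitted for brevity.
 *
 *	r = amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
 *						   AMD_CG_STATE_GATE);
 */
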
/**
 * amdgpu_device_ip_set_powergating_state - set the PG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: powergating state (gate or ungate)
 *
 * Sets the requested powergating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_powergating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_powergating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

e3ecdffa
AD
1726/**
1727 * amdgpu_device_ip_get_clockgating_state - get the CG state
1728 *
1729 * @adev: amdgpu_device pointer
1730 * @flags: clockgating feature flags
1731 *
1732 * Walks the list of IPs on the device and updates the clockgating
1733 * flags for each IP.
1734 * Updates @flags with the feature flags for each hardware IP where
1735 * clockgating is enabled.
1736 */
2990a1fc 1737void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
25faeddc 1738 u64 *flags)
6cb2d4e4
HR
1739{
1740 int i;
1741
1742 for (i = 0; i < adev->num_ip_blocks; i++) {
1743 if (!adev->ip_blocks[i].status.valid)
1744 continue;
1745 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1746 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1747 }
1748}
1749
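/*
 * Illustrative sketch (not part of this file): querying the aggregated
 * clockgating flags. AMD_CG_SUPPORT_GFX_MGCG is just an example bit; which
 * flags matter depends on the caller.
 */
static bool amdgpu_example_gfx_mgcg_enabled(struct amdgpu_device *adev)
{
	u64 flags = 0;

	amdgpu_device_ip_get_clockgating_state(adev, &flags);
	return !!(flags & AMD_CG_SUPPORT_GFX_MGCG);
}
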
e3ecdffa
AD
1750/**
1751 * amdgpu_device_ip_wait_for_idle - wait for idle
1752 *
1753 * @adev: amdgpu_device pointer
1754 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1755 *
1756 * Waits for the requested hardware IP to be idle.
1757 * Returns 0 for success or a negative error code on failure.
1758 */
2990a1fc
AD
1759int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1760 enum amd_ip_block_type block_type)
5dbbb60b
AD
1761{
1762 int i, r;
1763
1764 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1765 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1766 continue;
a1255107
AD
1767 if (adev->ip_blocks[i].version->type == block_type) {
1768 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1769 if (r)
1770 return r;
1771 break;
1772 }
1773 }
1774 return 0;
1775
1776}
1777
e3ecdffa
AD
1778/**
1779 * amdgpu_device_ip_is_idle - is the hardware IP idle
1780 *
1781 * @adev: amdgpu_device pointer
1782 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1783 *
1784 * Check if the hardware IP is idle or not.
1785 * Returns true if the IP is idle, false if not.
1786 */
2990a1fc
AD
1787bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1788 enum amd_ip_block_type block_type)
5dbbb60b
AD
1789{
1790 int i;
1791
1792 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1793 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1794 continue;
a1255107
AD
1795 if (adev->ip_blocks[i].version->type == block_type)
1796 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1797 }
1798 return true;
1799
1800}
1801
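/*
 * Illustrative sketch (not part of this file): a pattern combining the two
 * helpers above - poll is_idle with a short bounded busy-wait before falling
 * back to wait_for_idle. The retry count and delay are arbitrary example
 * values, not tuned numbers.
 */
static int amdgpu_example_quiesce_block(struct amdgpu_device *adev,
					enum amd_ip_block_type type)
{
	int i;

	for (i = 0; i < 10; i++) {
		if (amdgpu_device_ip_is_idle(adev, type))
			return 0;
		udelay(10);
	}
	return amdgpu_device_ip_wait_for_idle(adev, type);
}
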
e3ecdffa
AD
1802/**
1803 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1804 *
1805 * @adev: amdgpu_device pointer
87e3f136 1806 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1807 *
1808 * Returns a pointer to the hardware IP block structure
1809 * if it exists for the asic, otherwise NULL.
1810 */
2990a1fc
AD
1811struct amdgpu_ip_block *
1812amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1813 enum amd_ip_block_type type)
d38ceaf9
AD
1814{
1815 int i;
1816
1817 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1818 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1819 return &adev->ip_blocks[i];
1820
1821 return NULL;
1822}
1823
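/*
 * Illustrative sketch (not part of this file): looking a block up with the
 * helper above and reading its version. Purely an example consumer; the
 * function name is hypothetical.
 */
static void amdgpu_example_log_gfx_version(struct amdgpu_device *adev)
{
	struct amdgpu_ip_block *ip =
		amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);

	if (ip)
		dev_info(adev->dev, "GFX IP v%u.%u.%u\n",
			 ip->version->major, ip->version->minor,
			 ip->version->rev);
}
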
1824/**
2990a1fc 1825 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1826 *
1827 * @adev: amdgpu_device pointer
5fc3aeeb 1828 * @type: enum amd_ip_block_type
d38ceaf9
AD
1829 * @major: major version
1830 * @minor: minor version
1831 *
1832 * Returns 0 if the installed IP block's version is equal to or greater than
1833 * the requested major/minor version, 1 if it is smaller or the ip_block doesn't exist.
1834 */
2990a1fc
AD
1835int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1836 enum amd_ip_block_type type,
1837 u32 major, u32 minor)
d38ceaf9 1838{
2990a1fc 1839 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1840
a1255107
AD
1841 if (ip_block && ((ip_block->version->major > major) ||
1842 ((ip_block->version->major == major) &&
1843 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1844 return 0;
1845
1846 return 1;
1847}
1848
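/*
 * Illustrative sketch (not part of this file): feature-gating on an IP block
 * version with the comparison helper above. The SMC 7.1 threshold is an
 * arbitrary example, not a real requirement.
 */
static bool amdgpu_example_has_new_smc(struct amdgpu_device *adev)
{
	/* 0 means the installed block is >= the requested major.minor */
	return amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_SMC,
						  7, 1) == 0;
}
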
a1255107 1849/**
2990a1fc 1850 * amdgpu_device_ip_block_add
a1255107
AD
1851 *
1852 * @adev: amdgpu_device pointer
1853 * @ip_block_version: pointer to the IP to add
1854 *
1855 * Adds the IP block driver information to the collection of IPs
1856 * on the asic.
1857 */
2990a1fc
AD
1858int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1859 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1860{
1861 if (!ip_block_version)
1862 return -EINVAL;
1863
7bd939d0
LG
1864 switch (ip_block_version->type) {
1865 case AMD_IP_BLOCK_TYPE_VCN:
1866 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
1867 return 0;
1868 break;
1869 case AMD_IP_BLOCK_TYPE_JPEG:
1870 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
1871 return 0;
1872 break;
1873 default:
1874 break;
1875 }
1876
e966a725 1877 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1878 ip_block_version->funcs->name);
1879
a1255107
AD
1880 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1881
1882 return 0;
1883}
1884
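/*
 * Illustrative sketch (not part of this file): how an ASIC setup routine
 * adds its IP blocks in dependency order. The example_*_ip_block tables are
 * placeholders standing in for the real amdgpu_ip_block_version structures
 * defined in files such as vi.c or soc15.c.
 */
static int example_asic_set_ip_blocks(struct amdgpu_device *adev)
{
	int r;

	r = amdgpu_device_ip_block_add(adev, &example_common_ip_block);
	if (r)
		return r;
	r = amdgpu_device_ip_block_add(adev, &example_gmc_ip_block);
	if (r)
		return r;
	return amdgpu_device_ip_block_add(adev, &example_gfx_ip_block);
}
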
e3ecdffa
AD
1885/**
1886 * amdgpu_device_enable_virtual_display - enable virtual display feature
1887 *
1888 * @adev: amdgpu_device pointer
1889 *
1890 * Enables the virtual display feature if the user has enabled it via
1891 * the module parameter virtual_display. This feature provides virtual
1892 * display hardware on headless boards or in virtualized environments.
1893 * This function parses and validates the configuration string specified by
1894 * the user and configures the virtual display configuration (number of
1895 * virtual connectors, crtcs, etc.) specified.
1896 */
483ef985 1897static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1898{
1899 adev->enable_virtual_display = false;
1900
1901 if (amdgpu_virtual_display) {
8f66090b 1902 const char *pci_address_name = pci_name(adev->pdev);
0f66356d 1903 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1904
1905 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1906 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1907 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1908 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1909 if (!strcmp("all", pciaddname)
1910 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1911 long num_crtc;
1912 int res = -1;
1913
9accf2fd 1914 adev->enable_virtual_display = true;
0f66356d
ED
1915
1916 if (pciaddname_tmp)
1917 res = kstrtol(pciaddname_tmp, 10,
1918 &num_crtc);
1919
1920 if (!res) {
1921 if (num_crtc < 1)
1922 num_crtc = 1;
1923 if (num_crtc > 6)
1924 num_crtc = 6;
1925 adev->mode_info.num_crtc = num_crtc;
1926 } else {
1927 adev->mode_info.num_crtc = 1;
1928 }
9accf2fd
ED
1929 break;
1930 }
1931 }
1932
0f66356d
ED
1933 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1934 amdgpu_virtual_display, pci_address_name,
1935 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
1936
1937 kfree(pciaddstr);
1938 }
1939}
1940
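/*
 * Illustrative note (not part of this file): the virtual_display module
 * parameter parsed above is a semicolon-separated list of
 * "<pci-address>,<num_crtc>" entries, for example on the kernel command
 * line:
 *
 *   amdgpu.virtual_display=0000:03:00.0,2;0000:04:00.0,1
 *
 * The string "all" may be used in place of a PCI address to match every
 * device, and num_crtc is clamped to the 1..6 range as shown above.
 */
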
25263da3
AD
1941void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
1942{
1943 if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
1944 adev->mode_info.num_crtc = 1;
1945 adev->enable_virtual_display = true;
1946 DRM_INFO("virtual_display:%d, num_crtc:%d\n",
1947 adev->enable_virtual_display, adev->mode_info.num_crtc);
1948 }
1949}
1950
e3ecdffa
AD
1951/**
1952 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1953 *
1954 * @adev: amdgpu_device pointer
1955 *
1956 * Parses the asic configuration parameters specified in the gpu info
1957 * firmware and makes them available to the driver for use in configuring
1958 * the asic.
1959 * Returns 0 on success, -EINVAL on failure.
1960 */
e2a75f88
AD
1961static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1962{
e2a75f88 1963 const char *chip_name;
c0a43457 1964 char fw_name[40];
e2a75f88
AD
1965 int err;
1966 const struct gpu_info_firmware_header_v1_0 *hdr;
1967
ab4fe3e1
HR
1968 adev->firmware.gpu_info_fw = NULL;
1969
72de33f8 1970 if (adev->mman.discovery_bin) {
cc375d8c
TY
1971 /*
1972 * FIXME: The bounding box is still needed by Navi12, so
e24d0e91 1973 * temporarily read it from gpu_info firmware. Should be dropped
cc375d8c
TY
1974 * when DAL no longer needs it.
1975 */
1976 if (adev->asic_type != CHIP_NAVI12)
1977 return 0;
258620d0
AD
1978 }
1979
e2a75f88 1980 switch (adev->asic_type) {
e2a75f88
AD
1981 default:
1982 return 0;
1983 case CHIP_VEGA10:
1984 chip_name = "vega10";
1985 break;
3f76dced
AD
1986 case CHIP_VEGA12:
1987 chip_name = "vega12";
1988 break;
2d2e5e7e 1989 case CHIP_RAVEN:
54f78a76 1990 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
54c4d17e 1991 chip_name = "raven2";
54f78a76 1992 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
741deade 1993 chip_name = "picasso";
54c4d17e
FX
1994 else
1995 chip_name = "raven";
2d2e5e7e 1996 break;
65e60f6e
LM
1997 case CHIP_ARCTURUS:
1998 chip_name = "arcturus";
1999 break;
42b325e5
XY
2000 case CHIP_NAVI12:
2001 chip_name = "navi12";
2002 break;
e2a75f88
AD
2003 }
2004
2005 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
b31d3063 2006 err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
e2a75f88
AD
2007 if (err) {
2008 dev_err(adev->dev,
b31d3063 2009 "Failed to get gpu_info firmware \"%s\"\n",
e2a75f88
AD
2010 fw_name);
2011 goto out;
2012 }
2013
ab4fe3e1 2014 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
2015 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
2016
2017 switch (hdr->version_major) {
2018 case 1:
2019 {
2020 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 2021 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
2022 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2023
cc375d8c
TY
2024 /*
2025 * Should be dropped when DAL no longer needs it.
2026 */
2027 if (adev->asic_type == CHIP_NAVI12)
ec51d3fa
XY
2028 goto parse_soc_bounding_box;
2029
b5ab16bf
AD
2030 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2031 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2032 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2033 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 2034 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
2035 le32_to_cpu(gpu_info_fw->gc_num_tccs);
2036 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2037 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2038 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2039 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 2040 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
2041 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2042 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
2043 adev->gfx.cu_info.max_waves_per_simd =
2044 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2045 adev->gfx.cu_info.max_scratch_slots_per_cu =
2046 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2047 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 2048 if (hdr->version_minor >= 1) {
35c2e910
HZ
2049 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2050 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2051 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2052 adev->gfx.config.num_sc_per_sh =
2053 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2054 adev->gfx.config.num_packer_per_sc =
2055 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2056 }
ec51d3fa
XY
2057
2058parse_soc_bounding_box:
ec51d3fa
XY
2059 /*
2060 * soc bounding box info is not integrated in the discovery table,
258620d0 2061 * we always need to parse it from gpu info firmware if needed.
ec51d3fa 2062 */
48321c3d
HW
2063 if (hdr->version_minor == 2) {
2064 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2065 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2066 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2067 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2068 }
e2a75f88
AD
2069 break;
2070 }
2071 default:
2072 dev_err(adev->dev,
2073 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2074 err = -EINVAL;
2075 goto out;
2076 }
2077out:
e2a75f88
AD
2078 return err;
2079}
2080
e3ecdffa
AD
2081/**
2082 * amdgpu_device_ip_early_init - run early init for hardware IPs
2083 *
2084 * @adev: amdgpu_device pointer
2085 *
2086 * Early initialization pass for hardware IPs. The hardware IPs that make
2087 * up each asic are discovered and each IP's early_init callback is run. This
2088 * is the first stage in initializing the asic.
2089 * Returns 0 on success, negative error code on failure.
2090 */
06ec9070 2091static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 2092{
901e2be2
AD
2093 struct drm_device *dev = adev_to_drm(adev);
2094 struct pci_dev *parent;
aaa36a97 2095 int i, r;
ced69502 2096 bool total;
d38ceaf9 2097
483ef985 2098 amdgpu_device_enable_virtual_display(adev);
a6be7570 2099
00a979f3 2100 if (amdgpu_sriov_vf(adev)) {
00a979f3 2101 r = amdgpu_virt_request_full_gpu(adev, true);
aaa36a97
AD
2102 if (r)
2103 return r;
00a979f3
WS
2104 }
2105
d38ceaf9 2106 switch (adev->asic_type) {
33f34802
KW
2107#ifdef CONFIG_DRM_AMDGPU_SI
2108 case CHIP_VERDE:
2109 case CHIP_TAHITI:
2110 case CHIP_PITCAIRN:
2111 case CHIP_OLAND:
2112 case CHIP_HAINAN:
295d0daf 2113 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
2114 r = si_set_ip_blocks(adev);
2115 if (r)
2116 return r;
2117 break;
2118#endif
a2e73f56
AD
2119#ifdef CONFIG_DRM_AMDGPU_CIK
2120 case CHIP_BONAIRE:
2121 case CHIP_HAWAII:
2122 case CHIP_KAVERI:
2123 case CHIP_KABINI:
2124 case CHIP_MULLINS:
e1ad2d53 2125 if (adev->flags & AMD_IS_APU)
a2e73f56 2126 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
2127 else
2128 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
2129
2130 r = cik_set_ip_blocks(adev);
2131 if (r)
2132 return r;
2133 break;
2134#endif
da87c30b
AD
2135 case CHIP_TOPAZ:
2136 case CHIP_TONGA:
2137 case CHIP_FIJI:
2138 case CHIP_POLARIS10:
2139 case CHIP_POLARIS11:
2140 case CHIP_POLARIS12:
2141 case CHIP_VEGAM:
2142 case CHIP_CARRIZO:
2143 case CHIP_STONEY:
2144 if (adev->flags & AMD_IS_APU)
2145 adev->family = AMDGPU_FAMILY_CZ;
2146 else
2147 adev->family = AMDGPU_FAMILY_VI;
2148
2149 r = vi_set_ip_blocks(adev);
2150 if (r)
2151 return r;
2152 break;
d38ceaf9 2153 default:
63352b7f
AD
2154 r = amdgpu_discovery_set_ip_blocks(adev);
2155 if (r)
2156 return r;
2157 break;
d38ceaf9
AD
2158 }
2159
901e2be2
AD
2160 if (amdgpu_has_atpx() &&
2161 (amdgpu_is_atpx_hybrid() ||
2162 amdgpu_has_atpx_dgpu_power_cntl()) &&
2163 ((adev->flags & AMD_IS_APU) == 0) &&
2164 !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
2165 adev->flags |= AMD_IS_PX;
2166
85ac2021
AD
2167 if (!(adev->flags & AMD_IS_APU)) {
2168 parent = pci_upstream_bridge(adev->pdev);
2169 adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2170 }
901e2be2 2171
c004d44e 2172 amdgpu_amdkfd_device_probe(adev);
1884734a 2173
3b94fb10 2174 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 2175 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 2176 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
4215a119
HC
2177 if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2178 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
00f54b97 2179
ced69502 2180 total = true;
d38ceaf9
AD
2181 for (i = 0; i < adev->num_ip_blocks; i++) {
2182 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
ed8cf00c
HR
2183 DRM_ERROR("disabled ip block: %d <%s>\n",
2184 i, adev->ip_blocks[i].version->funcs->name);
a1255107 2185 adev->ip_blocks[i].status.valid = false;
d38ceaf9 2186 } else {
a1255107
AD
2187 if (adev->ip_blocks[i].version->funcs->early_init) {
2188 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 2189 if (r == -ENOENT) {
a1255107 2190 adev->ip_blocks[i].status.valid = false;
2c1a2784 2191 } else if (r) {
a1255107
AD
2192 DRM_ERROR("early_init of IP block <%s> failed %d\n",
2193 adev->ip_blocks[i].version->funcs->name, r);
ced69502 2194 total = false;
2c1a2784 2195 } else {
a1255107 2196 adev->ip_blocks[i].status.valid = true;
2c1a2784 2197 }
974e6b64 2198 } else {
a1255107 2199 adev->ip_blocks[i].status.valid = true;
d38ceaf9 2200 }
d38ceaf9 2201 }
21a249ca
AD
2202 /* get the vbios after the asic_funcs are set up */
2203 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
2204 r = amdgpu_device_parse_gpu_info_fw(adev);
2205 if (r)
2206 return r;
2207
21a249ca
AD
2208 /* Read BIOS */
2209 if (!amdgpu_get_bios(adev))
2210 return -EINVAL;
2211
2212 r = amdgpu_atombios_init(adev);
2213 if (r) {
2214 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2215 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2216 return r;
2217 }
77eabc6f
PJZ
2218
2219 /* get pf2vf msg info at its earliest time */
2220 if (amdgpu_sriov_vf(adev))
2221 amdgpu_virt_init_data_exchange(adev);
2222
21a249ca 2223 }
d38ceaf9 2224 }
ced69502
ML
2225 if (!total)
2226 return -ENODEV;
d38ceaf9 2227
395d1fb9
NH
2228 adev->cg_flags &= amdgpu_cg_mask;
2229 adev->pg_flags &= amdgpu_pg_mask;
2230
d38ceaf9
AD
2231 return 0;
2232}
2233
0a4f2520
RZ
2234static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2235{
2236 int i, r;
2237
2238 for (i = 0; i < adev->num_ip_blocks; i++) {
2239 if (!adev->ip_blocks[i].status.sw)
2240 continue;
2241 if (adev->ip_blocks[i].status.hw)
2242 continue;
2243 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 2244 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
2245 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2246 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2247 if (r) {
2248 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2249 adev->ip_blocks[i].version->funcs->name, r);
2250 return r;
2251 }
2252 adev->ip_blocks[i].status.hw = true;
2253 }
2254 }
2255
2256 return 0;
2257}
2258
2259static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2260{
2261 int i, r;
2262
2263 for (i = 0; i < adev->num_ip_blocks; i++) {
2264 if (!adev->ip_blocks[i].status.sw)
2265 continue;
2266 if (adev->ip_blocks[i].status.hw)
2267 continue;
2268 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2269 if (r) {
2270 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2271 adev->ip_blocks[i].version->funcs->name, r);
2272 return r;
2273 }
2274 adev->ip_blocks[i].status.hw = true;
2275 }
2276
2277 return 0;
2278}
2279
7a3e0bb2
RZ
2280static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2281{
2282 int r = 0;
2283 int i;
80f41f84 2284 uint32_t smu_version;
7a3e0bb2
RZ
2285
2286 if (adev->asic_type >= CHIP_VEGA10) {
2287 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
2288 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2289 continue;
2290
e3c1b071 2291 if (!adev->ip_blocks[i].status.sw)
2292 continue;
2293
482f0e53
ML
2294 /* no need to do the fw loading again if already done */
2295 if (adev->ip_blocks[i].status.hw == true)
2296 break;
2297
53b3f8f4 2298 if (amdgpu_in_reset(adev) || adev->in_suspend) {
482f0e53
ML
2299 r = adev->ip_blocks[i].version->funcs->resume(adev);
2300 if (r) {
2301 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 2302 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
2303 return r;
2304 }
2305 } else {
2306 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2307 if (r) {
2308 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2309 adev->ip_blocks[i].version->funcs->name, r);
2310 return r;
7a3e0bb2 2311 }
7a3e0bb2 2312 }
482f0e53
ML
2313
2314 adev->ip_blocks[i].status.hw = true;
2315 break;
7a3e0bb2
RZ
2316 }
2317 }
482f0e53 2318
8973d9ec
ED
2319 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2320 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 2321
80f41f84 2322 return r;
7a3e0bb2
RZ
2323}
2324
5fd8518d
AG
2325static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2326{
2327 long timeout;
2328 int r, i;
2329
2330 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2331 struct amdgpu_ring *ring = adev->rings[i];
2332
2334 /* No need to set up the GPU scheduler for rings that don't need it */
2334 if (!ring || ring->no_scheduler)
2335 continue;
2336
2337 switch (ring->funcs->type) {
2338 case AMDGPU_RING_TYPE_GFX:
2339 timeout = adev->gfx_timeout;
2340 break;
2341 case AMDGPU_RING_TYPE_COMPUTE:
2342 timeout = adev->compute_timeout;
2343 break;
2344 case AMDGPU_RING_TYPE_SDMA:
2345 timeout = adev->sdma_timeout;
2346 break;
2347 default:
2348 timeout = adev->video_timeout;
2349 break;
2350 }
2351
2352 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
2353 ring->num_hw_submission, amdgpu_job_hang_limit,
8ab62eda
JG
2354 timeout, adev->reset_domain->wq,
2355 ring->sched_score, ring->name,
2356 adev->dev);
5fd8518d
AG
2357 if (r) {
2358 DRM_ERROR("Failed to create scheduler on ring %s.\n",
2359 ring->name);
2360 return r;
2361 }
2362 }
2363
2364 return 0;
2365}
2366
2367
e3ecdffa
AD
2368/**
2369 * amdgpu_device_ip_init - run init for hardware IPs
2370 *
2371 * @adev: amdgpu_device pointer
2372 *
2373 * Main initialization pass for hardware IPs. The list of all the hardware
2374 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2375 * are run. sw_init initializes the software state associated with each IP
2376 * and hw_init initializes the hardware associated with each IP.
2377 * Returns 0 on success, negative error code on failure.
2378 */
06ec9070 2379static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
2380{
2381 int i, r;
2382
c030f2e4 2383 r = amdgpu_ras_init(adev);
2384 if (r)
2385 return r;
2386
d38ceaf9 2387 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2388 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2389 continue;
a1255107 2390 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 2391 if (r) {
a1255107
AD
2392 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2393 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 2394 goto init_failed;
2c1a2784 2395 }
a1255107 2396 adev->ip_blocks[i].status.sw = true;
bfca0289 2397
c1c39032
AD
2398 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2399 /* need to do common hw init early so everything is set up for gmc */
2400 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2401 if (r) {
2402 DRM_ERROR("hw_init %d failed %d\n", i, r);
2403 goto init_failed;
2404 }
2405 adev->ip_blocks[i].status.hw = true;
2406 } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2407 /* need to do gmc hw init early so we can allocate gpu mem */
892deb48
VS
2408 /* Try to reserve bad pages early */
2409 if (amdgpu_sriov_vf(adev))
2410 amdgpu_virt_exchange_data(adev);
2411
7ccfd79f 2412 r = amdgpu_device_mem_scratch_init(adev);
2c1a2784 2413 if (r) {
7ccfd79f 2414 DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
72d3f592 2415 goto init_failed;
2c1a2784 2416 }
a1255107 2417 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
2418 if (r) {
2419 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 2420 goto init_failed;
2c1a2784 2421 }
06ec9070 2422 r = amdgpu_device_wb_init(adev);
2c1a2784 2423 if (r) {
06ec9070 2424 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 2425 goto init_failed;
2c1a2784 2426 }
a1255107 2427 adev->ip_blocks[i].status.hw = true;
2493664f
ML
2428
2429 /* right after GMC hw init, we create CSA */
8a1fbb4a 2430 if (amdgpu_mcbp) {
1e256e27 2431 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
58ab2c08
CK
2432 AMDGPU_GEM_DOMAIN_VRAM |
2433 AMDGPU_GEM_DOMAIN_GTT,
2434 AMDGPU_CSA_SIZE);
2493664f
ML
2435 if (r) {
2436 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 2437 goto init_failed;
2493664f
ML
2438 }
2439 }
d38ceaf9
AD
2440 }
2441 }
2442
c9ffa427 2443 if (amdgpu_sriov_vf(adev))
22c16d25 2444 amdgpu_virt_init_data_exchange(adev);
c9ffa427 2445
533aed27
AG
2446 r = amdgpu_ib_pool_init(adev);
2447 if (r) {
2448 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2449 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2450 goto init_failed;
2451 }
2452
c8963ea4
RZ
2453 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2454 if (r)
72d3f592 2455 goto init_failed;
0a4f2520
RZ
2456
2457 r = amdgpu_device_ip_hw_init_phase1(adev);
2458 if (r)
72d3f592 2459 goto init_failed;
0a4f2520 2460
7a3e0bb2
RZ
2461 r = amdgpu_device_fw_loading(adev);
2462 if (r)
72d3f592 2463 goto init_failed;
7a3e0bb2 2464
0a4f2520
RZ
2465 r = amdgpu_device_ip_hw_init_phase2(adev);
2466 if (r)
72d3f592 2467 goto init_failed;
d38ceaf9 2468
121a2bc6
AG
2469 /*
2470 * retired pages will be loaded from eeprom and reserved here,
2471 * it should be called after amdgpu_device_ip_hw_init_phase2 since
2472 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2473 * for I2C communication, which is only true at this point.
b82e65a9
GC
2474 *
2475 * amdgpu_ras_recovery_init may fail, but the upper layers only care about
2476 * failures caused by a bad gpu situation and stop the amdgpu init process
2477 * accordingly. For other failure cases, it will still release all
2478 * the resources and print an error message, rather than returning a
2479 * negative value to the upper level.
121a2bc6
AG
2480 *
2481 * Note: theoretically, this should be called before all vram allocations
2482 * to protect retired page from abusing
2483 */
b82e65a9
GC
2484 r = amdgpu_ras_recovery_init(adev);
2485 if (r)
2486 goto init_failed;
121a2bc6 2487
cfbb6b00
AG
2488 /*
2489 * In the case of XGMI, grab an extra reference on the reset domain for this device
2490 */
a4c63caf 2491 if (adev->gmc.xgmi.num_physical_nodes > 1) {
cfbb6b00 2492 if (amdgpu_xgmi_add_device(adev) == 0) {
46c67660 2493 if (!amdgpu_sriov_vf(adev)) {
2efc30f0
VC
2494 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2495
dfd0287b
LH
2496 if (WARN_ON(!hive)) {
2497 r = -ENOENT;
2498 goto init_failed;
2499 }
2500
46c67660 2501 if (!hive->reset_domain ||
2502 !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2503 r = -ENOENT;
2504 amdgpu_put_xgmi_hive(hive);
2505 goto init_failed;
2506 }
2507
2508 /* Drop the early temporary reset domain we created for device */
2509 amdgpu_reset_put_reset_domain(adev->reset_domain);
2510 adev->reset_domain = hive->reset_domain;
9dfa4860 2511 amdgpu_put_xgmi_hive(hive);
cfbb6b00 2512 }
a4c63caf
AG
2513 }
2514 }
2515
5fd8518d
AG
2516 r = amdgpu_device_init_schedulers(adev);
2517 if (r)
2518 goto init_failed;
e3c1b071 2519
2520 /* Don't init kfd if whole hive need to be reset during init */
c004d44e 2521 if (!adev->gmc.xgmi.pending_reset)
e3c1b071 2522 amdgpu_amdkfd_device_init(adev);
c6332b97 2523
bd607166
KR
2524 amdgpu_fru_get_product_info(adev);
2525
72d3f592 2526init_failed:
c9ffa427 2527 if (amdgpu_sriov_vf(adev))
c6332b97 2528 amdgpu_virt_release_full_gpu(adev, true);
2529
72d3f592 2530 return r;
d38ceaf9
AD
2531}
2532
e3ecdffa
AD
2533/**
2534 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2535 *
2536 * @adev: amdgpu_device pointer
2537 *
2538 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2539 * this function before a GPU reset. If the value is retained after a
2540 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2541 */
06ec9070 2542static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2543{
2544 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2545}
2546
e3ecdffa
AD
2547/**
2548 * amdgpu_device_check_vram_lost - check if vram is valid
2549 *
2550 * @adev: amdgpu_device pointer
2551 *
2552 * Checks the reset magic value written to the gart pointer in VRAM.
2553 * The driver calls this after a GPU reset to see if the contents of
2554 * VRAM have been lost or not.
2555 * returns true if vram is lost, false if not.
2556 */
06ec9070 2557static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2558{
dadce777
EQ
2559 if (memcmp(adev->gart.ptr, adev->reset_magic,
2560 AMDGPU_RESET_MAGIC_NUM))
2561 return true;
2562
53b3f8f4 2563 if (!amdgpu_in_reset(adev))
dadce777
EQ
2564 return false;
2565
2566 /*
2567 * For all ASICs with baco/mode1 reset, the VRAM is
2568 * always assumed to be lost.
2569 */
2570 switch (amdgpu_asic_reset_method(adev)) {
2571 case AMD_RESET_METHOD_BACO:
2572 case AMD_RESET_METHOD_MODE1:
2573 return true;
2574 default:
2575 return false;
2576 }
0c49e0b8
CZ
2577}
2578
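/*
 * Illustrative sketch (not part of this file): the intended pairing of the
 * two helpers above around a reset. The middle call is a placeholder for
 * whichever reset path is actually taken.
 */
static bool amdgpu_example_reset_and_check_vram(struct amdgpu_device *adev)
{
	amdgpu_device_fill_reset_magic(adev);	/* snapshot magic before reset */
	amdgpu_asic_reset(adev);		/* placeholder for the real reset path */
	return amdgpu_device_check_vram_lost(adev);	/* true if VRAM content was lost */
}
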
e3ecdffa 2579/**
1112a46b 2580 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2581 *
2582 * @adev: amdgpu_device pointer
b8b72130 2583 * @state: clockgating state (gate or ungate)
e3ecdffa 2584 *
e3ecdffa 2585 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2586 * set_clockgating_state callbacks are run.
2587 * Late initialization pass enabling clockgating for hardware IPs.
2588 * Fini or suspend, pass disabling clockgating for hardware IPs.
e3ecdffa
AD
2589 * Returns 0 on success, negative error code on failure.
2590 */
fdd34271 2591
5d89bb2d
LL
2592int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2593 enum amd_clockgating_state state)
d38ceaf9 2594{
1112a46b 2595 int i, j, r;
d38ceaf9 2596
4a2ba394
SL
2597 if (amdgpu_emu_mode == 1)
2598 return 0;
2599
1112a46b
RZ
2600 for (j = 0; j < adev->num_ip_blocks; j++) {
2601 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2602 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2603 continue;
47198eb7 2604 /* skip CG for GFX, SDMA on S0ix */
5d70a549 2605 if (adev->in_s0ix &&
47198eb7
AD
2606 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2607 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2608 continue;
4a446d55 2609 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2610 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2611 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2612 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2613 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2614 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2615 /* enable clockgating to save power */
a1255107 2616 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2617 state);
4a446d55
AD
2618 if (r) {
2619 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2620 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2621 return r;
2622 }
b0b00ff1 2623 }
d38ceaf9 2624 }
06b18f61 2625
c9f96fd5
RZ
2626 return 0;
2627}
2628
5d89bb2d
LL
2629int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2630 enum amd_powergating_state state)
c9f96fd5 2631{
1112a46b 2632 int i, j, r;
06b18f61 2633
c9f96fd5
RZ
2634 if (amdgpu_emu_mode == 1)
2635 return 0;
2636
1112a46b
RZ
2637 for (j = 0; j < adev->num_ip_blocks; j++) {
2638 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2639 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5 2640 continue;
47198eb7 2641 /* skip PG for GFX, SDMA on S0ix */
5d70a549 2642 if (adev->in_s0ix &&
47198eb7
AD
2643 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2644 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2645 continue;
c9f96fd5
RZ
2646 /* skip PG for VCE/UVD, it's handled specially */
2647 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2648 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2649 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2650 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2651 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2652 /* enable powergating to save power */
2653 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2654 state);
c9f96fd5
RZ
2655 if (r) {
2656 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2657 adev->ip_blocks[i].version->funcs->name, r);
2658 return r;
2659 }
2660 }
2661 }
2dc80b00
S
2662 return 0;
2663}
2664
beff74bc
AD
2665static int amdgpu_device_enable_mgpu_fan_boost(void)
2666{
2667 struct amdgpu_gpu_instance *gpu_ins;
2668 struct amdgpu_device *adev;
2669 int i, ret = 0;
2670
2671 mutex_lock(&mgpu_info.mutex);
2672
2673 /*
2674 * MGPU fan boost feature should be enabled
2675 * only when there are two or more dGPUs in
2676 * the system
2677 */
2678 if (mgpu_info.num_dgpu < 2)
2679 goto out;
2680
2681 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2682 gpu_ins = &(mgpu_info.gpu_ins[i]);
2683 adev = gpu_ins->adev;
2684 if (!(adev->flags & AMD_IS_APU) &&
f10bb940 2685 !gpu_ins->mgpu_fan_enabled) {
beff74bc
AD
2686 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2687 if (ret)
2688 break;
2689
2690 gpu_ins->mgpu_fan_enabled = 1;
2691 }
2692 }
2693
2694out:
2695 mutex_unlock(&mgpu_info.mutex);
2696
2697 return ret;
2698}
2699
e3ecdffa
AD
2700/**
2701 * amdgpu_device_ip_late_init - run late init for hardware IPs
2702 *
2703 * @adev: amdgpu_device pointer
2704 *
2705 * Late initialization pass for hardware IPs. The list of all the hardware
2706 * IPs that make up the asic is walked and the late_init callbacks are run.
2707 * late_init covers any special initialization that an IP requires
2708 * after all of them have been initialized or something that needs to happen
2709 * late in the init process.
2710 * Returns 0 on success, negative error code on failure.
2711 */
06ec9070 2712static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2713{
60599a03 2714 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2715 int i = 0, r;
2716
2717 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2718 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2719 continue;
2720 if (adev->ip_blocks[i].version->funcs->late_init) {
2721 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2722 if (r) {
2723 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2724 adev->ip_blocks[i].version->funcs->name, r);
2725 return r;
2726 }
2dc80b00 2727 }
73f847db 2728 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2729 }
2730
867e24ca 2731 r = amdgpu_ras_late_init(adev);
2732 if (r) {
2733 DRM_ERROR("amdgpu_ras_late_init failed %d", r);
2734 return r;
2735 }
2736
a891d239
DL
2737 amdgpu_ras_set_error_query_ready(adev, true);
2738
1112a46b
RZ
2739 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2740 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2741
06ec9070 2742 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2743
beff74bc
AD
2744 r = amdgpu_device_enable_mgpu_fan_boost();
2745 if (r)
2746 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2747
4da8b639 2748 /* For passthrough configuration on arcturus and aldebaran, enable special SBR handling */
2749 if (amdgpu_passthrough(adev) && ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1)||
2750 adev->asic_type == CHIP_ALDEBARAN ))
bc143d8b 2751 amdgpu_dpm_handle_passthrough_sbr(adev, true);
60599a03
EQ
2752
2753 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2754 mutex_lock(&mgpu_info.mutex);
2755
2756 /*
2757 * Reset device p-state to low as this was booted with high.
2758 *
2759 * This should be performed only after all devices from the same
2760 * hive get initialized.
2761 *
2762 * However, the number of devices in the hive is not known in advance,
2763 * as it is counted one by one during device initialization.
2764 *
2765 * So, we wait for all XGMI interlinked devices to be initialized.
2766 * This may bring some delays as those devices may come from
2767 * different hives. But that should be OK.
2768 */
2769 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2770 for (i = 0; i < mgpu_info.num_gpu; i++) {
2771 gpu_instance = &(mgpu_info.gpu_ins[i]);
2772 if (gpu_instance->adev->flags & AMD_IS_APU)
2773 continue;
2774
d84a430d
JK
2775 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2776 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
2777 if (r) {
2778 DRM_ERROR("pstate setting failed (%d).\n", r);
2779 break;
2780 }
2781 }
2782 }
2783
2784 mutex_unlock(&mgpu_info.mutex);
2785 }
2786
d38ceaf9
AD
2787 return 0;
2788}
2789
613aa3ea
LY
2790/**
2791 * amdgpu_device_smu_fini_early - smu hw_fini wrapper
2792 *
2793 * @adev: amdgpu_device pointer
2794 *
2795 * For ASICs that need to disable the SMC first
2796 */
2797static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
2798{
2799 int i, r;
2800
2801 if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))
2802 return;
2803
2804 for (i = 0; i < adev->num_ip_blocks; i++) {
2805 if (!adev->ip_blocks[i].status.hw)
2806 continue;
2807 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2808 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2809 /* XXX handle errors */
2810 if (r) {
2811 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2812 adev->ip_blocks[i].version->funcs->name, r);
2813 }
2814 adev->ip_blocks[i].status.hw = false;
2815 break;
2816 }
2817 }
2818}
2819
e9669fb7 2820static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
d38ceaf9
AD
2821{
2822 int i, r;
2823
e9669fb7
AG
2824 for (i = 0; i < adev->num_ip_blocks; i++) {
2825 if (!adev->ip_blocks[i].version->funcs->early_fini)
2826 continue;
5278a159 2827
e9669fb7
AG
2828 r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
2829 if (r) {
2830 DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
2831 adev->ip_blocks[i].version->funcs->name, r);
2832 }
2833 }
c030f2e4 2834
05df1f01 2835 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2836 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2837
7270e895
TY
2838 amdgpu_amdkfd_suspend(adev, false);
2839
613aa3ea
LY
2840 /* Workaround for ASICs that need to disable SMC first */
2841 amdgpu_device_smu_fini_early(adev);
3e96dbfd 2842
d38ceaf9 2843 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2844 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2845 continue;
8201a67a 2846
a1255107 2847 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2848 /* XXX handle errors */
2c1a2784 2849 if (r) {
a1255107
AD
2850 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2851 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2852 }
8201a67a 2853
a1255107 2854 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2855 }
2856
6effad8a
GC
2857 if (amdgpu_sriov_vf(adev)) {
2858 if (amdgpu_virt_release_full_gpu(adev, false))
2859 DRM_ERROR("failed to release exclusive mode on fini\n");
2860 }
2861
e9669fb7
AG
2862 return 0;
2863}
2864
2865/**
2866 * amdgpu_device_ip_fini - run fini for hardware IPs
2867 *
2868 * @adev: amdgpu_device pointer
2869 *
2870 * Main teardown pass for hardware IPs. The list of all the hardware
2871 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2872 * are run. hw_fini tears down the hardware associated with each IP
2873 * and sw_fini tears down any software state associated with each IP.
2874 * Returns 0 on success, negative error code on failure.
2875 */
2876static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2877{
2878 int i, r;
2879
2880 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2881 amdgpu_virt_release_ras_err_handler_data(adev);
2882
e9669fb7
AG
2883 if (adev->gmc.xgmi.num_physical_nodes > 1)
2884 amdgpu_xgmi_remove_device(adev);
2885
c004d44e 2886 amdgpu_amdkfd_device_fini_sw(adev);
9950cda2 2887
d38ceaf9 2888 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2889 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2890 continue;
c12aba3a
ML
2891
2892 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2893 amdgpu_ucode_free_bo(adev);
1e256e27 2894 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a 2895 amdgpu_device_wb_fini(adev);
7ccfd79f 2896 amdgpu_device_mem_scratch_fini(adev);
533aed27 2897 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2898 }
2899
a1255107 2900 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2901 /* XXX handle errors */
2c1a2784 2902 if (r) {
a1255107
AD
2903 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2904 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2905 }
a1255107
AD
2906 adev->ip_blocks[i].status.sw = false;
2907 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2908 }
2909
a6dcfd9c 2910 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2911 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2912 continue;
a1255107
AD
2913 if (adev->ip_blocks[i].version->funcs->late_fini)
2914 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2915 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2916 }
2917
c030f2e4 2918 amdgpu_ras_fini(adev);
2919
d38ceaf9
AD
2920 return 0;
2921}
2922
e3ecdffa 2923/**
beff74bc 2924 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2925 *
1112a46b 2926 * @work: work_struct.
e3ecdffa 2927 */
beff74bc 2928static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2929{
2930 struct amdgpu_device *adev =
beff74bc 2931 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2932 int r;
2933
2934 r = amdgpu_ib_ring_tests(adev);
2935 if (r)
2936 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2937}
2938
1e317b99
RZ
2939static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2940{
2941 struct amdgpu_device *adev =
2942 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2943
90a92662
MD
2944 WARN_ON_ONCE(adev->gfx.gfx_off_state);
2945 WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
2946
2947 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2948 adev->gfx.gfx_off_state = true;
1e317b99
RZ
2949}
2950
e3ecdffa 2951/**
e7854a03 2952 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2953 *
2954 * @adev: amdgpu_device pointer
2955 *
2956 * Main suspend function for hardware IPs. The list of all the hardware
2957 * IPs that make up the asic is walked, clockgating is disabled and the
2958 * suspend callbacks are run. suspend puts the hardware and software state
2959 * in each IP into a state suitable for suspend.
2960 * Returns 0 on success, negative error code on failure.
2961 */
e7854a03
AD
2962static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2963{
2964 int i, r;
2965
50ec83f0
AD
2966 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2967 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2968
b31d6ada
EQ
2969 /*
2970 * Per the PMFW team's suggestion, the driver needs to handle disabling
2971 * the gfxoff and df cstate features for the gpu reset (e.g. Mode1Reset)
2972 * scenario. Add the missing df cstate disablement here.
2973 */
2974 if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
2975 dev_warn(adev->dev, "Failed to disallow df cstate");
2976
e7854a03
AD
2977 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2978 if (!adev->ip_blocks[i].status.valid)
2979 continue;
2b9f7848 2980
e7854a03 2981 /* displays are handled separately */
2b9f7848
ND
2982 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
2983 continue;
2984
2985 /* XXX handle errors */
2986 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2987 /* XXX handle errors */
2988 if (r) {
2989 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2990 adev->ip_blocks[i].version->funcs->name, r);
2991 return r;
e7854a03 2992 }
2b9f7848
ND
2993
2994 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
2995 }
2996
e7854a03
AD
2997 return 0;
2998}
2999
3000/**
3001 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3002 *
3003 * @adev: amdgpu_device pointer
3004 *
3005 * Main suspend function for hardware IPs. The list of all the hardware
3006 * IPs that make up the asic is walked, clockgating is disabled and the
3007 * suspend callbacks are run. suspend puts the hardware and software state
3008 * in each IP into a state suitable for suspend.
3009 * Returns 0 on success, negative error code on failure.
3010 */
3011static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3012{
3013 int i, r;
3014
557f42a2 3015 if (adev->in_s0ix)
bc143d8b 3016 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
34416931 3017
d38ceaf9 3018 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3019 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 3020 continue;
e7854a03
AD
3021 /* displays are handled in phase1 */
3022 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3023 continue;
bff77e86
LM
3024 /* PSP lost connection when err_event_athub occurs */
3025 if (amdgpu_ras_intr_triggered() &&
3026 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3027 adev->ip_blocks[i].status.hw = false;
3028 continue;
3029 }
e3c1b071 3030
3031 /* skip unnecessary suspend if we do not initialize them yet */
3032 if (adev->gmc.xgmi.pending_reset &&
3033 !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3034 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
3035 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3036 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
3037 adev->ip_blocks[i].status.hw = false;
3038 continue;
3039 }
557f42a2 3040
afa6646b 3041 /* skip suspend of gfx/mes and psp for S0ix
32ff160d
AD
3042 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3043 * like at runtime. PSP is also part of the always on hardware
3044 * so no need to suspend it.
3045 */
557f42a2 3046 if (adev->in_s0ix &&
32ff160d 3047 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
afa6646b
AD
3048 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3049 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
557f42a2
AD
3050 continue;
3051
2a7798ea
AD
3052 /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3053 if (adev->in_s0ix &&
3054 (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 0, 0)) &&
3055 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3056 continue;
3057
e11c7750
TH
3058 /* During cold boot, swPSP provides the IMU and RLC FW binaries to TOS.
3059 * These are in TMR, hence are expected to be reused by PSP-TOS to reload
3060 * from this location, and RLC Autoload automatically also gets loaded
3061 * from here based on the PMFW -> PSP message during the re-init sequence.
3062 * Therefore, the psp suspend & resume should be skipped to avoid destroying
3063 * the TMR and reloading FWs again for IMU enabled APU ASICs.
3064 */
3065 if (amdgpu_in_reset(adev) &&
3066 (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3067 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3068 continue;
3069
d38ceaf9 3070 /* XXX handle errors */
a1255107 3071 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 3072 /* XXX handle errors */
2c1a2784 3073 if (r) {
a1255107
AD
3074 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3075 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 3076 }
876923fb 3077 adev->ip_blocks[i].status.hw = false;
a3a09142 3078 /* handle putting the SMC in the appropriate state */
86b93fd6
JZ
3079 if (!amdgpu_sriov_vf(adev)) {
3080 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3081 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3082 if (r) {
3083 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3084 adev->mp1_state, r);
3085 return r;
3086 }
a3a09142
AD
3087 }
3088 }
d38ceaf9
AD
3089 }
3090
3091 return 0;
3092}
3093
e7854a03
AD
3094/**
3095 * amdgpu_device_ip_suspend - run suspend for hardware IPs
3096 *
3097 * @adev: amdgpu_device pointer
3098 *
3099 * Main suspend function for hardware IPs. The list of all the hardware
3100 * IPs that make up the asic is walked, clockgating is disabled and the
3101 * suspend callbacks are run. suspend puts the hardware and software state
3102 * in each IP into a state suitable for suspend.
3103 * Returns 0 on success, negative error code on failure.
3104 */
3105int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3106{
3107 int r;
3108
3c73683c
JC
3109 if (amdgpu_sriov_vf(adev)) {
3110 amdgpu_virt_fini_data_exchange(adev);
e7819644 3111 amdgpu_virt_request_full_gpu(adev, false);
3c73683c 3112 }
e7819644 3113
e7854a03
AD
3114 r = amdgpu_device_ip_suspend_phase1(adev);
3115 if (r)
3116 return r;
3117 r = amdgpu_device_ip_suspend_phase2(adev);
3118
e7819644
YT
3119 if (amdgpu_sriov_vf(adev))
3120 amdgpu_virt_release_full_gpu(adev, false);
3121
e7854a03
AD
3122 return r;
3123}
3124
06ec9070 3125static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3126{
3127 int i, r;
3128
2cb681b6 3129 static enum amd_ip_block_type ip_order[] = {
2cb681b6 3130 AMD_IP_BLOCK_TYPE_COMMON,
c1c39032 3131 AMD_IP_BLOCK_TYPE_GMC,
39186aef 3132 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
3133 AMD_IP_BLOCK_TYPE_IH,
3134 };
a90ad3c2 3135
95ea3dbc 3136 for (i = 0; i < adev->num_ip_blocks; i++) {
2cb681b6
ML
3137 int j;
3138 struct amdgpu_ip_block *block;
a90ad3c2 3139
4cd2a96d
J
3140 block = &adev->ip_blocks[i];
3141 block->status.hw = false;
2cb681b6 3142
4cd2a96d 3143 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
2cb681b6 3144
4cd2a96d 3145 if (block->version->type != ip_order[j] ||
2cb681b6
ML
3146 !block->status.valid)
3147 continue;
3148
3149 r = block->version->funcs->hw_init(adev);
0aaeefcc 3150 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3151 if (r)
3152 return r;
482f0e53 3153 block->status.hw = true;
a90ad3c2
ML
3154 }
3155 }
3156
3157 return 0;
3158}
3159
06ec9070 3160static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3161{
3162 int i, r;
3163
2cb681b6
ML
3164 static enum amd_ip_block_type ip_order[] = {
3165 AMD_IP_BLOCK_TYPE_SMC,
3166 AMD_IP_BLOCK_TYPE_DCE,
3167 AMD_IP_BLOCK_TYPE_GFX,
3168 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c 3169 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07
JJ
3170 AMD_IP_BLOCK_TYPE_VCE,
3171 AMD_IP_BLOCK_TYPE_VCN
2cb681b6 3172 };
a90ad3c2 3173
2cb681b6
ML
3174 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3175 int j;
3176 struct amdgpu_ip_block *block;
a90ad3c2 3177
2cb681b6
ML
3178 for (j = 0; j < adev->num_ip_blocks; j++) {
3179 block = &adev->ip_blocks[j];
3180
3181 if (block->version->type != ip_order[i] ||
482f0e53
ML
3182 !block->status.valid ||
3183 block->status.hw)
2cb681b6
ML
3184 continue;
3185
895bd048
JZ
3186 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3187 r = block->version->funcs->resume(adev);
3188 else
3189 r = block->version->funcs->hw_init(adev);
3190
0aaeefcc 3191 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3192 if (r)
3193 return r;
482f0e53 3194 block->status.hw = true;
a90ad3c2
ML
3195 }
3196 }
3197
3198 return 0;
3199}
3200
e3ecdffa
AD
3201/**
3202 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3203 *
3204 * @adev: amdgpu_device pointer
3205 *
3206 * First resume function for hardware IPs. The list of all the hardware
3207 * IPs that make up the asic is walked and the resume callbacks are run for
3208 * COMMON, GMC, and IH. resume puts the hardware into a functional state
3209 * after a suspend and updates the software state as necessary. This
3210 * function is also used for restoring the GPU after a GPU reset.
3211 * Returns 0 on success, negative error code on failure.
3212 */
06ec9070 3213static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
3214{
3215 int i, r;
3216
a90ad3c2 3217 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3218 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 3219 continue;
a90ad3c2 3220 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3221 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
d7274ec7
BZ
3222 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3223 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
482f0e53 3224
fcf0649f
CZ
3225 r = adev->ip_blocks[i].version->funcs->resume(adev);
3226 if (r) {
3227 DRM_ERROR("resume of IP block <%s> failed %d\n",
3228 adev->ip_blocks[i].version->funcs->name, r);
3229 return r;
3230 }
482f0e53 3231 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
3232 }
3233 }
3234
3235 return 0;
3236}
3237
e3ecdffa
AD
3238/**
3239 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3240 *
3241 * @adev: amdgpu_device pointer
3242 *
3243 * Second resume function for hardware IPs. The list of all the hardware
3244 * IPs that make up the asic is walked and the resume callbacks are run for
3245 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
3246 * functional state after a suspend and updates the software state as
3247 * necessary. This function is also used for restoring the GPU after a GPU
3248 * reset.
3249 * Returns 0 on success, negative error code on failure.
3250 */
06ec9070 3251static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3252{
3253 int i, r;
3254
3255 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3256 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 3257 continue;
fcf0649f 3258 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3259 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
3260 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3261 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 3262 continue;
a1255107 3263 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 3264 if (r) {
a1255107
AD
3265 DRM_ERROR("resume of IP block <%s> failed %d\n",
3266 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 3267 return r;
2c1a2784 3268 }
482f0e53 3269 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
3270 }
3271
3272 return 0;
3273}
3274
e3ecdffa
AD
3275/**
3276 * amdgpu_device_ip_resume - run resume for hardware IPs
3277 *
3278 * @adev: amdgpu_device pointer
3279 *
3280 * Main resume function for hardware IPs. The hardware IPs
3281 * are split into two resume functions because they are
3282 * also used in recovering from a GPU reset and some additional
3283 * steps need to be taken between them. In this case (S3/S4) they are
3284 * run sequentially.
3285 * Returns 0 on success, negative error code on failure.
3286 */
06ec9070 3287static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
3288{
3289 int r;
3290
9cec53c1
JZ
3291 r = amdgpu_amdkfd_resume_iommu(adev);
3292 if (r)
3293 return r;
3294
06ec9070 3295 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
3296 if (r)
3297 return r;
7a3e0bb2
RZ
3298
3299 r = amdgpu_device_fw_loading(adev);
3300 if (r)
3301 return r;
3302
06ec9070 3303 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
3304
3305 return r;
3306}
3307
e3ecdffa
AD
3308/**
3309 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3310 *
3311 * @adev: amdgpu_device pointer
3312 *
3313 * Query the VBIOS data tables to determine if the board supports SR-IOV.
3314 */
4e99a44e 3315static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 3316{
6867e1b5
ML
3317 if (amdgpu_sriov_vf(adev)) {
3318 if (adev->is_atom_fw) {
58ff791a 3319 if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
6867e1b5
ML
3320 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3321 } else {
3322 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3323 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3324 }
3325
3326 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3327 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 3328 }
048765ad
AR
3329}
3330
e3ecdffa
AD
3331/**
3332 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3333 *
3334 * @asic_type: AMD asic type
3335 *
3336 * Check if there is DC (new modesetting infrastructure) support for an asic.
3337 * returns true if DC has support, false if not.
3338 */
4562236b
HW
3339bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3340{
3341 switch (asic_type) {
0637d417
AD
3342#ifdef CONFIG_DRM_AMDGPU_SI
3343 case CHIP_HAINAN:
3344#endif
3345 case CHIP_TOPAZ:
3346 /* chips with no display hardware */
3347 return false;
4562236b 3348#if defined(CONFIG_DRM_AMD_DC)
64200c46
MR
3349 case CHIP_TAHITI:
3350 case CHIP_PITCAIRN:
3351 case CHIP_VERDE:
3352 case CHIP_OLAND:
2d32ffd6
AD
3353 /*
3354 * We have systems in the wild with these ASICs that require
3355 * LVDS and VGA support which is not supported with DC.
3356 *
3357 * Fallback to the non-DC driver here by default so as not to
3358 * cause regressions.
3359 */
3360#if defined(CONFIG_DRM_AMD_DC_SI)
3361 return amdgpu_dc > 0;
3362#else
3363 return false;
64200c46 3364#endif
4562236b 3365 case CHIP_BONAIRE:
0d6fbccb 3366 case CHIP_KAVERI:
367e6687
AD
3367 case CHIP_KABINI:
3368 case CHIP_MULLINS:
d9fda248
HW
3369 /*
3370 * We have systems in the wild with these ASICs that require
b5a0168e 3371 * VGA support which is not supported with DC.
d9fda248
HW
3372 *
3373 * Fallback to the non-DC driver here by default so as not to
3374 * cause regressions.
3375 */
3376 return amdgpu_dc > 0;
f7f12b25 3377 default:
fd187853 3378 return amdgpu_dc != 0;
f7f12b25 3379#else
4562236b 3380 default:
93b09a9a 3381 if (amdgpu_dc > 0)
044a48f4 3382 DRM_INFO_ONCE("Display Core has been requested via kernel parameter "
93b09a9a 3383 "but isn't supported by ASIC, ignoring\n");
4562236b 3384 return false;
f7f12b25 3385#endif
4562236b
HW
3386 }
3387}
3388
3389/**
3390 * amdgpu_device_has_dc_support - check if dc is supported
3391 *
982a820b 3392 * @adev: amdgpu_device pointer
4562236b
HW
3393 *
3394 * Returns true for supported, false for not supported
3395 */
3396bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3397{
25263da3 3398 if (adev->enable_virtual_display ||
abaf210c 3399 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
2555039d
XY
3400 return false;
3401
4562236b
HW
3402 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3403}
3404
d4535e2c
AG
3405static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3406{
3407 struct amdgpu_device *adev =
3408 container_of(__work, struct amdgpu_device, xgmi_reset_work);
d95e8e97 3409 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
d4535e2c 3410
c6a6e2db
AG
3411 /* It's a bug to not have a hive within this function */
3412 if (WARN_ON(!hive))
3413 return;
3414
3415 /*
3416 * Use task barrier to synchronize all xgmi reset works across the
3417 * hive. task_barrier_enter and task_barrier_exit will block
3418 * until all the threads running the xgmi reset works reach
3419 * those points. task_barrier_full will do both blocks.
3420 */
3421 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3422
3423 task_barrier_enter(&hive->tb);
4a580877 3424 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
c6a6e2db
AG
3425
3426 if (adev->asic_reset_res)
3427 goto fail;
3428
3429 task_barrier_exit(&hive->tb);
4a580877 3430 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
c6a6e2db
AG
3431
3432 if (adev->asic_reset_res)
3433 goto fail;
43c4d576 3434
5e67bba3 3435 if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
3436 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
3437 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
c6a6e2db
AG
3438 } else {
3439
3440 task_barrier_full(&hive->tb);
3441 adev->asic_reset_res = amdgpu_asic_reset(adev);
3442 }
ce316fa5 3443
c6a6e2db 3444fail:
d4535e2c 3445 if (adev->asic_reset_res)
fed184e9 3446 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
4a580877 3447 adev->asic_reset_res, adev_to_drm(adev)->unique);
d95e8e97 3448 amdgpu_put_xgmi_hive(hive);
d4535e2c
AG
3449}
3450
71f98027
AD
3451static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3452{
3453 char *input = amdgpu_lockup_timeout;
3454 char *timeout_setting = NULL;
3455 int index = 0;
3456 long timeout;
3457 int ret = 0;
3458
3459 /*
67387dfe
AD
 3460	 * By default, the timeout for non-compute jobs is 10000 ms
 3461	 * and 60000 ms for compute jobs.
71f98027 3462	 * In SR-IOV or passthrough mode, the timeout for compute
b7b2a316 3463	 * jobs is 60000 ms by default.
71f98027
AD
3464 */
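	/*
	 * Illustrative example (not from this file): booting with
	 *	amdgpu.lockup_timeout=10000,60000,10000,10000
	 * sets the gfx, compute, sdma and video timeouts (in ms) in that
	 * order, matching the index handling in the switch statement below.
	 */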
3465 adev->gfx_timeout = msecs_to_jiffies(10000);
3466 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
9882e278
ED
3467 if (amdgpu_sriov_vf(adev))
3468 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3469 msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
71f98027 3470 else
67387dfe 3471 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027 3472
f440ff44 3473 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 3474 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 3475 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
3476 ret = kstrtol(timeout_setting, 0, &timeout);
3477 if (ret)
3478 return ret;
3479
3480 if (timeout == 0) {
3481 index++;
3482 continue;
3483 } else if (timeout < 0) {
3484 timeout = MAX_SCHEDULE_TIMEOUT;
127aedf9
CK
3485 dev_warn(adev->dev, "lockup timeout disabled");
3486 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
71f98027
AD
3487 } else {
3488 timeout = msecs_to_jiffies(timeout);
3489 }
3490
3491 switch (index++) {
3492 case 0:
3493 adev->gfx_timeout = timeout;
3494 break;
3495 case 1:
3496 adev->compute_timeout = timeout;
3497 break;
3498 case 2:
3499 adev->sdma_timeout = timeout;
3500 break;
3501 case 3:
3502 adev->video_timeout = timeout;
3503 break;
3504 default:
3505 break;
3506 }
3507 }
3508 /*
3509 * There is only one value specified and
3510 * it should apply to all non-compute jobs.
3511 */
bcccee89 3512 if (index == 1) {
71f98027 3513 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
3514 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3515 adev->compute_timeout = adev->gfx_timeout;
3516 }
71f98027
AD
3517 }
3518
3519 return ret;
3520}
d4535e2c 3521
4a74c38c
PY
3522/**
3523 * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3524 *
3525 * @adev: amdgpu_device pointer
3526 *
3527 * RAM direct mapped to GPU if IOMMU is not enabled or is pass through mode
3528 */
3529static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3530{
3531 struct iommu_domain *domain;
3532
3533 domain = iommu_get_domain_for_dev(adev->dev);
3534 if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3535 adev->ram_is_direct_mapped = true;
3536}
3537
77f3a5cd
ND
3538static const struct attribute *amdgpu_dev_attributes[] = {
3539 &dev_attr_product_name.attr,
3540 &dev_attr_product_number.attr,
3541 &dev_attr_serial_number.attr,
3542 &dev_attr_pcie_replay_count.attr,
3543 NULL
3544};
3545
d38ceaf9
AD
3546/**
3547 * amdgpu_device_init - initialize the driver
3548 *
3549 * @adev: amdgpu_device pointer
d38ceaf9
AD
3550 * @flags: driver flags
3551 *
3552 * Initializes the driver info and hw (all asics).
3553 * Returns 0 for success or an error on failure.
3554 * Called at driver startup.
3555 */
3556int amdgpu_device_init(struct amdgpu_device *adev,
d38ceaf9
AD
3557 uint32_t flags)
3558{
8aba21b7
LT
3559 struct drm_device *ddev = adev_to_drm(adev);
3560 struct pci_dev *pdev = adev->pdev;
d38ceaf9 3561 int r, i;
b98c6299 3562 bool px = false;
95844d20 3563 u32 max_MBps;
d38ceaf9
AD
3564
3565 adev->shutdown = false;
d38ceaf9 3566 adev->flags = flags;
4e66d7d2
YZ
3567
3568 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3569 adev->asic_type = amdgpu_force_asic_type;
3570 else
3571 adev->asic_type = flags & AMD_ASIC_MASK;
3572
d38ceaf9 3573 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 3574 if (amdgpu_emu_mode == 1)
8bdab6bb 3575 adev->usec_timeout *= 10;
770d13b1 3576 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
3577 adev->accel_working = false;
3578 adev->num_rings = 0;
68ce8b24 3579 RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
d38ceaf9
AD
3580 adev->mman.buffer_funcs = NULL;
3581 adev->mman.buffer_funcs_ring = NULL;
3582 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 3583 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 3584 adev->gmc.gmc_funcs = NULL;
7bd939d0 3585 adev->harvest_ip_mask = 0x0;
f54d1867 3586 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 3587 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
3588
3589 adev->smc_rreg = &amdgpu_invalid_rreg;
3590 adev->smc_wreg = &amdgpu_invalid_wreg;
3591 adev->pcie_rreg = &amdgpu_invalid_rreg;
3592 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
3593 adev->pciep_rreg = &amdgpu_invalid_rreg;
3594 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
3595 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3596 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
3597 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3598 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3599 adev->didt_rreg = &amdgpu_invalid_rreg;
3600 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
3601 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3602 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
3603 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3604 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3605
3e39ab90
AD
3606 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3607 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3608 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
3609
 3610	/* mutex initializations are all done here so we
 3611	 * can recall functions without having locking issues */
0e5ca0d1 3612 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
3613 mutex_init(&adev->pm.mutex);
3614 mutex_init(&adev->gfx.gpu_clock_mutex);
3615 mutex_init(&adev->srbm_mutex);
b8866c26 3616 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3617 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 3618 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3619 mutex_init(&adev->mn_lock);
e23b74aa 3620 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3621 hash_init(adev->mn_hash);
32eaeae0 3622 mutex_init(&adev->psp.mutex);
bd052211 3623 mutex_init(&adev->notifier_lock);
8cda7a4f 3624 mutex_init(&adev->pm.stable_pstate_ctx_lock);
f113cc32 3625 mutex_init(&adev->benchmark_mutex);
d38ceaf9 3626
ab3b9de6 3627 amdgpu_device_init_apu_flags(adev);
9f6a7857 3628
912dfc84
EQ
3629 r = amdgpu_device_check_arguments(adev);
3630 if (r)
3631 return r;
d38ceaf9 3632
d38ceaf9
AD
3633 spin_lock_init(&adev->mmio_idx_lock);
3634 spin_lock_init(&adev->smc_idx_lock);
3635 spin_lock_init(&adev->pcie_idx_lock);
3636 spin_lock_init(&adev->uvd_ctx_idx_lock);
3637 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3638 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3639 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3640 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3641 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3642
0c4e7fa5
CZ
3643 INIT_LIST_HEAD(&adev->shadow_list);
3644 mutex_init(&adev->shadow_list_lock);
3645
655ce9cb 3646 INIT_LIST_HEAD(&adev->reset_list);
3647
6492e1b0 3648 INIT_LIST_HEAD(&adev->ras_list);
3649
beff74bc
AD
3650 INIT_DELAYED_WORK(&adev->delayed_init_work,
3651 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3652 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3653 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3654
d4535e2c
AG
3655 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3656
d23ee13f 3657 adev->gfx.gfx_off_req_count = 1;
0ad7347a
AA
3658 adev->gfx.gfx_off_residency = 0;
3659 adev->gfx.gfx_off_entrycount = 0;
b6e79d9a 3660 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3661
b265bdbd
EQ
3662 atomic_set(&adev->throttling_logging_enabled, 1);
3663 /*
3664 * If throttling continues, logging will be performed every minute
3665 * to avoid log flooding. "-1" is subtracted since the thermal
3666 * throttling interrupt comes every second. Thus, the total logging
 3667	 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
3668 * for throttling interrupt) = 60 seconds.
3669 */
3670 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3671 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3672
0fa49558
AX
3673 /* Registers mapping */
3674 /* TODO: block userspace mapping of io register */
da69c161
KW
3675 if (adev->asic_type >= CHIP_BONAIRE) {
3676 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3677 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3678 } else {
3679 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3680 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3681 }
d38ceaf9 3682
6c08e0ef
EQ
3683 for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3684 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3685
d38ceaf9
AD
3686 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3687 if (adev->rmmio == NULL) {
3688 return -ENOMEM;
3689 }
3690 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3691 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3692
5494d864
AD
3693 amdgpu_device_get_pcie_info(adev);
3694
b239c017
JX
3695 if (amdgpu_mcbp)
3696 DRM_INFO("MCBP is enabled\n");
3697
436afdfa
PY
3698 /*
 3699	 * Reset domain needs to be present early, before the XGMI hive is discovered
 3700	 * (if any) and initialized, to use the reset sem and in_gpu reset flag
3701 * early on during init and before calling to RREG32.
3702 */
3703 adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3704 if (!adev->reset_domain)
3705 return -ENOMEM;
3706
3aa0115d
ML
3707 /* detect hw virtualization here */
3708 amdgpu_detect_virtualization(adev);
3709
dffa11b4
ML
3710 r = amdgpu_device_get_job_timeout_settings(adev);
3711 if (r) {
3712 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4ef87d8f 3713 return r;
a190d1c7
XY
3714 }
3715
d38ceaf9 3716 /* early init functions */
06ec9070 3717 r = amdgpu_device_ip_early_init(adev);
d38ceaf9 3718 if (r)
4ef87d8f 3719 return r;
d38ceaf9 3720
b7cdb41e
ML
3721 /* Get rid of things like offb */
3722 r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
3723 if (r)
3724 return r;
3725
4d33e704
SK
3726 /* Enable TMZ based on IP_VERSION */
3727 amdgpu_gmc_tmz_set(adev);
3728
957b0787 3729 amdgpu_gmc_noretry_set(adev);
4a0165f0
VS
 3730	/* Need to get xgmi info early to decide the reset behavior */
3731 if (adev->gmc.xgmi.supported) {
3732 r = adev->gfxhub.funcs->get_xgmi_info(adev);
3733 if (r)
3734 return r;
3735 }
3736
8e6d0b69 3737 /* enable PCIE atomic ops */
3738 if (amdgpu_sriov_vf(adev))
3739 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
e15c9d06 3740 adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
8e6d0b69 3741 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3742 else
3743 adev->have_atomics_support =
3744 !pci_enable_atomic_ops_to_root(adev->pdev,
3745 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3746 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3747 if (!adev->have_atomics_support)
3748 dev_info(adev->dev, "PCIE atomic ops is not supported\n");
3749
6585661d
OZ
 3750	/* doorbell bar mapping and doorbell index init */
3751 amdgpu_device_doorbell_init(adev);
3752
9475a943
SL
3753 if (amdgpu_emu_mode == 1) {
3754 /* post the asic on emulation mode */
3755 emu_soc_asic_init(adev);
bfca0289 3756 goto fence_driver_init;
9475a943 3757 }
bfca0289 3758
04442bf7
LL
3759 amdgpu_reset_init(adev);
3760
4e99a44e
ML
3761 /* detect if we are with an SRIOV vbios */
3762 amdgpu_device_detect_sriov_bios(adev);
048765ad 3763
95e8e59e
AD
3764 /* check if we need to reset the asic
3765 * E.g., driver was not cleanly unloaded previously, etc.
3766 */
f14899fd 3767 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
e3c1b071 3768 if (adev->gmc.xgmi.num_physical_nodes) {
3769 dev_info(adev->dev, "Pending hive reset.\n");
3770 adev->gmc.xgmi.pending_reset = true;
 3771			/* Only need to init the necessary blocks for SMU to handle the reset */
3772 for (i = 0; i < adev->num_ip_blocks; i++) {
3773 if (!adev->ip_blocks[i].status.valid)
3774 continue;
3775 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3776 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3777 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3778 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
751f43e7 3779 DRM_DEBUG("IP %s disabled for hw_init.\n",
e3c1b071 3780 adev->ip_blocks[i].version->funcs->name);
3781 adev->ip_blocks[i].status.hw = true;
3782 }
3783 }
3784 } else {
3785 r = amdgpu_asic_reset(adev);
3786 if (r) {
3787 dev_err(adev->dev, "asic reset on init failed\n");
3788 goto failed;
3789 }
95e8e59e
AD
3790 }
3791 }
3792
d38ceaf9 3793 /* Post card if necessary */
39c640c0 3794 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3795 if (!adev->bios) {
bec86378 3796 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3797 r = -EINVAL;
3798 goto failed;
d38ceaf9 3799 }
bec86378 3800 DRM_INFO("GPU posting now...\n");
4d2997ab 3801 r = amdgpu_device_asic_init(adev);
4e99a44e
ML
3802 if (r) {
3803 dev_err(adev->dev, "gpu post error!\n");
3804 goto failed;
3805 }
d38ceaf9
AD
3806 }
3807
88b64e95
AD
3808 if (adev->is_atom_fw) {
3809 /* Initialize clocks */
3810 r = amdgpu_atomfirmware_get_clock_info(adev);
3811 if (r) {
3812 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 3813 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
3814 goto failed;
3815 }
3816 } else {
a5bde2f9
AD
3817 /* Initialize clocks */
3818 r = amdgpu_atombios_get_clock_info(adev);
3819 if (r) {
3820 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 3821 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 3822 goto failed;
a5bde2f9
AD
3823 }
3824 /* init i2c buses */
4562236b
HW
3825 if (!amdgpu_device_has_dc_support(adev))
3826 amdgpu_atombios_i2c_init(adev);
2c1a2784 3827 }
d38ceaf9 3828
bfca0289 3829fence_driver_init:
d38ceaf9 3830 /* Fence driver */
067f44c8 3831 r = amdgpu_fence_driver_sw_init(adev);
2c1a2784 3832 if (r) {
067f44c8 3833 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
e23b74aa 3834 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3835 goto failed;
2c1a2784 3836 }
d38ceaf9
AD
3837
3838 /* init the mode config */
4a580877 3839 drm_mode_config_init(adev_to_drm(adev));
d38ceaf9 3840
06ec9070 3841 r = amdgpu_device_ip_init(adev);
d38ceaf9 3842 if (r) {
8840a387 3843 /* failed in exclusive mode due to timeout */
3844 if (amdgpu_sriov_vf(adev) &&
3845 !amdgpu_sriov_runtime(adev) &&
3846 amdgpu_virt_mmio_blocked(adev) &&
3847 !amdgpu_virt_wait_reset(adev)) {
3848 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
3849 /* Don't send request since VF is inactive. */
3850 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3851 adev->virt.ops = NULL;
8840a387 3852 r = -EAGAIN;
970fd197 3853 goto release_ras_con;
8840a387 3854 }
06ec9070 3855 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3856 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
970fd197 3857 goto release_ras_con;
d38ceaf9
AD
3858 }
3859
8d35a259
LG
3860 amdgpu_fence_driver_hw_init(adev);
3861
d69b8971
YZ
3862 dev_info(adev->dev,
3863 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
3864 adev->gfx.config.max_shader_engines,
3865 adev->gfx.config.max_sh_per_se,
3866 adev->gfx.config.max_cu_per_sh,
3867 adev->gfx.cu_info.number);
3868
d38ceaf9
AD
3869 adev->accel_working = true;
3870
e59c0205
AX
3871 amdgpu_vm_check_compute_bug(adev);
3872
95844d20
MO
3873 /* Initialize the buffer migration limit. */
3874 if (amdgpu_moverate >= 0)
3875 max_MBps = amdgpu_moverate;
3876 else
3877 max_MBps = 8; /* Allow 8 MB/s. */
3878 /* Get a log2 for easy divisions. */
3879 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
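	/*
	 * For example, the default 8 MB/s gives log2_max_MBps = 3 (ilog2(8) == 3),
	 * so later accounting can divide by the limit with a simple shift.
	 */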
3880
d2f52ac8 3881 r = amdgpu_pm_sysfs_init(adev);
53e9d836
GC
3882 if (r)
3883 DRM_ERROR("registering pm sysfs failed (%d).\n", r);
d2f52ac8 3884
5bb23532 3885 r = amdgpu_ucode_sysfs_init(adev);
7c868b59
YT
3886 if (r) {
3887 adev->ucode_sysfs_en = false;
5bb23532 3888 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
7c868b59
YT
3889 } else
3890 adev->ucode_sysfs_en = true;
5bb23532 3891
8424f2cc
LG
3892 r = amdgpu_psp_sysfs_init(adev);
3893 if (r) {
3894 adev->psp_sysfs_en = false;
3895 if (!amdgpu_sriov_vf(adev))
3896 DRM_ERROR("Creating psp sysfs failed\n");
3897 } else
3898 adev->psp_sysfs_en = true;
3899
b0adca4d
EQ
3900 /*
3901 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
 3902	 * Otherwise the mgpu fan boost feature will be skipped because the
 3903	 * gpu instance count would come up short.
3904 */
3905 amdgpu_register_gpu_instance(adev);
3906
d38ceaf9
AD
3907 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3908 * explicit gating rather than handling it automatically.
3909 */
e3c1b071 3910 if (!adev->gmc.xgmi.pending_reset) {
3911 r = amdgpu_device_ip_late_init(adev);
3912 if (r) {
3913 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
3914 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
970fd197 3915 goto release_ras_con;
e3c1b071 3916 }
3917 /* must succeed. */
3918 amdgpu_ras_resume(adev);
3919 queue_delayed_work(system_wq, &adev->delayed_init_work,
3920 msecs_to_jiffies(AMDGPU_RESUME_MS));
2c1a2784 3921 }
d38ceaf9 3922
2c738637
ML
3923 if (amdgpu_sriov_vf(adev))
3924 flush_delayed_work(&adev->delayed_init_work);
3925
77f3a5cd 3926 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
5aea5327 3927 if (r)
77f3a5cd 3928 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166 3929
d155bef0
AB
3930 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3931 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
3932 if (r)
3933 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3934
c1dd4aa6
AG
 3935	/* Keep the stored PCI config space at hand for restore after a sudden PCI error */
3936 if (amdgpu_device_cache_pci_state(adev->pdev))
3937 pci_restore_state(pdev);
3938
8c3dd61c
KHF
3939 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
3940 /* this will fail for cards that aren't VGA class devices, just
3941 * ignore it */
3942 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
bf44e8ce 3943 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
8c3dd61c 3944
d37a3929
OC
3945 px = amdgpu_device_supports_px(ddev);
3946
3947 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
3948 apple_gmux_detect(NULL, NULL)))
8c3dd61c
KHF
3949 vga_switcheroo_register_client(adev->pdev,
3950 &amdgpu_switcheroo_ops, px);
d37a3929
OC
3951
3952 if (px)
8c3dd61c 3953 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
8c3dd61c 3954
e3c1b071 3955 if (adev->gmc.xgmi.pending_reset)
3956 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
3957 msecs_to_jiffies(AMDGPU_RESUME_MS));
3958
4a74c38c
PY
3959 amdgpu_device_check_iommu_direct_map(adev);
3960
d38ceaf9 3961 return 0;
83ba126a 3962
970fd197
SY
3963release_ras_con:
3964 amdgpu_release_ras_context(adev);
3965
83ba126a 3966failed:
89041940 3967 amdgpu_vf_error_trans_all(adev);
8840a387 3968
83ba126a 3969 return r;
d38ceaf9
AD
3970}
3971
07775fc1
AG
3972static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
3973{
62d5f9f7 3974
07775fc1
AG
3975 /* Clear all CPU mappings pointing to this device */
3976 unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
3977
3978 /* Unmap all mapped bars - Doorbell, registers and VRAM */
3979 amdgpu_device_doorbell_fini(adev);
3980
3981 iounmap(adev->rmmio);
3982 adev->rmmio = NULL;
3983 if (adev->mman.aper_base_kaddr)
3984 iounmap(adev->mman.aper_base_kaddr);
3985 adev->mman.aper_base_kaddr = NULL;
3986
3987 /* Memory manager related */
3988 if (!adev->gmc.xgmi.connected_to_cpu) {
3989 arch_phys_wc_del(adev->gmc.vram_mtrr);
3990 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
3991 }
3992}
3993
d38ceaf9 3994/**
bbe04dec 3995 * amdgpu_device_fini_hw - tear down the driver
d38ceaf9
AD
3996 *
3997 * @adev: amdgpu_device pointer
3998 *
3999 * Tear down the driver info (all asics).
4000 * Called at driver shutdown.
4001 */
72c8c97b 4002void amdgpu_device_fini_hw(struct amdgpu_device *adev)
d38ceaf9 4003{
aac89168 4004 dev_info(adev->dev, "amdgpu: finishing device.\n");
9f875167 4005 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 4006 adev->shutdown = true;
9f875167 4007
752c683d
ML
 4008	/* make sure IB tests are finished before entering exclusive mode
 4009	 * to avoid preemption on IB tests
 4010	 */
519b8b76 4011 if (amdgpu_sriov_vf(adev)) {
752c683d 4012 amdgpu_virt_request_full_gpu(adev, false);
519b8b76
BZ
4013 amdgpu_virt_fini_data_exchange(adev);
4014 }
752c683d 4015
e5b03032
ML
4016 /* disable all interrupts */
4017 amdgpu_irq_disable_all(adev);
ff97cba8 4018 if (adev->mode_info.mode_config_initialized){
1053b9c9 4019 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4a580877 4020 drm_helper_force_disable_all(adev_to_drm(adev));
ff97cba8 4021 else
4a580877 4022 drm_atomic_helper_shutdown(adev_to_drm(adev));
ff97cba8 4023 }
8d35a259 4024 amdgpu_fence_driver_hw_fini(adev);
72c8c97b 4025
cd3a8a59 4026 if (adev->mman.initialized)
9bff18d1 4027 drain_workqueue(adev->mman.bdev.wq);
98f56188 4028
53e9d836 4029 if (adev->pm.sysfs_initialized)
7c868b59 4030 amdgpu_pm_sysfs_fini(adev);
72c8c97b
AG
4031 if (adev->ucode_sysfs_en)
4032 amdgpu_ucode_sysfs_fini(adev);
8424f2cc
LG
4033 if (adev->psp_sysfs_en)
4034 amdgpu_psp_sysfs_fini(adev);
72c8c97b
AG
4035 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
4036
232d1d43
SY
4037 /* disable ras feature must before hw fini */
4038 amdgpu_ras_pre_fini(adev);
4039
e9669fb7 4040 amdgpu_device_ip_fini_early(adev);
d10d0daa 4041
a3848df6
YW
4042 amdgpu_irq_fini_hw(adev);
4043
b6fd6e0f
SK
4044 if (adev->mman.initialized)
4045 ttm_device_clear_dma_mappings(&adev->mman.bdev);
894c6890 4046
d10d0daa 4047 amdgpu_gart_dummy_page_fini(adev);
07775fc1 4048
39934d3e
VP
4049 if (drm_dev_is_unplugged(adev_to_drm(adev)))
4050 amdgpu_device_unmap_mmio(adev);
87172e89 4051
72c8c97b
AG
4052}
4053
4054void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4055{
62d5f9f7 4056 int idx;
d37a3929 4057 bool px;
62d5f9f7 4058
8d35a259 4059 amdgpu_fence_driver_sw_fini(adev);
a5c5d8d5 4060 amdgpu_device_ip_fini(adev);
b31d3063 4061 amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
d38ceaf9 4062 adev->accel_working = false;
68ce8b24 4063 dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
04442bf7
LL
4064
4065 amdgpu_reset_fini(adev);
4066
d38ceaf9 4067 /* free i2c buses */
4562236b
HW
4068 if (!amdgpu_device_has_dc_support(adev))
4069 amdgpu_i2c_fini(adev);
bfca0289
SL
4070
4071 if (amdgpu_emu_mode != 1)
4072 amdgpu_atombios_fini(adev);
4073
d38ceaf9
AD
4074 kfree(adev->bios);
4075 adev->bios = NULL;
d37a3929
OC
4076
4077 px = amdgpu_device_supports_px(adev_to_drm(adev));
4078
4079 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
4080 apple_gmux_detect(NULL, NULL)))
84c8b22e 4081 vga_switcheroo_unregister_client(adev->pdev);
d37a3929
OC
4082
4083 if (px)
83ba126a 4084 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d37a3929 4085
38d6be81 4086 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
b8779475 4087 vga_client_unregister(adev->pdev);
e9bc1bf7 4088
62d5f9f7
LS
4089 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4090
4091 iounmap(adev->rmmio);
4092 adev->rmmio = NULL;
4093 amdgpu_device_doorbell_fini(adev);
4094 drm_dev_exit(idx);
4095 }
4096
d155bef0
AB
4097 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4098 amdgpu_pmu_fini(adev);
72de33f8 4099 if (adev->mman.discovery_bin)
a190d1c7 4100 amdgpu_discovery_fini(adev);
72c8c97b 4101
cfbb6b00
AG
4102 amdgpu_reset_put_reset_domain(adev->reset_domain);
4103 adev->reset_domain = NULL;
4104
72c8c97b
AG
4105 kfree(adev->pci_state);
4106
d38ceaf9
AD
4107}
4108
58144d28
ND
4109/**
4110 * amdgpu_device_evict_resources - evict device resources
4111 * @adev: amdgpu device object
4112 *
 4113 * Evicts all ttm device resources (vram BOs, gart table) from the lru list
4114 * of the vram memory type. Mainly used for evicting device resources
4115 * at suspend time.
4116 *
4117 */
7863c155 4118static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
58144d28 4119{
7863c155
ML
4120 int ret;
4121
e53d9665
ML
4122 /* No need to evict vram on APUs for suspend to ram or s2idle */
4123 if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
7863c155 4124 return 0;
58144d28 4125
7863c155
ML
4126 ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4127 if (ret)
58144d28 4128 DRM_WARN("evicting device resources failed\n");
7863c155 4129 return ret;
58144d28 4130}
d38ceaf9
AD
4131
4132/*
4133 * Suspend & resume.
4134 */
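/*
 * Illustrative usage (an assumption about the callers, not part of this file):
 * the driver's PM handlers are expected to pair these entry points, e.g.
 *	amdgpu_device_suspend(drm_dev, true);	/* system sleep */
 *	amdgpu_device_resume(drm_dev, true);	/* wake */
 * with fbcon=true so the fbdev helper is notified of the state change.
 */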
4135/**
810ddc3a 4136 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 4137 *
87e3f136 4138 * @dev: drm dev pointer
87e3f136 4139 * @fbcon: notify the fbdev of suspend
d38ceaf9
AD
4140 *
4141 * Puts the hw in the suspend state (all asics).
4142 * Returns 0 for success or an error on failure.
4143 * Called at driver suspend.
4144 */
de185019 4145int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9 4146{
a2e15b0e 4147 struct amdgpu_device *adev = drm_to_adev(dev);
d7274ec7 4148 int r = 0;
d38ceaf9 4149
d38ceaf9
AD
4150 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4151 return 0;
4152
44779b43 4153 adev->in_suspend = true;
3fa8f89d 4154
47ea2076
SF
4155 /* Evict the majority of BOs before grabbing the full access */
4156 r = amdgpu_device_evict_resources(adev);
4157 if (r)
4158 return r;
4159
d7274ec7
BZ
4160 if (amdgpu_sriov_vf(adev)) {
4161 amdgpu_virt_fini_data_exchange(adev);
4162 r = amdgpu_virt_request_full_gpu(adev, false);
4163 if (r)
4164 return r;
4165 }
4166
3fa8f89d
S
4167 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4168 DRM_WARN("smart shift update failed\n");
4169
5f818173 4170 if (fbcon)
087451f3 4171 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
5f818173 4172
beff74bc 4173 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 4174
5e6932fe 4175 amdgpu_ras_suspend(adev);
4176
2196927b 4177 amdgpu_device_ip_suspend_phase1(adev);
fe1053b7 4178
c004d44e 4179 if (!adev->in_s0ix)
5d3a2d95 4180 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
94fa5660 4181
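	/*
	 * Second eviction pass: now that phase 1 suspend has idled the HW IPs,
	 * evict whatever is still resident in VRAM (likely the GART table and
	 * anything the first pass could not move) before fences are torn down.
	 */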
7863c155
ML
4182 r = amdgpu_device_evict_resources(adev);
4183 if (r)
4184 return r;
d38ceaf9 4185
8d35a259 4186 amdgpu_fence_driver_hw_fini(adev);
d38ceaf9 4187
2196927b 4188 amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 4189
d7274ec7
BZ
4190 if (amdgpu_sriov_vf(adev))
4191 amdgpu_virt_release_full_gpu(adev, false);
4192
d38ceaf9
AD
4193 return 0;
4194}
4195
4196/**
810ddc3a 4197 * amdgpu_device_resume - initiate device resume
d38ceaf9 4198 *
87e3f136 4199 * @dev: drm dev pointer
87e3f136 4200 * @fbcon: notify the fbdev of resume
d38ceaf9
AD
4201 *
4202 * Bring the hw back to operating state (all asics).
4203 * Returns 0 for success or an error on failure.
4204 * Called at driver resume.
4205 */
de185019 4206int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9 4207{
1348969a 4208 struct amdgpu_device *adev = drm_to_adev(dev);
03161a6e 4209 int r = 0;
d38ceaf9 4210
d7274ec7
BZ
4211 if (amdgpu_sriov_vf(adev)) {
4212 r = amdgpu_virt_request_full_gpu(adev, true);
4213 if (r)
4214 return r;
4215 }
4216
d38ceaf9
AD
4217 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4218 return 0;
4219
62498733 4220 if (adev->in_s0ix)
bc143d8b 4221 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
628c36d7 4222
d38ceaf9 4223 /* post card */
39c640c0 4224 if (amdgpu_device_need_post(adev)) {
4d2997ab 4225 r = amdgpu_device_asic_init(adev);
74b0b157 4226 if (r)
aac89168 4227 dev_err(adev->dev, "amdgpu asic init failed\n");
74b0b157 4228 }
d38ceaf9 4229
06ec9070 4230 r = amdgpu_device_ip_resume(adev);
d7274ec7 4231
e6707218 4232 if (r) {
aac89168 4233 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
3c22c1ea 4234 goto exit;
e6707218 4235 }
8d35a259 4236 amdgpu_fence_driver_hw_init(adev);
5ceb54c6 4237
06ec9070 4238 r = amdgpu_device_ip_late_init(adev);
03161a6e 4239 if (r)
3c22c1ea 4240 goto exit;
d38ceaf9 4241
beff74bc
AD
4242 queue_delayed_work(system_wq, &adev->delayed_init_work,
4243 msecs_to_jiffies(AMDGPU_RESUME_MS));
4244
c004d44e 4245 if (!adev->in_s0ix) {
5d3a2d95
AD
4246 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4247 if (r)
3c22c1ea 4248 goto exit;
5d3a2d95 4249 }
756e6880 4250
3c22c1ea
SF
4251exit:
4252 if (amdgpu_sriov_vf(adev)) {
4253 amdgpu_virt_init_data_exchange(adev);
4254 amdgpu_virt_release_full_gpu(adev, true);
4255 }
4256
4257 if (r)
4258 return r;
4259
96a5d8d4 4260 /* Make sure IB tests flushed */
beff74bc 4261 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 4262
a2e15b0e 4263 if (fbcon)
087451f3 4264 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
d38ceaf9 4265
5e6932fe 4266 amdgpu_ras_resume(adev);
4267
d09ef243
AD
4268 if (adev->mode_info.num_crtc) {
4269 /*
4270 * Most of the connector probing functions try to acquire runtime pm
4271 * refs to ensure that the GPU is powered on when connector polling is
4272 * performed. Since we're calling this from a runtime PM callback,
4273 * trying to acquire rpm refs will cause us to deadlock.
4274 *
4275 * Since we're guaranteed to be holding the rpm lock, it's safe to
4276 * temporarily disable the rpm helpers so this doesn't deadlock us.
4277 */
23a1a9e5 4278#ifdef CONFIG_PM
d09ef243 4279 dev->dev->power.disable_depth++;
23a1a9e5 4280#endif
d09ef243
AD
4281 if (!adev->dc_enabled)
4282 drm_helper_hpd_irq_event(dev);
4283 else
4284 drm_kms_helper_hotplug_event(dev);
23a1a9e5 4285#ifdef CONFIG_PM
d09ef243 4286 dev->dev->power.disable_depth--;
23a1a9e5 4287#endif
d09ef243 4288 }
44779b43
RZ
4289 adev->in_suspend = false;
4290
dc907c9d
JX
4291 if (adev->enable_mes)
4292 amdgpu_mes_self_test(adev);
4293
3fa8f89d
S
4294 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4295 DRM_WARN("smart shift update failed\n");
4296
4d3b9ae5 4297 return 0;
d38ceaf9
AD
4298}
4299
e3ecdffa
AD
4300/**
4301 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4302 *
4303 * @adev: amdgpu_device pointer
4304 *
4305 * The list of all the hardware IPs that make up the asic is walked and
4306 * the check_soft_reset callbacks are run. check_soft_reset determines
4307 * if the asic is still hung or not.
4308 * Returns true if any of the IPs are still in a hung state, false if not.
4309 */
06ec9070 4310static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
4311{
4312 int i;
4313 bool asic_hang = false;
4314
f993d628
ML
4315 if (amdgpu_sriov_vf(adev))
4316 return true;
4317
8bc04c29
AD
4318 if (amdgpu_asic_need_full_reset(adev))
4319 return true;
4320
63fbf42f 4321 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4322 if (!adev->ip_blocks[i].status.valid)
63fbf42f 4323 continue;
a1255107
AD
4324 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4325 adev->ip_blocks[i].status.hang =
4326 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4327 if (adev->ip_blocks[i].status.hang) {
aac89168 4328 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
4329 asic_hang = true;
4330 }
4331 }
4332 return asic_hang;
4333}
4334
e3ecdffa
AD
4335/**
4336 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4337 *
4338 * @adev: amdgpu_device pointer
4339 *
4340 * The list of all the hardware IPs that make up the asic is walked and the
4341 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
4342 * handles any IP specific hardware or software state changes that are
4343 * necessary for a soft reset to succeed.
4344 * Returns 0 on success, negative error code on failure.
4345 */
06ec9070 4346static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
4347{
4348 int i, r = 0;
4349
4350 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4351 if (!adev->ip_blocks[i].status.valid)
d31a501e 4352 continue;
a1255107
AD
4353 if (adev->ip_blocks[i].status.hang &&
4354 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4355 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
4356 if (r)
4357 return r;
4358 }
4359 }
4360
4361 return 0;
4362}
4363
e3ecdffa
AD
4364/**
4365 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4366 *
4367 * @adev: amdgpu_device pointer
4368 *
4369 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
4370 * reset is necessary to recover.
4371 * Returns true if a full asic reset is required, false if not.
4372 */
06ec9070 4373static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 4374{
da146d3b
AD
4375 int i;
4376
8bc04c29
AD
4377 if (amdgpu_asic_need_full_reset(adev))
4378 return true;
4379
da146d3b 4380 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4381 if (!adev->ip_blocks[i].status.valid)
da146d3b 4382 continue;
a1255107
AD
4383 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4384 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4385 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
4386 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4387 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 4388 if (adev->ip_blocks[i].status.hang) {
aac89168 4389 dev_info(adev->dev, "Some block need full reset!\n");
da146d3b
AD
4390 return true;
4391 }
4392 }
35d782fe
CZ
4393 }
4394 return false;
4395}
4396
e3ecdffa
AD
4397/**
4398 * amdgpu_device_ip_soft_reset - do a soft reset
4399 *
4400 * @adev: amdgpu_device pointer
4401 *
4402 * The list of all the hardware IPs that make up the asic is walked and the
4403 * soft_reset callbacks are run if the block is hung. soft_reset handles any
4404 * IP specific hardware or software state changes that are necessary to soft
4405 * reset the IP.
4406 * Returns 0 on success, negative error code on failure.
4407 */
06ec9070 4408static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4409{
4410 int i, r = 0;
4411
4412 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4413 if (!adev->ip_blocks[i].status.valid)
35d782fe 4414 continue;
a1255107
AD
4415 if (adev->ip_blocks[i].status.hang &&
4416 adev->ip_blocks[i].version->funcs->soft_reset) {
4417 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
4418 if (r)
4419 return r;
4420 }
4421 }
4422
4423 return 0;
4424}
4425
e3ecdffa
AD
4426/**
4427 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4428 *
4429 * @adev: amdgpu_device pointer
4430 *
4431 * The list of all the hardware IPs that make up the asic is walked and the
4432 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
4433 * handles any IP specific hardware or software state changes that are
4434 * necessary after the IP has been soft reset.
4435 * Returns 0 on success, negative error code on failure.
4436 */
06ec9070 4437static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4438{
4439 int i, r = 0;
4440
4441 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4442 if (!adev->ip_blocks[i].status.valid)
35d782fe 4443 continue;
a1255107
AD
4444 if (adev->ip_blocks[i].status.hang &&
4445 adev->ip_blocks[i].version->funcs->post_soft_reset)
4446 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
4447 if (r)
4448 return r;
4449 }
4450
4451 return 0;
4452}
4453
e3ecdffa 4454/**
c33adbc7 4455 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
4456 *
4457 * @adev: amdgpu_device pointer
4458 *
4459 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
4460 * restore things like GPUVM page tables after a GPU reset where
4461 * the contents of VRAM might be lost.
403009bf
CK
4462 *
4463 * Returns:
4464 * 0 on success, negative error code on failure.
e3ecdffa 4465 */
c33adbc7 4466static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 4467{
c41d1cf6 4468 struct dma_fence *fence = NULL, *next = NULL;
403009bf 4469 struct amdgpu_bo *shadow;
e18aaea7 4470 struct amdgpu_bo_vm *vmbo;
403009bf 4471 long r = 1, tmo;
c41d1cf6
ML
4472
4473 if (amdgpu_sriov_runtime(adev))
b045d3af 4474 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
4475 else
4476 tmo = msecs_to_jiffies(100);
4477
aac89168 4478 dev_info(adev->dev, "recover vram bo from shadow start\n");
c41d1cf6 4479 mutex_lock(&adev->shadow_list_lock);
e18aaea7
ND
4480 list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4481 shadow = &vmbo->bo;
403009bf 4482 /* No need to recover an evicted BO */
d3116756
CK
4483 if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4484 shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4485 shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
403009bf
CK
4486 continue;
4487
4488 r = amdgpu_bo_restore_shadow(shadow, &next);
4489 if (r)
4490 break;
4491
c41d1cf6 4492 if (fence) {
1712fb1a 4493 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
4494 dma_fence_put(fence);
4495 fence = next;
1712fb1a 4496 if (tmo == 0) {
4497 r = -ETIMEDOUT;
c41d1cf6 4498 break;
1712fb1a 4499 } else if (tmo < 0) {
4500 r = tmo;
4501 break;
4502 }
403009bf
CK
4503 } else {
4504 fence = next;
c41d1cf6 4505 }
c41d1cf6
ML
4506 }
4507 mutex_unlock(&adev->shadow_list_lock);
4508
403009bf
CK
4509 if (fence)
4510 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
4511 dma_fence_put(fence);
4512
1712fb1a 4513 if (r < 0 || tmo <= 0) {
aac89168 4514 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
4515 return -EIO;
4516 }
c41d1cf6 4517
aac89168 4518 dev_info(adev->dev, "recover vram bo from shadow done\n");
403009bf 4519 return 0;
c41d1cf6
ML
4520}
4521
a90ad3c2 4522
e3ecdffa 4523/**
06ec9070 4524 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e 4525 *
982a820b 4526 * @adev: amdgpu_device pointer
87e3f136 4527 * @from_hypervisor: request from hypervisor
5740682e
ML
4528 *
 4529 * Do VF FLR and reinitialize the ASIC.
3f48c681 4530 * Returns 0 on success, negative error code on failure.
e3ecdffa
AD
4531 */
4532static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4533 bool from_hypervisor)
5740682e
ML
4534{
4535 int r;
a5f67c93 4536 struct amdgpu_hive_info *hive = NULL;
7258fa31 4537 int retry_limit = 0;
5740682e 4538
7258fa31 4539retry:
c004d44e 4540 amdgpu_amdkfd_pre_reset(adev);
428890a3 4541
5740682e
ML
4542 if (from_hypervisor)
4543 r = amdgpu_virt_request_full_gpu(adev, true);
4544 else
4545 r = amdgpu_virt_reset_gpu(adev);
4546 if (r)
4547 return r;
a90ad3c2
ML
4548
4549 /* Resume IP prior to SMC */
06ec9070 4550 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
4551 if (r)
4552 goto error;
a90ad3c2 4553
c9ffa427 4554 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 4555
7a3e0bb2
RZ
4556 r = amdgpu_device_fw_loading(adev);
4557 if (r)
4558 return r;
4559
a90ad3c2 4560 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 4561 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
4562 if (r)
4563 goto error;
a90ad3c2 4564
a5f67c93
ZL
4565 hive = amdgpu_get_xgmi_hive(adev);
4566 /* Update PSP FW topology after reset */
4567 if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4568 r = amdgpu_xgmi_update_topology(hive, adev);
4569
4570 if (hive)
4571 amdgpu_put_xgmi_hive(hive);
4572
4573 if (!r) {
4574 amdgpu_irq_gpu_reset_resume_helper(adev);
4575 r = amdgpu_ib_ring_tests(adev);
9c12f5cd 4576
c004d44e 4577 amdgpu_amdkfd_post_reset(adev);
a5f67c93 4578 }
a90ad3c2 4579
abc34253 4580error:
c41d1cf6 4581 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 4582 amdgpu_inc_vram_lost(adev);
c33adbc7 4583 r = amdgpu_device_recover_vram(adev);
a90ad3c2 4584 }
437f3e0b 4585 amdgpu_virt_release_full_gpu(adev, true);
a90ad3c2 4586
7258fa31
SK
4587 if (AMDGPU_RETRY_SRIOV_RESET(r)) {
4588 if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
4589 retry_limit++;
4590 goto retry;
4591 } else
4592 DRM_ERROR("GPU reset retry is beyond the retry limit\n");
4593 }
4594
a90ad3c2
ML
4595 return r;
4596}
4597
9a1cddd6 4598/**
 4599 * amdgpu_device_has_job_running - check if there is any job in the pending list
4600 *
982a820b 4601 * @adev: amdgpu_device pointer
9a1cddd6 4602 *
 4603 * check if there is any job in the pending list of any ring
4604 */
4605bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4606{
4607 int i;
4608 struct drm_sched_job *job;
4609
4610 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4611 struct amdgpu_ring *ring = adev->rings[i];
4612
4613 if (!ring || !ring->sched.thread)
4614 continue;
4615
4616 spin_lock(&ring->sched.job_list_lock);
6efa4b46
LT
4617 job = list_first_entry_or_null(&ring->sched.pending_list,
4618 struct drm_sched_job, list);
9a1cddd6 4619 spin_unlock(&ring->sched.job_list_lock);
4620 if (job)
4621 return true;
4622 }
4623 return false;
4624}
4625
12938fad
CK
4626/**
4627 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4628 *
982a820b 4629 * @adev: amdgpu_device pointer
12938fad
CK
4630 *
4631 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4632 * a hung GPU.
4633 */
4634bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4635{
12938fad 4636
3ba7b418
AG
4637 if (amdgpu_gpu_recovery == 0)
4638 goto disabled;
4639
1a11a65d
YC
4640 /* Skip soft reset check in fatal error mode */
4641 if (!amdgpu_ras_is_poison_mode_supported(adev))
4642 return true;
4643
3ba7b418
AG
4644 if (amdgpu_sriov_vf(adev))
4645 return true;
4646
4647 if (amdgpu_gpu_recovery == -1) {
4648 switch (adev->asic_type) {
b3523c45
AD
4649#ifdef CONFIG_DRM_AMDGPU_SI
4650 case CHIP_VERDE:
4651 case CHIP_TAHITI:
4652 case CHIP_PITCAIRN:
4653 case CHIP_OLAND:
4654 case CHIP_HAINAN:
4655#endif
4656#ifdef CONFIG_DRM_AMDGPU_CIK
4657 case CHIP_KAVERI:
4658 case CHIP_KABINI:
4659 case CHIP_MULLINS:
4660#endif
4661 case CHIP_CARRIZO:
4662 case CHIP_STONEY:
4663 case CHIP_CYAN_SKILLFISH:
3ba7b418 4664 goto disabled;
b3523c45
AD
4665 default:
4666 break;
3ba7b418 4667 }
12938fad
CK
4668 }
4669
4670 return true;
3ba7b418
AG
4671
4672disabled:
aac89168 4673 dev_info(adev->dev, "GPU recovery disabled.\n");
3ba7b418 4674 return false;
12938fad
CK
4675}
4676
5c03e584
FX
4677int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
4678{
4679 u32 i;
4680 int ret = 0;
4681
4682 amdgpu_atombios_scratch_regs_engine_hung(adev, true);
4683
4684 dev_info(adev->dev, "GPU mode1 reset\n");
4685
4686 /* disable BM */
4687 pci_clear_master(adev->pdev);
4688
4689 amdgpu_device_cache_pci_state(adev->pdev);
4690
4691 if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
4692 dev_info(adev->dev, "GPU smu mode1 reset\n");
4693 ret = amdgpu_dpm_mode1_reset(adev);
4694 } else {
4695 dev_info(adev->dev, "GPU psp mode1 reset\n");
4696 ret = psp_gpu_reset(adev);
4697 }
4698
4699 if (ret)
4700 dev_err(adev->dev, "GPU mode1 reset failed\n");
4701
4702 amdgpu_device_load_pci_state(adev->pdev);
4703
4704 /* wait for asic to come out of reset */
4705 for (i = 0; i < adev->usec_timeout; i++) {
4706 u32 memsize = adev->nbio.funcs->get_memsize(adev);
4707
4708 if (memsize != 0xffffffff)
4709 break;
4710 udelay(1);
4711 }
4712
4713 amdgpu_atombios_scratch_regs_engine_hung(adev, false);
4714 return ret;
4715}
5c6dd71e 4716
e3c1b071 4717int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
04442bf7 4718 struct amdgpu_reset_context *reset_context)
26bc5340 4719{
5c1e6fa4 4720 int i, r = 0;
04442bf7
LL
4721 struct amdgpu_job *job = NULL;
4722 bool need_full_reset =
4723 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4724
4725 if (reset_context->reset_req_dev == adev)
4726 job = reset_context->job;
71182665 4727
b602ca5f
TZ
4728 if (amdgpu_sriov_vf(adev)) {
4729 /* stop the data exchange thread */
4730 amdgpu_virt_fini_data_exchange(adev);
4731 }
4732
9e225fb9
AG
4733 amdgpu_fence_driver_isr_toggle(adev, true);
4734
71182665 4735 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
4736 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4737 struct amdgpu_ring *ring = adev->rings[i];
4738
51687759 4739 if (!ring || !ring->sched.thread)
0875dc9e 4740 continue;
5740682e 4741
c530b02f
JZ
 4742		/* clear job fence from fence drv to avoid force_completion
 4743		 * leave NULL and vm flush fence in fence drv */
5c1e6fa4 4744 amdgpu_fence_driver_clear_job_fences(ring);
c530b02f 4745
2f9d4084
ML
4746 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
4747 amdgpu_fence_driver_force_completion(ring);
0875dc9e 4748 }
d38ceaf9 4749
9e225fb9
AG
4750 amdgpu_fence_driver_isr_toggle(adev, false);
4751
ff99849b 4752 if (job && job->vm)
222b5f04
AG
4753 drm_sched_increase_karma(&job->base);
4754
04442bf7 4755 r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
404b277b
LL
4756 /* If reset handler not implemented, continue; otherwise return */
4757 if (r == -ENOSYS)
4758 r = 0;
4759 else
04442bf7
LL
4760 return r;
4761
1d721ed6 4762 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
4763 if (!amdgpu_sriov_vf(adev)) {
4764
4765 if (!need_full_reset)
4766 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4767
360cd081
LG
4768 if (!need_full_reset && amdgpu_gpu_recovery &&
4769 amdgpu_device_ip_check_soft_reset(adev)) {
26bc5340
AG
4770 amdgpu_device_ip_pre_soft_reset(adev);
4771 r = amdgpu_device_ip_soft_reset(adev);
4772 amdgpu_device_ip_post_soft_reset(adev);
4773 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 4774 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
26bc5340
AG
4775 need_full_reset = true;
4776 }
4777 }
4778
4779 if (need_full_reset)
4780 r = amdgpu_device_ip_suspend(adev);
04442bf7
LL
4781 if (need_full_reset)
4782 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4783 else
4784 clear_bit(AMDGPU_NEED_FULL_RESET,
4785 &reset_context->flags);
26bc5340
AG
4786 }
4787
4788 return r;
4789}
4790
15fd09a0
SA
4791static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
4792{
15fd09a0
SA
4793 int i;
4794
38a15ad9 4795 lockdep_assert_held(&adev->reset_domain->sem);
15fd09a0
SA
4796
4797 for (i = 0; i < adev->num_regs; i++) {
651d7ee6
SA
4798 adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]);
4799 trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i],
4800 adev->reset_dump_reg_value[i]);
15fd09a0
SA
4801 }
4802
4803 return 0;
4804}
4805
3d8785f6
SA
4806#ifdef CONFIG_DEV_COREDUMP
4807static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
4808 size_t count, void *data, size_t datalen)
4809{
4810 struct drm_printer p;
4811 struct amdgpu_device *adev = data;
4812 struct drm_print_iterator iter;
4813 int i;
4814
4815 iter.data = buffer;
4816 iter.offset = 0;
4817 iter.start = offset;
4818 iter.remain = count;
4819
4820 p = drm_coredump_printer(&iter);
4821
4822 drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
4823 drm_printf(&p, "kernel: " UTS_RELEASE "\n");
4824 drm_printf(&p, "module: " KBUILD_MODNAME "\n");
4825 drm_printf(&p, "time: %lld.%09ld\n", adev->reset_time.tv_sec, adev->reset_time.tv_nsec);
4826 if (adev->reset_task_info.pid)
4827 drm_printf(&p, "process_name: %s PID: %d\n",
4828 adev->reset_task_info.process_name,
4829 adev->reset_task_info.pid);
4830
4831 if (adev->reset_vram_lost)
4832 drm_printf(&p, "VRAM is lost due to GPU reset!\n");
4833 if (adev->num_regs) {
4834 drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n");
4835
4836 for (i = 0; i < adev->num_regs; i++)
4837 drm_printf(&p, "0x%08x: 0x%08x\n",
4838 adev->reset_dump_reg_list[i],
4839 adev->reset_dump_reg_value[i]);
4840 }
4841
4842 return count - iter.remain;
4843}
4844
4845static void amdgpu_devcoredump_free(void *data)
4846{
4847}
4848
4849static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev)
4850{
4851 struct drm_device *dev = adev_to_drm(adev);
4852
4853 ktime_get_ts64(&adev->reset_time);
4854 dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_KERNEL,
4855 amdgpu_devcoredump_read, amdgpu_devcoredump_free);
4856}
4857#endif
4858
04442bf7
LL
4859int amdgpu_do_asic_reset(struct list_head *device_list_handle,
4860 struct amdgpu_reset_context *reset_context)
26bc5340
AG
4861{
4862 struct amdgpu_device *tmp_adev = NULL;
04442bf7 4863 bool need_full_reset, skip_hw_reset, vram_lost = false;
26bc5340 4864 int r = 0;
f5c7e779 4865 bool gpu_reset_for_dev_remove = 0;
26bc5340 4866
04442bf7
LL
4867 /* Try reset handler method first */
4868 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
4869 reset_list);
15fd09a0 4870 amdgpu_reset_reg_dumps(tmp_adev);
0a83bb35
LL
4871
4872 reset_context->reset_device_list = device_list_handle;
04442bf7 4873 r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
404b277b
LL
4874 /* If reset handler not implemented, continue; otherwise return */
4875 if (r == -ENOSYS)
4876 r = 0;
4877 else
04442bf7
LL
4878 return r;
4879
4880 /* Reset handler not implemented, use the default method */
4881 need_full_reset =
4882 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4883 skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
4884
f5c7e779
YC
4885 gpu_reset_for_dev_remove =
4886 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
4887 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4888
26bc5340 4889 /*
655ce9cb 4890 * ASIC reset has to be done on all XGMI hive nodes ASAP
26bc5340
AG
4891 * to allow proper links negotiation in FW (within 1 sec)
4892 */
7ac71382 4893 if (!skip_hw_reset && need_full_reset) {
655ce9cb 4894 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
041a62bc 4895 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 4896 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
e3c1b071 4897 tmp_adev->gmc.xgmi.pending_reset = false;
c96cf282 4898 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
4899 r = -EALREADY;
4900 } else
4901 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 4902
041a62bc 4903 if (r) {
aac89168 4904 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4a580877 4905 r, adev_to_drm(tmp_adev)->unique);
041a62bc 4906 break;
ce316fa5
LM
4907 }
4908 }
4909
041a62bc
AG
4910 /* For XGMI wait for all resets to complete before proceed */
4911 if (!r) {
655ce9cb 4912 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
ce316fa5
LM
4913 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4914 flush_work(&tmp_adev->xgmi_reset_work);
4915 r = tmp_adev->asic_reset_res;
4916 if (r)
4917 break;
ce316fa5
LM
4918 }
4919 }
4920 }
ce316fa5 4921 }
26bc5340 4922
43c4d576 4923 if (!r && amdgpu_ras_intr_triggered()) {
655ce9cb 4924 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5e67bba3 4925 if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops &&
4926 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
4927 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev);
43c4d576
JC
4928 }
4929
00eaa571 4930 amdgpu_ras_intr_cleared();
43c4d576 4931 }
00eaa571 4932
f5c7e779
YC
4933 /* Since the mode1 reset affects base ip blocks, the
4934 * phase1 ip blocks need to be resumed. Otherwise there
4935 * will be a BIOS signature error and the psp bootloader
4936 * can't load kdb on the next amdgpu install.
4937 */
4938 if (gpu_reset_for_dev_remove) {
4939 list_for_each_entry(tmp_adev, device_list_handle, reset_list)
4940 amdgpu_device_ip_resume_phase1(tmp_adev);
4941
4942 goto end;
4943 }
4944
655ce9cb 4945 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
26bc5340
AG
4946 if (need_full_reset) {
4947 /* post card */
e3c1b071 4948 r = amdgpu_device_asic_init(tmp_adev);
4949 if (r) {
aac89168 4950 dev_warn(tmp_adev->dev, "asic atom init failed!");
e3c1b071 4951 } else {
26bc5340 4952 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
9cec53c1
JZ
4953 r = amdgpu_amdkfd_resume_iommu(tmp_adev);
4954 if (r)
4955 goto out;
4956
26bc5340
AG
4957 r = amdgpu_device_ip_resume_phase1(tmp_adev);
4958 if (r)
4959 goto out;
4960
4961 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
3d8785f6
SA
4962#ifdef CONFIG_DEV_COREDUMP
4963 tmp_adev->reset_vram_lost = vram_lost;
4964 memset(&tmp_adev->reset_task_info, 0,
4965 sizeof(tmp_adev->reset_task_info));
4966 if (reset_context->job && reset_context->job->vm)
4967 tmp_adev->reset_task_info =
4968 reset_context->job->vm->task_info;
4969 amdgpu_reset_capture_coredumpm(tmp_adev);
4970#endif
26bc5340 4971 if (vram_lost) {
77e7f829 4972 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 4973 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
4974 }
4975
26bc5340
AG
4976 r = amdgpu_device_fw_loading(tmp_adev);
4977 if (r)
4978 return r;
4979
4980 r = amdgpu_device_ip_resume_phase2(tmp_adev);
4981 if (r)
4982 goto out;
4983
4984 if (vram_lost)
4985 amdgpu_device_fill_reset_magic(tmp_adev);
4986
fdafb359
EQ
4987 /*
4988 * Add this ASIC as tracked as reset was already
4989 * complete successfully.
4990 */
4991 amdgpu_register_gpu_instance(tmp_adev);
4992
04442bf7
LL
4993 if (!reset_context->hive &&
4994 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
e3c1b071 4995 amdgpu_xgmi_add_device(tmp_adev);
4996
7c04ca50 4997 r = amdgpu_device_ip_late_init(tmp_adev);
4998 if (r)
4999 goto out;
5000
087451f3 5001 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
565d1941 5002
e8fbaf03
GC
 5003	/*
 5004	 * The GPU enters a bad state once the number of faulty
 5005	 * pages retired due to ECC errors reaches the threshold,
 5006	 * and RAS recovery is scheduled next. So add a check here
 5007	 * to break out of recovery if the bad page threshold has
 5008	 * indeed been exceeded, and remind the user to either
 5009	 * retire this GPU or set a bigger bad_page_threshold
 5010	 * value to get past this check when probing the driver
 5011	 * again.
 5012	 */
11003c68 5013 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
e8fbaf03
GC
5014 /* must succeed. */
5015 amdgpu_ras_resume(tmp_adev);
5016 } else {
5017 r = -EINVAL;
5018 goto out;
5019 }
e79a04d5 5020
26bc5340 5021 /* Update PSP FW topology after reset */
04442bf7
LL
5022 if (reset_context->hive &&
5023 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5024 r = amdgpu_xgmi_update_topology(
5025 reset_context->hive, tmp_adev);
26bc5340
AG
5026 }
5027 }
5028
26bc5340
AG
5029out:
5030 if (!r) {
5031 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5032 r = amdgpu_ib_ring_tests(tmp_adev);
5033 if (r) {
5034 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
26bc5340
AG
5035 need_full_reset = true;
5036 r = -EAGAIN;
5037 goto end;
5038 }
5039 }
5040
5041 if (!r)
5042 r = amdgpu_device_recover_vram(tmp_adev);
5043 else
5044 tmp_adev->asic_reset_res = r;
5045 }
5046
5047end:
04442bf7
LL
5048 if (need_full_reset)
5049 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5050 else
5051 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340
AG
5052 return r;
5053}
5054
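/*
 * Illustrative sketch (not part of this file, helper name hypothetical):
 * callers of amdgpu_do_asic_reset() check for -EAGAIN, which is returned
 * once AMDGPU_NEED_FULL_RESET has been set in the context flags, and then
 * retry the whole pre-reset + reset sequence. The real retry loop lives in
 * amdgpu_device_gpu_recover() below; this is a simplified sketch of the
 * contract only.
 */
static int example_reset_with_retry(struct list_head *device_list,
				    struct amdgpu_reset_context *reset_context)
{
	int r;

	do {
		/* A full production caller would redo the pre-reset work here. */
		r = amdgpu_do_asic_reset(device_list, reset_context);
	} while (r == -EAGAIN);

	return r;
}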
e923be99 5055static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
26bc5340 5056{
5740682e 5057
a3a09142
AD
5058 switch (amdgpu_asic_reset_method(adev)) {
5059 case AMD_RESET_METHOD_MODE1:
5060 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5061 break;
5062 case AMD_RESET_METHOD_MODE2:
5063 adev->mp1_state = PP_MP1_STATE_RESET;
5064 break;
5065 default:
5066 adev->mp1_state = PP_MP1_STATE_NONE;
5067 break;
5068 }
26bc5340 5069}
d38ceaf9 5070
e923be99 5071static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
26bc5340 5072{
89041940 5073 amdgpu_vf_error_trans_all(adev);
a3a09142 5074 adev->mp1_state = PP_MP1_STATE_NONE;
91fb309d
HC
5075}
5076
3f12acc8
EQ
5077static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5078{
5079 struct pci_dev *p = NULL;
5080
5081 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5082 adev->pdev->bus->number, 1);
5083 if (p) {
5084 pm_runtime_enable(&(p->dev));
5085 pm_runtime_resume(&(p->dev));
5086 }
b85e285e
YY
5087
5088 pci_dev_put(p);
3f12acc8
EQ
5089}
5090
5091static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5092{
5093 enum amd_reset_method reset_method;
5094 struct pci_dev *p = NULL;
5095 u64 expires;
5096
5097 /*
 5098	 * For now, only BACO and mode1 reset are confirmed to
 5099	 * suffer from the audio issue if audio is not properly suspended.
5100 */
5101 reset_method = amdgpu_asic_reset_method(adev);
5102 if ((reset_method != AMD_RESET_METHOD_BACO) &&
5103 (reset_method != AMD_RESET_METHOD_MODE1))
5104 return -EINVAL;
5105
5106 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5107 adev->pdev->bus->number, 1);
5108 if (!p)
5109 return -ENODEV;
5110
5111 expires = pm_runtime_autosuspend_expiration(&(p->dev));
5112 if (!expires)
5113 /*
5114 * If we cannot get the audio device autosuspend delay,
 5115	 * a fixed 4s interval will be used. Since 3s is the
 5116	 * audio controller's default autosuspend delay, the 4s
 5117	 * used here is guaranteed to cover it.
5118 */
54b7feb9 5119 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
5120
5121 while (!pm_runtime_status_suspended(&(p->dev))) {
5122 if (!pm_runtime_suspend(&(p->dev)))
5123 break;
5124
5125 if (expires < ktime_get_mono_fast_ns()) {
5126 dev_warn(adev->dev, "failed to suspend display audio\n");
b85e285e 5127 pci_dev_put(p);
3f12acc8
EQ
5128 /* TODO: abort the succeeding gpu reset? */
5129 return -ETIMEDOUT;
5130 }
5131 }
5132
5133 pm_runtime_disable(&(p->dev));
5134
b85e285e 5135 pci_dev_put(p);
3f12acc8
EQ
5136 return 0;
5137}
5138
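/*
 * Illustrative sketch (not part of this file, helper name hypothetical):
 * the two helpers above are meant to be used as a pair around a GPU reset,
 * exactly as amdgpu_device_gpu_recover() does further below.
 */
static void example_reset_with_audio_quiesced(struct amdgpu_device *adev)
{
	/* suspend_display_audio() returns 0 on success */
	bool audio_suspended = !amdgpu_device_suspend_display_audio(adev);

	/* ... perform the ASIC reset here ... */

	if (audio_suspended)
		amdgpu_device_resume_display_audio(adev);
}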
d193b12b 5139static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
247c7b0d
AG
5140{
5141 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5142
5143#if defined(CONFIG_DEBUG_FS)
5144 if (!amdgpu_sriov_vf(adev))
5145 cancel_work(&adev->reset_work);
5146#endif
5147
5148 if (adev->kfd.dev)
5149 cancel_work(&adev->kfd.reset_work);
5150
5151 if (amdgpu_sriov_vf(adev))
5152 cancel_work(&adev->virt.flr_work);
5153
5154 if (con && adev->ras_enabled)
5155 cancel_work(&con->recovery_work);
5156
5157}
5158
26bc5340 5159/**
6e9c65f7 5160 * amdgpu_device_gpu_recover - reset the ASIC and recover the scheduler
26bc5340 5161 *
982a820b 5162 * @adev: amdgpu_device pointer
26bc5340
AG
 5163 * @job: which job triggered the hang
 * @reset_context: amdgpu reset context pointer
 5164 *
 5165 * Attempt to reset the GPU if it has hung (all ASICs).
 5166 * Attempts a soft reset or a full reset and reinitializes the ASIC.
 5167 * Returns 0 for success or an error on failure.
 5168 */
5169
cf727044 5170int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
f1549c09
LG
5171 struct amdgpu_job *job,
5172 struct amdgpu_reset_context *reset_context)
26bc5340 5173{
1d721ed6 5174 struct list_head device_list, *device_list_handle = NULL;
7dd8c205 5175 bool job_signaled = false;
26bc5340 5176 struct amdgpu_hive_info *hive = NULL;
26bc5340 5177 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 5178 int i, r = 0;
bb5c7235 5179 bool need_emergency_restart = false;
3f12acc8 5180 bool audio_suspended = false;
f5c7e779
YC
5181 bool gpu_reset_for_dev_remove = false;
5182
5183 gpu_reset_for_dev_remove =
5184 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5185 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340 5186
6e3cd2a9 5187 /*
bb5c7235
WS
5188 * Special case: RAS triggered and full reset isn't supported
5189 */
5190 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5191
d5ea093e
AG
5192 /*
5193 * Flush RAM to disk so that after reboot
 5194	 * the user can read the log and see why the system rebooted.
5195 */
bb5c7235 5196 if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
5197 DRM_WARN("Emergency reboot.");
5198
5199 ksys_sync_helper();
5200 emergency_restart();
5201 }
5202
b823821f 5203 dev_info(adev->dev, "GPU %s begin!\n",
bb5c7235 5204 need_emergency_restart ? "jobs stop":"reset");
26bc5340 5205
175ac6ec
ZL
5206 if (!amdgpu_sriov_vf(adev))
5207 hive = amdgpu_get_xgmi_hive(adev);
681260df 5208 if (hive)
53b3f8f4 5209 mutex_lock(&hive->hive_lock);
26bc5340 5210
f1549c09
LG
5211 reset_context->job = job;
5212 reset_context->hive = hive;
9e94d22c
EQ
5213 /*
5214 * Build list of devices to reset.
5215 * In case we are in XGMI hive mode, resort the device list
5216 * to put adev in the 1st position.
5217 */
5218 INIT_LIST_HEAD(&device_list);
175ac6ec 5219 if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
83d29a5f 5220 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
655ce9cb 5221 list_add_tail(&tmp_adev->reset_list, &device_list);
83d29a5f
YC
5222 if (gpu_reset_for_dev_remove && adev->shutdown)
5223 tmp_adev->shutdown = true;
5224 }
655ce9cb 5225 if (!list_is_first(&adev->reset_list, &device_list))
5226 list_rotate_to_front(&adev->reset_list, &device_list);
5227 device_list_handle = &device_list;
26bc5340 5228 } else {
655ce9cb 5229 list_add_tail(&adev->reset_list, &device_list);
26bc5340
AG
5230 device_list_handle = &device_list;
5231 }
5232
e923be99
AG
5233 /* We need to lock reset domain only once both for XGMI and single device */
5234 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5235 reset_list);
3675c2f2 5236 amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
e923be99 5237
1d721ed6 5238 /* block all schedulers and reset given job's ring */
655ce9cb 5239 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f287a3c5 5240
e923be99 5241 amdgpu_device_set_mp1_state(tmp_adev);
f287a3c5 5242
3f12acc8
EQ
 5243		/*
 5244		 * Try to put the audio codec into suspend state
 5245		 * before the GPU reset starts.
 5246		 *
 5247		 * The power domain of the graphics device is
 5248		 * shared with the AZ power domain. Without this,
 5249		 * we may change the audio hardware from behind
 5250		 * the audio driver's back and trigger
 5251		 * audio codec errors.
 5252		 */
5253 if (!amdgpu_device_suspend_display_audio(tmp_adev))
5254 audio_suspended = true;
5255
9e94d22c
EQ
5256 amdgpu_ras_set_error_query_ready(tmp_adev, false);
5257
52fb44cf
EQ
5258 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5259
c004d44e 5260 if (!amdgpu_sriov_vf(tmp_adev))
428890a3 5261 amdgpu_amdkfd_pre_reset(tmp_adev);
9e94d22c 5262
12ffa55d
AG
5263 /*
 5264		 * Mark these ASICs to be reset as untracked first,
 5265		 * and add them back after the reset has completed.
5266 */
5267 amdgpu_unregister_gpu_instance(tmp_adev);
5268
163d4cd2 5269 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
565d1941 5270
f1c1314b 5271 /* disable ras on ALL IPs */
bb5c7235 5272 if (!need_emergency_restart &&
b823821f 5273 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 5274 amdgpu_ras_suspend(tmp_adev);
5275
1d721ed6
AG
5276 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5277 struct amdgpu_ring *ring = tmp_adev->rings[i];
5278
5279 if (!ring || !ring->sched.thread)
5280 continue;
5281
0b2d2c2e 5282 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 5283
bb5c7235 5284 if (need_emergency_restart)
7c6e68c7 5285 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6 5286 }
8f8c80f4 5287 atomic_inc(&tmp_adev->gpu_reset_counter);
1d721ed6
AG
5288 }
5289
bb5c7235 5290 if (need_emergency_restart)
7c6e68c7
AG
5291 goto skip_sched_resume;
5292
1d721ed6
AG
5293 /*
5294 * Must check guilty signal here since after this point all old
5295 * HW fences are force signaled.
5296 *
5297 * job->base holds a reference to parent fence
5298 */
f6a3f660 5299 if (job && dma_fence_is_signaled(&job->hw_fence)) {
1d721ed6 5300 job_signaled = true;
1d721ed6
AG
5301 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5302 goto skip_hw_reset;
5303 }
5304
26bc5340 5305retry: /* Rest of adevs pre asic reset from XGMI hive. */
655ce9cb 5306 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f5c7e779
YC
5307 if (gpu_reset_for_dev_remove) {
 5308			/* Workaround for ASICs that need to disable SMC first */
5309 amdgpu_device_smu_fini_early(tmp_adev);
5310 }
f1549c09 5311 r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
26bc5340
AG
5312 /*TODO Should we stop ?*/
5313 if (r) {
aac89168 5314 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
4a580877 5315 r, adev_to_drm(tmp_adev)->unique);
26bc5340
AG
5316 tmp_adev->asic_reset_res = r;
5317 }
247c7b0d
AG
5318
5319 /*
 5320		 * Drop all pending non-scheduler resets. Scheduler resets
 5321		 * were already dropped during drm_sched_stop.
5322 */
d193b12b 5323 amdgpu_device_stop_pending_resets(tmp_adev);
26bc5340
AG
5324 }
5325
5326 /* Actual ASIC resets if needed.*/
4f30d920 5327 /* Host driver will handle XGMI hive reset for SRIOV */
26bc5340
AG
5328 if (amdgpu_sriov_vf(adev)) {
5329 r = amdgpu_device_reset_sriov(adev, job ? false : true);
5330 if (r)
5331 adev->asic_reset_res = r;
950d6425
SY
5332
5333 /* Aldebaran supports ras in SRIOV, so need resume ras during reset */
5334 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
5335 amdgpu_ras_resume(adev);
26bc5340 5336 } else {
f1549c09 5337 r = amdgpu_do_asic_reset(device_list_handle, reset_context);
b98a1648 5338 if (r && r == -EAGAIN)
26bc5340 5339 goto retry;
f5c7e779
YC
5340
5341 if (!r && gpu_reset_for_dev_remove)
5342 goto recover_end;
26bc5340
AG
5343 }
5344
1d721ed6
AG
5345skip_hw_reset:
5346
26bc5340 5347 /* Post ASIC reset for all devs .*/
655ce9cb 5348 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
7c6e68c7 5349
1d721ed6
AG
5350 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5351 struct amdgpu_ring *ring = tmp_adev->rings[i];
5352
5353 if (!ring || !ring->sched.thread)
5354 continue;
5355
6868a2c4 5356 drm_sched_start(&ring->sched, true);
1d721ed6
AG
5357 }
5358
693073a0 5359 if (adev->enable_mes && adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3))
ed67f729
JX
5360 amdgpu_mes_self_test(tmp_adev);
5361
1053b9c9 5362 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) {
4a580877 5363 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
1d721ed6
AG
5364 }
5365
7258fa31
SK
5366 if (tmp_adev->asic_reset_res)
5367 r = tmp_adev->asic_reset_res;
5368
1d721ed6 5369 tmp_adev->asic_reset_res = 0;
26bc5340
AG
5370
5371 if (r) {
5372 /* bad news, how to tell it to userspace ? */
12ffa55d 5373 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
5374 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5375 } else {
12ffa55d 5376 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
3fa8f89d
S
5377 if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5378 DRM_WARN("smart shift update failed\n");
26bc5340 5379 }
7c6e68c7 5380 }
26bc5340 5381
7c6e68c7 5382skip_sched_resume:
655ce9cb 5383 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
428890a3 5384 /* unlock kfd: SRIOV would do it separately */
c004d44e 5385 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
428890a3 5386 amdgpu_amdkfd_post_reset(tmp_adev);
8e2712e7 5387
 5388		/* kfd_post_reset will do nothing if the kfd device is not initialized;
 5389		 * we need to bring up kfd here if it was not initialized before.
5390 */
5391 if (!adev->kfd.init_complete)
5392 amdgpu_amdkfd_device_init(adev);
5393
3f12acc8
EQ
5394 if (audio_suspended)
5395 amdgpu_device_resume_display_audio(tmp_adev);
e923be99
AG
5396
5397 amdgpu_device_unset_mp1_state(tmp_adev);
d293470e
YC
5398
5399 amdgpu_ras_set_error_query_ready(tmp_adev, true);
26bc5340
AG
5400 }
5401
f5c7e779 5402recover_end:
e923be99
AG
5403 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5404 reset_list);
5405 amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5406
9e94d22c 5407 if (hive) {
9e94d22c 5408 mutex_unlock(&hive->hive_lock);
d95e8e97 5409 amdgpu_put_xgmi_hive(hive);
9e94d22c 5410 }
26bc5340 5411
f287a3c5 5412 if (r)
26bc5340 5413 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
ab9a0b1f
AG
5414
5415 atomic_set(&adev->reset_domain->reset_res, r);
d38ceaf9
AD
5416 return r;
5417}
5418
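/*
 * Illustrative sketch (not part of this file, helper name hypothetical):
 * roughly how a caller such as a ring/job timeout handler might drive the
 * recovery entry point above; see amdgpu_job.c for the real call site.
 */
static int example_trigger_gpu_recovery(struct amdgpu_ring *ring,
					struct amdgpu_job *job)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_reset_context reset_context;

	memset(&reset_context, 0, sizeof(reset_context));

	/* Let amdgpu_device_gpu_recover() pick the actual reset method. */
	reset_context.method = AMD_RESET_METHOD_NONE;
	reset_context.reset_req_dev = adev;
	clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);

	return amdgpu_device_gpu_recover(adev, job, &reset_context);
}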
e3ecdffa
AD
5419/**
 5420 * amdgpu_device_get_pcie_info - fetch PCIe info about the PCIe slot
5421 *
5422 * @adev: amdgpu_device pointer
5423 *
 5424 * Fetches and stores in the driver the PCIe capabilities (gen speed
5425 * and lanes) of the slot the device is in. Handles APUs and
5426 * virtualized environments where PCIE config space may not be available.
5427 */
5494d864 5428static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 5429{
5d9a6330 5430 struct pci_dev *pdev;
c5313457
HK
5431 enum pci_bus_speed speed_cap, platform_speed_cap;
5432 enum pcie_link_width platform_link_width;
d0dd7f0c 5433
cd474ba0
AD
5434 if (amdgpu_pcie_gen_cap)
5435 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 5436
cd474ba0
AD
5437 if (amdgpu_pcie_lane_cap)
5438 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 5439
cd474ba0
AD
5440 /* covers APUs as well */
5441 if (pci_is_root_bus(adev->pdev->bus)) {
5442 if (adev->pm.pcie_gen_mask == 0)
5443 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5444 if (adev->pm.pcie_mlw_mask == 0)
5445 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 5446 return;
cd474ba0 5447 }
d0dd7f0c 5448
c5313457
HK
5449 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5450 return;
5451
dbaa922b
AD
5452 pcie_bandwidth_available(adev->pdev, NULL,
5453 &platform_speed_cap, &platform_link_width);
c5313457 5454
cd474ba0 5455 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
5456 /* asic caps */
5457 pdev = adev->pdev;
5458 speed_cap = pcie_get_speed_cap(pdev);
5459 if (speed_cap == PCI_SPEED_UNKNOWN) {
5460 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
5461 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5462 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 5463 } else {
2b3a1f51
FX
5464 if (speed_cap == PCIE_SPEED_32_0GT)
5465 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5466 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5467 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5468 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5469 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5470 else if (speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5471 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5472 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5473 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5474 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5475 else if (speed_cap == PCIE_SPEED_8_0GT)
5476 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5477 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5478 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5479 else if (speed_cap == PCIE_SPEED_5_0GT)
5480 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5481 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5482 else
5483 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5484 }
5485 /* platform caps */
c5313457 5486 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
5487 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5488 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5489 } else {
2b3a1f51
FX
5490 if (platform_speed_cap == PCIE_SPEED_32_0GT)
5491 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5492 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5493 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5494 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5495 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5496 else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5497 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5498 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5499 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5500 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 5501 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
5502 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5503 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5504 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 5505 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
5506 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5507 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5508 else
5509 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5510
cd474ba0
AD
5511 }
5512 }
5513 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 5514 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
5515 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5516 } else {
c5313457 5517 switch (platform_link_width) {
5d9a6330 5518 case PCIE_LNK_X32:
cd474ba0
AD
5519 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5520 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5521 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5522 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5523 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5524 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5525 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5526 break;
5d9a6330 5527 case PCIE_LNK_X16:
cd474ba0
AD
5528 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5529 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5530 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5531 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5532 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5533 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5534 break;
5d9a6330 5535 case PCIE_LNK_X12:
cd474ba0
AD
5536 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5537 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5538 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5539 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5540 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5541 break;
5d9a6330 5542 case PCIE_LNK_X8:
cd474ba0
AD
5543 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5544 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5545 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5546 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5547 break;
5d9a6330 5548 case PCIE_LNK_X4:
cd474ba0
AD
5549 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5550 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5551 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5552 break;
5d9a6330 5553 case PCIE_LNK_X2:
cd474ba0
AD
5554 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5555 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5556 break;
5d9a6330 5557 case PCIE_LNK_X1:
cd474ba0
AD
5558 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5559 break;
5560 default:
5561 break;
5562 }
d0dd7f0c
AD
5563 }
5564 }
5565}
d38ceaf9 5566
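/*
 * Illustrative sketch (not part of this file, helper name hypothetical):
 * consumers of the masks filled in above typically test the platform bits,
 * which use the CAIL_PCIE_LINK_SPEED_SUPPORT_* encoding, to decide which
 * link speeds may be requested.
 */
static bool example_platform_supports_pcie_gen4(struct amdgpu_device *adev)
{
	return !!(adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
}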
08a2fd23
RE
5567/**
5568 * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
5569 *
5570 * @adev: amdgpu_device pointer
5571 * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
5572 *
5573 * Return true if @peer_adev can access (DMA) @adev through the PCIe
5574 * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
5575 * @peer_adev.
5576 */
5577bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
5578 struct amdgpu_device *peer_adev)
5579{
5580#ifdef CONFIG_HSA_AMD_P2P
5581 uint64_t address_mask = peer_adev->dev->dma_mask ?
5582 ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
5583 resource_size_t aper_limit =
5584 adev->gmc.aper_base + adev->gmc.aper_size - 1;
bb66ecbf
LL
5585 bool p2p_access =
5586 !adev->gmc.xgmi.connected_to_cpu &&
5587 !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
08a2fd23
RE
5588
5589 return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
5590 adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
5591 !(adev->gmc.aper_base & address_mask ||
5592 aper_limit & address_mask));
5593#else
5594 return false;
5595#endif
5596}
5597
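/*
 * Illustrative sketch (not part of this file, helper name hypothetical):
 * P2P DMA between two devices is normally only enabled when the BAR check
 * above passes in both directions, which is how KFD uses it when building
 * its topology.
 */
static bool example_p2p_dma_possible(struct amdgpu_device *a,
				     struct amdgpu_device *b)
{
	return amdgpu_device_is_peer_accessible(a, b) &&
	       amdgpu_device_is_peer_accessible(b, a);
}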
361dbd01
AD
5598int amdgpu_device_baco_enter(struct drm_device *dev)
5599{
1348969a 5600 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5601 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01 5602
6ab68650 5603 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
5604 return -ENOTSUPP;
5605
8ab0d6f0 5606 if (ras && adev->ras_enabled &&
acdae216 5607 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5608 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
5609
9530273e 5610 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
5611}
5612
5613int amdgpu_device_baco_exit(struct drm_device *dev)
5614{
1348969a 5615 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5616 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 5617 int ret = 0;
361dbd01 5618
6ab68650 5619 if (!amdgpu_device_supports_baco(dev))
361dbd01
AD
5620 return -ENOTSUPP;
5621
9530273e
EQ
5622 ret = amdgpu_dpm_baco_exit(adev);
5623 if (ret)
5624 return ret;
7a22677b 5625
8ab0d6f0 5626 if (ras && adev->ras_enabled &&
acdae216 5627 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5628 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
5629
1bece222
CL
5630 if (amdgpu_passthrough(adev) &&
5631 adev->nbio.funcs->clear_doorbell_interrupt)
5632 adev->nbio.funcs->clear_doorbell_interrupt(adev);
5633
7a22677b 5634 return 0;
361dbd01 5635}
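/*
 * Illustrative sketch (not part of this file, function names hypothetical):
 * the runtime-PM path pairs the two helpers above, entering BACO on runtime
 * suspend and leaving it on runtime resume; see amdgpu_drv.c for the real
 * runtime-PM hooks.
 */
static int example_runtime_suspend_via_baco(struct drm_device *dev)
{
	if (!amdgpu_device_supports_baco(dev))
		return -ENOTSUPP;

	/* Power down the ASIC; it stays in BACO until runtime resume. */
	return amdgpu_device_baco_enter(dev);
}

static int example_runtime_resume_via_baco(struct drm_device *dev)
{
	return amdgpu_device_baco_exit(dev);
}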
c9a6b82f
AG
5636
5637/**
5638 * amdgpu_pci_error_detected - Called when a PCI error is detected.
5639 * @pdev: PCI device struct
5640 * @state: PCI channel state
5641 *
5642 * Description: Called when a PCI error is detected.
5643 *
5644 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
5645 */
5646pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
5647{
5648 struct drm_device *dev = pci_get_drvdata(pdev);
5649 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5650 int i;
c9a6b82f
AG
5651
5652 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
5653
6894305c
AG
5654 if (adev->gmc.xgmi.num_physical_nodes > 1) {
5655 DRM_WARN("No support for XGMI hive yet...");
5656 return PCI_ERS_RESULT_DISCONNECT;
5657 }
5658
e17e27f9
GC
5659 adev->pci_channel_state = state;
5660
c9a6b82f
AG
5661 switch (state) {
5662 case pci_channel_io_normal:
5663 return PCI_ERS_RESULT_CAN_RECOVER;
acd89fca 5664 /* Fatal error, prepare for slot reset */
8a11d283
TZ
5665 case pci_channel_io_frozen:
5666 /*
d0fb18b5 5667 * Locking adev->reset_domain->sem will prevent any external access
acd89fca
AG
5668 * to GPU during PCI error recovery
5669 */
3675c2f2 5670 amdgpu_device_lock_reset_domain(adev->reset_domain);
e923be99 5671 amdgpu_device_set_mp1_state(adev);
acd89fca
AG
5672
5673 /*
5674 * Block any work scheduling as we do for regular GPU reset
5675 * for the duration of the recovery
5676 */
5677 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5678 struct amdgpu_ring *ring = adev->rings[i];
5679
5680 if (!ring || !ring->sched.thread)
5681 continue;
5682
5683 drm_sched_stop(&ring->sched, NULL);
5684 }
8f8c80f4 5685 atomic_inc(&adev->gpu_reset_counter);
c9a6b82f
AG
5686 return PCI_ERS_RESULT_NEED_RESET;
5687 case pci_channel_io_perm_failure:
5688 /* Permanent error, prepare for device removal */
5689 return PCI_ERS_RESULT_DISCONNECT;
5690 }
5691
5692 return PCI_ERS_RESULT_NEED_RESET;
5693}
5694
5695/**
5696 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
5697 * @pdev: pointer to PCI device
5698 */
5699pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
5700{
5701
5702 DRM_INFO("PCI error: mmio enabled callback!!\n");
5703
5704 /* TODO - dump whatever for debugging purposes */
5705
 5706	/* This is called only if amdgpu_pci_error_detected returns
 5707	 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
 5708	 * works, so there is no need to reset the slot.
5709 */
5710
5711 return PCI_ERS_RESULT_RECOVERED;
5712}
5713
5714/**
5715 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
5716 * @pdev: PCI device struct
5717 *
5718 * Description: This routine is called by the pci error recovery
5719 * code after the PCI slot has been reset, just before we
5720 * should resume normal operations.
5721 */
5722pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
5723{
5724 struct drm_device *dev = pci_get_drvdata(pdev);
5725 struct amdgpu_device *adev = drm_to_adev(dev);
362c7b91 5726 int r, i;
04442bf7 5727 struct amdgpu_reset_context reset_context;
362c7b91 5728 u32 memsize;
7ac71382 5729 struct list_head device_list;
c9a6b82f
AG
5730
5731 DRM_INFO("PCI error: slot reset callback!!\n");
5732
04442bf7
LL
5733 memset(&reset_context, 0, sizeof(reset_context));
5734
7ac71382 5735 INIT_LIST_HEAD(&device_list);
655ce9cb 5736 list_add_tail(&adev->reset_list, &device_list);
7ac71382 5737
362c7b91
AG
5738 /* wait for asic to come out of reset */
5739 msleep(500);
5740
7ac71382 5741 /* Restore PCI confspace */
c1dd4aa6 5742 amdgpu_device_load_pci_state(pdev);
c9a6b82f 5743
362c7b91
AG
5744 /* confirm ASIC came out of reset */
5745 for (i = 0; i < adev->usec_timeout; i++) {
5746 memsize = amdgpu_asic_get_config_memsize(adev);
5747
5748 if (memsize != 0xffffffff)
5749 break;
5750 udelay(1);
5751 }
5752 if (memsize == 0xffffffff) {
5753 r = -ETIME;
5754 goto out;
5755 }
5756
04442bf7
LL
5757 reset_context.method = AMD_RESET_METHOD_NONE;
5758 reset_context.reset_req_dev = adev;
5759 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
5760 set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
5761
7afefb81 5762 adev->no_hw_access = true;
04442bf7 5763 r = amdgpu_device_pre_asic_reset(adev, &reset_context);
7afefb81 5764 adev->no_hw_access = false;
c9a6b82f
AG
5765 if (r)
5766 goto out;
5767
04442bf7 5768 r = amdgpu_do_asic_reset(&device_list, &reset_context);
c9a6b82f
AG
5769
5770out:
c9a6b82f 5771 if (!r) {
c1dd4aa6
AG
5772 if (amdgpu_device_cache_pci_state(adev->pdev))
5773 pci_restore_state(adev->pdev);
5774
c9a6b82f
AG
5775 DRM_INFO("PCIe error recovery succeeded\n");
5776 } else {
5777 DRM_ERROR("PCIe error recovery failed, err:%d", r);
e923be99
AG
5778 amdgpu_device_unset_mp1_state(adev);
5779 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f
AG
5780 }
5781
5782 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
5783}
5784
5785/**
5786 * amdgpu_pci_resume() - resume normal ops after PCI reset
5787 * @pdev: pointer to PCI device
5788 *
 5789 * Called when the error recovery driver tells us that it's
505199a3 5790 * OK to resume normal operation.
c9a6b82f
AG
5791 */
5792void amdgpu_pci_resume(struct pci_dev *pdev)
5793{
5794 struct drm_device *dev = pci_get_drvdata(pdev);
5795 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5796 int i;
c9a6b82f 5797
c9a6b82f
AG
5798
5799 DRM_INFO("PCI error: resume callback!!\n");
acd89fca 5800
e17e27f9
GC
5801 /* Only continue execution for the case of pci_channel_io_frozen */
5802 if (adev->pci_channel_state != pci_channel_io_frozen)
5803 return;
5804
acd89fca
AG
5805 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5806 struct amdgpu_ring *ring = adev->rings[i];
5807
5808 if (!ring || !ring->sched.thread)
5809 continue;
5810
acd89fca
AG
5811 drm_sched_start(&ring->sched, true);
5812 }
5813
e923be99
AG
5814 amdgpu_device_unset_mp1_state(adev);
5815 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f 5816}
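/*
 * Illustrative sketch (not part of this file, structure name is an
 * assumption): the four callbacks above are wired into the PCI core through
 * a struct pci_error_handlers instance registered in amdgpu_drv.c.
 */
static const struct pci_error_handlers example_amdgpu_pci_err_handler = {
	.error_detected	= amdgpu_pci_error_detected,
	.mmio_enabled	= amdgpu_pci_mmio_enabled,
	.slot_reset	= amdgpu_pci_slot_reset,
	.resume		= amdgpu_pci_resume,
};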
c1dd4aa6
AG
5817
5818bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
5819{
5820 struct drm_device *dev = pci_get_drvdata(pdev);
5821 struct amdgpu_device *adev = drm_to_adev(dev);
5822 int r;
5823
5824 r = pci_save_state(pdev);
5825 if (!r) {
5826 kfree(adev->pci_state);
5827
5828 adev->pci_state = pci_store_saved_state(pdev);
5829
5830 if (!adev->pci_state) {
5831 DRM_ERROR("Failed to store PCI saved state");
5832 return false;
5833 }
5834 } else {
5835 DRM_WARN("Failed to save PCI state, err:%d\n", r);
5836 return false;
5837 }
5838
5839 return true;
5840}
5841
5842bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
5843{
5844 struct drm_device *dev = pci_get_drvdata(pdev);
5845 struct amdgpu_device *adev = drm_to_adev(dev);
5846 int r;
5847
5848 if (!adev->pci_state)
5849 return false;
5850
5851 r = pci_load_saved_state(pdev, adev->pci_state);
5852
5853 if (!r) {
5854 pci_restore_state(pdev);
5855 } else {
5856 DRM_WARN("Failed to load PCI state, err:%d\n", r);
5857 return false;
5858 }
5859
5860 return true;
5861}
5862
810085dd
EH
5863void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
5864 struct amdgpu_ring *ring)
5865{
5866#ifdef CONFIG_X86_64
b818a5d3 5867 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
5868 return;
5869#endif
5870 if (adev->gmc.xgmi.connected_to_cpu)
5871 return;
5872
5873 if (ring && ring->funcs->emit_hdp_flush)
5874 amdgpu_ring_emit_hdp_flush(ring);
5875 else
5876 amdgpu_asic_flush_hdp(adev, ring);
5877}
c1dd4aa6 5878
810085dd
EH
5879void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
5880 struct amdgpu_ring *ring)
5881{
5882#ifdef CONFIG_X86_64
b818a5d3 5883 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
5884 return;
5885#endif
5886 if (adev->gmc.xgmi.connected_to_cpu)
5887 return;
c1dd4aa6 5888
810085dd
EH
5889 amdgpu_asic_invalidate_hdp(adev, ring);
5890}
34f3a4a9 5891
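/*
 * Illustrative sketch (not part of this file, helper name hypothetical):
 * callers flush the HDP cache after the CPU has written to VRAM that the GPU
 * is about to read, so the writes become visible to the engines.
 */
static void example_cpu_write_then_flush(struct amdgpu_device *adev,
					 void __iomem *vram_cpu_addr, u32 value)
{
	writel(value, vram_cpu_addr);		/* CPU write through the BAR */
	amdgpu_device_flush_hdp(adev, NULL);	/* make it visible to the GPU */
}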
89a7a870
AG
5892int amdgpu_in_reset(struct amdgpu_device *adev)
5893{
5894 return atomic_read(&adev->reset_domain->in_gpu_reset);
53a17b6b
TZ
5895}
5896
34f3a4a9
LY
5897/**
5898 * amdgpu_device_halt() - bring hardware to some kind of halt state
5899 *
5900 * @adev: amdgpu_device pointer
5901 *
5902 * Bring hardware to some kind of halt state so that no one can touch it
 5903 * any more. It helps to maintain the error context when an error occurs.
 5904 * Compared to a simple hang, the system will stay stable at least for SSH
 5905 * access. It should then be trivial to inspect the hardware state and
 5906 * see what's going on. Implemented as follows:
5907 *
5908 * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
5909 * clears all CPU mappings to device, disallows remappings through page faults
5910 * 2. amdgpu_irq_disable_all() disables all interrupts
5911 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
 5912 * 4. set adev->no_hw_access to avoid potential crashes after step 5
5913 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
5914 * 6. pci_disable_device() and pci_wait_for_pending_transaction()
5915 * flush any in flight DMA operations
5916 */
5917void amdgpu_device_halt(struct amdgpu_device *adev)
5918{
5919 struct pci_dev *pdev = adev->pdev;
e0f943b4 5920 struct drm_device *ddev = adev_to_drm(adev);
34f3a4a9
LY
5921
5922 drm_dev_unplug(ddev);
5923
5924 amdgpu_irq_disable_all(adev);
5925
5926 amdgpu_fence_driver_hw_fini(adev);
5927
5928 adev->no_hw_access = true;
5929
5930 amdgpu_device_unmap_mmio(adev);
5931
5932 pci_disable_device(pdev);
5933 pci_wait_for_pending_transaction(pdev);
5934}
86700a40
XD
5935
5936u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
5937 u32 reg)
5938{
5939 unsigned long flags, address, data;
5940 u32 r;
5941
5942 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
5943 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
5944
5945 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
5946 WREG32(address, reg * 4);
5947 (void)RREG32(address);
5948 r = RREG32(data);
5949 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
5950 return r;
5951}
5952
5953void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
5954 u32 reg, u32 v)
5955{
5956 unsigned long flags, address, data;
5957
5958 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
5959 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
5960
5961 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
5962 WREG32(address, reg * 4);
5963 (void)RREG32(address);
5964 WREG32(data, v);
5965 (void)RREG32(data);
5966 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
5967}
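/*
 * Illustrative sketch (not part of this file, helper name, register and mask
 * hypothetical): NBIO code typically uses the two accessors above for
 * read-modify-write sequences on PCIe port registers.
 */
static void example_pcie_port_rmw(struct amdgpu_device *adev,
				  u32 reg, u32 mask, u32 value)
{
	u32 tmp = amdgpu_device_pcie_port_rreg(adev, reg);

	tmp = (tmp & ~mask) | (value & mask);
	amdgpu_device_pcie_port_wreg(adev, reg, tmp);
}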
68ce8b24
CK
5968
5969/**
5970 * amdgpu_device_switch_gang - switch to a new gang
5971 * @adev: amdgpu_device pointer
5972 * @gang: the gang to switch to
5973 *
5974 * Try to switch to a new gang.
5975 * Returns: NULL if we switched to the new gang or a reference to the current
5976 * gang leader.
5977 */
5978struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
5979 struct dma_fence *gang)
5980{
5981 struct dma_fence *old = NULL;
5982
5983 do {
5984 dma_fence_put(old);
5985 rcu_read_lock();
5986 old = dma_fence_get_rcu_safe(&adev->gang_submit);
5987 rcu_read_unlock();
5988
5989 if (old == gang)
5990 break;
5991
5992 if (!dma_fence_is_signaled(old))
5993 return old;
5994
5995 } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
5996 old, gang) != old);
5997
5998 dma_fence_put(old);
5999 return NULL;
6000}
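/*
 * Illustrative sketch (not part of this file, helper name hypothetical): a
 * submission path installs its gang leader with amdgpu_device_switch_gang()
 * and, if a previous gang is still running, uses the returned fence as a
 * dependency instead of submitting immediately.
 */
static struct dma_fence *example_try_switch_gang(struct amdgpu_device *adev,
						 struct dma_fence *leader)
{
	struct dma_fence *old = amdgpu_device_switch_gang(adev, leader);

	if (old)
		return old;	/* caller must wait on / depend on this fence */

	return NULL;		/* switched; the new gang may be submitted now */
}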
220c8cc8
AD
6001
6002bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6003{
6004 switch (adev->asic_type) {
6005#ifdef CONFIG_DRM_AMDGPU_SI
6006 case CHIP_HAINAN:
6007#endif
6008 case CHIP_TOPAZ:
6009 /* chips with no display hardware */
6010 return false;
6011#ifdef CONFIG_DRM_AMDGPU_SI
6012 case CHIP_TAHITI:
6013 case CHIP_PITCAIRN:
6014 case CHIP_VERDE:
6015 case CHIP_OLAND:
6016#endif
6017#ifdef CONFIG_DRM_AMDGPU_CIK
6018 case CHIP_BONAIRE:
6019 case CHIP_HAWAII:
6020 case CHIP_KAVERI:
6021 case CHIP_KABINI:
6022 case CHIP_MULLINS:
6023#endif
6024 case CHIP_TONGA:
6025 case CHIP_FIJI:
6026 case CHIP_POLARIS10:
6027 case CHIP_POLARIS11:
6028 case CHIP_POLARIS12:
6029 case CHIP_VEGAM:
6030 case CHIP_CARRIZO:
6031 case CHIP_STONEY:
6032 /* chips with display hardware */
6033 return true;
6034 default:
6035 /* IP discovery */
6036 if (!adev->ip_versions[DCE_HWIP][0] ||
6037 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6038 return false;
6039 return true;
6040 }
6041}