/* drivers/gpu/drm/amd/amdgpu/amdgpu_device.c */
/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/console.h>
#include <linux/slab.h>
#include <linux/iommu.h>
#include <linux/pci.h>
#include <linux/devcoredump.h>
#include <generated/utsrelease.h>
#include <linux/pci-p2pdma.h>

#include <drm/drm_atomic_helper.h>
#include <drm/drm_fb_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "nv.h"
#include "bif/bif_4_1_d.h"
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
#include "amdgpu_pmu.h"
#include "amdgpu_fru_eeprom.h"
#include "amdgpu_reset.h"

#include <linux/suspend.h>
#include <drm/task_barrier.h>
#include <linux/pm_runtime.h>

#include <drm/drm_drv.h>

MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");

#define AMDGPU_RESUME_MS		2000
#define AMDGPU_MAX_RETRY_LIMIT		2
#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)

const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"ARCTURUS",
	"RENOIR",
	"ALDEBARAN",
	"NAVI10",
	"CYAN_SKILLFISH",
	"NAVI14",
	"NAVI12",
	"SIENNA_CICHLID",
	"NAVY_FLOUNDER",
	"VANGOGH",
	"DIMGREY_CAVEFISH",
	"BEIGE_GOBY",
	"YELLOW_CARP",
	"IP DISCOVERY",
	"LAST",
};

/**
 * DOC: pcie_replay_count
 *
 * The amdgpu driver provides a sysfs API for reporting the total number
 * of PCIe replays (NAKs).
 * The file pcie_replay_count is used for this and returns the total
 * number of replays as a sum of the NAKs generated and NAKs received.
 */

static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);
	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);

	return sysfs_emit(buf, "%llu\n", cnt);
}

static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
		amdgpu_device_get_pcie_replay_count, NULL);

static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);

/**
 * DOC: product_name
 *
 * The amdgpu driver provides a sysfs API for reporting the product name
 * for the device.
 * The file product_name is used for this and returns the product name
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_product_name(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);

	return sysfs_emit(buf, "%s\n", adev->product_name);
}

static DEVICE_ATTR(product_name, S_IRUGO,
		amdgpu_device_get_product_name, NULL);

/**
 * DOC: product_number
 *
 * The amdgpu driver provides a sysfs API for reporting the part number
 * for the device.
 * The file product_number is used for this and returns the part number
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_product_number(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);

	return sysfs_emit(buf, "%s\n", adev->product_number);
}

static DEVICE_ATTR(product_number, S_IRUGO,
		amdgpu_device_get_product_number, NULL);

/**
 * DOC: serial_number
 *
 * The amdgpu driver provides a sysfs API for reporting the serial number
 * for the device.
 * The file serial_number is used for this and returns the serial number
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_serial_number(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);

	return sysfs_emit(buf, "%s\n", adev->serial);
}

static DEVICE_ATTR(serial_number, S_IRUGO,
		amdgpu_device_get_serial_number, NULL);

/**
 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with ATPX power control,
 * otherwise returns false.
 */
bool amdgpu_device_supports_px(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);

	if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
		return true;
	return false;
}

/**
 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with ACPI power control,
 * otherwise returns false.
 */
bool amdgpu_device_supports_boco(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);

	if (adev->has_pr3 ||
	    ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
		return true;
	return false;
}

/**
 * amdgpu_device_supports_baco - Does the device support BACO
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device supports BACO,
 * otherwise returns false.
 */
bool amdgpu_device_supports_baco(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);

	return amdgpu_asic_supports_baco(adev);
}

/**
 * amdgpu_device_supports_smart_shift - Is the device dGPU with
 * smart shift support
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with Smart Shift support,
 * otherwise returns false.
 */
bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
{
	return (amdgpu_device_supports_boco(dev) &&
		amdgpu_acpi_is_power_shift_control_supported());
}

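/*
 * Illustrative sketch (not part of the original file): one way a caller could
 * combine the capability helpers above to pick a runtime power-off strategy.
 * The helper name is made up for illustration only.
 */
static inline const char *amdgpu_example_runtime_pm_mode(struct drm_device *dev)
{
	if (amdgpu_device_supports_px(dev))
		return "ATPX";
	if (amdgpu_device_supports_boco(dev))
		return "BOCO";
	if (amdgpu_device_supports_baco(dev))
		return "BACO";
	return "none";
}
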
/*
 * VRAM access helper functions
 */

/**
 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size, sizeof(@buf) must be > @size
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
			     void *buf, size_t size, bool write)
{
	unsigned long flags;
	uint32_t hi = ~0, tmp = 0;
	uint32_t *data = buf;
	uint64_t last;
	int idx;

	if (!drm_dev_enter(adev_to_drm(adev), &idx))
		return;

	BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));

	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
	for (last = pos + size; pos < last; pos += 4) {
		tmp = pos >> 31;

		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
		if (tmp != hi) {
			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
			hi = tmp;
		}
		if (write)
			WREG32_NO_KIQ(mmMM_DATA, *data++);
		else
			*data++ = RREG32_NO_KIQ(mmMM_DATA);
	}

	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	drm_dev_exit(idx);
}

/**
 * amdgpu_device_aper_access - access vram by vram aperture
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size, sizeof(@buf) must be > @size
 * @write: true - write to vram, otherwise - read from vram
 *
 * The return value means how many bytes have been transferred.
 */
size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
				 void *buf, size_t size, bool write)
{
#ifdef CONFIG_64BIT
	void __iomem *addr;
	size_t count = 0;
	uint64_t last;

	if (!adev->mman.aper_base_kaddr)
		return 0;

	last = min(pos + size, adev->gmc.visible_vram_size);
	if (last > pos) {
		addr = adev->mman.aper_base_kaddr + pos;
		count = last - pos;

		if (write) {
			memcpy_toio(addr, buf, count);
			mb();
			amdgpu_device_flush_hdp(adev, NULL);
		} else {
			amdgpu_device_invalidate_hdp(adev, NULL);
			mb();
			memcpy_fromio(buf, addr, count);
		}

	}

	return count;
#else
	return 0;
#endif
}

/**
 * amdgpu_device_vram_access - read/write a buffer in vram
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size, sizeof(@buf) must be > @size
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
			       void *buf, size_t size, bool write)
{
	size_t count;

	/* try to use the vram aperture to access vram first */
	count = amdgpu_device_aper_access(adev, pos, buf, size, write);
	size -= count;
	if (size) {
		/* use MM to access the rest of vram */
		pos += count;
		buf += count;
		amdgpu_device_mm_access(adev, pos, buf, size, write);
	}
}

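/*
 * Illustrative sketch (not part of the original file): reading a few dwords
 * from the start of VRAM into a CPU buffer and writing them back with the
 * helper above. The offset and length are arbitrary example values.
 */
static inline void amdgpu_example_vram_peek(struct amdgpu_device *adev)
{
	uint32_t data[4] = {};

	/* read 16 bytes from VRAM offset 0 into system memory */
	amdgpu_device_vram_access(adev, 0, data, sizeof(data), false);

	/* write the same buffer back to the same offset */
	amdgpu_device_vram_access(adev, 0, data, sizeof(data), true);
}
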
/*
 * register access helper functions.
 */

/* Check if hw access should be skipped because of hotplug or device error */
bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
{
	if (adev->no_hw_access)
		return true;

#ifdef CONFIG_LOCKDEP
	/*
	 * This is a bit complicated to understand, so worth a comment. What we assert
	 * here is that the GPU reset is not running on another thread in parallel.
	 *
	 * For this we trylock the read side of the reset semaphore, if that succeeds
	 * we know that the reset is not running in parallel.
	 *
	 * If the trylock fails we assert that we are either already holding the read
	 * side of the lock or are the reset thread itself and hold the write side of
	 * the lock.
	 */
	if (in_task()) {
		if (down_read_trylock(&adev->reset_domain->sem))
			up_read(&adev->reset_domain->sem);
		else
			lockdep_assert_held(&adev->reset_domain->sem);
	}
#endif
	return false;
}

/**
 * amdgpu_device_rreg - read a memory mapped IO or indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
			    uint32_t reg, uint32_t acc_flags)
{
	uint32_t ret;

	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	if ((reg * 4) < adev->rmmio_size) {
		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
		    amdgpu_sriov_runtime(adev) &&
		    down_read_trylock(&adev->reset_domain->sem)) {
			ret = amdgpu_kiq_rreg(adev, reg);
			up_read(&adev->reset_domain->sem);
		} else {
			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
		}
	} else {
		ret = adev->pcie_rreg(adev, reg * 4);
	}

	trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);

	return ret;
}

/*
 * MMIO register read with bytes helper functions
 * @offset: bytes offset from MMIO start
 */

/**
 * amdgpu_mm_rreg8 - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 *
 * Returns the 8 bit value from the offset specified.
 */
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
{
	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	if (offset < adev->rmmio_size)
		return (readb(adev->rmmio + offset));
	BUG();
}

/*
 * MMIO register write with bytes helper functions
 * @offset: bytes offset from MMIO start
 * @value: the value to be written to the register
 */

/**
 * amdgpu_mm_wreg8 - write a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
{
	if (amdgpu_device_skip_hw_access(adev))
		return;

	if (offset < adev->rmmio_size)
		writeb(value, adev->rmmio + offset);
	else
		BUG();
}

/**
 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_device_wreg(struct amdgpu_device *adev,
			uint32_t reg, uint32_t v,
			uint32_t acc_flags)
{
	if (amdgpu_device_skip_hw_access(adev))
		return;

	if ((reg * 4) < adev->rmmio_size) {
		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
		    amdgpu_sriov_runtime(adev) &&
		    down_read_trylock(&adev->reset_domain->sem)) {
			amdgpu_kiq_wreg(adev, reg, v);
			up_read(&adev->reset_domain->sem);
		} else {
			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
		}
	} else {
		adev->pcie_wreg(adev, reg * 4, v);
	}

	trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
}

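/*
 * Illustrative sketch (not part of the original file): a read-modify-write
 * through amdgpu_device_rreg()/amdgpu_device_wreg() that bypasses the KIQ
 * path. The register offset and bits passed in are placeholders.
 */
static inline void amdgpu_example_reg_rmw(struct amdgpu_device *adev,
					  uint32_t reg, uint32_t set_bits)
{
	uint32_t v;

	v = amdgpu_device_rreg(adev, reg, AMDGPU_REGS_NO_KIQ);
	v |= set_bits;
	amdgpu_device_wreg(adev, reg, v, AMDGPU_REGS_NO_KIQ);
}
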
/**
 * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
 *
 * @adev: amdgpu_device pointer
 * @reg: mmio/rlc register
 * @v: value to write
 *
 * this function is invoked only for the debugfs register access
 */
void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
			     uint32_t reg, uint32_t v)
{
	if (amdgpu_device_skip_hw_access(adev))
		return;

	if (amdgpu_sriov_fullaccess(adev) &&
	    adev->gfx.rlc.funcs &&
	    adev->gfx.rlc.funcs->is_rlcg_access_range) {
		if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
			return amdgpu_sriov_wreg(adev, reg, v, 0, 0);
	} else if ((reg * 4) >= adev->rmmio_size) {
		adev->pcie_wreg(adev, reg * 4, v);
	} else {
		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
	}
}

/**
 * amdgpu_mm_rdoorbell - read a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
{
	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	if (index < adev->doorbell.num_doorbells) {
		return readl(adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell - write a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
{
	if (amdgpu_device_skip_hw_access(adev))
		return;

	if (index < adev->doorbell.num_doorbells) {
		writel(v, adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

/**
 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
{
	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	if (index < adev->doorbell.num_doorbells) {
		return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
{
	if (amdgpu_device_skip_hw_access(adev))
		return;

	if (index < adev->doorbell.num_doorbells) {
		atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

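/*
 * Illustrative sketch (not part of the original file): a 64-bit doorbell
 * round trip using the helpers above; the index is a made-up example value
 * supplied by the caller.
 */
static inline u64 amdgpu_example_doorbell64_roundtrip(struct amdgpu_device *adev,
						      u32 index, u64 v)
{
	amdgpu_mm_wdoorbell64(adev, index, v);
	return amdgpu_mm_rdoorbell64(adev, index);
}
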
/**
 * amdgpu_device_indirect_rreg - read an indirect register
 *
 * @adev: amdgpu_device pointer
 * @pcie_index: mmio register offset
 * @pcie_data: mmio register offset
 * @reg_addr: indirect register address to read from
 *
 * Returns the value of indirect register @reg_addr
 */
u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
				u32 pcie_index, u32 pcie_data,
				u32 reg_addr)
{
	unsigned long flags;
	u32 r;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	r = readl(pcie_data_offset);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return r;
}

/**
 * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
 *
 * @adev: amdgpu_device pointer
 * @pcie_index: mmio register offset
 * @pcie_data: mmio register offset
 * @reg_addr: indirect register address to read from
 *
 * Returns the value of indirect register @reg_addr
 */
u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
				  u32 pcie_index, u32 pcie_data,
				  u32 reg_addr)
{
	unsigned long flags;
	u64 r;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	/* read low 32 bits */
	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	r = readl(pcie_data_offset);
	/* read high 32 bits */
	writel(reg_addr + 4, pcie_index_offset);
	readl(pcie_index_offset);
	r |= ((u64)readl(pcie_data_offset) << 32);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return r;
}

/**
 * amdgpu_device_indirect_wreg - write an indirect register address
 *
 * @adev: amdgpu_device pointer
 * @pcie_index: mmio register offset
 * @pcie_data: mmio register offset
 * @reg_addr: indirect register offset
 * @reg_data: indirect register data
 *
 */
void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
				 u32 pcie_index, u32 pcie_data,
				 u32 reg_addr, u32 reg_data)
{
	unsigned long flags;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	writel(reg_data, pcie_data_offset);
	readl(pcie_data_offset);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

/**
 * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
 *
 * @adev: amdgpu_device pointer
 * @pcie_index: mmio register offset
 * @pcie_data: mmio register offset
 * @reg_addr: indirect register offset
 * @reg_data: indirect register data
 *
 */
void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
				   u32 pcie_index, u32 pcie_data,
				   u32 reg_addr, u64 reg_data)
{
	unsigned long flags;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	/* write low 32 bits */
	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
	readl(pcie_data_offset);
	/* write high 32 bits */
	writel(reg_addr + 4, pcie_index_offset);
	readl(pcie_index_offset);
	writel((u32)(reg_data >> 32), pcie_data_offset);
	readl(pcie_data_offset);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

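/*
 * Illustrative sketch (not part of the original file): a 64-bit
 * read-modify-write through the indirect index/data pair helpers above.
 * The pcie_index/pcie_data/reg_addr arguments are placeholders.
 */
static inline void amdgpu_example_indirect_rmw64(struct amdgpu_device *adev,
						 u32 pcie_index, u32 pcie_data,
						 u32 reg_addr, u64 set_bits)
{
	u64 v;

	v = amdgpu_device_indirect_rreg64(adev, pcie_index, pcie_data, reg_addr);
	v |= set_bits;
	amdgpu_device_indirect_wreg64(adev, pcie_index, pcie_data, reg_addr, v);
}
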
/**
 * amdgpu_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg64 - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
{
	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_block_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu_device pointer
 * @block: offset of instance
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
					  uint32_t block, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
		  reg, block);
	BUG();
	return 0;
}

/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
				      uint32_t block,
				      uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
		  reg, block, v);
	BUG();
}

4d2997ab
AD
909/**
910 * amdgpu_device_asic_init - Wrapper for atom asic_init
911 *
982a820b 912 * @adev: amdgpu_device pointer
4d2997ab
AD
913 *
914 * Does any asic specific work and then calls atom asic init.
915 */
916static int amdgpu_device_asic_init(struct amdgpu_device *adev)
917{
918 amdgpu_asic_pre_asic_init(adev);
919
85d1bcc6
HZ
920 if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0))
921 return amdgpu_atomfirmware_asic_init(adev, true);
922 else
923 return amdgpu_atom_asic_init(adev->mode_info.atom_context);
4d2997ab
AD
924}
925
e3ecdffa
AD
926/**
927 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
928 *
982a820b 929 * @adev: amdgpu_device pointer
e3ecdffa
AD
930 *
931 * Allocates a scratch page of VRAM for use by various things in the
932 * driver.
933 */
06ec9070 934static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
d38ceaf9 935{
a4a02777
CK
936 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
937 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
938 &adev->vram_scratch.robj,
939 &adev->vram_scratch.gpu_addr,
940 (void **)&adev->vram_scratch.ptr);
d38ceaf9
AD
941}
942
e3ecdffa
AD
943/**
944 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
945 *
982a820b 946 * @adev: amdgpu_device pointer
e3ecdffa
AD
947 *
948 * Frees the VRAM scratch page.
949 */
06ec9070 950static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 951{
078af1a3 952 amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
d38ceaf9
AD
953}
954
955/**
9c3f2b54 956 * amdgpu_device_program_register_sequence - program an array of registers.
d38ceaf9
AD
957 *
958 * @adev: amdgpu_device pointer
959 * @registers: pointer to the register array
960 * @array_size: size of the register array
961 *
962 * Programs an array or registers with and and or masks.
963 * This is a helper for setting golden registers.
964 */
9c3f2b54
AD
965void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
966 const u32 *registers,
967 const u32 array_size)
d38ceaf9
AD
968{
969 u32 tmp, reg, and_mask, or_mask;
970 int i;
971
972 if (array_size % 3)
973 return;
974
975 for (i = 0; i < array_size; i +=3) {
976 reg = registers[i + 0];
977 and_mask = registers[i + 1];
978 or_mask = registers[i + 2];
979
980 if (and_mask == 0xffffffff) {
981 tmp = or_mask;
982 } else {
983 tmp = RREG32(reg);
984 tmp &= ~and_mask;
e0d07657
HZ
985 if (adev->family >= AMDGPU_FAMILY_AI)
986 tmp |= (or_mask & and_mask);
987 else
988 tmp |= or_mask;
d38ceaf9
AD
989 }
990 WREG32(reg, tmp);
991 }
992}
993
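/*
 * Illustrative sketch (not part of the original file): a made-up golden
 * register table in the reg/and_mask/or_mask triplet layout consumed by
 * amdgpu_device_program_register_sequence(). Offsets and masks are fake.
 */
static inline void amdgpu_example_program_golden(struct amdgpu_device *adev)
{
	static const u32 example_golden_regs[] = {
		/* reg,     and_mask,   or_mask */
		0x0000320c, 0xffffffff, 0x00000100,	/* full overwrite */
		0x00009834, 0x0000ff00, 0x00003000,	/* clear some bits, set others */
	};

	amdgpu_device_program_register_sequence(adev, example_golden_regs,
						ARRAY_SIZE(example_golden_regs));
}
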
e3ecdffa
AD
994/**
995 * amdgpu_device_pci_config_reset - reset the GPU
996 *
997 * @adev: amdgpu_device pointer
998 *
999 * Resets the GPU using the pci config reset sequence.
1000 * Only applicable to asics prior to vega10.
1001 */
8111c387 1002void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
1003{
1004 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
1005}
1006
af484df8
AD
1007/**
1008 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1009 *
1010 * @adev: amdgpu_device pointer
1011 *
1012 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1013 */
1014int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1015{
1016 return pci_reset_function(adev->pdev);
1017}
1018
d38ceaf9
AD
1019/*
1020 * GPU doorbell aperture helpers function.
1021 */
1022/**
06ec9070 1023 * amdgpu_device_doorbell_init - Init doorbell driver information.
d38ceaf9
AD
1024 *
1025 * @adev: amdgpu_device pointer
1026 *
1027 * Init doorbell driver information (CIK)
1028 * Returns 0 on success, error on failure.
1029 */
06ec9070 1030static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
d38ceaf9 1031{
6585661d 1032
705e519e
CK
1033 /* No doorbell on SI hardware generation */
1034 if (adev->asic_type < CHIP_BONAIRE) {
1035 adev->doorbell.base = 0;
1036 adev->doorbell.size = 0;
1037 adev->doorbell.num_doorbells = 0;
1038 adev->doorbell.ptr = NULL;
1039 return 0;
1040 }
1041
d6895ad3
CK
1042 if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
1043 return -EINVAL;
1044
22357775
AD
1045 amdgpu_asic_init_doorbell_index(adev);
1046
d38ceaf9
AD
1047 /* doorbell bar mapping */
1048 adev->doorbell.base = pci_resource_start(adev->pdev, 2);
1049 adev->doorbell.size = pci_resource_len(adev->pdev, 2);
1050
de33a329
JX
1051 if (adev->enable_mes) {
1052 adev->doorbell.num_doorbells =
1053 adev->doorbell.size / sizeof(u32);
1054 } else {
1055 adev->doorbell.num_doorbells =
1056 min_t(u32, adev->doorbell.size / sizeof(u32),
1057 adev->doorbell_index.max_assignment+1);
1058 if (adev->doorbell.num_doorbells == 0)
1059 return -EINVAL;
1060
1061 /* For Vega, reserve and map two pages on doorbell BAR since SDMA
1062 * paging queue doorbell use the second page. The
1063 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
1064 * doorbells are in the first page. So with paging queue enabled,
		 * the max num_doorbells should be increased by 1 page (0x400 in dwords)
1066 */
1067 if (adev->asic_type >= CHIP_VEGA10)
1068 adev->doorbell.num_doorbells += 0x400;
1069 }
ec3db8a6 1070
8972e5d2
CK
1071 adev->doorbell.ptr = ioremap(adev->doorbell.base,
1072 adev->doorbell.num_doorbells *
1073 sizeof(u32));
1074 if (adev->doorbell.ptr == NULL)
d38ceaf9 1075 return -ENOMEM;
d38ceaf9
AD
1076
1077 return 0;
1078}
1079
1080/**
06ec9070 1081 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
d38ceaf9
AD
1082 *
1083 * @adev: amdgpu_device pointer
1084 *
1085 * Tear down doorbell driver information (CIK)
1086 */
06ec9070 1087static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1088{
1089 iounmap(adev->doorbell.ptr);
1090 adev->doorbell.ptr = NULL;
1091}
1092
22cb0164 1093
d38ceaf9
AD
1094
1095/*
06ec9070 1096 * amdgpu_device_wb_*()
455a7bc2 1097 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
d38ceaf9
AD
1099 */
1100
1101/**
06ec9070 1102 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
1103 *
1104 * @adev: amdgpu_device pointer
1105 *
1106 * Disables Writeback and frees the Writeback memory (all asics).
1107 * Used at driver shutdown.
1108 */
06ec9070 1109static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1110{
1111 if (adev->wb.wb_obj) {
a76ed485
AD
1112 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1113 &adev->wb.gpu_addr,
1114 (void **)&adev->wb.wb);
d38ceaf9
AD
1115 adev->wb.wb_obj = NULL;
1116 }
1117}
1118
1119/**
03f2abb0 1120 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
d38ceaf9
AD
1121 *
1122 * @adev: amdgpu_device pointer
1123 *
455a7bc2 1124 * Initializes writeback and allocates writeback memory (all asics).
d38ceaf9
AD
1125 * Used at driver startup.
 * Returns 0 on success or a negative error code on failure.
1127 */
06ec9070 1128static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
1129{
1130 int r;
1131
1132 if (adev->wb.wb_obj == NULL) {
97407b63
AD
1133 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1134 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
1135 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1136 &adev->wb.wb_obj, &adev->wb.gpu_addr,
1137 (void **)&adev->wb.wb);
d38ceaf9
AD
1138 if (r) {
1139 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1140 return r;
1141 }
d38ceaf9
AD
1142
1143 adev->wb.num_wb = AMDGPU_MAX_WB;
1144 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1145
1146 /* clear wb memory */
73469585 1147 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
1148 }
1149
1150 return 0;
1151}
1152
1153/**
131b4b36 1154 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
1155 *
1156 * @adev: amdgpu_device pointer
1157 * @wb: wb index
1158 *
1159 * Allocate a wb slot for use by the driver (all asics).
1160 * Returns 0 on success or -EINVAL on failure.
1161 */
131b4b36 1162int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9
AD
1163{
1164 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 1165
97407b63 1166 if (offset < adev->wb.num_wb) {
7014285a 1167 __set_bit(offset, adev->wb.used);
63ae07ca 1168 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
1169 return 0;
1170 } else {
1171 return -EINVAL;
1172 }
1173}
1174
d38ceaf9 1175/**
131b4b36 1176 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
1177 *
1178 * @adev: amdgpu_device pointer
1179 * @wb: wb index
1180 *
1181 * Free a wb slot allocated for use by the driver (all asics)
1182 */
131b4b36 1183void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 1184{
73469585 1185 wb >>= 3;
d38ceaf9 1186 if (wb < adev->wb.num_wb)
73469585 1187 __clear_bit(wb, adev->wb.used);
d38ceaf9
AD
1188}
1189
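/*
 * Illustrative sketch (not part of the original file): allocating a writeback
 * slot, reading the dword the GPU left there, and releasing the slot again.
 */
static inline int amdgpu_example_wb_use(struct amdgpu_device *adev)
{
	u32 wb, val;
	int r;

	r = amdgpu_device_wb_get(adev, &wb);
	if (r)
		return r;

	/* wb is a dword offset into the CPU-visible writeback buffer */
	val = adev->wb.wb[wb];
	(void)val;

	amdgpu_device_wb_free(adev, wb);
	return 0;
}
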
d6895ad3
CK
1190/**
1191 * amdgpu_device_resize_fb_bar - try to resize FB BAR
1192 *
1193 * @adev: amdgpu_device pointer
1194 *
1195 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 * to fail, but if any of the BARs is not accessible after the resize we abort
1197 * driver loading by returning -ENODEV.
1198 */
1199int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1200{
453f617a 1201 int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
31b8adab
CK
1202 struct pci_bus *root;
1203 struct resource *res;
1204 unsigned i;
d6895ad3
CK
1205 u16 cmd;
1206 int r;
1207
0c03b912 1208 /* Bypass for VF */
1209 if (amdgpu_sriov_vf(adev))
1210 return 0;
1211
b7221f2b
AD
1212 /* skip if the bios has already enabled large BAR */
1213 if (adev->gmc.real_vram_size &&
1214 (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1215 return 0;
1216
31b8adab
CK
1217 /* Check if the root BUS has 64bit memory resources */
1218 root = adev->pdev->bus;
1219 while (root->parent)
1220 root = root->parent;
1221
1222 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 1223 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
1224 res->start > 0x100000000ull)
1225 break;
1226 }
1227
1228 /* Trying to resize is pointless without a root hub window above 4GB */
1229 if (!res)
1230 return 0;
1231
453f617a
ND
1232 /* Limit the BAR size to what is available */
1233 rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1234 rbar_size);
1235
d6895ad3
CK
1236 /* Disable memory decoding while we change the BAR addresses and size */
1237 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1238 pci_write_config_word(adev->pdev, PCI_COMMAND,
1239 cmd & ~PCI_COMMAND_MEMORY);
1240
1241 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
06ec9070 1242 amdgpu_device_doorbell_fini(adev);
d6895ad3
CK
1243 if (adev->asic_type >= CHIP_BONAIRE)
1244 pci_release_resource(adev->pdev, 2);
1245
1246 pci_release_resource(adev->pdev, 0);
1247
1248 r = pci_resize_resource(adev->pdev, 0, rbar_size);
1249 if (r == -ENOSPC)
1250 DRM_INFO("Not enough PCI address space for a large BAR.");
1251 else if (r && r != -ENOTSUPP)
1252 DRM_ERROR("Problem resizing BAR0 (%d).", r);
1253
1254 pci_assign_unassigned_bus_resources(adev->pdev->bus);
1255
1256 /* When the doorbell or fb BAR isn't available we have no chance of
1257 * using the device.
1258 */
06ec9070 1259 r = amdgpu_device_doorbell_init(adev);
d6895ad3
CK
1260 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1261 return -ENODEV;
1262
1263 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1264
1265 return 0;
1266}
a05502e5 1267
d38ceaf9
AD
1268/*
1269 * GPU helpers function.
1270 */
1271/**
39c640c0 1272 * amdgpu_device_need_post - check if the hw need post or not
d38ceaf9
AD
1273 *
1274 * @adev: amdgpu_device pointer
1275 *
c836fec5
JQ
1276 * Check if the asic has been initialized (all asics) at driver startup
1277 * or post is needed if hw reset is performed.
1278 * Returns true if need or false if not.
d38ceaf9 1279 */
39c640c0 1280bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
1281{
1282 uint32_t reg;
1283
bec86378
ML
1284 if (amdgpu_sriov_vf(adev))
1285 return false;
1286
1287 if (amdgpu_passthrough(adev)) {
		/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
		 * some old SMC firmware still needs the driver to do a vPost, otherwise the
		 * GPU hangs. SMC firmware versions above 22.15 don't have this flaw, so we
		 * force vPost to be executed for SMC versions below 22.15.
		 */
1293 if (adev->asic_type == CHIP_FIJI) {
1294 int err;
1295 uint32_t fw_ver;
1296 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
			/* force vPost if error occurred */
1298 if (err)
1299 return true;
1300
1301 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1da2c326
ML
1302 if (fw_ver < 0x00160e00)
1303 return true;
bec86378 1304 }
bec86378 1305 }
91fe77eb 1306
e3c1b071 1307 /* Don't post if we need to reset whole hive on init */
1308 if (adev->gmc.xgmi.pending_reset)
1309 return false;
1310
91fe77eb 1311 if (adev->has_hw_reset) {
1312 adev->has_hw_reset = false;
1313 return true;
1314 }
1315
1316 /* bios scratch used on CIK+ */
1317 if (adev->asic_type >= CHIP_BONAIRE)
1318 return amdgpu_atombios_scratch_need_asic_init(adev);
1319
1320 /* check MEM_SIZE for older asics */
1321 reg = amdgpu_asic_get_config_memsize(adev);
1322
1323 if ((reg != 0) && (reg != 0xffffffff))
1324 return false;
1325
1326 return true;
bec86378
ML
1327}
1328
0ab5d711
ML
1329/**
1330 * amdgpu_device_should_use_aspm - check if the device should program ASPM
1331 *
1332 * @adev: amdgpu_device pointer
1333 *
1334 * Confirm whether the module parameter and pcie bridge agree that ASPM should
1335 * be set for this device.
1336 *
1337 * Returns true if it should be used or false if not.
1338 */
1339bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1340{
1341 switch (amdgpu_aspm) {
1342 case -1:
1343 break;
1344 case 0:
1345 return false;
1346 case 1:
1347 return true;
1348 default:
1349 return false;
1350 }
1351 return pcie_aspm_enabled(adev->pdev);
1352}
1353
d38ceaf9
AD
1354/* if we get transitioned to only one device, take VGA back */
1355/**
06ec9070 1356 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9 1357 *
bf44e8ce 1358 * @pdev: PCI device pointer
d38ceaf9
AD
1359 * @state: enable/disable vga decode
1360 *
1361 * Enable/disable vga decode (all asics).
1362 * Returns VGA resource flags.
1363 */
bf44e8ce
CH
1364static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1365 bool state)
d38ceaf9 1366{
bf44e8ce 1367 struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
d38ceaf9
AD
1368 amdgpu_asic_set_vga_state(adev, state);
1369 if (state)
1370 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1371 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1372 else
1373 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1374}
1375
e3ecdffa
AD
1376/**
1377 * amdgpu_device_check_block_size - validate the vm block size
1378 *
1379 * @adev: amdgpu_device pointer
1380 *
1381 * Validates the vm block size specified via module parameter.
1382 * The vm block size defines number of bits in page table versus page directory,
1383 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1384 * page table and the remaining bits are in the page directory.
1385 */
06ec9070 1386static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
1387{
1388 /* defines number of bits in page table versus page directory,
1389 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1390 * page table and the remaining bits are in the page directory */
bab4fee7
JZ
1391 if (amdgpu_vm_block_size == -1)
1392 return;
a1adf8be 1393
bab4fee7 1394 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
1395 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1396 amdgpu_vm_block_size);
97489129 1397 amdgpu_vm_block_size = -1;
a1adf8be 1398 }
a1adf8be
CZ
1399}
1400
e3ecdffa
AD
1401/**
1402 * amdgpu_device_check_vm_size - validate the vm size
1403 *
1404 * @adev: amdgpu_device pointer
1405 *
1406 * Validates the vm size in GB specified via module parameter.
1407 * The VM size is the size of the GPU virtual memory space in GB.
1408 */
06ec9070 1409static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 1410{
64dab074
AD
1411 /* no need to check the default value */
1412 if (amdgpu_vm_size == -1)
1413 return;
1414
83ca145d
ZJ
1415 if (amdgpu_vm_size < 1) {
1416 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1417 amdgpu_vm_size);
f3368128 1418 amdgpu_vm_size = -1;
83ca145d 1419 }
83ca145d
ZJ
1420}
1421
7951e376
RZ
1422static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1423{
1424 struct sysinfo si;
a9d4fe2f 1425 bool is_os_64 = (sizeof(void *) == 8);
7951e376
RZ
1426 uint64_t total_memory;
1427 uint64_t dram_size_seven_GB = 0x1B8000000;
1428 uint64_t dram_size_three_GB = 0xB8000000;
1429
1430 if (amdgpu_smu_memory_pool_size == 0)
1431 return;
1432
1433 if (!is_os_64) {
1434 DRM_WARN("Not 64-bit OS, feature not supported\n");
1435 goto def_value;
1436 }
1437 si_meminfo(&si);
1438 total_memory = (uint64_t)si.totalram * si.mem_unit;
1439
1440 if ((amdgpu_smu_memory_pool_size == 1) ||
1441 (amdgpu_smu_memory_pool_size == 2)) {
1442 if (total_memory < dram_size_three_GB)
1443 goto def_value1;
1444 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1445 (amdgpu_smu_memory_pool_size == 8)) {
1446 if (total_memory < dram_size_seven_GB)
1447 goto def_value1;
1448 } else {
1449 DRM_WARN("Smu memory pool size not supported\n");
1450 goto def_value;
1451 }
1452 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1453
1454 return;
1455
1456def_value1:
1457 DRM_WARN("No enough system memory\n");
1458def_value:
1459 adev->pm.smu_prv_buffer_size = 0;
1460}
1461
9f6a7857
HR
1462static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1463{
1464 if (!(adev->flags & AMD_IS_APU) ||
1465 adev->asic_type < CHIP_RAVEN)
1466 return 0;
1467
1468 switch (adev->asic_type) {
1469 case CHIP_RAVEN:
1470 if (adev->pdev->device == 0x15dd)
1471 adev->apu_flags |= AMD_APU_IS_RAVEN;
1472 if (adev->pdev->device == 0x15d8)
1473 adev->apu_flags |= AMD_APU_IS_PICASSO;
1474 break;
1475 case CHIP_RENOIR:
1476 if ((adev->pdev->device == 0x1636) ||
1477 (adev->pdev->device == 0x164c))
1478 adev->apu_flags |= AMD_APU_IS_RENOIR;
1479 else
1480 adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1481 break;
1482 case CHIP_VANGOGH:
1483 adev->apu_flags |= AMD_APU_IS_VANGOGH;
1484 break;
1485 case CHIP_YELLOW_CARP:
1486 break;
d0f56dc2 1487 case CHIP_CYAN_SKILLFISH:
dfcc3e8c
AD
1488 if ((adev->pdev->device == 0x13FE) ||
1489 (adev->pdev->device == 0x143F))
d0f56dc2
TZ
1490 adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1491 break;
9f6a7857 1492 default:
4eaf21b7 1493 break;
9f6a7857
HR
1494 }
1495
1496 return 0;
1497}
1498
d38ceaf9 1499/**
06ec9070 1500 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
1501 *
1502 * @adev: amdgpu_device pointer
1503 *
1504 * Validates certain module parameters and updates
1505 * the associated values used by the driver (all asics).
1506 */
912dfc84 1507static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1508{
5b011235
CZ
1509 if (amdgpu_sched_jobs < 4) {
1510 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1511 amdgpu_sched_jobs);
1512 amdgpu_sched_jobs = 4;
76117507 1513 } else if (!is_power_of_2(amdgpu_sched_jobs)){
5b011235
CZ
1514 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1515 amdgpu_sched_jobs);
1516 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1517 }
d38ceaf9 1518
83e74db6 1519 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
1520 /* gart size must be greater or equal to 32M */
1521 dev_warn(adev->dev, "gart size (%d) too small\n",
1522 amdgpu_gart_size);
83e74db6 1523 amdgpu_gart_size = -1;
d38ceaf9
AD
1524 }
1525
36d38372 1526 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1527 /* gtt size must be greater or equal to 32M */
36d38372
CK
1528 dev_warn(adev->dev, "gtt size (%d) too small\n",
1529 amdgpu_gtt_size);
1530 amdgpu_gtt_size = -1;
d38ceaf9
AD
1531 }
1532
d07f14be
RH
1533 /* valid range is between 4 and 9 inclusive */
1534 if (amdgpu_vm_fragment_size != -1 &&
1535 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1536 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1537 amdgpu_vm_fragment_size = -1;
1538 }
1539
5d5bd5e3
KW
1540 if (amdgpu_sched_hw_submission < 2) {
1541 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1542 amdgpu_sched_hw_submission);
1543 amdgpu_sched_hw_submission = 2;
1544 } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1545 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1546 amdgpu_sched_hw_submission);
1547 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1548 }
1549
2656fd23
AG
1550 if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1551 dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1552 amdgpu_reset_method = -1;
1553 }
1554
7951e376
RZ
1555 amdgpu_device_check_smu_prv_buffer_size(adev);
1556
06ec9070 1557 amdgpu_device_check_vm_size(adev);
d38ceaf9 1558
06ec9070 1559 amdgpu_device_check_block_size(adev);
6a7f76e7 1560
19aede77 1561 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84 1562
e3c00faa 1563 return 0;
d38ceaf9
AD
1564}
1565
1566/**
1567 * amdgpu_switcheroo_set_state - set switcheroo state
1568 *
1569 * @pdev: pci dev pointer
1694467b 1570 * @state: vga_switcheroo state
d38ceaf9 1571 *
12024b17 1572 * Callback for the switcheroo driver. Suspends or resumes
d38ceaf9
AD
1573 * the asics before or after it is powered up using ACPI methods.
1574 */
8aba21b7
LT
1575static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1576 enum vga_switcheroo_state state)
d38ceaf9
AD
1577{
1578 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 1579 int r;
d38ceaf9 1580
b98c6299 1581 if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
1582 return;
1583
1584 if (state == VGA_SWITCHEROO_ON) {
dd4fa6c1 1585 pr_info("switched on\n");
d38ceaf9
AD
1586 /* don't suspend or resume card normally */
1587 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1588
8f66090b
TZ
1589 pci_set_power_state(pdev, PCI_D0);
1590 amdgpu_device_load_pci_state(pdev);
1591 r = pci_enable_device(pdev);
de185019
AD
1592 if (r)
1593 DRM_WARN("pci_enable_device failed (%d)\n", r);
1594 amdgpu_device_resume(dev, true);
d38ceaf9 1595
d38ceaf9 1596 dev->switch_power_state = DRM_SWITCH_POWER_ON;
d38ceaf9 1597 } else {
dd4fa6c1 1598 pr_info("switched off\n");
d38ceaf9 1599 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
de185019 1600 amdgpu_device_suspend(dev, true);
8f66090b 1601 amdgpu_device_cache_pci_state(pdev);
de185019 1602 /* Shut down the device */
8f66090b
TZ
1603 pci_disable_device(pdev);
1604 pci_set_power_state(pdev, PCI_D3cold);
d38ceaf9
AD
1605 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1606 }
1607}
1608
1609/**
1610 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1611 *
1612 * @pdev: pci dev pointer
1613 *
 * Callback for the switcheroo driver. Checks if the switcheroo
1615 * state can be changed.
1616 * Returns true if the state can be changed, false if not.
1617 */
1618static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1619{
1620 struct drm_device *dev = pci_get_drvdata(pdev);
1621
1622 /*
1623 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1624 * locking inversion with the driver load path. And the access here is
1625 * completely racy anyway. So don't bother with locking for now.
1626 */
7e13ad89 1627 return atomic_read(&dev->open_count) == 0;
d38ceaf9
AD
1628}
1629
1630static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1631 .set_gpu_state = amdgpu_switcheroo_set_state,
1632 .reprobe = NULL,
1633 .can_switch = amdgpu_switcheroo_can_switch,
1634};
1635
e3ecdffa
AD
1636/**
1637 * amdgpu_device_ip_set_clockgating_state - set the CG state
1638 *
87e3f136 1639 * @dev: amdgpu_device pointer
e3ecdffa
AD
1640 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1641 * @state: clockgating state (gate or ungate)
1642 *
1643 * Sets the requested clockgating state for all instances of
1644 * the hardware IP specified.
1645 * Returns the error code from the last instance.
1646 */
43fa561f 1647int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1648 enum amd_ip_block_type block_type,
1649 enum amd_clockgating_state state)
d38ceaf9 1650{
43fa561f 1651 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1652 int i, r = 0;
1653
1654 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1655 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1656 continue;
c722865a
RZ
1657 if (adev->ip_blocks[i].version->type != block_type)
1658 continue;
1659 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1660 continue;
1661 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1662 (void *)adev, state);
1663 if (r)
1664 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1665 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1666 }
1667 return r;
1668}
1669
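/*
 * Illustrative sketch (not part of the original file): gating the clocks of
 * the GFX IP block through the helper above.
 */
static inline int amdgpu_example_gate_gfx_clocks(struct amdgpu_device *adev)
{
	return amdgpu_device_ip_set_clockgating_state(adev,
						      AMD_IP_BLOCK_TYPE_GFX,
						      AMD_CG_STATE_GATE);
}
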
e3ecdffa
AD
1670/**
1671 * amdgpu_device_ip_set_powergating_state - set the PG state
1672 *
87e3f136 1673 * @dev: amdgpu_device pointer
e3ecdffa
AD
1674 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1675 * @state: powergating state (gate or ungate)
1676 *
1677 * Sets the requested powergating state for all instances of
1678 * the hardware IP specified.
1679 * Returns the error code from the last instance.
1680 */
43fa561f 1681int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1682 enum amd_ip_block_type block_type,
1683 enum amd_powergating_state state)
d38ceaf9 1684{
43fa561f 1685 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1686 int i, r = 0;
1687
1688 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1689 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1690 continue;
c722865a
RZ
1691 if (adev->ip_blocks[i].version->type != block_type)
1692 continue;
1693 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1694 continue;
1695 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1696 (void *)adev, state);
1697 if (r)
1698 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1699 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1700 }
1701 return r;
1702}
1703
e3ecdffa
AD
1704/**
1705 * amdgpu_device_ip_get_clockgating_state - get the CG state
1706 *
1707 * @adev: amdgpu_device pointer
1708 * @flags: clockgating feature flags
1709 *
1710 * Walks the list of IPs on the device and updates the clockgating
1711 * flags for each IP.
1712 * Updates @flags with the feature flags for each hardware IP where
1713 * clockgating is enabled.
1714 */
2990a1fc 1715void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
25faeddc 1716 u64 *flags)
6cb2d4e4
HR
1717{
1718 int i;
1719
1720 for (i = 0; i < adev->num_ip_blocks; i++) {
1721 if (!adev->ip_blocks[i].status.valid)
1722 continue;
1723 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1724 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1725 }
1726}
1727
e3ecdffa
AD
1728/**
1729 * amdgpu_device_ip_wait_for_idle - wait for idle
1730 *
1731 * @adev: amdgpu_device pointer
1732 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1733 *
1734 * Waits for the request hardware IP to be idle.
1735 * Returns 0 for success or a negative error code on failure.
1736 */
2990a1fc
AD
1737int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1738 enum amd_ip_block_type block_type)
5dbbb60b
AD
1739{
1740 int i, r;
1741
1742 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1743 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1744 continue;
a1255107
AD
1745 if (adev->ip_blocks[i].version->type == block_type) {
1746 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1747 if (r)
1748 return r;
1749 break;
1750 }
1751 }
1752 return 0;
1753
1754}
1755
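/*
 * A minimal illustrative sketch (not part of the driver flow): a reset or
 * teardown path could drain a block with the helper above before touching
 * its registers. example_quiesce_sdma is a hypothetical name.
 */
static int __maybe_unused example_quiesce_sdma(struct amdgpu_device *adev)
{
	/* Returns 0 once the SDMA block reports idle, or a negative error code */
	return amdgpu_device_ip_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_SDMA);
}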
e3ecdffa
AD
1756/**
1757 * amdgpu_device_ip_is_idle - is the hardware IP idle
1758 *
1759 * @adev: amdgpu_device pointer
1760 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1761 *
1762 * Check if the hardware IP is idle or not.
1763 * Returns true if the IP is idle, false if not.
1764 */
2990a1fc
AD
1765bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1766 enum amd_ip_block_type block_type)
5dbbb60b
AD
1767{
1768 int i;
1769
1770 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1771 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1772 continue;
a1255107
AD
1773 if (adev->ip_blocks[i].version->type == block_type)
1774 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1775 }
1776 return true;
1777
1778}
1779
e3ecdffa
AD
1780/**
1781 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1782 *
1783 * @adev: amdgpu_device pointer
87e3f136 1784 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1785 *
1786 * Returns a pointer to the hardware IP block structure
1787 * if it exists for the asic, otherwise NULL.
1788 */
2990a1fc
AD
1789struct amdgpu_ip_block *
1790amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1791 enum amd_ip_block_type type)
d38ceaf9
AD
1792{
1793 int i;
1794
1795 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1796 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1797 return &adev->ip_blocks[i];
1798
1799 return NULL;
1800}
1801
1802/**
2990a1fc 1803 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1804 *
1805 * @adev: amdgpu_device pointer
5fc3aeeb 1806 * @type: enum amd_ip_block_type
d38ceaf9
AD
1807 * @major: major version
1808 * @minor: minor version
1809 *
1810 * Return 0 if the installed version is equal to or greater than the requested one,
1811 * 1 if it is smaller or the ip_block doesn't exist
1812 */
2990a1fc
AD
1813int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1814 enum amd_ip_block_type type,
1815 u32 major, u32 minor)
d38ceaf9 1816{
2990a1fc 1817 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1818
a1255107
AD
1819 if (ip_block && ((ip_block->version->major > major) ||
1820 ((ip_block->version->major == major) &&
1821 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1822 return 0;
1823
1824 return 1;
1825}
1826
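/*
 * A minimal illustrative sketch (not part of the driver flow): gating a code
 * path on a minimum IP version is the typical use of the comparison helper
 * above. The GMC 8.1 numbers are arbitrary placeholders and
 * example_gmc_is_8_1_or_newer is a hypothetical name.
 */
static bool __maybe_unused example_gmc_is_8_1_or_newer(struct amdgpu_device *adev)
{
	/* 0 means the installed GMC block version is >= 8.1 */
	return amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GMC,
						  8, 1) == 0;
}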
a1255107 1827/**
2990a1fc 1828 * amdgpu_device_ip_block_add
a1255107
AD
1829 *
1830 * @adev: amdgpu_device pointer
1831 * @ip_block_version: pointer to the IP to add
1832 *
1833 * Adds the IP block driver information to the collection of IPs
1834 * on the asic.
1835 */
2990a1fc
AD
1836int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1837 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1838{
1839 if (!ip_block_version)
1840 return -EINVAL;
1841
7bd939d0
LG
1842 switch (ip_block_version->type) {
1843 case AMD_IP_BLOCK_TYPE_VCN:
1844 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
1845 return 0;
1846 break;
1847 case AMD_IP_BLOCK_TYPE_JPEG:
1848 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
1849 return 0;
1850 break;
1851 default:
1852 break;
1853 }
1854
e966a725 1855 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1856 ip_block_version->funcs->name);
1857
a1255107
AD
1858 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1859
1860 return 0;
1861}
1862
e3ecdffa
AD
1863/**
1864 * amdgpu_device_enable_virtual_display - enable virtual display feature
1865 *
1866 * @adev: amdgpu_device pointer
1867 *
1868 * Enables the virtual display feature if the user has enabled it via
1869 * the module parameter virtual_display. This feature provides virtual
1870 * display hardware on headless boards or in virtualized environments.
1871 * This function parses and validates the configuration string specified by
1872 * the user and configures the virtual display configuration (number of
1873 * virtual connectors, crtcs, etc.) specified.
1874 */
483ef985 1875static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1876{
1877 adev->enable_virtual_display = false;
1878
1879 if (amdgpu_virtual_display) {
8f66090b 1880 const char *pci_address_name = pci_name(adev->pdev);
0f66356d 1881 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1882
1883 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1884 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1885 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1886 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1887 if (!strcmp("all", pciaddname)
1888 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1889 long num_crtc;
1890 int res = -1;
1891
9accf2fd 1892 adev->enable_virtual_display = true;
0f66356d
ED
1893
1894 if (pciaddname_tmp)
1895 res = kstrtol(pciaddname_tmp, 10,
1896 &num_crtc);
1897
1898 if (!res) {
1899 if (num_crtc < 1)
1900 num_crtc = 1;
1901 if (num_crtc > 6)
1902 num_crtc = 6;
1903 adev->mode_info.num_crtc = num_crtc;
1904 } else {
1905 adev->mode_info.num_crtc = 1;
1906 }
9accf2fd
ED
1907 break;
1908 }
1909 }
1910
0f66356d
ED
1911 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1912 amdgpu_virtual_display, pci_address_name,
1913 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
1914
1915 kfree(pciaddstr);
1916 }
1917}
1918
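/*
 * Illustrative examples of the virtual_display module parameter format
 * parsed above (the PCI addresses are placeholders): entries are separated
 * by ';', and an optional CRTC count (clamped to 1..6) follows a ','.
 *
 *   amdgpu.virtual_display=0000:04:00.0,2     one device, two virtual CRTCs
 *   amdgpu.virtual_display=all                every device, default of one CRTC
 */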
25263da3
AD
1919void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
1920{
1921 if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
1922 adev->mode_info.num_crtc = 1;
1923 adev->enable_virtual_display = true;
1924 DRM_INFO("virtual_display:%d, num_crtc:%d\n",
1925 adev->enable_virtual_display, adev->mode_info.num_crtc);
1926 }
1927}
1928
e3ecdffa
AD
1929/**
1930 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1931 *
1932 * @adev: amdgpu_device pointer
1933 *
1934 * Parses the asic configuration parameters specified in the gpu info
1935 * firmware and makes them available to the driver for use in configuring
1936 * the asic.
1937 * Returns 0 on success, -EINVAL on failure.
1938 */
e2a75f88
AD
1939static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1940{
e2a75f88 1941 const char *chip_name;
c0a43457 1942 char fw_name[40];
e2a75f88
AD
1943 int err;
1944 const struct gpu_info_firmware_header_v1_0 *hdr;
1945
ab4fe3e1
HR
1946 adev->firmware.gpu_info_fw = NULL;
1947
72de33f8 1948 if (adev->mman.discovery_bin) {
cc375d8c
TY
1949 /*
1950 * FIXME: The bounding box is still needed by Navi12, so
e24d0e91 1951 * temporarily read it from gpu_info firmware. Should be dropped
cc375d8c
TY
1952 * when DAL no longer needs it.
1953 */
1954 if (adev->asic_type != CHIP_NAVI12)
1955 return 0;
258620d0
AD
1956 }
1957
e2a75f88 1958 switch (adev->asic_type) {
e2a75f88
AD
1959 default:
1960 return 0;
1961 case CHIP_VEGA10:
1962 chip_name = "vega10";
1963 break;
3f76dced
AD
1964 case CHIP_VEGA12:
1965 chip_name = "vega12";
1966 break;
2d2e5e7e 1967 case CHIP_RAVEN:
54f78a76 1968 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
54c4d17e 1969 chip_name = "raven2";
54f78a76 1970 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
741deade 1971 chip_name = "picasso";
54c4d17e
FX
1972 else
1973 chip_name = "raven";
2d2e5e7e 1974 break;
65e60f6e
LM
1975 case CHIP_ARCTURUS:
1976 chip_name = "arcturus";
1977 break;
42b325e5
XY
1978 case CHIP_NAVI12:
1979 chip_name = "navi12";
1980 break;
e2a75f88
AD
1981 }
1982
1983 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
ab4fe3e1 1984 err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
e2a75f88
AD
1985 if (err) {
1986 dev_err(adev->dev,
1987 "Failed to load gpu_info firmware \"%s\"\n",
1988 fw_name);
1989 goto out;
1990 }
ab4fe3e1 1991 err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
e2a75f88
AD
1992 if (err) {
1993 dev_err(adev->dev,
1994 "Failed to validate gpu_info firmware \"%s\"\n",
1995 fw_name);
1996 goto out;
1997 }
1998
ab4fe3e1 1999 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
2000 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
2001
2002 switch (hdr->version_major) {
2003 case 1:
2004 {
2005 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 2006 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
2007 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2008
cc375d8c
TY
2009 /*
2010 * Should be dropped when DAL no longer needs it.
2011 */
2012 if (adev->asic_type == CHIP_NAVI12)
ec51d3fa
XY
2013 goto parse_soc_bounding_box;
2014
b5ab16bf
AD
2015 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2016 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2017 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2018 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 2019 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
2020 le32_to_cpu(gpu_info_fw->gc_num_tccs);
2021 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2022 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2023 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2024 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 2025 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
2026 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2027 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
2028 adev->gfx.cu_info.max_waves_per_simd =
2029 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2030 adev->gfx.cu_info.max_scratch_slots_per_cu =
2031 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2032 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 2033 if (hdr->version_minor >= 1) {
35c2e910
HZ
2034 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2035 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2036 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2037 adev->gfx.config.num_sc_per_sh =
2038 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2039 adev->gfx.config.num_packer_per_sc =
2040 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2041 }
ec51d3fa
XY
2042
2043parse_soc_bounding_box:
ec51d3fa
XY
2044 /*
2045 * soc bounding box info is not integrated into the discovery table,
258620d0 2046 * so it always needs to be parsed from the gpu info firmware when required.
ec51d3fa 2047 */
48321c3d
HW
2048 if (hdr->version_minor == 2) {
2049 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2050 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2051 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2052 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2053 }
e2a75f88
AD
2054 break;
2055 }
2056 default:
2057 dev_err(adev->dev,
2058 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2059 err = -EINVAL;
2060 goto out;
2061 }
2062out:
e2a75f88
AD
2063 return err;
2064}
2065
e3ecdffa
AD
2066/**
2067 * amdgpu_device_ip_early_init - run early init for hardware IPs
2068 *
2069 * @adev: amdgpu_device pointer
2070 *
2071 * Early initialization pass for hardware IPs. The hardware IPs that make
2072 * up each asic are discovered and each IP's early_init callback is run. This
2073 * is the first stage in initializing the asic.
2074 * Returns 0 on success, negative error code on failure.
2075 */
06ec9070 2076static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 2077{
901e2be2
AD
2078 struct drm_device *dev = adev_to_drm(adev);
2079 struct pci_dev *parent;
aaa36a97 2080 int i, r;
d38ceaf9 2081
483ef985 2082 amdgpu_device_enable_virtual_display(adev);
a6be7570 2083
00a979f3 2084 if (amdgpu_sriov_vf(adev)) {
00a979f3 2085 r = amdgpu_virt_request_full_gpu(adev, true);
aaa36a97
AD
2086 if (r)
2087 return r;
00a979f3
WS
2088 }
2089
d38ceaf9 2090 switch (adev->asic_type) {
33f34802
KW
2091#ifdef CONFIG_DRM_AMDGPU_SI
2092 case CHIP_VERDE:
2093 case CHIP_TAHITI:
2094 case CHIP_PITCAIRN:
2095 case CHIP_OLAND:
2096 case CHIP_HAINAN:
295d0daf 2097 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
2098 r = si_set_ip_blocks(adev);
2099 if (r)
2100 return r;
2101 break;
2102#endif
a2e73f56
AD
2103#ifdef CONFIG_DRM_AMDGPU_CIK
2104 case CHIP_BONAIRE:
2105 case CHIP_HAWAII:
2106 case CHIP_KAVERI:
2107 case CHIP_KABINI:
2108 case CHIP_MULLINS:
e1ad2d53 2109 if (adev->flags & AMD_IS_APU)
a2e73f56 2110 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
2111 else
2112 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
2113
2114 r = cik_set_ip_blocks(adev);
2115 if (r)
2116 return r;
2117 break;
2118#endif
da87c30b
AD
2119 case CHIP_TOPAZ:
2120 case CHIP_TONGA:
2121 case CHIP_FIJI:
2122 case CHIP_POLARIS10:
2123 case CHIP_POLARIS11:
2124 case CHIP_POLARIS12:
2125 case CHIP_VEGAM:
2126 case CHIP_CARRIZO:
2127 case CHIP_STONEY:
2128 if (adev->flags & AMD_IS_APU)
2129 adev->family = AMDGPU_FAMILY_CZ;
2130 else
2131 adev->family = AMDGPU_FAMILY_VI;
2132
2133 r = vi_set_ip_blocks(adev);
2134 if (r)
2135 return r;
2136 break;
d38ceaf9 2137 default:
63352b7f
AD
2138 r = amdgpu_discovery_set_ip_blocks(adev);
2139 if (r)
2140 return r;
2141 break;
d38ceaf9
AD
2142 }
2143
901e2be2
AD
2144 if (amdgpu_has_atpx() &&
2145 (amdgpu_is_atpx_hybrid() ||
2146 amdgpu_has_atpx_dgpu_power_cntl()) &&
2147 ((adev->flags & AMD_IS_APU) == 0) &&
2148 !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
2149 adev->flags |= AMD_IS_PX;
2150
85ac2021
AD
2151 if (!(adev->flags & AMD_IS_APU)) {
2152 parent = pci_upstream_bridge(adev->pdev);
2153 adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2154 }
901e2be2 2155
c004d44e 2156 amdgpu_amdkfd_device_probe(adev);
1884734a 2157
3b94fb10 2158 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 2159 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 2160 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
4215a119
HC
2161 if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2162 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
00f54b97 2163
d38ceaf9
AD
2164 for (i = 0; i < adev->num_ip_blocks; i++) {
2165 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
ed8cf00c
HR
2166 DRM_ERROR("disabled ip block: %d <%s>\n",
2167 i, adev->ip_blocks[i].version->funcs->name);
a1255107 2168 adev->ip_blocks[i].status.valid = false;
d38ceaf9 2169 } else {
a1255107
AD
2170 if (adev->ip_blocks[i].version->funcs->early_init) {
2171 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 2172 if (r == -ENOENT) {
a1255107 2173 adev->ip_blocks[i].status.valid = false;
2c1a2784 2174 } else if (r) {
a1255107
AD
2175 DRM_ERROR("early_init of IP block <%s> failed %d\n",
2176 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2177 return r;
2c1a2784 2178 } else {
a1255107 2179 adev->ip_blocks[i].status.valid = true;
2c1a2784 2180 }
974e6b64 2181 } else {
a1255107 2182 adev->ip_blocks[i].status.valid = true;
d38ceaf9 2183 }
d38ceaf9 2184 }
21a249ca
AD
2185 /* get the vbios after the asic_funcs are set up */
2186 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
2187 r = amdgpu_device_parse_gpu_info_fw(adev);
2188 if (r)
2189 return r;
2190
21a249ca
AD
2191 /* Read BIOS */
2192 if (!amdgpu_get_bios(adev))
2193 return -EINVAL;
2194
2195 r = amdgpu_atombios_init(adev);
2196 if (r) {
2197 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2198 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2199 return r;
2200 }
77eabc6f
PJZ
2201
2202 /* get pf2vf msg info at its earliest time */
2203 if (amdgpu_sriov_vf(adev))
2204 amdgpu_virt_init_data_exchange(adev);
2205
21a249ca 2206 }
d38ceaf9
AD
2207 }
2208
395d1fb9
NH
2209 adev->cg_flags &= amdgpu_cg_mask;
2210 adev->pg_flags &= amdgpu_pg_mask;
2211
d38ceaf9
AD
2212 return 0;
2213}
2214
0a4f2520
RZ
2215static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2216{
2217 int i, r;
2218
2219 for (i = 0; i < adev->num_ip_blocks; i++) {
2220 if (!adev->ip_blocks[i].status.sw)
2221 continue;
2222 if (adev->ip_blocks[i].status.hw)
2223 continue;
2224 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 2225 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
2226 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2227 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2228 if (r) {
2229 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2230 adev->ip_blocks[i].version->funcs->name, r);
2231 return r;
2232 }
2233 adev->ip_blocks[i].status.hw = true;
2234 }
2235 }
2236
2237 return 0;
2238}
2239
2240static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2241{
2242 int i, r;
2243
2244 for (i = 0; i < adev->num_ip_blocks; i++) {
2245 if (!adev->ip_blocks[i].status.sw)
2246 continue;
2247 if (adev->ip_blocks[i].status.hw)
2248 continue;
2249 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2250 if (r) {
2251 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2252 adev->ip_blocks[i].version->funcs->name, r);
2253 return r;
2254 }
2255 adev->ip_blocks[i].status.hw = true;
2256 }
2257
2258 return 0;
2259}
2260
7a3e0bb2
RZ
2261static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2262{
2263 int r = 0;
2264 int i;
80f41f84 2265 uint32_t smu_version;
7a3e0bb2
RZ
2266
2267 if (adev->asic_type >= CHIP_VEGA10) {
2268 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
2269 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2270 continue;
2271
e3c1b071 2272 if (!adev->ip_blocks[i].status.sw)
2273 continue;
2274
482f0e53
ML
2275 /* no need to do the fw loading again if already done */
2276 if (adev->ip_blocks[i].status.hw == true)
2277 break;
2278
53b3f8f4 2279 if (amdgpu_in_reset(adev) || adev->in_suspend) {
482f0e53
ML
2280 r = adev->ip_blocks[i].version->funcs->resume(adev);
2281 if (r) {
2282 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 2283 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
2284 return r;
2285 }
2286 } else {
2287 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2288 if (r) {
2289 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2290 adev->ip_blocks[i].version->funcs->name, r);
2291 return r;
7a3e0bb2 2292 }
7a3e0bb2 2293 }
482f0e53
ML
2294
2295 adev->ip_blocks[i].status.hw = true;
2296 break;
7a3e0bb2
RZ
2297 }
2298 }
482f0e53 2299
8973d9ec
ED
2300 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2301 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 2302
80f41f84 2303 return r;
7a3e0bb2
RZ
2304}
2305
5fd8518d
AG
2306static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2307{
2308 long timeout;
2309 int r, i;
2310
2311 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2312 struct amdgpu_ring *ring = adev->rings[i];
2313
2314 /* No need to setup the GPU scheduler for rings that don't need it */
2315 if (!ring || ring->no_scheduler)
2316 continue;
2317
2318 switch (ring->funcs->type) {
2319 case AMDGPU_RING_TYPE_GFX:
2320 timeout = adev->gfx_timeout;
2321 break;
2322 case AMDGPU_RING_TYPE_COMPUTE:
2323 timeout = adev->compute_timeout;
2324 break;
2325 case AMDGPU_RING_TYPE_SDMA:
2326 timeout = adev->sdma_timeout;
2327 break;
2328 default:
2329 timeout = adev->video_timeout;
2330 break;
2331 }
2332
2333 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
2334 ring->num_hw_submission, amdgpu_job_hang_limit,
8ab62eda
JG
2335 timeout, adev->reset_domain->wq,
2336 ring->sched_score, ring->name,
2337 adev->dev);
5fd8518d
AG
2338 if (r) {
2339 DRM_ERROR("Failed to create scheduler on ring %s.\n",
2340 ring->name);
2341 return r;
2342 }
2343 }
2344
2345 return 0;
2346}
2347
2348
e3ecdffa
AD
2349/**
2350 * amdgpu_device_ip_init - run init for hardware IPs
2351 *
2352 * @adev: amdgpu_device pointer
2353 *
2354 * Main initialization pass for hardware IPs. The list of all the hardware
2355 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2356 * are run. sw_init initializes the software state associated with each IP
2357 * and hw_init initializes the hardware associated with each IP.
2358 * Returns 0 on success, negative error code on failure.
2359 */
06ec9070 2360static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
2361{
2362 int i, r;
2363
c030f2e4 2364 r = amdgpu_ras_init(adev);
2365 if (r)
2366 return r;
2367
d38ceaf9 2368 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2369 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2370 continue;
a1255107 2371 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 2372 if (r) {
a1255107
AD
2373 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2374 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 2375 goto init_failed;
2c1a2784 2376 }
a1255107 2377 adev->ip_blocks[i].status.sw = true;
bfca0289 2378
c1c39032
AD
2379 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2380 /* need to do common hw init early so everything is set up for gmc */
2381 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2382 if (r) {
2383 DRM_ERROR("hw_init %d failed %d\n", i, r);
2384 goto init_failed;
2385 }
2386 adev->ip_blocks[i].status.hw = true;
2387 } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2388 /* need to do gmc hw init early so we can allocate gpu mem */
892deb48
VS
2389 /* Try to reserve bad pages early */
2390 if (amdgpu_sriov_vf(adev))
2391 amdgpu_virt_exchange_data(adev);
2392
06ec9070 2393 r = amdgpu_device_vram_scratch_init(adev);
2c1a2784
AD
2394 if (r) {
2395 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
72d3f592 2396 goto init_failed;
2c1a2784 2397 }
a1255107 2398 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
2399 if (r) {
2400 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 2401 goto init_failed;
2c1a2784 2402 }
06ec9070 2403 r = amdgpu_device_wb_init(adev);
2c1a2784 2404 if (r) {
06ec9070 2405 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 2406 goto init_failed;
2c1a2784 2407 }
a1255107 2408 adev->ip_blocks[i].status.hw = true;
2493664f
ML
2409
2410 /* right after GMC hw init, we create CSA */
8a1fbb4a 2411 if (amdgpu_mcbp) {
1e256e27
RZ
2412 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2413 AMDGPU_GEM_DOMAIN_VRAM,
2414 AMDGPU_CSA_SIZE);
2493664f
ML
2415 if (r) {
2416 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 2417 goto init_failed;
2493664f
ML
2418 }
2419 }
d38ceaf9
AD
2420 }
2421 }
2422
c9ffa427 2423 if (amdgpu_sriov_vf(adev))
22c16d25 2424 amdgpu_virt_init_data_exchange(adev);
c9ffa427 2425
533aed27
AG
2426 r = amdgpu_ib_pool_init(adev);
2427 if (r) {
2428 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2429 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2430 goto init_failed;
2431 }
2432
c8963ea4
RZ
2433 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2434 if (r)
72d3f592 2435 goto init_failed;
0a4f2520
RZ
2436
2437 r = amdgpu_device_ip_hw_init_phase1(adev);
2438 if (r)
72d3f592 2439 goto init_failed;
0a4f2520 2440
7a3e0bb2
RZ
2441 r = amdgpu_device_fw_loading(adev);
2442 if (r)
72d3f592 2443 goto init_failed;
7a3e0bb2 2444
0a4f2520
RZ
2445 r = amdgpu_device_ip_hw_init_phase2(adev);
2446 if (r)
72d3f592 2447 goto init_failed;
d38ceaf9 2448
121a2bc6
AG
2449 /*
2450 * retired pages will be loaded from eeprom and reserved here;
2451 * this should be called after amdgpu_device_ip_hw_init_phase2 since
2452 * for some ASICs the RAS EEPROM code relies on the SMU being fully
2453 * functional for I2C communication, which is only true at this point.
b82e65a9
GC
2454 *
2455 * amdgpu_ras_recovery_init may fail, but the upper layers only care about
2456 * failures caused by a bad gpu situation and stop the amdgpu init process
2457 * accordingly. For other failure cases, it will still release all
2458 * the resources and print an error message, rather than returning a
2459 * negative value to the upper level.
121a2bc6
AG
2460 *
2461 * Note: theoretically, this should be called before all vram allocations
2462 * to protect retired page from abusing
2463 */
b82e65a9
GC
2464 r = amdgpu_ras_recovery_init(adev);
2465 if (r)
2466 goto init_failed;
121a2bc6 2467
cfbb6b00
AG
2468 /**
2469 * In the case of XGMI, grab an extra reference to the reset domain for this device
2470 */
a4c63caf 2471 if (adev->gmc.xgmi.num_physical_nodes > 1) {
cfbb6b00 2472 if (amdgpu_xgmi_add_device(adev) == 0) {
46c67660 2473 if (!amdgpu_sriov_vf(adev)) {
2efc30f0
VC
2474 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2475
46c67660 2476 if (!hive->reset_domain ||
2477 !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2478 r = -ENOENT;
2479 amdgpu_put_xgmi_hive(hive);
2480 goto init_failed;
2481 }
2482
2483 /* Drop the early temporary reset domain we created for device */
2484 amdgpu_reset_put_reset_domain(adev->reset_domain);
2485 adev->reset_domain = hive->reset_domain;
9dfa4860 2486 amdgpu_put_xgmi_hive(hive);
cfbb6b00 2487 }
a4c63caf
AG
2488 }
2489 }
2490
5fd8518d
AG
2491 r = amdgpu_device_init_schedulers(adev);
2492 if (r)
2493 goto init_failed;
e3c1b071 2494
2495 /* Don't init kfd if the whole hive needs to be reset during init */
c004d44e 2496 if (!adev->gmc.xgmi.pending_reset)
e3c1b071 2497 amdgpu_amdkfd_device_init(adev);
c6332b97 2498
bd607166
KR
2499 amdgpu_fru_get_product_info(adev);
2500
72d3f592 2501init_failed:
c9ffa427 2502 if (amdgpu_sriov_vf(adev))
c6332b97 2503 amdgpu_virt_release_full_gpu(adev, true);
2504
72d3f592 2505 return r;
d38ceaf9
AD
2506}
2507
e3ecdffa
AD
2508/**
2509 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2510 *
2511 * @adev: amdgpu_device pointer
2512 *
2513 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2514 * this function before a GPU reset. If the value is retained after a
2515 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2516 */
06ec9070 2517static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2518{
2519 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2520}
2521
e3ecdffa
AD
2522/**
2523 * amdgpu_device_check_vram_lost - check if vram is valid
2524 *
2525 * @adev: amdgpu_device pointer
2526 *
2527 * Checks the reset magic value written to the gart pointer in VRAM.
2528 * The driver calls this after a GPU reset to see if the contents of
2529 * VRAM have been lost or not.
2530 * Returns true if vram is lost, false if not.
2531 */
06ec9070 2532static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2533{
dadce777
EQ
2534 if (memcmp(adev->gart.ptr, adev->reset_magic,
2535 AMDGPU_RESET_MAGIC_NUM))
2536 return true;
2537
53b3f8f4 2538 if (!amdgpu_in_reset(adev))
dadce777
EQ
2539 return false;
2540
2541 /*
2542 * For all ASICs with baco/mode1 reset, the VRAM is
2543 * always assumed to be lost.
2544 */
2545 switch (amdgpu_asic_reset_method(adev)) {
2546 case AMD_RESET_METHOD_BACO:
2547 case AMD_RESET_METHOD_MODE1:
2548 return true;
2549 default:
2550 return false;
2551 }
0c49e0b8
CZ
2552}
2553
e3ecdffa 2554/**
1112a46b 2555 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2556 *
2557 * @adev: amdgpu_device pointer
b8b72130 2558 * @state: clockgating state (gate or ungate)
e3ecdffa 2559 *
e3ecdffa 2560 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2561 * set_clockgating_state callbacks are run.
2562 * During late init this is used to enable clockgating for hardware IPs;
2563 * during fini or suspend it is used to disable clockgating.
e3ecdffa
AD
2564 * Returns 0 on success, negative error code on failure.
2565 */
fdd34271 2566
5d89bb2d
LL
2567int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2568 enum amd_clockgating_state state)
d38ceaf9 2569{
1112a46b 2570 int i, j, r;
d38ceaf9 2571
4a2ba394
SL
2572 if (amdgpu_emu_mode == 1)
2573 return 0;
2574
1112a46b
RZ
2575 for (j = 0; j < adev->num_ip_blocks; j++) {
2576 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2577 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2578 continue;
5d70a549
PV
2579 /* skip CG for GFX on S0ix */
2580 if (adev->in_s0ix &&
2581 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX)
2582 continue;
4a446d55 2583 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2584 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2585 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2586 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2587 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2588 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2589 /* enable clockgating to save power */
a1255107 2590 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2591 state);
4a446d55
AD
2592 if (r) {
2593 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2594 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2595 return r;
2596 }
b0b00ff1 2597 }
d38ceaf9 2598 }
06b18f61 2599
c9f96fd5
RZ
2600 return 0;
2601}
2602
5d89bb2d
LL
2603int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2604 enum amd_powergating_state state)
c9f96fd5 2605{
1112a46b 2606 int i, j, r;
06b18f61 2607
c9f96fd5
RZ
2608 if (amdgpu_emu_mode == 1)
2609 return 0;
2610
1112a46b
RZ
2611 for (j = 0; j < adev->num_ip_blocks; j++) {
2612 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2613 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5 2614 continue;
5d70a549
PV
2615 /* skip PG for GFX on S0ix */
2616 if (adev->in_s0ix &&
2617 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX)
2618 continue;
c9f96fd5
RZ
2619 /* skip PG for VCE/UVD, it's handled specially */
2620 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2621 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2622 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2623 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2624 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2625 /* enable powergating to save power */
2626 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2627 state);
c9f96fd5
RZ
2628 if (r) {
2629 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2630 adev->ip_blocks[i].version->funcs->name, r);
2631 return r;
2632 }
2633 }
2634 }
2dc80b00
S
2635 return 0;
2636}
2637
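/*
 * A minimal illustrative sketch (not part of the driver flow): the two
 * helpers above are normally used as a pair, gating on late init and
 * ungating before suspend or teardown, mirroring the calls made elsewhere
 * in this file. example_ungate_for_teardown is a hypothetical name.
 */
static void __maybe_unused example_ungate_for_teardown(struct amdgpu_device *adev)
{
	/* Ungate power first, then clocks, so IPs are fully active for teardown */
	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
}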
beff74bc
AD
2638static int amdgpu_device_enable_mgpu_fan_boost(void)
2639{
2640 struct amdgpu_gpu_instance *gpu_ins;
2641 struct amdgpu_device *adev;
2642 int i, ret = 0;
2643
2644 mutex_lock(&mgpu_info.mutex);
2645
2646 /*
2647 * MGPU fan boost feature should be enabled
2648 * only when there are two or more dGPUs in
2649 * the system
2650 */
2651 if (mgpu_info.num_dgpu < 2)
2652 goto out;
2653
2654 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2655 gpu_ins = &(mgpu_info.gpu_ins[i]);
2656 adev = gpu_ins->adev;
2657 if (!(adev->flags & AMD_IS_APU) &&
f10bb940 2658 !gpu_ins->mgpu_fan_enabled) {
beff74bc
AD
2659 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2660 if (ret)
2661 break;
2662
2663 gpu_ins->mgpu_fan_enabled = 1;
2664 }
2665 }
2666
2667out:
2668 mutex_unlock(&mgpu_info.mutex);
2669
2670 return ret;
2671}
2672
e3ecdffa
AD
2673/**
2674 * amdgpu_device_ip_late_init - run late init for hardware IPs
2675 *
2676 * @adev: amdgpu_device pointer
2677 *
2678 * Late initialization pass for hardware IPs. The list of all the hardware
2679 * IPs that make up the asic is walked and the late_init callbacks are run.
2680 * late_init covers any special initialization that an IP requires
2681 * after all of them have been initialized or something that needs to happen
2682 * late in the init process.
2683 * Returns 0 on success, negative error code on failure.
2684 */
06ec9070 2685static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2686{
60599a03 2687 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2688 int i = 0, r;
2689
2690 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2691 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2692 continue;
2693 if (adev->ip_blocks[i].version->funcs->late_init) {
2694 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2695 if (r) {
2696 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2697 adev->ip_blocks[i].version->funcs->name, r);
2698 return r;
2699 }
2dc80b00 2700 }
73f847db 2701 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2702 }
2703
867e24ca 2704 r = amdgpu_ras_late_init(adev);
2705 if (r) {
2706 DRM_ERROR("amdgpu_ras_late_init failed %d", r);
2707 return r;
2708 }
2709
a891d239
DL
2710 amdgpu_ras_set_error_query_ready(adev, true);
2711
1112a46b
RZ
2712 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2713 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2714
06ec9070 2715 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2716
beff74bc
AD
2717 r = amdgpu_device_enable_mgpu_fan_boost();
2718 if (r)
2719 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2720
4da8b639 2721 /* For passthrough configurations on Arcturus and Aldebaran, enable special SBR handling */
2722 if (amdgpu_passthrough(adev) && ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
2723 adev->asic_type == CHIP_ALDEBARAN))
bc143d8b 2724 amdgpu_dpm_handle_passthrough_sbr(adev, true);
60599a03
EQ
2725
2726 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2727 mutex_lock(&mgpu_info.mutex);
2728
2729 /*
2730 * Reset device p-state to low as this was booted with high.
2731 *
2732 * This should be performed only after all devices from the same
2733 * hive get initialized.
2734 *
2735 * However, the number of devices in the hive is not known in advance,
2736 * as it is counted one by one during device initialization.
2737 *
2738 * So, we wait for all XGMI interlinked devices initialized.
2739 * This may bring some delays as those devices may come from
2740 * different hives. But that should be OK.
2741 */
2742 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2743 for (i = 0; i < mgpu_info.num_gpu; i++) {
2744 gpu_instance = &(mgpu_info.gpu_ins[i]);
2745 if (gpu_instance->adev->flags & AMD_IS_APU)
2746 continue;
2747
d84a430d
JK
2748 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2749 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
2750 if (r) {
2751 DRM_ERROR("pstate setting failed (%d).\n", r);
2752 break;
2753 }
2754 }
2755 }
2756
2757 mutex_unlock(&mgpu_info.mutex);
2758 }
2759
d38ceaf9
AD
2760 return 0;
2761}
2762
613aa3ea
LY
2763/**
2764 * amdgpu_device_smu_fini_early - smu hw_fini wrapper
2765 *
2766 * @adev: amdgpu_device pointer
2767 *
2768 * For ASICs that need to disable the SMC first
2769 */
2770static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
2771{
2772 int i, r;
2773
2774 if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))
2775 return;
2776
2777 for (i = 0; i < adev->num_ip_blocks; i++) {
2778 if (!adev->ip_blocks[i].status.hw)
2779 continue;
2780 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2781 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2782 /* XXX handle errors */
2783 if (r) {
2784 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2785 adev->ip_blocks[i].version->funcs->name, r);
2786 }
2787 adev->ip_blocks[i].status.hw = false;
2788 break;
2789 }
2790 }
2791}
2792
e9669fb7 2793static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
d38ceaf9
AD
2794{
2795 int i, r;
2796
e9669fb7
AG
2797 for (i = 0; i < adev->num_ip_blocks; i++) {
2798 if (!adev->ip_blocks[i].version->funcs->early_fini)
2799 continue;
5278a159 2800
e9669fb7
AG
2801 r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
2802 if (r) {
2803 DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
2804 adev->ip_blocks[i].version->funcs->name, r);
2805 }
2806 }
c030f2e4 2807
05df1f01 2808 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2809 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2810
7270e895
TY
2811 amdgpu_amdkfd_suspend(adev, false);
2812
613aa3ea
LY
2813 /* Workaround for ASICs that need to disable the SMC first */
2814 amdgpu_device_smu_fini_early(adev);
3e96dbfd 2815
d38ceaf9 2816 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2817 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2818 continue;
8201a67a 2819
a1255107 2820 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2821 /* XXX handle errors */
2c1a2784 2822 if (r) {
a1255107
AD
2823 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2824 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2825 }
8201a67a 2826
a1255107 2827 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2828 }
2829
6effad8a
GC
2830 if (amdgpu_sriov_vf(adev)) {
2831 if (amdgpu_virt_release_full_gpu(adev, false))
2832 DRM_ERROR("failed to release exclusive mode on fini\n");
2833 }
2834
e9669fb7
AG
2835 return 0;
2836}
2837
2838/**
2839 * amdgpu_device_ip_fini - run fini for hardware IPs
2840 *
2841 * @adev: amdgpu_device pointer
2842 *
2843 * Main teardown pass for hardware IPs. The list of all the hardware
2844 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2845 * are run. hw_fini tears down the hardware associated with each IP
2846 * and sw_fini tears down any software state associated with each IP.
2847 * Returns 0 on success, negative error code on failure.
2848 */
2849static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2850{
2851 int i, r;
2852
2853 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2854 amdgpu_virt_release_ras_err_handler_data(adev);
2855
e9669fb7
AG
2856 if (adev->gmc.xgmi.num_physical_nodes > 1)
2857 amdgpu_xgmi_remove_device(adev);
2858
c004d44e 2859 amdgpu_amdkfd_device_fini_sw(adev);
9950cda2 2860
d38ceaf9 2861 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2862 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2863 continue;
c12aba3a
ML
2864
2865 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2866 amdgpu_ucode_free_bo(adev);
1e256e27 2867 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a
ML
2868 amdgpu_device_wb_fini(adev);
2869 amdgpu_device_vram_scratch_fini(adev);
533aed27 2870 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2871 }
2872
a1255107 2873 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2874 /* XXX handle errors */
2c1a2784 2875 if (r) {
a1255107
AD
2876 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2877 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2878 }
a1255107
AD
2879 adev->ip_blocks[i].status.sw = false;
2880 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2881 }
2882
a6dcfd9c 2883 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2884 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2885 continue;
a1255107
AD
2886 if (adev->ip_blocks[i].version->funcs->late_fini)
2887 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2888 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2889 }
2890
c030f2e4 2891 amdgpu_ras_fini(adev);
2892
d38ceaf9
AD
2893 return 0;
2894}
2895
e3ecdffa 2896/**
beff74bc 2897 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2898 *
1112a46b 2899 * @work: work_struct.
e3ecdffa 2900 */
beff74bc 2901static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2902{
2903 struct amdgpu_device *adev =
beff74bc 2904 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2905 int r;
2906
2907 r = amdgpu_ib_ring_tests(adev);
2908 if (r)
2909 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2910}
2911
1e317b99
RZ
2912static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2913{
2914 struct amdgpu_device *adev =
2915 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2916
90a92662
MD
2917 WARN_ON_ONCE(adev->gfx.gfx_off_state);
2918 WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
2919
2920 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2921 adev->gfx.gfx_off_state = true;
1e317b99
RZ
2922}
2923
e3ecdffa 2924/**
e7854a03 2925 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2926 *
2927 * @adev: amdgpu_device pointer
2928 *
2929 * Main suspend function for hardware IPs. The list of all the hardware
2930 * IPs that make up the asic is walked, clockgating is disabled and the
2931 * suspend callbacks are run. suspend puts the hardware and software state
2932 * in each IP into a state suitable for suspend.
2933 * Returns 0 on success, negative error code on failure.
2934 */
e7854a03
AD
2935static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2936{
2937 int i, r;
2938
50ec83f0
AD
2939 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2940 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2941
b31d6ada
EQ
2942 /*
2943 * Per the PMFW team's suggestion, the driver needs to handle disabling the
2944 * gfxoff and df cstate features for gpu reset (e.g. Mode1Reset) scenarios.
2945 * Add the missing df cstate disablement here.
2946 */
2947 if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
2948 dev_warn(adev->dev, "Failed to disallow df cstate");
2949
e7854a03
AD
2950 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2951 if (!adev->ip_blocks[i].status.valid)
2952 continue;
2b9f7848 2953
e7854a03 2954 /* displays are handled separately */
2b9f7848
ND
2955 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
2956 continue;
2957
2958 /* XXX handle errors */
2959 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2960 /* XXX handle errors */
2961 if (r) {
2962 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2963 adev->ip_blocks[i].version->funcs->name, r);
2964 return r;
e7854a03 2965 }
2b9f7848
ND
2966
2967 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
2968 }
2969
e7854a03
AD
2970 return 0;
2971}
2972
2973/**
2974 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2975 *
2976 * @adev: amdgpu_device pointer
2977 *
2978 * Main suspend function for hardware IPs. The list of all the hardware
2979 * IPs that make up the asic is walked, clockgating is disabled and the
2980 * suspend callbacks are run. suspend puts the hardware and software state
2981 * in each IP into a state suitable for suspend.
2982 * Returns 0 on success, negative error code on failure.
2983 */
2984static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2985{
2986 int i, r;
2987
557f42a2 2988 if (adev->in_s0ix)
bc143d8b 2989 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
34416931 2990
d38ceaf9 2991 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2992 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2993 continue;
e7854a03
AD
2994 /* displays are handled in phase1 */
2995 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2996 continue;
bff77e86
LM
2997 /* PSP lost connection when err_event_athub occurs */
2998 if (amdgpu_ras_intr_triggered() &&
2999 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3000 adev->ip_blocks[i].status.hw = false;
3001 continue;
3002 }
e3c1b071 3003
3004 /* skip unnecessary suspend if we have not initialized them yet */
3005 if (adev->gmc.xgmi.pending_reset &&
3006 !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3007 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
3008 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3009 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
3010 adev->ip_blocks[i].status.hw = false;
3011 continue;
3012 }
557f42a2 3013
32ff160d
AD
3014 /* skip suspend of gfx and psp for S0ix
3015 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3016 * like at runtime. PSP is also part of the always on hardware
3017 * so no need to suspend it.
3018 */
557f42a2 3019 if (adev->in_s0ix &&
32ff160d
AD
3020 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
3021 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX))
557f42a2
AD
3022 continue;
3023
d38ceaf9 3024 /* XXX handle errors */
a1255107 3025 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 3026 /* XXX handle errors */
2c1a2784 3027 if (r) {
a1255107
AD
3028 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3029 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 3030 }
876923fb 3031 adev->ip_blocks[i].status.hw = false;
a3a09142 3032 /* handle putting the SMC in the appropriate state */
86b93fd6
JZ
3033 if (!amdgpu_sriov_vf(adev)) {
3034 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3035 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3036 if (r) {
3037 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3038 adev->mp1_state, r);
3039 return r;
3040 }
a3a09142
AD
3041 }
3042 }
d38ceaf9
AD
3043 }
3044
3045 return 0;
3046}
3047
e7854a03
AD
3048/**
3049 * amdgpu_device_ip_suspend - run suspend for hardware IPs
3050 *
3051 * @adev: amdgpu_device pointer
3052 *
3053 * Main suspend function for hardware IPs. The list of all the hardware
3054 * IPs that make up the asic is walked, clockgating is disabled and the
3055 * suspend callbacks are run. suspend puts the hardware and software state
3056 * in each IP into a state suitable for suspend.
3057 * Returns 0 on success, negative error code on failure.
3058 */
3059int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3060{
3061 int r;
3062
3c73683c
JC
3063 if (amdgpu_sriov_vf(adev)) {
3064 amdgpu_virt_fini_data_exchange(adev);
e7819644 3065 amdgpu_virt_request_full_gpu(adev, false);
3c73683c 3066 }
e7819644 3067
e7854a03
AD
3068 r = amdgpu_device_ip_suspend_phase1(adev);
3069 if (r)
3070 return r;
3071 r = amdgpu_device_ip_suspend_phase2(adev);
3072
e7819644
YT
3073 if (amdgpu_sriov_vf(adev))
3074 amdgpu_virt_release_full_gpu(adev, false);
3075
e7854a03
AD
3076 return r;
3077}
3078
06ec9070 3079static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3080{
3081 int i, r;
3082
2cb681b6 3083 static enum amd_ip_block_type ip_order[] = {
2cb681b6 3084 AMD_IP_BLOCK_TYPE_COMMON,
c1c39032 3085 AMD_IP_BLOCK_TYPE_GMC,
39186aef 3086 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
3087 AMD_IP_BLOCK_TYPE_IH,
3088 };
a90ad3c2 3089
95ea3dbc 3090 for (i = 0; i < adev->num_ip_blocks; i++) {
2cb681b6
ML
3091 int j;
3092 struct amdgpu_ip_block *block;
a90ad3c2 3093
4cd2a96d
J
3094 block = &adev->ip_blocks[i];
3095 block->status.hw = false;
2cb681b6 3096
4cd2a96d 3097 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
2cb681b6 3098
4cd2a96d 3099 if (block->version->type != ip_order[j] ||
2cb681b6
ML
3100 !block->status.valid)
3101 continue;
3102
3103 r = block->version->funcs->hw_init(adev);
0aaeefcc 3104 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
c41d1cf6
ML
3105 if (r)
3106 return r;
482f0e53 3107 block->status.hw = true;
a90ad3c2
ML
3108 }
3109 }
3110
3111 return 0;
3112}
3113
06ec9070 3114static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3115{
3116 int i, r;
3117
2cb681b6
ML
3118 static enum amd_ip_block_type ip_order[] = {
3119 AMD_IP_BLOCK_TYPE_SMC,
3120 AMD_IP_BLOCK_TYPE_DCE,
3121 AMD_IP_BLOCK_TYPE_GFX,
3122 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c 3123 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07
JJ
3124 AMD_IP_BLOCK_TYPE_VCE,
3125 AMD_IP_BLOCK_TYPE_VCN
2cb681b6 3126 };
a90ad3c2 3127
2cb681b6
ML
3128 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3129 int j;
3130 struct amdgpu_ip_block *block;
a90ad3c2 3131
2cb681b6
ML
3132 for (j = 0; j < adev->num_ip_blocks; j++) {
3133 block = &adev->ip_blocks[j];
3134
3135 if (block->version->type != ip_order[i] ||
482f0e53
ML
3136 !block->status.valid ||
3137 block->status.hw)
2cb681b6
ML
3138 continue;
3139
895bd048
JZ
3140 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3141 r = block->version->funcs->resume(adev);
3142 else
3143 r = block->version->funcs->hw_init(adev);
3144
0aaeefcc 3145 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
c41d1cf6
ML
3146 if (r)
3147 return r;
482f0e53 3148 block->status.hw = true;
a90ad3c2
ML
3149 }
3150 }
3151
3152 return 0;
3153}
3154
e3ecdffa
AD
3155/**
3156 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3157 *
3158 * @adev: amdgpu_device pointer
3159 *
3160 * First resume function for hardware IPs. The list of all the hardware
3161 * IPs that make up the asic is walked and the resume callbacks are run for
3162 * COMMON, GMC, and IH. resume puts the hardware into a functional state
3163 * after a suspend and updates the software state as necessary. This
3164 * function is also used for restoring the GPU after a GPU reset.
3165 * Returns 0 on success, negative error code on failure.
3166 */
06ec9070 3167static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
3168{
3169 int i, r;
3170
a90ad3c2 3171 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3172 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 3173 continue;
a90ad3c2 3174 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3175 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
d7274ec7
BZ
3176 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3177 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
482f0e53 3178
fcf0649f
CZ
3179 r = adev->ip_blocks[i].version->funcs->resume(adev);
3180 if (r) {
3181 DRM_ERROR("resume of IP block <%s> failed %d\n",
3182 adev->ip_blocks[i].version->funcs->name, r);
3183 return r;
3184 }
482f0e53 3185 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
3186 }
3187 }
3188
3189 return 0;
3190}
3191
e3ecdffa
AD
3192/**
3193 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3194 *
3195 * @adev: amdgpu_device pointer
3196 *
3197 * Second resume function for hardware IPs. The list of all the hardware
3198 * IPs that make up the asic is walked and the resume callbacks are run for
3199 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
3200 * functional state after a suspend and updates the software state as
3201 * necessary. This function is also used for restoring the GPU after a GPU
3202 * reset.
3203 * Returns 0 on success, negative error code on failure.
3204 */
06ec9070 3205static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3206{
3207 int i, r;
3208
3209 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3210 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 3211 continue;
fcf0649f 3212 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3213 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
3214 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3215 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 3216 continue;
a1255107 3217 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 3218 if (r) {
a1255107
AD
3219 DRM_ERROR("resume of IP block <%s> failed %d\n",
3220 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 3221 return r;
2c1a2784 3222 }
482f0e53 3223 adev->ip_blocks[i].status.hw = true;
f543d286
PL
3224
3225 if (adev->in_s0ix && adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3226 /* disable gfxoff for IP resume. The gfxoff will be re-enabled in
3227 * amdgpu_device_resume() after IP resume.
3228 */
3229 amdgpu_gfx_off_ctrl(adev, false);
3230 DRM_DEBUG("will disable gfxoff for re-initializing other blocks\n");
3231 }
3232
d38ceaf9
AD
3233 }
3234
3235 return 0;
3236}
3237
e3ecdffa
AD
3238/**
3239 * amdgpu_device_ip_resume - run resume for hardware IPs
3240 *
3241 * @adev: amdgpu_device pointer
3242 *
3243 * Main resume function for hardware IPs. The hardware IPs
3244 * are split into two resume functions because they are
3245 * also used in recovering from a GPU reset and some additional
3246 * steps need to be taken between them. In this case (S3/S4) they are
3247 * run sequentially.
3248 * Returns 0 on success, negative error code on failure.
3249 */
06ec9070 3250static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
3251{
3252 int r;
3253
9cec53c1
JZ
3254 r = amdgpu_amdkfd_resume_iommu(adev);
3255 if (r)
3256 return r;
3257
06ec9070 3258 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
3259 if (r)
3260 return r;
7a3e0bb2
RZ
3261
3262 r = amdgpu_device_fw_loading(adev);
3263 if (r)
3264 return r;
3265
06ec9070 3266 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
3267
3268 return r;
3269}
3270
e3ecdffa
AD
3271/**
3272 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3273 *
3274 * @adev: amdgpu_device pointer
3275 *
3276 * Query the VBIOS data tables to determine if the board supports SR-IOV.
3277 */
4e99a44e 3278static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 3279{
6867e1b5
ML
3280 if (amdgpu_sriov_vf(adev)) {
3281 if (adev->is_atom_fw) {
58ff791a 3282 if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
6867e1b5
ML
3283 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3284 } else {
3285 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3286 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3287 }
3288
3289 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3290 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 3291 }
048765ad
AR
3292}
3293
e3ecdffa
AD
3294/**
3295 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3296 *
3297 * @asic_type: AMD asic type
3298 *
3299 * Check if there is DC (new modesetting infrastructure) support for an asic.
3300 * returns true if DC has support, false if not.
3301 */
4562236b
HW
3302bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3303{
3304 switch (asic_type) {
0637d417
AD
3305#ifdef CONFIG_DRM_AMDGPU_SI
3306 case CHIP_HAINAN:
3307#endif
3308 case CHIP_TOPAZ:
3309 /* chips with no display hardware */
3310 return false;
4562236b 3311#if defined(CONFIG_DRM_AMD_DC)
64200c46
MR
3312 case CHIP_TAHITI:
3313 case CHIP_PITCAIRN:
3314 case CHIP_VERDE:
3315 case CHIP_OLAND:
2d32ffd6
AD
3316 /*
3317 * We have systems in the wild with these ASICs that require
3318 * LVDS and VGA support which is not supported with DC.
3319 *
3320 * Fallback to the non-DC driver here by default so as not to
3321 * cause regressions.
3322 */
3323#if defined(CONFIG_DRM_AMD_DC_SI)
3324 return amdgpu_dc > 0;
3325#else
3326 return false;
64200c46 3327#endif
4562236b 3328 case CHIP_BONAIRE:
0d6fbccb 3329 case CHIP_KAVERI:
367e6687
AD
3330 case CHIP_KABINI:
3331 case CHIP_MULLINS:
d9fda248
HW
3332 /*
3333 * We have systems in the wild with these ASICs that require
b5a0168e 3334 * VGA support which is not supported with DC.
d9fda248
HW
3335 *
3336 * Fallback to the non-DC driver here by default so as not to
3337 * cause regressions.
3338 */
3339 return amdgpu_dc > 0;
f7f12b25 3340 default:
fd187853 3341 return amdgpu_dc != 0;
f7f12b25 3342#else
4562236b 3343 default:
93b09a9a 3344 if (amdgpu_dc > 0)
044a48f4 3345 DRM_INFO_ONCE("Display Core has been requested via kernel parameter "
93b09a9a 3346 "but isn't supported by ASIC, ignoring\n");
4562236b 3347 return false;
f7f12b25 3348#endif
4562236b
HW
3349 }
3350}
3351
3352/**
3353 * amdgpu_device_has_dc_support - check if dc is supported
3354 *
982a820b 3355 * @adev: amdgpu_device pointer
4562236b
HW
3356 *
3357 * Returns true for supported, false for not supported
3358 */
3359bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3360{
25263da3 3361 if (adev->enable_virtual_display ||
abaf210c 3362 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
2555039d
XY
3363 return false;
3364
4562236b
HW
3365 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3366}
3367
d4535e2c
AG
3368static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3369{
3370 struct amdgpu_device *adev =
3371 container_of(__work, struct amdgpu_device, xgmi_reset_work);
d95e8e97 3372 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
d4535e2c 3373
c6a6e2db
AG
3374 /* It's a bug to not have a hive within this function */
3375 if (WARN_ON(!hive))
3376 return;
3377
3378 /*
3379 * Use task barrier to synchronize all xgmi reset works across the
3380 * hive. task_barrier_enter and task_barrier_exit will block
3381 * until all the threads running the xgmi reset works reach
3382 * those points. task_barrier_full will do both blocks.
3383 */
3384 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3385
3386 task_barrier_enter(&hive->tb);
4a580877 3387 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
c6a6e2db
AG
3388
3389 if (adev->asic_reset_res)
3390 goto fail;
3391
3392 task_barrier_exit(&hive->tb);
4a580877 3393 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
c6a6e2db
AG
3394
3395 if (adev->asic_reset_res)
3396 goto fail;
43c4d576 3397
5e67bba3 3398 if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
3399 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
3400 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
c6a6e2db
AG
3401 } else {
3402
3403 task_barrier_full(&hive->tb);
3404 adev->asic_reset_res = amdgpu_asic_reset(adev);
3405 }
ce316fa5 3406
c6a6e2db 3407fail:
d4535e2c 3408 if (adev->asic_reset_res)
fed184e9 3409 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
4a580877 3410 adev->asic_reset_res, adev_to_drm(adev)->unique);
d95e8e97 3411 amdgpu_put_xgmi_hive(hive);
d4535e2c
AG
3412}
3413
71f98027
AD
3414static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3415{
3416 char *input = amdgpu_lockup_timeout;
3417 char *timeout_setting = NULL;
3418 int index = 0;
3419 long timeout;
3420 int ret = 0;
3421
3422 /*
67387dfe
AD
3423 * By default the timeout for non-compute jobs is 10000 ms
3424 * and 60000 ms for compute jobs.
71f98027 3425 * In SR-IOV or passthrough mode, the timeout for compute
b7b2a316 3426 * jobs is 60000 ms by default.
71f98027
AD
3427 */
3428 adev->gfx_timeout = msecs_to_jiffies(10000);
3429 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
9882e278
ED
3430 if (amdgpu_sriov_vf(adev))
3431 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3432 msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
71f98027 3433 else
67387dfe 3434 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027 3435
f440ff44 3436 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 3437 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 3438 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
3439 ret = kstrtol(timeout_setting, 0, &timeout);
3440 if (ret)
3441 return ret;
3442
3443 if (timeout == 0) {
3444 index++;
3445 continue;
3446 } else if (timeout < 0) {
3447 timeout = MAX_SCHEDULE_TIMEOUT;
127aedf9
CK
3448 dev_warn(adev->dev, "lockup timeout disabled");
3449 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
71f98027
AD
3450 } else {
3451 timeout = msecs_to_jiffies(timeout);
3452 }
3453
3454 switch (index++) {
3455 case 0:
3456 adev->gfx_timeout = timeout;
3457 break;
3458 case 1:
3459 adev->compute_timeout = timeout;
3460 break;
3461 case 2:
3462 adev->sdma_timeout = timeout;
3463 break;
3464 case 3:
3465 adev->video_timeout = timeout;
3466 break;
3467 default:
3468 break;
3469 }
3470 }
3471 /*
3472 * There is only one value specified and
3473 * it should apply to all non-compute jobs.
3474 */
bcccee89 3475 if (index == 1) {
71f98027 3476 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
3477 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3478 adev->compute_timeout = adev->gfx_timeout;
3479 }
71f98027
AD
3480 }
3481
3482 return ret;
3483}
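/*
 * Illustrative note, not part of the driver source: the parsing above means
 * the amdgpu.lockup_timeout module parameter takes up to four comma-separated
 * values, in the order gfx, compute, sdma, video (milliseconds), e.g. on a
 * hypothetical setup:
 *
 *   modprobe amdgpu lockup_timeout=10000,60000,10000,10000
 *
 * A single value applies to all non-compute queues, a 0 keeps the default for
 * that slot, and a negative value disables the timeout entirely.
 */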
d4535e2c 3484
4a74c38c
PY
3485/**
3486 * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3487 *
3488 * @adev: amdgpu_device pointer
3489 *
3490 * RAM is direct mapped to the GPU if the IOMMU is not enabled or is in pass-through mode
3491 */
3492static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3493{
3494 struct iommu_domain *domain;
3495
3496 domain = iommu_get_domain_for_dev(adev->dev);
3497 if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3498 adev->ram_is_direct_mapped = true;
3499}
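/*
 * Illustrative note, not part of the driver source: the identity-domain case
 * above typically corresponds to booting with the IOMMU disabled or in
 * pass-through mode, e.g. with hypothetical kernel command line options such
 * as "iommu=pt" or "amd_iommu=off".
 */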
3500
77f3a5cd
ND
3501static const struct attribute *amdgpu_dev_attributes[] = {
3502 &dev_attr_product_name.attr,
3503 &dev_attr_product_number.attr,
3504 &dev_attr_serial_number.attr,
3505 &dev_attr_pcie_replay_count.attr,
3506 NULL
3507};
3508
d38ceaf9
AD
3509/**
3510 * amdgpu_device_init - initialize the driver
3511 *
3512 * @adev: amdgpu_device pointer
d38ceaf9
AD
3513 * @flags: driver flags
3514 *
3515 * Initializes the driver info and hw (all asics).
3516 * Returns 0 for success or an error on failure.
3517 * Called at driver startup.
3518 */
3519int amdgpu_device_init(struct amdgpu_device *adev,
d38ceaf9
AD
3520 uint32_t flags)
3521{
8aba21b7
LT
3522 struct drm_device *ddev = adev_to_drm(adev);
3523 struct pci_dev *pdev = adev->pdev;
d38ceaf9 3524 int r, i;
b98c6299 3525 bool px = false;
95844d20 3526 u32 max_MBps;
d38ceaf9
AD
3527
3528 adev->shutdown = false;
d38ceaf9 3529 adev->flags = flags;
4e66d7d2
YZ
3530
3531 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3532 adev->asic_type = amdgpu_force_asic_type;
3533 else
3534 adev->asic_type = flags & AMD_ASIC_MASK;
3535
d38ceaf9 3536 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 3537 if (amdgpu_emu_mode == 1)
8bdab6bb 3538 adev->usec_timeout *= 10;
770d13b1 3539 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
3540 adev->accel_working = false;
3541 adev->num_rings = 0;
68ce8b24 3542 RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
d38ceaf9
AD
3543 adev->mman.buffer_funcs = NULL;
3544 adev->mman.buffer_funcs_ring = NULL;
3545 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 3546 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 3547 adev->gmc.gmc_funcs = NULL;
7bd939d0 3548 adev->harvest_ip_mask = 0x0;
f54d1867 3549 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 3550 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
3551
3552 adev->smc_rreg = &amdgpu_invalid_rreg;
3553 adev->smc_wreg = &amdgpu_invalid_wreg;
3554 adev->pcie_rreg = &amdgpu_invalid_rreg;
3555 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
3556 adev->pciep_rreg = &amdgpu_invalid_rreg;
3557 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
3558 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3559 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
3560 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3561 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3562 adev->didt_rreg = &amdgpu_invalid_rreg;
3563 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
3564 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3565 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
3566 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3567 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3568
3e39ab90
AD
3569 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3570 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3571 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
3572
3573 /* mutex initializations are all done here so we
3574 * can recall functions without having locking issues */
0e5ca0d1 3575 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
3576 mutex_init(&adev->pm.mutex);
3577 mutex_init(&adev->gfx.gpu_clock_mutex);
3578 mutex_init(&adev->srbm_mutex);
b8866c26 3579 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3580 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 3581 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3582 mutex_init(&adev->mn_lock);
e23b74aa 3583 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3584 hash_init(adev->mn_hash);
32eaeae0 3585 mutex_init(&adev->psp.mutex);
bd052211 3586 mutex_init(&adev->notifier_lock);
8cda7a4f 3587 mutex_init(&adev->pm.stable_pstate_ctx_lock);
f113cc32 3588 mutex_init(&adev->benchmark_mutex);
d38ceaf9 3589
ab3b9de6 3590 amdgpu_device_init_apu_flags(adev);
9f6a7857 3591
912dfc84
EQ
3592 r = amdgpu_device_check_arguments(adev);
3593 if (r)
3594 return r;
d38ceaf9 3595
d38ceaf9
AD
3596 spin_lock_init(&adev->mmio_idx_lock);
3597 spin_lock_init(&adev->smc_idx_lock);
3598 spin_lock_init(&adev->pcie_idx_lock);
3599 spin_lock_init(&adev->uvd_ctx_idx_lock);
3600 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3601 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3602 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3603 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3604 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3605
0c4e7fa5
CZ
3606 INIT_LIST_HEAD(&adev->shadow_list);
3607 mutex_init(&adev->shadow_list_lock);
3608
655ce9cb 3609 INIT_LIST_HEAD(&adev->reset_list);
3610
6492e1b0 3611 INIT_LIST_HEAD(&adev->ras_list);
3612
beff74bc
AD
3613 INIT_DELAYED_WORK(&adev->delayed_init_work,
3614 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3615 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3616 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3617
d4535e2c
AG
3618 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3619
d23ee13f 3620 adev->gfx.gfx_off_req_count = 1;
0ad7347a
AA
3621 adev->gfx.gfx_off_residency = 0;
3622 adev->gfx.gfx_off_entrycount = 0;
b6e79d9a 3623 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3624
b265bdbd
EQ
3625 atomic_set(&adev->throttling_logging_enabled, 1);
3626 /*
3627 * If throttling continues, logging will be performed every minute
3628 * to avoid log flooding. "-1" is subtracted since the thermal
3629 * throttling interrupt comes every second. Thus, the total logging
3630 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
3631 * for throttling interrupt) = 60 seconds.
3632 */
3633 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3634 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3635
0fa49558
AX
3636 /* Registers mapping */
3637 /* TODO: block userspace mapping of io register */
da69c161
KW
3638 if (adev->asic_type >= CHIP_BONAIRE) {
3639 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3640 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3641 } else {
3642 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3643 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3644 }
d38ceaf9 3645
6c08e0ef
EQ
3646 for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3647 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3648
d38ceaf9
AD
3649 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3650 if (adev->rmmio == NULL) {
3651 return -ENOMEM;
3652 }
3653 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3654 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3655
5494d864
AD
3656 amdgpu_device_get_pcie_info(adev);
3657
b239c017
JX
3658 if (amdgpu_mcbp)
3659 DRM_INFO("MCBP is enabled\n");
3660
436afdfa
PY
3661 /*
3662 * The reset domain needs to be present early, before the XGMI hive is
3663 * discovered (if any) and initialized, to use the reset sem and in_gpu reset flag
3664 * early on during init and before calling to RREG32.
3665 */
3666 adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3667 if (!adev->reset_domain)
3668 return -ENOMEM;
3669
3aa0115d
ML
3670 /* detect hw virtualization here */
3671 amdgpu_detect_virtualization(adev);
3672
dffa11b4
ML
3673 r = amdgpu_device_get_job_timeout_settings(adev);
3674 if (r) {
3675 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4ef87d8f 3676 return r;
a190d1c7
XY
3677 }
3678
d38ceaf9 3679 /* early init functions */
06ec9070 3680 r = amdgpu_device_ip_early_init(adev);
d38ceaf9 3681 if (r)
4ef87d8f 3682 return r;
d38ceaf9 3683
4d33e704
SK
3684 /* Enable TMZ based on IP_VERSION */
3685 amdgpu_gmc_tmz_set(adev);
3686
957b0787 3687 amdgpu_gmc_noretry_set(adev);
4a0165f0
VS
3688 /* Need to get xgmi info early to decide the reset behavior*/
3689 if (adev->gmc.xgmi.supported) {
3690 r = adev->gfxhub.funcs->get_xgmi_info(adev);
3691 if (r)
3692 return r;
3693 }
3694
8e6d0b69 3695 /* enable PCIE atomic ops */
3696 if (amdgpu_sriov_vf(adev))
3697 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
e15c9d06 3698 adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
8e6d0b69 3699 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3700 else
3701 adev->have_atomics_support =
3702 !pci_enable_atomic_ops_to_root(adev->pdev,
3703 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3704 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3705 if (!adev->have_atomics_support)
3706 dev_info(adev->dev, "PCIE atomic ops is not supported\n");
3707
6585661d
OZ
3708 /* doorbell bar mapping and doorbell index init*/
3709 amdgpu_device_doorbell_init(adev);
3710
9475a943
SL
3711 if (amdgpu_emu_mode == 1) {
3712 /* post the asic on emulation mode */
3713 emu_soc_asic_init(adev);
bfca0289 3714 goto fence_driver_init;
9475a943 3715 }
bfca0289 3716
04442bf7
LL
3717 amdgpu_reset_init(adev);
3718
4e99a44e
ML
3719 /* detect if we are with an SRIOV vbios */
3720 amdgpu_device_detect_sriov_bios(adev);
048765ad 3721
95e8e59e
AD
3722 /* check if we need to reset the asic
3723 * E.g., driver was not cleanly unloaded previously, etc.
3724 */
f14899fd 3725 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
e3c1b071 3726 if (adev->gmc.xgmi.num_physical_nodes) {
3727 dev_info(adev->dev, "Pending hive reset.\n");
3728 adev->gmc.xgmi.pending_reset = true;
3729 /* Only need to init the necessary blocks for SMU to handle the reset */
3730 for (i = 0; i < adev->num_ip_blocks; i++) {
3731 if (!adev->ip_blocks[i].status.valid)
3732 continue;
3733 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3734 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3735 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3736 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
751f43e7 3737 DRM_DEBUG("IP %s disabled for hw_init.\n",
e3c1b071 3738 adev->ip_blocks[i].version->funcs->name);
3739 adev->ip_blocks[i].status.hw = true;
3740 }
3741 }
3742 } else {
3743 r = amdgpu_asic_reset(adev);
3744 if (r) {
3745 dev_err(adev->dev, "asic reset on init failed\n");
3746 goto failed;
3747 }
95e8e59e
AD
3748 }
3749 }
3750
8f66090b 3751 pci_enable_pcie_error_reporting(adev->pdev);
c9a6b82f 3752
d38ceaf9 3753 /* Post card if necessary */
39c640c0 3754 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3755 if (!adev->bios) {
bec86378 3756 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3757 r = -EINVAL;
3758 goto failed;
d38ceaf9 3759 }
bec86378 3760 DRM_INFO("GPU posting now...\n");
4d2997ab 3761 r = amdgpu_device_asic_init(adev);
4e99a44e
ML
3762 if (r) {
3763 dev_err(adev->dev, "gpu post error!\n");
3764 goto failed;
3765 }
d38ceaf9
AD
3766 }
3767
88b64e95
AD
3768 if (adev->is_atom_fw) {
3769 /* Initialize clocks */
3770 r = amdgpu_atomfirmware_get_clock_info(adev);
3771 if (r) {
3772 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 3773 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
3774 goto failed;
3775 }
3776 } else {
a5bde2f9
AD
3777 /* Initialize clocks */
3778 r = amdgpu_atombios_get_clock_info(adev);
3779 if (r) {
3780 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 3781 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 3782 goto failed;
a5bde2f9
AD
3783 }
3784 /* init i2c buses */
4562236b
HW
3785 if (!amdgpu_device_has_dc_support(adev))
3786 amdgpu_atombios_i2c_init(adev);
2c1a2784 3787 }
d38ceaf9 3788
bfca0289 3789fence_driver_init:
d38ceaf9 3790 /* Fence driver */
067f44c8 3791 r = amdgpu_fence_driver_sw_init(adev);
2c1a2784 3792 if (r) {
067f44c8 3793 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
e23b74aa 3794 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3795 goto failed;
2c1a2784 3796 }
d38ceaf9
AD
3797
3798 /* init the mode config */
4a580877 3799 drm_mode_config_init(adev_to_drm(adev));
d38ceaf9 3800
06ec9070 3801 r = amdgpu_device_ip_init(adev);
d38ceaf9 3802 if (r) {
8840a387 3803 /* failed in exclusive mode due to timeout */
3804 if (amdgpu_sriov_vf(adev) &&
3805 !amdgpu_sriov_runtime(adev) &&
3806 amdgpu_virt_mmio_blocked(adev) &&
3807 !amdgpu_virt_wait_reset(adev)) {
3808 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
3809 /* Don't send request since VF is inactive. */
3810 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3811 adev->virt.ops = NULL;
8840a387 3812 r = -EAGAIN;
970fd197 3813 goto release_ras_con;
8840a387 3814 }
06ec9070 3815 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3816 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
970fd197 3817 goto release_ras_con;
d38ceaf9
AD
3818 }
3819
8d35a259
LG
3820 amdgpu_fence_driver_hw_init(adev);
3821
d69b8971
YZ
3822 dev_info(adev->dev,
3823 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
3824 adev->gfx.config.max_shader_engines,
3825 adev->gfx.config.max_sh_per_se,
3826 adev->gfx.config.max_cu_per_sh,
3827 adev->gfx.cu_info.number);
3828
d38ceaf9
AD
3829 adev->accel_working = true;
3830
e59c0205
AX
3831 amdgpu_vm_check_compute_bug(adev);
3832
95844d20
MO
3833 /* Initialize the buffer migration limit. */
3834 if (amdgpu_moverate >= 0)
3835 max_MBps = amdgpu_moverate;
3836 else
3837 max_MBps = 8; /* Allow 8 MB/s. */
3838 /* Get a log2 for easy divisions. */
3839 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3840
d2f52ac8 3841 r = amdgpu_pm_sysfs_init(adev);
7c868b59
YT
3842 if (r) {
3843 adev->pm_sysfs_en = false;
d2f52ac8 3844 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
7c868b59
YT
3845 } else
3846 adev->pm_sysfs_en = true;
d2f52ac8 3847
5bb23532 3848 r = amdgpu_ucode_sysfs_init(adev);
7c868b59
YT
3849 if (r) {
3850 adev->ucode_sysfs_en = false;
5bb23532 3851 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
7c868b59
YT
3852 } else
3853 adev->ucode_sysfs_en = true;
5bb23532 3854
8424f2cc
LG
3855 r = amdgpu_psp_sysfs_init(adev);
3856 if (r) {
3857 adev->psp_sysfs_en = false;
3858 if (!amdgpu_sriov_vf(adev))
3859 DRM_ERROR("Creating psp sysfs failed\n");
3860 } else
3861 adev->psp_sysfs_en = true;
3862
b0adca4d
EQ
3863 /*
3864 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3865 * Otherwise the mgpu fan boost feature will be skipped because the
3866 * gpu instance count would be too low.
3867 */
3868 amdgpu_register_gpu_instance(adev);
3869
d38ceaf9
AD
3870 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3871 * explicit gating rather than handling it automatically.
3872 */
e3c1b071 3873 if (!adev->gmc.xgmi.pending_reset) {
3874 r = amdgpu_device_ip_late_init(adev);
3875 if (r) {
3876 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
3877 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
970fd197 3878 goto release_ras_con;
e3c1b071 3879 }
3880 /* must succeed. */
3881 amdgpu_ras_resume(adev);
3882 queue_delayed_work(system_wq, &adev->delayed_init_work,
3883 msecs_to_jiffies(AMDGPU_RESUME_MS));
2c1a2784 3884 }
d38ceaf9 3885
2c738637
ML
3886 if (amdgpu_sriov_vf(adev))
3887 flush_delayed_work(&adev->delayed_init_work);
3888
77f3a5cd 3889 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
5aea5327 3890 if (r)
77f3a5cd 3891 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166 3892
d155bef0
AB
3893 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3894 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
3895 if (r)
3896 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3897
c1dd4aa6
AG
3898 /* Have stored pci confspace at hand for restore in sudden PCI error */
3899 if (amdgpu_device_cache_pci_state(adev->pdev))
3900 pci_restore_state(pdev);
3901
8c3dd61c
KHF
3902 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
3903 /* this will fail for cards that aren't VGA class devices, just
3904 * ignore it */
3905 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
bf44e8ce 3906 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
8c3dd61c
KHF
3907
3908 if (amdgpu_device_supports_px(ddev)) {
3909 px = true;
3910 vga_switcheroo_register_client(adev->pdev,
3911 &amdgpu_switcheroo_ops, px);
3912 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
3913 }
3914
e3c1b071 3915 if (adev->gmc.xgmi.pending_reset)
3916 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
3917 msecs_to_jiffies(AMDGPU_RESUME_MS));
3918
4a74c38c
PY
3919 amdgpu_device_check_iommu_direct_map(adev);
3920
d38ceaf9 3921 return 0;
83ba126a 3922
970fd197
SY
3923release_ras_con:
3924 amdgpu_release_ras_context(adev);
3925
83ba126a 3926failed:
89041940 3927 amdgpu_vf_error_trans_all(adev);
8840a387 3928
83ba126a 3929 return r;
d38ceaf9
AD
3930}
3931
07775fc1
AG
3932static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
3933{
62d5f9f7 3934
07775fc1
AG
3935 /* Clear all CPU mappings pointing to this device */
3936 unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
3937
3938 /* Unmap all mapped bars - Doorbell, registers and VRAM */
3939 amdgpu_device_doorbell_fini(adev);
3940
3941 iounmap(adev->rmmio);
3942 adev->rmmio = NULL;
3943 if (adev->mman.aper_base_kaddr)
3944 iounmap(adev->mman.aper_base_kaddr);
3945 adev->mman.aper_base_kaddr = NULL;
3946
3947 /* Memory manager related */
3948 if (!adev->gmc.xgmi.connected_to_cpu) {
3949 arch_phys_wc_del(adev->gmc.vram_mtrr);
3950 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
3951 }
3952}
3953
d38ceaf9 3954/**
bbe04dec 3955 * amdgpu_device_fini_hw - tear down the driver
d38ceaf9
AD
3956 *
3957 * @adev: amdgpu_device pointer
3958 *
3959 * Tear down the driver info (all asics).
3960 * Called at driver shutdown.
3961 */
72c8c97b 3962void amdgpu_device_fini_hw(struct amdgpu_device *adev)
d38ceaf9 3963{
aac89168 3964 dev_info(adev->dev, "amdgpu: finishing device.\n");
9f875167 3965 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 3966 adev->shutdown = true;
9f875167 3967
752c683d
ML
3968 /* make sure IB tests have finished before entering exclusive mode
3969 * to avoid preemption on IB test
3970 */
519b8b76 3971 if (amdgpu_sriov_vf(adev)) {
752c683d 3972 amdgpu_virt_request_full_gpu(adev, false);
519b8b76
BZ
3973 amdgpu_virt_fini_data_exchange(adev);
3974 }
752c683d 3975
e5b03032
ML
3976 /* disable all interrupts */
3977 amdgpu_irq_disable_all(adev);
ff97cba8 3978 if (adev->mode_info.mode_config_initialized){
1053b9c9 3979 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4a580877 3980 drm_helper_force_disable_all(adev_to_drm(adev));
ff97cba8 3981 else
4a580877 3982 drm_atomic_helper_shutdown(adev_to_drm(adev));
ff97cba8 3983 }
8d35a259 3984 amdgpu_fence_driver_hw_fini(adev);
72c8c97b 3985
cd3a8a59 3986 if (adev->mman.initialized)
9bff18d1 3987 drain_workqueue(adev->mman.bdev.wq);
98f56188 3988
7c868b59
YT
3989 if (adev->pm_sysfs_en)
3990 amdgpu_pm_sysfs_fini(adev);
72c8c97b
AG
3991 if (adev->ucode_sysfs_en)
3992 amdgpu_ucode_sysfs_fini(adev);
8424f2cc
LG
3993 if (adev->psp_sysfs_en)
3994 amdgpu_psp_sysfs_fini(adev);
72c8c97b
AG
3995 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
3996
232d1d43
SY
3997 /* disable ras feature must before hw fini */
3998 amdgpu_ras_pre_fini(adev);
3999
e9669fb7 4000 amdgpu_device_ip_fini_early(adev);
d10d0daa 4001
a3848df6
YW
4002 amdgpu_irq_fini_hw(adev);
4003
b6fd6e0f
SK
4004 if (adev->mman.initialized)
4005 ttm_device_clear_dma_mappings(&adev->mman.bdev);
894c6890 4006
d10d0daa 4007 amdgpu_gart_dummy_page_fini(adev);
07775fc1 4008
fac53471 4009 amdgpu_device_unmap_mmio(adev);
87172e89 4010
72c8c97b
AG
4011}
4012
4013void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4014{
62d5f9f7
LS
4015 int idx;
4016
8d35a259 4017 amdgpu_fence_driver_sw_fini(adev);
a5c5d8d5 4018 amdgpu_device_ip_fini(adev);
75e1658e
ND
4019 release_firmware(adev->firmware.gpu_info_fw);
4020 adev->firmware.gpu_info_fw = NULL;
d38ceaf9 4021 adev->accel_working = false;
68ce8b24 4022 dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
04442bf7
LL
4023
4024 amdgpu_reset_fini(adev);
4025
d38ceaf9 4026 /* free i2c buses */
4562236b
HW
4027 if (!amdgpu_device_has_dc_support(adev))
4028 amdgpu_i2c_fini(adev);
bfca0289
SL
4029
4030 if (amdgpu_emu_mode != 1)
4031 amdgpu_atombios_fini(adev);
4032
d38ceaf9
AD
4033 kfree(adev->bios);
4034 adev->bios = NULL;
b98c6299 4035 if (amdgpu_device_supports_px(adev_to_drm(adev))) {
84c8b22e 4036 vga_switcheroo_unregister_client(adev->pdev);
83ba126a 4037 vga_switcheroo_fini_domain_pm_ops(adev->dev);
b98c6299 4038 }
38d6be81 4039 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
b8779475 4040 vga_client_unregister(adev->pdev);
e9bc1bf7 4041
62d5f9f7
LS
4042 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4043
4044 iounmap(adev->rmmio);
4045 adev->rmmio = NULL;
4046 amdgpu_device_doorbell_fini(adev);
4047 drm_dev_exit(idx);
4048 }
4049
d155bef0
AB
4050 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4051 amdgpu_pmu_fini(adev);
72de33f8 4052 if (adev->mman.discovery_bin)
a190d1c7 4053 amdgpu_discovery_fini(adev);
72c8c97b 4054
cfbb6b00
AG
4055 amdgpu_reset_put_reset_domain(adev->reset_domain);
4056 adev->reset_domain = NULL;
4057
72c8c97b
AG
4058 kfree(adev->pci_state);
4059
d38ceaf9
AD
4060}
4061
58144d28
ND
4062/**
4063 * amdgpu_device_evict_resources - evict device resources
4064 * @adev: amdgpu device object
4065 *
4066 * Evicts all ttm device resources (vram BOs, gart table) from the lru list
4067 * of the vram memory type. Mainly used for evicting device resources
4068 * at suspend time.
4069 *
4070 */
7863c155 4071static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
58144d28 4072{
7863c155
ML
4073 int ret;
4074
e53d9665
ML
4075 /* No need to evict vram on APUs for suspend to ram or s2idle */
4076 if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
7863c155 4077 return 0;
58144d28 4078
7863c155
ML
4079 ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4080 if (ret)
58144d28 4081 DRM_WARN("evicting device resources failed\n");
7863c155 4082 return ret;
58144d28 4083}
d38ceaf9
AD
4084
4085/*
4086 * Suspend & resume.
4087 */
4088/**
810ddc3a 4089 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 4090 *
87e3f136 4091 * @dev: drm dev pointer
87e3f136 4092 * @fbcon : notify the fbdev of suspend
d38ceaf9
AD
4093 *
4094 * Puts the hw in the suspend state (all asics).
4095 * Returns 0 for success or an error on failure.
4096 * Called at driver suspend.
4097 */
de185019 4098int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9 4099{
a2e15b0e 4100 struct amdgpu_device *adev = drm_to_adev(dev);
d7274ec7 4101 int r = 0;
d38ceaf9 4102
d38ceaf9
AD
4103 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4104 return 0;
4105
44779b43 4106 adev->in_suspend = true;
3fa8f89d 4107
d7274ec7
BZ
4108 if (amdgpu_sriov_vf(adev)) {
4109 amdgpu_virt_fini_data_exchange(adev);
4110 r = amdgpu_virt_request_full_gpu(adev, false);
4111 if (r)
4112 return r;
4113 }
4114
3fa8f89d
S
4115 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4116 DRM_WARN("smart shift update failed\n");
4117
d38ceaf9
AD
4118 drm_kms_helper_poll_disable(dev);
4119
5f818173 4120 if (fbcon)
087451f3 4121 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
5f818173 4122
beff74bc 4123 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 4124
5e6932fe 4125 amdgpu_ras_suspend(adev);
4126
2196927b 4127 amdgpu_device_ip_suspend_phase1(adev);
fe1053b7 4128
c004d44e 4129 if (!adev->in_s0ix)
5d3a2d95 4130 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
94fa5660 4131
7863c155
ML
4132 r = amdgpu_device_evict_resources(adev);
4133 if (r)
4134 return r;
d38ceaf9 4135
8d35a259 4136 amdgpu_fence_driver_hw_fini(adev);
d38ceaf9 4137
2196927b 4138 amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 4139
d7274ec7
BZ
4140 if (amdgpu_sriov_vf(adev))
4141 amdgpu_virt_release_full_gpu(adev, false);
4142
d38ceaf9
AD
4143 return 0;
4144}
4145
4146/**
810ddc3a 4147 * amdgpu_device_resume - initiate device resume
d38ceaf9 4148 *
87e3f136 4149 * @dev: drm dev pointer
87e3f136 4150 * @fbcon : notify the fbdev of resume
d38ceaf9
AD
4151 *
4152 * Bring the hw back to operating state (all asics).
4153 * Returns 0 for success or an error on failure.
4154 * Called at driver resume.
4155 */
de185019 4156int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9 4157{
1348969a 4158 struct amdgpu_device *adev = drm_to_adev(dev);
03161a6e 4159 int r = 0;
d38ceaf9 4160
d7274ec7
BZ
4161 if (amdgpu_sriov_vf(adev)) {
4162 r = amdgpu_virt_request_full_gpu(adev, true);
4163 if (r)
4164 return r;
4165 }
4166
d38ceaf9
AD
4167 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4168 return 0;
4169
62498733 4170 if (adev->in_s0ix)
bc143d8b 4171 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
628c36d7 4172
d38ceaf9 4173 /* post card */
39c640c0 4174 if (amdgpu_device_need_post(adev)) {
4d2997ab 4175 r = amdgpu_device_asic_init(adev);
74b0b157 4176 if (r)
aac89168 4177 dev_err(adev->dev, "amdgpu asic init failed\n");
74b0b157 4178 }
d38ceaf9 4179
06ec9070 4180 r = amdgpu_device_ip_resume(adev);
d7274ec7
BZ
4181
4182 /* no matter what r is, always need to properly release full GPU */
4183 if (amdgpu_sriov_vf(adev)) {
4184 amdgpu_virt_init_data_exchange(adev);
4185 amdgpu_virt_release_full_gpu(adev, true);
4186 }
4187
e6707218 4188 if (r) {
aac89168 4189 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
4d3b9ae5 4190 return r;
e6707218 4191 }
8d35a259 4192 amdgpu_fence_driver_hw_init(adev);
5ceb54c6 4193
06ec9070 4194 r = amdgpu_device_ip_late_init(adev);
03161a6e 4195 if (r)
4d3b9ae5 4196 return r;
d38ceaf9 4197
beff74bc
AD
4198 queue_delayed_work(system_wq, &adev->delayed_init_work,
4199 msecs_to_jiffies(AMDGPU_RESUME_MS));
4200
c004d44e 4201 if (!adev->in_s0ix) {
5d3a2d95
AD
4202 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4203 if (r)
4204 return r;
4205 }
756e6880 4206
96a5d8d4 4207 /* Make sure IB tests flushed */
ec4927d4
VZ
4208 if (amdgpu_sriov_vf(adev))
4209 amdgpu_irq_gpu_reset_resume_helper(adev);
beff74bc 4210 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 4211
f543d286
PL
4212 if (adev->in_s0ix) {
4213 /* re-enable gfxoff after IP resume. This re-enables gfxoff after
4214 * it was disabled for IP resume in amdgpu_device_ip_resume_phase2().
4215 */
4216 amdgpu_gfx_off_ctrl(adev, true);
4217 DRM_DEBUG("will enable gfxoff for the mission mode\n");
4218 }
a2e15b0e 4219 if (fbcon)
087451f3 4220 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
d38ceaf9
AD
4221
4222 drm_kms_helper_poll_enable(dev);
23a1a9e5 4223
5e6932fe 4224 amdgpu_ras_resume(adev);
4225
d09ef243
AD
4226 if (adev->mode_info.num_crtc) {
4227 /*
4228 * Most of the connector probing functions try to acquire runtime pm
4229 * refs to ensure that the GPU is powered on when connector polling is
4230 * performed. Since we're calling this from a runtime PM callback,
4231 * trying to acquire rpm refs will cause us to deadlock.
4232 *
4233 * Since we're guaranteed to be holding the rpm lock, it's safe to
4234 * temporarily disable the rpm helpers so this doesn't deadlock us.
4235 */
23a1a9e5 4236#ifdef CONFIG_PM
d09ef243 4237 dev->dev->power.disable_depth++;
23a1a9e5 4238#endif
d09ef243
AD
4239 if (!adev->dc_enabled)
4240 drm_helper_hpd_irq_event(dev);
4241 else
4242 drm_kms_helper_hotplug_event(dev);
23a1a9e5 4243#ifdef CONFIG_PM
d09ef243 4244 dev->dev->power.disable_depth--;
23a1a9e5 4245#endif
d09ef243 4246 }
44779b43
RZ
4247 adev->in_suspend = false;
4248
3fa8f89d
S
4249 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4250 DRM_WARN("smart shift update failed\n");
4251
4d3b9ae5 4252 return 0;
d38ceaf9
AD
4253}
4254
e3ecdffa
AD
4255/**
4256 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4257 *
4258 * @adev: amdgpu_device pointer
4259 *
4260 * The list of all the hardware IPs that make up the asic is walked and
4261 * the check_soft_reset callbacks are run. check_soft_reset determines
4262 * if the asic is still hung or not.
4263 * Returns true if any of the IPs are still in a hung state, false if not.
4264 */
06ec9070 4265static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
4266{
4267 int i;
4268 bool asic_hang = false;
4269
f993d628
ML
4270 if (amdgpu_sriov_vf(adev))
4271 return true;
4272
8bc04c29
AD
4273 if (amdgpu_asic_need_full_reset(adev))
4274 return true;
4275
63fbf42f 4276 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4277 if (!adev->ip_blocks[i].status.valid)
63fbf42f 4278 continue;
a1255107
AD
4279 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4280 adev->ip_blocks[i].status.hang =
4281 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4282 if (adev->ip_blocks[i].status.hang) {
aac89168 4283 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
4284 asic_hang = true;
4285 }
4286 }
4287 return asic_hang;
4288}
4289
e3ecdffa
AD
4290/**
4291 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4292 *
4293 * @adev: amdgpu_device pointer
4294 *
4295 * The list of all the hardware IPs that make up the asic is walked and the
4296 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
4297 * handles any IP specific hardware or software state changes that are
4298 * necessary for a soft reset to succeed.
4299 * Returns 0 on success, negative error code on failure.
4300 */
06ec9070 4301static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
4302{
4303 int i, r = 0;
4304
4305 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4306 if (!adev->ip_blocks[i].status.valid)
d31a501e 4307 continue;
a1255107
AD
4308 if (adev->ip_blocks[i].status.hang &&
4309 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4310 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
4311 if (r)
4312 return r;
4313 }
4314 }
4315
4316 return 0;
4317}
4318
e3ecdffa
AD
4319/**
4320 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4321 *
4322 * @adev: amdgpu_device pointer
4323 *
4324 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
4325 * reset is necessary to recover.
4326 * Returns true if a full asic reset is required, false if not.
4327 */
06ec9070 4328static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 4329{
da146d3b
AD
4330 int i;
4331
8bc04c29
AD
4332 if (amdgpu_asic_need_full_reset(adev))
4333 return true;
4334
da146d3b 4335 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4336 if (!adev->ip_blocks[i].status.valid)
da146d3b 4337 continue;
a1255107
AD
4338 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4339 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4340 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
4341 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4342 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 4343 if (adev->ip_blocks[i].status.hang) {
aac89168 4344 dev_info(adev->dev, "Some block need full reset!\n");
da146d3b
AD
4345 return true;
4346 }
4347 }
35d782fe
CZ
4348 }
4349 return false;
4350}
4351
e3ecdffa
AD
4352/**
4353 * amdgpu_device_ip_soft_reset - do a soft reset
4354 *
4355 * @adev: amdgpu_device pointer
4356 *
4357 * The list of all the hardware IPs that make up the asic is walked and the
4358 * soft_reset callbacks are run if the block is hung. soft_reset handles any
4359 * IP specific hardware or software state changes that are necessary to soft
4360 * reset the IP.
4361 * Returns 0 on success, negative error code on failure.
4362 */
06ec9070 4363static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4364{
4365 int i, r = 0;
4366
4367 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4368 if (!adev->ip_blocks[i].status.valid)
35d782fe 4369 continue;
a1255107
AD
4370 if (adev->ip_blocks[i].status.hang &&
4371 adev->ip_blocks[i].version->funcs->soft_reset) {
4372 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
4373 if (r)
4374 return r;
4375 }
4376 }
4377
4378 return 0;
4379}
4380
e3ecdffa
AD
4381/**
4382 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4383 *
4384 * @adev: amdgpu_device pointer
4385 *
4386 * The list of all the hardware IPs that make up the asic is walked and the
4387 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
4388 * handles any IP specific hardware or software state changes that are
4389 * necessary after the IP has been soft reset.
4390 * Returns 0 on success, negative error code on failure.
4391 */
06ec9070 4392static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4393{
4394 int i, r = 0;
4395
4396 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4397 if (!adev->ip_blocks[i].status.valid)
35d782fe 4398 continue;
a1255107
AD
4399 if (adev->ip_blocks[i].status.hang &&
4400 adev->ip_blocks[i].version->funcs->post_soft_reset)
4401 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
4402 if (r)
4403 return r;
4404 }
4405
4406 return 0;
4407}
4408
e3ecdffa 4409/**
c33adbc7 4410 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
4411 *
4412 * @adev: amdgpu_device pointer
4413 *
4414 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
4415 * restore things like GPUVM page tables after a GPU reset where
4416 * the contents of VRAM might be lost.
403009bf
CK
4417 *
4418 * Returns:
4419 * 0 on success, negative error code on failure.
e3ecdffa 4420 */
c33adbc7 4421static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 4422{
c41d1cf6 4423 struct dma_fence *fence = NULL, *next = NULL;
403009bf 4424 struct amdgpu_bo *shadow;
e18aaea7 4425 struct amdgpu_bo_vm *vmbo;
403009bf 4426 long r = 1, tmo;
c41d1cf6
ML
4427
4428 if (amdgpu_sriov_runtime(adev))
b045d3af 4429 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
4430 else
4431 tmo = msecs_to_jiffies(100);
4432
aac89168 4433 dev_info(adev->dev, "recover vram bo from shadow start\n");
c41d1cf6 4434 mutex_lock(&adev->shadow_list_lock);
e18aaea7
ND
4435 list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4436 shadow = &vmbo->bo;
403009bf 4437 /* No need to recover an evicted BO */
d3116756
CK
4438 if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4439 shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4440 shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
403009bf
CK
4441 continue;
4442
4443 r = amdgpu_bo_restore_shadow(shadow, &next);
4444 if (r)
4445 break;
4446
c41d1cf6 4447 if (fence) {
1712fb1a 4448 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
4449 dma_fence_put(fence);
4450 fence = next;
1712fb1a 4451 if (tmo == 0) {
4452 r = -ETIMEDOUT;
c41d1cf6 4453 break;
1712fb1a 4454 } else if (tmo < 0) {
4455 r = tmo;
4456 break;
4457 }
403009bf
CK
4458 } else {
4459 fence = next;
c41d1cf6 4460 }
c41d1cf6
ML
4461 }
4462 mutex_unlock(&adev->shadow_list_lock);
4463
403009bf
CK
4464 if (fence)
4465 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
4466 dma_fence_put(fence);
4467
1712fb1a 4468 if (r < 0 || tmo <= 0) {
aac89168 4469 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
4470 return -EIO;
4471 }
c41d1cf6 4472
aac89168 4473 dev_info(adev->dev, "recover vram bo from shadow done\n");
403009bf 4474 return 0;
c41d1cf6
ML
4475}
4476
a90ad3c2 4477
e3ecdffa 4478/**
06ec9070 4479 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e 4480 *
982a820b 4481 * @adev: amdgpu_device pointer
87e3f136 4482 * @from_hypervisor: request from hypervisor
5740682e
ML
4483 *
4484 * do VF FLR and reinitialize the ASIC
3f48c681 4485 * Returns 0 if the reset succeeded, otherwise an error code.
e3ecdffa
AD
4486 */
4487static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4488 bool from_hypervisor)
5740682e
ML
4489{
4490 int r;
a5f67c93 4491 struct amdgpu_hive_info *hive = NULL;
7258fa31 4492 int retry_limit = 0;
5740682e 4493
7258fa31 4494retry:
c004d44e 4495 amdgpu_amdkfd_pre_reset(adev);
428890a3 4496
5740682e
ML
4497 if (from_hypervisor)
4498 r = amdgpu_virt_request_full_gpu(adev, true);
4499 else
4500 r = amdgpu_virt_reset_gpu(adev);
4501 if (r)
4502 return r;
a90ad3c2
ML
4503
4504 /* Resume IP prior to SMC */
06ec9070 4505 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
4506 if (r)
4507 goto error;
a90ad3c2 4508
c9ffa427 4509 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 4510
7a3e0bb2
RZ
4511 r = amdgpu_device_fw_loading(adev);
4512 if (r)
4513 return r;
4514
a90ad3c2 4515 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 4516 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
4517 if (r)
4518 goto error;
a90ad3c2 4519
a5f67c93
ZL
4520 hive = amdgpu_get_xgmi_hive(adev);
4521 /* Update PSP FW topology after reset */
4522 if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4523 r = amdgpu_xgmi_update_topology(hive, adev);
4524
4525 if (hive)
4526 amdgpu_put_xgmi_hive(hive);
4527
4528 if (!r) {
4529 amdgpu_irq_gpu_reset_resume_helper(adev);
4530 r = amdgpu_ib_ring_tests(adev);
9c12f5cd 4531
c004d44e 4532 amdgpu_amdkfd_post_reset(adev);
a5f67c93 4533 }
a90ad3c2 4534
abc34253 4535error:
c41d1cf6 4536 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 4537 amdgpu_inc_vram_lost(adev);
c33adbc7 4538 r = amdgpu_device_recover_vram(adev);
a90ad3c2 4539 }
437f3e0b 4540 amdgpu_virt_release_full_gpu(adev, true);
a90ad3c2 4541
7258fa31
SK
4542 if (AMDGPU_RETRY_SRIOV_RESET(r)) {
4543 if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
4544 retry_limit++;
4545 goto retry;
4546 } else
4547 DRM_ERROR("GPU reset retry is beyond the retry limit\n");
4548 }
4549
a90ad3c2
ML
4550 return r;
4551}
4552
9a1cddd6 4553/**
4554 * amdgpu_device_has_job_running - check if there is any job in mirror list
4555 *
982a820b 4556 * @adev: amdgpu_device pointer
9a1cddd6 4557 *
4558 * check if there is any job in mirror list
4559 */
4560bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4561{
4562 int i;
4563 struct drm_sched_job *job;
4564
4565 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4566 struct amdgpu_ring *ring = adev->rings[i];
4567
4568 if (!ring || !ring->sched.thread)
4569 continue;
4570
4571 spin_lock(&ring->sched.job_list_lock);
6efa4b46
LT
4572 job = list_first_entry_or_null(&ring->sched.pending_list,
4573 struct drm_sched_job, list);
9a1cddd6 4574 spin_unlock(&ring->sched.job_list_lock);
4575 if (job)
4576 return true;
4577 }
4578 return false;
4579}
4580
12938fad
CK
4581/**
4582 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4583 *
982a820b 4584 * @adev: amdgpu_device pointer
12938fad
CK
4585 *
4586 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4587 * a hung GPU.
4588 */
4589bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4590{
12938fad 4591
3ba7b418
AG
4592 if (amdgpu_gpu_recovery == 0)
4593 goto disabled;
4594
1a11a65d
YC
4595 /* Skip soft reset check in fatal error mode */
4596 if (!amdgpu_ras_is_poison_mode_supported(adev))
4597 return true;
4598
d3ef9d57
CG
4599 if (!amdgpu_device_ip_check_soft_reset(adev)) {
4600 dev_info(adev->dev,"Timeout, but no hardware hang detected.\n");
4601 return false;
4602 }
4603
3ba7b418
AG
4604 if (amdgpu_sriov_vf(adev))
4605 return true;
4606
4607 if (amdgpu_gpu_recovery == -1) {
4608 switch (adev->asic_type) {
b3523c45
AD
4609#ifdef CONFIG_DRM_AMDGPU_SI
4610 case CHIP_VERDE:
4611 case CHIP_TAHITI:
4612 case CHIP_PITCAIRN:
4613 case CHIP_OLAND:
4614 case CHIP_HAINAN:
4615#endif
4616#ifdef CONFIG_DRM_AMDGPU_CIK
4617 case CHIP_KAVERI:
4618 case CHIP_KABINI:
4619 case CHIP_MULLINS:
4620#endif
4621 case CHIP_CARRIZO:
4622 case CHIP_STONEY:
4623 case CHIP_CYAN_SKILLFISH:
3ba7b418 4624 goto disabled;
b3523c45
AD
4625 default:
4626 break;
3ba7b418 4627 }
12938fad
CK
4628 }
4629
4630 return true;
3ba7b418
AG
4631
4632disabled:
aac89168 4633 dev_info(adev->dev, "GPU recovery disabled.\n");
3ba7b418 4634 return false;
12938fad
CK
4635}
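/*
 * Illustrative note, not part of the driver source: the checks above follow
 * the amdgpu.gpu_recovery module parameter: 0 disables recovery, 1 forces it
 * on even for the legacy ASICs listed, and the default of -1 (auto) leaves
 * the decision to the per-ASIC policy shown here.
 */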
4636
5c03e584
FX
4637int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
4638{
4639 u32 i;
4640 int ret = 0;
4641
4642 amdgpu_atombios_scratch_regs_engine_hung(adev, true);
4643
4644 dev_info(adev->dev, "GPU mode1 reset\n");
4645
4646 /* disable BM */
4647 pci_clear_master(adev->pdev);
4648
4649 amdgpu_device_cache_pci_state(adev->pdev);
4650
4651 if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
4652 dev_info(adev->dev, "GPU smu mode1 reset\n");
4653 ret = amdgpu_dpm_mode1_reset(adev);
4654 } else {
4655 dev_info(adev->dev, "GPU psp mode1 reset\n");
4656 ret = psp_gpu_reset(adev);
4657 }
4658
4659 if (ret)
4660 dev_err(adev->dev, "GPU mode1 reset failed\n");
4661
4662 amdgpu_device_load_pci_state(adev->pdev);
4663
4664 /* wait for asic to come out of reset */
4665 for (i = 0; i < adev->usec_timeout; i++) {
4666 u32 memsize = adev->nbio.funcs->get_memsize(adev);
4667
4668 if (memsize != 0xffffffff)
4669 break;
4670 udelay(1);
4671 }
4672
4673 amdgpu_atombios_scratch_regs_engine_hung(adev, false);
4674 return ret;
4675}
5c6dd71e 4676
e3c1b071 4677int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
04442bf7 4678 struct amdgpu_reset_context *reset_context)
26bc5340 4679{
5c1e6fa4 4680 int i, r = 0;
04442bf7
LL
4681 struct amdgpu_job *job = NULL;
4682 bool need_full_reset =
4683 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4684
4685 if (reset_context->reset_req_dev == adev)
4686 job = reset_context->job;
71182665 4687
b602ca5f
TZ
4688 if (amdgpu_sriov_vf(adev)) {
4689 /* stop the data exchange thread */
4690 amdgpu_virt_fini_data_exchange(adev);
4691 }
4692
9e225fb9
AG
4693 amdgpu_fence_driver_isr_toggle(adev, true);
4694
71182665 4695 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
4696 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4697 struct amdgpu_ring *ring = adev->rings[i];
4698
51687759 4699 if (!ring || !ring->sched.thread)
0875dc9e 4700 continue;
5740682e 4701
c530b02f
JZ
4702 /* clear the job fences from the fence drv to avoid force_completion
4703 * leaving NULL and vm flush fences in the fence drv */
5c1e6fa4 4704 amdgpu_fence_driver_clear_job_fences(ring);
c530b02f 4705
2f9d4084
ML
4706 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
4707 amdgpu_fence_driver_force_completion(ring);
0875dc9e 4708 }
d38ceaf9 4709
9e225fb9
AG
4710 amdgpu_fence_driver_isr_toggle(adev, false);
4711
ff99849b 4712 if (job && job->vm)
222b5f04
AG
4713 drm_sched_increase_karma(&job->base);
4714
04442bf7 4715 r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
404b277b
LL
4716 /* If reset handler not implemented, continue; otherwise return */
4717 if (r == -ENOSYS)
4718 r = 0;
4719 else
04442bf7
LL
4720 return r;
4721
1d721ed6 4722 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
4723 if (!amdgpu_sriov_vf(adev)) {
4724
4725 if (!need_full_reset)
4726 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4727
d3ef9d57 4728 if (!need_full_reset && amdgpu_gpu_recovery) {
26bc5340
AG
4729 amdgpu_device_ip_pre_soft_reset(adev);
4730 r = amdgpu_device_ip_soft_reset(adev);
4731 amdgpu_device_ip_post_soft_reset(adev);
4732 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 4733 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
26bc5340
AG
4734 need_full_reset = true;
4735 }
4736 }
4737
4738 if (need_full_reset)
4739 r = amdgpu_device_ip_suspend(adev);
04442bf7
LL
4740 if (need_full_reset)
4741 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4742 else
4743 clear_bit(AMDGPU_NEED_FULL_RESET,
4744 &reset_context->flags);
26bc5340
AG
4745 }
4746
4747 return r;
4748}
4749
15fd09a0
SA
4750static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
4751{
15fd09a0
SA
4752 int i;
4753
38a15ad9 4754 lockdep_assert_held(&adev->reset_domain->sem);
15fd09a0
SA
4755
4756 for (i = 0; i < adev->num_regs; i++) {
651d7ee6
SA
4757 adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]);
4758 trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i],
4759 adev->reset_dump_reg_value[i]);
15fd09a0
SA
4760 }
4761
4762 return 0;
4763}
4764
3d8785f6
SA
4765#ifdef CONFIG_DEV_COREDUMP
4766static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
4767 size_t count, void *data, size_t datalen)
4768{
4769 struct drm_printer p;
4770 struct amdgpu_device *adev = data;
4771 struct drm_print_iterator iter;
4772 int i;
4773
4774 iter.data = buffer;
4775 iter.offset = 0;
4776 iter.start = offset;
4777 iter.remain = count;
4778
4779 p = drm_coredump_printer(&iter);
4780
4781 drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
4782 drm_printf(&p, "kernel: " UTS_RELEASE "\n");
4783 drm_printf(&p, "module: " KBUILD_MODNAME "\n");
4784 drm_printf(&p, "time: %lld.%09ld\n", adev->reset_time.tv_sec, adev->reset_time.tv_nsec);
4785 if (adev->reset_task_info.pid)
4786 drm_printf(&p, "process_name: %s PID: %d\n",
4787 adev->reset_task_info.process_name,
4788 adev->reset_task_info.pid);
4789
4790 if (adev->reset_vram_lost)
4791 drm_printf(&p, "VRAM is lost due to GPU reset!\n");
4792 if (adev->num_regs) {
4793 drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n");
4794
4795 for (i = 0; i < adev->num_regs; i++)
4796 drm_printf(&p, "0x%08x: 0x%08x\n",
4797 adev->reset_dump_reg_list[i],
4798 adev->reset_dump_reg_value[i]);
4799 }
4800
4801 return count - iter.remain;
4802}
4803
4804static void amdgpu_devcoredump_free(void *data)
4805{
4806}
4807
4808static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev)
4809{
4810 struct drm_device *dev = adev_to_drm(adev);
4811
4812 ktime_get_ts64(&adev->reset_time);
4813 dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_KERNEL,
4814 amdgpu_devcoredump_read, amdgpu_devcoredump_free);
4815}
4816#endif
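/*
 * Illustrative note, not part of the driver source: the dump registered with
 * dev_coredumpm() above is produced by amdgpu_devcoredump_read() and is
 * normally read back through the devcoredump class device, e.g. on a
 * hypothetical system:
 *
 *   cat /sys/class/devcoredump/devcd1/data
 */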
4817
04442bf7
LL
4818int amdgpu_do_asic_reset(struct list_head *device_list_handle,
4819 struct amdgpu_reset_context *reset_context)
26bc5340
AG
4820{
4821 struct amdgpu_device *tmp_adev = NULL;
04442bf7 4822 bool need_full_reset, skip_hw_reset, vram_lost = false;
26bc5340 4823 int r = 0;
f5c7e779 4824 bool gpu_reset_for_dev_remove = 0;
26bc5340 4825
04442bf7
LL
4826 /* Try reset handler method first */
4827 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
4828 reset_list);
15fd09a0 4829 amdgpu_reset_reg_dumps(tmp_adev);
0a83bb35
LL
4830
4831 reset_context->reset_device_list = device_list_handle;
04442bf7 4832 r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
404b277b
LL
4833 /* If reset handler not implemented, continue; otherwise return */
4834 if (r == -ENOSYS)
4835 r = 0;
4836 else
04442bf7
LL
4837 return r;
4838
4839 /* Reset handler not implemented, use the default method */
4840 need_full_reset =
4841 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4842 skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
4843
f5c7e779
YC
4844 gpu_reset_for_dev_remove =
4845 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
4846 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4847
26bc5340 4848 /*
655ce9cb 4849 * ASIC reset has to be done on all XGMI hive nodes ASAP
26bc5340
AG
4850 * to allow proper link negotiation in FW (within 1 sec)
4851 */
7ac71382 4852 if (!skip_hw_reset && need_full_reset) {
655ce9cb 4853 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
041a62bc 4854 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 4855 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
e3c1b071 4856 tmp_adev->gmc.xgmi.pending_reset = false;
c96cf282 4857 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
4858 r = -EALREADY;
4859 } else
4860 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 4861
041a62bc 4862 if (r) {
aac89168 4863 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4a580877 4864 r, adev_to_drm(tmp_adev)->unique);
041a62bc 4865 break;
ce316fa5
LM
4866 }
4867 }
4868
041a62bc
AG
4869 /* For XGMI wait for all resets to complete before proceed */
4870 if (!r) {
655ce9cb 4871 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
ce316fa5
LM
4872 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4873 flush_work(&tmp_adev->xgmi_reset_work);
4874 r = tmp_adev->asic_reset_res;
4875 if (r)
4876 break;
ce316fa5
LM
4877 }
4878 }
4879 }
ce316fa5 4880 }
26bc5340 4881
43c4d576 4882 if (!r && amdgpu_ras_intr_triggered()) {
655ce9cb 4883 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5e67bba3 4884 if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops &&
4885 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
4886 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev);
43c4d576
JC
4887 }
4888
00eaa571 4889 amdgpu_ras_intr_cleared();
43c4d576 4890 }
00eaa571 4891
f5c7e779
YC
4892 /* Since the mode1 reset affects base ip blocks, the
4893 * phase1 ip blocks need to be resumed. Otherwise there
4894 * will be a BIOS signature error and the psp bootloader
4895 * can't load kdb on the next amdgpu install.
4896 */
4897 if (gpu_reset_for_dev_remove) {
4898 list_for_each_entry(tmp_adev, device_list_handle, reset_list)
4899 amdgpu_device_ip_resume_phase1(tmp_adev);
4900
4901 goto end;
4902 }
4903
655ce9cb 4904 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
26bc5340
AG
4905 if (need_full_reset) {
4906 /* post card */
e3c1b071 4907 r = amdgpu_device_asic_init(tmp_adev);
4908 if (r) {
aac89168 4909 dev_warn(tmp_adev->dev, "asic atom init failed!");
e3c1b071 4910 } else {
26bc5340 4911 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
9cec53c1
JZ
4912 r = amdgpu_amdkfd_resume_iommu(tmp_adev);
4913 if (r)
4914 goto out;
4915
26bc5340
AG
4916 r = amdgpu_device_ip_resume_phase1(tmp_adev);
4917 if (r)
4918 goto out;
4919
4920 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
3d8785f6
SA
4921#ifdef CONFIG_DEV_COREDUMP
4922 tmp_adev->reset_vram_lost = vram_lost;
4923 memset(&tmp_adev->reset_task_info, 0,
4924 sizeof(tmp_adev->reset_task_info));
4925 if (reset_context->job && reset_context->job->vm)
4926 tmp_adev->reset_task_info =
4927 reset_context->job->vm->task_info;
4928 amdgpu_reset_capture_coredumpm(tmp_adev);
4929#endif
26bc5340 4930 if (vram_lost) {
77e7f829 4931 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 4932 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
4933 }
4934
26bc5340
AG
4935 r = amdgpu_device_fw_loading(tmp_adev);
4936 if (r)
4937 return r;
4938
4939 r = amdgpu_device_ip_resume_phase2(tmp_adev);
4940 if (r)
4941 goto out;
4942
4943 if (vram_lost)
4944 amdgpu_device_fill_reset_magic(tmp_adev);
4945
fdafb359
EQ
4946 /*
4947 * Add this ASIC as tracked as reset was already
4948 * complete successfully.
4949 */
4950 amdgpu_register_gpu_instance(tmp_adev);
4951
04442bf7
LL
4952 if (!reset_context->hive &&
4953 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
e3c1b071 4954 amdgpu_xgmi_add_device(tmp_adev);
4955
7c04ca50 4956 r = amdgpu_device_ip_late_init(tmp_adev);
4957 if (r)
4958 goto out;
4959
087451f3 4960 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
565d1941 4961
e8fbaf03
GC
4962 /*
4963 * The GPU enters a bad state once the number of faulty pages
4964 * found by ECC has reached the threshold, and ras
4965 * recovery is scheduled next. So add one check
4966 * here to break recovery if it indeed exceeds the
4967 * bad page threshold, and remind the user to
4968 * retire this GPU or set a bigger
4969 * bad_page_threshold value to fix this the next
4970 * time the driver is probed.
4971 */
11003c68 4972 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
e8fbaf03
GC
4973 /* must succeed. */
4974 amdgpu_ras_resume(tmp_adev);
4975 } else {
4976 r = -EINVAL;
4977 goto out;
4978 }
e79a04d5 4979
26bc5340 4980 /* Update PSP FW topology after reset */
04442bf7
LL
4981 if (reset_context->hive &&
4982 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
4983 r = amdgpu_xgmi_update_topology(
4984 reset_context->hive, tmp_adev);
26bc5340
AG
4985 }
4986 }
4987
26bc5340
AG
4988out:
4989 if (!r) {
4990 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
4991 r = amdgpu_ib_ring_tests(tmp_adev);
4992 if (r) {
4993 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
26bc5340
AG
4994 need_full_reset = true;
4995 r = -EAGAIN;
4996 goto end;
4997 }
4998 }
4999
5000 if (!r)
5001 r = amdgpu_device_recover_vram(tmp_adev);
5002 else
5003 tmp_adev->asic_reset_res = r;
5004 }
5005
5006end:
04442bf7
LL
5007 if (need_full_reset)
5008 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5009 else
5010 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340
AG
5011 return r;
5012}
5013
e923be99 5014static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
26bc5340 5015{
5740682e 5016
a3a09142
AD
5017 switch (amdgpu_asic_reset_method(adev)) {
5018 case AMD_RESET_METHOD_MODE1:
5019 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5020 break;
5021 case AMD_RESET_METHOD_MODE2:
5022 adev->mp1_state = PP_MP1_STATE_RESET;
5023 break;
5024 default:
5025 adev->mp1_state = PP_MP1_STATE_NONE;
5026 break;
5027 }
26bc5340 5028}
d38ceaf9 5029
e923be99 5030static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
26bc5340 5031{
89041940 5032 amdgpu_vf_error_trans_all(adev);
a3a09142 5033 adev->mp1_state = PP_MP1_STATE_NONE;
91fb309d
HC
5034}
5035
3f12acc8
EQ
5036static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5037{
5038 struct pci_dev *p = NULL;
5039
5040 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5041 adev->pdev->bus->number, 1);
5042 if (p) {
5043 pm_runtime_enable(&(p->dev));
5044 pm_runtime_resume(&(p->dev));
5045 }
5046}
5047
5048static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5049{
5050 enum amd_reset_method reset_method;
5051 struct pci_dev *p = NULL;
5052 u64 expires;
5053
5054 /*
5055	 * For now, only BACO and mode1 reset are confirmed to
5056	 * suffer the audio issue if the audio device is not properly suspended.
5057 */
5058 reset_method = amdgpu_asic_reset_method(adev);
5059 if ((reset_method != AMD_RESET_METHOD_BACO) &&
5060 (reset_method != AMD_RESET_METHOD_MODE1))
5061 return -EINVAL;
5062
5063 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5064 adev->pdev->bus->number, 1);
5065 if (!p)
5066 return -ENODEV;
5067
5068 expires = pm_runtime_autosuspend_expiration(&(p->dev));
5069 if (!expires)
5070 /*
5071 * If we cannot get the audio device autosuspend delay,
5072			 * a fixed 4S interval will be used. Since 3S is the
5073			 * audio controller's default autosuspend delay setting,
5074			 * the 4S used here is guaranteed to cover that.
5075 */
54b7feb9 5076 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
5077
5078 while (!pm_runtime_status_suspended(&(p->dev))) {
5079 if (!pm_runtime_suspend(&(p->dev)))
5080 break;
5081
5082 if (expires < ktime_get_mono_fast_ns()) {
5083 dev_warn(adev->dev, "failed to suspend display audio\n");
5084 /* TODO: abort the succeeding gpu reset? */
5085 return -ETIMEDOUT;
5086 }
5087 }
5088
5089 pm_runtime_disable(&(p->dev));
5090
5091 return 0;
5092}
5093
d193b12b 5094static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
247c7b0d
AG
5095{
5096 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5097
5098#if defined(CONFIG_DEBUG_FS)
5099 if (!amdgpu_sriov_vf(adev))
5100 cancel_work(&adev->reset_work);
5101#endif
5102
5103 if (adev->kfd.dev)
5104 cancel_work(&adev->kfd.reset_work);
5105
5106 if (amdgpu_sriov_vf(adev))
5107 cancel_work(&adev->virt.flr_work);
5108
5109 if (con && adev->ras_enabled)
5110 cancel_work(&con->recovery_work);
5111
5112}
5113
26bc5340 5114/**
6e9c65f7 5115 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
26bc5340 5116 *
982a820b 5117 * @adev: amdgpu_device pointer
26bc5340
AG
5118 * @job: which job triggered the hang
5119 * @reset_context: amdgpu reset context pointer
5120 *
5121 * Attempt to reset the GPU if it has hung (all ASICs): do a soft or full reset and reinitialize the ASIC.
5122 * Returns 0 for success or an error on failure.
5123 */
5124
cf727044 5125int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
f1549c09
LG
5126 struct amdgpu_job *job,
5127 struct amdgpu_reset_context *reset_context)
26bc5340 5128{
1d721ed6 5129 struct list_head device_list, *device_list_handle = NULL;
7dd8c205 5130 bool job_signaled = false;
26bc5340 5131 struct amdgpu_hive_info *hive = NULL;
26bc5340 5132 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 5133 int i, r = 0;
bb5c7235 5134 bool need_emergency_restart = false;
3f12acc8 5135 bool audio_suspended = false;
f5c7e779
YC
5136 bool gpu_reset_for_dev_remove = false;
5137
5138 gpu_reset_for_dev_remove =
5139 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5140 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340 5141
6e3cd2a9 5142 /*
bb5c7235
WS
5143 * Special case: RAS triggered and full reset isn't supported
5144 */
5145 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5146
d5ea093e
AG
5147 /*
5148 * Flush RAM to disk so that after reboot
5149	 * the user can read the log and see why the system rebooted.
5150 */
bb5c7235 5151 if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
5152 DRM_WARN("Emergency reboot.");
5153
5154 ksys_sync_helper();
5155 emergency_restart();
5156 }
5157
b823821f 5158 dev_info(adev->dev, "GPU %s begin!\n",
bb5c7235 5159 need_emergency_restart ? "jobs stop":"reset");
26bc5340 5160
175ac6ec
ZL
5161 if (!amdgpu_sriov_vf(adev))
5162 hive = amdgpu_get_xgmi_hive(adev);
681260df 5163 if (hive)
53b3f8f4 5164 mutex_lock(&hive->hive_lock);
26bc5340 5165
f1549c09
LG
5166 reset_context->job = job;
5167 reset_context->hive = hive;
9e94d22c
EQ
5168 /*
5169 * Build list of devices to reset.
5170	 * In case we are in XGMI hive mode, re-sort the device list
5171	 * to put adev in the first position.
5172 */
5173 INIT_LIST_HEAD(&device_list);
175ac6ec 5174 if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
83d29a5f 5175 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
655ce9cb 5176 list_add_tail(&tmp_adev->reset_list, &device_list);
83d29a5f
YC
5177 if (gpu_reset_for_dev_remove && adev->shutdown)
5178 tmp_adev->shutdown = true;
5179 }
655ce9cb 5180 if (!list_is_first(&adev->reset_list, &device_list))
5181 list_rotate_to_front(&adev->reset_list, &device_list);
5182 device_list_handle = &device_list;
26bc5340 5183 } else {
655ce9cb 5184 list_add_tail(&adev->reset_list, &device_list);
26bc5340
AG
5185 device_list_handle = &device_list;
5186 }
5187
e923be99
AG
5188 /* We need to lock reset domain only once both for XGMI and single device */
5189 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5190 reset_list);
3675c2f2 5191 amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
e923be99 5192
1d721ed6 5193 /* block all schedulers and reset given job's ring */
655ce9cb 5194 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f287a3c5 5195
e923be99 5196 amdgpu_device_set_mp1_state(tmp_adev);
f287a3c5 5197
3f12acc8
EQ
5198 /*
5199		 * Try to put the audio codec into suspend state
5200		 * before the gpu reset starts.
5201		 *
5202		 * The power domain of the graphics device is
5203		 * shared with the AZ power domain. Without this,
5204		 * we may change the audio hardware from behind
5205		 * the audio driver's back and trigger
5206		 * some audio codec errors.
5207 */
5208 if (!amdgpu_device_suspend_display_audio(tmp_adev))
5209 audio_suspended = true;
5210
9e94d22c
EQ
5211 amdgpu_ras_set_error_query_ready(tmp_adev, false);
5212
52fb44cf
EQ
5213 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5214
c004d44e 5215 if (!amdgpu_sriov_vf(tmp_adev))
428890a3 5216 amdgpu_amdkfd_pre_reset(tmp_adev);
9e94d22c 5217
12ffa55d
AG
5218 /*
5219		 * Mark these ASICs to be reset as untracked first,
5220		 * and add them back after the reset has completed.
5221 */
5222 amdgpu_unregister_gpu_instance(tmp_adev);
5223
163d4cd2 5224 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
565d1941 5225
f1c1314b 5226 /* disable ras on ALL IPs */
bb5c7235 5227 if (!need_emergency_restart &&
b823821f 5228 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 5229 amdgpu_ras_suspend(tmp_adev);
5230
1d721ed6
AG
5231 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5232 struct amdgpu_ring *ring = tmp_adev->rings[i];
5233
5234 if (!ring || !ring->sched.thread)
5235 continue;
5236
0b2d2c2e 5237 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 5238
bb5c7235 5239 if (need_emergency_restart)
7c6e68c7 5240 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6 5241 }
8f8c80f4 5242 atomic_inc(&tmp_adev->gpu_reset_counter);
1d721ed6
AG
5243 }
5244
bb5c7235 5245 if (need_emergency_restart)
7c6e68c7
AG
5246 goto skip_sched_resume;
5247
1d721ed6
AG
5248 /*
5249 * Must check guilty signal here since after this point all old
5250 * HW fences are force signaled.
5251 *
5252 * job->base holds a reference to parent fence
5253 */
f6a3f660 5254 if (job && dma_fence_is_signaled(&job->hw_fence)) {
1d721ed6 5255 job_signaled = true;
1d721ed6
AG
5256 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5257 goto skip_hw_reset;
5258 }
5259
26bc5340 5260retry: /* Rest of adevs pre asic reset from XGMI hive. */
655ce9cb 5261 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f5c7e779
YC
5262 if (gpu_reset_for_dev_remove) {
5263			/* Workaround for ASICs that need to disable SMC first */
5264 amdgpu_device_smu_fini_early(tmp_adev);
5265 }
f1549c09 5266 r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
26bc5340
AG
5267		/* TODO: Should we stop? */
5268 if (r) {
aac89168 5269 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
4a580877 5270 r, adev_to_drm(tmp_adev)->unique);
26bc5340
AG
5271 tmp_adev->asic_reset_res = r;
5272 }
247c7b0d
AG
5273
5274 /*
5275		 * Drop all pending non-scheduler resets. Scheduler resets
5276		 * were already dropped during drm_sched_stop.
5277 */
d193b12b 5278 amdgpu_device_stop_pending_resets(tmp_adev);
26bc5340
AG
5279 }
5280
5281 /* Actual ASIC resets if needed.*/
4f30d920 5282 /* Host driver will handle XGMI hive reset for SRIOV */
26bc5340
AG
5283 if (amdgpu_sriov_vf(adev)) {
5284 r = amdgpu_device_reset_sriov(adev, job ? false : true);
5285 if (r)
5286 adev->asic_reset_res = r;
950d6425
SY
5287
5288		/* Aldebaran supports RAS in SRIOV, so we need to resume RAS during reset */
5289 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
5290 amdgpu_ras_resume(adev);
26bc5340 5291 } else {
f1549c09 5292 r = amdgpu_do_asic_reset(device_list_handle, reset_context);
b98a1648 5293		if (r == -EAGAIN)
26bc5340 5294 goto retry;
f5c7e779
YC
5295
5296 if (!r && gpu_reset_for_dev_remove)
5297 goto recover_end;
26bc5340
AG
5298 }
5299
1d721ed6
AG
5300skip_hw_reset:
5301
26bc5340 5302	/* Post ASIC reset for all devs. */
655ce9cb 5303 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
7c6e68c7 5304
1d721ed6
AG
5305 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5306 struct amdgpu_ring *ring = tmp_adev->rings[i];
5307
5308 if (!ring || !ring->sched.thread)
5309 continue;
5310
6868a2c4 5311 drm_sched_start(&ring->sched, true);
1d721ed6
AG
5312 }
5313
693073a0 5314 if (adev->enable_mes && adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3))
ed67f729
JX
5315 amdgpu_mes_self_test(tmp_adev);
5316
1053b9c9 5317 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) {
4a580877 5318 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
1d721ed6
AG
5319 }
5320
7258fa31
SK
5321 if (tmp_adev->asic_reset_res)
5322 r = tmp_adev->asic_reset_res;
5323
1d721ed6 5324 tmp_adev->asic_reset_res = 0;
26bc5340
AG
5325
5326 if (r) {
5327			/* bad news, how to tell it to userspace? */
12ffa55d 5328 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
5329 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5330 } else {
12ffa55d 5331 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
3fa8f89d
S
5332 if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5333 DRM_WARN("smart shift update failed\n");
26bc5340 5334 }
7c6e68c7 5335 }
26bc5340 5336
7c6e68c7 5337skip_sched_resume:
655ce9cb 5338 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
428890a3 5339 /* unlock kfd: SRIOV would do it separately */
c004d44e 5340 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
428890a3 5341 amdgpu_amdkfd_post_reset(tmp_adev);
8e2712e7 5342
5343		/* kfd_post_reset will do nothing if the kfd device is not initialized,
5344		 * so bring up kfd here if it was not initialized before
5345 */
5346 if (!adev->kfd.init_complete)
5347 amdgpu_amdkfd_device_init(adev);
5348
3f12acc8
EQ
5349 if (audio_suspended)
5350 amdgpu_device_resume_display_audio(tmp_adev);
e923be99
AG
5351
5352 amdgpu_device_unset_mp1_state(tmp_adev);
d293470e
YC
5353
5354 amdgpu_ras_set_error_query_ready(tmp_adev, true);
26bc5340
AG
5355 }
5356
f5c7e779 5357recover_end:
e923be99
AG
5358 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5359 reset_list);
5360 amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5361
9e94d22c 5362 if (hive) {
9e94d22c 5363 mutex_unlock(&hive->hive_lock);
d95e8e97 5364 amdgpu_put_xgmi_hive(hive);
9e94d22c 5365 }
26bc5340 5366
f287a3c5 5367 if (r)
26bc5340 5368 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
ab9a0b1f
AG
5369
5370 atomic_set(&adev->reset_domain->reset_res, r);
d38ceaf9
AD
5371 return r;
5372}
5373
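/*
 * Illustrative sketch only (not part of the driver): one way a caller such as
 * a job timeout handler might fill in an amdgpu_reset_context and enter the
 * recovery path above. The helper name example_trigger_recovery() is
 * hypothetical; the fields and flags are the same ones used elsewhere in this
 * file (see amdgpu_pci_slot_reset() below).
 */
static int example_trigger_recovery(struct amdgpu_device *adev,
				    struct amdgpu_job *job)
{
	struct amdgpu_reset_context reset_context;

	memset(&reset_context, 0, sizeof(reset_context));

	reset_context.method = AMD_RESET_METHOD_NONE;	/* let the ASIC choose */
	reset_context.reset_req_dev = adev;
	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);

	return amdgpu_device_gpu_recover(adev, job, &reset_context);
}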
e3ecdffa
AD
5374/**
5375 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
5376 *
5377 * @adev: amdgpu_device pointer
5378 *
5379 * Fetches and stores in the driver the PCIE capabilities (gen speed
5380 * and lanes) of the slot the device is in. Handles APUs and
5381 * virtualized environments where PCIE config space may not be available.
5382 */
5494d864 5383static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 5384{
5d9a6330 5385 struct pci_dev *pdev;
c5313457
HK
5386 enum pci_bus_speed speed_cap, platform_speed_cap;
5387 enum pcie_link_width platform_link_width;
d0dd7f0c 5388
cd474ba0
AD
5389 if (amdgpu_pcie_gen_cap)
5390 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 5391
cd474ba0
AD
5392 if (amdgpu_pcie_lane_cap)
5393 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 5394
cd474ba0
AD
5395 /* covers APUs as well */
5396 if (pci_is_root_bus(adev->pdev->bus)) {
5397 if (adev->pm.pcie_gen_mask == 0)
5398 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5399 if (adev->pm.pcie_mlw_mask == 0)
5400 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 5401 return;
cd474ba0 5402 }
d0dd7f0c 5403
c5313457
HK
5404 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5405 return;
5406
dbaa922b
AD
5407 pcie_bandwidth_available(adev->pdev, NULL,
5408 &platform_speed_cap, &platform_link_width);
c5313457 5409
cd474ba0 5410 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
5411 /* asic caps */
5412 pdev = adev->pdev;
5413 speed_cap = pcie_get_speed_cap(pdev);
5414 if (speed_cap == PCI_SPEED_UNKNOWN) {
5415 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
5416 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5417 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 5418 } else {
2b3a1f51
FX
5419 if (speed_cap == PCIE_SPEED_32_0GT)
5420 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5421 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5422 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5423 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5424 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5425 else if (speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5426 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5427 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5428 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5429 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5430 else if (speed_cap == PCIE_SPEED_8_0GT)
5431 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5432 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5433 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5434 else if (speed_cap == PCIE_SPEED_5_0GT)
5435 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5436 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5437 else
5438 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5439 }
5440 /* platform caps */
c5313457 5441 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
5442 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5443 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5444 } else {
2b3a1f51
FX
5445 if (platform_speed_cap == PCIE_SPEED_32_0GT)
5446 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5447 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5448 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5449 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5450 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5451 else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5452 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5453 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5454 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5455 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 5456 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
5457 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5458 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5459 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 5460 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
5461 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5462 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5463 else
5464 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5465
cd474ba0
AD
5466 }
5467 }
5468 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 5469 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
5470 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5471 } else {
c5313457 5472 switch (platform_link_width) {
5d9a6330 5473 case PCIE_LNK_X32:
cd474ba0
AD
5474 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5475 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5476 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5477 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5478 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5479 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5480 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5481 break;
5d9a6330 5482 case PCIE_LNK_X16:
cd474ba0
AD
5483 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5484 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5485 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5486 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5487 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5488 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5489 break;
5d9a6330 5490 case PCIE_LNK_X12:
cd474ba0
AD
5491 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5492 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5493 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5494 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5495 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5496 break;
5d9a6330 5497 case PCIE_LNK_X8:
cd474ba0
AD
5498 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5499 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5500 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5501 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5502 break;
5d9a6330 5503 case PCIE_LNK_X4:
cd474ba0
AD
5504 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5505 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5506 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5507 break;
5d9a6330 5508 case PCIE_LNK_X2:
cd474ba0
AD
5509 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5510 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5511 break;
5d9a6330 5512 case PCIE_LNK_X1:
cd474ba0
AD
5513 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5514 break;
5515 default:
5516 break;
5517 }
d0dd7f0c
AD
5518 }
5519 }
5520}
d38ceaf9 5521
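/*
 * Illustrative sketch only: once amdgpu_device_get_pcie_info() has populated
 * the masks, callers can test a capability with a simple bit check. The
 * helper name example_supports_pcie_gen3() is hypothetical.
 */
static bool example_supports_pcie_gen3(struct amdgpu_device *adev)
{
	/* both the ASIC and the platform must advertise gen3 support */
	return (adev->pm.pcie_gen_mask & CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3) &&
	       (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
}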
08a2fd23
RE
5522/**
5523 * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
5524 *
5525 * @adev: amdgpu_device pointer
5526 * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
5527 *
5528 * Return true if @peer_adev can access (DMA) @adev through the PCIe
5529 * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
5530 * @peer_adev.
5531 */
5532bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
5533 struct amdgpu_device *peer_adev)
5534{
5535#ifdef CONFIG_HSA_AMD_P2P
5536 uint64_t address_mask = peer_adev->dev->dma_mask ?
5537 ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
5538 resource_size_t aper_limit =
5539 adev->gmc.aper_base + adev->gmc.aper_size - 1;
bb66ecbf
LL
5540 bool p2p_access =
5541 !adev->gmc.xgmi.connected_to_cpu &&
5542 !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
08a2fd23
RE
5543
5544 return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
5545 adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
5546 !(adev->gmc.aper_base & address_mask ||
5547 aper_limit & address_mask));
5548#else
5549 return false;
5550#endif
5551}
5552
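/*
 * Illustrative sketch only: a symmetric check a caller might perform before
 * setting up peer-to-peer DMA between two devices. example_can_use_p2p() is a
 * hypothetical helper, not a driver API.
 */
static bool example_can_use_p2p(struct amdgpu_device *a,
				struct amdgpu_device *b)
{
	/* each device must be able to reach the other one's VRAM BAR */
	return amdgpu_device_is_peer_accessible(a, b) &&
	       amdgpu_device_is_peer_accessible(b, a);
}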
361dbd01
AD
5553int amdgpu_device_baco_enter(struct drm_device *dev)
5554{
1348969a 5555 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5556 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01 5557
4a580877 5558 if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
361dbd01
AD
5559 return -ENOTSUPP;
5560
8ab0d6f0 5561 if (ras && adev->ras_enabled &&
acdae216 5562 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5563 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
5564
9530273e 5565 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
5566}
5567
5568int amdgpu_device_baco_exit(struct drm_device *dev)
5569{
1348969a 5570 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5571 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 5572 int ret = 0;
361dbd01 5573
4a580877 5574 if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
361dbd01
AD
5575 return -ENOTSUPP;
5576
9530273e
EQ
5577 ret = amdgpu_dpm_baco_exit(adev);
5578 if (ret)
5579 return ret;
7a22677b 5580
8ab0d6f0 5581 if (ras && adev->ras_enabled &&
acdae216 5582 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5583 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
5584
1bece222
CL
5585 if (amdgpu_passthrough(adev) &&
5586 adev->nbio.funcs->clear_doorbell_interrupt)
5587 adev->nbio.funcs->clear_doorbell_interrupt(adev);
5588
7a22677b 5589 return 0;
361dbd01 5590}
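/*
 * Illustrative sketch only: the enter/exit helpers above are intended to be
 * used as a pair, e.g. for a BACO-based reset cycle. example_baco_cycle() is
 * a hypothetical helper with error handling reduced to the minimum.
 */
static int example_baco_cycle(struct drm_device *dev)
{
	int r = amdgpu_device_baco_enter(dev);

	if (r)
		return r;

	/* the ASIC is now in BACO; leaving it again completes the cycle */
	return amdgpu_device_baco_exit(dev);
}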
c9a6b82f
AG
5591
5592/**
5593 * amdgpu_pci_error_detected - Called when a PCI error is detected.
5594 * @pdev: PCI device struct
5595 * @state: PCI channel state
5596 *
5597 * Description: Called when a PCI error is detected.
5598 *
5599 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
5600 */
5601pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
5602{
5603 struct drm_device *dev = pci_get_drvdata(pdev);
5604 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5605 int i;
c9a6b82f
AG
5606
5607 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
5608
6894305c
AG
5609 if (adev->gmc.xgmi.num_physical_nodes > 1) {
5610 DRM_WARN("No support for XGMI hive yet...");
5611 return PCI_ERS_RESULT_DISCONNECT;
5612 }
5613
e17e27f9
GC
5614 adev->pci_channel_state = state;
5615
c9a6b82f
AG
5616 switch (state) {
5617 case pci_channel_io_normal:
5618 return PCI_ERS_RESULT_CAN_RECOVER;
acd89fca 5619 /* Fatal error, prepare for slot reset */
8a11d283
TZ
5620 case pci_channel_io_frozen:
5621 /*
d0fb18b5 5622 * Locking adev->reset_domain->sem will prevent any external access
acd89fca
AG
5623 * to GPU during PCI error recovery
5624 */
3675c2f2 5625 amdgpu_device_lock_reset_domain(adev->reset_domain);
e923be99 5626 amdgpu_device_set_mp1_state(adev);
acd89fca
AG
5627
5628 /*
5629 * Block any work scheduling as we do for regular GPU reset
5630 * for the duration of the recovery
5631 */
5632 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5633 struct amdgpu_ring *ring = adev->rings[i];
5634
5635 if (!ring || !ring->sched.thread)
5636 continue;
5637
5638 drm_sched_stop(&ring->sched, NULL);
5639 }
8f8c80f4 5640 atomic_inc(&adev->gpu_reset_counter);
c9a6b82f
AG
5641 return PCI_ERS_RESULT_NEED_RESET;
5642 case pci_channel_io_perm_failure:
5643 /* Permanent error, prepare for device removal */
5644 return PCI_ERS_RESULT_DISCONNECT;
5645 }
5646
5647 return PCI_ERS_RESULT_NEED_RESET;
5648}
5649
5650/**
5651 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
5652 * @pdev: pointer to PCI device
5653 */
5654pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
5655{
5656
5657 DRM_INFO("PCI error: mmio enabled callback!!\n");
5658
5659 /* TODO - dump whatever for debugging purposes */
5660
5661	/* This is called only if amdgpu_pci_error_detected returns
5662	 * PCI_ERS_RESULT_CAN_RECOVER. Reads/writes to the device still
5663	 * work, so there is no need to reset the slot.
5664 */
5665
5666 return PCI_ERS_RESULT_RECOVERED;
5667}
5668
5669/**
5670 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
5671 * @pdev: PCI device struct
5672 *
5673 * Description: This routine is called by the pci error recovery
5674 * code after the PCI slot has been reset, just before we
5675 * should resume normal operations.
5676 */
5677pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
5678{
5679 struct drm_device *dev = pci_get_drvdata(pdev);
5680 struct amdgpu_device *adev = drm_to_adev(dev);
362c7b91 5681 int r, i;
04442bf7 5682 struct amdgpu_reset_context reset_context;
362c7b91 5683 u32 memsize;
7ac71382 5684 struct list_head device_list;
c9a6b82f
AG
5685
5686 DRM_INFO("PCI error: slot reset callback!!\n");
5687
04442bf7
LL
5688 memset(&reset_context, 0, sizeof(reset_context));
5689
7ac71382 5690 INIT_LIST_HEAD(&device_list);
655ce9cb 5691 list_add_tail(&adev->reset_list, &device_list);
7ac71382 5692
362c7b91
AG
5693 /* wait for asic to come out of reset */
5694 msleep(500);
5695
7ac71382 5696 /* Restore PCI confspace */
c1dd4aa6 5697 amdgpu_device_load_pci_state(pdev);
c9a6b82f 5698
362c7b91
AG
5699 /* confirm ASIC came out of reset */
5700 for (i = 0; i < adev->usec_timeout; i++) {
5701 memsize = amdgpu_asic_get_config_memsize(adev);
5702
5703 if (memsize != 0xffffffff)
5704 break;
5705 udelay(1);
5706 }
5707 if (memsize == 0xffffffff) {
5708 r = -ETIME;
5709 goto out;
5710 }
5711
04442bf7
LL
5712 reset_context.method = AMD_RESET_METHOD_NONE;
5713 reset_context.reset_req_dev = adev;
5714 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
5715 set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
5716
7afefb81 5717 adev->no_hw_access = true;
04442bf7 5718 r = amdgpu_device_pre_asic_reset(adev, &reset_context);
7afefb81 5719 adev->no_hw_access = false;
c9a6b82f
AG
5720 if (r)
5721 goto out;
5722
04442bf7 5723 r = amdgpu_do_asic_reset(&device_list, &reset_context);
c9a6b82f
AG
5724
5725out:
c9a6b82f 5726 if (!r) {
c1dd4aa6
AG
5727 if (amdgpu_device_cache_pci_state(adev->pdev))
5728 pci_restore_state(adev->pdev);
5729
c9a6b82f
AG
5730 DRM_INFO("PCIe error recovery succeeded\n");
5731 } else {
5732 DRM_ERROR("PCIe error recovery failed, err:%d", r);
e923be99
AG
5733 amdgpu_device_unset_mp1_state(adev);
5734 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f
AG
5735 }
5736
5737 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
5738}
5739
5740/**
5741 * amdgpu_pci_resume() - resume normal ops after PCI reset
5742 * @pdev: pointer to PCI device
5743 *
5744 * Called when the error recovery driver tells us that it's
505199a3 5745 * OK to resume normal operation.
c9a6b82f
AG
5746 */
5747void amdgpu_pci_resume(struct pci_dev *pdev)
5748{
5749 struct drm_device *dev = pci_get_drvdata(pdev);
5750 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5751 int i;
c9a6b82f 5752
c9a6b82f
AG
5753
5754 DRM_INFO("PCI error: resume callback!!\n");
acd89fca 5755
e17e27f9
GC
5756 /* Only continue execution for the case of pci_channel_io_frozen */
5757 if (adev->pci_channel_state != pci_channel_io_frozen)
5758 return;
5759
acd89fca
AG
5760 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5761 struct amdgpu_ring *ring = adev->rings[i];
5762
5763 if (!ring || !ring->sched.thread)
5764 continue;
5765
acd89fca
AG
5766 drm_sched_start(&ring->sched, true);
5767 }
5768
e923be99
AG
5769 amdgpu_device_unset_mp1_state(adev);
5770 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f 5771}
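/*
 * Illustrative sketch only: how the four PCI error callbacks above are
 * typically tied together in a struct pci_error_handlers referenced from the
 * pci_driver (for this driver the real table lives in amdgpu_drv.c). The
 * variable name example_pci_err_handlers is hypothetical.
 */
static const struct pci_error_handlers example_pci_err_handlers = {
	.error_detected	= amdgpu_pci_error_detected,
	.mmio_enabled	= amdgpu_pci_mmio_enabled,
	.slot_reset	= amdgpu_pci_slot_reset,
	.resume		= amdgpu_pci_resume,
};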
c1dd4aa6
AG
5772
5773bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
5774{
5775 struct drm_device *dev = pci_get_drvdata(pdev);
5776 struct amdgpu_device *adev = drm_to_adev(dev);
5777 int r;
5778
5779 r = pci_save_state(pdev);
5780 if (!r) {
5781 kfree(adev->pci_state);
5782
5783 adev->pci_state = pci_store_saved_state(pdev);
5784
5785 if (!adev->pci_state) {
5786 DRM_ERROR("Failed to store PCI saved state");
5787 return false;
5788 }
5789 } else {
5790 DRM_WARN("Failed to save PCI state, err:%d\n", r);
5791 return false;
5792 }
5793
5794 return true;
5795}
5796
5797bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
5798{
5799 struct drm_device *dev = pci_get_drvdata(pdev);
5800 struct amdgpu_device *adev = drm_to_adev(dev);
5801 int r;
5802
5803 if (!adev->pci_state)
5804 return false;
5805
5806 r = pci_load_saved_state(pdev, adev->pci_state);
5807
5808 if (!r) {
5809 pci_restore_state(pdev);
5810 } else {
5811 DRM_WARN("Failed to load PCI state, err:%d\n", r);
5812 return false;
5813 }
5814
5815 return true;
5816}
5817
810085dd
EH
5818void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
5819 struct amdgpu_ring *ring)
5820{
5821#ifdef CONFIG_X86_64
b818a5d3 5822 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
5823 return;
5824#endif
5825 if (adev->gmc.xgmi.connected_to_cpu)
5826 return;
5827
5828 if (ring && ring->funcs->emit_hdp_flush)
5829 amdgpu_ring_emit_hdp_flush(ring);
5830 else
5831 amdgpu_asic_flush_hdp(adev, ring);
5832}
c1dd4aa6 5833
810085dd
EH
5834void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
5835 struct amdgpu_ring *ring)
5836{
5837#ifdef CONFIG_X86_64
b818a5d3 5838 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
5839 return;
5840#endif
5841 if (adev->gmc.xgmi.connected_to_cpu)
5842 return;
c1dd4aa6 5843
810085dd
EH
5844 amdgpu_asic_invalidate_hdp(adev, ring);
5845}
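/*
 * Illustrative sketch only: typical ordering around a CPU write to a buffer
 * the GPU will read, using the HDP helper above. example_cpu_update() is a
 * hypothetical helper and assumes @cpu_addr is a CPU mapping of VRAM.
 */
static void example_cpu_update(struct amdgpu_device *adev, u32 *cpu_addr,
			       u32 val)
{
	*cpu_addr = val;

	/* make sure the write has left the HDP cache before the GPU reads it */
	amdgpu_device_flush_hdp(adev, NULL);
}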
34f3a4a9 5846
89a7a870
AG
5847int amdgpu_in_reset(struct amdgpu_device *adev)
5848{
5849 return atomic_read(&adev->reset_domain->in_gpu_reset);
5850}
5851
34f3a4a9
LY
5852/**
5853 * amdgpu_device_halt() - bring hardware to some kind of halt state
5854 *
5855 * @adev: amdgpu_device pointer
5856 *
5857 * Bring hardware to some kind of halt state so that no one can touch it
5858 * any more. It helps to maintain the error context when an error occurs.
5859 * Compared to a simple hang, the system will stay stable at least for SSH
5860 * access. Then it should be trivial to inspect the hardware state and
5861 * see what's going on. Implemented as follows:
5862 *
5863 * 1. drm_dev_unplug() makes device inaccessible to user space (IOCTLs, etc),
5864 * clears all CPU mappings to device, disallows remappings through page faults
5865 * 2. amdgpu_irq_disable_all() disables all interrupts
5866 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
5867 * 4. set adev->no_hw_access to avoid potential crashes after step 5
5868 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
5869 * 6. pci_disable_device() and pci_wait_for_pending_transaction()
5870 * flush any in flight DMA operations
5871 */
5872void amdgpu_device_halt(struct amdgpu_device *adev)
5873{
5874 struct pci_dev *pdev = adev->pdev;
e0f943b4 5875 struct drm_device *ddev = adev_to_drm(adev);
34f3a4a9
LY
5876
5877 drm_dev_unplug(ddev);
5878
5879 amdgpu_irq_disable_all(adev);
5880
5881 amdgpu_fence_driver_hw_fini(adev);
5882
5883 adev->no_hw_access = true;
5884
5885 amdgpu_device_unmap_mmio(adev);
5886
5887 pci_disable_device(pdev);
5888 pci_wait_for_pending_transaction(pdev);
5889}
86700a40
XD
5890
5891u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
5892 u32 reg)
5893{
5894 unsigned long flags, address, data;
5895 u32 r;
5896
5897 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
5898 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
5899
5900 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
5901 WREG32(address, reg * 4);
5902 (void)RREG32(address);
5903 r = RREG32(data);
5904 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
5905 return r;
5906}
5907
5908void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
5909 u32 reg, u32 v)
5910{
5911 unsigned long flags, address, data;
5912
5913 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
5914 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
5915
5916 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
5917 WREG32(address, reg * 4);
5918 (void)RREG32(address);
5919 WREG32(data, v);
5920 (void)RREG32(data);
5921 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
5922}
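/*
 * Illustrative sketch only: a read-modify-write of an indirect PCIe port
 * register built on the accessors above. example_pcie_port_rmw() is a
 * hypothetical helper; real register offsets come from the ASIC headers.
 */
static void example_pcie_port_rmw(struct amdgpu_device *adev, u32 reg,
				  u32 clear, u32 set)
{
	u32 v = amdgpu_device_pcie_port_rreg(adev, reg);

	v = (v & ~clear) | set;
	amdgpu_device_pcie_port_wreg(adev, reg, v);
}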
68ce8b24
CK
5923
5924/**
5925 * amdgpu_device_switch_gang - switch to a new gang
5926 * @adev: amdgpu_device pointer
5927 * @gang: the gang to switch to
5928 *
5929 * Try to switch to a new gang.
5930 * Returns: NULL if we switched to the new gang or a reference to the current
5931 * gang leader.
5932 */
5933struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
5934 struct dma_fence *gang)
5935{
5936 struct dma_fence *old = NULL;
5937
5938 do {
5939 dma_fence_put(old);
5940 rcu_read_lock();
5941 old = dma_fence_get_rcu_safe(&adev->gang_submit);
5942 rcu_read_unlock();
5943
5944 if (old == gang)
5945 break;
5946
5947 if (!dma_fence_is_signaled(old))
5948 return old;
5949
5950 } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
5951 old, gang) != old);
5952
5953 dma_fence_put(old);
5954 return NULL;
5955}
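/*
 * Illustrative sketch only: how a submission path might consume the return
 * value of amdgpu_device_switch_gang(). If a previous gang leader is still
 * running, wait for it (or add it as a dependency) before retrying.
 * example_wait_for_gang_switch() is a hypothetical helper.
 */
static int example_wait_for_gang_switch(struct amdgpu_device *adev,
					struct dma_fence *gang)
{
	struct dma_fence *old = amdgpu_device_switch_gang(adev, gang);

	while (old) {
		long r = dma_fence_wait(old, false);

		dma_fence_put(old);
		if (r)
			return r;

		old = amdgpu_device_switch_gang(adev, gang);
	}

	return 0;
}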
220c8cc8
AD
5956
5957bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
5958{
5959 switch (adev->asic_type) {
5960#ifdef CONFIG_DRM_AMDGPU_SI
5961 case CHIP_HAINAN:
5962#endif
5963 case CHIP_TOPAZ:
5964 /* chips with no display hardware */
5965 return false;
5966#ifdef CONFIG_DRM_AMDGPU_SI
5967 case CHIP_TAHITI:
5968 case CHIP_PITCAIRN:
5969 case CHIP_VERDE:
5970 case CHIP_OLAND:
5971#endif
5972#ifdef CONFIG_DRM_AMDGPU_CIK
5973 case CHIP_BONAIRE:
5974 case CHIP_HAWAII:
5975 case CHIP_KAVERI:
5976 case CHIP_KABINI:
5977 case CHIP_MULLINS:
5978#endif
5979 case CHIP_TONGA:
5980 case CHIP_FIJI:
5981 case CHIP_POLARIS10:
5982 case CHIP_POLARIS11:
5983 case CHIP_POLARIS12:
5984 case CHIP_VEGAM:
5985 case CHIP_CARRIZO:
5986 case CHIP_STONEY:
5987 /* chips with display hardware */
5988 return true;
5989 default:
5990 /* IP discovery */
5991 if (!adev->ip_versions[DCE_HWIP][0] ||
5992 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
5993 return false;
5994 return true;
5995 }
5996}