drm/amdgpu: Fix potential NULL dereference
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/console.h>
#include <linux/slab.h>
#include <linux/iommu.h>
#include <linux/pci.h>
#include <linux/devcoredump.h>
#include <generated/utsrelease.h>
#include <linux/pci-p2pdma.h>

#include <drm/drm_atomic_helper.h>
#include <drm/drm_fb_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "nv.h"
#include "bif/bif_4_1_d.h"
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
#include "amdgpu_pmu.h"
#include "amdgpu_fru_eeprom.h"
#include "amdgpu_reset.h"

#include <linux/suspend.h>
#include <drm/task_barrier.h>
#include <linux/pm_runtime.h>

#include <drm/drm_drv.h>

MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");

#define AMDGPU_RESUME_MS		2000
#define AMDGPU_MAX_RETRY_LIMIT		2
#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)

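/*
 * Hedged usage sketch (illustrative, not part of this file): the SR-IOV
 * reset path is expected to retry a failed reset while
 * AMDGPU_RETRY_SRIOV_RESET() classifies the error as transient, up to
 * AMDGPU_MAX_RETRY_LIMIT attempts. example_do_reset_once() is a
 * hypothetical stand-in for the real reset routine.
 */
static int __maybe_unused example_do_reset_once(struct amdgpu_device *adev)
{
	return 0;	/* hypothetical: pretend the reset succeeded */
}

static int __maybe_unused example_reset_with_retry(struct amdgpu_device *adev)
{
	int retry_limit = 0;
	int r;

	do {
		r = example_do_reset_once(adev);
		if (!AMDGPU_RETRY_SRIOV_RESET(r))
			break;	/* success or a non-transient error */
	} while (retry_limit++ < AMDGPU_MAX_RETRY_LIMIT);

	return r;
}
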
const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"ARCTURUS",
	"RENOIR",
	"ALDEBARAN",
	"NAVI10",
	"CYAN_SKILLFISH",
	"NAVI14",
	"NAVI12",
	"SIENNA_CICHLID",
	"NAVY_FLOUNDER",
	"VANGOGH",
	"DIMGREY_CAVEFISH",
	"BEIGE_GOBY",
	"YELLOW_CARP",
	"IP DISCOVERY",
	"LAST",
};

/**
 * DOC: pcie_replay_count
 *
 * The amdgpu driver provides a sysfs API for reporting the total number
 * of PCIe replays (NAKs).
 * The file pcie_replay_count is used for this and returns the total
 * number of replays as the sum of the NAKs generated and NAKs received.
 */

static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);
	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);

	return sysfs_emit(buf, "%llu\n", cnt);
}

static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
		amdgpu_device_get_pcie_replay_count, NULL);

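/*
 * Hedged usage sketch (illustrative, userspace rather than driver code):
 * the attribute above appears under the device's sysfs directory and can
 * simply be read as text. The PCI address in the path below is
 * hypothetical. Kept under #if 0 since it is not kernel code.
 */
#if 0
#include <stdio.h>

int main(void)
{
	unsigned long long cnt;
	FILE *f = fopen("/sys/bus/pci/devices/0000:03:00.0/pcie_replay_count", "r");

	if (!f)
		return 1;
	if (fscanf(f, "%llu", &cnt) == 1)
		printf("PCIe replays: %llu\n", cnt);
	fclose(f);
	return 0;
}
#endif
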
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);

/**
 * DOC: product_name
 *
 * The amdgpu driver provides a sysfs API for reporting the product name
 * for the device.
 * The file product_name is used for this and returns the product name
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_product_name(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);

	return sysfs_emit(buf, "%s\n", adev->product_name);
}

static DEVICE_ATTR(product_name, S_IRUGO,
		amdgpu_device_get_product_name, NULL);

/**
 * DOC: product_number
 *
 * The amdgpu driver provides a sysfs API for reporting the part number
 * for the device.
 * The file product_number is used for this and returns the part number
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_product_number(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);

	return sysfs_emit(buf, "%s\n", adev->product_number);
}

static DEVICE_ATTR(product_number, S_IRUGO,
		amdgpu_device_get_product_number, NULL);

/**
 * DOC: serial_number
 *
 * The amdgpu driver provides a sysfs API for reporting the serial number
 * for the device.
 * The file serial_number is used for this and returns the serial number
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_serial_number(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);

	return sysfs_emit(buf, "%s\n", adev->serial);
}

static DEVICE_ATTR(serial_number, S_IRUGO,
		amdgpu_device_get_serial_number, NULL);

/**
 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with ATPX power control,
 * otherwise return false.
 */
bool amdgpu_device_supports_px(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);

	if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
		return true;
	return false;
}

/**
 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with ACPI power control,
 * otherwise return false.
 */
bool amdgpu_device_supports_boco(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);

	if (adev->has_pr3 ||
	    ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
		return true;
	return false;
}

/**
 * amdgpu_device_supports_baco - Does the device support BACO
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device supports BACO,
 * otherwise return false.
 */
bool amdgpu_device_supports_baco(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);

	return amdgpu_asic_supports_baco(adev);
}

/**
 * amdgpu_device_supports_smart_shift - Is the device a dGPU with
 * smart shift support
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with Smart Shift support,
 * otherwise returns false.
 */
bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
{
	return (amdgpu_device_supports_boco(dev) &&
		amdgpu_acpi_is_power_shift_control_supported());
}

/*
 * VRAM access helper functions
 */

/**
 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size; the buffer at @buf must hold at least @size bytes
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
			     void *buf, size_t size, bool write)
{
	unsigned long flags;
	uint32_t hi = ~0, tmp = 0;
	uint32_t *data = buf;
	uint64_t last;
	int idx;

	if (!drm_dev_enter(adev_to_drm(adev), &idx))
		return;

	BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));

	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
	for (last = pos + size; pos < last; pos += 4) {
		tmp = pos >> 31;

		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
		if (tmp != hi) {
			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
			hi = tmp;
		}
		if (write)
			WREG32_NO_KIQ(mmMM_DATA, *data++);
		else
			*data++ = RREG32_NO_KIQ(mmMM_DATA);
	}

	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	drm_dev_exit(idx);
}

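/*
 * Hedged usage sketch (illustrative, not part of the driver): reading
 * 16 bytes from VRAM offset 0 through the MM_INDEX/MM_DATA window.
 * Offsets and sizes must be dword aligned, per the BUG_ON() above.
 */
static void __maybe_unused example_peek_vram(struct amdgpu_device *adev)
{
	uint32_t data[4] = {};

	amdgpu_device_mm_access(adev, 0, data, sizeof(data), false);
	dev_info(adev->dev, "first VRAM dword: 0x%08x\n", data[0]);
}
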
/**
 * amdgpu_device_aper_access - access vram via the vram aperture
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size; the buffer at @buf must hold at least @size bytes
 * @write: true - write to vram, otherwise - read from vram
 *
 * The return value means how many bytes have been transferred.
 */
size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
				 void *buf, size_t size, bool write)
{
#ifdef CONFIG_64BIT
	void __iomem *addr;
	size_t count = 0;
	uint64_t last;

	if (!adev->mman.aper_base_kaddr)
		return 0;

	last = min(pos + size, adev->gmc.visible_vram_size);
	if (last > pos) {
		addr = adev->mman.aper_base_kaddr + pos;
		count = last - pos;

		if (write) {
			memcpy_toio(addr, buf, count);
			mb();
			amdgpu_device_flush_hdp(adev, NULL);
		} else {
			amdgpu_device_invalidate_hdp(adev, NULL);
			mb();
			memcpy_fromio(buf, addr, count);
		}

	}

	return count;
#else
	return 0;
#endif
}

/**
 * amdgpu_device_vram_access - read/write a buffer in vram
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size; the buffer at @buf must hold at least @size bytes
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
			       void *buf, size_t size, bool write)
{
	size_t count;

	/* try to use the vram aperture to access vram first */
	count = amdgpu_device_aper_access(adev, pos, buf, size, write);
	size -= count;
	if (size) {
		/* use MM_INDEX/MM_DATA to access the rest of vram */
		pos += count;
		buf += count;
		amdgpu_device_mm_access(adev, pos, buf, size, write);
	}
}

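/*
 * Hedged usage sketch (illustrative): amdgpu_device_vram_access() hides
 * the two-tier strategy above — the CPU-visible aperture covers as many
 * bytes as it can, and the slower MM_INDEX/MM_DATA window covers the
 * remainder, so one call can copy across the visible-VRAM boundary.
 */
static void __maybe_unused example_copy_from_vram(struct amdgpu_device *adev,
						  loff_t vram_pos,
						  void *dst, size_t len)
{
	amdgpu_device_vram_access(adev, vram_pos, dst, len, false);
}
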
/*
 * register access helper functions.
 */

/* Check if hw access should be skipped because of hotplug or device error */
bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
{
	if (adev->no_hw_access)
		return true;

#ifdef CONFIG_LOCKDEP
	/*
	 * This is a bit complicated to understand, so worth a comment. What we assert
	 * here is that the GPU reset is not running on another thread in parallel.
	 *
	 * For this we trylock the read side of the reset semaphore; if that succeeds
	 * we know that the reset is not running in parallel.
	 *
	 * If the trylock fails we assert that we are either already holding the read
	 * side of the lock or are the reset thread itself and hold the write side of
	 * the lock.
	 */
	if (in_task()) {
		if (down_read_trylock(&adev->reset_domain->sem))
			up_read(&adev->reset_domain->sem);
		else
			lockdep_assert_held(&adev->reset_domain->sem);
	}
#endif
	return false;
}

/**
 * amdgpu_device_rreg - read a memory mapped IO or indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
			    uint32_t reg, uint32_t acc_flags)
{
	uint32_t ret;

	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	if ((reg * 4) < adev->rmmio_size) {
		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
		    amdgpu_sriov_runtime(adev) &&
		    down_read_trylock(&adev->reset_domain->sem)) {
			ret = amdgpu_kiq_rreg(adev, reg);
			up_read(&adev->reset_domain->sem);
		} else {
			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
		}
	} else {
		ret = adev->pcie_rreg(adev, reg * 4);
	}

	trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);

	return ret;
}

/*
 * MMIO register read with bytes helper functions
 * @offset: byte offset from MMIO start
 */

/**
 * amdgpu_mm_rreg8 - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 *
 * Returns the 8 bit value from the offset specified.
 */
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
{
	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	if (offset < adev->rmmio_size)
		return (readb(adev->rmmio + offset));
	BUG();
}

/*
 * MMIO register write with bytes helper functions
 * @offset: byte offset from MMIO start
 * @value: the value to be written to the register
 */

/**
 * amdgpu_mm_wreg8 - write a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
{
	if (amdgpu_device_skip_hw_access(adev))
		return;

	if (offset < adev->rmmio_size)
		writeb(value, adev->rmmio + offset);
	else
		BUG();
}

/**
 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_device_wreg(struct amdgpu_device *adev,
			uint32_t reg, uint32_t v,
			uint32_t acc_flags)
{
	if (amdgpu_device_skip_hw_access(adev))
		return;

	if ((reg * 4) < adev->rmmio_size) {
		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
		    amdgpu_sriov_runtime(adev) &&
		    down_read_trylock(&adev->reset_domain->sem)) {
			amdgpu_kiq_wreg(adev, reg, v);
			up_read(&adev->reset_domain->sem);
		} else {
			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
		}
	} else {
		adev->pcie_wreg(adev, reg * 4, v);
	}

	trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
}

/**
 * amdgpu_mm_wreg_mmio_rlc - write a register either with direct/indirect mmio or with the RLC path if in range
 *
 * @adev: amdgpu_device pointer
 * @reg: mmio/rlc register
 * @v: value to write
 *
 * this function is invoked only for the debugfs register access
 */
void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
			     uint32_t reg, uint32_t v)
{
	if (amdgpu_device_skip_hw_access(adev))
		return;

	if (amdgpu_sriov_fullaccess(adev) &&
	    adev->gfx.rlc.funcs &&
	    adev->gfx.rlc.funcs->is_rlcg_access_range) {
		if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
			return amdgpu_sriov_wreg(adev, reg, v, 0, 0);
	} else if ((reg * 4) >= adev->rmmio_size) {
		adev->pcie_wreg(adev, reg * 4, v);
	} else {
		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
	}
}

/**
 * amdgpu_mm_rdoorbell - read a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
{
	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	if (index < adev->doorbell.num_doorbells) {
		return readl(adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell - write a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
{
	if (amdgpu_device_skip_hw_access(adev))
		return;

	if (index < adev->doorbell.num_doorbells) {
		writel(v, adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

/**
 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
{
	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	if (index < adev->doorbell.num_doorbells) {
		return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
{
	if (amdgpu_device_skip_hw_access(adev))
		return;

	if (index < adev->doorbell.num_doorbells) {
		atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

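/*
 * Hedged usage sketch (illustrative): a ring would typically publish its
 * new write pointer through its assigned doorbell slot. The index below
 * is hypothetical; real indices come from adev->doorbell_index.
 */
static void __maybe_unused example_ring_doorbell(struct amdgpu_device *adev,
						 u64 wptr)
{
	u32 example_index = 0;	/* hypothetical doorbell slot */

	amdgpu_mm_wdoorbell64(adev, example_index, wptr);
}
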
/**
 * amdgpu_device_indirect_rreg - read an indirect register
 *
 * @adev: amdgpu_device pointer
 * @pcie_index: mmio register offset
 * @pcie_data: mmio register offset
 * @reg_addr: indirect register address to read from
 *
 * Returns the value of indirect register @reg_addr
 */
u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
				u32 pcie_index, u32 pcie_data,
				u32 reg_addr)
{
	unsigned long flags;
	u32 r;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	r = readl(pcie_data_offset);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return r;
}

/**
 * amdgpu_device_indirect_rreg64 - read a 64 bit indirect register
 *
 * @adev: amdgpu_device pointer
 * @pcie_index: mmio register offset
 * @pcie_data: mmio register offset
 * @reg_addr: indirect register address to read from
 *
 * Returns the value of indirect register @reg_addr
 */
u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
				  u32 pcie_index, u32 pcie_data,
				  u32 reg_addr)
{
	unsigned long flags;
	u64 r;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	/* read low 32 bits */
	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	r = readl(pcie_data_offset);
	/* read high 32 bits */
	writel(reg_addr + 4, pcie_index_offset);
	readl(pcie_index_offset);
	r |= ((u64)readl(pcie_data_offset) << 32);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return r;
}

/**
 * amdgpu_device_indirect_wreg - write an indirect register address
 *
 * @adev: amdgpu_device pointer
 * @pcie_index: mmio register offset
 * @pcie_data: mmio register offset
 * @reg_addr: indirect register offset
 * @reg_data: indirect register data
 *
 */
void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
				 u32 pcie_index, u32 pcie_data,
				 u32 reg_addr, u32 reg_data)
{
	unsigned long flags;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	writel(reg_data, pcie_data_offset);
	readl(pcie_data_offset);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

/**
 * amdgpu_device_indirect_wreg64 - write a 64 bit indirect register address
 *
 * @adev: amdgpu_device pointer
 * @pcie_index: mmio register offset
 * @pcie_data: mmio register offset
 * @reg_addr: indirect register offset
 * @reg_data: indirect register data
 *
 */
void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
				   u32 pcie_index, u32 pcie_data,
				   u32 reg_addr, u64 reg_data)
{
	unsigned long flags;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	/* write low 32 bits */
	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
	readl(pcie_data_offset);
	/* write high 32 bits */
	writel(reg_addr + 4, pcie_index_offset);
	readl(pcie_index_offset);
	writel((u32)(reg_data >> 32), pcie_data_offset);
	readl(pcie_data_offset);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

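/*
 * Hedged usage sketch (illustrative): an asic file would typically wire
 * adev->pcie_rreg to this helper with its own PCIE index/data register
 * offsets. The offsets below are hypothetical, not real hardware values.
 */
#define EXAMPLE_PCIE_INDEX	0x38	/* hypothetical index register */
#define EXAMPLE_PCIE_DATA	0x3C	/* hypothetical data register */

static u32 __maybe_unused example_pcie_rreg(struct amdgpu_device *adev, u32 reg)
{
	return amdgpu_device_indirect_rreg(adev, EXAMPLE_PCIE_INDEX,
					   EXAMPLE_PCIE_DATA, reg);
}
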
/**
 * amdgpu_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg64 - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
{
	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_block_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu_device pointer
 * @block: offset of instance
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
					  uint32_t block, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
		  reg, block);
	BUG();
	return 0;
}

/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
				      uint32_t block,
				      uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
		  reg, block, v);
	BUG();
}

/**
 * amdgpu_device_asic_init - Wrapper for atom asic_init
 *
 * @adev: amdgpu_device pointer
 *
 * Does any asic specific work and then calls atom asic init.
 */
static int amdgpu_device_asic_init(struct amdgpu_device *adev)
{
	amdgpu_asic_pre_asic_init(adev);

	if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0))
		return amdgpu_atomfirmware_asic_init(adev, true);
	else
		return amdgpu_atom_asic_init(adev->mode_info.atom_context);
}

/**
 * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
 *
 * @adev: amdgpu_device pointer
 *
 * Allocates a scratch page of VRAM for use by various things in the
 * driver.
 */
static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
{
	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
				       AMDGPU_GEM_DOMAIN_VRAM |
				       AMDGPU_GEM_DOMAIN_GTT,
				       &adev->mem_scratch.robj,
				       &adev->mem_scratch.gpu_addr,
				       (void **)&adev->mem_scratch.ptr);
}

/**
 * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
 *
 * @adev: amdgpu_device pointer
 *
 * Frees the VRAM scratch page.
 */
static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
}

/**
 * amdgpu_device_program_register_sequence - program an array of registers.
 *
 * @adev: amdgpu_device pointer
 * @registers: pointer to the register array
 * @array_size: size of the register array
 *
 * Programs an array of registers with AND and OR masks.
 * This is a helper for setting golden registers.
 */
void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
					     const u32 *registers,
					     const u32 array_size)
{
	u32 tmp, reg, and_mask, or_mask;
	int i;

	if (array_size % 3)
		return;

	for (i = 0; i < array_size; i += 3) {
		reg = registers[i + 0];
		and_mask = registers[i + 1];
		or_mask = registers[i + 2];

		if (and_mask == 0xffffffff) {
			tmp = or_mask;
		} else {
			tmp = RREG32(reg);
			tmp &= ~and_mask;
			if (adev->family >= AMDGPU_FAMILY_AI)
				tmp |= (or_mask & and_mask);
			else
				tmp |= or_mask;
		}
		WREG32(reg, tmp);
	}
}

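/*
 * Hedged usage sketch (illustrative): a golden-register table is a flat
 * array of (reg, and_mask, or_mask) triplets. The offsets and masks
 * below are hypothetical, purely to show the expected layout; an
 * and_mask of 0xffffffff makes the or_mask a plain register write.
 */
static const u32 example_golden_settings[] = {
	/* reg     and_mask    or_mask */
	0x1234, 0x0000000f, 0x00000002,
	0x5678, 0xffffffff, 0x80000000,
};

static void __maybe_unused example_program_golden(struct amdgpu_device *adev)
{
	amdgpu_device_program_register_sequence(adev,
						example_golden_settings,
						ARRAY_SIZE(example_golden_settings));
}
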
/**
 * amdgpu_device_pci_config_reset - reset the GPU
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using the pci config reset sequence.
 * Only applicable to asics prior to vega10.
 */
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
{
	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
}

/**
 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
 */
int amdgpu_device_pci_reset(struct amdgpu_device *adev)
{
	return pci_reset_function(adev->pdev);
}

/*
 * GPU doorbell aperture helper functions.
 */
/**
 * amdgpu_device_doorbell_init - Init doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Init doorbell driver information (CIK)
 * Returns 0 on success, error on failure.
 */
static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
{

	/* No doorbell on SI hardware generation */
	if (adev->asic_type < CHIP_BONAIRE) {
		adev->doorbell.base = 0;
		adev->doorbell.size = 0;
		adev->doorbell.num_doorbells = 0;
		adev->doorbell.ptr = NULL;
		return 0;
	}

	if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
		return -EINVAL;

	amdgpu_asic_init_doorbell_index(adev);

	/* doorbell bar mapping */
	adev->doorbell.base = pci_resource_start(adev->pdev, 2);
	adev->doorbell.size = pci_resource_len(adev->pdev, 2);

	if (adev->enable_mes) {
		adev->doorbell.num_doorbells =
			adev->doorbell.size / sizeof(u32);
	} else {
		adev->doorbell.num_doorbells =
			min_t(u32, adev->doorbell.size / sizeof(u32),
			      adev->doorbell_index.max_assignment + 1);
		if (adev->doorbell.num_doorbells == 0)
			return -EINVAL;

		/* For Vega, reserve and map two pages on doorbell BAR since the SDMA
		 * paging queue doorbell uses the second page. The
		 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
		 * doorbells are in the first page. So with paging queue enabled,
		 * the max num_doorbells should be extended by 1 page (0x400 in dwords).
		 */
		if (adev->asic_type >= CHIP_VEGA10)
			adev->doorbell.num_doorbells += 0x400;
	}

	adev->doorbell.ptr = ioremap(adev->doorbell.base,
				     adev->doorbell.num_doorbells *
				     sizeof(u32));
	if (adev->doorbell.ptr == NULL)
		return -ENOMEM;

	return 0;
}

/**
 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Tear down doorbell driver information (CIK)
 */
static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
{
	iounmap(adev->doorbell.ptr);
	adev->doorbell.ptr = NULL;
}



/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */

/**
 * amdgpu_device_wb_fini - Disable Writeback and free memory
 *
 * @adev: amdgpu_device pointer
 *
 * Disables Writeback and frees the Writeback memory (all asics).
 * Used at driver shutdown.
 */
static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
{
	if (adev->wb.wb_obj) {
		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
				      &adev->wb.gpu_addr,
				      (void **)&adev->wb.wb);
		adev->wb.wb_obj = NULL;
	}
}

/**
 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
 *
 * @adev: amdgpu_device pointer
 *
 * Initializes writeback and allocates writeback memory (all asics).
 * Used at driver startup.
 * Returns 0 on success or an -error on failure.
 */
static int amdgpu_device_wb_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->wb.wb_obj == NULL) {
		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
					    (void **)&adev->wb.wb);
		if (r) {
			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
			return r;
		}

		adev->wb.num_wb = AMDGPU_MAX_WB;
		memset(&adev->wb.used, 0, sizeof(adev->wb.used));

		/* clear wb memory */
		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
	}

	return 0;
}

/**
 * amdgpu_device_wb_get - Allocate a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Allocate a wb slot for use by the driver (all asics).
 * Returns 0 on success or -EINVAL on failure.
 */
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
{
	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);

	if (offset < adev->wb.num_wb) {
		__set_bit(offset, adev->wb.used);
		*wb = offset << 3; /* convert to dw offset */
		return 0;
	} else {
		return -EINVAL;
	}
}

/**
 * amdgpu_device_wb_free - Free a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Free a wb slot allocated for use by the driver (all asics)
 */
void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
{
	wb >>= 3;
	if (wb < adev->wb.num_wb)
		__clear_bit(wb, adev->wb.used);
}

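/*
 * Hedged usage sketch (illustrative): a ring acquires a writeback slot,
 * lets the GPU update it (e.g. with a fence value), and releases it on
 * teardown. The index returned by amdgpu_device_wb_get() is a dword
 * offset into adev->wb.wb.
 */
static int __maybe_unused example_use_wb_slot(struct amdgpu_device *adev)
{
	u32 wb;
	int r;

	r = amdgpu_device_wb_get(adev, &wb);
	if (r)
		return r;

	/* CPU view of the slot; the GPU writes via adev->wb.gpu_addr + wb * 4 */
	adev->wb.wb[wb] = 0;

	amdgpu_device_wb_free(adev, wb);
	return 0;
}
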
/**
 * amdgpu_device_resize_fb_bar - try to resize FB BAR
 *
 * @adev: amdgpu_device pointer
 *
 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 * to fail, but if any of the BARs is not accessible after the size we abort
 * driver loading by returning -ENODEV.
 */
int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
{
	int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
	struct pci_bus *root;
	struct resource *res;
	unsigned i;
	u16 cmd;
	int r;

	/* Bypass for VF */
	if (amdgpu_sriov_vf(adev))
		return 0;

	/* skip if the bios has already enabled large BAR */
	if (adev->gmc.real_vram_size &&
	    (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
		return 0;

	/* Check if the root BUS has 64bit memory resources */
	root = adev->pdev->bus;
	while (root->parent)
		root = root->parent;

	pci_bus_for_each_resource(root, res, i) {
		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
		    res->start > 0x100000000ull)
			break;
	}

	/* Trying to resize is pointless without a root hub window above 4GB */
	if (!res)
		return 0;

	/* Limit the BAR size to what is available */
	rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
			rbar_size);

	/* Disable memory decoding while we change the BAR addresses and size */
	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
	pci_write_config_word(adev->pdev, PCI_COMMAND,
			      cmd & ~PCI_COMMAND_MEMORY);

	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
	amdgpu_device_doorbell_fini(adev);
	if (adev->asic_type >= CHIP_BONAIRE)
		pci_release_resource(adev->pdev, 2);

	pci_release_resource(adev->pdev, 0);

	r = pci_resize_resource(adev->pdev, 0, rbar_size);
	if (r == -ENOSPC)
		DRM_INFO("Not enough PCI address space for a large BAR.");
	else if (r && r != -ENOTSUPP)
		DRM_ERROR("Problem resizing BAR0 (%d).", r);

	pci_assign_unassigned_bus_resources(adev->pdev->bus);

	/* When the doorbell or fb BAR isn't available we have no chance of
	 * using the device.
	 */
	r = amdgpu_device_doorbell_init(adev);
	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
		return -ENODEV;

	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);

	return 0;
}

/*
 * GPU helper functions.
 */
/**
 * amdgpu_device_need_post - check if the hw needs post or not
 *
 * @adev: amdgpu_device pointer
 *
 * Check if the asic has been initialized (all asics) at driver startup
 * or post is needed if hw reset is performed.
 * Returns true if need or false if not.
 */
bool amdgpu_device_need_post(struct amdgpu_device *adev)
{
	uint32_t reg;

	if (amdgpu_sriov_vf(adev))
		return false;

	if (amdgpu_passthrough(adev)) {
		/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
		 * some old smc fw still needs the driver to do a vPost, otherwise the gpu
		 * hangs. smc fw versions above 22.15 don't have this flaw, so we force
		 * vPost to be executed for smc versions below 22.15.
		 */
		if (adev->asic_type == CHIP_FIJI) {
			int err;
			uint32_t fw_ver;

			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
			/* force vPost if an error occurred */
			if (err)
				return true;

			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
			if (fw_ver < 0x00160e00)
				return true;
		}
	}

	/* Don't post if we need to reset whole hive on init */
	if (adev->gmc.xgmi.pending_reset)
		return false;

	if (adev->has_hw_reset) {
		adev->has_hw_reset = false;
		return true;
	}

	/* bios scratch used on CIK+ */
	if (adev->asic_type >= CHIP_BONAIRE)
		return amdgpu_atombios_scratch_need_asic_init(adev);

	/* check MEM_SIZE for older asics */
	reg = amdgpu_asic_get_config_memsize(adev);

	if ((reg != 0) && (reg != 0xffffffff))
		return false;

	return true;
}

/**
 * amdgpu_device_should_use_aspm - check if the device should program ASPM
 *
 * @adev: amdgpu_device pointer
 *
 * Confirm whether the module parameter and pcie bridge agree that ASPM should
 * be set for this device.
 *
 * Returns true if it should be used or false if not.
 */
bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
{
	switch (amdgpu_aspm) {
	case -1:
		break;
	case 0:
		return false;
	case 1:
		return true;
	default:
		return false;
	}
	return pcie_aspm_enabled(adev->pdev);
}

/* if we get transitioned to only one device, take VGA back */
/**
 * amdgpu_device_vga_set_decode - enable/disable vga decode
 *
 * @pdev: PCI device pointer
 * @state: enable/disable vga decode
 *
 * Enable/disable vga decode (all asics).
 * Returns VGA resource flags.
 */
static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
		bool state)
{
	struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));

	amdgpu_asic_set_vga_state(adev, state);
	if (state)
		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
	else
		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
}

/**
 * amdgpu_device_check_block_size - validate the vm block size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm block size specified via module parameter.
 * The vm block size defines the number of bits in page table versus page
 * directory; a page is 4KB so we have 12 bits of offset, a minimum of 9 bits
 * in the page table, and the remaining bits are in the page directory.
 */
static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
{
	/* defines number of bits in page table versus page directory,
	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
	 * page table and the remaining bits are in the page directory */
	if (amdgpu_vm_block_size == -1)
		return;

	if (amdgpu_vm_block_size < 9) {
		dev_warn(adev->dev, "VM page table size (%d) too small\n",
			 amdgpu_vm_block_size);
		amdgpu_vm_block_size = -1;
	}
}

/**
 * amdgpu_device_check_vm_size - validate the vm size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm size in GB specified via module parameter.
 * The VM size is the size of the GPU virtual memory space in GB.
 */
static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
{
	/* no need to check the default value */
	if (amdgpu_vm_size == -1)
		return;

	if (amdgpu_vm_size < 1) {
		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
			 amdgpu_vm_size);
		amdgpu_vm_size = -1;
	}
}

static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
{
	struct sysinfo si;
	bool is_os_64 = (sizeof(void *) == 8);
	uint64_t total_memory;
	uint64_t dram_size_seven_GB = 0x1B8000000;
	uint64_t dram_size_three_GB = 0xB8000000;

	if (amdgpu_smu_memory_pool_size == 0)
		return;

	if (!is_os_64) {
		DRM_WARN("Not 64-bit OS, feature not supported\n");
		goto def_value;
	}
	si_meminfo(&si);
	total_memory = (uint64_t)si.totalram * si.mem_unit;

	if ((amdgpu_smu_memory_pool_size == 1) ||
	    (amdgpu_smu_memory_pool_size == 2)) {
		if (total_memory < dram_size_three_GB)
			goto def_value1;
	} else if ((amdgpu_smu_memory_pool_size == 4) ||
		   (amdgpu_smu_memory_pool_size == 8)) {
		if (total_memory < dram_size_seven_GB)
			goto def_value1;
	} else {
		DRM_WARN("Smu memory pool size not supported\n");
		goto def_value;
	}
	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;

	return;

def_value1:
	DRM_WARN("Not enough system memory\n");
def_value:
	adev->pm.smu_prv_buffer_size = 0;
}

static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
{
	if (!(adev->flags & AMD_IS_APU) ||
	    adev->asic_type < CHIP_RAVEN)
		return 0;

	switch (adev->asic_type) {
	case CHIP_RAVEN:
		if (adev->pdev->device == 0x15dd)
			adev->apu_flags |= AMD_APU_IS_RAVEN;
		if (adev->pdev->device == 0x15d8)
			adev->apu_flags |= AMD_APU_IS_PICASSO;
		break;
	case CHIP_RENOIR:
		if ((adev->pdev->device == 0x1636) ||
		    (adev->pdev->device == 0x164c))
			adev->apu_flags |= AMD_APU_IS_RENOIR;
		else
			adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
		break;
	case CHIP_VANGOGH:
		adev->apu_flags |= AMD_APU_IS_VANGOGH;
		break;
	case CHIP_YELLOW_CARP:
		break;
	case CHIP_CYAN_SKILLFISH:
		if ((adev->pdev->device == 0x13FE) ||
		    (adev->pdev->device == 0x143F))
			adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
		break;
	default:
		break;
	}

	return 0;
}

/**
 * amdgpu_device_check_arguments - validate module params
 *
 * @adev: amdgpu_device pointer
 *
 * Validates certain module parameters and updates
 * the associated values used by the driver (all asics).
 */
static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
{
	if (amdgpu_sched_jobs < 4) {
		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = 4;
	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
	}

	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
		/* gart size must be greater or equal to 32M */
		dev_warn(adev->dev, "gart size (%d) too small\n",
			 amdgpu_gart_size);
		amdgpu_gart_size = -1;
	}

	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
		/* gtt size must be greater or equal to 32M */
		dev_warn(adev->dev, "gtt size (%d) too small\n",
			 amdgpu_gtt_size);
		amdgpu_gtt_size = -1;
	}

	/* valid range is between 4 and 9 inclusive */
	if (amdgpu_vm_fragment_size != -1 &&
	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
		dev_warn(adev->dev, "valid range is between 4 and 9\n");
		amdgpu_vm_fragment_size = -1;
	}

	if (amdgpu_sched_hw_submission < 2) {
		dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
			 amdgpu_sched_hw_submission);
		amdgpu_sched_hw_submission = 2;
	} else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
		dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
			 amdgpu_sched_hw_submission);
		amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
	}

	if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
		dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
		amdgpu_reset_method = -1;
	}

	amdgpu_device_check_smu_prv_buffer_size(adev);

	amdgpu_device_check_vm_size(adev);

	amdgpu_device_check_block_size(adev);

	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);

	return 0;
}

/**
 * amdgpu_switcheroo_set_state - set switcheroo state
 *
 * @pdev: pci dev pointer
 * @state: vga_switcheroo state
 *
 * Callback for the switcheroo driver. Suspends or resumes
 * the asics before or after it is powered up using ACPI methods.
 */
static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
					enum vga_switcheroo_state state)
{
	struct drm_device *dev = pci_get_drvdata(pdev);
	int r;

	if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
		return;

	if (state == VGA_SWITCHEROO_ON) {
		pr_info("switched on\n");
		/* don't suspend or resume card normally */
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;

		pci_set_power_state(pdev, PCI_D0);
		amdgpu_device_load_pci_state(pdev);
		r = pci_enable_device(pdev);
		if (r)
			DRM_WARN("pci_enable_device failed (%d)\n", r);
		amdgpu_device_resume(dev, true);

		dev->switch_power_state = DRM_SWITCH_POWER_ON;
	} else {
		pr_info("switched off\n");
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
		amdgpu_device_suspend(dev, true);
		amdgpu_device_cache_pci_state(pdev);
		/* Shut down the device */
		pci_disable_device(pdev);
		pci_set_power_state(pdev, PCI_D3cold);
		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
	}
}

/**
 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
 *
 * @pdev: pci dev pointer
 *
 * Callback for the switcheroo driver. Check if the switcheroo
 * state can be changed.
 * Returns true if the state can be changed, false if not.
 */
static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);

	/*
	 * FIXME: open_count is protected by drm_global_mutex but that would lead to
	 * locking inversion with the driver load path. And the access here is
	 * completely racy anyway. So don't bother with locking for now.
	 */
	return atomic_read(&dev->open_count) == 0;
}

static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
	.set_gpu_state = amdgpu_switcheroo_set_state,
	.reprobe = NULL,
	.can_switch = amdgpu_switcheroo_can_switch,
};

/**
 * amdgpu_device_ip_set_clockgating_state - set the CG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: clockgating state (gate or ungate)
 *
 * Sets the requested clockgating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_clockgating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

/**
 * amdgpu_device_ip_set_powergating_state - set the PG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: powergating state (gate or ungate)
 *
 * Sets the requested powergating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_powergating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_powergating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

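/*
 * Hedged usage sketch (illustrative): gating clocks for all GFX IP
 * instances. The cast to void * matches the helpers above, which take
 * the amdgpu_device as an opaque pointer.
 */
static void __maybe_unused example_gate_gfx_clocks(struct amdgpu_device *adev)
{
	int r;

	r = amdgpu_device_ip_set_clockgating_state((void *)adev,
						   AMD_IP_BLOCK_TYPE_GFX,
						   AMD_CG_STATE_GATE);
	if (r)
		dev_warn(adev->dev, "GFX clockgating failed (%d)\n", r);
}
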
e3ecdffa
AD
1705/**
1706 * amdgpu_device_ip_get_clockgating_state - get the CG state
1707 *
1708 * @adev: amdgpu_device pointer
1709 * @flags: clockgating feature flags
1710 *
1711 * Walks the list of IPs on the device and updates the clockgating
1712 * flags for each IP.
1713 * Updates @flags with the feature flags for each hardware IP where
1714 * clockgating is enabled.
1715 */
2990a1fc 1716void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
25faeddc 1717 u64 *flags)
6cb2d4e4
HR
1718{
1719 int i;
1720
1721 for (i = 0; i < adev->num_ip_blocks; i++) {
1722 if (!adev->ip_blocks[i].status.valid)
1723 continue;
1724 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1725 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1726 }
1727}

/**
 * amdgpu_device_ip_wait_for_idle - wait for idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Waits for the requested hardware IP to be idle.
 * Returns 0 for success or a negative error code on failure.
 */
int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
				   enum amd_ip_block_type block_type)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type) {
			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
			if (r)
				return r;
			break;
		}
	}
	return 0;
}

/**
 * amdgpu_device_ip_is_idle - is the hardware IP idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Check if the hardware IP is idle or not.
 * Returns true if the IP is idle, false if not.
 */
bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
			      enum amd_ip_block_type block_type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type)
			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
	}
	return true;
}
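
/*
 * Example (illustrative sketch): a caller that must quiesce an IP before
 * poking its registers can combine the two helpers above:
 *
 *	if (!amdgpu_device_ip_is_idle(adev, AMD_IP_BLOCK_TYPE_GMC)) {
 *		r = amdgpu_device_ip_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GMC);
 *		if (r)
 *			return r;
 *	}
 */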

/**
 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
 *
 * @adev: amdgpu_device pointer
 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Returns a pointer to the hardware IP block structure
 * if it exists for the asic, otherwise NULL.
 */
struct amdgpu_ip_block *
amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
			      enum amd_ip_block_type type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++)
		if (adev->ip_blocks[i].version->type == type)
			return &adev->ip_blocks[i];

	return NULL;
}

/**
 * amdgpu_device_ip_block_version_cmp
 *
 * @adev: amdgpu_device pointer
 * @type: enum amd_ip_block_type
 * @major: major version
 * @minor: minor version
 *
 * Returns 0 if the IP block of @type is present with a version equal to
 * or greater than @major.@minor, and 1 if it is older or doesn't exist.
 */
int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
				       enum amd_ip_block_type type,
				       u32 major, u32 minor)
{
	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);

	if (ip_block && ((ip_block->version->major > major) ||
			((ip_block->version->major == major) &&
			(ip_block->version->minor >= minor))))
		return 0;

	return 1;
}
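
/*
 * Example (illustrative sketch): callers gate features on a minimum IP
 * version, e.g. requiring SMC 7.0 or newer:
 *
 *	if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_SMC,
 *						7, 0)) {
 *		// SMC 7.0+ is present
 *	}
 */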

/**
 * amdgpu_device_ip_block_add
 *
 * @adev: amdgpu_device pointer
 * @ip_block_version: pointer to the IP to add
 *
 * Adds the IP block driver information to the collection of IPs
 * on the asic.
 * Returns 0 on success (including when the IP is skipped because it
 * has been harvested), -EINVAL if @ip_block_version is NULL.
 */
int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
			       const struct amdgpu_ip_block_version *ip_block_version)
{
	if (!ip_block_version)
		return -EINVAL;

	switch (ip_block_version->type) {
	case AMD_IP_BLOCK_TYPE_VCN:
		if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
			return 0;
		break;
	case AMD_IP_BLOCK_TYPE_JPEG:
		if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
			return 0;
		break;
	default:
		break;
	}

	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
		 ip_block_version->funcs->name);

	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;

	return 0;
}
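
/*
 * Example (illustrative, abridged): the per-ASIC set_ip_blocks functions
 * build the IP list with repeated calls, in hw init order, e.g. on VI:
 *
 *	amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
 *	amdgpu_device_ip_block_add(adev, &gmc_v8_0_ip_block);
 *	amdgpu_device_ip_block_add(adev, &tonga_ih_ip_block);
 */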

/**
 * amdgpu_device_enable_virtual_display - enable virtual display feature
 *
 * @adev: amdgpu_device pointer
 *
 * Enables the virtual display feature if the user has enabled it via
 * the module parameter virtual_display. This feature provides a virtual
 * display hardware on headless boards or in virtualized environments.
 * This function parses and validates the configuration string specified by
 * the user and configures the virtual display configuration (number of
 * virtual connectors, crtcs, etc.) specified.
 */
static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
{
	adev->enable_virtual_display = false;

	if (amdgpu_virtual_display) {
		const char *pci_address_name = pci_name(adev->pdev);
		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;

		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
		pciaddstr_tmp = pciaddstr;
		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
			pciaddname = strsep(&pciaddname_tmp, ",");
			if (!strcmp("all", pciaddname)
			    || !strcmp(pci_address_name, pciaddname)) {
				long num_crtc;
				int res = -1;

				adev->enable_virtual_display = true;

				if (pciaddname_tmp)
					res = kstrtol(pciaddname_tmp, 10,
						      &num_crtc);

				if (!res) {
					if (num_crtc < 1)
						num_crtc = 1;
					if (num_crtc > 6)
						num_crtc = 6;
					adev->mode_info.num_crtc = num_crtc;
				} else {
					adev->mode_info.num_crtc = 1;
				}
				break;
			}
		}

		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
			 amdgpu_virtual_display, pci_address_name,
			 adev->enable_virtual_display, adev->mode_info.num_crtc);

		kfree(pciaddstr);
	}
}
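
/*
 * Example (illustrative) strings accepted by the virtual_display module
 * parameter parsed above; entries are semicolon-separated PCI addresses,
 * each optionally followed by a comma and a crtc count (clamped to 1-6):
 *
 *	amdgpu.virtual_display=0000:04:00.0,2
 *	amdgpu.virtual_display=all
 *	amdgpu.virtual_display=0000:04:00.0;0000:05:00.0,1
 */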

void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
{
	if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
		adev->mode_info.num_crtc = 1;
		adev->enable_virtual_display = true;
		DRM_INFO("virtual_display:%d, num_crtc:%d\n",
			 adev->enable_virtual_display, adev->mode_info.num_crtc);
	}
}

/**
 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
 *
 * @adev: amdgpu_device pointer
 *
 * Parses the asic configuration parameters specified in the gpu info
 * firmware and makes them available to the driver for use in configuring
 * the asic.
 * Returns 0 on success, -EINVAL on failure.
 */
static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[40];
	int err;
	const struct gpu_info_firmware_header_v1_0 *hdr;

	adev->firmware.gpu_info_fw = NULL;

	if (adev->mman.discovery_bin) {
		/*
		 * FIXME: The bounding box is still needed by Navi12, so
		 * temporarily read it from gpu_info firmware. Should be dropped
		 * when DAL no longer needs it.
		 */
		if (adev->asic_type != CHIP_NAVI12)
			return 0;
	}

	switch (adev->asic_type) {
	default:
		return 0;
	case CHIP_VEGA10:
		chip_name = "vega10";
		break;
	case CHIP_VEGA12:
		chip_name = "vega12";
		break;
	case CHIP_RAVEN:
		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
			chip_name = "raven2";
		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
			chip_name = "picasso";
		else
			chip_name = "raven";
		break;
	case CHIP_ARCTURUS:
		chip_name = "arcturus";
		break;
	case CHIP_NAVI12:
		chip_name = "navi12";
		break;
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
	err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
	if (err) {
		dev_err(adev->dev,
			"Failed to load gpu_info firmware \"%s\"\n",
			fw_name);
		goto out;
	}
	err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
	if (err) {
		dev_err(adev->dev,
			"Failed to validate gpu_info firmware \"%s\"\n",
			fw_name);
		goto out;
	}

	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);

	switch (hdr->version_major) {
	case 1:
	{
		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
								le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		/*
		 * Should be dropped when DAL no longer needs it.
		 */
		if (adev->asic_type == CHIP_NAVI12)
			goto parse_soc_bounding_box;

		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
		adev->gfx.config.max_texture_channel_caches =
			le32_to_cpu(gpu_info_fw->gc_num_tccs);
		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
		adev->gfx.config.double_offchip_lds_buf =
			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
		adev->gfx.cu_info.max_waves_per_simd =
			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
		adev->gfx.cu_info.max_scratch_slots_per_cu =
			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
		if (hdr->version_minor >= 1) {
			const struct gpu_info_firmware_v1_1 *gpu_info_fw =
				(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			adev->gfx.config.num_sc_per_sh =
				le32_to_cpu(gpu_info_fw->num_sc_per_sh);
			adev->gfx.config.num_packer_per_sc =
				le32_to_cpu(gpu_info_fw->num_packer_per_sc);
		}

parse_soc_bounding_box:
		/*
		 * The soc bounding box info is not integrated in the discovery
		 * table, so we always need to parse it from the gpu info
		 * firmware if needed.
		 */
		if (hdr->version_minor == 2) {
			const struct gpu_info_firmware_v1_2 *gpu_info_fw =
				(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
		}
		break;
	}
	default:
		dev_err(adev->dev,
			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
		err = -EINVAL;
		goto out;
	}
out:
	return err;
}
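
/*
 * Example (illustrative): for CHIP_VEGA10 the snprintf above resolves the
 * firmware path to "amdgpu/vega10_gpu_info.bin", which request_firmware()
 * then loads through the normal firmware search paths.
 */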

/**
 * amdgpu_device_ip_early_init - run early init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Early initialization pass for hardware IPs. The hardware IPs that make
 * up each asic are discovered and each IP's early_init callback is run. This
 * is the first stage in initializing the asic.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
{
	struct drm_device *dev = adev_to_drm(adev);
	struct pci_dev *parent;
	int i, r;

	amdgpu_device_enable_virtual_display(adev);

	if (amdgpu_sriov_vf(adev)) {
		r = amdgpu_virt_request_full_gpu(adev, true);
		if (r)
			return r;
	}

	switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_VERDE:
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_OLAND:
	case CHIP_HAINAN:
		adev->family = AMDGPU_FAMILY_SI;
		r = si_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
		if (adev->flags & AMD_IS_APU)
			adev->family = AMDGPU_FAMILY_KV;
		else
			adev->family = AMDGPU_FAMILY_CI;

		r = cik_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#endif
	case CHIP_TOPAZ:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		if (adev->flags & AMD_IS_APU)
			adev->family = AMDGPU_FAMILY_CZ;
		else
			adev->family = AMDGPU_FAMILY_VI;

		r = vi_set_ip_blocks(adev);
		if (r)
			return r;
		break;
	default:
		r = amdgpu_discovery_set_ip_blocks(adev);
		if (r)
			return r;
		break;
	}

	if (amdgpu_has_atpx() &&
	    (amdgpu_is_atpx_hybrid() ||
	     amdgpu_has_atpx_dgpu_power_cntl()) &&
	    ((adev->flags & AMD_IS_APU) == 0) &&
	    !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
		adev->flags |= AMD_IS_PX;

	if (!(adev->flags & AMD_IS_APU)) {
		parent = pci_upstream_bridge(adev->pdev);
		adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
	}

	amdgpu_amdkfd_device_probe(adev);

	adev->pm.pp_feature = amdgpu_pp_feature_mask;
	if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
	if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
		adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
			DRM_ERROR("disabled ip block: %d <%s>\n",
				  i, adev->ip_blocks[i].version->funcs->name);
			adev->ip_blocks[i].status.valid = false;
		} else {
			if (adev->ip_blocks[i].version->funcs->early_init) {
				r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
				if (r == -ENOENT) {
					adev->ip_blocks[i].status.valid = false;
				} else if (r) {
					DRM_ERROR("early_init of IP block <%s> failed %d\n",
						  adev->ip_blocks[i].version->funcs->name, r);
					return r;
				} else {
					adev->ip_blocks[i].status.valid = true;
				}
			} else {
				adev->ip_blocks[i].status.valid = true;
			}
		}
		/* get the vbios after the asic_funcs are set up */
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
			r = amdgpu_device_parse_gpu_info_fw(adev);
			if (r)
				return r;

			/* Read BIOS */
			if (!amdgpu_get_bios(adev))
				return -EINVAL;

			r = amdgpu_atombios_init(adev);
			if (r) {
				dev_err(adev->dev, "amdgpu_atombios_init failed\n");
				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
				return r;
			}

			/* get pf2vf msg info at its earliest time */
			if (amdgpu_sriov_vf(adev))
				amdgpu_virt_init_data_exchange(adev);

		}
	}

	adev->cg_flags &= amdgpu_cg_mask;
	adev->pg_flags &= amdgpu_pg_mask;

	return 0;
}

static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.sw)
			continue;
		if (adev->ip_blocks[i].status.hw)
			continue;
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
		    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
			if (r) {
				DRM_ERROR("hw_init of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
			adev->ip_blocks[i].status.hw = true;
		}
	}

	return 0;
}

static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.sw)
			continue;
		if (adev->ip_blocks[i].status.hw)
			continue;
		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
		if (r) {
			DRM_ERROR("hw_init of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
			return r;
		}
		adev->ip_blocks[i].status.hw = true;
	}

	return 0;
}

static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
{
	int r = 0;
	int i;
	uint32_t smu_version;

	if (adev->asic_type >= CHIP_VEGA10) {
		for (i = 0; i < adev->num_ip_blocks; i++) {
			if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
				continue;

			if (!adev->ip_blocks[i].status.sw)
				continue;

			/* no need to do the fw loading again if already done */
			if (adev->ip_blocks[i].status.hw == true)
				break;

			if (amdgpu_in_reset(adev) || adev->in_suspend) {
				r = adev->ip_blocks[i].version->funcs->resume(adev);
				if (r) {
					DRM_ERROR("resume of IP block <%s> failed %d\n",
						  adev->ip_blocks[i].version->funcs->name, r);
					return r;
				}
			} else {
				r = adev->ip_blocks[i].version->funcs->hw_init(adev);
				if (r) {
					DRM_ERROR("hw_init of IP block <%s> failed %d\n",
						  adev->ip_blocks[i].version->funcs->name, r);
					return r;
				}
			}

			adev->ip_blocks[i].status.hw = true;
			break;
		}
	}

	if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
		r = amdgpu_pm_load_smu_firmware(adev, &smu_version);

	return r;
}

static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
{
	long timeout;
	int r, i;

	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		struct amdgpu_ring *ring = adev->rings[i];

		/* No need to setup the GPU scheduler for rings that don't need it */
		if (!ring || ring->no_scheduler)
			continue;

		switch (ring->funcs->type) {
		case AMDGPU_RING_TYPE_GFX:
			timeout = adev->gfx_timeout;
			break;
		case AMDGPU_RING_TYPE_COMPUTE:
			timeout = adev->compute_timeout;
			break;
		case AMDGPU_RING_TYPE_SDMA:
			timeout = adev->sdma_timeout;
			break;
		default:
			timeout = adev->video_timeout;
			break;
		}

		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
				   ring->num_hw_submission, amdgpu_job_hang_limit,
				   timeout, adev->reset_domain->wq,
				   ring->sched_score, ring->name,
				   adev->dev);
		if (r) {
			DRM_ERROR("Failed to create scheduler on ring %s.\n",
				  ring->name);
			return r;
		}
	}

	return 0;
}

/**
 * amdgpu_device_ip_init - run init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main initialization pass for hardware IPs. The list of all the hardware
 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
 * are run. sw_init initializes the software state associated with each IP
 * and hw_init initializes the hardware associated with each IP.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_init(struct amdgpu_device *adev)
{
	int i, r;

	r = amdgpu_ras_init(adev);
	if (r)
		return r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
		if (r) {
			DRM_ERROR("sw_init of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
			goto init_failed;
		}
		adev->ip_blocks[i].status.sw = true;

		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
			/* need to do common hw init early so everything is set up for gmc */
			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
			if (r) {
				DRM_ERROR("hw_init %d failed %d\n", i, r);
				goto init_failed;
			}
			adev->ip_blocks[i].status.hw = true;
		} else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
			/* need to do gmc hw init early so we can allocate gpu mem */
			/* Try to reserve bad pages early */
			if (amdgpu_sriov_vf(adev))
				amdgpu_virt_exchange_data(adev);

			r = amdgpu_device_mem_scratch_init(adev);
			if (r) {
				DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
				goto init_failed;
			}
			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
			if (r) {
				DRM_ERROR("hw_init %d failed %d\n", i, r);
				goto init_failed;
			}
			r = amdgpu_device_wb_init(adev);
			if (r) {
				DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
				goto init_failed;
			}
			adev->ip_blocks[i].status.hw = true;

			/* right after GMC hw init, we create CSA */
			if (amdgpu_mcbp) {
				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
							       AMDGPU_GEM_DOMAIN_VRAM |
							       AMDGPU_GEM_DOMAIN_GTT,
							       AMDGPU_CSA_SIZE);
				if (r) {
					DRM_ERROR("allocate CSA failed %d\n", r);
					goto init_failed;
				}
			}
		}
	}

	if (amdgpu_sriov_vf(adev))
		amdgpu_virt_init_data_exchange(adev);

	r = amdgpu_ib_pool_init(adev);
	if (r) {
		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
		goto init_failed;
	}

	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete */
	if (r)
		goto init_failed;

	r = amdgpu_device_ip_hw_init_phase1(adev);
	if (r)
		goto init_failed;

	r = amdgpu_device_fw_loading(adev);
	if (r)
		goto init_failed;

	r = amdgpu_device_ip_hw_init_phase2(adev);
	if (r)
		goto init_failed;

	/*
	 * Retired pages will be loaded from eeprom and reserved here;
	 * this should be done after amdgpu_device_ip_hw_init_phase2 since
	 * for some ASICs the RAS EEPROM code relies on the SMU being fully
	 * functional for I2C communication, which is only true at this point.
	 *
	 * amdgpu_ras_recovery_init may fail, but the upper layers only care
	 * about the failure from a bad gpu situation and stop the amdgpu init
	 * process accordingly. For other failed cases, it will still release
	 * all the resources and print an error message, rather than return a
	 * negative value to the upper level.
	 *
	 * Note: theoretically, this should be called before all vram
	 * allocations to protect retired pages from abuse.
	 */
	r = amdgpu_ras_recovery_init(adev);
	if (r)
		goto init_failed;

	/* In case of XGMI, grab an extra reference on the reset domain for this device */
	if (adev->gmc.xgmi.num_physical_nodes > 1) {
		if (amdgpu_xgmi_add_device(adev) == 0) {
			if (!amdgpu_sriov_vf(adev)) {
				struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);

				if (WARN_ON(!hive)) {
					r = -ENOENT;
					goto init_failed;
				}

				if (!hive->reset_domain ||
				    !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
					r = -ENOENT;
					amdgpu_put_xgmi_hive(hive);
					goto init_failed;
				}

				/* Drop the early temporary reset domain we created for device */
				amdgpu_reset_put_reset_domain(adev->reset_domain);
				adev->reset_domain = hive->reset_domain;
				amdgpu_put_xgmi_hive(hive);
			}
		}
	}

	r = amdgpu_device_init_schedulers(adev);
	if (r)
		goto init_failed;

	/* Don't init kfd if whole hive need to be reset during init */
	if (!adev->gmc.xgmi.pending_reset)
		amdgpu_amdkfd_device_init(adev);

	amdgpu_fru_get_product_info(adev);

init_failed:
	if (amdgpu_sriov_vf(adev))
		amdgpu_virt_release_full_gpu(adev, true);

	return r;
}

/**
 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
 *
 * @adev: amdgpu_device pointer
 *
 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
 * this function before a GPU reset. If the value is retained after a
 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
 */
static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
{
	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
}
2528
e3ecdffa
AD
2529/**
2530 * amdgpu_device_check_vram_lost - check if vram is valid
2531 *
2532 * @adev: amdgpu_device pointer
2533 *
2534 * Checks the reset magic value written to the gart pointer in VRAM.
2535 * The driver calls this after a GPU reset to see if the contents of
2536 * VRAM is lost or now.
2537 * returns true if vram is lost, false if not.
2538 */
06ec9070 2539static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2540{
dadce777
EQ
2541 if (memcmp(adev->gart.ptr, adev->reset_magic,
2542 AMDGPU_RESET_MAGIC_NUM))
2543 return true;
2544
53b3f8f4 2545 if (!amdgpu_in_reset(adev))
dadce777
EQ
2546 return false;
2547
2548 /*
2549 * For all ASICs with baco/mode1 reset, the VRAM is
2550 * always assumed to be lost.
2551 */
2552 switch (amdgpu_asic_reset_method(adev)) {
2553 case AMD_RESET_METHOD_BACO:
2554 case AMD_RESET_METHOD_MODE1:
2555 return true;
2556 default:
2557 return false;
2558 }
0c49e0b8
CZ
2559}
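
/*
 * Example (illustrative sketch) of how the two helpers above pair up
 * around an ASIC reset in a hypothetical caller:
 *
 *	amdgpu_device_fill_reset_magic(adev);
 *	// ... perform the ASIC reset ...
 *	if (amdgpu_device_check_vram_lost(adev))
 *		DRM_INFO("VRAM is lost due to GPU reset!\n");
 */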

/**
 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
 *
 * @adev: amdgpu_device pointer
 * @state: clockgating state (gate or ungate)
 *
 * The list of all the hardware IPs that make up the asic is walked and the
 * set_clockgating_state callbacks are run. On the late init pass this
 * enables clockgating for the hardware IPs; on the fini or suspend pass
 * it disables clockgating.
 * Returns 0 on success, negative error code on failure.
 */
int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
			       enum amd_clockgating_state state)
{
	int i, j, r;

	if (amdgpu_emu_mode == 1)
		return 0;

	for (j = 0; j < adev->num_ip_blocks; j++) {
		i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
		if (!adev->ip_blocks[i].status.late_initialized)
			continue;
		/* skip CG for GFX, SDMA on S0ix */
		if (adev->in_s0ix &&
		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
			continue;
		/* skip CG for VCE/UVD, it's handled specially */
		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
			/* enable clockgating to save power */
			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
										     state);
			if (r) {
				DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
		}
	}

	return 0;
}

int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
			       enum amd_powergating_state state)
{
	int i, j, r;

	if (amdgpu_emu_mode == 1)
		return 0;

	for (j = 0; j < adev->num_ip_blocks; j++) {
		i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
		if (!adev->ip_blocks[i].status.late_initialized)
			continue;
		/* skip PG for GFX, SDMA on S0ix */
		if (adev->in_s0ix &&
		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
			continue;
		/* skip PG for VCE/UVD, it's handled specially */
		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
		    adev->ip_blocks[i].version->funcs->set_powergating_state) {
			/* enable powergating to save power */
			r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
										     state);
			if (r) {
				DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
		}
	}
	return 0;
}

static int amdgpu_device_enable_mgpu_fan_boost(void)
{
	struct amdgpu_gpu_instance *gpu_ins;
	struct amdgpu_device *adev;
	int i, ret = 0;

	mutex_lock(&mgpu_info.mutex);

	/*
	 * MGPU fan boost feature should be enabled
	 * only when there are two or more dGPUs in
	 * the system
	 */
	if (mgpu_info.num_dgpu < 2)
		goto out;

	for (i = 0; i < mgpu_info.num_dgpu; i++) {
		gpu_ins = &(mgpu_info.gpu_ins[i]);
		adev = gpu_ins->adev;
		if (!(adev->flags & AMD_IS_APU) &&
		    !gpu_ins->mgpu_fan_enabled) {
			ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
			if (ret)
				break;

			gpu_ins->mgpu_fan_enabled = 1;
		}
	}

out:
	mutex_unlock(&mgpu_info.mutex);

	return ret;
}

/**
 * amdgpu_device_ip_late_init - run late init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Late initialization pass for hardware IPs. The list of all the hardware
 * IPs that make up the asic is walked and the late_init callbacks are run.
 * late_init covers any special initialization that an IP requires
 * after all of the IPs have been initialized or something that needs to happen
 * late in the init process.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
{
	struct amdgpu_gpu_instance *gpu_instance;
	int i = 0, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.hw)
			continue;
		if (adev->ip_blocks[i].version->funcs->late_init) {
			r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
			if (r) {
				DRM_ERROR("late_init of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
		}
		adev->ip_blocks[i].status.late_initialized = true;
	}

	r = amdgpu_ras_late_init(adev);
	if (r) {
		DRM_ERROR("amdgpu_ras_late_init failed %d", r);
		return r;
	}

	amdgpu_ras_set_error_query_ready(adev, true);

	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);

	amdgpu_device_fill_reset_magic(adev);

	r = amdgpu_device_enable_mgpu_fan_boost();
	if (r)
		DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);

	/* For passthrough configuration on arcturus and aldebaran, enable special SBR handling */
	if (amdgpu_passthrough(adev) &&
	    ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
	     adev->asic_type == CHIP_ALDEBARAN))
		amdgpu_dpm_handle_passthrough_sbr(adev, true);

	if (adev->gmc.xgmi.num_physical_nodes > 1) {
		mutex_lock(&mgpu_info.mutex);

		/*
		 * Reset device p-state to low as this was booted with high.
		 *
		 * This should be performed only after all devices from the same
		 * hive get initialized.
		 *
		 * However, it's unknown in advance how many devices are in the
		 * hive, as they are counted one by one during device
		 * initialization.
		 *
		 * So, we wait for all XGMI interlinked devices to be initialized.
		 * This may bring some delays as those devices may come from
		 * different hives. But that should be OK.
		 */
		if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
			for (i = 0; i < mgpu_info.num_gpu; i++) {
				gpu_instance = &(mgpu_info.gpu_ins[i]);
				if (gpu_instance->adev->flags & AMD_IS_APU)
					continue;

				r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
							   AMDGPU_XGMI_PSTATE_MIN);
				if (r) {
					DRM_ERROR("pstate setting failed (%d).\n", r);
					break;
				}
			}
		}

		mutex_unlock(&mgpu_info.mutex);
	}

	return 0;
}

/**
 * amdgpu_device_smu_fini_early - smu hw_fini wrapper
 *
 * @adev: amdgpu_device pointer
 *
 * For ASICs that need to disable the SMC first
 */
static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
{
	int i, r;

	if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))
		return;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.hw)
			continue;
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
			r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
			/* XXX handle errors */
			if (r) {
				DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
			}
			adev->ip_blocks[i].status.hw = false;
			break;
		}
	}
}

static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].version->funcs->early_fini)
			continue;

		r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
		if (r) {
			DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
		}
	}

	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);

	amdgpu_amdkfd_suspend(adev, false);

	/* Workaround for ASICs that need to disable SMC first */
	amdgpu_device_smu_fini_early(adev);

	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
		if (!adev->ip_blocks[i].status.hw)
			continue;

		r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
		/* XXX handle errors */
		if (r) {
			DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
		}

		adev->ip_blocks[i].status.hw = false;
	}

	if (amdgpu_sriov_vf(adev)) {
		if (amdgpu_virt_release_full_gpu(adev, false))
			DRM_ERROR("failed to release exclusive mode on fini\n");
	}

	return 0;
}

/**
 * amdgpu_device_ip_fini - run fini for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main teardown pass for hardware IPs. The list of all the hardware
 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
 * are run. hw_fini tears down the hardware associated with each IP
 * and sw_fini tears down any software state associated with each IP.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
{
	int i, r;

	if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
		amdgpu_virt_release_ras_err_handler_data(adev);

	if (adev->gmc.xgmi.num_physical_nodes > 1)
		amdgpu_xgmi_remove_device(adev);

	amdgpu_amdkfd_device_fini_sw(adev);

	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
		if (!adev->ip_blocks[i].status.sw)
			continue;

		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
			amdgpu_ucode_free_bo(adev);
			amdgpu_free_static_csa(&adev->virt.csa_obj);
			amdgpu_device_wb_fini(adev);
			amdgpu_device_mem_scratch_fini(adev);
			amdgpu_ib_pool_fini(adev);
		}

		r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
		/* XXX handle errors */
		if (r) {
			DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
		}
		adev->ip_blocks[i].status.sw = false;
		adev->ip_blocks[i].status.valid = false;
	}

	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
		if (!adev->ip_blocks[i].status.late_initialized)
			continue;
		if (adev->ip_blocks[i].version->funcs->late_fini)
			adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
		adev->ip_blocks[i].status.late_initialized = false;
	}

	amdgpu_ras_fini(adev);

	return 0;
}

/**
 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
 *
 * @work: work_struct.
 */
static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
{
	struct amdgpu_device *adev =
		container_of(work, struct amdgpu_device, delayed_init_work.work);
	int r;

	r = amdgpu_ib_ring_tests(adev);
	if (r)
		DRM_ERROR("ib ring test failed (%d).\n", r);
}

static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
{
	struct amdgpu_device *adev =
		container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);

	WARN_ON_ONCE(adev->gfx.gfx_off_state);
	WARN_ON_ONCE(adev->gfx.gfx_off_req_count);

	if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
		adev->gfx.gfx_off_state = true;
}

/**
 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
 *
 * @adev: amdgpu_device pointer
 *
 * Main suspend function for hardware IPs. The list of all the hardware
 * IPs that make up the asic is walked, clockgating is disabled and the
 * suspend callbacks are run. suspend puts the hardware and software state
 * in each IP into a state suitable for suspend.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
{
	int i, r;

	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);

	/*
	 * Per PMFW team's suggestion, driver needs to handle gfxoff
	 * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
	 * scenario. Add the missing df cstate disablement here.
	 */
	if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
		dev_warn(adev->dev, "Failed to disallow df cstate");

	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
		if (!adev->ip_blocks[i].status.valid)
			continue;

		/* displays are handled separately */
		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
			continue;

		/* XXX handle errors */
		r = adev->ip_blocks[i].version->funcs->suspend(adev);
		/* XXX handle errors */
		if (r) {
			DRM_ERROR("suspend of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
			return r;
		}

		adev->ip_blocks[i].status.hw = false;
	}

	return 0;
}

/**
 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
 *
 * @adev: amdgpu_device pointer
 *
 * Main suspend function for hardware IPs. The list of all the hardware
 * IPs that make up the asic is walked, clockgating is disabled and the
 * suspend callbacks are run. suspend puts the hardware and software state
 * in each IP into a state suitable for suspend.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
{
	int i, r;

	if (adev->in_s0ix)
		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);

	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		/* displays are handled in phase1 */
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
			continue;
		/* PSP lost connection when err_event_athub occurs */
		if (amdgpu_ras_intr_triggered() &&
		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
			adev->ip_blocks[i].status.hw = false;
			continue;
		}

		/* skip unnecessary suspend if we do not initialize them yet */
		if (adev->gmc.xgmi.pending_reset &&
		    !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
			adev->ip_blocks[i].status.hw = false;
			continue;
		}

		/* skip suspend of gfx/mes and psp for S0ix
		 * gfx is in gfxoff state, so on resume it will exit gfxoff just
		 * like at runtime. PSP is also part of the always on hardware
		 * so no need to suspend it.
		 */
		if (adev->in_s0ix &&
		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
			continue;

		/* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
		if (adev->in_s0ix &&
		    (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 0, 0)) &&
		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
			continue;

		/* XXX handle errors */
		r = adev->ip_blocks[i].version->funcs->suspend(adev);
		/* XXX handle errors */
		if (r) {
			DRM_ERROR("suspend of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
		}
		adev->ip_blocks[i].status.hw = false;
		/* handle putting the SMC in the appropriate state */
		if (!amdgpu_sriov_vf(adev)) {
			if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
				r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
				if (r) {
					DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
						  adev->mp1_state, r);
					return r;
				}
			}
		}
	}

	return 0;
}

/**
 * amdgpu_device_ip_suspend - run suspend for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main suspend function for hardware IPs. The list of all the hardware
 * IPs that make up the asic is walked, clockgating is disabled and the
 * suspend callbacks are run. suspend puts the hardware and software state
 * in each IP into a state suitable for suspend.
 * Returns 0 on success, negative error code on failure.
 */
int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
{
	int r;

	if (amdgpu_sriov_vf(adev)) {
		amdgpu_virt_fini_data_exchange(adev);
		amdgpu_virt_request_full_gpu(adev, false);
	}

	r = amdgpu_device_ip_suspend_phase1(adev);
	if (r)
		return r;
	r = amdgpu_device_ip_suspend_phase2(adev);

	if (amdgpu_sriov_vf(adev))
		amdgpu_virt_release_full_gpu(adev, false);

	return r;
}

static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
{
	int i, r;

	static enum amd_ip_block_type ip_order[] = {
		AMD_IP_BLOCK_TYPE_COMMON,
		AMD_IP_BLOCK_TYPE_GMC,
		AMD_IP_BLOCK_TYPE_PSP,
		AMD_IP_BLOCK_TYPE_IH,
	};

	for (i = 0; i < adev->num_ip_blocks; i++) {
		int j;
		struct amdgpu_ip_block *block;

		block = &adev->ip_blocks[i];
		block->status.hw = false;

		for (j = 0; j < ARRAY_SIZE(ip_order); j++) {

			if (block->version->type != ip_order[j] ||
			    !block->status.valid)
				continue;

			r = block->version->funcs->hw_init(adev);
			DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
			if (r)
				return r;
			block->status.hw = true;
		}
	}

	return 0;
}

static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
{
	int i, r;

	static enum amd_ip_block_type ip_order[] = {
		AMD_IP_BLOCK_TYPE_SMC,
		AMD_IP_BLOCK_TYPE_DCE,
		AMD_IP_BLOCK_TYPE_GFX,
		AMD_IP_BLOCK_TYPE_SDMA,
		AMD_IP_BLOCK_TYPE_UVD,
		AMD_IP_BLOCK_TYPE_VCE,
		AMD_IP_BLOCK_TYPE_VCN
	};

	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
		int j;
		struct amdgpu_ip_block *block;

		for (j = 0; j < adev->num_ip_blocks; j++) {
			block = &adev->ip_blocks[j];

			if (block->version->type != ip_order[i] ||
			    !block->status.valid ||
			    block->status.hw)
				continue;

			if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
				r = block->version->funcs->resume(adev);
			else
				r = block->version->funcs->hw_init(adev);

			DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
			if (r)
				return r;
			block->status.hw = true;
		}
	}

	return 0;
}

/**
 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * First resume function for hardware IPs. The list of all the hardware
 * IPs that make up the asic is walked and the resume callbacks are run for
 * COMMON, GMC, and IH. resume puts the hardware into a functional state
 * after a suspend and updates the software state as necessary. This
 * function is also used for restoring the GPU after a GPU reset.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
			continue;
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {

			r = adev->ip_blocks[i].version->funcs->resume(adev);
			if (r) {
				DRM_ERROR("resume of IP block <%s> failed %d\n",
					  adev->ip_blocks[i].version->funcs->name, r);
				return r;
			}
			adev->ip_blocks[i].status.hw = true;
		}
	}

	return 0;
}

/**
 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Second resume function for hardware IPs. The list of all the hardware
 * IPs that make up the asic is walked and the resume callbacks are run for
 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
 * functional state after a suspend and updates the software state as
 * necessary. This function is also used for restoring the GPU after a GPU
 * reset.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
			continue;
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
			continue;
		r = adev->ip_blocks[i].version->funcs->resume(adev);
		if (r) {
			DRM_ERROR("resume of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
			return r;
		}
		adev->ip_blocks[i].status.hw = true;
	}

	return 0;
}

/**
 * amdgpu_device_ip_resume - run resume for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main resume function for hardware IPs. The hardware IPs
 * are split into two resume functions because they are
 * also used in recovering from a GPU reset and some additional
 * steps need to be taken between them. In this case (S3/S4) they are
 * run sequentially.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
{
	int r;

	r = amdgpu_amdkfd_resume_iommu(adev);
	if (r)
		return r;

	r = amdgpu_device_ip_resume_phase1(adev);
	if (r)
		return r;

	r = amdgpu_device_fw_loading(adev);
	if (r)
		return r;

	r = amdgpu_device_ip_resume_phase2(adev);

	return r;
}

/**
 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
 *
 * @adev: amdgpu_device pointer
 *
 * Query the VBIOS data tables to determine if the board supports SR-IOV.
 */
static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
{
	if (amdgpu_sriov_vf(adev)) {
		if (adev->is_atom_fw) {
			if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
		} else {
			if (amdgpu_atombios_has_gpu_virtualization_table(adev))
				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
		}

		if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
	}
}

/**
 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
 *
 * @asic_type: AMD asic type
 *
 * Check if there is DC (new modesetting infrastructure) support for an asic.
 * Returns true if DC has support, false if not.
 */
bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
{
	switch (asic_type) {
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_HAINAN:
#endif
	case CHIP_TOPAZ:
		/* chips with no display hardware */
		return false;
#if defined(CONFIG_DRM_AMD_DC)
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_VERDE:
	case CHIP_OLAND:
		/*
		 * We have systems in the wild with these ASICs that require
		 * LVDS and VGA support which is not supported with DC.
		 *
		 * Fallback to the non-DC driver here by default so as not to
		 * cause regressions.
		 */
#if defined(CONFIG_DRM_AMD_DC_SI)
		return amdgpu_dc > 0;
#else
		return false;
#endif
	case CHIP_BONAIRE:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
		/*
		 * We have systems in the wild with these ASICs that require
		 * VGA support which is not supported with DC.
		 *
		 * Fallback to the non-DC driver here by default so as not to
		 * cause regressions.
		 */
		return amdgpu_dc > 0;
	default:
		return amdgpu_dc != 0;
#else
	default:
		if (amdgpu_dc > 0)
			DRM_INFO_ONCE("Display Core has been requested via kernel parameter "
				      "but isn't supported by ASIC, ignoring\n");
		return false;
#endif
	}
}

/**
 * amdgpu_device_has_dc_support - check if dc is supported
 *
 * @adev: amdgpu_device pointer
 *
 * Returns true for supported, false for not supported
 */
bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
{
	if (adev->enable_virtual_display ||
	    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
		return false;

	return amdgpu_device_asic_has_dc_support(adev->asic_type);
}
3374
d4535e2c
AG
3375static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3376{
3377 struct amdgpu_device *adev =
3378 container_of(__work, struct amdgpu_device, xgmi_reset_work);
d95e8e97 3379 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
d4535e2c 3380
c6a6e2db
AG
3381 /* It's a bug to not have a hive within this function */
3382 if (WARN_ON(!hive))
3383 return;
3384
3385 /*
3386 * Use task barrier to synchronize all xgmi reset works across the
3387 * hive. task_barrier_enter and task_barrier_exit will block
3388 * until all the threads running the xgmi reset works reach
3389 * those points. task_barrier_full will do both blocks.
3390 */
3391 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3392
3393 task_barrier_enter(&hive->tb);
4a580877 3394 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
c6a6e2db
AG
3395
3396 if (adev->asic_reset_res)
3397 goto fail;
3398
3399 task_barrier_exit(&hive->tb);
4a580877 3400 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
c6a6e2db
AG
3401
3402 if (adev->asic_reset_res)
3403 goto fail;
43c4d576 3404
5e67bba3 3405 if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
3406 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
3407 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
c6a6e2db
AG
3408 } else {
3409
3410 task_barrier_full(&hive->tb);
3411 adev->asic_reset_res = amdgpu_asic_reset(adev);
3412 }
ce316fa5 3413
c6a6e2db 3414fail:
d4535e2c 3415 if (adev->asic_reset_res)
fed184e9 3416 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
4a580877 3417 adev->asic_reset_res, adev_to_drm(adev)->unique);
d95e8e97 3418 amdgpu_put_xgmi_hive(hive);
d4535e2c
AG
3419}
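/*
 * Illustrative sketch of the task barrier pattern used above (it relies on
 * the helpers from <drm/task_barrier.h>, which this file already includes;
 * example_per_device_step() is a hypothetical stand-in):
 */
#if 0
static void example_synced_work(struct task_barrier *tb,
				struct amdgpu_device *adev)
{
	task_barrier_enter(tb);		/* block until every hive member arrives */
	example_per_device_step(adev);	/* step that must start in lockstep */
	task_barrier_exit(tb);		/* block until every member has finished */
}
#endif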
3420
71f98027
AD
3421static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3422{
3423 char *input = amdgpu_lockup_timeout;
3424 char *timeout_setting = NULL;
3425 int index = 0;
3426 long timeout;
3427 int ret = 0;
3428
3429 /*
67387dfe
AD
3430 * By default timeout for non compute jobs is 10000
3431 * and 60000 for compute jobs.
71f98027 3432 * In SR-IOV (one-VF mode) or passthrough mode, the compute
b7b2a316 3433 * job timeout also defaults to 60000.
71f98027
AD
3434 */
3435 adev->gfx_timeout = msecs_to_jiffies(10000);
3436 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
9882e278
ED
3437 if (amdgpu_sriov_vf(adev))
3438 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3439 msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
71f98027 3440 else
67387dfe 3441 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027 3442
f440ff44 3443 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 3444 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 3445 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
3446 ret = kstrtol(timeout_setting, 0, &timeout);
3447 if (ret)
3448 return ret;
3449
3450 if (timeout == 0) {
3451 index++;
3452 continue;
3453 } else if (timeout < 0) {
3454 timeout = MAX_SCHEDULE_TIMEOUT;
127aedf9
CK
3455 dev_warn(adev->dev, "lockup timeout disabled");
3456 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
71f98027
AD
3457 } else {
3458 timeout = msecs_to_jiffies(timeout);
3459 }
3460
3461 switch (index++) {
3462 case 0:
3463 adev->gfx_timeout = timeout;
3464 break;
3465 case 1:
3466 adev->compute_timeout = timeout;
3467 break;
3468 case 2:
3469 adev->sdma_timeout = timeout;
3470 break;
3471 case 3:
3472 adev->video_timeout = timeout;
3473 break;
3474 default:
3475 break;
3476 }
3477 }
3478 /*
3479 * There is only one value specified and
3480 * it should apply to all non-compute jobs.
3481 */
bcccee89 3482 if (index == 1) {
71f98027 3483 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
3484 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3485 adev->compute_timeout = adev->gfx_timeout;
3486 }
71f98027
AD
3487 }
3488
3489 return ret;
3490}
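/*
 * Example (derived from the parsing above, not a statement of any other
 * documentation): amdgpu.lockup_timeout takes up to four comma-separated
 * millisecond values in the order gfx,compute,sdma,video, e.g.
 *
 *   modprobe amdgpu lockup_timeout=10000,60000,10000,10000
 *
 * A single value applies to all non-compute queues (and to compute as well
 * under SR-IOV or passthrough). A value of 0 keeps the default and a
 * negative value disables the timeout entirely.
 */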
d4535e2c 3491
4a74c38c
PY
3492/**
3493 * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3494 *
3495 * @adev: amdgpu_device pointer
3496 *
3497 * RAM is direct mapped to the GPU if the IOMMU is not enabled or is in passthrough mode
3498 */
3499static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3500{
3501 struct iommu_domain *domain;
3502
3503 domain = iommu_get_domain_for_dev(adev->dev);
3504 if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3505 adev->ram_is_direct_mapped = true;
3506}
3507
77f3a5cd
ND
3508static const struct attribute *amdgpu_dev_attributes[] = {
3509 &dev_attr_product_name.attr,
3510 &dev_attr_product_number.attr,
3511 &dev_attr_serial_number.attr,
3512 &dev_attr_pcie_replay_count.attr,
3513 NULL
3514};
3515
d38ceaf9
AD
3516/**
3517 * amdgpu_device_init - initialize the driver
3518 *
3519 * @adev: amdgpu_device pointer
d38ceaf9
AD
3520 * @flags: driver flags
3521 *
3522 * Initializes the driver info and hw (all asics).
3523 * Returns 0 for success or an error on failure.
3524 * Called at driver startup.
3525 */
3526int amdgpu_device_init(struct amdgpu_device *adev,
d38ceaf9
AD
3527 uint32_t flags)
3528{
8aba21b7
LT
3529 struct drm_device *ddev = adev_to_drm(adev);
3530 struct pci_dev *pdev = adev->pdev;
d38ceaf9 3531 int r, i;
b98c6299 3532 bool px = false;
95844d20 3533 u32 max_MBps;
d38ceaf9
AD
3534
3535 adev->shutdown = false;
d38ceaf9 3536 adev->flags = flags;
4e66d7d2
YZ
3537
3538 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3539 adev->asic_type = amdgpu_force_asic_type;
3540 else
3541 adev->asic_type = flags & AMD_ASIC_MASK;
3542
d38ceaf9 3543 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 3544 if (amdgpu_emu_mode == 1)
8bdab6bb 3545 adev->usec_timeout *= 10;
770d13b1 3546 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
3547 adev->accel_working = false;
3548 adev->num_rings = 0;
68ce8b24 3549 RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
d38ceaf9
AD
3550 adev->mman.buffer_funcs = NULL;
3551 adev->mman.buffer_funcs_ring = NULL;
3552 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 3553 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 3554 adev->gmc.gmc_funcs = NULL;
7bd939d0 3555 adev->harvest_ip_mask = 0x0;
f54d1867 3556 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 3557 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
3558
3559 adev->smc_rreg = &amdgpu_invalid_rreg;
3560 adev->smc_wreg = &amdgpu_invalid_wreg;
3561 adev->pcie_rreg = &amdgpu_invalid_rreg;
3562 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
3563 adev->pciep_rreg = &amdgpu_invalid_rreg;
3564 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
3565 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3566 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
3567 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3568 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3569 adev->didt_rreg = &amdgpu_invalid_rreg;
3570 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
3571 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3572 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
3573 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3574 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3575
3e39ab90
AD
3576 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3577 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3578 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
3579
3580 /* mutex initialization is all done here so we
3581 * can call these functions without locking issues */
0e5ca0d1 3582 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
3583 mutex_init(&adev->pm.mutex);
3584 mutex_init(&adev->gfx.gpu_clock_mutex);
3585 mutex_init(&adev->srbm_mutex);
b8866c26 3586 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3587 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 3588 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3589 mutex_init(&adev->mn_lock);
e23b74aa 3590 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3591 hash_init(adev->mn_hash);
32eaeae0 3592 mutex_init(&adev->psp.mutex);
bd052211 3593 mutex_init(&adev->notifier_lock);
8cda7a4f 3594 mutex_init(&adev->pm.stable_pstate_ctx_lock);
f113cc32 3595 mutex_init(&adev->benchmark_mutex);
d38ceaf9 3596
ab3b9de6 3597 amdgpu_device_init_apu_flags(adev);
9f6a7857 3598
912dfc84
EQ
3599 r = amdgpu_device_check_arguments(adev);
3600 if (r)
3601 return r;
d38ceaf9 3602
d38ceaf9
AD
3603 spin_lock_init(&adev->mmio_idx_lock);
3604 spin_lock_init(&adev->smc_idx_lock);
3605 spin_lock_init(&adev->pcie_idx_lock);
3606 spin_lock_init(&adev->uvd_ctx_idx_lock);
3607 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3608 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3609 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3610 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3611 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3612
0c4e7fa5
CZ
3613 INIT_LIST_HEAD(&adev->shadow_list);
3614 mutex_init(&adev->shadow_list_lock);
3615
655ce9cb 3616 INIT_LIST_HEAD(&adev->reset_list);
3617
6492e1b0 3618 INIT_LIST_HEAD(&adev->ras_list);
3619
beff74bc
AD
3620 INIT_DELAYED_WORK(&adev->delayed_init_work,
3621 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3622 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3623 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3624
d4535e2c
AG
3625 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3626
d23ee13f 3627 adev->gfx.gfx_off_req_count = 1;
0ad7347a
AA
3628 adev->gfx.gfx_off_residency = 0;
3629 adev->gfx.gfx_off_entrycount = 0;
b6e79d9a 3630 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3631
b265bdbd
EQ
3632 atomic_set(&adev->throttling_logging_enabled, 1);
3633 /*
3634 * If throttling continues, logging will be performed every minute
3635 * to avoid log flooding. "-1" is subtracted since the thermal
3636 * throttling interrupt comes every second. Thus, the total logging
3637 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
3638 * for throttling interrupt) = 60 seconds.
3639 */
3640 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3641 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3642
0fa49558
AX
3643 /* Registers mapping */
3644 /* TODO: block userspace mapping of io register */
da69c161
KW
3645 if (adev->asic_type >= CHIP_BONAIRE) {
3646 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3647 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3648 } else {
3649 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3650 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3651 }
d38ceaf9 3652
6c08e0ef
EQ
3653 for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3654 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3655
d38ceaf9
AD
3656 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3657 if (adev->rmmio == NULL) {
3658 return -ENOMEM;
3659 }
3660 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3661 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3662
5494d864
AD
3663 amdgpu_device_get_pcie_info(adev);
3664
b239c017
JX
3665 if (amdgpu_mcbp)
3666 DRM_INFO("MCBP is enabled\n");
3667
436afdfa
PY
3668 /*
3669 * The reset domain needs to be present early, before the XGMI hive is
3670 * discovered (if any) and initialized, so the reset sem and in_gpu_reset
3671 * flag can be used early during init and before the first RREG32 call.
3672 */
3673 adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3674 if (!adev->reset_domain)
3675 return -ENOMEM;
3676
3aa0115d
ML
3677 /* detect hw virtualization here */
3678 amdgpu_detect_virtualization(adev);
3679
dffa11b4
ML
3680 r = amdgpu_device_get_job_timeout_settings(adev);
3681 if (r) {
3682 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4ef87d8f 3683 return r;
a190d1c7
XY
3684 }
3685
d38ceaf9 3686 /* early init functions */
06ec9070 3687 r = amdgpu_device_ip_early_init(adev);
d38ceaf9 3688 if (r)
4ef87d8f 3689 return r;
d38ceaf9 3690
4d33e704
SK
3691 /* Enable TMZ based on IP_VERSION */
3692 amdgpu_gmc_tmz_set(adev);
3693
957b0787 3694 amdgpu_gmc_noretry_set(adev);
4a0165f0
VS
3695 /* Need to get xgmi info early to decide the reset behavior */
3696 if (adev->gmc.xgmi.supported) {
3697 r = adev->gfxhub.funcs->get_xgmi_info(adev);
3698 if (r)
3699 return r;
3700 }
3701
8e6d0b69 3702 /* enable PCIE atomic ops */
3703 if (amdgpu_sriov_vf(adev))
3704 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
e15c9d06 3705 adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
8e6d0b69 3706 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3707 else
3708 adev->have_atomics_support =
3709 !pci_enable_atomic_ops_to_root(adev->pdev,
3710 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3711 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3712 if (!adev->have_atomics_support)
3713 dev_info(adev->dev, "PCIE atomic ops are not supported\n");
3714
6585661d
OZ
3715 /* doorbell bar mapping and doorbell index init */
3716 amdgpu_device_doorbell_init(adev);
3717
9475a943
SL
3718 if (amdgpu_emu_mode == 1) {
3719 /* post the asic on emulation mode */
3720 emu_soc_asic_init(adev);
bfca0289 3721 goto fence_driver_init;
9475a943 3722 }
bfca0289 3723
04442bf7
LL
3724 amdgpu_reset_init(adev);
3725
4e99a44e
ML
3726 /* detect if we are with an SRIOV vbios */
3727 amdgpu_device_detect_sriov_bios(adev);
048765ad 3728
95e8e59e
AD
3729 /* check if we need to reset the asic
3730 * E.g., driver was not cleanly unloaded previously, etc.
3731 */
f14899fd 3732 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
e3c1b071 3733 if (adev->gmc.xgmi.num_physical_nodes) {
3734 dev_info(adev->dev, "Pending hive reset.\n");
3735 adev->gmc.xgmi.pending_reset = true;
3736 /* Only need to init necessary block for SMU to handle the reset */
3737 for (i = 0; i < adev->num_ip_blocks; i++) {
3738 if (!adev->ip_blocks[i].status.valid)
3739 continue;
3740 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3741 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3742 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3743 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
751f43e7 3744 DRM_DEBUG("IP %s disabled for hw_init.\n",
e3c1b071 3745 adev->ip_blocks[i].version->funcs->name);
3746 adev->ip_blocks[i].status.hw = true;
3747 }
3748 }
3749 } else {
3750 r = amdgpu_asic_reset(adev);
3751 if (r) {
3752 dev_err(adev->dev, "asic reset on init failed\n");
3753 goto failed;
3754 }
95e8e59e
AD
3755 }
3756 }
3757
8f66090b 3758 pci_enable_pcie_error_reporting(adev->pdev);
c9a6b82f 3759
d38ceaf9 3760 /* Post card if necessary */
39c640c0 3761 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3762 if (!adev->bios) {
bec86378 3763 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3764 r = -EINVAL;
3765 goto failed;
d38ceaf9 3766 }
bec86378 3767 DRM_INFO("GPU posting now...\n");
4d2997ab 3768 r = amdgpu_device_asic_init(adev);
4e99a44e
ML
3769 if (r) {
3770 dev_err(adev->dev, "gpu post error!\n");
3771 goto failed;
3772 }
d38ceaf9
AD
3773 }
3774
88b64e95
AD
3775 if (adev->is_atom_fw) {
3776 /* Initialize clocks */
3777 r = amdgpu_atomfirmware_get_clock_info(adev);
3778 if (r) {
3779 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 3780 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
3781 goto failed;
3782 }
3783 } else {
a5bde2f9
AD
3784 /* Initialize clocks */
3785 r = amdgpu_atombios_get_clock_info(adev);
3786 if (r) {
3787 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 3788 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 3789 goto failed;
a5bde2f9
AD
3790 }
3791 /* init i2c buses */
4562236b
HW
3792 if (!amdgpu_device_has_dc_support(adev))
3793 amdgpu_atombios_i2c_init(adev);
2c1a2784 3794 }
d38ceaf9 3795
bfca0289 3796fence_driver_init:
d38ceaf9 3797 /* Fence driver */
067f44c8 3798 r = amdgpu_fence_driver_sw_init(adev);
2c1a2784 3799 if (r) {
067f44c8 3800 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
e23b74aa 3801 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3802 goto failed;
2c1a2784 3803 }
d38ceaf9
AD
3804
3805 /* init the mode config */
4a580877 3806 drm_mode_config_init(adev_to_drm(adev));
d38ceaf9 3807
06ec9070 3808 r = amdgpu_device_ip_init(adev);
d38ceaf9 3809 if (r) {
8840a387 3810 /* failed in exclusive mode due to timeout */
3811 if (amdgpu_sriov_vf(adev) &&
3812 !amdgpu_sriov_runtime(adev) &&
3813 amdgpu_virt_mmio_blocked(adev) &&
3814 !amdgpu_virt_wait_reset(adev)) {
3815 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
3816 /* Don't send request since VF is inactive. */
3817 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3818 adev->virt.ops = NULL;
8840a387 3819 r = -EAGAIN;
970fd197 3820 goto release_ras_con;
8840a387 3821 }
06ec9070 3822 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3823 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
970fd197 3824 goto release_ras_con;
d38ceaf9
AD
3825 }
3826
8d35a259
LG
3827 amdgpu_fence_driver_hw_init(adev);
3828
d69b8971
YZ
3829 dev_info(adev->dev,
3830 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
3831 adev->gfx.config.max_shader_engines,
3832 adev->gfx.config.max_sh_per_se,
3833 adev->gfx.config.max_cu_per_sh,
3834 adev->gfx.cu_info.number);
3835
d38ceaf9
AD
3836 adev->accel_working = true;
3837
e59c0205
AX
3838 amdgpu_vm_check_compute_bug(adev);
3839
95844d20
MO
3840 /* Initialize the buffer migration limit. */
3841 if (amdgpu_moverate >= 0)
3842 max_MBps = amdgpu_moverate;
3843 else
3844 max_MBps = 8; /* Allow 8 MB/s. */
3845 /* Get a log2 for easy divisions. */
3846 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3847
d2f52ac8 3848 r = amdgpu_pm_sysfs_init(adev);
7c868b59
YT
3849 if (r) {
3850 adev->pm_sysfs_en = false;
d2f52ac8 3851 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
7c868b59
YT
3852 } else
3853 adev->pm_sysfs_en = true;
d2f52ac8 3854
5bb23532 3855 r = amdgpu_ucode_sysfs_init(adev);
7c868b59
YT
3856 if (r) {
3857 adev->ucode_sysfs_en = false;
5bb23532 3858 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
7c868b59
YT
3859 } else
3860 adev->ucode_sysfs_en = true;
5bb23532 3861
8424f2cc
LG
3862 r = amdgpu_psp_sysfs_init(adev);
3863 if (r) {
3864 adev->psp_sysfs_en = false;
3865 if (!amdgpu_sriov_vf(adev))
3866 DRM_ERROR("Creating psp sysfs failed\n");
3867 } else
3868 adev->psp_sysfs_en = true;
3869
b0adca4d
EQ
3870 /*
3871 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3872 * Otherwise the mgpu fan boost feature will be skipped since the
3873 * gpu instance count would be too low.
3874 */
3875 amdgpu_register_gpu_instance(adev);
3876
d38ceaf9
AD
3877 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3878 * explicit gating rather than handling it automatically.
3879 */
e3c1b071 3880 if (!adev->gmc.xgmi.pending_reset) {
3881 r = amdgpu_device_ip_late_init(adev);
3882 if (r) {
3883 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
3884 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
970fd197 3885 goto release_ras_con;
e3c1b071 3886 }
3887 /* must succeed. */
3888 amdgpu_ras_resume(adev);
3889 queue_delayed_work(system_wq, &adev->delayed_init_work,
3890 msecs_to_jiffies(AMDGPU_RESUME_MS));
2c1a2784 3891 }
d38ceaf9 3892
2c738637
ML
3893 if (amdgpu_sriov_vf(adev))
3894 flush_delayed_work(&adev->delayed_init_work);
3895
77f3a5cd 3896 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
5aea5327 3897 if (r)
77f3a5cd 3898 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166 3899
d155bef0
AB
3900 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3901 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
3902 if (r)
3903 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3904
c1dd4aa6
AG
3905 /* Have the stored pci confspace at hand for restore in case of a sudden PCI error */
3906 if (amdgpu_device_cache_pci_state(adev->pdev))
3907 pci_restore_state(pdev);
3908
8c3dd61c
KHF
3909 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
3910 /* this will fail for cards that aren't VGA class devices, just
3911 * ignore it */
3912 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
bf44e8ce 3913 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
8c3dd61c
KHF
3914
3915 if (amdgpu_device_supports_px(ddev)) {
3916 px = true;
3917 vga_switcheroo_register_client(adev->pdev,
3918 &amdgpu_switcheroo_ops, px);
3919 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
3920 }
3921
e3c1b071 3922 if (adev->gmc.xgmi.pending_reset)
3923 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
3924 msecs_to_jiffies(AMDGPU_RESUME_MS));
3925
4a74c38c
PY
3926 amdgpu_device_check_iommu_direct_map(adev);
3927
d38ceaf9 3928 return 0;
83ba126a 3929
970fd197
SY
3930release_ras_con:
3931 amdgpu_release_ras_context(adev);
3932
83ba126a 3933failed:
89041940 3934 amdgpu_vf_error_trans_all(adev);
8840a387 3935
83ba126a 3936 return r;
d38ceaf9
AD
3937}
3938
07775fc1
AG
3939static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
3940{
62d5f9f7 3941
07775fc1
AG
3942 /* Clear all CPU mappings pointing to this device */
3943 unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
3944
3945 /* Unmap all mapped bars - Doorbell, registers and VRAM */
3946 amdgpu_device_doorbell_fini(adev);
3947
3948 iounmap(adev->rmmio);
3949 adev->rmmio = NULL;
3950 if (adev->mman.aper_base_kaddr)
3951 iounmap(adev->mman.aper_base_kaddr);
3952 adev->mman.aper_base_kaddr = NULL;
3953
3954 /* Memory manager related */
3955 if (!adev->gmc.xgmi.connected_to_cpu) {
3956 arch_phys_wc_del(adev->gmc.vram_mtrr);
3957 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
3958 }
3959}
3960
d38ceaf9 3961/**
bbe04dec 3962 * amdgpu_device_fini_hw - tear down the driver
d38ceaf9
AD
3963 *
3964 * @adev: amdgpu_device pointer
3965 *
3966 * Tear down the driver info (all asics).
3967 * Called at driver shutdown.
3968 */
72c8c97b 3969void amdgpu_device_fini_hw(struct amdgpu_device *adev)
d38ceaf9 3970{
aac89168 3971 dev_info(adev->dev, "amdgpu: finishing device.\n");
9f875167 3972 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 3973 adev->shutdown = true;
9f875167 3974
752c683d
ML
3975 /* make sure IB tests have finished before entering exclusive mode
3976 * to avoid preemption on the IB tests
3977 */
519b8b76 3978 if (amdgpu_sriov_vf(adev)) {
752c683d 3979 amdgpu_virt_request_full_gpu(adev, false);
519b8b76
BZ
3980 amdgpu_virt_fini_data_exchange(adev);
3981 }
752c683d 3982
e5b03032
ML
3983 /* disable all interrupts */
3984 amdgpu_irq_disable_all(adev);
ff97cba8 3985 if (adev->mode_info.mode_config_initialized) {
1053b9c9 3986 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4a580877 3987 drm_helper_force_disable_all(adev_to_drm(adev));
ff97cba8 3988 else
4a580877 3989 drm_atomic_helper_shutdown(adev_to_drm(adev));
ff97cba8 3990 }
8d35a259 3991 amdgpu_fence_driver_hw_fini(adev);
72c8c97b 3992
98f56188
YY
3993 if (adev->mman.initialized) {
3994 flush_delayed_work(&adev->mman.bdev.wq);
3995 ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
3996 }
3997
7c868b59
YT
3998 if (adev->pm_sysfs_en)
3999 amdgpu_pm_sysfs_fini(adev);
72c8c97b
AG
4000 if (adev->ucode_sysfs_en)
4001 amdgpu_ucode_sysfs_fini(adev);
8424f2cc
LG
4002 if (adev->psp_sysfs_en)
4003 amdgpu_psp_sysfs_fini(adev);
72c8c97b
AG
4004 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
4005
232d1d43
SY
4006 /* disable ras feature must before hw fini */
4007 amdgpu_ras_pre_fini(adev);
4008
e9669fb7 4009 amdgpu_device_ip_fini_early(adev);
d10d0daa 4010
a3848df6
YW
4011 amdgpu_irq_fini_hw(adev);
4012
b6fd6e0f
SK
4013 if (adev->mman.initialized)
4014 ttm_device_clear_dma_mappings(&adev->mman.bdev);
894c6890 4015
d10d0daa 4016 amdgpu_gart_dummy_page_fini(adev);
07775fc1 4017
fac53471 4018 amdgpu_device_unmap_mmio(adev);
87172e89 4019
72c8c97b
AG
4020}
4021
4022void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4023{
62d5f9f7
LS
4024 int idx;
4025
8d35a259 4026 amdgpu_fence_driver_sw_fini(adev);
a5c5d8d5 4027 amdgpu_device_ip_fini(adev);
75e1658e
ND
4028 release_firmware(adev->firmware.gpu_info_fw);
4029 adev->firmware.gpu_info_fw = NULL;
d38ceaf9 4030 adev->accel_working = false;
68ce8b24 4031 dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
04442bf7
LL
4032
4033 amdgpu_reset_fini(adev);
4034
d38ceaf9 4035 /* free i2c buses */
4562236b
HW
4036 if (!amdgpu_device_has_dc_support(adev))
4037 amdgpu_i2c_fini(adev);
bfca0289
SL
4038
4039 if (amdgpu_emu_mode != 1)
4040 amdgpu_atombios_fini(adev);
4041
d38ceaf9
AD
4042 kfree(adev->bios);
4043 adev->bios = NULL;
b98c6299 4044 if (amdgpu_device_supports_px(adev_to_drm(adev))) {
84c8b22e 4045 vga_switcheroo_unregister_client(adev->pdev);
83ba126a 4046 vga_switcheroo_fini_domain_pm_ops(adev->dev);
b98c6299 4047 }
38d6be81 4048 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
b8779475 4049 vga_client_unregister(adev->pdev);
e9bc1bf7 4050
62d5f9f7
LS
4051 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4052
4053 iounmap(adev->rmmio);
4054 adev->rmmio = NULL;
4055 amdgpu_device_doorbell_fini(adev);
4056 drm_dev_exit(idx);
4057 }
4058
d155bef0
AB
4059 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4060 amdgpu_pmu_fini(adev);
72de33f8 4061 if (adev->mman.discovery_bin)
a190d1c7 4062 amdgpu_discovery_fini(adev);
72c8c97b 4063
cfbb6b00
AG
4064 amdgpu_reset_put_reset_domain(adev->reset_domain);
4065 adev->reset_domain = NULL;
4066
72c8c97b
AG
4067 kfree(adev->pci_state);
4068
d38ceaf9
AD
4069}
4070
58144d28
ND
4071/**
4072 * amdgpu_device_evict_resources - evict device resources
4073 * @adev: amdgpu device object
4074 *
4075 * Evicts all ttm device resources (vram BOs, gart table) from the lru list
4076 * of the vram memory type. Mainly used for evicting device resources
4077 * at suspend time.
4078 *
4079 */
7863c155 4080static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
58144d28 4081{
7863c155
ML
4082 int ret;
4083
e53d9665
ML
4084 /* No need to evict vram on APUs for suspend to ram or s2idle */
4085 if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
7863c155 4086 return 0;
58144d28 4087
7863c155
ML
4088 ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4089 if (ret)
58144d28 4090 DRM_WARN("evicting device resources failed\n");
7863c155 4091 return ret;
58144d28 4092}
d38ceaf9
AD
4093
4094/*
4095 * Suspend & resume.
4096 */
4097/**
810ddc3a 4098 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 4099 *
87e3f136 4100 * @dev: drm dev pointer
87e3f136 4101 * @fbcon : notify the fbdev of suspend
d38ceaf9
AD
4102 *
4103 * Puts the hw in the suspend state (all asics).
4104 * Returns 0 for success or an error on failure.
4105 * Called at driver suspend.
4106 */
de185019 4107int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9 4108{
a2e15b0e 4109 struct amdgpu_device *adev = drm_to_adev(dev);
d7274ec7 4110 int r = 0;
d38ceaf9 4111
d38ceaf9
AD
4112 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4113 return 0;
4114
44779b43 4115 adev->in_suspend = true;
3fa8f89d 4116
47ea2076
SF
4117 /* Evict the majority of BOs before grabbing the full access */
4118 r = amdgpu_device_evict_resources(adev);
4119 if (r)
4120 return r;
4121
d7274ec7
BZ
4122 if (amdgpu_sriov_vf(adev)) {
4123 amdgpu_virt_fini_data_exchange(adev);
4124 r = amdgpu_virt_request_full_gpu(adev, false);
4125 if (r)
4126 return r;
4127 }
4128
3fa8f89d
S
4129 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4130 DRM_WARN("smart shift update failed\n");
4131
d38ceaf9
AD
4132 drm_kms_helper_poll_disable(dev);
4133
5f818173 4134 if (fbcon)
087451f3 4135 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
5f818173 4136
beff74bc 4137 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 4138
5e6932fe 4139 amdgpu_ras_suspend(adev);
4140
2196927b 4141 amdgpu_device_ip_suspend_phase1(adev);
fe1053b7 4142
c004d44e 4143 if (!adev->in_s0ix)
5d3a2d95 4144 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
94fa5660 4145
7863c155
ML
4146 r = amdgpu_device_evict_resources(adev);
4147 if (r)
4148 return r;
d38ceaf9 4149
8d35a259 4150 amdgpu_fence_driver_hw_fini(adev);
d38ceaf9 4151
2196927b 4152 amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 4153
d7274ec7
BZ
4154 if (amdgpu_sriov_vf(adev))
4155 amdgpu_virt_release_full_gpu(adev, false);
4156
d38ceaf9
AD
4157 return 0;
4158}
4159
4160/**
810ddc3a 4161 * amdgpu_device_resume - initiate device resume
d38ceaf9 4162 *
87e3f136 4163 * @dev: drm dev pointer
87e3f136 4164 * @fbcon : notify the fbdev of resume
d38ceaf9
AD
4165 *
4166 * Bring the hw back to operating state (all asics).
4167 * Returns 0 for success or an error on failure.
4168 * Called at driver resume.
4169 */
de185019 4170int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9 4171{
1348969a 4172 struct amdgpu_device *adev = drm_to_adev(dev);
03161a6e 4173 int r = 0;
d38ceaf9 4174
d7274ec7
BZ
4175 if (amdgpu_sriov_vf(adev)) {
4176 r = amdgpu_virt_request_full_gpu(adev, true);
4177 if (r)
4178 return r;
4179 }
4180
d38ceaf9
AD
4181 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4182 return 0;
4183
62498733 4184 if (adev->in_s0ix)
bc143d8b 4185 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
628c36d7 4186
d38ceaf9 4187 /* post card */
39c640c0 4188 if (amdgpu_device_need_post(adev)) {
4d2997ab 4189 r = amdgpu_device_asic_init(adev);
74b0b157 4190 if (r)
aac89168 4191 dev_err(adev->dev, "amdgpu asic init failed\n");
74b0b157 4192 }
d38ceaf9 4193
06ec9070 4194 r = amdgpu_device_ip_resume(adev);
d7274ec7 4195
e6707218 4196 if (r) {
aac89168 4197 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
3c22c1ea 4198 goto exit;
e6707218 4199 }
8d35a259 4200 amdgpu_fence_driver_hw_init(adev);
5ceb54c6 4201
06ec9070 4202 r = amdgpu_device_ip_late_init(adev);
03161a6e 4203 if (r)
3c22c1ea 4204 goto exit;
d38ceaf9 4205
beff74bc
AD
4206 queue_delayed_work(system_wq, &adev->delayed_init_work,
4207 msecs_to_jiffies(AMDGPU_RESUME_MS));
4208
c004d44e 4209 if (!adev->in_s0ix) {
5d3a2d95
AD
4210 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4211 if (r)
3c22c1ea 4212 goto exit;
5d3a2d95 4213 }
756e6880 4214
3c22c1ea
SF
4215exit:
4216 if (amdgpu_sriov_vf(adev)) {
4217 amdgpu_virt_init_data_exchange(adev);
4218 amdgpu_virt_release_full_gpu(adev, true);
4219 }
4220
4221 if (r)
4222 return r;
4223
96a5d8d4 4224 /* Make sure IB tests flushed */
beff74bc 4225 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 4226
a2e15b0e 4227 if (fbcon)
087451f3 4228 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
d38ceaf9
AD
4229
4230 drm_kms_helper_poll_enable(dev);
23a1a9e5 4231
5e6932fe 4232 amdgpu_ras_resume(adev);
4233
d09ef243
AD
4234 if (adev->mode_info.num_crtc) {
4235 /*
4236 * Most of the connector probing functions try to acquire runtime pm
4237 * refs to ensure that the GPU is powered on when connector polling is
4238 * performed. Since we're calling this from a runtime PM callback,
4239 * trying to acquire rpm refs will cause us to deadlock.
4240 *
4241 * Since we're guaranteed to be holding the rpm lock, it's safe to
4242 * temporarily disable the rpm helpers so this doesn't deadlock us.
4243 */
23a1a9e5 4244#ifdef CONFIG_PM
d09ef243 4245 dev->dev->power.disable_depth++;
23a1a9e5 4246#endif
d09ef243
AD
4247 if (!adev->dc_enabled)
4248 drm_helper_hpd_irq_event(dev);
4249 else
4250 drm_kms_helper_hotplug_event(dev);
23a1a9e5 4251#ifdef CONFIG_PM
d09ef243 4252 dev->dev->power.disable_depth--;
23a1a9e5 4253#endif
d09ef243 4254 }
44779b43
RZ
4255 adev->in_suspend = false;
4256
3fa8f89d
S
4257 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4258 DRM_WARN("smart shift update failed\n");
4259
4d3b9ae5 4260 return 0;
d38ceaf9
AD
4261}
4262
e3ecdffa
AD
4263/**
4264 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4265 *
4266 * @adev: amdgpu_device pointer
4267 *
4268 * The list of all the hardware IPs that make up the asic is walked and
4269 * the check_soft_reset callbacks are run. check_soft_reset determines
4270 * if the asic is still hung or not.
4271 * Returns true if any of the IPs are still in a hung state, false if not.
4272 */
06ec9070 4273static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
4274{
4275 int i;
4276 bool asic_hang = false;
4277
f993d628
ML
4278 if (amdgpu_sriov_vf(adev))
4279 return true;
4280
8bc04c29
AD
4281 if (amdgpu_asic_need_full_reset(adev))
4282 return true;
4283
63fbf42f 4284 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4285 if (!adev->ip_blocks[i].status.valid)
63fbf42f 4286 continue;
a1255107
AD
4287 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4288 adev->ip_blocks[i].status.hang =
4289 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4290 if (adev->ip_blocks[i].status.hang) {
aac89168 4291 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
4292 asic_hang = true;
4293 }
4294 }
4295 return asic_hang;
4296}
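/*
 * Sketch of how an IP block plugs into the walk above (hypothetical
 * callback and helper; real implementations live in the per-IP files,
 * e.g. gfx_v8_0.c). check_soft_reset reports whether the block is hung:
 */
#if 0
static bool example_ip_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* read the block's status registers and decide if it is hung */
	return example_block_is_hung(adev);	/* hypothetical helper */
}

static const struct amd_ip_funcs example_ip_funcs = {
	.check_soft_reset = example_ip_check_soft_reset,
};
#endif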
4297
e3ecdffa
AD
4298/**
4299 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4300 *
4301 * @adev: amdgpu_device pointer
4302 *
4303 * The list of all the hardware IPs that make up the asic is walked and the
4304 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
4305 * handles any IP specific hardware or software state changes that are
4306 * necessary for a soft reset to succeed.
4307 * Returns 0 on success, negative error code on failure.
4308 */
06ec9070 4309static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
4310{
4311 int i, r = 0;
4312
4313 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4314 if (!adev->ip_blocks[i].status.valid)
d31a501e 4315 continue;
a1255107
AD
4316 if (adev->ip_blocks[i].status.hang &&
4317 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4318 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
4319 if (r)
4320 return r;
4321 }
4322 }
4323
4324 return 0;
4325}
4326
e3ecdffa
AD
4327/**
4328 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4329 *
4330 * @adev: amdgpu_device pointer
4331 *
4332 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
4333 * reset is necessary to recover.
4334 * Returns true if a full asic reset is required, false if not.
4335 */
06ec9070 4336static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 4337{
da146d3b
AD
4338 int i;
4339
8bc04c29
AD
4340 if (amdgpu_asic_need_full_reset(adev))
4341 return true;
4342
da146d3b 4343 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4344 if (!adev->ip_blocks[i].status.valid)
da146d3b 4345 continue;
a1255107
AD
4346 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4347 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4348 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
4349 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4350 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 4351 if (adev->ip_blocks[i].status.hang) {
aac89168 4352 dev_info(adev->dev, "Some block need full reset!\n");
da146d3b
AD
4353 return true;
4354 }
4355 }
35d782fe
CZ
4356 }
4357 return false;
4358}
4359
e3ecdffa
AD
4360/**
4361 * amdgpu_device_ip_soft_reset - do a soft reset
4362 *
4363 * @adev: amdgpu_device pointer
4364 *
4365 * The list of all the hardware IPs that make up the asic is walked and the
4366 * soft_reset callbacks are run if the block is hung. soft_reset handles any
4367 * IP specific hardware or software state changes that are necessary to soft
4368 * reset the IP.
4369 * Returns 0 on success, negative error code on failure.
4370 */
06ec9070 4371static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4372{
4373 int i, r = 0;
4374
4375 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4376 if (!adev->ip_blocks[i].status.valid)
35d782fe 4377 continue;
a1255107
AD
4378 if (adev->ip_blocks[i].status.hang &&
4379 adev->ip_blocks[i].version->funcs->soft_reset) {
4380 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
4381 if (r)
4382 return r;
4383 }
4384 }
4385
4386 return 0;
4387}
4388
e3ecdffa
AD
4389/**
4390 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4391 *
4392 * @adev: amdgpu_device pointer
4393 *
4394 * The list of all the hardware IPs that make up the asic is walked and the
4395 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
4396 * handles any IP specific hardware or software state changes that are
4397 * necessary after the IP has been soft reset.
4398 * Returns 0 on success, negative error code on failure.
4399 */
06ec9070 4400static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4401{
4402 int i, r = 0;
4403
4404 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4405 if (!adev->ip_blocks[i].status.valid)
35d782fe 4406 continue;
a1255107
AD
4407 if (adev->ip_blocks[i].status.hang &&
4408 adev->ip_blocks[i].version->funcs->post_soft_reset)
4409 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
4410 if (r)
4411 return r;
4412 }
4413
4414 return 0;
4415}
4416
e3ecdffa 4417/**
c33adbc7 4418 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
4419 *
4420 * @adev: amdgpu_device pointer
4421 *
4422 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
4423 * restore things like GPUVM page tables after a GPU reset where
4424 * the contents of VRAM might be lost.
403009bf
CK
4425 *
4426 * Returns:
4427 * 0 on success, negative error code on failure.
e3ecdffa 4428 */
c33adbc7 4429static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 4430{
c41d1cf6 4431 struct dma_fence *fence = NULL, *next = NULL;
403009bf 4432 struct amdgpu_bo *shadow;
e18aaea7 4433 struct amdgpu_bo_vm *vmbo;
403009bf 4434 long r = 1, tmo;
c41d1cf6
ML
4435
4436 if (amdgpu_sriov_runtime(adev))
b045d3af 4437 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
4438 else
4439 tmo = msecs_to_jiffies(100);
4440
aac89168 4441 dev_info(adev->dev, "recover vram bo from shadow start\n");
c41d1cf6 4442 mutex_lock(&adev->shadow_list_lock);
e18aaea7
ND
4443 list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4444 shadow = &vmbo->bo;
403009bf 4445 /* No need to recover an evicted BO */
d3116756
CK
4446 if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4447 shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4448 shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
403009bf
CK
4449 continue;
4450
4451 r = amdgpu_bo_restore_shadow(shadow, &next);
4452 if (r)
4453 break;
4454
c41d1cf6 4455 if (fence) {
1712fb1a 4456 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
4457 dma_fence_put(fence);
4458 fence = next;
1712fb1a 4459 if (tmo == 0) {
4460 r = -ETIMEDOUT;
c41d1cf6 4461 break;
1712fb1a 4462 } else if (tmo < 0) {
4463 r = tmo;
4464 break;
4465 }
403009bf
CK
4466 } else {
4467 fence = next;
c41d1cf6 4468 }
c41d1cf6
ML
4469 }
4470 mutex_unlock(&adev->shadow_list_lock);
4471
403009bf
CK
4472 if (fence)
4473 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
4474 dma_fence_put(fence);
4475
1712fb1a 4476 if (r < 0 || tmo <= 0) {
aac89168 4477 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
4478 return -EIO;
4479 }
c41d1cf6 4480
aac89168 4481 dev_info(adev->dev, "recover vram bo from shadow done\n");
403009bf 4482 return 0;
c41d1cf6
ML
4483}
4484
a90ad3c2 4485
e3ecdffa 4486/**
06ec9070 4487 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e 4488 *
982a820b 4489 * @adev: amdgpu_device pointer
87e3f136 4490 * @from_hypervisor: request from hypervisor
5740682e
ML
4491 *
4492 * Do a VF FLR and reinitialize the ASIC.
3f48c681 4493 * Returns 0 if it succeeded, otherwise an error code.
e3ecdffa
AD
4494 */
4495static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4496 bool from_hypervisor)
5740682e
ML
4497{
4498 int r;
a5f67c93 4499 struct amdgpu_hive_info *hive = NULL;
7258fa31 4500 int retry_limit = 0;
5740682e 4501
7258fa31 4502retry:
c004d44e 4503 amdgpu_amdkfd_pre_reset(adev);
428890a3 4504
5740682e
ML
4505 if (from_hypervisor)
4506 r = amdgpu_virt_request_full_gpu(adev, true);
4507 else
4508 r = amdgpu_virt_reset_gpu(adev);
4509 if (r)
4510 return r;
a90ad3c2
ML
4511
4512 /* Resume IP prior to SMC */
06ec9070 4513 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
4514 if (r)
4515 goto error;
a90ad3c2 4516
c9ffa427 4517 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 4518
7a3e0bb2
RZ
4519 r = amdgpu_device_fw_loading(adev);
4520 if (r)
4521 return r;
4522
a90ad3c2 4523 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 4524 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
4525 if (r)
4526 goto error;
a90ad3c2 4527
a5f67c93
ZL
4528 hive = amdgpu_get_xgmi_hive(adev);
4529 /* Update PSP FW topology after reset */
4530 if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4531 r = amdgpu_xgmi_update_topology(hive, adev);
4532
4533 if (hive)
4534 amdgpu_put_xgmi_hive(hive);
4535
4536 if (!r) {
4537 amdgpu_irq_gpu_reset_resume_helper(adev);
4538 r = amdgpu_ib_ring_tests(adev);
9c12f5cd 4539
c004d44e 4540 amdgpu_amdkfd_post_reset(adev);
a5f67c93 4541 }
a90ad3c2 4542
abc34253 4543error:
c41d1cf6 4544 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 4545 amdgpu_inc_vram_lost(adev);
c33adbc7 4546 r = amdgpu_device_recover_vram(adev);
a90ad3c2 4547 }
437f3e0b 4548 amdgpu_virt_release_full_gpu(adev, true);
a90ad3c2 4549
7258fa31
SK
4550 if (AMDGPU_RETRY_SRIOV_RESET(r)) {
4551 if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
4552 retry_limit++;
4553 goto retry;
4554 } else
4555 DRM_ERROR("GPU reset retry is beyond the retry limit\n");
4556 }
4557
a90ad3c2
ML
4558 return r;
4559}
4560
9a1cddd6 4561/**
4562 * amdgpu_device_has_job_running - check if there is any job in the pending list
4563 *
982a820b 4564 * @adev: amdgpu_device pointer
9a1cddd6 4565 *
4566 * Check if there is any job in the scheduler pending list
4567 */
4568bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4569{
4570 int i;
4571 struct drm_sched_job *job;
4572
4573 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4574 struct amdgpu_ring *ring = adev->rings[i];
4575
4576 if (!ring || !ring->sched.thread)
4577 continue;
4578
4579 spin_lock(&ring->sched.job_list_lock);
6efa4b46
LT
4580 job = list_first_entry_or_null(&ring->sched.pending_list,
4581 struct drm_sched_job, list);
9a1cddd6 4582 spin_unlock(&ring->sched.job_list_lock);
4583 if (job)
4584 return true;
4585 }
4586 return false;
4587}
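/*
 * Usage sketch (hypothetical caller fragment, not part of this file):
 * power management code can use this helper to refuse runtime suspend
 * while work is still in flight.
 */
#if 0
	if (amdgpu_device_has_job_running(adev))
		return -EBUSY;	/* keep the device awake until the rings drain */
#endif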
4588
12938fad
CK
4589/**
4590 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4591 *
982a820b 4592 * @adev: amdgpu_device pointer
12938fad
CK
4593 *
4594 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4595 * a hung GPU.
4596 */
4597bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4598{
12938fad 4599
3ba7b418
AG
4600 if (amdgpu_gpu_recovery == 0)
4601 goto disabled;
4602
1a11a65d
YC
4603 /* Skip soft reset check in fatal error mode */
4604 if (!amdgpu_ras_is_poison_mode_supported(adev))
4605 return true;
4606
3ba7b418
AG
4607 if (amdgpu_sriov_vf(adev))
4608 return true;
4609
4610 if (amdgpu_gpu_recovery == -1) {
4611 switch (adev->asic_type) {
b3523c45
AD
4612#ifdef CONFIG_DRM_AMDGPU_SI
4613 case CHIP_VERDE:
4614 case CHIP_TAHITI:
4615 case CHIP_PITCAIRN:
4616 case CHIP_OLAND:
4617 case CHIP_HAINAN:
4618#endif
4619#ifdef CONFIG_DRM_AMDGPU_CIK
4620 case CHIP_KAVERI:
4621 case CHIP_KABINI:
4622 case CHIP_MULLINS:
4623#endif
4624 case CHIP_CARRIZO:
4625 case CHIP_STONEY:
4626 case CHIP_CYAN_SKILLFISH:
3ba7b418 4627 goto disabled;
b3523c45
AD
4628 default:
4629 break;
3ba7b418 4630 }
12938fad
CK
4631 }
4632
4633 return true;
3ba7b418
AG
4634
4635disabled:
aac89168 4636 dev_info(adev->dev, "GPU recovery disabled.\n");
3ba7b418 4637 return false;
12938fad
CK
4638}
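/*
 * Example (follows from the checks above): the decision is steered by the
 * amdgpu.gpu_recovery module parameter; 1 forces recovery on, 0 forces it
 * off, and the default of -1 leaves it per-ASIC (always on under SR-IOV,
 * off for the legacy SI/CIK and other parts listed in the switch above).
 */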
4639
5c03e584
FX
4640int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
4641{
4642 u32 i;
4643 int ret = 0;
4644
4645 amdgpu_atombios_scratch_regs_engine_hung(adev, true);
4646
4647 dev_info(adev->dev, "GPU mode1 reset\n");
4648
4649 /* disable BM */
4650 pci_clear_master(adev->pdev);
4651
4652 amdgpu_device_cache_pci_state(adev->pdev);
4653
4654 if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
4655 dev_info(adev->dev, "GPU smu mode1 reset\n");
4656 ret = amdgpu_dpm_mode1_reset(adev);
4657 } else {
4658 dev_info(adev->dev, "GPU psp mode1 reset\n");
4659 ret = psp_gpu_reset(adev);
4660 }
4661
4662 if (ret)
4663 dev_err(adev->dev, "GPU mode1 reset failed\n");
4664
4665 amdgpu_device_load_pci_state(adev->pdev);
4666
4667 /* wait for asic to come out of reset */
4668 for (i = 0; i < adev->usec_timeout; i++) {
4669 u32 memsize = adev->nbio.funcs->get_memsize(adev);
4670
4671 if (memsize != 0xffffffff)
4672 break;
4673 udelay(1);
4674 }
4675
4676 amdgpu_atombios_scratch_regs_engine_hung(adev, false);
4677 return ret;
4678}
5c6dd71e 4679
e3c1b071 4680int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
04442bf7 4681 struct amdgpu_reset_context *reset_context)
26bc5340 4682{
5c1e6fa4 4683 int i, r = 0;
04442bf7
LL
4684 struct amdgpu_job *job = NULL;
4685 bool need_full_reset =
4686 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4687
4688 if (reset_context->reset_req_dev == adev)
4689 job = reset_context->job;
71182665 4690
b602ca5f
TZ
4691 if (amdgpu_sriov_vf(adev)) {
4692 /* stop the data exchange thread */
4693 amdgpu_virt_fini_data_exchange(adev);
4694 }
4695
9e225fb9
AG
4696 amdgpu_fence_driver_isr_toggle(adev, true);
4697
71182665 4698 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
4699 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4700 struct amdgpu_ring *ring = adev->rings[i];
4701
51687759 4702 if (!ring || !ring->sched.thread)
0875dc9e 4703 continue;
5740682e 4704
c530b02f
JZ
4705 /* clear job fences from the fence drv so that force_completion
4706 * doesn't leave NULL and vm flush fences in the fence drv */
5c1e6fa4 4707 amdgpu_fence_driver_clear_job_fences(ring);
c530b02f 4708
2f9d4084
ML
4709 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
4710 amdgpu_fence_driver_force_completion(ring);
0875dc9e 4711 }
d38ceaf9 4712
9e225fb9
AG
4713 amdgpu_fence_driver_isr_toggle(adev, false);
4714
ff99849b 4715 if (job && job->vm)
222b5f04
AG
4716 drm_sched_increase_karma(&job->base);
4717
04442bf7 4718 r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
404b277b
LL
4719 /* If reset handler not implemented, continue; otherwise return */
4720 if (r == -ENOSYS)
4721 r = 0;
4722 else
04442bf7
LL
4723 return r;
4724
1d721ed6 4725 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
4726 if (!amdgpu_sriov_vf(adev)) {
4727
4728 if (!need_full_reset)
4729 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4730
360cd081
LG
4731 if (!need_full_reset && amdgpu_gpu_recovery &&
4732 amdgpu_device_ip_check_soft_reset(adev)) {
26bc5340
AG
4733 amdgpu_device_ip_pre_soft_reset(adev);
4734 r = amdgpu_device_ip_soft_reset(adev);
4735 amdgpu_device_ip_post_soft_reset(adev);
4736 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 4737 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
26bc5340
AG
4738 need_full_reset = true;
4739 }
4740 }
4741
4742 if (need_full_reset)
4743 r = amdgpu_device_ip_suspend(adev);
04442bf7
LL
4744 if (need_full_reset)
4745 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4746 else
4747 clear_bit(AMDGPU_NEED_FULL_RESET,
4748 &reset_context->flags);
26bc5340
AG
4749 }
4750
4751 return r;
4752}
4753
15fd09a0
SA
4754static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
4755{
15fd09a0
SA
4756 int i;
4757
38a15ad9 4758 lockdep_assert_held(&adev->reset_domain->sem);
15fd09a0
SA
4759
4760 for (i = 0; i < adev->num_regs; i++) {
651d7ee6
SA
4761 adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]);
4762 trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i],
4763 adev->reset_dump_reg_value[i]);
15fd09a0
SA
4764 }
4765
4766 return 0;
4767}
4768
3d8785f6
SA
4769#ifdef CONFIG_DEV_COREDUMP
4770static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
4771 size_t count, void *data, size_t datalen)
4772{
4773 struct drm_printer p;
4774 struct amdgpu_device *adev = data;
4775 struct drm_print_iterator iter;
4776 int i;
4777
4778 iter.data = buffer;
4779 iter.offset = 0;
4780 iter.start = offset;
4781 iter.remain = count;
4782
4783 p = drm_coredump_printer(&iter);
4784
4785 drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
4786 drm_printf(&p, "kernel: " UTS_RELEASE "\n");
4787 drm_printf(&p, "module: " KBUILD_MODNAME "\n");
4788 drm_printf(&p, "time: %lld.%09ld\n", adev->reset_time.tv_sec, adev->reset_time.tv_nsec);
4789 if (adev->reset_task_info.pid)
4790 drm_printf(&p, "process_name: %s PID: %d\n",
4791 adev->reset_task_info.process_name,
4792 adev->reset_task_info.pid);
4793
4794 if (adev->reset_vram_lost)
4795 drm_printf(&p, "VRAM is lost due to GPU reset!\n");
4796 if (adev->num_regs) {
4797 drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n");
4798
4799 for (i = 0; i < adev->num_regs; i++)
4800 drm_printf(&p, "0x%08x: 0x%08x\n",
4801 adev->reset_dump_reg_list[i],
4802 adev->reset_dump_reg_value[i]);
4803 }
4804
4805 return count - iter.remain;
4806}
4807
4808static void amdgpu_devcoredump_free(void *data)
4809{
4810}
4811
4812static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev)
4813{
4814 struct drm_device *dev = adev_to_drm(adev);
4815
4816 ktime_get_ts64(&adev->reset_time);
4817 dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_KERNEL,
4818 amdgpu_devcoredump_read, amdgpu_devcoredump_free);
4819}
4820#endif
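/*
 * Usage note (typical devcoredump behavior, stated here as an assumption):
 * once amdgpu_reset_capture_coredumpm() has run, the devcoredump framework
 * exposes the dump under /sys/class/devcoredump/devcd<N>/data, e.g.
 *
 *   cat /sys/class/devcoredump/devcd1/data
 *
 * and writing anything to that node releases the dump early.
 */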
4821
04442bf7
LL
4822int amdgpu_do_asic_reset(struct list_head *device_list_handle,
4823 struct amdgpu_reset_context *reset_context)
26bc5340
AG
4824{
4825 struct amdgpu_device *tmp_adev = NULL;
04442bf7 4826 bool need_full_reset, skip_hw_reset, vram_lost = false;
26bc5340 4827 int r = 0;
f5c7e779 4828 bool gpu_reset_for_dev_remove = 0;
26bc5340 4829
04442bf7
LL
4830 /* Try reset handler method first */
4831 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
4832 reset_list);
15fd09a0 4833 amdgpu_reset_reg_dumps(tmp_adev);
0a83bb35
LL
4834
4835 reset_context->reset_device_list = device_list_handle;
04442bf7 4836 r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
404b277b
LL
4837 /* If reset handler not implemented, continue; otherwise return */
4838 if (r == -ENOSYS)
4839 r = 0;
4840 else
04442bf7
LL
4841 return r;
4842
4843 /* Reset handler not implemented, use the default method */
4844 need_full_reset =
4845 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4846 skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
4847
f5c7e779
YC
4848 gpu_reset_for_dev_remove =
4849 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
4850 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4851
26bc5340 4852 /*
655ce9cb 4853 * ASIC reset has to be done on all XGMI hive nodes ASAP
26bc5340
AG
4854 * to allow proper links negotiation in FW (within 1 sec)
4855 */
7ac71382 4856 if (!skip_hw_reset && need_full_reset) {
655ce9cb 4857 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
041a62bc 4858 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 4859 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
e3c1b071 4860 tmp_adev->gmc.xgmi.pending_reset = false;
c96cf282 4861 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
4862 r = -EALREADY;
4863 } else
4864 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 4865
041a62bc 4866 if (r) {
aac89168 4867 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4a580877 4868 r, adev_to_drm(tmp_adev)->unique);
041a62bc 4869 break;
ce316fa5
LM
4870 }
4871 }
4872
041a62bc
AG
4873 /* For XGMI wait for all resets to complete before proceed */
4874 if (!r) {
655ce9cb 4875 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
ce316fa5
LM
4876 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4877 flush_work(&tmp_adev->xgmi_reset_work);
4878 r = tmp_adev->asic_reset_res;
4879 if (r)
4880 break;
ce316fa5
LM
4881 }
4882 }
4883 }
ce316fa5 4884 }
26bc5340 4885
43c4d576 4886 if (!r && amdgpu_ras_intr_triggered()) {
655ce9cb 4887 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5e67bba3 4888 if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops &&
4889 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
4890 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev);
43c4d576
JC
4891 }
4892
00eaa571 4893 amdgpu_ras_intr_cleared();
43c4d576 4894 }
00eaa571 4895
f5c7e779
YC
4896 /* Since the mode1 reset affects base ip blocks, the
4897 * phase1 ip blocks need to be resumed. Otherwise there
4898 * will be a BIOS signature error and the psp bootloader
4899 * can't load kdb on the next amdgpu install.
4900 */
4901 if (gpu_reset_for_dev_remove) {
4902 list_for_each_entry(tmp_adev, device_list_handle, reset_list)
4903 amdgpu_device_ip_resume_phase1(tmp_adev);
4904
4905 goto end;
4906 }
4907
655ce9cb 4908 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
26bc5340
AG
4909 if (need_full_reset) {
4910 /* post card */
e3c1b071 4911 r = amdgpu_device_asic_init(tmp_adev);
4912 if (r) {
aac89168 4913 dev_warn(tmp_adev->dev, "asic atom init failed!");
e3c1b071 4914 } else {
26bc5340 4915 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
9cec53c1
JZ
4916 r = amdgpu_amdkfd_resume_iommu(tmp_adev);
4917 if (r)
4918 goto out;
4919
26bc5340
AG
4920 r = amdgpu_device_ip_resume_phase1(tmp_adev);
4921 if (r)
4922 goto out;
4923
4924 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
3d8785f6
SA
4925#ifdef CONFIG_DEV_COREDUMP
4926 tmp_adev->reset_vram_lost = vram_lost;
4927 memset(&tmp_adev->reset_task_info, 0,
4928 sizeof(tmp_adev->reset_task_info));
4929 if (reset_context->job && reset_context->job->vm)
4930 tmp_adev->reset_task_info =
4931 reset_context->job->vm->task_info;
4932 amdgpu_reset_capture_coredumpm(tmp_adev);
4933#endif
26bc5340 4934 if (vram_lost) {
77e7f829 4935 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 4936 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
4937 }
4938
26bc5340
AG
4939 r = amdgpu_device_fw_loading(tmp_adev);
4940 if (r)
4941 return r;
4942
4943 r = amdgpu_device_ip_resume_phase2(tmp_adev);
4944 if (r)
4945 goto out;
4946
4947 if (vram_lost)
4948 amdgpu_device_fill_reset_magic(tmp_adev);
4949
fdafb359
EQ
4950 /*
4951 * Add this ASIC as tracked since the reset was already
4952 * completed successfully.
4953 */
4954 amdgpu_register_gpu_instance(tmp_adev);
4955
04442bf7
LL
4956 if (!reset_context->hive &&
4957 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
e3c1b071 4958 amdgpu_xgmi_add_device(tmp_adev);
4959
7c04ca50 4960 r = amdgpu_device_ip_late_init(tmp_adev);
4961 if (r)
4962 goto out;
4963
087451f3 4964 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
565d1941 4965
e8fbaf03
GC
4966 /*
4967 * The GPU enters a bad state once the faulty pages
4968 * retired by ECC have reached the threshold, and ras
4969 * recovery is scheduled next. So add one check
4970 * here to break recovery if it indeed exceeds the
4971 * bad page threshold, and remind the user to
4972 * retire this GPU or set a bigger
4973 * bad_page_threshold value to fix this when
4974 * probing the driver again.
4975 */
11003c68 4976 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
e8fbaf03
GC
4977 /* must succeed. */
4978 amdgpu_ras_resume(tmp_adev);
4979 } else {
4980 r = -EINVAL;
4981 goto out;
4982 }
e79a04d5 4983
26bc5340 4984 /* Update PSP FW topology after reset */
04442bf7
LL
4985 if (reset_context->hive &&
4986 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
4987 r = amdgpu_xgmi_update_topology(
4988 reset_context->hive, tmp_adev);
26bc5340
AG
4989 }
4990 }
4991
26bc5340
AG
4992out:
4993 if (!r) {
4994 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
4995 r = amdgpu_ib_ring_tests(tmp_adev);
4996 if (r) {
4997 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
26bc5340
AG
4998 need_full_reset = true;
4999 r = -EAGAIN;
5000 goto end;
5001 }
5002 }
5003
5004 if (!r)
5005 r = amdgpu_device_recover_vram(tmp_adev);
5006 else
5007 tmp_adev->asic_reset_res = r;
5008 }
5009
5010end:
04442bf7
LL
5011 if (need_full_reset)
5012 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5013 else
5014 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340
AG
5015 return r;
5016}
5017
e923be99 5018static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
26bc5340 5019{
5740682e 5020
a3a09142
AD
5021 switch (amdgpu_asic_reset_method(adev)) {
5022 case AMD_RESET_METHOD_MODE1:
5023 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5024 break;
5025 case AMD_RESET_METHOD_MODE2:
5026 adev->mp1_state = PP_MP1_STATE_RESET;
5027 break;
5028 default:
5029 adev->mp1_state = PP_MP1_STATE_NONE;
5030 break;
5031 }
26bc5340 5032}
d38ceaf9 5033
e923be99 5034static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
26bc5340 5035{
89041940 5036 amdgpu_vf_error_trans_all(adev);
a3a09142 5037 adev->mp1_state = PP_MP1_STATE_NONE;
91fb309d
HC
5038}
5039
3f12acc8
EQ
5040static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5041{
5042 struct pci_dev *p = NULL;
5043
5044 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5045 adev->pdev->bus->number, 1);
5046 if (p) {
5047 pm_runtime_enable(&(p->dev));
5048 pm_runtime_resume(&(p->dev));
5049 }
b85e285e
YY
5050
5051 pci_dev_put(p);
3f12acc8
EQ
5052}
5053
5054static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5055{
5056 enum amd_reset_method reset_method;
5057 struct pci_dev *p = NULL;
5058 u64 expires;
5059
5060 /*
5061 * For now, only BACO and mode1 reset are confirmed to
5062 * suffer the audio issue when not properly suspended.
5063 */
5064 reset_method = amdgpu_asic_reset_method(adev);
5065 if ((reset_method != AMD_RESET_METHOD_BACO) &&
5066 (reset_method != AMD_RESET_METHOD_MODE1))
5067 return -EINVAL;
5068
5069 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5070 adev->pdev->bus->number, 1);
5071 if (!p)
5072 return -ENODEV;
5073
5074 expires = pm_runtime_autosuspend_expiration(&(p->dev));
5075 if (!expires)
5076 /*
5077 * If we cannot get the audio device's autosuspend
5078 * delay, use a fixed 4s interval. The audio
5079 * controller's default autosuspend delay is 3s,
5080 * so 4s is guaranteed to cover it.
5081 */
54b7feb9 5082 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
5083
5084 while (!pm_runtime_status_suspended(&(p->dev))) {
5085 if (!pm_runtime_suspend(&(p->dev)))
5086 break;
5087
5088 if (expires < ktime_get_mono_fast_ns()) {
5089 dev_warn(adev->dev, "failed to suspend display audio\n");
b85e285e 5090 pci_dev_put(p);
3f12acc8
EQ
5091 /* TODO: abort the succeeding gpu reset? */
5092 return -ETIMEDOUT;
5093 }
5094 }
5095
5096 pm_runtime_disable(&(p->dev));
5097
b85e285e 5098 pci_dev_put(p);
3f12acc8
EQ
5099 return 0;
5100}
5101
d193b12b 5102static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
247c7b0d
AG
5103{
5104 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5105
5106#if defined(CONFIG_DEBUG_FS)
5107 if (!amdgpu_sriov_vf(adev))
5108 cancel_work(&adev->reset_work);
5109#endif
5110
5111 if (adev->kfd.dev)
5112 cancel_work(&adev->kfd.reset_work);
5113
5114 if (amdgpu_sriov_vf(adev))
5115 cancel_work(&adev->virt.flr_work);
5116
5117 if (con && adev->ras_enabled)
5118 cancel_work(&con->recovery_work);
5119
5120}
5121
26bc5340 5122/**
6e9c65f7 5123 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
26bc5340 5124 *
982a820b 5125 * @adev: amdgpu_device pointer
26bc5340
AG
5126 * @job: the job that triggered the hang, if any
5127 * @reset_context: parameters for how to perform the reset
5128 *
5129 * Attempt to reset the GPU if it has hung (all ASICs): do a soft or full
5130 * reset and reinitialize the ASIC. Returns 0 on success or an error code.
5131 */
5132
cf727044 5133int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
f1549c09
LG
5134 struct amdgpu_job *job,
5135 struct amdgpu_reset_context *reset_context)
26bc5340 5136{
1d721ed6 5137 struct list_head device_list, *device_list_handle = NULL;
7dd8c205 5138 bool job_signaled = false;
26bc5340 5139 struct amdgpu_hive_info *hive = NULL;
26bc5340 5140 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 5141 int i, r = 0;
bb5c7235 5142 bool need_emergency_restart = false;
3f12acc8 5143 bool audio_suspended = false;
f5c7e779
YC
5144 bool gpu_reset_for_dev_remove = false;
5145
5146 gpu_reset_for_dev_remove =
5147 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5148 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340 5149
6e3cd2a9 5150 /*
bb5c7235
WS
5151 * Special case: RAS triggered and full reset isn't supported
5152 */
5153 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5154
d5ea093e
AG
5155 /*
5156 * Flush RAM to disk so that after reboot
5157 * the user can read the log and see why the system rebooted.
5158 */
bb5c7235 5159 if (need_emergency_restart && amdgpu_ras_get_context(adev) && amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
5160 DRM_WARN("Emergency reboot.");
5161
5162 ksys_sync_helper();
5163 emergency_restart();
5164 }
5165
b823821f 5166 dev_info(adev->dev, "GPU %s begin!\n",
bb5c7235 5167 need_emergency_restart ? "jobs stop":"reset");
26bc5340 5168
175ac6ec
ZL
5169 if (!amdgpu_sriov_vf(adev))
5170 hive = amdgpu_get_xgmi_hive(adev);
681260df 5171 if (hive)
53b3f8f4 5172 mutex_lock(&hive->hive_lock);
26bc5340 5173
f1549c09
LG
5174 reset_context->job = job;
5175 reset_context->hive = hive;
9e94d22c
EQ
5176 /*
5177 * Build list of devices to reset.
5178 * In XGMI hive mode, re-sort the device list
5179 * to put adev in the first position.
5180 */
5181 INIT_LIST_HEAD(&device_list);
175ac6ec 5182 if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
83d29a5f 5183 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
655ce9cb 5184 list_add_tail(&tmp_adev->reset_list, &device_list);
83d29a5f
YC
5185 if (gpu_reset_for_dev_remove && adev->shutdown)
5186 tmp_adev->shutdown = true;
5187 }
655ce9cb 5188 if (!list_is_first(&adev->reset_list, &device_list))
5189 list_rotate_to_front(&adev->reset_list, &device_list);
5190 device_list_handle = &device_list;
26bc5340 5191 } else {
655ce9cb 5192 list_add_tail(&adev->reset_list, &device_list);
26bc5340
AG
5193 device_list_handle = &device_list;
5194 }
5195
e923be99
AG
5196 /* We need to lock the reset domain only once, both for XGMI and single device */
5197 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5198 reset_list);
3675c2f2 5199 amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
e923be99 5200
1d721ed6 5201 /* block all schedulers and reset given job's ring */
655ce9cb 5202 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f287a3c5 5203
e923be99 5204 amdgpu_device_set_mp1_state(tmp_adev);
f287a3c5 5205
3f12acc8
EQ
5206 /*
5207 * Try to put the audio codec into suspend state
5208 * before the GPU reset starts.
5209 *
5210 * The power domain of the graphics device is
5211 * shared with the AZ power domain. Without this,
5212 * we may change the audio hardware behind the
5213 * audio driver's back, which triggers audio
5214 * codec errors.
5215 */
5216 if (!amdgpu_device_suspend_display_audio(tmp_adev))
5217 audio_suspended = true;
5218
9e94d22c
EQ
5219 amdgpu_ras_set_error_query_ready(tmp_adev, false);
5220
52fb44cf
EQ
5221 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5222
c004d44e 5223 if (!amdgpu_sriov_vf(tmp_adev))
428890a3 5224 amdgpu_amdkfd_pre_reset(tmp_adev);
9e94d22c 5225
12ffa55d
AG
5226 /*
5227 * Mark these ASICs to be reset as untracked first,
5228 * and add them back after the reset completes.
5229 */
5230 amdgpu_unregister_gpu_instance(tmp_adev);
5231
163d4cd2 5232 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
565d1941 5233
f1c1314b 5234 /* disable ras on ALL IPs */
bb5c7235 5235 if (!need_emergency_restart &&
b823821f 5236 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 5237 amdgpu_ras_suspend(tmp_adev);
5238
1d721ed6
AG
5239 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5240 struct amdgpu_ring *ring = tmp_adev->rings[i];
5241
5242 if (!ring || !ring->sched.thread)
5243 continue;
5244
0b2d2c2e 5245 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 5246
bb5c7235 5247 if (need_emergency_restart)
7c6e68c7 5248 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6 5249 }
8f8c80f4 5250 atomic_inc(&tmp_adev->gpu_reset_counter);
1d721ed6
AG
5251 }
5252
bb5c7235 5253 if (need_emergency_restart)
7c6e68c7
AG
5254 goto skip_sched_resume;
5255
1d721ed6
AG
5256 /*
5257 * Must check guilty signal here since after this point all old
5258 * HW fences are force signaled.
5259 *
5260 * job->base holds a reference to parent fence
5261 */
f6a3f660 5262 if (job && dma_fence_is_signaled(&job->hw_fence)) {
1d721ed6 5263 job_signaled = true;
1d721ed6
AG
5264 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5265 goto skip_hw_reset;
5266 }
5267
26bc5340 5268retry: /* Rest of adevs pre asic reset from XGMI hive. */
655ce9cb 5269 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f5c7e779
YC
5270 if (gpu_reset_for_dev_remove) {
5271 /* Workaround for ASICs that need to disable SMC first */
5272 amdgpu_device_smu_fini_early(tmp_adev);
5273 }
f1549c09 5274 r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
26bc5340
AG
5275 /* TODO: Should we stop? */
5276 if (r) {
aac89168 5277 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
4a580877 5278 r, adev_to_drm(tmp_adev)->unique);
26bc5340
AG
5279 tmp_adev->asic_reset_res = r;
5280 }
247c7b0d
AG
5281
5282 /*
5283 * Drop all pending non-scheduler resets. Scheduler resets
5284 * were already dropped during drm_sched_stop
5285 */
d193b12b 5286 amdgpu_device_stop_pending_resets(tmp_adev);
26bc5340
AG
5287 }
5288
5289 /* Actual ASIC resets if needed. */
4f30d920 5290 /* Host driver will handle XGMI hive reset for SRIOV */
26bc5340
AG
5291 if (amdgpu_sriov_vf(adev)) {
5292 r = amdgpu_device_reset_sriov(adev, job ? false : true);
5293 if (r)
5294 adev->asic_reset_res = r;
950d6425
SY
5295
5296 /* Aldebaran supports ras in SRIOV, so we need to resume ras during reset */
5297 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
5298 amdgpu_ras_resume(adev);
26bc5340 5299 } else {
f1549c09 5300 r = amdgpu_do_asic_reset(device_list_handle, reset_context);
b98a1648 5301 if (r == -EAGAIN)
26bc5340 5302 goto retry;
f5c7e779
YC
5303
5304 if (!r && gpu_reset_for_dev_remove)
5305 goto recover_end;
26bc5340
AG
5306 }
5307
1d721ed6
AG
5308skip_hw_reset:
5309
26bc5340 5310 /* Post ASIC reset for all devs. */
655ce9cb 5311 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
7c6e68c7 5312
1d721ed6
AG
5313 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5314 struct amdgpu_ring *ring = tmp_adev->rings[i];
5315
5316 if (!ring || !ring->sched.thread)
5317 continue;
5318
6868a2c4 5319 drm_sched_start(&ring->sched, true);
1d721ed6
AG
5320 }
5321
693073a0 5322 if (adev->enable_mes && adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3))
ed67f729
JX
5323 amdgpu_mes_self_test(tmp_adev);
5324
1053b9c9 5325 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) {
4a580877 5326 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
1d721ed6
AG
5327 }
5328
7258fa31
SK
5329 if (tmp_adev->asic_reset_res)
5330 r = tmp_adev->asic_reset_res;
5331
1d721ed6 5332 tmp_adev->asic_reset_res = 0;
26bc5340
AG
5333
5334 if (r) {
5335 /* bad news, how do we tell userspace? */
12ffa55d 5336 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
5337 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5338 } else {
12ffa55d 5339 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
3fa8f89d
S
5340 if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5341 DRM_WARN("smart shift update failed\n");
26bc5340 5342 }
7c6e68c7 5343 }
26bc5340 5344
7c6e68c7 5345skip_sched_resume:
655ce9cb 5346 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
428890a3 5347 /* unlock kfd: SRIOV would do it separately */
c004d44e 5348 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
428890a3 5349 amdgpu_amdkfd_post_reset(tmp_adev);
8e2712e7 5350
5351 /* kfd_post_reset will do nothing if the kfd device is not initialized;
5352 * bring up kfd here if it was not initialized before
5353 */
5354 if (!adev->kfd.init_complete)
5355 amdgpu_amdkfd_device_init(adev);
5356
3f12acc8
EQ
5357 if (audio_suspended)
5358 amdgpu_device_resume_display_audio(tmp_adev);
e923be99
AG
5359
5360 amdgpu_device_unset_mp1_state(tmp_adev);
d293470e
YC
5361
5362 amdgpu_ras_set_error_query_ready(tmp_adev, true);
26bc5340
AG
5363 }
5364
f5c7e779 5365recover_end:
e923be99
AG
5366 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5367 reset_list);
5368 amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5369
9e94d22c 5370 if (hive) {
9e94d22c 5371 mutex_unlock(&hive->hive_lock);
d95e8e97 5372 amdgpu_put_xgmi_hive(hive);
9e94d22c 5373 }
26bc5340 5374
f287a3c5 5375 if (r)
26bc5340 5376 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
ab9a0b1f
AG
5377
5378 atomic_set(&adev->reset_domain->reset_res, r);
d38ceaf9
AD
5379 return r;
5380}
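
/*
 * Illustrative sketch, not part of the driver: a minimal hypothetical
 * caller of amdgpu_device_gpu_recover(), mirroring how a hang handler
 * builds a reset context (the field names match the PCI slot-reset
 * path later in this file; the job plumbing is assumed).
 */
static int example_trigger_gpu_recovery(struct amdgpu_device *adev,
					struct amdgpu_job *job)
{
	struct amdgpu_reset_context reset_context;

	memset(&reset_context, 0, sizeof(reset_context));
	reset_context.method = AMD_RESET_METHOD_NONE;	/* let the ASIC decide */
	reset_context.reset_req_dev = adev;
	clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);

	return amdgpu_device_gpu_recover(adev, job, &reset_context);
}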
5381
e3ecdffa
AD
5382/**
5383 * amdgpu_device_get_pcie_info - fetch PCIe info about the PCIe slot
5384 *
5385 * @adev: amdgpu_device pointer
5386 *
5387 * Fetches and stores in the driver the PCIe capabilities (gen speed
5388 * and lanes) of the slot the device is in. Handles APUs and
5389 * virtualized environments where PCIe config space may not be available.
5390 */
5494d864 5391static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 5392{
5d9a6330 5393 struct pci_dev *pdev;
c5313457
HK
5394 enum pci_bus_speed speed_cap, platform_speed_cap;
5395 enum pcie_link_width platform_link_width;
d0dd7f0c 5396
cd474ba0
AD
5397 if (amdgpu_pcie_gen_cap)
5398 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 5399
cd474ba0
AD
5400 if (amdgpu_pcie_lane_cap)
5401 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 5402
cd474ba0
AD
5403 /* covers APUs as well */
5404 if (pci_is_root_bus(adev->pdev->bus)) {
5405 if (adev->pm.pcie_gen_mask == 0)
5406 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5407 if (adev->pm.pcie_mlw_mask == 0)
5408 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 5409 return;
cd474ba0 5410 }
d0dd7f0c 5411
c5313457
HK
5412 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5413 return;
5414
dbaa922b
AD
5415 pcie_bandwidth_available(adev->pdev, NULL,
5416 &platform_speed_cap, &platform_link_width);
c5313457 5417
cd474ba0 5418 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
5419 /* asic caps */
5420 pdev = adev->pdev;
5421 speed_cap = pcie_get_speed_cap(pdev);
5422 if (speed_cap == PCI_SPEED_UNKNOWN) {
5423 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
5424 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5425 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 5426 } else {
2b3a1f51
FX
5427 if (speed_cap == PCIE_SPEED_32_0GT)
5428 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5429 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5430 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5431 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5432 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5433 else if (speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5434 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5435 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5436 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5437 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5438 else if (speed_cap == PCIE_SPEED_8_0GT)
5439 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5440 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5441 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5442 else if (speed_cap == PCIE_SPEED_5_0GT)
5443 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5444 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5445 else
5446 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5447 }
5448 /* platform caps */
c5313457 5449 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
5450 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5451 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5452 } else {
2b3a1f51
FX
5453 if (platform_speed_cap == PCIE_SPEED_32_0GT)
5454 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5455 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5456 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5457 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5458 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5459 else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5460 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5461 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5462 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5463 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 5464 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
5465 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5466 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5467 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 5468 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
5469 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5470 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5471 else
5472 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5473
cd474ba0
AD
5474 }
5475 }
5476 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 5477 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
5478 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5479 } else {
c5313457 5480 switch (platform_link_width) {
5d9a6330 5481 case PCIE_LNK_X32:
cd474ba0
AD
5482 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5483 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5484 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5485 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5486 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5487 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5488 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5489 break;
5d9a6330 5490 case PCIE_LNK_X16:
cd474ba0
AD
5491 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5492 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5493 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5494 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5495 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5496 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5497 break;
5d9a6330 5498 case PCIE_LNK_X12:
cd474ba0
AD
5499 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5500 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5501 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5502 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5503 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5504 break;
5d9a6330 5505 case PCIE_LNK_X8:
cd474ba0
AD
5506 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5507 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5508 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5509 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5510 break;
5d9a6330 5511 case PCIE_LNK_X4:
cd474ba0
AD
5512 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5513 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5514 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5515 break;
5d9a6330 5516 case PCIE_LNK_X2:
cd474ba0
AD
5517 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5518 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5519 break;
5d9a6330 5520 case PCIE_LNK_X1:
cd474ba0
AD
5521 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5522 break;
5523 default:
5524 break;
5525 }
d0dd7f0c
AD
5526 }
5527 }
5528}
d38ceaf9 5529
08a2fd23
RE
5530/**
5531 * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
5532 *
5533 * @adev: amdgpu_device pointer
5534 * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
5535 *
5536 * Return true if @peer_adev can access (DMA) @adev through the PCIe
5537 * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
5538 * @peer_adev.
5539 */
5540bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
5541 struct amdgpu_device *peer_adev)
5542{
5543#ifdef CONFIG_HSA_AMD_P2P
5544 uint64_t address_mask = peer_adev->dev->dma_mask ?
5545 ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
5546 resource_size_t aper_limit =
5547 adev->gmc.aper_base + adev->gmc.aper_size - 1;
bb66ecbf
LL
5548 bool p2p_access =
5549 !adev->gmc.xgmi.connected_to_cpu &&
5550 !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
08a2fd23
RE
5551
5552 return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
5553 adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
5554 !(adev->gmc.aper_base & address_mask ||
5555 aper_limit & address_mask));
5556#else
5557 return false;
5558#endif
5559}
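
/*
 * Illustrative sketch (hypothetical helper): using the check above to
 * decide between direct PCIe P2P access to a peer's VRAM and falling
 * back to staging through system memory.
 */
static int example_check_peer_mapping(struct amdgpu_device *adev,
				      struct amdgpu_device *peer_adev)
{
	if (amdgpu_device_is_peer_accessible(adev, peer_adev))
		return 0;	/* peer may DMA directly into our VRAM BAR */

	/* not large BAR, or BAR outside the peer's DMA mask: stage instead */
	return -EOPNOTSUPP;
}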
5560
361dbd01
AD
5561int amdgpu_device_baco_enter(struct drm_device *dev)
5562{
1348969a 5563 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5564 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01 5565
4a580877 5566 if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
361dbd01
AD
5567 return -ENOTSUPP;
5568
8ab0d6f0 5569 if (ras && adev->ras_enabled &&
acdae216 5570 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5571 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
5572
9530273e 5573 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
5574}
5575
5576int amdgpu_device_baco_exit(struct drm_device *dev)
5577{
1348969a 5578 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5579 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 5580 int ret = 0;
361dbd01 5581
4a580877 5582 if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
361dbd01
AD
5583 return -ENOTSUPP;
5584
9530273e
EQ
5585 ret = amdgpu_dpm_baco_exit(adev);
5586 if (ret)
5587 return ret;
7a22677b 5588
8ab0d6f0 5589 if (ras && adev->ras_enabled &&
acdae216 5590 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5591 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
5592
1bece222
CL
5593 if (amdgpu_passthrough(adev) &&
5594 adev->nbio.funcs->clear_doorbell_interrupt)
5595 adev->nbio.funcs->clear_doorbell_interrupt(adev);
5596
7a22677b 5597 return 0;
361dbd01 5598}
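
/*
 * Illustrative sketch: BACO enter/exit are used as a pair around a
 * low-power window (the runtime-PM path does the equivalent); the body
 * between the two calls is a placeholder.
 */
static int example_baco_power_cycle(struct drm_device *dev)
{
	int r;

	r = amdgpu_device_baco_enter(dev);
	if (r)
		return r;

	/* ... GPU sits in BACO (Bus Active, Chip Off) here ... */

	return amdgpu_device_baco_exit(dev);
}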
c9a6b82f
AG
5599
5600/**
5601 * amdgpu_pci_error_detected - Called when a PCI error is detected.
5602 * @pdev: PCI device struct
5603 * @state: PCI channel state
5604 *
5605 * Description: Called when a PCI error is detected.
5606 *
5607 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
5608 */
5609pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
5610{
5611 struct drm_device *dev = pci_get_drvdata(pdev);
5612 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5613 int i;
c9a6b82f
AG
5614
5615 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
5616
6894305c
AG
5617 if (adev->gmc.xgmi.num_physical_nodes > 1) {
5618 DRM_WARN("No support for XGMI hive yet...");
5619 return PCI_ERS_RESULT_DISCONNECT;
5620 }
5621
e17e27f9
GC
5622 adev->pci_channel_state = state;
5623
c9a6b82f
AG
5624 switch (state) {
5625 case pci_channel_io_normal:
5626 return PCI_ERS_RESULT_CAN_RECOVER;
acd89fca 5627 /* Fatal error, prepare for slot reset */
8a11d283
TZ
5628 case pci_channel_io_frozen:
5629 /*
d0fb18b5 5630 * Locking adev->reset_domain->sem will prevent any external access
acd89fca
AG
5631 * to GPU during PCI error recovery
5632 */
3675c2f2 5633 amdgpu_device_lock_reset_domain(adev->reset_domain);
e923be99 5634 amdgpu_device_set_mp1_state(adev);
acd89fca
AG
5635
5636 /*
5637 * Block any work scheduling as we do for regular GPU reset
5638 * for the duration of the recovery
5639 */
5640 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5641 struct amdgpu_ring *ring = adev->rings[i];
5642
5643 if (!ring || !ring->sched.thread)
5644 continue;
5645
5646 drm_sched_stop(&ring->sched, NULL);
5647 }
8f8c80f4 5648 atomic_inc(&adev->gpu_reset_counter);
c9a6b82f
AG
5649 return PCI_ERS_RESULT_NEED_RESET;
5650 case pci_channel_io_perm_failure:
5651 /* Permanent error, prepare for device removal */
5652 return PCI_ERS_RESULT_DISCONNECT;
5653 }
5654
5655 return PCI_ERS_RESULT_NEED_RESET;
5656}
5657
5658/**
5659 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
5660 * @pdev: pointer to PCI device
5661 */
5662pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
5663{
5664
5665 DRM_INFO("PCI error: mmio enabled callback!!\n");
5666
5667 /* TODO - dump whatever for debugging purposes */
5668
5669 /* This is called only if amdgpu_pci_error_detected returns
5670 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
5671 * works, no need to reset slot.
5672 */
5673
5674 return PCI_ERS_RESULT_RECOVERED;
5675}
5676
5677/**
5678 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
5679 * @pdev: PCI device struct
5680 *
5681 * Description: This routine is called by the pci error recovery
5682 * code after the PCI slot has been reset, just before we
5683 * should resume normal operations.
5684 */
5685pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
5686{
5687 struct drm_device *dev = pci_get_drvdata(pdev);
5688 struct amdgpu_device *adev = drm_to_adev(dev);
362c7b91 5689 int r, i;
04442bf7 5690 struct amdgpu_reset_context reset_context;
362c7b91 5691 u32 memsize;
7ac71382 5692 struct list_head device_list;
c9a6b82f
AG
5693
5694 DRM_INFO("PCI error: slot reset callback!!\n");
5695
04442bf7
LL
5696 memset(&reset_context, 0, sizeof(reset_context));
5697
7ac71382 5698 INIT_LIST_HEAD(&device_list);
655ce9cb 5699 list_add_tail(&adev->reset_list, &device_list);
7ac71382 5700
362c7b91
AG
5701 /* wait for asic to come out of reset */
5702 msleep(500);
5703
7ac71382 5704 /* Restore PCI confspace */
c1dd4aa6 5705 amdgpu_device_load_pci_state(pdev);
c9a6b82f 5706
362c7b91
AG
5707 /* confirm ASIC came out of reset */
5708 for (i = 0; i < adev->usec_timeout; i++) {
5709 memsize = amdgpu_asic_get_config_memsize(adev);
5710
5711 if (memsize != 0xffffffff)
5712 break;
5713 udelay(1);
5714 }
5715 if (memsize == 0xffffffff) {
5716 r = -ETIME;
5717 goto out;
5718 }
5719
04442bf7
LL
5720 reset_context.method = AMD_RESET_METHOD_NONE;
5721 reset_context.reset_req_dev = adev;
5722 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
5723 set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
5724
7afefb81 5725 adev->no_hw_access = true;
04442bf7 5726 r = amdgpu_device_pre_asic_reset(adev, &reset_context);
7afefb81 5727 adev->no_hw_access = false;
c9a6b82f
AG
5728 if (r)
5729 goto out;
5730
04442bf7 5731 r = amdgpu_do_asic_reset(&device_list, &reset_context);
c9a6b82f
AG
5732
5733out:
c9a6b82f 5734 if (!r) {
c1dd4aa6
AG
5735 if (amdgpu_device_cache_pci_state(adev->pdev))
5736 pci_restore_state(adev->pdev);
5737
c9a6b82f
AG
5738 DRM_INFO("PCIe error recovery succeeded\n");
5739 } else {
5740 DRM_ERROR("PCIe error recovery failed, err:%d", r);
e923be99
AG
5741 amdgpu_device_unset_mp1_state(adev);
5742 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f
AG
5743 }
5744
5745 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
5746}
5747
5748/**
5749 * amdgpu_pci_resume() - resume normal ops after PCI reset
5750 * @pdev: pointer to PCI device
5751 *
5752 * Called when the error recovery driver tells us that it's
505199a3 5753 * OK to resume normal operation.
c9a6b82f
AG
5754 */
5755void amdgpu_pci_resume(struct pci_dev *pdev)
5756{
5757 struct drm_device *dev = pci_get_drvdata(pdev);
5758 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5759 int i;
c9a6b82f 5760
c9a6b82f
AG
5761
5762 DRM_INFO("PCI error: resume callback!!\n");
acd89fca 5763
e17e27f9
GC
5764 /* Only continue execution for the case of pci_channel_io_frozen */
5765 if (adev->pci_channel_state != pci_channel_io_frozen)
5766 return;
5767
acd89fca
AG
5768 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5769 struct amdgpu_ring *ring = adev->rings[i];
5770
5771 if (!ring || !ring->sched.thread)
5772 continue;
5773
acd89fca
AG
5774 drm_sched_start(&ring->sched, true);
5775 }
5776
e923be99
AG
5777 amdgpu_device_unset_mp1_state(adev);
5778 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f 5779}
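
/*
 * For reference: the four callbacks above are wired into the PCI core
 * through a struct pci_error_handlers in the driver descriptor (the
 * real registration lives in amdgpu_drv.c); this sketch shows the shape.
 */
static const struct pci_error_handlers example_pci_err_handlers = {
	.error_detected	= amdgpu_pci_error_detected,
	.mmio_enabled	= amdgpu_pci_mmio_enabled,
	.slot_reset	= amdgpu_pci_slot_reset,
	.resume		= amdgpu_pci_resume,
};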
c1dd4aa6
AG
5780
5781bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
5782{
5783 struct drm_device *dev = pci_get_drvdata(pdev);
5784 struct amdgpu_device *adev = drm_to_adev(dev);
5785 int r;
5786
5787 r = pci_save_state(pdev);
5788 if (!r) {
5789 kfree(adev->pci_state);
5790
5791 adev->pci_state = pci_store_saved_state(pdev);
5792
5793 if (!adev->pci_state) {
5794 DRM_ERROR("Failed to store PCI saved state");
5795 return false;
5796 }
5797 } else {
5798 DRM_WARN("Failed to save PCI state, err:%d\n", r);
5799 return false;
5800 }
5801
5802 return true;
5803}
5804
5805bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
5806{
5807 struct drm_device *dev = pci_get_drvdata(pdev);
5808 struct amdgpu_device *adev = drm_to_adev(dev);
5809 int r;
5810
5811 if (!adev->pci_state)
5812 return false;
5813
5814 r = pci_load_saved_state(pdev, adev->pci_state);
5815
5816 if (!r) {
5817 pci_restore_state(pdev);
5818 } else {
5819 DRM_WARN("Failed to load PCI state, err:%d\n", r);
5820 return false;
5821 }
5822
5823 return true;
5824}
5825
810085dd
EH
5826void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
5827 struct amdgpu_ring *ring)
5828{
5829#ifdef CONFIG_X86_64
b818a5d3 5830 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
5831 return;
5832#endif
5833 if (adev->gmc.xgmi.connected_to_cpu)
5834 return;
5835
5836 if (ring && ring->funcs->emit_hdp_flush)
5837 amdgpu_ring_emit_hdp_flush(ring);
5838 else
5839 amdgpu_asic_flush_hdp(adev, ring);
5840}
c1dd4aa6 5841
810085dd
EH
5842void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
5843 struct amdgpu_ring *ring)
5844{
5845#ifdef CONFIG_X86_64
b818a5d3 5846 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
5847 return;
5848#endif
5849 if (adev->gmc.xgmi.connected_to_cpu)
5850 return;
c1dd4aa6 5851
810085dd
EH
5852 amdgpu_asic_invalidate_hdp(adev, ring);
5853}
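
/*
 * Illustrative sketch: the typical ordering of the two HDP helpers
 * around a CPU<->GPU handoff; the submission and wait plumbing between
 * them is assumed.
 */
static void example_hdp_handoff(struct amdgpu_device *adev,
				struct amdgpu_ring *ring)
{
	/* CPU wrote VRAM through the BAR: flush HDP so the GPU sees it */
	amdgpu_device_flush_hdp(adev, ring);

	/* ... submit GPU work that writes results back to VRAM ... */

	/* GPU wrote VRAM: drop the stale HDP read cache before the CPU reads */
	amdgpu_device_invalidate_hdp(adev, ring);
}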
34f3a4a9 5854
89a7a870
AG
5855int amdgpu_in_reset(struct amdgpu_device *adev)
5856{
5857 return atomic_read(&adev->reset_domain->in_gpu_reset);
5858}
5859
34f3a4a9
LY
5860/**
5861 * amdgpu_device_halt() - bring hardware to some kind of halt state
5862 *
5863 * @adev: amdgpu_device pointer
5864 *
5865 * Bring hardware to some kind of halt state so that no one can touch it
5866 * any more. This helps to maintain error context when an error occurs.
5867 * Compared to a simple hang, the system will stay stable at least for SSH
5868 * access. Then it should be trivial to inspect the hardware state and
5869 * see what's going on. Implemented as follows:
5870 *
5871 * 1. drm_dev_unplug() makes the device inaccessible to user space (IOCTLs, etc.),
5872 * clears all CPU mappings to the device, disallows remappings through page faults
5873 * 2. amdgpu_irq_disable_all() disables all interrupts
5874 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
5875 * 4. set adev->no_hw_access to avoid potential crashes after step 5
5876 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
5877 * 6. pci_disable_device() and pci_wait_for_pending_transaction()
5878 * flush any in flight DMA operations
5879 */
5880void amdgpu_device_halt(struct amdgpu_device *adev)
5881{
5882 struct pci_dev *pdev = adev->pdev;
e0f943b4 5883 struct drm_device *ddev = adev_to_drm(adev);
34f3a4a9
LY
5884
5885 drm_dev_unplug(ddev);
5886
5887 amdgpu_irq_disable_all(adev);
5888
5889 amdgpu_fence_driver_hw_fini(adev);
5890
5891 adev->no_hw_access = true;
5892
5893 amdgpu_device_unmap_mmio(adev);
5894
5895 pci_disable_device(pdev);
5896 pci_wait_for_pending_transaction(pdev);
5897}
86700a40
XD
5898
5899u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
5900 u32 reg)
5901{
5902 unsigned long flags, address, data;
5903 u32 r;
5904
5905 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
5906 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
5907
5908 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
5909 WREG32(address, reg * 4);
5910 (void)RREG32(address);
5911 r = RREG32(data);
5912 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
5913 return r;
5914}
5915
5916void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
5917 u32 reg, u32 v)
5918{
5919 unsigned long flags, address, data;
5920
5921 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
5922 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
5923
5924 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
5925 WREG32(address, reg * 4);
5926 (void)RREG32(address);
5927 WREG32(data, v);
5928 (void)RREG32(data);
5929 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
5930}
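
/*
 * Illustrative sketch: a read-modify-write built on the index/data
 * accessors above; reg, mask and value are caller-supplied placeholders.
 */
static void example_pcie_port_rmw(struct amdgpu_device *adev,
				  u32 reg, u32 mask, u32 value)
{
	u32 tmp = amdgpu_device_pcie_port_rreg(adev, reg);

	tmp = (tmp & ~mask) | (value & mask);
	amdgpu_device_pcie_port_wreg(adev, reg, tmp);
}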
68ce8b24
CK
5931
5932/**
5933 * amdgpu_device_switch_gang - switch to a new gang
5934 * @adev: amdgpu_device pointer
5935 * @gang: the gang to switch to
5936 *
5937 * Try to switch to a new gang.
5938 * Returns: NULL if we switched to the new gang or a reference to the current
5939 * gang leader.
5940 */
5941struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
5942 struct dma_fence *gang)
5943{
5944 struct dma_fence *old = NULL;
5945
5946 do {
5947 dma_fence_put(old);
5948 rcu_read_lock();
5949 old = dma_fence_get_rcu_safe(&adev->gang_submit);
5950 rcu_read_unlock();
5951
5952 if (old == gang)
5953 break;
5954
5955 if (!dma_fence_is_signaled(old))
5956 return old;
5957
5958 } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
5959 old, gang) != old);
5960
5961 dma_fence_put(old);
5962 return NULL;
5963}
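
/*
 * Illustrative sketch (hypothetical submitter): retry the gang switch
 * until the previous leader has signaled; interruptible waiting and
 * error handling are simplified away.
 */
static void example_install_gang_fence(struct amdgpu_device *adev,
				       struct dma_fence *gang)
{
	struct dma_fence *old;

	while ((old = amdgpu_device_switch_gang(adev, gang))) {
		/* previous gang still running: wait for it, then retry */
		dma_fence_wait(old, false);
		dma_fence_put(old);
	}
}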
220c8cc8
AD
5964
5965bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
5966{
5967 switch (adev->asic_type) {
5968#ifdef CONFIG_DRM_AMDGPU_SI
5969 case CHIP_HAINAN:
5970#endif
5971 case CHIP_TOPAZ:
5972 /* chips with no display hardware */
5973 return false;
5974#ifdef CONFIG_DRM_AMDGPU_SI
5975 case CHIP_TAHITI:
5976 case CHIP_PITCAIRN:
5977 case CHIP_VERDE:
5978 case CHIP_OLAND:
5979#endif
5980#ifdef CONFIG_DRM_AMDGPU_CIK
5981 case CHIP_BONAIRE:
5982 case CHIP_HAWAII:
5983 case CHIP_KAVERI:
5984 case CHIP_KABINI:
5985 case CHIP_MULLINS:
5986#endif
5987 case CHIP_TONGA:
5988 case CHIP_FIJI:
5989 case CHIP_POLARIS10:
5990 case CHIP_POLARIS11:
5991 case CHIP_POLARIS12:
5992 case CHIP_VEGAM:
5993 case CHIP_CARRIZO:
5994 case CHIP_STONEY:
5995 /* chips with display hardware */
5996 return true;
5997 default:
5998 /* IP discovery */
5999 if (!adev->ip_versions[DCE_HWIP][0] ||
6000 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6001 return false;
6002 return true;
6003 }
6004}