drm/amd: Remove needless break for legacy IP discovery MP0 9.0.0
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/console.h>
#include <linux/slab.h>
#include <linux/iommu.h>
#include <linux/pci.h>
#include <linux/devcoredump.h>
#include <generated/utsrelease.h>
#include <linux/pci-p2pdma.h>

#include <drm/drm_aperture.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_fb_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "nv.h"
#include "bif/bif_4_1_d.h"
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
#include "amdgpu_pmu.h"
#include "amdgpu_fru_eeprom.h"
#include "amdgpu_reset.h"

#include <linux/suspend.h>
#include <drm/task_barrier.h>
#include <linux/pm_runtime.h>

#include <drm/drm_drv.h>

MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");

#define AMDGPU_RESUME_MS		2000
#define AMDGPU_MAX_RETRY_LIMIT		2
#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)

static const struct drm_driver amdgpu_kms_driver;

const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"ARCTURUS",
	"RENOIR",
	"ALDEBARAN",
	"NAVI10",
	"CYAN_SKILLFISH",
	"NAVI14",
	"NAVI12",
	"SIENNA_CICHLID",
	"NAVY_FLOUNDER",
	"VANGOGH",
	"DIMGREY_CAVEFISH",
	"BEIGE_GOBY",
	"YELLOW_CARP",
	"IP DISCOVERY",
	"LAST",
};

/**
 * DOC: pcie_replay_count
 *
 * The amdgpu driver provides a sysfs API for reporting the total number
 * of PCIe replays (NAKs).
 * The file pcie_replay_count is used for this and returns the total
 * number of replays as a sum of the NAKs generated and NAKs received.
 */

static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);
	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);

	return sysfs_emit(buf, "%llu\n", cnt);
}

static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
		amdgpu_device_get_pcie_replay_count, NULL);

static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);

/**
 * DOC: product_name
 *
 * The amdgpu driver provides a sysfs API for reporting the product name
 * for the device.
 * The file product_name is used for this and returns the product name
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_product_name(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);

	return sysfs_emit(buf, "%s\n", adev->product_name);
}

static DEVICE_ATTR(product_name, S_IRUGO,
		amdgpu_device_get_product_name, NULL);

/**
 * DOC: product_number
 *
 * The amdgpu driver provides a sysfs API for reporting the part number
 * for the device.
 * The file product_number is used for this and returns the part number
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_product_number(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);

	return sysfs_emit(buf, "%s\n", adev->product_number);
}

static DEVICE_ATTR(product_number, S_IRUGO,
		amdgpu_device_get_product_number, NULL);

/**
 * DOC: serial_number
 *
 * The amdgpu driver provides a sysfs API for reporting the serial number
 * for the device.
 * The file serial_number is used for this and returns the serial number
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_serial_number(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);

	return sysfs_emit(buf, "%s\n", adev->serial);
}

static DEVICE_ATTR(serial_number, S_IRUGO,
		amdgpu_device_get_serial_number, NULL);

/**
 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with ATPX power control,
 * otherwise return false.
 */
bool amdgpu_device_supports_px(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);

	if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
		return true;
	return false;
}

/**
 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with ACPI power control,
 * otherwise return false.
 */
bool amdgpu_device_supports_boco(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);

	if (adev->has_pr3 ||
	    ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
		return true;
	return false;
}

/**
 * amdgpu_device_supports_baco - Does the device support BACO
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device supports BACO,
 * otherwise return false.
 */
bool amdgpu_device_supports_baco(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);

	return amdgpu_asic_supports_baco(adev);
}

/**
 * amdgpu_device_supports_smart_shift - Is the device dGPU with
 * smart shift support
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with Smart Shift support,
 * otherwise returns false.
 */
bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
{
	return (amdgpu_device_supports_boco(dev) &&
		amdgpu_acpi_is_power_shift_control_supported());
}

/*
 * VRAM access helper functions
 */

/**
 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size, sizeof(@buf) must be > @size
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
			     void *buf, size_t size, bool write)
{
	unsigned long flags;
	uint32_t hi = ~0, tmp = 0;
	uint32_t *data = buf;
	uint64_t last;
	int idx;

	if (!drm_dev_enter(adev_to_drm(adev), &idx))
		return;

	BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));

	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
	for (last = pos + size; pos < last; pos += 4) {
		tmp = pos >> 31;

		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
		if (tmp != hi) {
			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
			hi = tmp;
		}
		if (write)
			WREG32_NO_KIQ(mmMM_DATA, *data++);
		else
			*data++ = RREG32_NO_KIQ(mmMM_DATA);
	}

	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	drm_dev_exit(idx);
}

/**
 * amdgpu_device_aper_access - access vram by vram aperture
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size, sizeof(@buf) must be > @size
 * @write: true - write to vram, otherwise - read from vram
 *
 * The return value means how many bytes have been transferred.
 */
size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
				 void *buf, size_t size, bool write)
{
#ifdef CONFIG_64BIT
	void __iomem *addr;
	size_t count = 0;
	uint64_t last;

	if (!adev->mman.aper_base_kaddr)
		return 0;

	last = min(pos + size, adev->gmc.visible_vram_size);
	if (last > pos) {
		addr = adev->mman.aper_base_kaddr + pos;
		count = last - pos;

		if (write) {
			memcpy_toio(addr, buf, count);
			mb();
			amdgpu_device_flush_hdp(adev, NULL);
		} else {
			amdgpu_device_invalidate_hdp(adev, NULL);
			mb();
			memcpy_fromio(buf, addr, count);
		}
	}

	return count;
#else
	return 0;
#endif
}

/**
 * amdgpu_device_vram_access - read/write a buffer in vram
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size, sizeof(@buf) must be > @size
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
			       void *buf, size_t size, bool write)
{
	size_t count;

	/* try using the vram aperture to access vram first */
	count = amdgpu_device_aper_access(adev, pos, buf, size, write);
	size -= count;
	if (size) {
		/* use MM_INDEX/MM_DATA to access the rest of vram */
		pos += count;
		buf += count;
		amdgpu_device_mm_access(adev, pos, buf, size, write);
	}
}

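/*
 * Illustrative (hypothetical) caller of the helper above: copy the first
 * 4 KiB of VRAM into a system-memory buffer. The helper tries the CPU-visible
 * aperture first and transparently falls back to MM_INDEX/MM_DATA for any
 * remainder, so callers need not care which path is taken.
 *
 *	uint32_t data[1024];
 *
 *	amdgpu_device_vram_access(adev, 0, data, sizeof(data), false);
 */
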
/*
 * register access helper functions.
 */

/* Check if hw access should be skipped because of hotplug or device error */
bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
{
	if (adev->no_hw_access)
		return true;

#ifdef CONFIG_LOCKDEP
	/*
	 * This is a bit complicated to understand, so worth a comment. What we assert
	 * here is that the GPU reset is not running on another thread in parallel.
	 *
	 * For this we trylock the read side of the reset semaphore, if that succeeds
	 * we know that the reset is not running in parallel.
	 *
	 * If the trylock fails we assert that we are either already holding the read
	 * side of the lock or are the reset thread itself and hold the write side of
	 * the lock.
	 */
	if (in_task()) {
		if (down_read_trylock(&adev->reset_domain->sem))
			up_read(&adev->reset_domain->sem);
		else
			lockdep_assert_held(&adev->reset_domain->sem);
	}
#endif
	return false;
}

/**
 * amdgpu_device_rreg - read a memory mapped IO or indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
			    uint32_t reg, uint32_t acc_flags)
{
	uint32_t ret;

	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	if ((reg * 4) < adev->rmmio_size) {
		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
		    amdgpu_sriov_runtime(adev) &&
		    down_read_trylock(&adev->reset_domain->sem)) {
			ret = amdgpu_kiq_rreg(adev, reg);
			up_read(&adev->reset_domain->sem);
		} else {
			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
		}
	} else {
		ret = adev->pcie_rreg(adev, reg * 4);
	}

	trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);

	return ret;
}

/*
 * MMIO register read with bytes helper functions
 * @offset: bytes offset from MMIO start
 */

/**
 * amdgpu_mm_rreg8 - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 *
 * Returns the 8 bit value from the offset specified.
 */
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
{
	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	if (offset < adev->rmmio_size)
		return (readb(adev->rmmio + offset));
	BUG();
}

/*
 * MMIO register write with bytes helper functions
 * @offset: bytes offset from MMIO start
 * @value: the value want to be written to the register
 */

/**
 * amdgpu_mm_wreg8 - write a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
{
	if (amdgpu_device_skip_hw_access(adev))
		return;

	if (offset < adev->rmmio_size)
		writeb(value, adev->rmmio + offset);
	else
		BUG();
}

/**
 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_device_wreg(struct amdgpu_device *adev,
			uint32_t reg, uint32_t v,
			uint32_t acc_flags)
{
	if (amdgpu_device_skip_hw_access(adev))
		return;

	if ((reg * 4) < adev->rmmio_size) {
		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
		    amdgpu_sriov_runtime(adev) &&
		    down_read_trylock(&adev->reset_domain->sem)) {
			amdgpu_kiq_wreg(adev, reg, v);
			up_read(&adev->reset_domain->sem);
		} else {
			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
		}
	} else {
		adev->pcie_wreg(adev, reg * 4, v);
	}

	trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
}

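/*
 * Illustrative sketch: driver code normally reaches amdgpu_device_rreg() and
 * amdgpu_device_wreg() through the RREG32()/WREG32() macros from amdgpu.h
 * rather than calling them directly, e.g. a read-modify-write of a dword
 * register (FOO_ENABLE_MASK is a hypothetical bit mask):
 *
 *	u32 tmp = RREG32(reg);
 *
 *	tmp |= FOO_ENABLE_MASK;
 *	WREG32(reg, tmp);
 */
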
/**
 * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
 *
 * @adev: amdgpu_device pointer
 * @reg: mmio/rlc register
 * @v: value to write
 *
 * this function is invoked only for the debugfs register access
 */
void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
			     uint32_t reg, uint32_t v)
{
	if (amdgpu_device_skip_hw_access(adev))
		return;

	if (amdgpu_sriov_fullaccess(adev) &&
	    adev->gfx.rlc.funcs &&
	    adev->gfx.rlc.funcs->is_rlcg_access_range) {
		if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
			return amdgpu_sriov_wreg(adev, reg, v, 0, 0);
	} else if ((reg * 4) >= adev->rmmio_size) {
		adev->pcie_wreg(adev, reg * 4, v);
	} else {
		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
	}
}

/**
 * amdgpu_mm_rdoorbell - read a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
{
	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	if (index < adev->doorbell.num_doorbells) {
		return readl(adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell - write a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
{
	if (amdgpu_device_skip_hw_access(adev))
		return;

	if (index < adev->doorbell.num_doorbells) {
		writel(v, adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

/**
 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
{
	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	if (index < adev->doorbell.num_doorbells) {
		return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
{
	if (amdgpu_device_skip_hw_access(adev))
		return;

	if (index < adev->doorbell.num_doorbells) {
		atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

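/*
 * Illustrative (hypothetical) use of the doorbell helpers above: ring code
 * typically kicks a queue by writing its new write pointer through the
 * WDOORBELL32() macro, which wraps amdgpu_mm_wdoorbell():
 *
 *	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
 */
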
/**
 * amdgpu_device_indirect_rreg - read an indirect register
 *
 * @adev: amdgpu_device pointer
 * @pcie_index: mmio register offset
 * @pcie_data: mmio register offset
 * @reg_addr: indirect register address to read from
 *
 * Returns the value of indirect register @reg_addr
 */
u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
				u32 pcie_index, u32 pcie_data,
				u32 reg_addr)
{
	unsigned long flags;
	u32 r;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	r = readl(pcie_data_offset);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return r;
}

/**
 * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
 *
 * @adev: amdgpu_device pointer
 * @pcie_index: mmio register offset
 * @pcie_data: mmio register offset
 * @reg_addr: indirect register address to read from
 *
 * Returns the value of indirect register @reg_addr
 */
u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
				  u32 pcie_index, u32 pcie_data,
				  u32 reg_addr)
{
	unsigned long flags;
	u64 r;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	/* read low 32 bits */
	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	r = readl(pcie_data_offset);
	/* read high 32 bits */
	writel(reg_addr + 4, pcie_index_offset);
	readl(pcie_index_offset);
	r |= ((u64)readl(pcie_data_offset) << 32);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return r;
}

/**
 * amdgpu_device_indirect_wreg - write an indirect register address
 *
 * @adev: amdgpu_device pointer
 * @pcie_index: mmio register offset
 * @pcie_data: mmio register offset
 * @reg_addr: indirect register offset
 * @reg_data: indirect register data
 *
 */
void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
				 u32 pcie_index, u32 pcie_data,
				 u32 reg_addr, u32 reg_data)
{
	unsigned long flags;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	writel(reg_data, pcie_data_offset);
	readl(pcie_data_offset);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

/**
 * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
 *
 * @adev: amdgpu_device pointer
 * @pcie_index: mmio register offset
 * @pcie_data: mmio register offset
 * @reg_addr: indirect register offset
 * @reg_data: indirect register data
 *
 */
void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
				   u32 pcie_index, u32 pcie_data,
				   u32 reg_addr, u64 reg_data)
{
	unsigned long flags;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	/* write low 32 bits */
	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
	readl(pcie_data_offset);
	/* write high 32 bits */
	writel(reg_addr + 4, pcie_index_offset);
	readl(pcie_index_offset);
	writel((u32)(reg_data >> 32), pcie_data_offset);
	readl(pcie_data_offset);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

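/*
 * Sketch of how an asic file typically wires its pcie register callbacks to
 * the indirect helpers above (paraphrased from soc15.c; the exact nbio
 * callbacks vary per asic):
 *
 *	static u32 soc15_pcie_rreg(struct amdgpu_device *adev, u32 reg)
 *	{
 *		unsigned long address = adev->nbio.funcs->get_pcie_index_offset(adev);
 *		unsigned long data = adev->nbio.funcs->get_pcie_data_offset(adev);
 *
 *		return amdgpu_device_indirect_rreg(adev, address, data, reg);
 *	}
 */
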
/**
 * amdgpu_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg64 - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
{
	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_block_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu_device pointer
 * @block: offset of instance
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
					  uint32_t block, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
		  reg, block);
	BUG();
	return 0;
}

/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
				      uint32_t block,
				      uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
		  reg, block, v);
	BUG();
}

/**
 * amdgpu_device_asic_init - Wrapper for atom asic_init
 *
 * @adev: amdgpu_device pointer
 *
 * Does any asic specific work and then calls atom asic init.
 */
static int amdgpu_device_asic_init(struct amdgpu_device *adev)
{
	amdgpu_asic_pre_asic_init(adev);

	if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0))
		return amdgpu_atomfirmware_asic_init(adev, true);
	else
		return amdgpu_atom_asic_init(adev->mode_info.atom_context);
}

/**
 * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
 *
 * @adev: amdgpu_device pointer
 *
 * Allocates a scratch page of VRAM for use by various things in the
 * driver.
 */
static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
{
	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
				       AMDGPU_GEM_DOMAIN_VRAM |
				       AMDGPU_GEM_DOMAIN_GTT,
				       &adev->mem_scratch.robj,
				       &adev->mem_scratch.gpu_addr,
				       (void **)&adev->mem_scratch.ptr);
}

/**
 * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
 *
 * @adev: amdgpu_device pointer
 *
 * Frees the VRAM scratch page.
 */
static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
}

/**
 * amdgpu_device_program_register_sequence - program an array of registers.
 *
 * @adev: amdgpu_device pointer
 * @registers: pointer to the register array
 * @array_size: size of the register array
 *
 * Programs an array of registers with AND and OR masks.
 * This is a helper for setting golden registers.
 */
void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
					     const u32 *registers,
					     const u32 array_size)
{
	u32 tmp, reg, and_mask, or_mask;
	int i;

	if (array_size % 3)
		return;

	for (i = 0; i < array_size; i += 3) {
		reg = registers[i + 0];
		and_mask = registers[i + 1];
		or_mask = registers[i + 2];

		if (and_mask == 0xffffffff) {
			tmp = or_mask;
		} else {
			tmp = RREG32(reg);
			tmp &= ~and_mask;
			if (adev->family >= AMDGPU_FAMILY_AI)
				tmp |= (or_mask & and_mask);
			else
				tmp |= or_mask;
		}
		WREG32(reg, tmp);
	}
}

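/*
 * Illustrative (hypothetical) golden register list for the helper above.
 * Entries come in {register, AND mask, OR value} triples; an AND mask of
 * 0xffffffff writes the OR value to the register outright, anything else is
 * a read-modify-write. The register names below are made up:
 *
 *	static const u32 golden_settings_example[] = {
 *		mmFOO_CNTL,   0x0000ff00, 0x00001200,
 *		mmBAR_CONFIG, 0xffffffff, 0x00000001,
 *	};
 *
 *	amdgpu_device_program_register_sequence(adev, golden_settings_example,
 *						ARRAY_SIZE(golden_settings_example));
 */
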
/**
 * amdgpu_device_pci_config_reset - reset the GPU
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using the pci config reset sequence.
 * Only applicable to asics prior to vega10.
 */
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
{
	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
}

/**
 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
 */
int amdgpu_device_pci_reset(struct amdgpu_device *adev)
{
	return pci_reset_function(adev->pdev);
}

/*
 * GPU doorbell aperture helpers function.
 */
/**
 * amdgpu_device_doorbell_init - Init doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Init doorbell driver information (CIK)
 * Returns 0 on success, error on failure.
 */
static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
{
	/* No doorbell on SI hardware generation */
	if (adev->asic_type < CHIP_BONAIRE) {
		adev->doorbell.base = 0;
		adev->doorbell.size = 0;
		adev->doorbell.num_doorbells = 0;
		adev->doorbell.ptr = NULL;
		return 0;
	}

	if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
		return -EINVAL;

	amdgpu_asic_init_doorbell_index(adev);

	/* doorbell bar mapping */
	adev->doorbell.base = pci_resource_start(adev->pdev, 2);
	adev->doorbell.size = pci_resource_len(adev->pdev, 2);

	if (adev->enable_mes) {
		adev->doorbell.num_doorbells =
			adev->doorbell.size / sizeof(u32);
	} else {
		adev->doorbell.num_doorbells =
			min_t(u32, adev->doorbell.size / sizeof(u32),
			      adev->doorbell_index.max_assignment + 1);
		if (adev->doorbell.num_doorbells == 0)
			return -EINVAL;

		/* For Vega, reserve and map two pages on doorbell BAR since SDMA
		 * paging queue doorbell use the second page. The
		 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
		 * doorbells are in the first page. So with paging queue enabled,
		 * the max num_doorbells should be increased by 1 page (0x400 in dword)
		 */
		if (adev->asic_type >= CHIP_VEGA10)
			adev->doorbell.num_doorbells += 0x400;
	}

	adev->doorbell.ptr = ioremap(adev->doorbell.base,
				     adev->doorbell.num_doorbells *
				     sizeof(u32));
	if (adev->doorbell.ptr == NULL)
		return -ENOMEM;

	return 0;
}

/**
 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Tear down doorbell driver information (CIK)
 */
static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
{
	iounmap(adev->doorbell.ptr);
	adev->doorbell.ptr = NULL;
}

/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */

/**
 * amdgpu_device_wb_fini - Disable Writeback and free memory
 *
 * @adev: amdgpu_device pointer
 *
 * Disables Writeback and frees the Writeback memory (all asics).
 * Used at driver shutdown.
 */
static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
{
	if (adev->wb.wb_obj) {
		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
				      &adev->wb.gpu_addr,
				      (void **)&adev->wb.wb);
		adev->wb.wb_obj = NULL;
	}
}

/**
 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
 *
 * @adev: amdgpu_device pointer
 *
 * Initializes writeback and allocates writeback memory (all asics).
 * Used at driver startup.
 * Returns 0 on success or an -error on failure.
 */
static int amdgpu_device_wb_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->wb.wb_obj == NULL) {
		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
					    (void **)&adev->wb.wb);
		if (r) {
			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
			return r;
		}

		adev->wb.num_wb = AMDGPU_MAX_WB;
		memset(&adev->wb.used, 0, sizeof(adev->wb.used));

		/* clear wb memory */
		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
	}

	return 0;
}

/**
 * amdgpu_device_wb_get - Allocate a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Allocate a wb slot for use by the driver (all asics).
 * Returns 0 on success or -EINVAL on failure.
 */
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
{
	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);

	if (offset < adev->wb.num_wb) {
		__set_bit(offset, adev->wb.used);
		*wb = offset << 3; /* convert to dw offset */
		return 0;
	} else {
		return -EINVAL;
	}
}

/**
 * amdgpu_device_wb_free - Free a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Free a wb slot allocated for use by the driver (all asics)
 */
void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
{
	wb >>= 3;
	if (wb < adev->wb.num_wb)
		__clear_bit(wb, adev->wb.used);
}

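/*
 * Illustrative (hypothetical) use of the wb helpers above: allocate a slot,
 * point an engine at its GPU address, read back what the GPU wrote and
 * release the slot. The returned index is a dword offset into adev->wb.wb,
 * and the matching GPU address is adev->wb.gpu_addr + wb * 4.
 *
 *	u32 wb, val;
 *
 *	if (!amdgpu_device_wb_get(adev, &wb)) {
 *		// ... hand adev->wb.gpu_addr + wb * 4 to the engine ...
 *		val = le32_to_cpu(adev->wb.wb[wb]);
 *		amdgpu_device_wb_free(adev, wb);
 *	}
 */
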
/**
 * amdgpu_device_resize_fb_bar - try to resize FB BAR
 *
 * @adev: amdgpu_device pointer
 *
 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 * to fail, but if any of the BARs is not accessible after the resize we abort
 * driver loading by returning -ENODEV.
 */
int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
{
	int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
	struct pci_bus *root;
	struct resource *res;
	unsigned i;
	u16 cmd;
	int r;

	/* Bypass for VF */
	if (amdgpu_sriov_vf(adev))
		return 0;

	/* skip if the bios has already enabled large BAR */
	if (adev->gmc.real_vram_size &&
	    (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
		return 0;

	/* Check if the root BUS has 64bit memory resources */
	root = adev->pdev->bus;
	while (root->parent)
		root = root->parent;

	pci_bus_for_each_resource(root, res, i) {
		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
		    res->start > 0x100000000ull)
			break;
	}

	/* Trying to resize is pointless without a root hub window above 4GB */
	if (!res)
		return 0;

	/* Limit the BAR size to what is available */
	rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
			rbar_size);

	/* Disable memory decoding while we change the BAR addresses and size */
	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
	pci_write_config_word(adev->pdev, PCI_COMMAND,
			      cmd & ~PCI_COMMAND_MEMORY);

	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
	amdgpu_device_doorbell_fini(adev);
	if (adev->asic_type >= CHIP_BONAIRE)
		pci_release_resource(adev->pdev, 2);

	pci_release_resource(adev->pdev, 0);

	r = pci_resize_resource(adev->pdev, 0, rbar_size);
	if (r == -ENOSPC)
		DRM_INFO("Not enough PCI address space for a large BAR.");
	else if (r && r != -ENOTSUPP)
		DRM_ERROR("Problem resizing BAR0 (%d).", r);

	pci_assign_unassigned_bus_resources(adev->pdev->bus);

	/* When the doorbell or fb BAR isn't available we have no chance of
	 * using the device.
	 */
	r = amdgpu_device_doorbell_init(adev);
	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
		return -ENODEV;

	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);

	return 0;
}

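/*
 * Worked example (assuming pci_rebar_bytes_to_size() behaves as in the PCI
 * core, i.e. roughly ilog2(bytes) - 20): for a board with 8 GiB of VRAM it
 * returns 33 - 20 = 13 in the resizable-BAR encoding, where 0 means 1 MiB
 * and each step doubles the size, so 13 requests an 8 GiB BAR0.
 */
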
/*
 * GPU helpers function.
 */
/**
 * amdgpu_device_need_post - check if the hw need post or not
 *
 * @adev: amdgpu_device pointer
 *
 * Check if the asic has been initialized (all asics) at driver startup
 * or post is needed if hw reset is performed.
 * Returns true if need or false if not.
 */
bool amdgpu_device_need_post(struct amdgpu_device *adev)
{
	uint32_t reg;

	if (amdgpu_sriov_vf(adev))
		return false;

	if (amdgpu_passthrough(adev)) {
		/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
		 * some old smc fw still need driver do vPost otherwise gpu hang, while
		 * those smc fw version above 22.15 doesn't have this flaw, so we force
		 * vpost executed for smc version below 22.15
		 */
		if (adev->asic_type == CHIP_FIJI) {
			int err;
			uint32_t fw_ver;

			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
			/* force vPost if error occurred */
			if (err)
				return true;

			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
			if (fw_ver < 0x00160e00)
				return true;
		}
	}

	/* Don't post if we need to reset whole hive on init */
	if (adev->gmc.xgmi.pending_reset)
		return false;

	if (adev->has_hw_reset) {
		adev->has_hw_reset = false;
		return true;
	}

	/* bios scratch used on CIK+ */
	if (adev->asic_type >= CHIP_BONAIRE)
		return amdgpu_atombios_scratch_need_asic_init(adev);

	/* check MEM_SIZE for older asics */
	reg = amdgpu_asic_get_config_memsize(adev);

	if ((reg != 0) && (reg != 0xffffffff))
		return false;

	return true;
}

/**
 * amdgpu_device_should_use_aspm - check if the device should program ASPM
 *
 * @adev: amdgpu_device pointer
 *
 * Confirm whether the module parameter and pcie bridge agree that ASPM should
 * be set for this device.
 *
 * Returns true if it should be used or false if not.
 */
bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
{
	switch (amdgpu_aspm) {
	case -1:
		break;
	case 0:
		return false;
	case 1:
		return true;
	default:
		return false;
	}
	return pcie_aspm_enabled(adev->pdev);
}

/* if we get transitioned to only one device, take VGA back */
/**
 * amdgpu_device_vga_set_decode - enable/disable vga decode
 *
 * @pdev: PCI device pointer
 * @state: enable/disable vga decode
 *
 * Enable/disable vga decode (all asics).
 * Returns VGA resource flags.
 */
static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
		bool state)
{
	struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));

	amdgpu_asic_set_vga_state(adev, state);
	if (state)
		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
	else
		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
}

/**
 * amdgpu_device_check_block_size - validate the vm block size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm block size specified via module parameter.
 * The vm block size defines the number of bits in the page table versus the
 * page directory. A page is 4KB, so we have 12 bits of offset, a minimum of
 * 9 bits in the page table and the remaining bits in the page directory.
 */
static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
{
	/* defines number of bits in page table versus page directory,
	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
	 * page table and the remaining bits are in the page directory */
	if (amdgpu_vm_block_size == -1)
		return;

	if (amdgpu_vm_block_size < 9) {
		dev_warn(adev->dev, "VM page table size (%d) too small\n",
			 amdgpu_vm_block_size);
		amdgpu_vm_block_size = -1;
	}
}

/**
 * amdgpu_device_check_vm_size - validate the vm size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm size in GB specified via module parameter.
 * The VM size is the size of the GPU virtual memory space in GB.
 */
static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
{
	/* no need to check the default value */
	if (amdgpu_vm_size == -1)
		return;

	if (amdgpu_vm_size < 1) {
		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
			 amdgpu_vm_size);
		amdgpu_vm_size = -1;
	}
}

static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
{
	struct sysinfo si;
	bool is_os_64 = (sizeof(void *) == 8);
	uint64_t total_memory;
	uint64_t dram_size_seven_GB = 0x1B8000000;
	uint64_t dram_size_three_GB = 0xB8000000;

	if (amdgpu_smu_memory_pool_size == 0)
		return;

	if (!is_os_64) {
		DRM_WARN("Not 64-bit OS, feature not supported\n");
		goto def_value;
	}
	si_meminfo(&si);
	total_memory = (uint64_t)si.totalram * si.mem_unit;

	if ((amdgpu_smu_memory_pool_size == 1) ||
	    (amdgpu_smu_memory_pool_size == 2)) {
		if (total_memory < dram_size_three_GB)
			goto def_value1;
	} else if ((amdgpu_smu_memory_pool_size == 4) ||
		   (amdgpu_smu_memory_pool_size == 8)) {
		if (total_memory < dram_size_seven_GB)
			goto def_value1;
	} else {
		DRM_WARN("Smu memory pool size not supported\n");
		goto def_value;
	}
	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;

	return;

def_value1:
	DRM_WARN("Not enough system memory\n");
def_value:
	adev->pm.smu_prv_buffer_size = 0;
}

static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
{
	if (!(adev->flags & AMD_IS_APU) ||
	    adev->asic_type < CHIP_RAVEN)
		return 0;

	switch (adev->asic_type) {
	case CHIP_RAVEN:
		if (adev->pdev->device == 0x15dd)
			adev->apu_flags |= AMD_APU_IS_RAVEN;
		if (adev->pdev->device == 0x15d8)
			adev->apu_flags |= AMD_APU_IS_PICASSO;
		break;
	case CHIP_RENOIR:
		if ((adev->pdev->device == 0x1636) ||
		    (adev->pdev->device == 0x164c))
			adev->apu_flags |= AMD_APU_IS_RENOIR;
		else
			adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
		break;
	case CHIP_VANGOGH:
		adev->apu_flags |= AMD_APU_IS_VANGOGH;
		break;
	case CHIP_YELLOW_CARP:
		break;
	case CHIP_CYAN_SKILLFISH:
		if ((adev->pdev->device == 0x13FE) ||
		    (adev->pdev->device == 0x143F))
			adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
		break;
	default:
		break;
	}

	return 0;
}

/**
 * amdgpu_device_check_arguments - validate module params
 *
 * @adev: amdgpu_device pointer
 *
 * Validates certain module parameters and updates
 * the associated values used by the driver (all asics).
 */
static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
{
	if (amdgpu_sched_jobs < 4) {
		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = 4;
	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
	}

	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
		/* gart size must be greater or equal to 32M */
		dev_warn(adev->dev, "gart size (%d) too small\n",
			 amdgpu_gart_size);
		amdgpu_gart_size = -1;
	}

	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
		/* gtt size must be greater or equal to 32M */
		dev_warn(adev->dev, "gtt size (%d) too small\n",
			 amdgpu_gtt_size);
		amdgpu_gtt_size = -1;
	}

	/* valid range is between 4 and 9 inclusive */
	if (amdgpu_vm_fragment_size != -1 &&
	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
		dev_warn(adev->dev, "valid range is between 4 and 9\n");
		amdgpu_vm_fragment_size = -1;
	}

	if (amdgpu_sched_hw_submission < 2) {
		dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
			 amdgpu_sched_hw_submission);
		amdgpu_sched_hw_submission = 2;
	} else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
		dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
			 amdgpu_sched_hw_submission);
		amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
	}

	if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
		dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
		amdgpu_reset_method = -1;
	}

	amdgpu_device_check_smu_prv_buffer_size(adev);

	amdgpu_device_check_vm_size(adev);

	amdgpu_device_check_block_size(adev);

	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);

	return 0;
}

/**
 * amdgpu_switcheroo_set_state - set switcheroo state
 *
 * @pdev: pci dev pointer
 * @state: vga_switcheroo state
 *
 * Callback for the switcheroo driver. Suspends or resumes
 * the asics before or after it is powered up using ACPI methods.
 */
static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
					enum vga_switcheroo_state state)
{
	struct drm_device *dev = pci_get_drvdata(pdev);
	int r;

	if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
		return;

	if (state == VGA_SWITCHEROO_ON) {
		pr_info("switched on\n");
		/* don't suspend or resume card normally */
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;

		pci_set_power_state(pdev, PCI_D0);
		amdgpu_device_load_pci_state(pdev);
		r = pci_enable_device(pdev);
		if (r)
			DRM_WARN("pci_enable_device failed (%d)\n", r);
		amdgpu_device_resume(dev, true);

		dev->switch_power_state = DRM_SWITCH_POWER_ON;
	} else {
		pr_info("switched off\n");
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
		amdgpu_device_suspend(dev, true);
		amdgpu_device_cache_pci_state(pdev);
		/* Shut down the device */
		pci_disable_device(pdev);
		pci_set_power_state(pdev, PCI_D3cold);
		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
	}
}

/**
 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
 *
 * @pdev: pci dev pointer
 *
 * Callback for the switcheroo driver. Check if the switcheroo
 * state can be changed.
 * Returns true if the state can be changed, false if not.
 */
static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);

	/*
	 * FIXME: open_count is protected by drm_global_mutex but that would lead to
	 * locking inversion with the driver load path. And the access here is
	 * completely racy anyway. So don't bother with locking for now.
	 */
	return atomic_read(&dev->open_count) == 0;
}

static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
	.set_gpu_state = amdgpu_switcheroo_set_state,
	.reprobe = NULL,
	.can_switch = amdgpu_switcheroo_can_switch,
};

/**
 * amdgpu_device_ip_set_clockgating_state - set the CG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: clockgating state (gate or ungate)
 *
 * Sets the requested clockgating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_clockgating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

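/*
 * Illustrative (hypothetical) caller of the helper above: gate clocks for
 * all GFX IP instances on this device.
 *
 *	amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
 *					       AMD_CG_STATE_GATE);
 */
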
/**
 * amdgpu_device_ip_set_powergating_state - set the PG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: powergating state (gate or ungate)
 *
 * Sets the requested powergating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_powergating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_powergating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

/**
 * amdgpu_device_ip_get_clockgating_state - get the CG state
 *
 * @adev: amdgpu_device pointer
 * @flags: clockgating feature flags
 *
 * Walks the list of IPs on the device and updates the clockgating
 * flags for each IP.
 * Updates @flags with the feature flags for each hardware IP where
 * clockgating is enabled.
 */
void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
					    u64 *flags)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
	}
}

e3ecdffa
AD
1732/**
1733 * amdgpu_device_ip_wait_for_idle - wait for idle
1734 *
1735 * @adev: amdgpu_device pointer
1736 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1737 *
1738 * Waits for the requested hardware IP to be idle.
1739 * Returns 0 for success or a negative error code on failure.
1740 */
2990a1fc
AD
1741int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1742 enum amd_ip_block_type block_type)
5dbbb60b
AD
1743{
1744 int i, r;
1745
1746 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1747 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1748 continue;
a1255107
AD
1749 if (adev->ip_blocks[i].version->type == block_type) {
1750 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1751 if (r)
1752 return r;
1753 break;
1754 }
1755 }
1756 return 0;
1757
1758}
1759
e3ecdffa
AD
1760/**
1761 * amdgpu_device_ip_is_idle - is the hardware IP idle
1762 *
1763 * @adev: amdgpu_device pointer
1764 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1765 *
1766 * Check if the hardware IP is idle or not.
1767 * Returns true if the IP is idle, false if not.
1768 */
2990a1fc
AD
1769bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1770 enum amd_ip_block_type block_type)
5dbbb60b
AD
1771{
1772 int i;
1773
1774 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1775 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1776 continue;
a1255107
AD
1777 if (adev->ip_blocks[i].version->type == block_type)
1778 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1779 }
1780 return true;
1781
1782}
1783
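/*
 * Illustrative sketch (hypothetical helper): the natural pairing of the
 * two helpers above - check cheaply first, then block until idle.
 */
static int example_quiesce_gfx(struct amdgpu_device *adev)
{
	if (amdgpu_device_ip_is_idle(adev, AMD_IP_BLOCK_TYPE_GFX))
		return 0;

	return amdgpu_device_ip_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GFX);
}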
e3ecdffa
AD
1784/**
1785 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1786 *
1787 * @adev: amdgpu_device pointer
87e3f136 1788 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1789 *
1790 * Returns a pointer to the hardware IP block structure
1791 * if it exists for the asic, otherwise NULL.
1792 */
2990a1fc
AD
1793struct amdgpu_ip_block *
1794amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1795 enum amd_ip_block_type type)
d38ceaf9
AD
1796{
1797 int i;
1798
1799 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1800 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1801 return &adev->ip_blocks[i];
1802
1803 return NULL;
1804}
1805
1806/**
2990a1fc 1807 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1808 *
1809 * @adev: amdgpu_device pointer
5fc3aeeb 1810 * @type: enum amd_ip_block_type
d38ceaf9
AD
1811 * @major: major version
1812 * @minor: minor version
1813 *
1814 * Returns 0 if the IP block's version is equal to or greater than
1815 * (major, minor), 1 if it is smaller or the ip_block doesn't exist.
1816 */
2990a1fc
AD
1817int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1818 enum amd_ip_block_type type,
1819 u32 major, u32 minor)
d38ceaf9 1820{
2990a1fc 1821 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1822
a1255107
AD
1823 if (ip_block && ((ip_block->version->major > major) ||
1824 ((ip_block->version->major == major) &&
1825 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1826 return 0;
1827
1828 return 1;
1829}
1830
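/*
 * Illustrative sketch (hypothetical check): combining the lookup and
 * compare helpers above to require at least GFX 8.1 before enabling a
 * feature. Note the cmp helper returns 0 when the version is sufficient
 * and already treats a missing IP block as "smaller".
 */
static bool example_has_gfx_8_1(struct amdgpu_device *adev)
{
	if (!amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX))
		return false;

	return amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GFX,
						  8, 1) == 0;
}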
a1255107 1831/**
2990a1fc 1832 * amdgpu_device_ip_block_add
a1255107
AD
1833 *
1834 * @adev: amdgpu_device pointer
1835 * @ip_block_version: pointer to the IP to add
1836 *
1837 * Adds the IP block driver information to the collection of IPs
1838 * on the asic.
1839 */
2990a1fc
AD
1840int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1841 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1842{
1843 if (!ip_block_version)
1844 return -EINVAL;
1845
7bd939d0
LG
1846 switch (ip_block_version->type) {
1847 case AMD_IP_BLOCK_TYPE_VCN:
1848 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
1849 return 0;
1850 break;
1851 case AMD_IP_BLOCK_TYPE_JPEG:
1852 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
1853 return 0;
1854 break;
1855 default:
1856 break;
1857 }
1858
e966a725 1859 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1860 ip_block_version->funcs->name);
1861
a1255107
AD
1862 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1863
1864 return 0;
1865}
1866
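/*
 * Illustrative sketch, not driver code: how the per-ASIC set_ip_blocks
 * routines register their IPs with the helper above.
 * example_common_ip_block is a hypothetical, fully populated
 * struct amdgpu_ip_block_version; real ones live in the IP drivers
 * (e.g. vi.c, soc15.c, nv.c).
 */
extern const struct amdgpu_ip_block_version example_common_ip_block;

static int example_set_ip_blocks(struct amdgpu_device *adev)
{
	int r;

	r = amdgpu_device_ip_block_add(adev, &example_common_ip_block);
	if (r)
		return r;
	/* ... add GMC, IH, PSP, GFX, SDMA, etc. in init order ... */
	return 0;
}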
e3ecdffa
AD
1867/**
1868 * amdgpu_device_enable_virtual_display - enable virtual display feature
1869 *
1870 * @adev: amdgpu_device pointer
1871 *
1872 * Enables the virtual display feature if the user has enabled it via
1873 * the module parameter virtual_display. This feature provides virtual
1874 * display hardware on headless boards or in virtualized environments.
1875 * This function parses and validates the configuration string specified by
1876 * the user and configures the virtual display configuration (number of
1877 * virtual connectors, crtcs, etc.) specified.
1878 */
483ef985 1879static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1880{
1881 adev->enable_virtual_display = false;
1882
1883 if (amdgpu_virtual_display) {
8f66090b 1884 const char *pci_address_name = pci_name(adev->pdev);
0f66356d 1885 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1886
1887 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1888 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1889 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1890 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1891 if (!strcmp("all", pciaddname)
1892 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1893 long num_crtc;
1894 int res = -1;
1895
9accf2fd 1896 adev->enable_virtual_display = true;
0f66356d
ED
1897
1898 if (pciaddname_tmp)
1899 res = kstrtol(pciaddname_tmp, 10,
1900 &num_crtc);
1901
1902 if (!res) {
1903 if (num_crtc < 1)
1904 num_crtc = 1;
1905 if (num_crtc > 6)
1906 num_crtc = 6;
1907 adev->mode_info.num_crtc = num_crtc;
1908 } else {
1909 adev->mode_info.num_crtc = 1;
1910 }
9accf2fd
ED
1911 break;
1912 }
1913 }
1914
0f66356d
ED
1915 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1916 amdgpu_virtual_display, pci_address_name,
1917 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
1918
1919 kfree(pciaddstr);
1920 }
1921}
1922
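/*
 * Example (inferred from the parser above, addresses hypothetical): the
 * virtual_display module parameter is a ';'-separated list of PCI
 * addresses, each optionally followed by ",<num_crtc>" (clamped to 1-6),
 * with "all" matching every device:
 *
 *   amdgpu.virtual_display=0000:01:00.0,2
 *   amdgpu.virtual_display=all,1
 */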
25263da3
AD
1923void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
1924{
1925 if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
1926 adev->mode_info.num_crtc = 1;
1927 adev->enable_virtual_display = true;
1928 DRM_INFO("virtual_display:%d, num_crtc:%d\n",
1929 adev->enable_virtual_display, adev->mode_info.num_crtc);
1930 }
1931}
1932
e3ecdffa
AD
1933/**
1934 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1935 *
1936 * @adev: amdgpu_device pointer
1937 *
1938 * Parses the asic configuration parameters specified in the gpu info
1939 * firmware and makes them available to the driver for use in configuring
1940 * the asic.
1941 * Returns 0 on success, -EINVAL on failure.
1942 */
e2a75f88
AD
1943static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1944{
e2a75f88 1945 const char *chip_name;
c0a43457 1946 char fw_name[40];
e2a75f88
AD
1947 int err;
1948 const struct gpu_info_firmware_header_v1_0 *hdr;
1949
ab4fe3e1
HR
1950 adev->firmware.gpu_info_fw = NULL;
1951
72de33f8 1952 if (adev->mman.discovery_bin) {
cc375d8c
TY
1953 /*
1954 * FIXME: The bounding box is still needed by Navi12, so
e24d0e91 1955 * temporarily read it from gpu_info firmware. Should be dropped
cc375d8c
TY
1956 * when DAL no longer needs it.
1957 */
1958 if (adev->asic_type != CHIP_NAVI12)
1959 return 0;
258620d0
AD
1960 }
1961
e2a75f88 1962 switch (adev->asic_type) {
e2a75f88
AD
1963 default:
1964 return 0;
1965 case CHIP_VEGA10:
1966 chip_name = "vega10";
1967 break;
3f76dced
AD
1968 case CHIP_VEGA12:
1969 chip_name = "vega12";
1970 break;
2d2e5e7e 1971 case CHIP_RAVEN:
54f78a76 1972 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
54c4d17e 1973 chip_name = "raven2";
54f78a76 1974 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
741deade 1975 chip_name = "picasso";
54c4d17e
FX
1976 else
1977 chip_name = "raven";
2d2e5e7e 1978 break;
65e60f6e
LM
1979 case CHIP_ARCTURUS:
1980 chip_name = "arcturus";
1981 break;
42b325e5
XY
1982 case CHIP_NAVI12:
1983 chip_name = "navi12";
1984 break;
e2a75f88
AD
1985 }
1986
1987 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
b31d3063 1988 err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
e2a75f88
AD
1989 if (err) {
1990 dev_err(adev->dev,
b31d3063 1991 "Failed to get gpu_info firmware \"%s\"\n",
e2a75f88
AD
1992 fw_name);
1993 goto out;
1994 }
1995
ab4fe3e1 1996 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
1997 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1998
1999 switch (hdr->version_major) {
2000 case 1:
2001 {
2002 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 2003 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
2004 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2005
cc375d8c
TY
2006 /*
2007 * Should be dropped when DAL no longer needs it.
2008 */
2009 if (adev->asic_type == CHIP_NAVI12)
ec51d3fa
XY
2010 goto parse_soc_bounding_box;
2011
b5ab16bf
AD
2012 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2013 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2014 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2015 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 2016 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
2017 le32_to_cpu(gpu_info_fw->gc_num_tccs);
2018 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2019 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2020 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2021 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 2022 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
2023 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2024 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
2025 adev->gfx.cu_info.max_waves_per_simd =
2026 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2027 adev->gfx.cu_info.max_scratch_slots_per_cu =
2028 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2029 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 2030 if (hdr->version_minor >= 1) {
35c2e910
HZ
2031 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2032 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2033 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2034 adev->gfx.config.num_sc_per_sh =
2035 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2036 adev->gfx.config.num_packer_per_sc =
2037 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2038 }
ec51d3fa
XY
2039
2040parse_soc_bounding_box:
ec51d3fa
XY
2041 /*
2042 * soc bounding box info is not integrated in the discovery table,
258620d0 2043 * so we always need to parse it from the gpu info firmware when needed.
ec51d3fa 2044 */
48321c3d
HW
2045 if (hdr->version_minor == 2) {
2046 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2047 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2048 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2049 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2050 }
e2a75f88
AD
2051 break;
2052 }
2053 default:
2054 dev_err(adev->dev,
2055 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2056 err = -EINVAL;
2057 goto out;
2058 }
2059out:
e2a75f88
AD
2060 return err;
2061}
2062
e3ecdffa
AD
2063/**
2064 * amdgpu_device_ip_early_init - run early init for hardware IPs
2065 *
2066 * @adev: amdgpu_device pointer
2067 *
2068 * Early initialization pass for hardware IPs. The hardware IPs that make
2069 * up each asic are discovered and each IP's early_init callback is run. This
2070 * is the first stage in initializing the asic.
2071 * Returns 0 on success, negative error code on failure.
2072 */
06ec9070 2073static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 2074{
901e2be2
AD
2075 struct drm_device *dev = adev_to_drm(adev);
2076 struct pci_dev *parent;
aaa36a97 2077 int i, r;
d38ceaf9 2078
483ef985 2079 amdgpu_device_enable_virtual_display(adev);
a6be7570 2080
00a979f3 2081 if (amdgpu_sriov_vf(adev)) {
00a979f3 2082 r = amdgpu_virt_request_full_gpu(adev, true);
aaa36a97
AD
2083 if (r)
2084 return r;
00a979f3
WS
2085 }
2086
d38ceaf9 2087 switch (adev->asic_type) {
33f34802
KW
2088#ifdef CONFIG_DRM_AMDGPU_SI
2089 case CHIP_VERDE:
2090 case CHIP_TAHITI:
2091 case CHIP_PITCAIRN:
2092 case CHIP_OLAND:
2093 case CHIP_HAINAN:
295d0daf 2094 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
2095 r = si_set_ip_blocks(adev);
2096 if (r)
2097 return r;
2098 break;
2099#endif
a2e73f56
AD
2100#ifdef CONFIG_DRM_AMDGPU_CIK
2101 case CHIP_BONAIRE:
2102 case CHIP_HAWAII:
2103 case CHIP_KAVERI:
2104 case CHIP_KABINI:
2105 case CHIP_MULLINS:
e1ad2d53 2106 if (adev->flags & AMD_IS_APU)
a2e73f56 2107 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
2108 else
2109 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
2110
2111 r = cik_set_ip_blocks(adev);
2112 if (r)
2113 return r;
2114 break;
2115#endif
da87c30b
AD
2116 case CHIP_TOPAZ:
2117 case CHIP_TONGA:
2118 case CHIP_FIJI:
2119 case CHIP_POLARIS10:
2120 case CHIP_POLARIS11:
2121 case CHIP_POLARIS12:
2122 case CHIP_VEGAM:
2123 case CHIP_CARRIZO:
2124 case CHIP_STONEY:
2125 if (adev->flags & AMD_IS_APU)
2126 adev->family = AMDGPU_FAMILY_CZ;
2127 else
2128 adev->family = AMDGPU_FAMILY_VI;
2129
2130 r = vi_set_ip_blocks(adev);
2131 if (r)
2132 return r;
2133 break;
d38ceaf9 2134 default:
63352b7f
AD
2135 r = amdgpu_discovery_set_ip_blocks(adev);
2136 if (r)
2137 return r;
2138 break;
d38ceaf9
AD
2139 }
2140
901e2be2
AD
2141 if (amdgpu_has_atpx() &&
2142 (amdgpu_is_atpx_hybrid() ||
2143 amdgpu_has_atpx_dgpu_power_cntl()) &&
2144 ((adev->flags & AMD_IS_APU) == 0) &&
2145 !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
2146 adev->flags |= AMD_IS_PX;
2147
85ac2021
AD
2148 if (!(adev->flags & AMD_IS_APU)) {
2149 parent = pci_upstream_bridge(adev->pdev);
2150 adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2151 }
901e2be2 2152
c004d44e 2153 amdgpu_amdkfd_device_probe(adev);
1884734a 2154
3b94fb10 2155 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 2156 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 2157 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
4215a119
HC
2158 if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2159 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
00f54b97 2160
d38ceaf9
AD
2161 for (i = 0; i < adev->num_ip_blocks; i++) {
2162 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
ed8cf00c
HR
2163 DRM_ERROR("disabled ip block: %d <%s>\n",
2164 i, adev->ip_blocks[i].version->funcs->name);
a1255107 2165 adev->ip_blocks[i].status.valid = false;
d38ceaf9 2166 } else {
a1255107
AD
2167 if (adev->ip_blocks[i].version->funcs->early_init) {
2168 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 2169 if (r == -ENOENT) {
a1255107 2170 adev->ip_blocks[i].status.valid = false;
2c1a2784 2171 } else if (r) {
a1255107
AD
2172 DRM_ERROR("early_init of IP block <%s> failed %d\n",
2173 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2174 return r;
2c1a2784 2175 } else {
a1255107 2176 adev->ip_blocks[i].status.valid = true;
2c1a2784 2177 }
974e6b64 2178 } else {
a1255107 2179 adev->ip_blocks[i].status.valid = true;
d38ceaf9 2180 }
d38ceaf9 2181 }
21a249ca
AD
2182 /* get the vbios after the asic_funcs are set up */
2183 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
2184 r = amdgpu_device_parse_gpu_info_fw(adev);
2185 if (r)
2186 return r;
2187
21a249ca
AD
2188 /* Read BIOS */
2189 if (!amdgpu_get_bios(adev))
2190 return -EINVAL;
2191
2192 r = amdgpu_atombios_init(adev);
2193 if (r) {
2194 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2195 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2196 return r;
2197 }
77eabc6f
PJZ
2198
2199 /* get pf2vf msg info at its earliest time */
2200 if (amdgpu_sriov_vf(adev))
2201 amdgpu_virt_init_data_exchange(adev);
2202
21a249ca 2203 }
d38ceaf9
AD
2204 }
2205
395d1fb9
NH
2206 adev->cg_flags &= amdgpu_cg_mask;
2207 adev->pg_flags &= amdgpu_pg_mask;
2208
d38ceaf9
AD
2209 return 0;
2210}
2211
0a4f2520
RZ
2212static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2213{
2214 int i, r;
2215
2216 for (i = 0; i < adev->num_ip_blocks; i++) {
2217 if (!adev->ip_blocks[i].status.sw)
2218 continue;
2219 if (adev->ip_blocks[i].status.hw)
2220 continue;
2221 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 2222 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
2223 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2224 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2225 if (r) {
2226 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2227 adev->ip_blocks[i].version->funcs->name, r);
2228 return r;
2229 }
2230 adev->ip_blocks[i].status.hw = true;
2231 }
2232 }
2233
2234 return 0;
2235}
2236
2237static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2238{
2239 int i, r;
2240
2241 for (i = 0; i < adev->num_ip_blocks; i++) {
2242 if (!adev->ip_blocks[i].status.sw)
2243 continue;
2244 if (adev->ip_blocks[i].status.hw)
2245 continue;
2246 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2247 if (r) {
2248 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2249 adev->ip_blocks[i].version->funcs->name, r);
2250 return r;
2251 }
2252 adev->ip_blocks[i].status.hw = true;
2253 }
2254
2255 return 0;
2256}
2257
7a3e0bb2
RZ
2258static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2259{
2260 int r = 0;
2261 int i;
80f41f84 2262 uint32_t smu_version;
7a3e0bb2
RZ
2263
2264 if (adev->asic_type >= CHIP_VEGA10) {
2265 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
2266 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2267 continue;
2268
e3c1b071 2269 if (!adev->ip_blocks[i].status.sw)
2270 continue;
2271
482f0e53
ML
2272 /* no need to do the fw loading again if already done*/
2273 if (adev->ip_blocks[i].status.hw == true)
2274 break;
2275
53b3f8f4 2276 if (amdgpu_in_reset(adev) || adev->in_suspend) {
482f0e53
ML
2277 r = adev->ip_blocks[i].version->funcs->resume(adev);
2278 if (r) {
2279 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 2280 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
2281 return r;
2282 }
2283 } else {
2284 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2285 if (r) {
2286 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2287 adev->ip_blocks[i].version->funcs->name, r);
2288 return r;
7a3e0bb2 2289 }
7a3e0bb2 2290 }
482f0e53
ML
2291
2292 adev->ip_blocks[i].status.hw = true;
2293 break;
7a3e0bb2
RZ
2294 }
2295 }
482f0e53 2296
8973d9ec
ED
2297 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2298 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 2299
80f41f84 2300 return r;
7a3e0bb2
RZ
2301}
2302
5fd8518d
AG
2303static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2304{
2305 long timeout;
2306 int r, i;
2307
2308 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2309 struct amdgpu_ring *ring = adev->rings[i];
2310
2311 /* No need to set up the GPU scheduler for rings that don't need it */
2312 if (!ring || ring->no_scheduler)
2313 continue;
2314
2315 switch (ring->funcs->type) {
2316 case AMDGPU_RING_TYPE_GFX:
2317 timeout = adev->gfx_timeout;
2318 break;
2319 case AMDGPU_RING_TYPE_COMPUTE:
2320 timeout = adev->compute_timeout;
2321 break;
2322 case AMDGPU_RING_TYPE_SDMA:
2323 timeout = adev->sdma_timeout;
2324 break;
2325 default:
2326 timeout = adev->video_timeout;
2327 break;
2328 }
2329
2330 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
2331 ring->num_hw_submission, amdgpu_job_hang_limit,
8ab62eda
JG
2332 timeout, adev->reset_domain->wq,
2333 ring->sched_score, ring->name,
2334 adev->dev);
5fd8518d
AG
2335 if (r) {
2336 DRM_ERROR("Failed to create scheduler on ring %s.\n",
2337 ring->name);
2338 return r;
2339 }
2340 }
2341
2342 return 0;
2343}
2344
2345
e3ecdffa
AD
2346/**
2347 * amdgpu_device_ip_init - run init for hardware IPs
2348 *
2349 * @adev: amdgpu_device pointer
2350 *
2351 * Main initialization pass for hardware IPs. The list of all the hardware
2352 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2353 * are run. sw_init initializes the software state associated with each IP
2354 * and hw_init initializes the hardware associated with each IP.
2355 * Returns 0 on success, negative error code on failure.
2356 */
06ec9070 2357static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
2358{
2359 int i, r;
2360
c030f2e4 2361 r = amdgpu_ras_init(adev);
2362 if (r)
2363 return r;
2364
d38ceaf9 2365 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2366 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2367 continue;
a1255107 2368 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 2369 if (r) {
a1255107
AD
2370 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2371 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 2372 goto init_failed;
2c1a2784 2373 }
a1255107 2374 adev->ip_blocks[i].status.sw = true;
bfca0289 2375
c1c39032
AD
2376 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2377 /* need to do common hw init early so everything is set up for gmc */
2378 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2379 if (r) {
2380 DRM_ERROR("hw_init %d failed %d\n", i, r);
2381 goto init_failed;
2382 }
2383 adev->ip_blocks[i].status.hw = true;
2384 } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2385 /* need to do gmc hw init early so we can allocate gpu mem */
892deb48
VS
2386 /* Try to reserve bad pages early */
2387 if (amdgpu_sriov_vf(adev))
2388 amdgpu_virt_exchange_data(adev);
2389
7ccfd79f 2390 r = amdgpu_device_mem_scratch_init(adev);
2c1a2784 2391 if (r) {
7ccfd79f 2392 DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
72d3f592 2393 goto init_failed;
2c1a2784 2394 }
a1255107 2395 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
2396 if (r) {
2397 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 2398 goto init_failed;
2c1a2784 2399 }
06ec9070 2400 r = amdgpu_device_wb_init(adev);
2c1a2784 2401 if (r) {
06ec9070 2402 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 2403 goto init_failed;
2c1a2784 2404 }
a1255107 2405 adev->ip_blocks[i].status.hw = true;
2493664f
ML
2406
2407 /* right after GMC hw init, we create CSA */
8a1fbb4a 2408 if (amdgpu_mcbp) {
1e256e27 2409 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
58ab2c08
CK
2410 AMDGPU_GEM_DOMAIN_VRAM |
2411 AMDGPU_GEM_DOMAIN_GTT,
2412 AMDGPU_CSA_SIZE);
2493664f
ML
2413 if (r) {
2414 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 2415 goto init_failed;
2493664f
ML
2416 }
2417 }
d38ceaf9
AD
2418 }
2419 }
2420
c9ffa427 2421 if (amdgpu_sriov_vf(adev))
22c16d25 2422 amdgpu_virt_init_data_exchange(adev);
c9ffa427 2423
533aed27
AG
2424 r = amdgpu_ib_pool_init(adev);
2425 if (r) {
2426 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2427 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2428 goto init_failed;
2429 }
2430
c8963ea4
RZ
2431 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete */
2432 if (r)
72d3f592 2433 goto init_failed;
0a4f2520
RZ
2434
2435 r = amdgpu_device_ip_hw_init_phase1(adev);
2436 if (r)
72d3f592 2437 goto init_failed;
0a4f2520 2438
7a3e0bb2
RZ
2439 r = amdgpu_device_fw_loading(adev);
2440 if (r)
72d3f592 2441 goto init_failed;
7a3e0bb2 2442
0a4f2520
RZ
2443 r = amdgpu_device_ip_hw_init_phase2(adev);
2444 if (r)
72d3f592 2445 goto init_failed;
d38ceaf9 2446
121a2bc6
AG
2447 /*
2448 * retired pages will be loaded from eeprom and reserved here,
2449 * it should be called after amdgpu_device_ip_hw_init_phase2 since
2450 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2451 * for I2C communication, which is only true at this point.
b82e65a9
GC
2452 *
2453 * amdgpu_ras_recovery_init may fail, but the upper layers only care
2454 * about failures caused by a bad gpu situation and stop the amdgpu init
2455 * process accordingly. For other failure cases, it still releases all
2456 * the resources and prints an error message, rather than returning a
2457 * negative value to the upper level.
121a2bc6
AG
2458 *
2459 * Note: theoretically, this should be called before all vram allocations
2460 * to protect retired pages from being allocated and used again.
2461 */
b82e65a9
GC
2462 r = amdgpu_ras_recovery_init(adev);
2463 if (r)
2464 goto init_failed;
121a2bc6 2465
cfbb6b00
AG
2466 /*
2467 * In case of XGMI, grab an extra reference on the reset domain for this device
2468 */
a4c63caf 2469 if (adev->gmc.xgmi.num_physical_nodes > 1) {
cfbb6b00 2470 if (amdgpu_xgmi_add_device(adev) == 0) {
46c67660 2471 if (!amdgpu_sriov_vf(adev)) {
2efc30f0
VC
2472 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2473
dfd0287b
LH
2474 if (WARN_ON(!hive)) {
2475 r = -ENOENT;
2476 goto init_failed;
2477 }
2478
46c67660 2479 if (!hive->reset_domain ||
2480 !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2481 r = -ENOENT;
2482 amdgpu_put_xgmi_hive(hive);
2483 goto init_failed;
2484 }
2485
2486 /* Drop the early temporary reset domain we created for device */
2487 amdgpu_reset_put_reset_domain(adev->reset_domain);
2488 adev->reset_domain = hive->reset_domain;
9dfa4860 2489 amdgpu_put_xgmi_hive(hive);
cfbb6b00 2490 }
a4c63caf
AG
2491 }
2492 }
2493
5fd8518d
AG
2494 r = amdgpu_device_init_schedulers(adev);
2495 if (r)
2496 goto init_failed;
e3c1b071 2497
2498 /* Don't init kfd if whole hive need to be reset during init */
c004d44e 2499 if (!adev->gmc.xgmi.pending_reset)
e3c1b071 2500 amdgpu_amdkfd_device_init(adev);
c6332b97 2501
bd607166
KR
2502 amdgpu_fru_get_product_info(adev);
2503
72d3f592 2504init_failed:
c9ffa427 2505 if (amdgpu_sriov_vf(adev))
c6332b97 2506 amdgpu_virt_release_full_gpu(adev, true);
2507
72d3f592 2508 return r;
d38ceaf9
AD
2509}
2510
e3ecdffa
AD
2511/**
2512 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2513 *
2514 * @adev: amdgpu_device pointer
2515 *
2516 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2517 * this function before a GPU reset. If the value is retained after a
2518 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2519 */
06ec9070 2520static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2521{
2522 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2523}
2524
e3ecdffa
AD
2525/**
2526 * amdgpu_device_check_vram_lost - check if vram is valid
2527 *
2528 * @adev: amdgpu_device pointer
2529 *
2530 * Checks the reset magic value written to the gart pointer in VRAM.
2531 * The driver calls this after a GPU reset to see if the contents of
2532 * VRAM have been lost or not.
2533 * Returns true if vram is lost, false if not.
2534 */
06ec9070 2535static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2536{
dadce777
EQ
2537 if (memcmp(adev->gart.ptr, adev->reset_magic,
2538 AMDGPU_RESET_MAGIC_NUM))
2539 return true;
2540
53b3f8f4 2541 if (!amdgpu_in_reset(adev))
dadce777
EQ
2542 return false;
2543
2544 /*
2545 * For all ASICs with baco/mode1 reset, the VRAM is
2546 * always assumed to be lost.
2547 */
2548 switch (amdgpu_asic_reset_method(adev)) {
2549 case AMD_RESET_METHOD_BACO:
2550 case AMD_RESET_METHOD_MODE1:
2551 return true;
2552 default:
2553 return false;
2554 }
0c49e0b8
CZ
2555}
2556
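/*
 * Illustrative sketch (hypothetical wrapper): the intended pairing of
 * the two helpers above around an ASIC reset, as the recovery code later
 * in this file does.
 */
static bool example_vram_survived_reset(struct amdgpu_device *adev)
{
	amdgpu_device_fill_reset_magic(adev);	/* before the reset */
	if (amdgpu_asic_reset(adev))		/* the reset itself */
		return false;
	return !amdgpu_device_check_vram_lost(adev);	/* after the reset */
}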
e3ecdffa 2557/**
1112a46b 2558 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2559 *
2560 * @adev: amdgpu_device pointer
b8b72130 2561 * @state: clockgating state (gate or ungate)
e3ecdffa 2562 *
e3ecdffa 2563 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2564 * set_clockgating_state callbacks are run.
2565 * During the late init pass this enables clockgating for hardware IPs;
2566 * during fini or suspend it disables clockgating for hardware IPs.
e3ecdffa
AD
2567 * Returns 0 on success, negative error code on failure.
2568 */
fdd34271 2569
5d89bb2d
LL
2570int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2571 enum amd_clockgating_state state)
d38ceaf9 2572{
1112a46b 2573 int i, j, r;
d38ceaf9 2574
4a2ba394
SL
2575 if (amdgpu_emu_mode == 1)
2576 return 0;
2577
1112a46b
RZ
2578 for (j = 0; j < adev->num_ip_blocks; j++) {
2579 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2580 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2581 continue;
47198eb7 2582 /* skip CG for GFX, SDMA on S0ix */
5d70a549 2583 if (adev->in_s0ix &&
47198eb7
AD
2584 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2585 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2586 continue;
4a446d55 2587 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2588 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2589 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2590 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2591 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2592 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2593 /* enable clockgating to save power */
a1255107 2594 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2595 state);
4a446d55
AD
2596 if (r) {
2597 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2598 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2599 return r;
2600 }
b0b00ff1 2601 }
d38ceaf9 2602 }
06b18f61 2603
c9f96fd5
RZ
2604 return 0;
2605}
2606
5d89bb2d
LL
2607int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2608 enum amd_powergating_state state)
c9f96fd5 2609{
1112a46b 2610 int i, j, r;
06b18f61 2611
c9f96fd5
RZ
2612 if (amdgpu_emu_mode == 1)
2613 return 0;
2614
1112a46b
RZ
2615 for (j = 0; j < adev->num_ip_blocks; j++) {
2616 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2617 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5 2618 continue;
47198eb7 2619 /* skip PG for GFX, SDMA on S0ix */
5d70a549 2620 if (adev->in_s0ix &&
47198eb7
AD
2621 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2622 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2623 continue;
c9f96fd5
RZ
2624 /* skip PG for VCE/UVD, it's handled specially */
2625 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2626 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2627 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2628 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2629 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2630 /* enable powergating to save power */
2631 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2632 state);
c9f96fd5
RZ
2633 if (r) {
2634 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2635 adev->ip_blocks[i].version->funcs->name, r);
2636 return r;
2637 }
2638 }
2639 }
2dc80b00
S
2640 return 0;
2641}
2642
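/*
 * Illustrative sketch (hypothetical helper): the ordering used elsewhere
 * in this file - CG then PG when gating at late init, and the reverse,
 * PG then CG, when ungating for fini or suspend.
 */
static void example_ungate_for_teardown(struct amdgpu_device *adev)
{
	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
}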
beff74bc
AD
2643static int amdgpu_device_enable_mgpu_fan_boost(void)
2644{
2645 struct amdgpu_gpu_instance *gpu_ins;
2646 struct amdgpu_device *adev;
2647 int i, ret = 0;
2648
2649 mutex_lock(&mgpu_info.mutex);
2650
2651 /*
2652 * MGPU fan boost feature should be enabled
2653 * only when there are two or more dGPUs in
2654 * the system
2655 */
2656 if (mgpu_info.num_dgpu < 2)
2657 goto out;
2658
2659 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2660 gpu_ins = &(mgpu_info.gpu_ins[i]);
2661 adev = gpu_ins->adev;
2662 if (!(adev->flags & AMD_IS_APU) &&
f10bb940 2663 !gpu_ins->mgpu_fan_enabled) {
beff74bc
AD
2664 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2665 if (ret)
2666 break;
2667
2668 gpu_ins->mgpu_fan_enabled = 1;
2669 }
2670 }
2671
2672out:
2673 mutex_unlock(&mgpu_info.mutex);
2674
2675 return ret;
2676}
2677
e3ecdffa
AD
2678/**
2679 * amdgpu_device_ip_late_init - run late init for hardware IPs
2680 *
2681 * @adev: amdgpu_device pointer
2682 *
2683 * Late initialization pass for hardware IPs. The list of all the hardware
2684 * IPs that make up the asic is walked and the late_init callbacks are run.
2685 * late_init covers any special initialization that an IP requires
2686 * after all of the have been initialized or something that needs to happen
2687 * late in the init process.
2688 * Returns 0 on success, negative error code on failure.
2689 */
06ec9070 2690static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2691{
60599a03 2692 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2693 int i = 0, r;
2694
2695 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2696 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2697 continue;
2698 if (adev->ip_blocks[i].version->funcs->late_init) {
2699 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2700 if (r) {
2701 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2702 adev->ip_blocks[i].version->funcs->name, r);
2703 return r;
2704 }
2dc80b00 2705 }
73f847db 2706 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2707 }
2708
867e24ca 2709 r = amdgpu_ras_late_init(adev);
2710 if (r) {
2711 DRM_ERROR("amdgpu_ras_late_init failed %d", r);
2712 return r;
2713 }
2714
a891d239
DL
2715 amdgpu_ras_set_error_query_ready(adev, true);
2716
1112a46b
RZ
2717 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2718 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2719
06ec9070 2720 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2721
beff74bc
AD
2722 r = amdgpu_device_enable_mgpu_fan_boost();
2723 if (r)
2724 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2725
4da8b639 2726 /* For passthrough configuration on arcturus and aldebaran, enable special handling for SBR */
2727 if (amdgpu_passthrough(adev) && ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
2728 adev->asic_type == CHIP_ALDEBARAN))
bc143d8b 2729 amdgpu_dpm_handle_passthrough_sbr(adev, true);
60599a03
EQ
2730
2731 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2732 mutex_lock(&mgpu_info.mutex);
2733
2734 /*
2735 * Reset device p-state to low as this was booted with high.
2736 *
2737 * This should be performed only after all devices from the same
2738 * hive get initialized.
2739 *
2740 * However, it's unknown in advance how many devices are in the hive,
2741 * as they are counted one by one during device initialization.
2742 *
2743 * So, we wait for all XGMI interlinked devices to be initialized.
2744 * This may bring some delays as those devices may come from
2745 * different hives. But that should be OK.
2746 */
2747 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2748 for (i = 0; i < mgpu_info.num_gpu; i++) {
2749 gpu_instance = &(mgpu_info.gpu_ins[i]);
2750 if (gpu_instance->adev->flags & AMD_IS_APU)
2751 continue;
2752
d84a430d
JK
2753 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2754 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
2755 if (r) {
2756 DRM_ERROR("pstate setting failed (%d).\n", r);
2757 break;
2758 }
2759 }
2760 }
2761
2762 mutex_unlock(&mgpu_info.mutex);
2763 }
2764
d38ceaf9
AD
2765 return 0;
2766}
2767
613aa3ea
LY
2768/**
2769 * amdgpu_device_smu_fini_early - smu hw_fini wrapper
2770 *
2771 * @adev: amdgpu_device pointer
2772 *
2773 * For ASICs that need to disable SMC first
2774 */
2775static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
2776{
2777 int i, r;
2778
2779 if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))
2780 return;
2781
2782 for (i = 0; i < adev->num_ip_blocks; i++) {
2783 if (!adev->ip_blocks[i].status.hw)
2784 continue;
2785 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2786 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2787 /* XXX handle errors */
2788 if (r) {
2789 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2790 adev->ip_blocks[i].version->funcs->name, r);
2791 }
2792 adev->ip_blocks[i].status.hw = false;
2793 break;
2794 }
2795 }
2796}
2797
e9669fb7 2798static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
d38ceaf9
AD
2799{
2800 int i, r;
2801
e9669fb7
AG
2802 for (i = 0; i < adev->num_ip_blocks; i++) {
2803 if (!adev->ip_blocks[i].version->funcs->early_fini)
2804 continue;
5278a159 2805
e9669fb7
AG
2806 r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
2807 if (r) {
2808 DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
2809 adev->ip_blocks[i].version->funcs->name, r);
2810 }
2811 }
c030f2e4 2812
05df1f01 2813 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2814 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2815
7270e895
TY
2816 amdgpu_amdkfd_suspend(adev, false);
2817
613aa3ea
LY
2818 /* Workaround for ASICs that need to disable SMC first */
2819 amdgpu_device_smu_fini_early(adev);
3e96dbfd 2820
d38ceaf9 2821 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2822 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2823 continue;
8201a67a 2824
a1255107 2825 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2826 /* XXX handle errors */
2c1a2784 2827 if (r) {
a1255107
AD
2828 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2829 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2830 }
8201a67a 2831
a1255107 2832 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2833 }
2834
6effad8a
GC
2835 if (amdgpu_sriov_vf(adev)) {
2836 if (amdgpu_virt_release_full_gpu(adev, false))
2837 DRM_ERROR("failed to release exclusive mode on fini\n");
2838 }
2839
e9669fb7
AG
2840 return 0;
2841}
2842
2843/**
2844 * amdgpu_device_ip_fini - run fini for hardware IPs
2845 *
2846 * @adev: amdgpu_device pointer
2847 *
2848 * Main teardown pass for hardware IPs. The list of all the hardware
2849 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2850 * are run. hw_fini tears down the hardware associated with each IP
2851 * and sw_fini tears down any software state associated with each IP.
2852 * Returns 0 on success, negative error code on failure.
2853 */
2854static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2855{
2856 int i, r;
2857
2858 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2859 amdgpu_virt_release_ras_err_handler_data(adev);
2860
e9669fb7
AG
2861 if (adev->gmc.xgmi.num_physical_nodes > 1)
2862 amdgpu_xgmi_remove_device(adev);
2863
c004d44e 2864 amdgpu_amdkfd_device_fini_sw(adev);
9950cda2 2865
d38ceaf9 2866 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2867 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2868 continue;
c12aba3a
ML
2869
2870 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2871 amdgpu_ucode_free_bo(adev);
1e256e27 2872 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a 2873 amdgpu_device_wb_fini(adev);
7ccfd79f 2874 amdgpu_device_mem_scratch_fini(adev);
533aed27 2875 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2876 }
2877
a1255107 2878 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2879 /* XXX handle errors */
2c1a2784 2880 if (r) {
a1255107
AD
2881 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2882 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2883 }
a1255107
AD
2884 adev->ip_blocks[i].status.sw = false;
2885 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2886 }
2887
a6dcfd9c 2888 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2889 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2890 continue;
a1255107
AD
2891 if (adev->ip_blocks[i].version->funcs->late_fini)
2892 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2893 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2894 }
2895
c030f2e4 2896 amdgpu_ras_fini(adev);
2897
d38ceaf9
AD
2898 return 0;
2899}
2900
e3ecdffa 2901/**
beff74bc 2902 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2903 *
1112a46b 2904 * @work: work_struct.
e3ecdffa 2905 */
beff74bc 2906static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2907{
2908 struct amdgpu_device *adev =
beff74bc 2909 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2910 int r;
2911
2912 r = amdgpu_ib_ring_tests(adev);
2913 if (r)
2914 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2915}
2916
1e317b99
RZ
2917static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2918{
2919 struct amdgpu_device *adev =
2920 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2921
90a92662
MD
2922 WARN_ON_ONCE(adev->gfx.gfx_off_state);
2923 WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
2924
2925 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2926 adev->gfx.gfx_off_state = true;
1e317b99
RZ
2927}
2928
e3ecdffa 2929/**
e7854a03 2930 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2931 *
2932 * @adev: amdgpu_device pointer
2933 *
2934 * Main suspend function for hardware IPs. The list of all the hardware
2935 * IPs that make up the asic is walked, clockgating is disabled and the
2936 * suspend callbacks are run. suspend puts the hardware and software state
2937 * in each IP into a state suitable for suspend.
2938 * Returns 0 on success, negative error code on failure.
2939 */
e7854a03
AD
2940static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2941{
2942 int i, r;
2943
50ec83f0
AD
2944 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2945 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2946
b31d6ada
EQ
2947 /*
2948 * Per the PMFW team's suggestion, the driver needs to handle disabling
2949 * the gfxoff and df cstate features for the gpu reset (e.g. Mode1Reset)
2950 * scenario. Add the missing df cstate disablement here.
2951 */
2952 if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
2953 dev_warn(adev->dev, "Failed to disallow df cstate");
2954
e7854a03
AD
2955 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2956 if (!adev->ip_blocks[i].status.valid)
2957 continue;
2b9f7848 2958
e7854a03 2959 /* displays are handled separately */
2b9f7848
ND
2960 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
2961 continue;
2962
2963 /* XXX handle errors */
2964 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2965 /* XXX handle errors */
2966 if (r) {
2967 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2968 adev->ip_blocks[i].version->funcs->name, r);
2969 return r;
e7854a03 2970 }
2b9f7848
ND
2971
2972 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
2973 }
2974
e7854a03
AD
2975 return 0;
2976}
2977
2978/**
2979 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2980 *
2981 * @adev: amdgpu_device pointer
2982 *
2983 * Main suspend function for hardware IPs. The list of all the hardware
2984 * IPs that make up the asic is walked, clockgating is disabled and the
2985 * suspend callbacks are run. suspend puts the hardware and software state
2986 * in each IP into a state suitable for suspend.
2987 * Returns 0 on success, negative error code on failure.
2988 */
2989static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2990{
2991 int i, r;
2992
557f42a2 2993 if (adev->in_s0ix)
bc143d8b 2994 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
34416931 2995
d38ceaf9 2996 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2997 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2998 continue;
e7854a03
AD
2999 /* displays are handled in phase1 */
3000 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3001 continue;
bff77e86
LM
3002 /* PSP lost connection when err_event_athub occurs */
3003 if (amdgpu_ras_intr_triggered() &&
3004 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3005 adev->ip_blocks[i].status.hw = false;
3006 continue;
3007 }
e3c1b071 3008
3009 /* skip unnecessary suspend if we have not initialized them yet */
3010 if (adev->gmc.xgmi.pending_reset &&
3011 !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3012 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
3013 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3014 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
3015 adev->ip_blocks[i].status.hw = false;
3016 continue;
3017 }
557f42a2 3018
5620a188 3019 /* skip suspend of gfx/mes and psp for S0ix
32ff160d
AD
3020 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3021 * like at runtime. PSP is also part of the always on hardware
3022 * so no need to suspend it.
3023 */
557f42a2 3024 if (adev->in_s0ix &&
32ff160d 3025 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
5620a188
AD
3026 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3027 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
557f42a2
AD
3028 continue;
3029
2a7798ea
AD
3030 /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3031 if (adev->in_s0ix &&
3032 (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 0, 0)) &&
3033 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3034 continue;
3035
d38ceaf9 3036 /* XXX handle errors */
a1255107 3037 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 3038 /* XXX handle errors */
2c1a2784 3039 if (r) {
a1255107
AD
3040 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3041 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 3042 }
876923fb 3043 adev->ip_blocks[i].status.hw = false;
a3a09142 3044 /* handle putting the SMC in the appropriate state */
86b93fd6
JZ
3045 if (!amdgpu_sriov_vf(adev)) {
3046 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3047 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3048 if (r) {
3049 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3050 adev->mp1_state, r);
3051 return r;
3052 }
a3a09142
AD
3053 }
3054 }
d38ceaf9
AD
3055 }
3056
3057 return 0;
3058}
3059
e7854a03
AD
3060/**
3061 * amdgpu_device_ip_suspend - run suspend for hardware IPs
3062 *
3063 * @adev: amdgpu_device pointer
3064 *
3065 * Main suspend function for hardware IPs. The list of all the hardware
3066 * IPs that make up the asic is walked, clockgating is disabled and the
3067 * suspend callbacks are run. suspend puts the hardware and software state
3068 * in each IP into a state suitable for suspend.
3069 * Returns 0 on success, negative error code on failure.
3070 */
3071int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3072{
3073 int r;
3074
3c73683c
JC
3075 if (amdgpu_sriov_vf(adev)) {
3076 amdgpu_virt_fini_data_exchange(adev);
e7819644 3077 amdgpu_virt_request_full_gpu(adev, false);
3c73683c 3078 }
e7819644 3079
e7854a03
AD
3080 r = amdgpu_device_ip_suspend_phase1(adev);
3081 if (r)
3082 return r;
3083 r = amdgpu_device_ip_suspend_phase2(adev);
3084
e7819644
YT
3085 if (amdgpu_sriov_vf(adev))
3086 amdgpu_virt_release_full_gpu(adev, false);
3087
e7854a03
AD
3088 return r;
3089}
3090
06ec9070 3091static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3092{
3093 int i, r;
3094
2cb681b6 3095 static enum amd_ip_block_type ip_order[] = {
2cb681b6 3096 AMD_IP_BLOCK_TYPE_COMMON,
c1c39032 3097 AMD_IP_BLOCK_TYPE_GMC,
39186aef 3098 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
3099 AMD_IP_BLOCK_TYPE_IH,
3100 };
a90ad3c2 3101
95ea3dbc 3102 for (i = 0; i < adev->num_ip_blocks; i++) {
2cb681b6
ML
3103 int j;
3104 struct amdgpu_ip_block *block;
a90ad3c2 3105
4cd2a96d
J
3106 block = &adev->ip_blocks[i];
3107 block->status.hw = false;
2cb681b6 3108
4cd2a96d 3109 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
2cb681b6 3110
4cd2a96d 3111 if (block->version->type != ip_order[j] ||
2cb681b6
ML
3112 !block->status.valid)
3113 continue;
3114
3115 r = block->version->funcs->hw_init(adev);
0aaeefcc 3116 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3117 if (r)
3118 return r;
482f0e53 3119 block->status.hw = true;
a90ad3c2
ML
3120 }
3121 }
3122
3123 return 0;
3124}
3125
06ec9070 3126static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3127{
3128 int i, r;
3129
2cb681b6
ML
3130 static enum amd_ip_block_type ip_order[] = {
3131 AMD_IP_BLOCK_TYPE_SMC,
3132 AMD_IP_BLOCK_TYPE_DCE,
3133 AMD_IP_BLOCK_TYPE_GFX,
3134 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c 3135 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07
JJ
3136 AMD_IP_BLOCK_TYPE_VCE,
3137 AMD_IP_BLOCK_TYPE_VCN
2cb681b6 3138 };
a90ad3c2 3139
2cb681b6
ML
3140 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3141 int j;
3142 struct amdgpu_ip_block *block;
a90ad3c2 3143
2cb681b6
ML
3144 for (j = 0; j < adev->num_ip_blocks; j++) {
3145 block = &adev->ip_blocks[j];
3146
3147 if (block->version->type != ip_order[i] ||
482f0e53
ML
3148 !block->status.valid ||
3149 block->status.hw)
2cb681b6
ML
3150 continue;
3151
895bd048
JZ
3152 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3153 r = block->version->funcs->resume(adev);
3154 else
3155 r = block->version->funcs->hw_init(adev);
3156
0aaeefcc 3157 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3158 if (r)
3159 return r;
482f0e53 3160 block->status.hw = true;
a90ad3c2
ML
3161 }
3162 }
3163
3164 return 0;
3165}
3166
e3ecdffa
AD
3167/**
3168 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3169 *
3170 * @adev: amdgpu_device pointer
3171 *
3172 * First resume function for hardware IPs. The list of all the hardware
3173 * IPs that make up the asic is walked and the resume callbacks are run for
3174 * COMMON, GMC, and IH. resume puts the hardware into a functional state
3175 * after a suspend and updates the software state as necessary. This
3176 * function is also used for restoring the GPU after a GPU reset.
3177 * Returns 0 on success, negative error code on failure.
3178 */
06ec9070 3179static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
3180{
3181 int i, r;
3182
a90ad3c2 3183 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3184 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 3185 continue;
a90ad3c2 3186 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3187 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
d7274ec7
BZ
3188 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3189 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
482f0e53 3190
fcf0649f
CZ
3191 r = adev->ip_blocks[i].version->funcs->resume(adev);
3192 if (r) {
3193 DRM_ERROR("resume of IP block <%s> failed %d\n",
3194 adev->ip_blocks[i].version->funcs->name, r);
3195 return r;
3196 }
482f0e53 3197 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
3198 }
3199 }
3200
3201 return 0;
3202}
3203
e3ecdffa
AD
3204/**
3205 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3206 *
3207 * @adev: amdgpu_device pointer
3208 *
3209 * Second resume function for hardware IPs. The list of all the hardware
3210 * IPs that make up the asic is walked and the resume callbacks are run for
3211 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
3212 * functional state after a suspend and updates the software state as
3213 * necessary. This function is also used for restoring the GPU after a GPU
3214 * reset.
3215 * Returns 0 on success, negative error code on failure.
3216 */
06ec9070 3217static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3218{
3219 int i, r;
3220
3221 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3222 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 3223 continue;
fcf0649f 3224 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3225 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
3226 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3227 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 3228 continue;
a1255107 3229 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 3230 if (r) {
a1255107
AD
3231 DRM_ERROR("resume of IP block <%s> failed %d\n",
3232 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 3233 return r;
2c1a2784 3234 }
482f0e53 3235 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
3236 }
3237
3238 return 0;
3239}
3240
e3ecdffa
AD
3241/**
3242 * amdgpu_device_ip_resume - run resume for hardware IPs
3243 *
3244 * @adev: amdgpu_device pointer
3245 *
3246 * Main resume function for hardware IPs. The hardware IPs
3247 * are split into two resume functions because they are
3248 * also used in recovering from a GPU reset and some additional
3249 * steps need to be taken between them. In this case (S3/S4) they are
3250 * run sequentially.
3251 * Returns 0 on success, negative error code on failure.
3252 */
06ec9070 3253static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
3254{
3255 int r;
3256
9cec53c1
JZ
3257 r = amdgpu_amdkfd_resume_iommu(adev);
3258 if (r)
3259 return r;
3260
06ec9070 3261 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
3262 if (r)
3263 return r;
7a3e0bb2
RZ
3264
3265 r = amdgpu_device_fw_loading(adev);
3266 if (r)
3267 return r;
3268
06ec9070 3269 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
3270
3271 return r;
3272}
3273
e3ecdffa
AD
3274/**
3275 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3276 *
3277 * @adev: amdgpu_device pointer
3278 *
3279 * Query the VBIOS data tables to determine if the board supports SR-IOV.
3280 */
4e99a44e 3281static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 3282{
6867e1b5
ML
3283 if (amdgpu_sriov_vf(adev)) {
3284 if (adev->is_atom_fw) {
58ff791a 3285 if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
6867e1b5
ML
3286 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3287 } else {
3288 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3289 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3290 }
3291
3292 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3293 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 3294 }
048765ad
AR
3295}
3296
e3ecdffa
AD
3297/**
3298 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3299 *
3300 * @asic_type: AMD asic type
3301 *
3302 * Check if there is DC (new modesetting infrastructure) support for an asic.
3303 * Returns true if DC has support, false if not.
3304 */
4562236b
HW
3305bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3306{
3307 switch (asic_type) {
0637d417
AD
3308#ifdef CONFIG_DRM_AMDGPU_SI
3309 case CHIP_HAINAN:
3310#endif
3311 case CHIP_TOPAZ:
3312 /* chips with no display hardware */
3313 return false;
4562236b 3314#if defined(CONFIG_DRM_AMD_DC)
64200c46
MR
3315 case CHIP_TAHITI:
3316 case CHIP_PITCAIRN:
3317 case CHIP_VERDE:
3318 case CHIP_OLAND:
2d32ffd6
AD
3319 /*
3320 * We have systems in the wild with these ASICs that require
3321 * LVDS and VGA support which is not supported with DC.
3322 *
3323 * Fallback to the non-DC driver here by default so as not to
3324 * cause regressions.
3325 */
3326#if defined(CONFIG_DRM_AMD_DC_SI)
3327 return amdgpu_dc > 0;
3328#else
3329 return false;
64200c46 3330#endif
4562236b 3331 case CHIP_BONAIRE:
0d6fbccb 3332 case CHIP_KAVERI:
367e6687
AD
3333 case CHIP_KABINI:
3334 case CHIP_MULLINS:
d9fda248
HW
3335 /*
3336 * We have systems in the wild with these ASICs that require
b5a0168e 3337 * VGA support which is not supported with DC.
d9fda248
HW
3338 *
3339 * Fallback to the non-DC driver here by default so as not to
3340 * cause regressions.
3341 */
3342 return amdgpu_dc > 0;
f7f12b25 3343 default:
fd187853 3344 return amdgpu_dc != 0;
f7f12b25 3345#else
4562236b 3346 default:
93b09a9a 3347 if (amdgpu_dc > 0)
044a48f4 3348 DRM_INFO_ONCE("Display Core has been requested via kernel parameter "
93b09a9a 3349 "but isn't supported by ASIC, ignoring\n");
4562236b 3350 return false;
f7f12b25 3351#endif
4562236b
HW
3352 }
3353}
3354
3355/**
3356 * amdgpu_device_has_dc_support - check if dc is supported
3357 *
982a820b 3358 * @adev: amdgpu_device pointer
4562236b
HW
3359 *
3360 * Returns true for supported, false for not supported
3361 */
3362bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3363{
25263da3 3364 if (adev->enable_virtual_display ||
abaf210c 3365 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
2555039d
XY
3366 return false;
3367
4562236b
HW
3368 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3369}
3370
d4535e2c
AG
3371static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3372{
3373 struct amdgpu_device *adev =
3374 container_of(__work, struct amdgpu_device, xgmi_reset_work);
d95e8e97 3375 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
d4535e2c 3376
c6a6e2db
AG
3377 /* It's a bug to not have a hive within this function */
3378 if (WARN_ON(!hive))
3379 return;
3380
3381 /*
3382 * Use task barrier to synchronize all xgmi reset works across the
3383 * hive. task_barrier_enter and task_barrier_exit will block
3384 * until all the threads running the xgmi reset works reach
3385 * those points. task_barrier_full will do both blocks.
3386 */
3387 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3388
3389 task_barrier_enter(&hive->tb);
4a580877 3390 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
c6a6e2db
AG
3391
3392 if (adev->asic_reset_res)
3393 goto fail;
3394
3395 task_barrier_exit(&hive->tb);
4a580877 3396 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
c6a6e2db
AG
3397
3398 if (adev->asic_reset_res)
3399 goto fail;
43c4d576 3400
5e67bba3 3401 if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
3402 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
3403 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
c6a6e2db
AG
3404 } else {
3405
3406 task_barrier_full(&hive->tb);
3407 adev->asic_reset_res = amdgpu_asic_reset(adev);
3408 }
ce316fa5 3409
c6a6e2db 3410fail:
d4535e2c 3411 if (adev->asic_reset_res)
fed184e9 3412 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
4a580877 3413 adev->asic_reset_res, adev_to_drm(adev)->unique);
d95e8e97 3414 amdgpu_put_xgmi_hive(hive);
d4535e2c
AG
3415}
3416
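/*
 * Editor-added sketch of the two task-barrier rendezvous patterns used in
 * amdgpu_device_xgmi_reset_func() above, assuming every participating
 * thread was registered with task_barrier_add_task() beforehand. hive_tb
 * stands in for &hive->tb; the BACO steps are elided for brevity.
 */
static void my_baco_style_reset(struct task_barrier *hive_tb)
{
	task_barrier_enter(hive_tb);	/* block until all nodes arrive */
	/* ... every node enters BACO together ... */
	task_barrier_exit(hive_tb);	/* block again on the way out */
	/* ... every node exits BACO together ... */
}

static void my_full_reset(struct task_barrier *hive_tb)
{
	task_barrier_full(hive_tb);	/* enter + exit in one call */
	/* ... per-node asic reset then proceeds in lockstep ... */
}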
71f98027
AD
3417static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3418{
3419 char *input = amdgpu_lockup_timeout;
3420 char *timeout_setting = NULL;
3421 int index = 0;
3422 long timeout;
3423 int ret = 0;
3424
3425 /*
67387dfe
AD
3426 * By default the timeout for non-compute jobs is 10000 ms
3427 * and 60000 ms for compute jobs.
71f98027 3428 * In SR-IOV mode the compute timeout is 60000 ms only with
b7b2a316 3429 * pp_one_vf enabled, and 10000 ms otherwise.
71f98027
AD
3430 */
3431 adev->gfx_timeout = msecs_to_jiffies(10000);
3432 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
9882e278
ED
3433 if (amdgpu_sriov_vf(adev))
3434 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3435 msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
71f98027 3436 else
67387dfe 3437 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027 3438
f440ff44 3439 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 3440 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 3441 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
3442 ret = kstrtol(timeout_setting, 0, &timeout);
3443 if (ret)
3444 return ret;
3445
3446 if (timeout == 0) {
3447 index++;
3448 continue;
3449 } else if (timeout < 0) {
3450 timeout = MAX_SCHEDULE_TIMEOUT;
127aedf9
CK
3451 dev_warn(adev->dev, "lockup timeout disabled");
3452 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
71f98027
AD
3453 } else {
3454 timeout = msecs_to_jiffies(timeout);
3455 }
3456
3457 switch (index++) {
3458 case 0:
3459 adev->gfx_timeout = timeout;
3460 break;
3461 case 1:
3462 adev->compute_timeout = timeout;
3463 break;
3464 case 2:
3465 adev->sdma_timeout = timeout;
3466 break;
3467 case 3:
3468 adev->video_timeout = timeout;
3469 break;
3470 default:
3471 break;
3472 }
3473 }
3474 /*
3475 * There is only one value specified and
3476 * it should apply to all non-compute jobs.
3477 */
bcccee89 3478 if (index == 1) {
71f98027 3479 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
3480 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3481 adev->compute_timeout = adev->gfx_timeout;
3482 }
71f98027
AD
3483 }
3484
3485 return ret;
3486}
d4535e2c 3487
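/*
 * Editor-added examples of the lockup_timeout module parameter syntax that
 * the parser above accepts. Values are in milliseconds and follow the order
 * of the switch statement: gfx,compute,sdma,video.
 *
 *   amdgpu.lockup_timeout=10000                    single value: applies to
 *                                                  all non-compute jobs (and
 *                                                  to compute in SR-IOV or
 *                                                  passthrough mode)
 *   amdgpu.lockup_timeout=10000,60000,10000,10000  one value per job type
 *   amdgpu.lockup_timeout=0,60000                  0 keeps that job type's
 *                                                  default
 *   amdgpu.lockup_timeout=-1                       negative disables the
 *                                                  timeout (and taints the
 *                                                  kernel with SOFTLOCKUP)
 */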
4a74c38c
PY
3488/**
3489 * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3490 *
3491 * @adev: amdgpu_device pointer
3492 *
3493 * RAM is direct mapped to the GPU if the IOMMU is not enabled or is in pass-through mode
3494 */
3495static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3496{
3497 struct iommu_domain *domain;
3498
3499 domain = iommu_get_domain_for_dev(adev->dev);
3500 if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3501 adev->ram_is_direct_mapped = true;
3502}
3503
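/*
 * Editor-added note: IOMMU_DOMAIN_IDENTITY is the pass-through case, where
 * DMA addresses equal physical addresses, so from the GPU's perspective it
 * is equivalent to having no IOMMU domain at all -- hence both conditions
 * above set ram_is_direct_mapped.
 */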
77f3a5cd
ND
3504static const struct attribute *amdgpu_dev_attributes[] = {
3505 &dev_attr_product_name.attr,
3506 &dev_attr_product_number.attr,
3507 &dev_attr_serial_number.attr,
3508 &dev_attr_pcie_replay_count.attr,
3509 NULL
3510};
3511
d38ceaf9
AD
3512/**
3513 * amdgpu_device_init - initialize the driver
3514 *
3515 * @adev: amdgpu_device pointer
d38ceaf9
AD
3516 * @flags: driver flags
3517 *
3518 * Initializes the driver info and hw (all asics).
3519 * Returns 0 for success or an error on failure.
3520 * Called at driver startup.
3521 */
3522int amdgpu_device_init(struct amdgpu_device *adev,
d38ceaf9
AD
3523 uint32_t flags)
3524{
8aba21b7
LT
3525 struct drm_device *ddev = adev_to_drm(adev);
3526 struct pci_dev *pdev = adev->pdev;
d38ceaf9 3527 int r, i;
b98c6299 3528 bool px = false;
95844d20 3529 u32 max_MBps;
d38ceaf9
AD
3530
3531 adev->shutdown = false;
d38ceaf9 3532 adev->flags = flags;
4e66d7d2
YZ
3533
3534 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3535 adev->asic_type = amdgpu_force_asic_type;
3536 else
3537 adev->asic_type = flags & AMD_ASIC_MASK;
3538
d38ceaf9 3539 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 3540 if (amdgpu_emu_mode == 1)
8bdab6bb 3541 adev->usec_timeout *= 10;
770d13b1 3542 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
3543 adev->accel_working = false;
3544 adev->num_rings = 0;
68ce8b24 3545 RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
d38ceaf9
AD
3546 adev->mman.buffer_funcs = NULL;
3547 adev->mman.buffer_funcs_ring = NULL;
3548 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 3549 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 3550 adev->gmc.gmc_funcs = NULL;
7bd939d0 3551 adev->harvest_ip_mask = 0x0;
f54d1867 3552 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 3553 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
3554
3555 adev->smc_rreg = &amdgpu_invalid_rreg;
3556 adev->smc_wreg = &amdgpu_invalid_wreg;
3557 adev->pcie_rreg = &amdgpu_invalid_rreg;
3558 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
3559 adev->pciep_rreg = &amdgpu_invalid_rreg;
3560 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
3561 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3562 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
3563 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3564 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3565 adev->didt_rreg = &amdgpu_invalid_rreg;
3566 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
3567 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3568 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
3569 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3570 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3571
3e39ab90
AD
3572 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3573 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3574 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
3575
3576 /* mutex initializations are all done here so we
3577 * can call these functions later without locking issues */
0e5ca0d1 3578 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
3579 mutex_init(&adev->pm.mutex);
3580 mutex_init(&adev->gfx.gpu_clock_mutex);
3581 mutex_init(&adev->srbm_mutex);
b8866c26 3582 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3583 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 3584 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3585 mutex_init(&adev->mn_lock);
e23b74aa 3586 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3587 hash_init(adev->mn_hash);
32eaeae0 3588 mutex_init(&adev->psp.mutex);
bd052211 3589 mutex_init(&adev->notifier_lock);
8cda7a4f 3590 mutex_init(&adev->pm.stable_pstate_ctx_lock);
f113cc32 3591 mutex_init(&adev->benchmark_mutex);
d38ceaf9 3592
ab3b9de6 3593 amdgpu_device_init_apu_flags(adev);
9f6a7857 3594
912dfc84
EQ
3595 r = amdgpu_device_check_arguments(adev);
3596 if (r)
3597 return r;
d38ceaf9 3598
d38ceaf9
AD
3599 spin_lock_init(&adev->mmio_idx_lock);
3600 spin_lock_init(&adev->smc_idx_lock);
3601 spin_lock_init(&adev->pcie_idx_lock);
3602 spin_lock_init(&adev->uvd_ctx_idx_lock);
3603 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3604 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3605 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3606 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3607 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3608
0c4e7fa5
CZ
3609 INIT_LIST_HEAD(&adev->shadow_list);
3610 mutex_init(&adev->shadow_list_lock);
3611
655ce9cb 3612 INIT_LIST_HEAD(&adev->reset_list);
3613
6492e1b0 3614 INIT_LIST_HEAD(&adev->ras_list);
3615
beff74bc
AD
3616 INIT_DELAYED_WORK(&adev->delayed_init_work,
3617 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3618 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3619 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3620
d4535e2c
AG
3621 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3622
d23ee13f 3623 adev->gfx.gfx_off_req_count = 1;
0ad7347a
AA
3624 adev->gfx.gfx_off_residency = 0;
3625 adev->gfx.gfx_off_entrycount = 0;
b6e79d9a 3626 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3627
b265bdbd
EQ
3628 atomic_set(&adev->throttling_logging_enabled, 1);
3629 /*
3630 * If throttling continues, logging will be performed every minute
3631 * to avoid log flooding. "-1" is subtracted since the thermal
3632 * throttling interrupt comes every second. Thus, the total logging
3633 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
3634 * for throttling interrupt) = 60 seconds.
3635 */
3636 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3637 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3638
0fa49558
AX
3639 /* Registers mapping */
3640 /* TODO: block userspace mapping of io register */
da69c161
KW
3641 if (adev->asic_type >= CHIP_BONAIRE) {
3642 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3643 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3644 } else {
3645 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3646 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3647 }
d38ceaf9 3648
6c08e0ef
EQ
3649 for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3650 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3651
d38ceaf9
AD
3652 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3653 if (adev->rmmio == NULL) {
3654 return -ENOMEM;
3655 }
3656 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3657 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3658
5494d864
AD
3659 amdgpu_device_get_pcie_info(adev);
3660
b239c017
JX
3661 if (amdgpu_mcbp)
3662 DRM_INFO("MCBP is enabled\n");
3663
436afdfa
PY
3664 /*
3665 * The reset domain needs to be present early, before the XGMI hive is
3666 * discovered (if any) and initialized, so that the reset sem and in_gpu
3667 * reset flag can be used early on during init and before calling RREG32.
3668 */
3669 adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3670 if (!adev->reset_domain)
3671 return -ENOMEM;
3672
3aa0115d
ML
3673 /* detect hw virtualization here */
3674 amdgpu_detect_virtualization(adev);
3675
dffa11b4
ML
3676 r = amdgpu_device_get_job_timeout_settings(adev);
3677 if (r) {
3678 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4ef87d8f 3679 return r;
a190d1c7
XY
3680 }
3681
d38ceaf9 3682 /* early init functions */
06ec9070 3683 r = amdgpu_device_ip_early_init(adev);
d38ceaf9 3684 if (r)
4ef87d8f 3685 return r;
d38ceaf9 3686
b7cdb41e
ML
3687 /* Get rid of things like offb */
3688 r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
3689 if (r)
3690 return r;
3691
4d33e704
SK
3692 /* Enable TMZ based on IP_VERSION */
3693 amdgpu_gmc_tmz_set(adev);
3694
957b0787 3695 amdgpu_gmc_noretry_set(adev);
4a0165f0
VS
3696 /* Need to get xgmi info early to decide the reset behavior*/
3697 if (adev->gmc.xgmi.supported) {
3698 r = adev->gfxhub.funcs->get_xgmi_info(adev);
3699 if (r)
3700 return r;
3701 }
3702
8e6d0b69 3703 /* enable PCIE atomic ops */
3704 if (amdgpu_sriov_vf(adev))
3705 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
e15c9d06 3706 adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
8e6d0b69 3707 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3708 else
3709 adev->have_atomics_support =
3710 !pci_enable_atomic_ops_to_root(adev->pdev,
3711 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3712 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3713 if (!adev->have_atomics_support)
3714 dev_info(adev->dev, "PCIE atomic ops is not supported\n");
3715
6585661d
OZ
3716 /* doorbell bar mapping and doorbell index init */
3717 amdgpu_device_doorbell_init(adev);
3718
9475a943
SL
3719 if (amdgpu_emu_mode == 1) {
3720 /* post the asic on emulation mode */
3721 emu_soc_asic_init(adev);
bfca0289 3722 goto fence_driver_init;
9475a943 3723 }
bfca0289 3724
04442bf7
LL
3725 amdgpu_reset_init(adev);
3726
4e99a44e
ML
3727 /* detect if we are with an SRIOV vbios */
3728 amdgpu_device_detect_sriov_bios(adev);
048765ad 3729
95e8e59e
AD
3730 /* check if we need to reset the asic
3731 * E.g., driver was not cleanly unloaded previously, etc.
3732 */
f14899fd 3733 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
e3c1b071 3734 if (adev->gmc.xgmi.num_physical_nodes) {
3735 dev_info(adev->dev, "Pending hive reset.\n");
3736 adev->gmc.xgmi.pending_reset = true;
3737 /* Only need to init necessary block for SMU to handle the reset */
3738 for (i = 0; i < adev->num_ip_blocks; i++) {
3739 if (!adev->ip_blocks[i].status.valid)
3740 continue;
3741 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3742 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3743 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3744 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
751f43e7 3745 DRM_DEBUG("IP %s disabled for hw_init.\n",
e3c1b071 3746 adev->ip_blocks[i].version->funcs->name);
3747 adev->ip_blocks[i].status.hw = true;
3748 }
3749 }
3750 } else {
3751 r = amdgpu_asic_reset(adev);
3752 if (r) {
3753 dev_err(adev->dev, "asic reset on init failed\n");
3754 goto failed;
3755 }
95e8e59e
AD
3756 }
3757 }
3758
8f66090b 3759 pci_enable_pcie_error_reporting(adev->pdev);
c9a6b82f 3760
d38ceaf9 3761 /* Post card if necessary */
39c640c0 3762 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3763 if (!adev->bios) {
bec86378 3764 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3765 r = -EINVAL;
3766 goto failed;
d38ceaf9 3767 }
bec86378 3768 DRM_INFO("GPU posting now...\n");
4d2997ab 3769 r = amdgpu_device_asic_init(adev);
4e99a44e
ML
3770 if (r) {
3771 dev_err(adev->dev, "gpu post error!\n");
3772 goto failed;
3773 }
d38ceaf9
AD
3774 }
3775
88b64e95
AD
3776 if (adev->is_atom_fw) {
3777 /* Initialize clocks */
3778 r = amdgpu_atomfirmware_get_clock_info(adev);
3779 if (r) {
3780 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 3781 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
3782 goto failed;
3783 }
3784 } else {
a5bde2f9
AD
3785 /* Initialize clocks */
3786 r = amdgpu_atombios_get_clock_info(adev);
3787 if (r) {
3788 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 3789 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 3790 goto failed;
a5bde2f9
AD
3791 }
3792 /* init i2c buses */
4562236b
HW
3793 if (!amdgpu_device_has_dc_support(adev))
3794 amdgpu_atombios_i2c_init(adev);
2c1a2784 3795 }
d38ceaf9 3796
bfca0289 3797fence_driver_init:
d38ceaf9 3798 /* Fence driver */
067f44c8 3799 r = amdgpu_fence_driver_sw_init(adev);
2c1a2784 3800 if (r) {
067f44c8 3801 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
e23b74aa 3802 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3803 goto failed;
2c1a2784 3804 }
d38ceaf9
AD
3805
3806 /* init the mode config */
4a580877 3807 drm_mode_config_init(adev_to_drm(adev));
d38ceaf9 3808
06ec9070 3809 r = amdgpu_device_ip_init(adev);
d38ceaf9 3810 if (r) {
8840a387 3811 /* failed in exclusive mode due to timeout */
3812 if (amdgpu_sriov_vf(adev) &&
3813 !amdgpu_sriov_runtime(adev) &&
3814 amdgpu_virt_mmio_blocked(adev) &&
3815 !amdgpu_virt_wait_reset(adev)) {
3816 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
3817 /* Don't send request since VF is inactive. */
3818 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3819 adev->virt.ops = NULL;
8840a387 3820 r = -EAGAIN;
970fd197 3821 goto release_ras_con;
8840a387 3822 }
06ec9070 3823 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3824 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
970fd197 3825 goto release_ras_con;
d38ceaf9
AD
3826 }
3827
8d35a259
LG
3828 amdgpu_fence_driver_hw_init(adev);
3829
d69b8971
YZ
3830 dev_info(adev->dev,
3831 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
3832 adev->gfx.config.max_shader_engines,
3833 adev->gfx.config.max_sh_per_se,
3834 adev->gfx.config.max_cu_per_sh,
3835 adev->gfx.cu_info.number);
3836
d38ceaf9
AD
3837 adev->accel_working = true;
3838
e59c0205
AX
3839 amdgpu_vm_check_compute_bug(adev);
3840
95844d20
MO
3841 /* Initialize the buffer migration limit. */
3842 if (amdgpu_moverate >= 0)
3843 max_MBps = amdgpu_moverate;
3844 else
3845 max_MBps = 8; /* Allow 8 MB/s. */
3846 /* Get a log2 for easy divisions. */
3847 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3848
d2f52ac8 3849 r = amdgpu_pm_sysfs_init(adev);
7c868b59
YT
3850 if (r) {
3851 adev->pm_sysfs_en = false;
d2f52ac8 3852 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
7c868b59
YT
3853 } else
3854 adev->pm_sysfs_en = true;
d2f52ac8 3855
5bb23532 3856 r = amdgpu_ucode_sysfs_init(adev);
7c868b59
YT
3857 if (r) {
3858 adev->ucode_sysfs_en = false;
5bb23532 3859 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
7c868b59
YT
3860 } else
3861 adev->ucode_sysfs_en = true;
5bb23532 3862
8424f2cc
LG
3863 r = amdgpu_psp_sysfs_init(adev);
3864 if (r) {
3865 adev->psp_sysfs_en = false;
3866 if (!amdgpu_sriov_vf(adev))
3867 DRM_ERROR("Creating psp sysfs failed\n");
3868 } else
3869 adev->psp_sysfs_en = true;
3870
b0adca4d
EQ
3871 /*
3872 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3873 * Otherwise the mgpu fan boost feature will be skipped because the
3874 * gpu instance count would be too low.
3875 */
3876 amdgpu_register_gpu_instance(adev);
3877
d38ceaf9
AD
3878 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3879 * explicit gating rather than handling it automatically.
3880 */
e3c1b071 3881 if (!adev->gmc.xgmi.pending_reset) {
3882 r = amdgpu_device_ip_late_init(adev);
3883 if (r) {
3884 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
3885 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
970fd197 3886 goto release_ras_con;
e3c1b071 3887 }
3888 /* must succeed. */
3889 amdgpu_ras_resume(adev);
3890 queue_delayed_work(system_wq, &adev->delayed_init_work,
3891 msecs_to_jiffies(AMDGPU_RESUME_MS));
2c1a2784 3892 }
d38ceaf9 3893
2c738637
ML
3894 if (amdgpu_sriov_vf(adev))
3895 flush_delayed_work(&adev->delayed_init_work);
3896
77f3a5cd 3897 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
5aea5327 3898 if (r)
77f3a5cd 3899 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166 3900
d155bef0
AB
3901 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3902 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
3903 if (r)
3904 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3905
c1dd4aa6
AG
3906 /* Have stored pci confspace at hand for restore in sudden PCI error */
3907 if (amdgpu_device_cache_pci_state(adev->pdev))
3908 pci_restore_state(pdev);
3909
8c3dd61c
KHF
3910 /* if we have more than one VGA card, then disable the amdgpu VGA resources */
3911 /* this will fail for cards that aren't VGA class devices, just
3912 * ignore it */
3913 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
bf44e8ce 3914 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
8c3dd61c
KHF
3915
3916 if (amdgpu_device_supports_px(ddev)) {
3917 px = true;
3918 vga_switcheroo_register_client(adev->pdev,
3919 &amdgpu_switcheroo_ops, px);
3920 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
3921 }
3922
e3c1b071 3923 if (adev->gmc.xgmi.pending_reset)
3924 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
3925 msecs_to_jiffies(AMDGPU_RESUME_MS));
3926
4a74c38c
PY
3927 amdgpu_device_check_iommu_direct_map(adev);
3928
d38ceaf9 3929 return 0;
83ba126a 3930
970fd197
SY
3931release_ras_con:
3932 amdgpu_release_ras_context(adev);
3933
83ba126a 3934failed:
89041940 3935 amdgpu_vf_error_trans_all(adev);
8840a387 3936
83ba126a 3937 return r;
d38ceaf9
AD
3938}
3939
07775fc1
AG
3940static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
3941{
62d5f9f7 3942
07775fc1
AG
3943 /* Clear all CPU mappings pointing to this device */
3944 unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
3945
3946 /* Unmap all mapped bars - Doorbell, registers and VRAM */
3947 amdgpu_device_doorbell_fini(adev);
3948
3949 iounmap(adev->rmmio);
3950 adev->rmmio = NULL;
3951 if (adev->mman.aper_base_kaddr)
3952 iounmap(adev->mman.aper_base_kaddr);
3953 adev->mman.aper_base_kaddr = NULL;
3954
3955 /* Memory manager related */
3956 if (!adev->gmc.xgmi.connected_to_cpu) {
3957 arch_phys_wc_del(adev->gmc.vram_mtrr);
3958 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
3959 }
3960}
3961
d38ceaf9 3962/**
bbe04dec 3963 * amdgpu_device_fini_hw - tear down the driver
d38ceaf9
AD
3964 *
3965 * @adev: amdgpu_device pointer
3966 *
3967 * Tear down the driver info (all asics).
3968 * Called at driver shutdown.
3969 */
72c8c97b 3970void amdgpu_device_fini_hw(struct amdgpu_device *adev)
d38ceaf9 3971{
aac89168 3972 dev_info(adev->dev, "amdgpu: finishing device.\n");
9f875167 3973 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 3974 adev->shutdown = true;
9f875167 3975
752c683d
ML
3976 /* make sure IB tests finished before entering exclusive mode
3977 * to avoid preemption on IB tests
3978 */
519b8b76 3979 if (amdgpu_sriov_vf(adev)) {
752c683d 3980 amdgpu_virt_request_full_gpu(adev, false);
519b8b76
BZ
3981 amdgpu_virt_fini_data_exchange(adev);
3982 }
752c683d 3983
e5b03032
ML
3984 /* disable all interrupts */
3985 amdgpu_irq_disable_all(adev);
ff97cba8 3986 if (adev->mode_info.mode_config_initialized) {
1053b9c9 3987 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4a580877 3988 drm_helper_force_disable_all(adev_to_drm(adev));
ff97cba8 3989 else
4a580877 3990 drm_atomic_helper_shutdown(adev_to_drm(adev));
ff97cba8 3991 }
8d35a259 3992 amdgpu_fence_driver_hw_fini(adev);
72c8c97b 3993
98f56188
YY
3994 if (adev->mman.initialized) {
3995 flush_delayed_work(&adev->mman.bdev.wq);
3996 ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
3997 }
3998
7c868b59
YT
3999 if (adev->pm_sysfs_en)
4000 amdgpu_pm_sysfs_fini(adev);
72c8c97b
AG
4001 if (adev->ucode_sysfs_en)
4002 amdgpu_ucode_sysfs_fini(adev);
8424f2cc
LG
4003 if (adev->psp_sysfs_en)
4004 amdgpu_psp_sysfs_fini(adev);
72c8c97b
AG
4005 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
4006
232d1d43
SY
4007 /* disable ras feature must before hw fini */
4008 amdgpu_ras_pre_fini(adev);
4009
e9669fb7 4010 amdgpu_device_ip_fini_early(adev);
d10d0daa 4011
a3848df6
YW
4012 amdgpu_irq_fini_hw(adev);
4013
b6fd6e0f
SK
4014 if (adev->mman.initialized)
4015 ttm_device_clear_dma_mappings(&adev->mman.bdev);
894c6890 4016
d10d0daa 4017 amdgpu_gart_dummy_page_fini(adev);
07775fc1 4018
fac53471 4019 amdgpu_device_unmap_mmio(adev);
87172e89 4020
72c8c97b
AG
4021}
4022
4023void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4024{
62d5f9f7
LS
4025 int idx;
4026
8d35a259 4027 amdgpu_fence_driver_sw_fini(adev);
a5c5d8d5 4028 amdgpu_device_ip_fini(adev);
b31d3063 4029 amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
d38ceaf9 4030 adev->accel_working = false;
68ce8b24 4031 dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
04442bf7
LL
4032
4033 amdgpu_reset_fini(adev);
4034
d38ceaf9 4035 /* free i2c buses */
4562236b
HW
4036 if (!amdgpu_device_has_dc_support(adev))
4037 amdgpu_i2c_fini(adev);
bfca0289
SL
4038
4039 if (amdgpu_emu_mode != 1)
4040 amdgpu_atombios_fini(adev);
4041
d38ceaf9
AD
4042 kfree(adev->bios);
4043 adev->bios = NULL;
b98c6299 4044 if (amdgpu_device_supports_px(adev_to_drm(adev))) {
84c8b22e 4045 vga_switcheroo_unregister_client(adev->pdev);
83ba126a 4046 vga_switcheroo_fini_domain_pm_ops(adev->dev);
b98c6299 4047 }
38d6be81 4048 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
b8779475 4049 vga_client_unregister(adev->pdev);
e9bc1bf7 4050
62d5f9f7
LS
4051 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4052
4053 iounmap(adev->rmmio);
4054 adev->rmmio = NULL;
4055 amdgpu_device_doorbell_fini(adev);
4056 drm_dev_exit(idx);
4057 }
4058
d155bef0
AB
4059 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4060 amdgpu_pmu_fini(adev);
72de33f8 4061 if (adev->mman.discovery_bin)
a190d1c7 4062 amdgpu_discovery_fini(adev);
72c8c97b 4063
cfbb6b00
AG
4064 amdgpu_reset_put_reset_domain(adev->reset_domain);
4065 adev->reset_domain = NULL;
4066
72c8c97b
AG
4067 kfree(adev->pci_state);
4068
d38ceaf9
AD
4069}
4070
58144d28
ND
4071/**
4072 * amdgpu_device_evict_resources - evict device resources
4073 * @adev: amdgpu device object
4074 *
4075 * Evicts all ttm device resources (vram BOs, gart table) from the lru list
4076 * of the vram memory type. Mainly used for evicting device resources
4077 * at suspend time.
4078 *
4079 */
7863c155 4080static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
58144d28 4081{
7863c155
ML
4082 int ret;
4083
e53d9665
ML
4084 /* No need to evict vram on APUs for suspend to ram or s2idle */
4085 if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
7863c155 4086 return 0;
58144d28 4087
7863c155
ML
4088 ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4089 if (ret)
58144d28 4090 DRM_WARN("evicting device resources failed\n");
7863c155 4091 return ret;
58144d28 4092}
d38ceaf9
AD
4093
4094/*
4095 * Suspend & resume.
4096 */
4097/**
810ddc3a 4098 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 4099 *
87e3f136 4100 * @dev: drm dev pointer
87e3f136 4101 * @fbcon: notify the fbdev of suspend
d38ceaf9
AD
4102 *
4103 * Puts the hw in the suspend state (all asics).
4104 * Returns 0 for success or an error on failure.
4105 * Called at driver suspend.
4106 */
de185019 4107int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9 4108{
a2e15b0e 4109 struct amdgpu_device *adev = drm_to_adev(dev);
d7274ec7 4110 int r = 0;
d38ceaf9 4111
d38ceaf9
AD
4112 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4113 return 0;
4114
44779b43 4115 adev->in_suspend = true;
3fa8f89d 4116
47ea2076
SF
4117 /* Evict the majority of BOs before grabbing the full access */
4118 r = amdgpu_device_evict_resources(adev);
4119 if (r)
4120 return r;
4121
d7274ec7
BZ
4122 if (amdgpu_sriov_vf(adev)) {
4123 amdgpu_virt_fini_data_exchange(adev);
4124 r = amdgpu_virt_request_full_gpu(adev, false);
4125 if (r)
4126 return r;
4127 }
4128
3fa8f89d
S
4129 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4130 DRM_WARN("smart shift update failed\n");
4131
d38ceaf9
AD
4132 drm_kms_helper_poll_disable(dev);
4133
5f818173 4134 if (fbcon)
087451f3 4135 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
5f818173 4136
beff74bc 4137 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 4138
5e6932fe 4139 amdgpu_ras_suspend(adev);
4140
2196927b 4141 amdgpu_device_ip_suspend_phase1(adev);
fe1053b7 4142
c004d44e 4143 if (!adev->in_s0ix)
5d3a2d95 4144 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
94fa5660 4145
7863c155
ML
4146 r = amdgpu_device_evict_resources(adev);
4147 if (r)
4148 return r;
d38ceaf9 4149
8d35a259 4150 amdgpu_fence_driver_hw_fini(adev);
d38ceaf9 4151
2196927b 4152 amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 4153
d7274ec7
BZ
4154 if (amdgpu_sriov_vf(adev))
4155 amdgpu_virt_release_full_gpu(adev, false);
4156
d38ceaf9
AD
4157 return 0;
4158}
4159
4160/**
810ddc3a 4161 * amdgpu_device_resume - initiate device resume
d38ceaf9 4162 *
87e3f136 4163 * @dev: drm dev pointer
87e3f136 4164 * @fbcon: notify the fbdev of resume
d38ceaf9
AD
4165 *
4166 * Bring the hw back to operating state (all asics).
4167 * Returns 0 for success or an error on failure.
4168 * Called at driver resume.
4169 */
de185019 4170int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9 4171{
1348969a 4172 struct amdgpu_device *adev = drm_to_adev(dev);
03161a6e 4173 int r = 0;
d38ceaf9 4174
d7274ec7
BZ
4175 if (amdgpu_sriov_vf(adev)) {
4176 r = amdgpu_virt_request_full_gpu(adev, true);
4177 if (r)
4178 return r;
4179 }
4180
d38ceaf9
AD
4181 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4182 return 0;
4183
62498733 4184 if (adev->in_s0ix)
bc143d8b 4185 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
628c36d7 4186
d38ceaf9 4187 /* post card */
39c640c0 4188 if (amdgpu_device_need_post(adev)) {
4d2997ab 4189 r = amdgpu_device_asic_init(adev);
74b0b157 4190 if (r)
aac89168 4191 dev_err(adev->dev, "amdgpu asic init failed\n");
74b0b157 4192 }
d38ceaf9 4193
06ec9070 4194 r = amdgpu_device_ip_resume(adev);
d7274ec7 4195
e6707218 4196 if (r) {
aac89168 4197 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
3c22c1ea 4198 goto exit;
e6707218 4199 }
8d35a259 4200 amdgpu_fence_driver_hw_init(adev);
5ceb54c6 4201
06ec9070 4202 r = amdgpu_device_ip_late_init(adev);
03161a6e 4203 if (r)
3c22c1ea 4204 goto exit;
d38ceaf9 4205
beff74bc
AD
4206 queue_delayed_work(system_wq, &adev->delayed_init_work,
4207 msecs_to_jiffies(AMDGPU_RESUME_MS));
4208
c004d44e 4209 if (!adev->in_s0ix) {
5d3a2d95
AD
4210 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4211 if (r)
3c22c1ea 4212 goto exit;
5d3a2d95 4213 }
756e6880 4214
3c22c1ea
SF
4215exit:
4216 if (amdgpu_sriov_vf(adev)) {
4217 amdgpu_virt_init_data_exchange(adev);
4218 amdgpu_virt_release_full_gpu(adev, true);
4219 }
4220
4221 if (r)
4222 return r;
4223
96a5d8d4 4224 /* Make sure IB tests flushed */
beff74bc 4225 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 4226
a2e15b0e 4227 if (fbcon)
087451f3 4228 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
d38ceaf9
AD
4229
4230 drm_kms_helper_poll_enable(dev);
23a1a9e5 4231
5e6932fe 4232 amdgpu_ras_resume(adev);
4233
d09ef243
AD
4234 if (adev->mode_info.num_crtc) {
4235 /*
4236 * Most of the connector probing functions try to acquire runtime pm
4237 * refs to ensure that the GPU is powered on when connector polling is
4238 * performed. Since we're calling this from a runtime PM callback,
4239 * trying to acquire rpm refs will cause us to deadlock.
4240 *
4241 * Since we're guaranteed to be holding the rpm lock, it's safe to
4242 * temporarily disable the rpm helpers so this doesn't deadlock us.
4243 */
23a1a9e5 4244#ifdef CONFIG_PM
d09ef243 4245 dev->dev->power.disable_depth++;
23a1a9e5 4246#endif
d09ef243
AD
4247 if (!adev->dc_enabled)
4248 drm_helper_hpd_irq_event(dev);
4249 else
4250 drm_kms_helper_hotplug_event(dev);
23a1a9e5 4251#ifdef CONFIG_PM
d09ef243 4252 dev->dev->power.disable_depth--;
23a1a9e5 4253#endif
d09ef243 4254 }
44779b43
RZ
4255 adev->in_suspend = false;
4256
3fa8f89d
S
4257 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4258 DRM_WARN("smart shift update failed\n");
4259
4d3b9ae5 4260 return 0;
d38ceaf9
AD
4261}
4262
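/*
 * Editor-added sketch: how a PM callback pair might drive
 * amdgpu_device_suspend()/amdgpu_device_resume() above. It assumes the
 * struct drm_device was stored as driver data on the struct device, as
 * the driver's PCI probe path typically arranges; error handling beyond
 * the return codes is omitted and the my_* names are hypothetical.
 */
static int my_pmops_suspend(struct device *dev)
{
	struct drm_device *drm_dev = dev_get_drvdata(dev);

	return amdgpu_device_suspend(drm_dev, true);
}

static int my_pmops_resume(struct device *dev)
{
	struct drm_device *drm_dev = dev_get_drvdata(dev);

	return amdgpu_device_resume(drm_dev, true);
}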
e3ecdffa
AD
4263/**
4264 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4265 *
4266 * @adev: amdgpu_device pointer
4267 *
4268 * The list of all the hardware IPs that make up the asic is walked and
4269 * the check_soft_reset callbacks are run. check_soft_reset determines
4270 * if the asic is still hung or not.
4271 * Returns true if any of the IPs are still in a hung state, false if not.
4272 */
06ec9070 4273static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
4274{
4275 int i;
4276 bool asic_hang = false;
4277
f993d628
ML
4278 if (amdgpu_sriov_vf(adev))
4279 return true;
4280
8bc04c29
AD
4281 if (amdgpu_asic_need_full_reset(adev))
4282 return true;
4283
63fbf42f 4284 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4285 if (!adev->ip_blocks[i].status.valid)
63fbf42f 4286 continue;
a1255107
AD
4287 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4288 adev->ip_blocks[i].status.hang =
4289 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4290 if (adev->ip_blocks[i].status.hang) {
aac89168 4291 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
4292 asic_hang = true;
4293 }
4294 }
4295 return asic_hang;
4296}
4297
e3ecdffa
AD
4298/**
4299 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4300 *
4301 * @adev: amdgpu_device pointer
4302 *
4303 * The list of all the hardware IPs that make up the asic is walked and the
4304 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
4305 * handles any IP specific hardware or software state changes that are
4306 * necessary for a soft reset to succeed.
4307 * Returns 0 on success, negative error code on failure.
4308 */
06ec9070 4309static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
4310{
4311 int i, r = 0;
4312
4313 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4314 if (!adev->ip_blocks[i].status.valid)
d31a501e 4315 continue;
a1255107
AD
4316 if (adev->ip_blocks[i].status.hang &&
4317 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4318 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
4319 if (r)
4320 return r;
4321 }
4322 }
4323
4324 return 0;
4325}
4326
e3ecdffa
AD
4327/**
4328 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4329 *
4330 * @adev: amdgpu_device pointer
4331 *
4332 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
4333 * reset is necessary to recover.
4334 * Returns true if a full asic reset is required, false if not.
4335 */
06ec9070 4336static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 4337{
da146d3b
AD
4338 int i;
4339
8bc04c29
AD
4340 if (amdgpu_asic_need_full_reset(adev))
4341 return true;
4342
da146d3b 4343 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4344 if (!adev->ip_blocks[i].status.valid)
da146d3b 4345 continue;
a1255107
AD
4346 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4347 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4348 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
4349 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4350 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 4351 if (adev->ip_blocks[i].status.hang) {
aac89168 4352 dev_info(adev->dev, "Some block need full reset!\n");
da146d3b
AD
4353 return true;
4354 }
4355 }
35d782fe
CZ
4356 }
4357 return false;
4358}
4359
e3ecdffa
AD
4360/**
4361 * amdgpu_device_ip_soft_reset - do a soft reset
4362 *
4363 * @adev: amdgpu_device pointer
4364 *
4365 * The list of all the hardware IPs that make up the asic is walked and the
4366 * soft_reset callbacks are run if the block is hung. soft_reset handles any
4367 * IP specific hardware or software state changes that are necessary to soft
4368 * reset the IP.
4369 * Returns 0 on success, negative error code on failure.
4370 */
06ec9070 4371static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4372{
4373 int i, r = 0;
4374
4375 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4376 if (!adev->ip_blocks[i].status.valid)
35d782fe 4377 continue;
a1255107
AD
4378 if (adev->ip_blocks[i].status.hang &&
4379 adev->ip_blocks[i].version->funcs->soft_reset) {
4380 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
4381 if (r)
4382 return r;
4383 }
4384 }
4385
4386 return 0;
4387}
4388
e3ecdffa
AD
4389/**
4390 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4391 *
4392 * @adev: amdgpu_device pointer
4393 *
4394 * The list of all the hardware IPs that make up the asic is walked and the
4395 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
4396 * handles any IP specific hardware or software state changes that are
4397 * necessary after the IP has been soft reset.
4398 * Returns 0 on success, negative error code on failure.
4399 */
06ec9070 4400static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4401{
4402 int i, r = 0;
4403
4404 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4405 if (!adev->ip_blocks[i].status.valid)
35d782fe 4406 continue;
a1255107
AD
4407 if (adev->ip_blocks[i].status.hang &&
4408 adev->ip_blocks[i].version->funcs->post_soft_reset)
4409 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
4410 if (r)
4411 return r;
4412 }
4413
4414 return 0;
4415}
4416
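/*
 * Editor-added sketch of the canonical order in which the four soft-reset
 * helpers above are driven; it mirrors the bare-metal branch of
 * amdgpu_device_pre_asic_reset() further below. my_try_soft_reset() is a
 * hypothetical wrapper for illustration only.
 */
static int my_try_soft_reset(struct amdgpu_device *adev)
{
	int r;

	if (!amdgpu_device_ip_check_soft_reset(adev))
		return 0;	/* nothing is hung */

	r = amdgpu_device_ip_pre_soft_reset(adev);
	if (!r)
		r = amdgpu_device_ip_soft_reset(adev);
	if (!r)
		r = amdgpu_device_ip_post_soft_reset(adev);

	/* if an IP is still hung, the caller escalates to a full reset */
	if (r || amdgpu_device_ip_check_soft_reset(adev))
		return -EAGAIN;

	return 0;
}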
e3ecdffa 4417/**
c33adbc7 4418 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
4419 *
4420 * @adev: amdgpu_device pointer
4421 *
4422 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
4423 * restore things like GPUVM page tables after a GPU reset where
4424 * the contents of VRAM might be lost.
403009bf
CK
4425 *
4426 * Returns:
4427 * 0 on success, negative error code on failure.
e3ecdffa 4428 */
c33adbc7 4429static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 4430{
c41d1cf6 4431 struct dma_fence *fence = NULL, *next = NULL;
403009bf 4432 struct amdgpu_bo *shadow;
e18aaea7 4433 struct amdgpu_bo_vm *vmbo;
403009bf 4434 long r = 1, tmo;
c41d1cf6
ML
4435
4436 if (amdgpu_sriov_runtime(adev))
b045d3af 4437 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
4438 else
4439 tmo = msecs_to_jiffies(100);
4440
aac89168 4441 dev_info(adev->dev, "recover vram bo from shadow start\n");
c41d1cf6 4442 mutex_lock(&adev->shadow_list_lock);
e18aaea7
ND
4443 list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4444 shadow = &vmbo->bo;
403009bf 4445 /* No need to recover an evicted BO */
d3116756
CK
4446 if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4447 shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4448 shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
403009bf
CK
4449 continue;
4450
4451 r = amdgpu_bo_restore_shadow(shadow, &next);
4452 if (r)
4453 break;
4454
c41d1cf6 4455 if (fence) {
1712fb1a 4456 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
4457 dma_fence_put(fence);
4458 fence = next;
1712fb1a 4459 if (tmo == 0) {
4460 r = -ETIMEDOUT;
c41d1cf6 4461 break;
1712fb1a 4462 } else if (tmo < 0) {
4463 r = tmo;
4464 break;
4465 }
403009bf
CK
4466 } else {
4467 fence = next;
c41d1cf6 4468 }
c41d1cf6
ML
4469 }
4470 mutex_unlock(&adev->shadow_list_lock);
4471
403009bf
CK
4472 if (fence)
4473 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
4474 dma_fence_put(fence);
4475
1712fb1a 4476 if (r < 0 || tmo <= 0) {
aac89168 4477 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
4478 return -EIO;
4479 }
c41d1cf6 4480
aac89168 4481 dev_info(adev->dev, "recover vram bo from shadow done\n");
403009bf 4482 return 0;
c41d1cf6
ML
4483}
4484
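/*
 * Editor-added note on the loop above: the shadow restores are pipelined --
 * the copy for the next buffer is issued before waiting on the previous
 * fence, and the shared 'tmo' budget shrinks with every wait, so the
 * timeout bounds the total recovery time rather than the time per buffer.
 */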
a90ad3c2 4485
e3ecdffa 4486/**
06ec9070 4487 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e 4488 *
982a820b 4489 * @adev: amdgpu_device pointer
87e3f136 4490 * @from_hypervisor: request from hypervisor
5740682e
ML
4491 *
4492 * Do VF FLR and reinitialize the ASIC.
3f48c681 4493 * Returns 0 on success, otherwise an error code.
e3ecdffa
AD
4494 */
4495static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4496 bool from_hypervisor)
5740682e
ML
4497{
4498 int r;
a5f67c93 4499 struct amdgpu_hive_info *hive = NULL;
7258fa31 4500 int retry_limit = 0;
5740682e 4501
7258fa31 4502retry:
c004d44e 4503 amdgpu_amdkfd_pre_reset(adev);
428890a3 4504
5740682e
ML
4505 if (from_hypervisor)
4506 r = amdgpu_virt_request_full_gpu(adev, true);
4507 else
4508 r = amdgpu_virt_reset_gpu(adev);
4509 if (r)
4510 return r;
a90ad3c2
ML
4511
4512 /* Resume IP prior to SMC */
06ec9070 4513 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
4514 if (r)
4515 goto error;
a90ad3c2 4516
c9ffa427 4517 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 4518
7a3e0bb2
RZ
4519 r = amdgpu_device_fw_loading(adev);
4520 if (r)
4521 return r;
4522
a90ad3c2 4523 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 4524 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
4525 if (r)
4526 goto error;
a90ad3c2 4527
a5f67c93
ZL
4528 hive = amdgpu_get_xgmi_hive(adev);
4529 /* Update PSP FW topology after reset */
4530 if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4531 r = amdgpu_xgmi_update_topology(hive, adev);
4532
4533 if (hive)
4534 amdgpu_put_xgmi_hive(hive);
4535
4536 if (!r) {
4537 amdgpu_irq_gpu_reset_resume_helper(adev);
4538 r = amdgpu_ib_ring_tests(adev);
9c12f5cd 4539
c004d44e 4540 amdgpu_amdkfd_post_reset(adev);
a5f67c93 4541 }
a90ad3c2 4542
abc34253 4543error:
c41d1cf6 4544 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 4545 amdgpu_inc_vram_lost(adev);
c33adbc7 4546 r = amdgpu_device_recover_vram(adev);
a90ad3c2 4547 }
437f3e0b 4548 amdgpu_virt_release_full_gpu(adev, true);
a90ad3c2 4549
7258fa31
SK
4550 if (AMDGPU_RETRY_SRIOV_RESET(r)) {
4551 if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
4552 retry_limit++;
4553 goto retry;
4554 } else
4555 DRM_ERROR("GPU reset retry is beyond the retry limit\n");
4556 }
4557
a90ad3c2
ML
4558 return r;
4559}
4560
9a1cddd6 4561/**
4562 * amdgpu_device_has_job_running - check if there is any job in the pending list
4563 *
982a820b 4564 * @adev: amdgpu_device pointer
9a1cddd6 4565 *
4566 * check if there is any job in the pending list
4567 */
4568bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4569{
4570 int i;
4571 struct drm_sched_job *job;
4572
4573 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4574 struct amdgpu_ring *ring = adev->rings[i];
4575
4576 if (!ring || !ring->sched.thread)
4577 continue;
4578
4579 spin_lock(&ring->sched.job_list_lock);
6efa4b46
LT
4580 job = list_first_entry_or_null(&ring->sched.pending_list,
4581 struct drm_sched_job, list);
9a1cddd6 4582 spin_unlock(&ring->sched.job_list_lock);
4583 if (job)
4584 return true;
4585 }
4586 return false;
4587}
4588
12938fad
CK
4589/**
4590 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4591 *
982a820b 4592 * @adev: amdgpu_device pointer
12938fad
CK
4593 *
4594 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4595 * a hung GPU.
4596 */
4597bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4598{
12938fad 4599
3ba7b418
AG
4600 if (amdgpu_gpu_recovery == 0)
4601 goto disabled;
4602
1a11a65d
YC
4603 /* Skip soft reset check in fatal error mode */
4604 if (!amdgpu_ras_is_poison_mode_supported(adev))
4605 return true;
4606
3ba7b418
AG
4607 if (amdgpu_sriov_vf(adev))
4608 return true;
4609
4610 if (amdgpu_gpu_recovery == -1) {
4611 switch (adev->asic_type) {
b3523c45
AD
4612#ifdef CONFIG_DRM_AMDGPU_SI
4613 case CHIP_VERDE:
4614 case CHIP_TAHITI:
4615 case CHIP_PITCAIRN:
4616 case CHIP_OLAND:
4617 case CHIP_HAINAN:
4618#endif
4619#ifdef CONFIG_DRM_AMDGPU_CIK
4620 case CHIP_KAVERI:
4621 case CHIP_KABINI:
4622 case CHIP_MULLINS:
4623#endif
4624 case CHIP_CARRIZO:
4625 case CHIP_STONEY:
4626 case CHIP_CYAN_SKILLFISH:
3ba7b418 4627 goto disabled;
b3523c45
AD
4628 default:
4629 break;
3ba7b418 4630 }
12938fad
CK
4631 }
4632
4633 return true;
3ba7b418
AG
4634
4635disabled:
aac89168 4636 dev_info(adev->dev, "GPU recovery disabled.\n");
3ba7b418 4637 return false;
12938fad
CK
4638}
4639
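/*
 * Editor-added summary of the amdgpu_gpu_recovery module parameter as
 * evaluated above:
 *    0  recovery disabled for all ASICs
 *    1  recovery enabled for all ASICs
 *   -1  auto (default): enabled, except on the SI/CIK/APU-class chips in
 *       the switch statement; recovery is always reported as enabled for
 *       SR-IOV VFs and when RAS poison mode is not supported.
 */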
5c03e584
FX
4640int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
4641{
4642 u32 i;
4643 int ret = 0;
4644
4645 amdgpu_atombios_scratch_regs_engine_hung(adev, true);
4646
4647 dev_info(adev->dev, "GPU mode1 reset\n");
4648
4649 /* disable BM */
4650 pci_clear_master(adev->pdev);
4651
4652 amdgpu_device_cache_pci_state(adev->pdev);
4653
4654 if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
4655 dev_info(adev->dev, "GPU smu mode1 reset\n");
4656 ret = amdgpu_dpm_mode1_reset(adev);
4657 } else {
4658 dev_info(adev->dev, "GPU psp mode1 reset\n");
4659 ret = psp_gpu_reset(adev);
4660 }
4661
4662 if (ret)
4663 dev_err(adev->dev, "GPU mode1 reset failed\n");
4664
4665 amdgpu_device_load_pci_state(adev->pdev);
4666
4667 /* wait for asic to come out of reset */
4668 for (i = 0; i < adev->usec_timeout; i++) {
4669 u32 memsize = adev->nbio.funcs->get_memsize(adev);
4670
4671 if (memsize != 0xffffffff)
4672 break;
4673 udelay(1);
4674 }
4675
4676 amdgpu_atombios_scratch_regs_engine_hung(adev, false);
4677 return ret;
4678}
5c6dd71e 4679
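/*
 * Editor-added note: while the ASIC is still in mode1 reset, its register
 * space reads back as 0xffffffff, so the loop above polls the NBIO memsize
 * register (up to usec_timeout iterations of udelay(1)) as a cheap
 * "is the chip alive again" probe after the PCI config space is reloaded.
 */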
e3c1b071 4680int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
04442bf7 4681 struct amdgpu_reset_context *reset_context)
26bc5340 4682{
5c1e6fa4 4683 int i, r = 0;
04442bf7
LL
4684 struct amdgpu_job *job = NULL;
4685 bool need_full_reset =
4686 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4687
4688 if (reset_context->reset_req_dev == adev)
4689 job = reset_context->job;
71182665 4690
b602ca5f
TZ
4691 if (amdgpu_sriov_vf(adev)) {
4692 /* stop the data exchange thread */
4693 amdgpu_virt_fini_data_exchange(adev);
4694 }
4695
9e225fb9
AG
4696 amdgpu_fence_driver_isr_toggle(adev, true);
4697
71182665 4698 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
4699 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4700 struct amdgpu_ring *ring = adev->rings[i];
4701
51687759 4702 if (!ring || !ring->sched.thread)
0875dc9e 4703 continue;
5740682e 4704
c530b02f
JZ
4705 /* clear the job fences from fence drv to avoid force_completion;
4706 * leave the NULL and vm flush fences in fence drv */
5c1e6fa4 4707 amdgpu_fence_driver_clear_job_fences(ring);
c530b02f 4708
2f9d4084
ML
4709 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
4710 amdgpu_fence_driver_force_completion(ring);
0875dc9e 4711 }
d38ceaf9 4712
9e225fb9
AG
4713 amdgpu_fence_driver_isr_toggle(adev, false);
4714
ff99849b 4715 if (job && job->vm)
222b5f04
AG
4716 drm_sched_increase_karma(&job->base);
4717
04442bf7 4718 r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
404b277b
LL
4719 /* If reset handler not implemented, continue; otherwise return */
4720 if (r == -ENOSYS)
4721 r = 0;
4722 else
04442bf7
LL
4723 return r;
4724
1d721ed6 4725 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
4726 if (!amdgpu_sriov_vf(adev)) {
4727
4728 if (!need_full_reset)
4729 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4730
360cd081
LG
4731 if (!need_full_reset && amdgpu_gpu_recovery &&
4732 amdgpu_device_ip_check_soft_reset(adev)) {
26bc5340
AG
4733 amdgpu_device_ip_pre_soft_reset(adev);
4734 r = amdgpu_device_ip_soft_reset(adev);
4735 amdgpu_device_ip_post_soft_reset(adev);
4736 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 4737 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
26bc5340
AG
4738 need_full_reset = true;
4739 }
4740 }
4741
4742 if (need_full_reset)
4743 r = amdgpu_device_ip_suspend(adev);
04442bf7
LL
4744 if (need_full_reset)
4745 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4746 else
4747 clear_bit(AMDGPU_NEED_FULL_RESET,
4748 &reset_context->flags);
26bc5340
AG
4749 }
4750
4751 return r;
4752}
4753
15fd09a0
SA
4754static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
4755{
15fd09a0
SA
4756 int i;
4757
38a15ad9 4758 lockdep_assert_held(&adev->reset_domain->sem);
15fd09a0
SA
4759
4760 for (i = 0; i < adev->num_regs; i++) {
651d7ee6
SA
4761 adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]);
4762 trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i],
4763 adev->reset_dump_reg_value[i]);
15fd09a0
SA
4764 }
4765
4766 return 0;
4767}
4768
3d8785f6
SA
4769#ifdef CONFIG_DEV_COREDUMP
4770static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
4771 size_t count, void *data, size_t datalen)
4772{
4773 struct drm_printer p;
4774 struct amdgpu_device *adev = data;
4775 struct drm_print_iterator iter;
4776 int i;
4777
4778 iter.data = buffer;
4779 iter.offset = 0;
4780 iter.start = offset;
4781 iter.remain = count;
4782
4783 p = drm_coredump_printer(&iter);
4784
4785 drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
4786 drm_printf(&p, "kernel: " UTS_RELEASE "\n");
4787 drm_printf(&p, "module: " KBUILD_MODNAME "\n");
4788 drm_printf(&p, "time: %lld.%09ld\n", adev->reset_time.tv_sec, adev->reset_time.tv_nsec);
4789 if (adev->reset_task_info.pid)
4790 drm_printf(&p, "process_name: %s PID: %d\n",
4791 adev->reset_task_info.process_name,
4792 adev->reset_task_info.pid);
4793
4794 if (adev->reset_vram_lost)
4795 drm_printf(&p, "VRAM is lost due to GPU reset!\n");
4796 if (adev->num_regs) {
4797 drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n");
4798
4799 for (i = 0; i < adev->num_regs; i++)
4800 drm_printf(&p, "0x%08x: 0x%08x\n",
4801 adev->reset_dump_reg_list[i],
4802 adev->reset_dump_reg_value[i]);
4803 }
4804
4805 return count - iter.remain;
4806}
4807
4808static void amdgpu_devcoredump_free(void *data)
4809{
4810}
4811
4812static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev)
4813{
4814 struct drm_device *dev = adev_to_drm(adev);
4815
4816 ktime_get_ts64(&adev->reset_time);
4817 dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_KERNEL,
4818 amdgpu_devcoredump_read, amdgpu_devcoredump_free);
4819}
4820#endif
4821
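/*
 * Editor-added note: once registered through dev_coredumpm() above, the
 * dump is exposed by the devcoredump framework, typically as
 * /sys/class/devcoredump/devcd<N>/data, until it is read or times out.
 * The exact path is a property of the core framework, not of this file.
 */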
04442bf7
LL
4822int amdgpu_do_asic_reset(struct list_head *device_list_handle,
4823 struct amdgpu_reset_context *reset_context)
26bc5340
AG
4824{
4825 struct amdgpu_device *tmp_adev = NULL;
04442bf7 4826 bool need_full_reset, skip_hw_reset, vram_lost = false;
26bc5340 4827 int r = 0;
f5c7e779 4828 bool gpu_reset_for_dev_remove = 0;
26bc5340 4829
04442bf7
LL
4830 /* Try reset handler method first */
4831 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
4832 reset_list);
15fd09a0 4833 amdgpu_reset_reg_dumps(tmp_adev);
0a83bb35
LL
4834
4835 reset_context->reset_device_list = device_list_handle;
04442bf7 4836 r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
404b277b
LL
4837 /* If reset handler not implemented, continue; otherwise return */
4838 if (r == -ENOSYS)
4839 r = 0;
4840 else
04442bf7
LL
4841 return r;
4842
4843 /* Reset handler not implemented, use the default method */
4844 need_full_reset =
4845 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4846 skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
4847
f5c7e779
YC
4848 gpu_reset_for_dev_remove =
4849 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
4850 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4851
26bc5340 4852 /*
655ce9cb 4853 * ASIC reset has to be done on all XGMI hive nodes ASAP
26bc5340
AG
4854 * to allow proper link negotiation in FW (within 1 sec)
4855 */
7ac71382 4856 if (!skip_hw_reset && need_full_reset) {
655ce9cb 4857 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
041a62bc 4858 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 4859 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
e3c1b071 4860 tmp_adev->gmc.xgmi.pending_reset = false;
c96cf282 4861 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
4862 r = -EALREADY;
4863 } else
4864 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 4865
041a62bc 4866 if (r) {
aac89168 4867 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4a580877 4868 r, adev_to_drm(tmp_adev)->unique);
041a62bc 4869 break;
ce316fa5
LM
4870 }
4871 }
4872
041a62bc
AG
4873 /* For XGMI wait for all resets to complete before proceeding */
4874 if (!r) {
655ce9cb 4875 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
ce316fa5
LM
4876 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4877 flush_work(&tmp_adev->xgmi_reset_work);
4878 r = tmp_adev->asic_reset_res;
4879 if (r)
4880 break;
ce316fa5
LM
4881 }
4882 }
4883 }
ce316fa5 4884 }
26bc5340 4885
43c4d576 4886 if (!r && amdgpu_ras_intr_triggered()) {
655ce9cb 4887 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5e67bba3 4888 if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops &&
4889 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
4890 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev);
43c4d576
JC
4891 }
4892
00eaa571 4893 amdgpu_ras_intr_cleared();
43c4d576 4894 }
00eaa571 4895
f5c7e779
YC
4896 /* Since the mode1 reset affects base ip blocks, the
4897 * phase1 ip blocks need to be resumed. Otherwise there
4898 * will be a BIOS signature error and the psp bootloader
4899 * can't load kdb on the next amdgpu install.
4900 */
4901 if (gpu_reset_for_dev_remove) {
4902 list_for_each_entry(tmp_adev, device_list_handle, reset_list)
4903 amdgpu_device_ip_resume_phase1(tmp_adev);
4904
4905 goto end;
4906 }
4907
655ce9cb 4908 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
26bc5340
AG
4909 if (need_full_reset) {
4910 /* post card */
e3c1b071 4911 r = amdgpu_device_asic_init(tmp_adev);
4912 if (r) {
aac89168 4913 dev_warn(tmp_adev->dev, "asic atom init failed!");
e3c1b071 4914 } else {
26bc5340 4915 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
9cec53c1
JZ
4916 r = amdgpu_amdkfd_resume_iommu(tmp_adev);
4917 if (r)
4918 goto out;
4919
26bc5340
AG
4920 r = amdgpu_device_ip_resume_phase1(tmp_adev);
4921 if (r)
4922 goto out;
4923
4924 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
3d8785f6
SA
4925#ifdef CONFIG_DEV_COREDUMP
4926 tmp_adev->reset_vram_lost = vram_lost;
4927 memset(&tmp_adev->reset_task_info, 0,
4928 sizeof(tmp_adev->reset_task_info));
4929 if (reset_context->job && reset_context->job->vm)
4930 tmp_adev->reset_task_info =
4931 reset_context->job->vm->task_info;
4932 amdgpu_reset_capture_coredumpm(tmp_adev);
4933#endif
26bc5340 4934 if (vram_lost) {
77e7f829 4935 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 4936 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
4937 }
4938
26bc5340
AG
4939 r = amdgpu_device_fw_loading(tmp_adev);
4940 if (r)
4941 return r;
4942
4943 r = amdgpu_device_ip_resume_phase2(tmp_adev);
4944 if (r)
4945 goto out;
4946
4947 if (vram_lost)
4948 amdgpu_device_fill_reset_magic(tmp_adev);
4949
fdafb359
EQ
4950 /*
4951 * Add this ASIC as tracked as reset was already
4952 * completed successfully.
4953 */
4954 amdgpu_register_gpu_instance(tmp_adev);
4955
04442bf7
LL
4956 if (!reset_context->hive &&
4957 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
e3c1b071 4958 amdgpu_xgmi_add_device(tmp_adev);
4959
7c04ca50 4960 r = amdgpu_device_ip_late_init(tmp_adev);
4961 if (r)
4962 goto out;
4963
087451f3 4964 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
565d1941 4965
e8fbaf03
GC
4966 /*
4967 * The GPU enters a bad state once the number of faulty pages
4968 * retired by ECC reaches the threshold, and RAS
4969 * recovery is scheduled next. So add one check
4970 * here to break recovery if it indeed exceeds the
4971 * bad page threshold, and remind the user to
4972 * retire this GPU or set a bigger
4973 * bad_page_threshold value to fix this when
4974 * probing the driver again.
4975 */
11003c68 4976 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
e8fbaf03
GC
4977 /* must succeed. */
4978 amdgpu_ras_resume(tmp_adev);
4979 } else {
4980 r = -EINVAL;
4981 goto out;
4982 }
e79a04d5 4983
26bc5340 4984 /* Update PSP FW topology after reset */
04442bf7
LL
4985 if (reset_context->hive &&
4986 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
4987 r = amdgpu_xgmi_update_topology(
4988 reset_context->hive, tmp_adev);
26bc5340
AG
4989 }
4990 }
4991
26bc5340
AG
4992out:
4993 if (!r) {
4994 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
4995 r = amdgpu_ib_ring_tests(tmp_adev);
4996 if (r) {
4997 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
26bc5340
AG
4998 need_full_reset = true;
4999 r = -EAGAIN;
5000 goto end;
5001 }
5002 }
5003
5004 if (!r)
5005 r = amdgpu_device_recover_vram(tmp_adev);
5006 else
5007 tmp_adev->asic_reset_res = r;
5008 }
5009
5010end:
04442bf7
LL
5011 if (need_full_reset)
5012 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5013 else
5014 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340
AG
5015 return r;
5016}

static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
{
	switch (amdgpu_asic_reset_method(adev)) {
	case AMD_RESET_METHOD_MODE1:
		adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
		break;
	case AMD_RESET_METHOD_MODE2:
		adev->mp1_state = PP_MP1_STATE_RESET;
		break;
	default:
		adev->mp1_state = PP_MP1_STATE_NONE;
		break;
	}
}

static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
{
	amdgpu_vf_error_trans_all(adev);
	adev->mp1_state = PP_MP1_STATE_NONE;
}

static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
{
	struct pci_dev *p = NULL;

	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
			adev->pdev->bus->number, 1);
	if (p) {
		pm_runtime_enable(&(p->dev));
		pm_runtime_resume(&(p->dev));
	}

	pci_dev_put(p);
}

static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
{
	enum amd_reset_method reset_method;
	struct pci_dev *p = NULL;
	u64 expires;

	/*
	 * For now, only BACO and mode1 reset are confirmed
	 * to suffer from the audio issue if the audio device
	 * is not properly suspended.
	 */
	reset_method = amdgpu_asic_reset_method(adev);
	if ((reset_method != AMD_RESET_METHOD_BACO) &&
	    (reset_method != AMD_RESET_METHOD_MODE1))
		return -EINVAL;

	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
			adev->pdev->bus->number, 1);
	if (!p)
		return -ENODEV;

	expires = pm_runtime_autosuspend_expiration(&(p->dev));
	if (!expires)
		/*
		 * If we cannot get the audio device autosuspend delay,
		 * a fixed 4S interval will be used. Since 3S is the
		 * audio controller's default autosuspend delay setting,
		 * the 4S used here is guaranteed to cover it.
		 */
		expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;

	while (!pm_runtime_status_suspended(&(p->dev))) {
		if (!pm_runtime_suspend(&(p->dev)))
			break;

		if (expires < ktime_get_mono_fast_ns()) {
			dev_warn(adev->dev, "failed to suspend display audio\n");
			pci_dev_put(p);
			/* TODO: abort the succeeding gpu reset? */
			return -ETIMEDOUT;
		}
	}

	pm_runtime_disable(&(p->dev));

	pci_dev_put(p);
	return 0;
}
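
/*
 * Usage sketch (editor's illustration, not part of the driver): the two audio
 * helpers above are meant to bracket a GPU reset, exactly as
 * amdgpu_device_gpu_recover() below does with its audio_suspended flag:
 *
 *	bool audio_suspended = false;
 *
 *	if (!amdgpu_device_suspend_display_audio(tmp_adev))
 *		audio_suspended = true;
 *	// ... perform the reset ...
 *	if (audio_suspended)
 *		amdgpu_device_resume_display_audio(tmp_adev);
 */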

static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
{
	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);

#if defined(CONFIG_DEBUG_FS)
	if (!amdgpu_sriov_vf(adev))
		cancel_work(&adev->reset_work);
#endif

	if (adev->kfd.dev)
		cancel_work(&adev->kfd.reset_work);

	if (amdgpu_sriov_vf(adev))
		cancel_work(&adev->virt.flr_work);

	if (con && adev->ras_enabled)
		cancel_work(&con->recovery_work);
}

/**
 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
 *
 * @adev: amdgpu_device pointer
 * @job: which job triggered the hang
 * @reset_context: amdgpu reset context pointer
 *
 * Attempt to reset the GPU if it has hung (all asics).
 * Attempt to do soft-reset or full-reset and reinitialize the ASIC.
 * Returns 0 for success or an error on failure.
 */
int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
			      struct amdgpu_job *job,
			      struct amdgpu_reset_context *reset_context)
{
	struct list_head device_list, *device_list_handle = NULL;
	bool job_signaled = false;
	struct amdgpu_hive_info *hive = NULL;
	struct amdgpu_device *tmp_adev = NULL;
	int i, r = 0;
	bool need_emergency_restart = false;
	bool audio_suspended = false;
	bool gpu_reset_for_dev_remove = false;

	gpu_reset_for_dev_remove =
		test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);

	/*
	 * Special case: RAS triggered and full reset isn't supported
	 */
	need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);

	/*
	 * Flush RAM to disk so that after reboot
	 * the user can read the log and see why the system rebooted.
	 */
	if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
		DRM_WARN("Emergency reboot.");

		ksys_sync_helper();
		emergency_restart();
	}

	dev_info(adev->dev, "GPU %s begin!\n",
		 need_emergency_restart ? "jobs stop" : "reset");

	if (!amdgpu_sriov_vf(adev))
		hive = amdgpu_get_xgmi_hive(adev);
	if (hive)
		mutex_lock(&hive->hive_lock);

	reset_context->job = job;
	reset_context->hive = hive;
	/*
	 * Build list of devices to reset.
	 * In case we are in XGMI hive mode, resort the device list
	 * to put adev in the 1st position.
	 */
	INIT_LIST_HEAD(&device_list);
	if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
			list_add_tail(&tmp_adev->reset_list, &device_list);
			if (gpu_reset_for_dev_remove && adev->shutdown)
				tmp_adev->shutdown = true;
		}
		if (!list_is_first(&adev->reset_list, &device_list))
			list_rotate_to_front(&adev->reset_list, &device_list);
		device_list_handle = &device_list;
	} else {
		list_add_tail(&adev->reset_list, &device_list);
		device_list_handle = &device_list;
	}

	/* We need to lock the reset domain only once, both for XGMI and single device */
	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
				    reset_list);
	amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);

	/* block all schedulers and reset given job's ring */
	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {

		amdgpu_device_set_mp1_state(tmp_adev);

		/*
		 * Try to put the audio codec into suspend state
		 * before the gpu reset starts.
		 *
		 * The power domain of the graphics device is shared
		 * with the AZ (audio) power domain. Without this, we
		 * may change the audio hardware behind the audio
		 * driver's back, which triggers audio codec errors.
		 */
		if (!amdgpu_device_suspend_display_audio(tmp_adev))
			audio_suspended = true;

		amdgpu_ras_set_error_query_ready(tmp_adev, false);

		cancel_delayed_work_sync(&tmp_adev->delayed_init_work);

		if (!amdgpu_sriov_vf(tmp_adev))
			amdgpu_amdkfd_pre_reset(tmp_adev);

		/*
		 * Mark these ASICs as untracked first, and add them
		 * back after the reset completes.
		 */
		amdgpu_unregister_gpu_instance(tmp_adev);

		drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);

		/* disable ras on ALL IPs */
		if (!need_emergency_restart &&
		    amdgpu_device_ip_need_full_reset(tmp_adev))
			amdgpu_ras_suspend(tmp_adev);

		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
			struct amdgpu_ring *ring = tmp_adev->rings[i];

			if (!ring || !ring->sched.thread)
				continue;

			drm_sched_stop(&ring->sched, job ? &job->base : NULL);

			if (need_emergency_restart)
				amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
		}
		atomic_inc(&tmp_adev->gpu_reset_counter);
	}

	if (need_emergency_restart)
		goto skip_sched_resume;

	/*
	 * Must check guilty signal here since after this point all old
	 * HW fences are force signaled.
	 *
	 * job->base holds a reference to parent fence
	 */
	if (job && dma_fence_is_signaled(&job->hw_fence)) {
		job_signaled = true;
		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
		goto skip_hw_reset;
	}

retry:	/* Rest of adevs pre asic reset from XGMI hive. */
	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
		if (gpu_reset_for_dev_remove) {
			/* Workaround for ASICs that need to disable SMC first */
			amdgpu_device_smu_fini_early(tmp_adev);
		}
		r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
		/* TODO: should we stop here? */
		if (r) {
			dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
				r, adev_to_drm(tmp_adev)->unique);
			tmp_adev->asic_reset_res = r;
		}

		/*
		 * Drop all pending non-scheduler resets. Scheduler resets
		 * were already dropped during drm_sched_stop.
		 */
		amdgpu_device_stop_pending_resets(tmp_adev);
	}

	/* Actual ASIC resets if needed. */
	/* Host driver will handle XGMI hive reset for SRIOV */
	if (amdgpu_sriov_vf(adev)) {
		r = amdgpu_device_reset_sriov(adev, job ? false : true);
		if (r)
			adev->asic_reset_res = r;

		/* Aldebaran supports ras in SRIOV, so need resume ras during reset */
		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
			amdgpu_ras_resume(adev);
	} else {
		r = amdgpu_do_asic_reset(device_list_handle, reset_context);
		if (r && r == -EAGAIN)
			goto retry;

		if (!r && gpu_reset_for_dev_remove)
			goto recover_end;
	}

skip_hw_reset:

	/* Post ASIC reset for all devs. */
	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {

		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
			struct amdgpu_ring *ring = tmp_adev->rings[i];

			if (!ring || !ring->sched.thread)
				continue;

			drm_sched_start(&ring->sched, true);
		}

		if (adev->enable_mes && adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3))
			amdgpu_mes_self_test(tmp_adev);

		if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) {
			drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
		}

		if (tmp_adev->asic_reset_res)
			r = tmp_adev->asic_reset_res;

		tmp_adev->asic_reset_res = 0;

		if (r) {
			/* bad news, how to tell it to userspace ? */
			dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
			amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
		} else {
			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
			if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
				DRM_WARN("smart shift update failed\n");
		}
	}

skip_sched_resume:
	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
		/* unlock kfd: SRIOV would do it separately */
		if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
			amdgpu_amdkfd_post_reset(tmp_adev);

		/* kfd_post_reset will do nothing if the kfd device is not
		 * initialized, so bring kfd up here if it was not initialized
		 * before.
		 */
		if (!adev->kfd.init_complete)
			amdgpu_amdkfd_device_init(adev);

		if (audio_suspended)
			amdgpu_device_resume_display_audio(tmp_adev);

		amdgpu_device_unset_mp1_state(tmp_adev);

		amdgpu_ras_set_error_query_ready(tmp_adev, true);
	}

recover_end:
	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
				    reset_list);
	amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);

	if (hive) {
		mutex_unlock(&hive->hive_lock);
		amdgpu_put_xgmi_hive(hive);
	}

	if (r)
		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);

	atomic_set(&adev->reset_domain->reset_res, r);
	return r;
}
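
/*
 * Usage sketch (editor's illustration, not part of the driver): a minimal
 * caller-side setup for amdgpu_device_gpu_recover(), mirroring the
 * reset_context initialization that amdgpu_pci_slot_reset() below performs;
 * whether AMDGPU_NEED_FULL_RESET should be set or cleared depends on the
 * caller:
 *
 *	struct amdgpu_reset_context reset_context;
 *
 *	memset(&reset_context, 0, sizeof(reset_context));
 *	reset_context.method = AMD_RESET_METHOD_NONE;	// let the ASIC pick
 *	reset_context.reset_req_dev = adev;
 *	clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
 *
 *	r = amdgpu_device_gpu_recover(adev, job, &reset_context);
 */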

/**
 * amdgpu_device_get_pcie_info - fetch PCIe info about the PCIe slot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches and stores in the driver the PCIe capabilities (gen speed
 * and lanes) of the slot the device is in. Handles APUs and
 * virtualized environments where PCIe config space may not be available.
 */
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
{
	struct pci_dev *pdev;
	enum pci_bus_speed speed_cap, platform_speed_cap;
	enum pcie_link_width platform_link_width;

	if (amdgpu_pcie_gen_cap)
		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;

	if (amdgpu_pcie_lane_cap)
		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;

	/* covers APUs as well */
	if (pci_is_root_bus(adev->pdev->bus)) {
		if (adev->pm.pcie_gen_mask == 0)
			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
		if (adev->pm.pcie_mlw_mask == 0)
			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
		return;
	}

	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
		return;

	pcie_bandwidth_available(adev->pdev, NULL,
				 &platform_speed_cap, &platform_link_width);

	if (adev->pm.pcie_gen_mask == 0) {
		/* asic caps */
		pdev = adev->pdev;
		speed_cap = pcie_get_speed_cap(pdev);
		if (speed_cap == PCI_SPEED_UNKNOWN) {
			adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
						   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
						   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
		} else {
			if (speed_cap == PCIE_SPEED_32_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
			else if (speed_cap == PCIE_SPEED_16_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
			else if (speed_cap == PCIE_SPEED_8_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
			else if (speed_cap == PCIE_SPEED_5_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
			else
				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
		}
		/* platform caps */
		if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
		} else {
			if (platform_speed_cap == PCIE_SPEED_32_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
			else if (platform_speed_cap == PCIE_SPEED_16_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
			else if (platform_speed_cap == PCIE_SPEED_8_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
			else if (platform_speed_cap == PCIE_SPEED_5_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
			else
				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;

		}
	}
	if (adev->pm.pcie_mlw_mask == 0) {
		if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
		} else {
			switch (platform_link_width) {
			case PCIE_LNK_X32:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X16:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X12:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X8:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X4:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X2:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X1:
				adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
				break;
			default:
				break;
			}
		}
	}
}
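
/*
 * Usage sketch (editor's illustration, not part of the driver): the masks
 * filled in above are bitwise capability sets (the CAIL_* macros come from
 * amd_pcie.h), so a consumer checks them with a simple AND, e.g. to test
 * whether both the ASIC and the platform allow PCIe gen3:
 *
 *	bool gen3_ok =
 *		(adev->pm.pcie_gen_mask & CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3) &&
 *		(adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
 */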

/**
 * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
 *
 * @adev: amdgpu_device pointer
 * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
 *
 * Return true if @peer_adev can access (DMA) @adev through the PCIe
 * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
 * @peer_adev.
 */
bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
				      struct amdgpu_device *peer_adev)
{
#ifdef CONFIG_HSA_AMD_P2P
	uint64_t address_mask = peer_adev->dev->dma_mask ?
		~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
	resource_size_t aper_limit =
		adev->gmc.aper_base + adev->gmc.aper_size - 1;
	bool p2p_access =
		!adev->gmc.xgmi.connected_to_cpu &&
		!(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);

	return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
		adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
		!(adev->gmc.aper_base & address_mask ||
		  aper_limit & address_mask));
#else
	return false;
#endif
}
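
/*
 * Worked example (editor's illustration): with a peer DMA mask of 40 bits,
 * address_mask above becomes ~((1ULL << 40) - 1). A BAR at 0x80'0000'0000
 * (bit 39 set) with a 16 GiB aperture stays entirely below 1ULL << 40, so
 * both aper_base and aper_limit AND to zero against the mask and the peer
 * can address the whole BAR; a BAR placed at or above 1ULL << 40 would fail
 * the check and the helper would return false.
 */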

int amdgpu_device_baco_enter(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);
	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);

	if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
		return -ENOTSUPP;

	if (ras && adev->ras_enabled &&
	    adev->nbio.funcs->enable_doorbell_interrupt)
		adev->nbio.funcs->enable_doorbell_interrupt(adev, false);

	return amdgpu_dpm_baco_enter(adev);
}

int amdgpu_device_baco_exit(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);
	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
	int ret = 0;

	if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
		return -ENOTSUPP;

	ret = amdgpu_dpm_baco_exit(adev);
	if (ret)
		return ret;

	if (ras && adev->ras_enabled &&
	    adev->nbio.funcs->enable_doorbell_interrupt)
		adev->nbio.funcs->enable_doorbell_interrupt(adev, true);

	if (amdgpu_passthrough(adev) &&
	    adev->nbio.funcs->clear_doorbell_interrupt)
		adev->nbio.funcs->clear_doorbell_interrupt(adev);

	return 0;
}
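
/*
 * Usage sketch (editor's illustration, not part of the driver): the BACO
 * ("bus active, chip off") helpers are symmetric and are typically paired
 * around a low-power window, e.g. in a runtime-PM style suspend/resume path:
 *
 *	r = amdgpu_device_baco_enter(drm_dev);
 *	if (r)
 *		return r;
 *	// ... device sits in BACO; the bus stays up, the chip is off ...
 *	r = amdgpu_device_baco_exit(drm_dev);
 */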

/**
 * amdgpu_pci_error_detected - Called when a PCI error is detected.
 * @pdev: PCI device struct
 * @state: PCI channel state
 *
 * Description: Called when a PCI error is detected.
 *
 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
 */
pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
{
	struct drm_device *dev = pci_get_drvdata(pdev);
	struct amdgpu_device *adev = drm_to_adev(dev);
	int i;

	DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);

	if (adev->gmc.xgmi.num_physical_nodes > 1) {
		DRM_WARN("No support for XGMI hive yet...");
		return PCI_ERS_RESULT_DISCONNECT;
	}

	adev->pci_channel_state = state;

	switch (state) {
	case pci_channel_io_normal:
		return PCI_ERS_RESULT_CAN_RECOVER;
	/* Fatal error, prepare for slot reset */
	case pci_channel_io_frozen:
		/*
		 * Locking adev->reset_domain->sem will prevent any external access
		 * to GPU during PCI error recovery
		 */
		amdgpu_device_lock_reset_domain(adev->reset_domain);
		amdgpu_device_set_mp1_state(adev);

		/*
		 * Block any work scheduling as we do for regular GPU reset
		 * for the duration of the recovery
		 */
		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
			struct amdgpu_ring *ring = adev->rings[i];

			if (!ring || !ring->sched.thread)
				continue;

			drm_sched_stop(&ring->sched, NULL);
		}
		atomic_inc(&adev->gpu_reset_counter);
		return PCI_ERS_RESULT_NEED_RESET;
	case pci_channel_io_perm_failure:
		/* Permanent error, prepare for device removal */
		return PCI_ERS_RESULT_DISCONNECT;
	}

	return PCI_ERS_RESULT_NEED_RESET;
}

/**
 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
 * @pdev: pointer to PCI device
 */
pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
{
	DRM_INFO("PCI error: mmio enabled callback!!\n");

	/* TODO - dump whatever for debugging purposes */

	/* This is called only if amdgpu_pci_error_detected returns
	 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
	 * works, no need to reset slot.
	 */

	return PCI_ERS_RESULT_RECOVERED;
}

/**
 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
 * @pdev: PCI device struct
 *
 * Description: This routine is called by the pci error recovery
 * code after the PCI slot has been reset, just before we
 * should resume normal operations.
 */
pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);
	struct amdgpu_device *adev = drm_to_adev(dev);
	int r, i;
	struct amdgpu_reset_context reset_context;
	u32 memsize;
	struct list_head device_list;

	DRM_INFO("PCI error: slot reset callback!!\n");

	memset(&reset_context, 0, sizeof(reset_context));

	INIT_LIST_HEAD(&device_list);
	list_add_tail(&adev->reset_list, &device_list);

	/* wait for asic to come out of reset */
	msleep(500);

	/* Restore PCI config space */
	amdgpu_device_load_pci_state(pdev);

	/* confirm ASIC came out of reset */
	for (i = 0; i < adev->usec_timeout; i++) {
		memsize = amdgpu_asic_get_config_memsize(adev);

		if (memsize != 0xffffffff)
			break;
		udelay(1);
	}
	if (memsize == 0xffffffff) {
		r = -ETIME;
		goto out;
	}

	reset_context.method = AMD_RESET_METHOD_NONE;
	reset_context.reset_req_dev = adev;
	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
	set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);

	adev->no_hw_access = true;
	r = amdgpu_device_pre_asic_reset(adev, &reset_context);
	adev->no_hw_access = false;
	if (r)
		goto out;

	r = amdgpu_do_asic_reset(&device_list, &reset_context);

out:
	if (!r) {
		if (amdgpu_device_cache_pci_state(adev->pdev))
			pci_restore_state(adev->pdev);

		DRM_INFO("PCIe error recovery succeeded\n");
	} else {
		DRM_ERROR("PCIe error recovery failed, err:%d", r);
		amdgpu_device_unset_mp1_state(adev);
		amdgpu_device_unlock_reset_domain(adev->reset_domain);
	}

	return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
}

/**
 * amdgpu_pci_resume() - resume normal ops after PCI reset
 * @pdev: pointer to PCI device
 *
 * Called when the error recovery driver tells us that it's OK to
 * resume normal operation.
 */
void amdgpu_pci_resume(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);
	struct amdgpu_device *adev = drm_to_adev(dev);
	int i;

	DRM_INFO("PCI error: resume callback!!\n");

	/* Only continue execution for the case of pci_channel_io_frozen */
	if (adev->pci_channel_state != pci_channel_io_frozen)
		return;

	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		struct amdgpu_ring *ring = adev->rings[i];

		if (!ring || !ring->sched.thread)
			continue;

		drm_sched_start(&ring->sched, true);
	}

	amdgpu_device_unset_mp1_state(adev);
	amdgpu_device_unlock_reset_domain(adev->reset_domain);
}
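
/*
 * Wiring sketch (editor's illustration): the four callbacks above follow the
 * standard PCI error recovery contract, so the driver hangs them off its
 * pci_driver through a struct pci_error_handlers (amdgpu does this in
 * amdgpu_drv.c; the instance name below is illustrative):
 *
 *	static const struct pci_error_handlers amdgpu_pci_err_handler = {
 *		.error_detected	= amdgpu_pci_error_detected,
 *		.mmio_enabled	= amdgpu_pci_mmio_enabled,
 *		.slot_reset	= amdgpu_pci_slot_reset,
 *		.resume		= amdgpu_pci_resume,
 *	};
 */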

bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);
	struct amdgpu_device *adev = drm_to_adev(dev);
	int r;

	r = pci_save_state(pdev);
	if (!r) {
		kfree(adev->pci_state);

		adev->pci_state = pci_store_saved_state(pdev);

		if (!adev->pci_state) {
			DRM_ERROR("Failed to store PCI saved state");
			return false;
		}
	} else {
		DRM_WARN("Failed to save PCI state, err:%d\n", r);
		return false;
	}

	return true;
}

bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);
	struct amdgpu_device *adev = drm_to_adev(dev);
	int r;

	if (!adev->pci_state)
		return false;

	r = pci_load_saved_state(pdev, adev->pci_state);

	if (!r) {
		pci_restore_state(pdev);
	} else {
		DRM_WARN("Failed to load PCI state, err:%d\n", r);
		return false;
	}

	return true;
}

void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
		struct amdgpu_ring *ring)
{
#ifdef CONFIG_X86_64
	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
		return;
#endif
	if (adev->gmc.xgmi.connected_to_cpu)
		return;

	if (ring && ring->funcs->emit_hdp_flush)
		amdgpu_ring_emit_hdp_flush(ring);
	else
		amdgpu_asic_flush_hdp(adev, ring);
}

void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
		struct amdgpu_ring *ring)
{
#ifdef CONFIG_X86_64
	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
		return;
#endif
	if (adev->gmc.xgmi.connected_to_cpu)
		return;

	amdgpu_asic_invalidate_hdp(adev, ring);
}

int amdgpu_in_reset(struct amdgpu_device *adev)
{
	return atomic_read(&adev->reset_domain->in_gpu_reset);
}

/**
 * amdgpu_device_halt() - bring hardware to some kind of halt state
 *
 * @adev: amdgpu_device pointer
 *
 * Bring hardware to some kind of halt state so that no one can touch it
 * any more. It helps to maintain error context when an error occurs.
 * Compared to a simple hang, the system will stay stable at least for SSH
 * access, so it should then be trivial to inspect the hardware state and
 * see what's going on. Implemented as follows:
 *
 * 1. drm_dev_unplug() makes the device inaccessible to user space (IOCTLs, etc),
 * clears all CPU mappings to the device, and disallows remappings through page faults
 * 2. amdgpu_irq_disable_all() disables all interrupts
 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
 * 4. set adev->no_hw_access to avoid potential crashes after step 5
 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
 * 6. pci_disable_device() and pci_wait_for_pending_transaction()
 * flush any in-flight DMA operations
 */
void amdgpu_device_halt(struct amdgpu_device *adev)
{
	struct pci_dev *pdev = adev->pdev;
	struct drm_device *ddev = adev_to_drm(adev);

	drm_dev_unplug(ddev);

	amdgpu_irq_disable_all(adev);

	amdgpu_fence_driver_hw_fini(adev);

	adev->no_hw_access = true;

	amdgpu_device_unmap_mmio(adev);

	pci_disable_device(pdev);
	pci_wait_for_pending_transaction(pdev);
}

u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
				u32 reg)
{
	unsigned long flags, address, data;
	u32 r;

	address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
	data = adev->nbio.funcs->get_pcie_port_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	WREG32(address, reg * 4);
	(void)RREG32(address);
	r = RREG32(data);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
	return r;
}

void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
				u32 reg, u32 v)
{
	unsigned long flags, address, data;

	address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
	data = adev->nbio.funcs->get_pcie_port_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	WREG32(address, reg * 4);
	(void)RREG32(address);
	WREG32(data, v);
	(void)RREG32(data);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}
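
/*
 * Usage sketch (editor's illustration, not part of the driver): the pair
 * above implements a classic index/data indirect register window, so a
 * read-modify-write of a PCIe port register looks like this (SOME_PORT_REG
 * and SOME_FIELD_MASK are hypothetical names):
 *
 *	u32 v = amdgpu_device_pcie_port_rreg(adev, SOME_PORT_REG);
 *
 *	v &= ~SOME_FIELD_MASK;
 *	v |= new_field_value;
 *	amdgpu_device_pcie_port_wreg(adev, SOME_PORT_REG, v);
 *
 * The dummy (void)RREG32(address) reads after each index write act as
 * posting reads, making sure the index lands before the data port is used.
 */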

/**
 * amdgpu_device_switch_gang - switch to a new gang
 * @adev: amdgpu_device pointer
 * @gang: the gang to switch to
 *
 * Try to switch to a new gang.
 * Returns: NULL if we switched to the new gang or a reference to the current
 * gang leader.
 */
struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
					    struct dma_fence *gang)
{
	struct dma_fence *old = NULL;

	do {
		dma_fence_put(old);
		rcu_read_lock();
		old = dma_fence_get_rcu_safe(&adev->gang_submit);
		rcu_read_unlock();

		if (old == gang)
			break;

		if (!dma_fence_is_signaled(old))
			return old;

	} while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
			 old, gang) != old);

	dma_fence_put(old);
	return NULL;
}
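
/*
 * Usage sketch (editor's illustration, not part of the driver): a submitter
 * that wants its gang to become the active one keeps retrying until the
 * previous gang leader has signaled, since amdgpu_device_switch_gang()
 * refuses to replace an unsignaled leader and hands back a reference to it
 * instead:
 *
 *	struct dma_fence *old = amdgpu_device_switch_gang(adev, new_gang);
 *
 *	if (old) {
 *		// previous gang still running: wait, drop the ref, retry
 *		dma_fence_wait(old, false);
 *		dma_fence_put(old);
 *	}
 */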

bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_HAINAN:
#endif
	case CHIP_TOPAZ:
		/* chips with no display hardware */
		return false;
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_VERDE:
	case CHIP_OLAND:
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
#endif
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		/* chips with display hardware */
		return true;
	default:
		/* IP discovery */
		if (!adev->ip_versions[DCE_HWIP][0] ||
		    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
			return false;
		return true;
	}
}