/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/console.h>
#include <linux/slab.h>
#include <linux/iommu.h>
#include <linux/pci.h>
#include <linux/devcoredump.h>
#include <generated/utsrelease.h>
#include <linux/pci-p2pdma.h>

#include <drm/drm_aperture.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_fb_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "nv.h"
#include "bif/bif_4_1_d.h"
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
#include "amdgpu_pmu.h"
#include "amdgpu_fru_eeprom.h"
#include "amdgpu_reset.h"

#include <linux/suspend.h>
#include <drm/task_barrier.h>
#include <linux/pm_runtime.h>

#include <drm/drm_drv.h>

MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");

#define AMDGPU_RESUME_MS		2000
#define AMDGPU_MAX_RETRY_LIMIT		2
#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)

static const struct drm_driver amdgpu_kms_driver;

const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"ARCTURUS",
	"RENOIR",
	"ALDEBARAN",
	"NAVI10",
	"CYAN_SKILLFISH",
	"NAVI14",
	"NAVI12",
	"SIENNA_CICHLID",
	"NAVY_FLOUNDER",
	"VANGOGH",
	"DIMGREY_CAVEFISH",
	"BEIGE_GOBY",
	"YELLOW_CARP",
	"IP DISCOVERY",
	"LAST",
};

/**
 * DOC: pcie_replay_count
 *
 * The amdgpu driver provides a sysfs API for reporting the total number
 * of PCIe replays (NAKs).
 * The file pcie_replay_count is used for this and returns the total
 * number of replays as a sum of the NAKs generated and NAKs received.
 */

static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);
	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);

	return sysfs_emit(buf, "%llu\n", cnt);
}

static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
		amdgpu_device_get_pcie_replay_count, NULL);

static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);

/**
 * DOC: product_name
 *
 * The amdgpu driver provides a sysfs API for reporting the product name
 * for the device.
 * The file product_name is used for this and returns the product name
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_product_name(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);

	return sysfs_emit(buf, "%s\n", adev->product_name);
}

static DEVICE_ATTR(product_name, S_IRUGO,
		amdgpu_device_get_product_name, NULL);

/**
 * DOC: product_number
 *
 * The amdgpu driver provides a sysfs API for reporting the part number
 * for the device.
 * The file product_number is used for this and returns the part number
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_product_number(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);

	return sysfs_emit(buf, "%s\n", adev->product_number);
}

static DEVICE_ATTR(product_number, S_IRUGO,
		amdgpu_device_get_product_number, NULL);

/**
 * DOC: serial_number
 *
 * The amdgpu driver provides a sysfs API for reporting the serial number
 * for the device.
 * The file serial_number is used for this and returns the serial number
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_serial_number(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);

	return sysfs_emit(buf, "%s\n", adev->serial);
}

static DEVICE_ATTR(serial_number, S_IRUGO,
		amdgpu_device_get_serial_number, NULL);

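/*
 * Illustrative usage note (not part of the driver): the attributes above are
 * plain sysfs files created on the PCI device, so once the device is bound
 * they can be read from user space.  Assuming card0 is the amdgpu device,
 * a hypothetical example would be:
 *
 *	cat /sys/class/drm/card0/device/pcie_replay_count
 *	cat /sys/class/drm/card0/device/product_name
 *	cat /sys/class/drm/card0/device/serial_number
 *
 * The FRU-backed files only report useful data on server cards whose FRU
 * EEPROM is supported (see amdgpu_fru_eeprom.c).
 */
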
/**
 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with ATPX power control,
 * otherwise return false.
 */
bool amdgpu_device_supports_px(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);

	if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
		return true;
	return false;
}

/**
 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with ACPI power control,
 * otherwise return false.
 */
bool amdgpu_device_supports_boco(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);

	if (adev->has_pr3 ||
	    ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
		return true;
	return false;
}

/**
 * amdgpu_device_supports_baco - Does the device support BACO
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device supports BACO,
 * otherwise return false.
 */
bool amdgpu_device_supports_baco(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);

	return amdgpu_asic_supports_baco(adev);
}

/**
 * amdgpu_device_supports_smart_shift - Is the device a dGPU with
 * smart shift support
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with Smart Shift support,
 * otherwise returns false.
 */
bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
{
	return (amdgpu_device_supports_boco(dev) &&
		amdgpu_acpi_is_power_shift_control_supported());
}

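/*
 * Illustrative sketch (not part of this file): callers typically probe these
 * helpers in order of preference when deciding how to power the dGPU down at
 * runtime.  A hypothetical selection could look like:
 *
 *	if (amdgpu_device_supports_px(dev))
 *		-> use ATPX (vga_switcheroo) power control
 *	else if (amdgpu_device_supports_boco(dev))
 *		-> use ACPI D3cold / BOCO
 *	else if (amdgpu_device_supports_baco(dev))
 *		-> use BACO through the SMU
 *
 * The real policy lives in the runtime-PM paths of amdgpu_drv.c and may
 * differ in detail.
 */
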
/*
 * VRAM access helper functions
 */

/**
 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size, sizeof(@buf) must be > @size
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
			     void *buf, size_t size, bool write)
{
	unsigned long flags;
	uint32_t hi = ~0, tmp = 0;
	uint32_t *data = buf;
	uint64_t last;
	int idx;

	if (!drm_dev_enter(adev_to_drm(adev), &idx))
		return;

	BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));

	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
	for (last = pos + size; pos < last; pos += 4) {
		tmp = pos >> 31;

		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
		if (tmp != hi) {
			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
			hi = tmp;
		}
		if (write)
			WREG32_NO_KIQ(mmMM_DATA, *data++);
		else
			*data++ = RREG32_NO_KIQ(mmMM_DATA);
	}

	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	drm_dev_exit(idx);
}

/**
 * amdgpu_device_aper_access - access vram by vram aperture
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size, sizeof(@buf) must be > @size
 * @write: true - write to vram, otherwise - read from vram
 *
 * Returns the number of bytes that have been transferred.
 */
size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
				 void *buf, size_t size, bool write)
{
#ifdef CONFIG_64BIT
	void __iomem *addr;
	size_t count = 0;
	uint64_t last;

	if (!adev->mman.aper_base_kaddr)
		return 0;

	last = min(pos + size, adev->gmc.visible_vram_size);
	if (last > pos) {
		addr = adev->mman.aper_base_kaddr + pos;
		count = last - pos;

		if (write) {
			memcpy_toio(addr, buf, count);
			mb();
			amdgpu_device_flush_hdp(adev, NULL);
		} else {
			amdgpu_device_invalidate_hdp(adev, NULL);
			mb();
			memcpy_fromio(buf, addr, count);
		}

	}

	return count;
#else
	return 0;
#endif
}

/**
 * amdgpu_device_vram_access - read/write a buffer in vram
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size, sizeof(@buf) must be > @size
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
			       void *buf, size_t size, bool write)
{
	size_t count;

	/* try using the VRAM aperture to access vram first */
	count = amdgpu_device_aper_access(adev, pos, buf, size, write);
	size -= count;
	if (size) {
		/* use MM index/data to access the rest of vram */
		pos += count;
		buf += count;
		amdgpu_device_mm_access(adev, pos, buf, size, write);
	}
}

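/*
 * Illustrative example (not from this file): debugfs and RAS code use this
 * helper to copy small buffers in and out of VRAM regardless of whether the
 * location is CPU-visible.  A hypothetical dump of the first 256 bytes of
 * VRAM into a stack buffer could look like:
 *
 *	u32 tmp[64];
 *
 *	amdgpu_device_vram_access(adev, 0, tmp, sizeof(tmp), false);
 *
 * The offset and size must be dword aligned, otherwise the MM_INDEX/MM_DATA
 * fallback hits the BUG_ON() above.
 */
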
/*
 * register access helper functions.
 */

/* Check if hw access should be skipped because of hotplug or device error */
bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
{
	if (adev->no_hw_access)
		return true;

#ifdef CONFIG_LOCKDEP
	/*
	 * This is a bit complicated to understand, so worth a comment. What we assert
	 * here is that the GPU reset is not running on another thread in parallel.
	 *
	 * For this we trylock the read side of the reset semaphore, if that succeeds
	 * we know that the reset is not running in parallel.
	 *
	 * If the trylock fails we assert that we are either already holding the read
	 * side of the lock or are the reset thread itself and hold the write side of
	 * the lock.
	 */
	if (in_task()) {
		if (down_read_trylock(&adev->reset_domain->sem))
			up_read(&adev->reset_domain->sem);
		else
			lockdep_assert_held(&adev->reset_domain->sem);
	}
#endif
	return false;
}

/**
 * amdgpu_device_rreg - read a memory mapped IO or indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
			    uint32_t reg, uint32_t acc_flags)
{
	uint32_t ret;

	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	if ((reg * 4) < adev->rmmio_size) {
		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
		    amdgpu_sriov_runtime(adev) &&
		    down_read_trylock(&adev->reset_domain->sem)) {
			ret = amdgpu_kiq_rreg(adev, reg);
			up_read(&adev->reset_domain->sem);
		} else {
			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
		}
	} else {
		ret = adev->pcie_rreg(adev, reg * 4);
	}

	trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);

	return ret;
}

/*
 * MMIO register read with bytes helper functions
 * @offset: byte offset from MMIO start
 */

/**
 * amdgpu_mm_rreg8 - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 *
 * Returns the 8 bit value from the offset specified.
 */
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
{
	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	if (offset < adev->rmmio_size)
		return (readb(adev->rmmio + offset));
	BUG();
}

/*
 * MMIO register write with bytes helper functions
 * @offset: byte offset from MMIO start
 * @value: the value to be written to the register
 */

/**
 * amdgpu_mm_wreg8 - write a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
{
	if (amdgpu_device_skip_hw_access(adev))
		return;

	if (offset < adev->rmmio_size)
		writeb(value, adev->rmmio + offset);
	else
		BUG();
}

/**
 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_device_wreg(struct amdgpu_device *adev,
			uint32_t reg, uint32_t v,
			uint32_t acc_flags)
{
	if (amdgpu_device_skip_hw_access(adev))
		return;

	if ((reg * 4) < adev->rmmio_size) {
		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
		    amdgpu_sriov_runtime(adev) &&
		    down_read_trylock(&adev->reset_domain->sem)) {
			amdgpu_kiq_wreg(adev, reg, v);
			up_read(&adev->reset_domain->sem);
		} else {
			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
		}
	} else {
		adev->pcie_wreg(adev, reg * 4, v);
	}

	trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
}

/**
 * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
 *
 * @adev: amdgpu_device pointer
 * @reg: mmio/rlc register
 * @v: value to write
 *
 * this function is invoked only for the debugfs register access
 */
void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
			     uint32_t reg, uint32_t v)
{
	if (amdgpu_device_skip_hw_access(adev))
		return;

	if (amdgpu_sriov_fullaccess(adev) &&
	    adev->gfx.rlc.funcs &&
	    adev->gfx.rlc.funcs->is_rlcg_access_range) {
		if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
			return amdgpu_sriov_wreg(adev, reg, v, 0, 0);
	} else if ((reg * 4) >= adev->rmmio_size) {
		adev->pcie_wreg(adev, reg * 4, v);
	} else {
		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
	}
}

/**
 * amdgpu_mm_rdoorbell - read a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
{
	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	if (index < adev->doorbell.num_doorbells) {
		return readl(adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell - write a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
{
	if (amdgpu_device_skip_hw_access(adev))
		return;

	if (index < adev->doorbell.num_doorbells) {
		writel(v, adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

/**
 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
{
	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	if (index < adev->doorbell.num_doorbells) {
		return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
{
	if (amdgpu_device_skip_hw_access(adev))
		return;

	if (index < adev->doorbell.num_doorbells) {
		atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

/**
 * amdgpu_device_indirect_rreg - read an indirect register
 *
 * @adev: amdgpu_device pointer
 * @pcie_index: mmio register offset
 * @pcie_data: mmio register offset
 * @reg_addr: indirect register address to read from
 *
 * Returns the value of indirect register @reg_addr
 */
u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
				u32 pcie_index, u32 pcie_data,
				u32 reg_addr)
{
	unsigned long flags;
	u32 r;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	r = readl(pcie_data_offset);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return r;
}

/**
 * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
 *
 * @adev: amdgpu_device pointer
 * @pcie_index: mmio register offset
 * @pcie_data: mmio register offset
 * @reg_addr: indirect register address to read from
 *
 * Returns the value of indirect register @reg_addr
 */
u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
				  u32 pcie_index, u32 pcie_data,
				  u32 reg_addr)
{
	unsigned long flags;
	u64 r;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	/* read low 32 bits */
	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	r = readl(pcie_data_offset);
	/* read high 32 bits */
	writel(reg_addr + 4, pcie_index_offset);
	readl(pcie_index_offset);
	r |= ((u64)readl(pcie_data_offset) << 32);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return r;
}

/**
 * amdgpu_device_indirect_wreg - write an indirect register address
 *
 * @adev: amdgpu_device pointer
 * @pcie_index: mmio register offset
 * @pcie_data: mmio register offset
 * @reg_addr: indirect register offset
 * @reg_data: indirect register data
 *
 */
void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
				 u32 pcie_index, u32 pcie_data,
				 u32 reg_addr, u32 reg_data)
{
	unsigned long flags;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	writel(reg_data, pcie_data_offset);
	readl(pcie_data_offset);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

/**
 * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
 *
 * @adev: amdgpu_device pointer
 * @pcie_index: mmio register offset
 * @pcie_data: mmio register offset
 * @reg_addr: indirect register offset
 * @reg_data: indirect register data
 *
 */
void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
				   u32 pcie_index, u32 pcie_data,
				   u32 reg_addr, u64 reg_data)
{
	unsigned long flags;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	/* write low 32 bits */
	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
	readl(pcie_data_offset);
	/* write high 32 bits */
	writel(reg_addr + 4, pcie_index_offset);
	readl(pcie_index_offset);
	writel((u32)(reg_data >> 32), pcie_data_offset);
	readl(pcie_data_offset);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

AD
804/**
805 * amdgpu_invalid_rreg - dummy reg read function
806 *
982a820b 807 * @adev: amdgpu_device pointer
d38ceaf9
AD
808 * @reg: offset of register
809 *
810 * Dummy register read function. Used for register blocks
811 * that certain asics don't have (all asics).
812 * Returns the value in the register.
813 */
814static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
815{
816 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
817 BUG();
818 return 0;
819}
820
821/**
822 * amdgpu_invalid_wreg - dummy reg write function
823 *
982a820b 824 * @adev: amdgpu_device pointer
d38ceaf9
AD
825 * @reg: offset of register
826 * @v: value to write to the register
827 *
828 * Dummy register read function. Used for register blocks
829 * that certain asics don't have (all asics).
830 */
831static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
832{
833 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
834 reg, v);
835 BUG();
836}
837
4fa1c6a6
TZ
838/**
839 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
840 *
982a820b 841 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
842 * @reg: offset of register
843 *
844 * Dummy register read function. Used for register blocks
845 * that certain asics don't have (all asics).
846 * Returns the value in the register.
847 */
848static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
849{
850 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
851 BUG();
852 return 0;
853}
854
855/**
856 * amdgpu_invalid_wreg64 - dummy reg write function
857 *
982a820b 858 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
859 * @reg: offset of register
860 * @v: value to write to the register
861 *
862 * Dummy register read function. Used for register blocks
863 * that certain asics don't have (all asics).
864 */
865static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
866{
867 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
868 reg, v);
869 BUG();
870}
871
d38ceaf9
AD
872/**
873 * amdgpu_block_invalid_rreg - dummy reg read function
874 *
982a820b 875 * @adev: amdgpu_device pointer
d38ceaf9
AD
876 * @block: offset of instance
877 * @reg: offset of register
878 *
879 * Dummy register read function. Used for register blocks
880 * that certain asics don't have (all asics).
881 * Returns the value in the register.
882 */
883static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
884 uint32_t block, uint32_t reg)
885{
886 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
887 reg, block);
888 BUG();
889 return 0;
890}
891
892/**
893 * amdgpu_block_invalid_wreg - dummy reg write function
894 *
982a820b 895 * @adev: amdgpu_device pointer
d38ceaf9
AD
896 * @block: offset of instance
897 * @reg: offset of register
898 * @v: value to write to the register
899 *
900 * Dummy register read function. Used for register blocks
901 * that certain asics don't have (all asics).
902 */
903static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
904 uint32_t block,
905 uint32_t reg, uint32_t v)
906{
907 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
908 reg, block, v);
909 BUG();
910}
911
4d2997ab
AD
912/**
913 * amdgpu_device_asic_init - Wrapper for atom asic_init
914 *
982a820b 915 * @adev: amdgpu_device pointer
4d2997ab
AD
916 *
917 * Does any asic specific work and then calls atom asic init.
918 */
919static int amdgpu_device_asic_init(struct amdgpu_device *adev)
920{
921 amdgpu_asic_pre_asic_init(adev);
922
85d1bcc6
HZ
923 if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0))
924 return amdgpu_atomfirmware_asic_init(adev, true);
925 else
926 return amdgpu_atom_asic_init(adev->mode_info.atom_context);
4d2997ab
AD
927}
928
/**
 * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
 *
 * @adev: amdgpu_device pointer
 *
 * Allocates a scratch page of VRAM for use by various things in the
 * driver.
 */
static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
{
	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
				       AMDGPU_GEM_DOMAIN_VRAM |
				       AMDGPU_GEM_DOMAIN_GTT,
				       &adev->mem_scratch.robj,
				       &adev->mem_scratch.gpu_addr,
				       (void **)&adev->mem_scratch.ptr);
}

/**
 * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
 *
 * @adev: amdgpu_device pointer
 *
 * Frees the VRAM scratch page.
 */
static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
}

/**
 * amdgpu_device_program_register_sequence - program an array of registers.
 *
 * @adev: amdgpu_device pointer
 * @registers: pointer to the register array
 * @array_size: size of the register array
 *
 * Programs an array of registers with and/or masks.
 * This is a helper for setting golden registers.
 */
void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
					     const u32 *registers,
					     const u32 array_size)
{
	u32 tmp, reg, and_mask, or_mask;
	int i;

	if (array_size % 3)
		return;

	for (i = 0; i < array_size; i += 3) {
		reg = registers[i + 0];
		and_mask = registers[i + 1];
		or_mask = registers[i + 2];

		if (and_mask == 0xffffffff) {
			tmp = or_mask;
		} else {
			tmp = RREG32(reg);
			tmp &= ~and_mask;
			if (adev->family >= AMDGPU_FAMILY_AI)
				tmp |= (or_mask & and_mask);
			else
				tmp |= or_mask;
		}
		WREG32(reg, tmp);
	}
}

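/*
 * Illustrative example (register names and values are made up): callers pass
 * the registers as {offset, and_mask, or_mask} triplets, e.g. a hypothetical
 * golden settings table like
 *
 *	static const u32 golden_settings_example[] = {
 *		mmFOO_CNTL, 0x0000000f, 0x00000001,
 *		mmBAR_CNTL, 0xffffffff, 0x12345678,
 *	};
 *
 *	amdgpu_device_program_register_sequence(adev, golden_settings_example,
 *						ARRAY_SIZE(golden_settings_example));
 *
 * An and_mask of 0xffffffff means "replace the whole register with or_mask".
 */
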
/**
 * amdgpu_device_pci_config_reset - reset the GPU
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using the pci config reset sequence.
 * Only applicable to asics prior to vega10.
 */
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
{
	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
}

/**
 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
 */
int amdgpu_device_pci_reset(struct amdgpu_device *adev)
{
	return pci_reset_function(adev->pdev);
}

/*
 * GPU doorbell aperture helper functions.
 */
/**
 * amdgpu_device_doorbell_init - Init doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Init doorbell driver information (CIK)
 * Returns 0 on success, error on failure.
 */
static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
{

	/* No doorbell on SI hardware generation */
	if (adev->asic_type < CHIP_BONAIRE) {
		adev->doorbell.base = 0;
		adev->doorbell.size = 0;
		adev->doorbell.num_doorbells = 0;
		adev->doorbell.ptr = NULL;
		return 0;
	}

	if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
		return -EINVAL;

	amdgpu_asic_init_doorbell_index(adev);

	/* doorbell bar mapping */
	adev->doorbell.base = pci_resource_start(adev->pdev, 2);
	adev->doorbell.size = pci_resource_len(adev->pdev, 2);

	if (adev->enable_mes) {
		adev->doorbell.num_doorbells =
			adev->doorbell.size / sizeof(u32);
	} else {
		adev->doorbell.num_doorbells =
			min_t(u32, adev->doorbell.size / sizeof(u32),
			      adev->doorbell_index.max_assignment+1);
		if (adev->doorbell.num_doorbells == 0)
			return -EINVAL;

		/* For Vega, reserve and map two pages on doorbell BAR since SDMA
		 * paging queue doorbell use the second page. The
		 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
		 * doorbells are in the first page. So with paging queue enabled,
		 * the max num_doorbells is extended by one page (0x400 dwords).
		 */
		if (adev->asic_type >= CHIP_VEGA10)
			adev->doorbell.num_doorbells += 0x400;
	}

	adev->doorbell.ptr = ioremap(adev->doorbell.base,
				     adev->doorbell.num_doorbells *
				     sizeof(u32));
	if (adev->doorbell.ptr == NULL)
		return -ENOMEM;

	return 0;
}

/**
 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Tear down doorbell driver information (CIK)
 */
static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
{
	iounmap(adev->doorbell.ptr);
	adev->doorbell.ptr = NULL;
}


/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */

/**
 * amdgpu_device_wb_fini - Disable Writeback and free memory
 *
 * @adev: amdgpu_device pointer
 *
 * Disables Writeback and frees the Writeback memory (all asics).
 * Used at driver shutdown.
 */
static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
{
	if (adev->wb.wb_obj) {
		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
				      &adev->wb.gpu_addr,
				      (void **)&adev->wb.wb);
		adev->wb.wb_obj = NULL;
	}
}

/**
 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
 *
 * @adev: amdgpu_device pointer
 *
 * Initializes writeback and allocates writeback memory (all asics).
 * Used at driver startup.
 * Returns 0 on success or a negative error code on failure.
 */
static int amdgpu_device_wb_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->wb.wb_obj == NULL) {
		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
					    (void **)&adev->wb.wb);
		if (r) {
			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
			return r;
		}

		adev->wb.num_wb = AMDGPU_MAX_WB;
		memset(&adev->wb.used, 0, sizeof(adev->wb.used));

		/* clear wb memory */
		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
	}

	return 0;
}

/**
 * amdgpu_device_wb_get - Allocate a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Allocate a wb slot for use by the driver (all asics).
 * Returns 0 on success or -EINVAL on failure.
 */
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
{
	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);

	if (offset < adev->wb.num_wb) {
		__set_bit(offset, adev->wb.used);
		*wb = offset << 3; /* convert to dw offset */
		return 0;
	} else {
		return -EINVAL;
	}
}

/**
 * amdgpu_device_wb_free - Free a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Free a wb slot allocated for use by the driver (all asics)
 */
void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
{
	wb >>= 3;
	if (wb < adev->wb.num_wb)
		__clear_bit(wb, adev->wb.used);
}

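/*
 * Illustrative usage (not from this file): ring and IH code pairs these
 * helpers to reserve a writeback slot and derive its CPU/GPU addresses.
 * A hypothetical caller might do:
 *
 *	u32 wb;
 *	int r = amdgpu_device_wb_get(adev, &wb);
 *
 *	if (!r) {
 *		volatile u32 *cpu_ptr = &adev->wb.wb[wb];
 *		u64 gpu_addr = adev->wb.gpu_addr + (wb * 4);
 *		...
 *		amdgpu_device_wb_free(adev, wb);
 *	}
 *
 * The returned index is already scaled to dwords (offset << 3), which is why
 * the GPU address above multiplies by 4 rather than by 32.
 */
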
/**
 * amdgpu_device_resize_fb_bar - try to resize FB BAR
 *
 * @adev: amdgpu_device pointer
 *
 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 * to fail, but if any of the BARs is not accessible after the size we abort
 * driver loading by returning -ENODEV.
 */
int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
{
	int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
	struct pci_bus *root;
	struct resource *res;
	unsigned i;
	u16 cmd;
	int r;

	/* Bypass for VF */
	if (amdgpu_sriov_vf(adev))
		return 0;

	/* skip if the bios has already enabled large BAR */
	if (adev->gmc.real_vram_size &&
	    (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
		return 0;

	/* Check if the root BUS has 64bit memory resources */
	root = adev->pdev->bus;
	while (root->parent)
		root = root->parent;

	pci_bus_for_each_resource(root, res, i) {
		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
		    res->start > 0x100000000ull)
			break;
	}

	/* Trying to resize is pointless without a root hub window above 4GB */
	if (!res)
		return 0;

	/* Limit the BAR size to what is available */
	rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
			rbar_size);

	/* Disable memory decoding while we change the BAR addresses and size */
	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
	pci_write_config_word(adev->pdev, PCI_COMMAND,
			      cmd & ~PCI_COMMAND_MEMORY);

	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
	amdgpu_device_doorbell_fini(adev);
	if (adev->asic_type >= CHIP_BONAIRE)
		pci_release_resource(adev->pdev, 2);

	pci_release_resource(adev->pdev, 0);

	r = pci_resize_resource(adev->pdev, 0, rbar_size);
	if (r == -ENOSPC)
		DRM_INFO("Not enough PCI address space for a large BAR.");
	else if (r && r != -ENOTSUPP)
		DRM_ERROR("Problem resizing BAR0 (%d).", r);

	pci_assign_unassigned_bus_resources(adev->pdev->bus);

	/* When the doorbell or fb BAR isn't available we have no chance of
	 * using the device.
	 */
	r = amdgpu_device_doorbell_init(adev);
	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
		return -ENODEV;

	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);

	return 0;
}

/*
 * GPU helper functions.
 */
/**
 * amdgpu_device_need_post - check if the hw need post or not
 *
 * @adev: amdgpu_device pointer
 *
 * Check if the asic has been initialized (all asics) at driver startup
 * or post is needed if hw reset is performed.
 * Returns true if post is needed or false if not.
 */
bool amdgpu_device_need_post(struct amdgpu_device *adev)
{
	uint32_t reg;

	if (amdgpu_sriov_vf(adev))
		return false;

	if (amdgpu_passthrough(adev)) {
		/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
		 * some old smc fw still need the driver to do a vPost, otherwise the gpu
		 * hangs. smc fw versions above 22.15 don't have this flaw, so we force
		 * a vPost only for smc versions below 22.15
		 */
		if (adev->asic_type == CHIP_FIJI) {
			int err;
			uint32_t fw_ver;
			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
			/* force vPost if error occurred */
			if (err)
				return true;

			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
			if (fw_ver < 0x00160e00)
				return true;
		}
	}

	/* Don't post if we need to reset whole hive on init */
	if (adev->gmc.xgmi.pending_reset)
		return false;

	if (adev->has_hw_reset) {
		adev->has_hw_reset = false;
		return true;
	}

	/* bios scratch used on CIK+ */
	if (adev->asic_type >= CHIP_BONAIRE)
		return amdgpu_atombios_scratch_need_asic_init(adev);

	/* check MEM_SIZE for older asics */
	reg = amdgpu_asic_get_config_memsize(adev);

	if ((reg != 0) && (reg != 0xffffffff))
		return false;

	return true;
}

/**
 * amdgpu_device_should_use_aspm - check if the device should program ASPM
 *
 * @adev: amdgpu_device pointer
 *
 * Confirm whether the module parameter and pcie bridge agree that ASPM should
 * be set for this device.
 *
 * Returns true if it should be used or false if not.
 */
bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
{
	switch (amdgpu_aspm) {
	case -1:
		break;
	case 0:
		return false;
	case 1:
		return true;
	default:
		return false;
	}
	return pcie_aspm_enabled(adev->pdev);
}

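/*
 * Illustrative note (assumptions, not from this file): amdgpu_aspm is exposed
 * as the "aspm" module parameter, so the policy above maps roughly to
 *
 *	amdgpu.aspm=0	-> never program ASPM
 *	amdgpu.aspm=1	-> always program ASPM
 *	amdgpu.aspm=-1	-> follow what the PCIe bridge already negotiated
 *			   (pcie_aspm_enabled())
 *
 * IP blocks are expected to call this helper before touching their ASPM
 * related registers.
 */
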
/* if we get transitioned to only one device, take VGA back */
/**
 * amdgpu_device_vga_set_decode - enable/disable vga decode
 *
 * @pdev: PCI device pointer
 * @state: enable/disable vga decode
 *
 * Enable/disable vga decode (all asics).
 * Returns VGA resource flags.
 */
static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
		bool state)
{
	struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
	amdgpu_asic_set_vga_state(adev, state);
	if (state)
		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
	else
		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
}

/**
 * amdgpu_device_check_block_size - validate the vm block size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm block size specified via module parameter.
 * The vm block size defines number of bits in page table versus page directory,
 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
 * page table and the remaining bits are in the page directory.
 */
static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
{
	/* defines number of bits in page table versus page directory,
	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
	 * page table and the remaining bits are in the page directory */
	if (amdgpu_vm_block_size == -1)
		return;

	if (amdgpu_vm_block_size < 9) {
		dev_warn(adev->dev, "VM page table size (%d) too small\n",
			 amdgpu_vm_block_size);
		amdgpu_vm_block_size = -1;
	}
}

/**
 * amdgpu_device_check_vm_size - validate the vm size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm size in GB specified via module parameter.
 * The VM size is the size of the GPU virtual memory space in GB.
 */
static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
{
	/* no need to check the default value */
	if (amdgpu_vm_size == -1)
		return;

	if (amdgpu_vm_size < 1) {
		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
			 amdgpu_vm_size);
		amdgpu_vm_size = -1;
	}
}

static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
{
	struct sysinfo si;
	bool is_os_64 = (sizeof(void *) == 8);
	uint64_t total_memory;
	uint64_t dram_size_seven_GB = 0x1B8000000;
	uint64_t dram_size_three_GB = 0xB8000000;

	if (amdgpu_smu_memory_pool_size == 0)
		return;

	if (!is_os_64) {
		DRM_WARN("Not 64-bit OS, feature not supported\n");
		goto def_value;
	}
	si_meminfo(&si);
	total_memory = (uint64_t)si.totalram * si.mem_unit;

	if ((amdgpu_smu_memory_pool_size == 1) ||
	    (amdgpu_smu_memory_pool_size == 2)) {
		if (total_memory < dram_size_three_GB)
			goto def_value1;
	} else if ((amdgpu_smu_memory_pool_size == 4) ||
		   (amdgpu_smu_memory_pool_size == 8)) {
		if (total_memory < dram_size_seven_GB)
			goto def_value1;
	} else {
		DRM_WARN("Smu memory pool size not supported\n");
		goto def_value;
	}
	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;

	return;

def_value1:
	DRM_WARN("Not enough system memory\n");
def_value:
	adev->pm.smu_prv_buffer_size = 0;
}

static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
{
	if (!(adev->flags & AMD_IS_APU) ||
	    adev->asic_type < CHIP_RAVEN)
		return 0;

	switch (adev->asic_type) {
	case CHIP_RAVEN:
		if (adev->pdev->device == 0x15dd)
			adev->apu_flags |= AMD_APU_IS_RAVEN;
		if (adev->pdev->device == 0x15d8)
			adev->apu_flags |= AMD_APU_IS_PICASSO;
		break;
	case CHIP_RENOIR:
		if ((adev->pdev->device == 0x1636) ||
		    (adev->pdev->device == 0x164c))
			adev->apu_flags |= AMD_APU_IS_RENOIR;
		else
			adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
		break;
	case CHIP_VANGOGH:
		adev->apu_flags |= AMD_APU_IS_VANGOGH;
		break;
	case CHIP_YELLOW_CARP:
		break;
	case CHIP_CYAN_SKILLFISH:
		if ((adev->pdev->device == 0x13FE) ||
		    (adev->pdev->device == 0x143F))
			adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
		break;
	default:
		break;
	}

	return 0;
}

/**
 * amdgpu_device_check_arguments - validate module params
 *
 * @adev: amdgpu_device pointer
 *
 * Validates certain module parameters and updates
 * the associated values used by the driver (all asics).
 */
static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
{
	if (amdgpu_sched_jobs < 4) {
		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = 4;
	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
	}

	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
		/* gart size must be greater or equal to 32M */
		dev_warn(adev->dev, "gart size (%d) too small\n",
			 amdgpu_gart_size);
		amdgpu_gart_size = -1;
	}

	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
		/* gtt size must be greater or equal to 32M */
		dev_warn(adev->dev, "gtt size (%d) too small\n",
			 amdgpu_gtt_size);
		amdgpu_gtt_size = -1;
	}

	/* valid range is between 4 and 9 inclusive */
	if (amdgpu_vm_fragment_size != -1 &&
	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
		dev_warn(adev->dev, "valid range is between 4 and 9\n");
		amdgpu_vm_fragment_size = -1;
	}

	if (amdgpu_sched_hw_submission < 2) {
		dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
			 amdgpu_sched_hw_submission);
		amdgpu_sched_hw_submission = 2;
	} else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
		dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
			 amdgpu_sched_hw_submission);
		amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
	}

	if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
		dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
		amdgpu_reset_method = -1;
	}

	amdgpu_device_check_smu_prv_buffer_size(adev);

	amdgpu_device_check_vm_size(adev);

	amdgpu_device_check_block_size(adev);

	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);

	return 0;
}

/**
 * amdgpu_switcheroo_set_state - set switcheroo state
 *
 * @pdev: pci dev pointer
 * @state: vga_switcheroo state
 *
 * Callback for the switcheroo driver. Suspends or resumes
 * the asics before or after it is powered up using ACPI methods.
 */
static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
					enum vga_switcheroo_state state)
{
	struct drm_device *dev = pci_get_drvdata(pdev);
	int r;

	if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
		return;

	if (state == VGA_SWITCHEROO_ON) {
		pr_info("switched on\n");
		/* don't suspend or resume card normally */
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;

		pci_set_power_state(pdev, PCI_D0);
		amdgpu_device_load_pci_state(pdev);
		r = pci_enable_device(pdev);
		if (r)
			DRM_WARN("pci_enable_device failed (%d)\n", r);
		amdgpu_device_resume(dev, true);

		dev->switch_power_state = DRM_SWITCH_POWER_ON;
	} else {
		pr_info("switched off\n");
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
		amdgpu_device_suspend(dev, true);
		amdgpu_device_cache_pci_state(pdev);
		/* Shut down the device */
		pci_disable_device(pdev);
		pci_set_power_state(pdev, PCI_D3cold);
		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
	}
}

/**
 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
 *
 * @pdev: pci dev pointer
 *
 * Callback for the switcheroo driver. Check if the switcheroo
 * state can be changed.
 * Returns true if the state can be changed, false if not.
 */
static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);

	/*
	 * FIXME: open_count is protected by drm_global_mutex but that would lead to
	 * locking inversion with the driver load path. And the access here is
	 * completely racy anyway. So don't bother with locking for now.
	 */
	return atomic_read(&dev->open_count) == 0;
}

static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
	.set_gpu_state = amdgpu_switcheroo_set_state,
	.reprobe = NULL,
	.can_switch = amdgpu_switcheroo_can_switch,
};

/**
 * amdgpu_device_ip_set_clockgating_state - set the CG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: clockgating state (gate or ungate)
 *
 * Sets the requested clockgating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_clockgating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

/**
 * amdgpu_device_ip_set_powergating_state - set the PG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: powergating state (gate or ungate)
 *
 * Sets the requested powergating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_powergating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_powergating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

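/*
 * Illustrative example (not from this file): IP code and the power management
 * layer call these wrappers with an IP type and a target state, for instance
 * something along the lines of
 *
 *	amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
 *					       AMD_PG_STATE_GATE);
 *	amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
 *					       AMD_CG_STATE_UNGATE);
 *
 * Note that @dev is typed void * here, so callers pass the amdgpu_device
 * pointer directly.
 */
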
e3ecdffa
AD
1708/**
1709 * amdgpu_device_ip_get_clockgating_state - get the CG state
1710 *
1711 * @adev: amdgpu_device pointer
1712 * @flags: clockgating feature flags
1713 *
1714 * Walks the list of IPs on the device and updates the clockgating
1715 * flags for each IP.
1716 * Updates @flags with the feature flags for each hardware IP where
1717 * clockgating is enabled.
1718 */
2990a1fc 1719void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
25faeddc 1720 u64 *flags)
6cb2d4e4
HR
1721{
1722 int i;
1723
1724 for (i = 0; i < adev->num_ip_blocks; i++) {
1725 if (!adev->ip_blocks[i].status.valid)
1726 continue;
1727 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1728 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1729 }
1730}
1731
e3ecdffa
AD
1732/**
1733 * amdgpu_device_ip_wait_for_idle - wait for idle
1734 *
1735 * @adev: amdgpu_device pointer
1736 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1737 *
1738 * Waits for the request hardware IP to be idle.
1739 * Returns 0 for success or a negative error code on failure.
1740 */
2990a1fc
AD
1741int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1742 enum amd_ip_block_type block_type)
5dbbb60b
AD
1743{
1744 int i, r;
1745
1746 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1747 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1748 continue;
a1255107
AD
1749 if (adev->ip_blocks[i].version->type == block_type) {
1750 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1751 if (r)
1752 return r;
1753 break;
1754 }
1755 }
1756 return 0;
1757
1758}
1759
e3ecdffa
AD
1760/**
1761 * amdgpu_device_ip_is_idle - is the hardware IP idle
1762 *
1763 * @adev: amdgpu_device pointer
1764 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1765 *
1766 * Check if the hardware IP is idle or not.
1767 * Returns true if the IP is idle, false if not.
1768 */
2990a1fc
AD
1769bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1770 enum amd_ip_block_type block_type)
5dbbb60b
AD
1771{
1772 int i;
1773
1774 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1775 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1776 continue;
a1255107
AD
1777 if (adev->ip_blocks[i].version->type == block_type)
1778 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1779 }
1780 return true;
1781
1782}
1783
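/*
 * Illustrative sketch, not part of this file: a hypothetical caller
 * (example_quiesce_gfx() is made up) that checks amdgpu_device_ip_is_idle()
 * and then blocks in amdgpu_device_ip_wait_for_idle() before touching GFX
 * state.
 */
static int example_quiesce_gfx(struct amdgpu_device *adev)
{
        if (amdgpu_device_ip_is_idle(adev, AMD_IP_BLOCK_TYPE_GFX))
                return 0;

        /* not idle yet; wait for the GFX IP or return its error code */
        return amdgpu_device_ip_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GFX);
}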
e3ecdffa
AD
1784/**
1785 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1786 *
1787 * @adev: amdgpu_device pointer
87e3f136 1788 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1789 *
1790 * Returns a pointer to the hardware IP block structure
1791 * if it exists for the asic, otherwise NULL.
1792 */
2990a1fc
AD
1793struct amdgpu_ip_block *
1794amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1795 enum amd_ip_block_type type)
d38ceaf9
AD
1796{
1797 int i;
1798
1799 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1800 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1801 return &adev->ip_blocks[i];
1802
1803 return NULL;
1804}
1805
1806/**
2990a1fc 1807 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1808 *
1809 * @adev: amdgpu_device pointer
5fc3aeeb 1810 * @type: enum amd_ip_block_type
d38ceaf9
AD
1811 * @major: major version
1812 * @minor: minor version
1813 *
1814 * Returns 0 if the IP block's version is equal to or greater than the
1815 * requested version, or 1 if it is smaller or the ip_block doesn't exist.
1816 */
2990a1fc
AD
1817int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1818 enum amd_ip_block_type type,
1819 u32 major, u32 minor)
d38ceaf9 1820{
2990a1fc 1821 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1822
a1255107
AD
1823 if (ip_block && ((ip_block->version->major > major) ||
1824 ((ip_block->version->major == major) &&
1825 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1826 return 0;
1827
1828 return 1;
1829}
1830
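/*
 * Illustrative sketch, not part of this file: using the two lookup helpers
 * above to gate a feature on the GMC block version.  The function name and
 * the 8.0 version threshold are placeholders for illustration only.
 */
static bool example_gmc_is_v8_or_newer(struct amdgpu_device *adev)
{
        struct amdgpu_ip_block *ip_block =
                amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GMC);

        if (!ip_block)
                return false;

        /* cmp returns 0 for "equal or greater", 1 for "smaller or missing" */
        return amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GMC,
                                                  8, 0) == 0;
}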
a1255107 1831/**
2990a1fc 1832 * amdgpu_device_ip_block_add
a1255107
AD
1833 *
1834 * @adev: amdgpu_device pointer
1835 * @ip_block_version: pointer to the IP to add
1836 *
1837 * Adds the IP block driver information to the collection of IPs
1838 * on the asic.
1839 */
2990a1fc
AD
1840int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1841 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1842{
1843 if (!ip_block_version)
1844 return -EINVAL;
1845
7bd939d0
LG
1846 switch (ip_block_version->type) {
1847 case AMD_IP_BLOCK_TYPE_VCN:
1848 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
1849 return 0;
1850 break;
1851 case AMD_IP_BLOCK_TYPE_JPEG:
1852 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
1853 return 0;
1854 break;
1855 default:
1856 break;
1857 }
1858
e966a725 1859 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1860 ip_block_version->funcs->name);
1861
a1255107
AD
1862 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1863
1864 return 0;
1865}
1866
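/*
 * Illustrative sketch, not part of this file: how an ASIC setup routine
 * (vi_set_ip_blocks() and friends) registers its IP blocks in init order
 * with the helper above.  The example_*_ip_block structures are
 * hypothetical; each real ASIC file exports its own
 * amdgpu_ip_block_version instances.
 */
extern const struct amdgpu_ip_block_version example_common_ip_block;	/* hypothetical */
extern const struct amdgpu_ip_block_version example_gmc_ip_block;	/* hypothetical */

static int example_set_ip_blocks(struct amdgpu_device *adev)
{
        int r;

        /* COMMON and GMC are added first so later blocks can rely on them */
        r = amdgpu_device_ip_block_add(adev, &example_common_ip_block);
        if (r)
                return r;
        r = amdgpu_device_ip_block_add(adev, &example_gmc_ip_block);
        if (r)
                return r;
        /* ...IH, PSP, SMC, GFX, SDMA, display and multimedia blocks follow... */
        return 0;
}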
e3ecdffa
AD
1867/**
1868 * amdgpu_device_enable_virtual_display - enable virtual display feature
1869 *
1870 * @adev: amdgpu_device pointer
1871 *
1872 * Enables the virtual display feature if the user has enabled it via
1873 * the module parameter virtual_display. This feature provides a virtual
1874 * display hardware on headless boards or in virtualized environments.
1875 * This function parses and validates the configuration string specified by
1876 * the user and configures the virtual display configuration (number of
1877 * virtual connectors, crtcs, etc.) specified.
1878 */
483ef985 1879static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1880{
1881 adev->enable_virtual_display = false;
1882
1883 if (amdgpu_virtual_display) {
8f66090b 1884 const char *pci_address_name = pci_name(adev->pdev);
0f66356d 1885 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1886
1887 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1888 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1889 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1890 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1891 if (!strcmp("all", pciaddname)
1892 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1893 long num_crtc;
1894 int res = -1;
1895
9accf2fd 1896 adev->enable_virtual_display = true;
0f66356d
ED
1897
1898 if (pciaddname_tmp)
1899 res = kstrtol(pciaddname_tmp, 10,
1900 &num_crtc);
1901
1902 if (!res) {
1903 if (num_crtc < 1)
1904 num_crtc = 1;
1905 if (num_crtc > 6)
1906 num_crtc = 6;
1907 adev->mode_info.num_crtc = num_crtc;
1908 } else {
1909 adev->mode_info.num_crtc = 1;
1910 }
9accf2fd
ED
1911 break;
1912 }
1913 }
1914
0f66356d
ED
1915 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1916 amdgpu_virtual_display, pci_address_name,
1917 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
1918
1919 kfree(pciaddstr);
1920 }
1921}
1922
25263da3
AD
1923void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
1924{
1925 if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
1926 adev->mode_info.num_crtc = 1;
1927 adev->enable_virtual_display = true;
1928 DRM_INFO("virtual_display:%d, num_crtc:%d\n",
1929 adev->enable_virtual_display, adev->mode_info.num_crtc);
1930 }
1931}
1932
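/*
 * Illustrative example, not part of this file: the virtual_display module
 * parameter parsed above is a semicolon-separated list of
 * "<pci address>,<num crtcs>" entries, where "all" matches every device.
 * The PCI addresses below are hypothetical:
 *
 *   modprobe amdgpu virtual_display=0000:04:00.0,2;0000:05:00.0,1
 *   modprobe amdgpu virtual_display=all,1
 *
 * The crtc count is clamped to the 1..6 range and falls back to 1 when it
 * is omitted or cannot be parsed.
 */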
e3ecdffa
AD
1933/**
1934 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1935 *
1936 * @adev: amdgpu_device pointer
1937 *
1938 * Parses the asic configuration parameters specified in the gpu info
1939 * firmware and makes them available to the driver for use in configuring
1940 * the asic.
1941 * Returns 0 on success, -EINVAL on failure.
1942 */
e2a75f88
AD
1943static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1944{
e2a75f88 1945 const char *chip_name;
c0a43457 1946 char fw_name[40];
e2a75f88
AD
1947 int err;
1948 const struct gpu_info_firmware_header_v1_0 *hdr;
1949
ab4fe3e1
HR
1950 adev->firmware.gpu_info_fw = NULL;
1951
72de33f8 1952 if (adev->mman.discovery_bin) {
cc375d8c
TY
1953 /*
1954 * FIXME: The bounding box is still needed by Navi12, so
e24d0e91 1955 * temporarily read it from gpu_info firmware. Should be dropped
cc375d8c
TY
1956 * when DAL no longer needs it.
1957 */
1958 if (adev->asic_type != CHIP_NAVI12)
1959 return 0;
258620d0
AD
1960 }
1961
e2a75f88 1962 switch (adev->asic_type) {
e2a75f88
AD
1963 default:
1964 return 0;
1965 case CHIP_VEGA10:
1966 chip_name = "vega10";
1967 break;
3f76dced
AD
1968 case CHIP_VEGA12:
1969 chip_name = "vega12";
1970 break;
2d2e5e7e 1971 case CHIP_RAVEN:
54f78a76 1972 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
54c4d17e 1973 chip_name = "raven2";
54f78a76 1974 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
741deade 1975 chip_name = "picasso";
54c4d17e
FX
1976 else
1977 chip_name = "raven";
2d2e5e7e 1978 break;
65e60f6e
LM
1979 case CHIP_ARCTURUS:
1980 chip_name = "arcturus";
1981 break;
42b325e5
XY
1982 case CHIP_NAVI12:
1983 chip_name = "navi12";
1984 break;
e2a75f88
AD
1985 }
1986
1987 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
b31d3063 1988 err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
e2a75f88
AD
1989 if (err) {
1990 dev_err(adev->dev,
b31d3063 1991 "Failed to get gpu_info firmware \"%s\"\n",
e2a75f88
AD
1992 fw_name);
1993 goto out;
1994 }
1995
ab4fe3e1 1996 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
1997 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1998
1999 switch (hdr->version_major) {
2000 case 1:
2001 {
2002 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 2003 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
2004 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2005
cc375d8c
TY
2006 /*
2007 * Should be dropped when DAL no longer needs it.
2008 */
2009 if (adev->asic_type == CHIP_NAVI12)
ec51d3fa
XY
2010 goto parse_soc_bounding_box;
2011
b5ab16bf
AD
2012 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2013 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2014 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2015 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 2016 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
2017 le32_to_cpu(gpu_info_fw->gc_num_tccs);
2018 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2019 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2020 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2021 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 2022 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
2023 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2024 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
2025 adev->gfx.cu_info.max_waves_per_simd =
2026 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2027 adev->gfx.cu_info.max_scratch_slots_per_cu =
2028 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2029 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 2030 if (hdr->version_minor >= 1) {
35c2e910
HZ
2031 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2032 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2033 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2034 adev->gfx.config.num_sc_per_sh =
2035 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2036 adev->gfx.config.num_packer_per_sc =
2037 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2038 }
ec51d3fa
XY
2039
2040parse_soc_bounding_box:
ec51d3fa
XY
2041 /*
2042 * soc bounding box info is not integrated in the discovery table,
258620d0 2043 * we always need to parse it from gpu info firmware if needed.
ec51d3fa 2044 */
48321c3d
HW
2045 if (hdr->version_minor == 2) {
2046 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2047 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2048 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2049 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2050 }
e2a75f88
AD
2051 break;
2052 }
2053 default:
2054 dev_err(adev->dev,
2055 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2056 err = -EINVAL;
2057 goto out;
2058 }
2059out:
e2a75f88
AD
2060 return err;
2061}
2062
e3ecdffa
AD
2063/**
2064 * amdgpu_device_ip_early_init - run early init for hardware IPs
2065 *
2066 * @adev: amdgpu_device pointer
2067 *
2068 * Early initialization pass for hardware IPs. The hardware IPs that make
2069 * up each asic are discovered and each IP's early_init callback is run. This
2070 * is the first stage in initializing the asic.
2071 * Returns 0 on success, negative error code on failure.
2072 */
06ec9070 2073static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 2074{
901e2be2
AD
2075 struct drm_device *dev = adev_to_drm(adev);
2076 struct pci_dev *parent;
aaa36a97 2077 int i, r;
ced69502 2078 bool total;
d38ceaf9 2079
483ef985 2080 amdgpu_device_enable_virtual_display(adev);
a6be7570 2081
00a979f3 2082 if (amdgpu_sriov_vf(adev)) {
00a979f3 2083 r = amdgpu_virt_request_full_gpu(adev, true);
aaa36a97
AD
2084 if (r)
2085 return r;
00a979f3
WS
2086 }
2087
d38ceaf9 2088 switch (adev->asic_type) {
33f34802
KW
2089#ifdef CONFIG_DRM_AMDGPU_SI
2090 case CHIP_VERDE:
2091 case CHIP_TAHITI:
2092 case CHIP_PITCAIRN:
2093 case CHIP_OLAND:
2094 case CHIP_HAINAN:
295d0daf 2095 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
2096 r = si_set_ip_blocks(adev);
2097 if (r)
2098 return r;
2099 break;
2100#endif
a2e73f56
AD
2101#ifdef CONFIG_DRM_AMDGPU_CIK
2102 case CHIP_BONAIRE:
2103 case CHIP_HAWAII:
2104 case CHIP_KAVERI:
2105 case CHIP_KABINI:
2106 case CHIP_MULLINS:
e1ad2d53 2107 if (adev->flags & AMD_IS_APU)
a2e73f56 2108 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
2109 else
2110 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
2111
2112 r = cik_set_ip_blocks(adev);
2113 if (r)
2114 return r;
2115 break;
2116#endif
da87c30b
AD
2117 case CHIP_TOPAZ:
2118 case CHIP_TONGA:
2119 case CHIP_FIJI:
2120 case CHIP_POLARIS10:
2121 case CHIP_POLARIS11:
2122 case CHIP_POLARIS12:
2123 case CHIP_VEGAM:
2124 case CHIP_CARRIZO:
2125 case CHIP_STONEY:
2126 if (adev->flags & AMD_IS_APU)
2127 adev->family = AMDGPU_FAMILY_CZ;
2128 else
2129 adev->family = AMDGPU_FAMILY_VI;
2130
2131 r = vi_set_ip_blocks(adev);
2132 if (r)
2133 return r;
2134 break;
d38ceaf9 2135 default:
63352b7f
AD
2136 r = amdgpu_discovery_set_ip_blocks(adev);
2137 if (r)
2138 return r;
2139 break;
d38ceaf9
AD
2140 }
2141
901e2be2
AD
2142 if (amdgpu_has_atpx() &&
2143 (amdgpu_is_atpx_hybrid() ||
2144 amdgpu_has_atpx_dgpu_power_cntl()) &&
2145 ((adev->flags & AMD_IS_APU) == 0) &&
2146 !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
2147 adev->flags |= AMD_IS_PX;
2148
85ac2021
AD
2149 if (!(adev->flags & AMD_IS_APU)) {
2150 parent = pci_upstream_bridge(adev->pdev);
2151 adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2152 }
901e2be2 2153
c004d44e 2154 amdgpu_amdkfd_device_probe(adev);
1884734a 2155
3b94fb10 2156 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 2157 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 2158 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
4215a119
HC
2159 if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2160 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
00f54b97 2161
ced69502 2162 total = true;
d38ceaf9
AD
2163 for (i = 0; i < adev->num_ip_blocks; i++) {
2164 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
ed8cf00c
HR
2165 DRM_ERROR("disabled ip block: %d <%s>\n",
2166 i, adev->ip_blocks[i].version->funcs->name);
a1255107 2167 adev->ip_blocks[i].status.valid = false;
d38ceaf9 2168 } else {
a1255107
AD
2169 if (adev->ip_blocks[i].version->funcs->early_init) {
2170 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 2171 if (r == -ENOENT) {
a1255107 2172 adev->ip_blocks[i].status.valid = false;
2c1a2784 2173 } else if (r) {
a1255107
AD
2174 DRM_ERROR("early_init of IP block <%s> failed %d\n",
2175 adev->ip_blocks[i].version->funcs->name, r);
ced69502 2176 total = false;
2c1a2784 2177 } else {
a1255107 2178 adev->ip_blocks[i].status.valid = true;
2c1a2784 2179 }
974e6b64 2180 } else {
a1255107 2181 adev->ip_blocks[i].status.valid = true;
d38ceaf9 2182 }
d38ceaf9 2183 }
21a249ca
AD
2184 /* get the vbios after the asic_funcs are set up */
2185 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
2186 r = amdgpu_device_parse_gpu_info_fw(adev);
2187 if (r)
2188 return r;
2189
21a249ca
AD
2190 /* Read BIOS */
2191 if (!amdgpu_get_bios(adev))
2192 return -EINVAL;
2193
2194 r = amdgpu_atombios_init(adev);
2195 if (r) {
2196 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2197 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2198 return r;
2199 }
77eabc6f
PJZ
2200
2201 /* get pf2vf msg info at its earliest time */
2202 if (amdgpu_sriov_vf(adev))
2203 amdgpu_virt_init_data_exchange(adev);
2204
21a249ca 2205 }
d38ceaf9 2206 }
ced69502
ML
2207 if (!total)
2208 return -ENODEV;
d38ceaf9 2209
395d1fb9
NH
2210 adev->cg_flags &= amdgpu_cg_mask;
2211 adev->pg_flags &= amdgpu_pg_mask;
2212
d38ceaf9
AD
2213 return 0;
2214}
2215
0a4f2520
RZ
2216static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2217{
2218 int i, r;
2219
2220 for (i = 0; i < adev->num_ip_blocks; i++) {
2221 if (!adev->ip_blocks[i].status.sw)
2222 continue;
2223 if (adev->ip_blocks[i].status.hw)
2224 continue;
2225 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 2226 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
2227 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2228 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2229 if (r) {
2230 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2231 adev->ip_blocks[i].version->funcs->name, r);
2232 return r;
2233 }
2234 adev->ip_blocks[i].status.hw = true;
2235 }
2236 }
2237
2238 return 0;
2239}
2240
2241static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2242{
2243 int i, r;
2244
2245 for (i = 0; i < adev->num_ip_blocks; i++) {
2246 if (!adev->ip_blocks[i].status.sw)
2247 continue;
2248 if (adev->ip_blocks[i].status.hw)
2249 continue;
2250 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2251 if (r) {
2252 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2253 adev->ip_blocks[i].version->funcs->name, r);
2254 return r;
2255 }
2256 adev->ip_blocks[i].status.hw = true;
2257 }
2258
2259 return 0;
2260}
2261
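/*
 * Illustrative sketch, not part of this file: the relative ordering the two
 * phase helpers above provide.  Phase 1 brings up the infrastructure blocks
 * (COMMON, IH, and PSP under SR-IOV), firmware is then loaded, and phase 2
 * initializes everything else.  example_hw_init_all() is hypothetical
 * shorthand for what amdgpu_device_ip_init() does further down.
 */
static int example_hw_init_all(struct amdgpu_device *adev)
{
        int r;

        r = amdgpu_device_ip_hw_init_phase1(adev);    /* COMMON, IH, (PSP) */
        if (r)
                return r;

        r = amdgpu_device_fw_loading(adev);           /* PSP/SMU microcode */
        if (r)
                return r;

        return amdgpu_device_ip_hw_init_phase2(adev); /* all remaining IPs */
}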
7a3e0bb2
RZ
2262static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2263{
2264 int r = 0;
2265 int i;
80f41f84 2266 uint32_t smu_version;
7a3e0bb2
RZ
2267
2268 if (adev->asic_type >= CHIP_VEGA10) {
2269 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
2270 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2271 continue;
2272
e3c1b071 2273 if (!adev->ip_blocks[i].status.sw)
2274 continue;
2275
482f0e53
ML
2276 /* no need to do the fw loading again if already done*/
2277 if (adev->ip_blocks[i].status.hw == true)
2278 break;
2279
53b3f8f4 2280 if (amdgpu_in_reset(adev) || adev->in_suspend) {
482f0e53
ML
2281 r = adev->ip_blocks[i].version->funcs->resume(adev);
2282 if (r) {
2283 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 2284 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
2285 return r;
2286 }
2287 } else {
2288 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2289 if (r) {
2290 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2291 adev->ip_blocks[i].version->funcs->name, r);
2292 return r;
7a3e0bb2 2293 }
7a3e0bb2 2294 }
482f0e53
ML
2295
2296 adev->ip_blocks[i].status.hw = true;
2297 break;
7a3e0bb2
RZ
2298 }
2299 }
482f0e53 2300
8973d9ec
ED
2301 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2302 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 2303
80f41f84 2304 return r;
7a3e0bb2
RZ
2305}
2306
5fd8518d
AG
2307static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2308{
2309 long timeout;
2310 int r, i;
2311
2312 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2313 struct amdgpu_ring *ring = adev->rings[i];
2314
2315 /* No need to setup the GPU scheduler for rings that don't need it */
2316 if (!ring || ring->no_scheduler)
2317 continue;
2318
2319 switch (ring->funcs->type) {
2320 case AMDGPU_RING_TYPE_GFX:
2321 timeout = adev->gfx_timeout;
2322 break;
2323 case AMDGPU_RING_TYPE_COMPUTE:
2324 timeout = adev->compute_timeout;
2325 break;
2326 case AMDGPU_RING_TYPE_SDMA:
2327 timeout = adev->sdma_timeout;
2328 break;
2329 default:
2330 timeout = adev->video_timeout;
2331 break;
2332 }
2333
2334 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
2335 ring->num_hw_submission, amdgpu_job_hang_limit,
8ab62eda
JG
2336 timeout, adev->reset_domain->wq,
2337 ring->sched_score, ring->name,
2338 adev->dev);
5fd8518d
AG
2339 if (r) {
2340 DRM_ERROR("Failed to create scheduler on ring %s.\n",
2341 ring->name);
2342 return r;
2343 }
2344 }
2345
2346 return 0;
2347}
2348
2349
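/*
 * Illustrative example, not part of this file: the per-ring timeouts
 * selected above come from the amdgpu.lockup_timeout module parameter,
 * parsed in amdgpu_device_get_job_timeout_settings() further down.  It
 * takes up to four comma-separated values, in milliseconds, in the order
 * gfx,compute,sdma,video (0 keeps the default, a negative value disables
 * the timeout):
 *
 *   modprobe amdgpu lockup_timeout=10000,60000,10000,10000
 *   modprobe amdgpu lockup_timeout=5000   (applies to all non-compute jobs)
 */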
e3ecdffa
AD
2350/**
2351 * amdgpu_device_ip_init - run init for hardware IPs
2352 *
2353 * @adev: amdgpu_device pointer
2354 *
2355 * Main initialization pass for hardware IPs. The list of all the hardware
2356 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2357 * are run. sw_init initializes the software state associated with each IP
2358 * and hw_init initializes the hardware associated with each IP.
2359 * Returns 0 on success, negative error code on failure.
2360 */
06ec9070 2361static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
2362{
2363 int i, r;
2364
c030f2e4 2365 r = amdgpu_ras_init(adev);
2366 if (r)
2367 return r;
2368
d38ceaf9 2369 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2370 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2371 continue;
a1255107 2372 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 2373 if (r) {
a1255107
AD
2374 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2375 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 2376 goto init_failed;
2c1a2784 2377 }
a1255107 2378 adev->ip_blocks[i].status.sw = true;
bfca0289 2379
c1c39032
AD
2380 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2381 /* need to do common hw init early so everything is set up for gmc */
2382 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2383 if (r) {
2384 DRM_ERROR("hw_init %d failed %d\n", i, r);
2385 goto init_failed;
2386 }
2387 adev->ip_blocks[i].status.hw = true;
2388 } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2389 /* need to do gmc hw init early so we can allocate gpu mem */
892deb48
VS
2390 /* Try to reserve bad pages early */
2391 if (amdgpu_sriov_vf(adev))
2392 amdgpu_virt_exchange_data(adev);
2393
7ccfd79f 2394 r = amdgpu_device_mem_scratch_init(adev);
2c1a2784 2395 if (r) {
7ccfd79f 2396 DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
72d3f592 2397 goto init_failed;
2c1a2784 2398 }
a1255107 2399 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
2400 if (r) {
2401 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 2402 goto init_failed;
2c1a2784 2403 }
06ec9070 2404 r = amdgpu_device_wb_init(adev);
2c1a2784 2405 if (r) {
06ec9070 2406 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 2407 goto init_failed;
2c1a2784 2408 }
a1255107 2409 adev->ip_blocks[i].status.hw = true;
2493664f
ML
2410
2411 /* right after GMC hw init, we create CSA */
8a1fbb4a 2412 if (amdgpu_mcbp) {
1e256e27 2413 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
58ab2c08
CK
2414 AMDGPU_GEM_DOMAIN_VRAM |
2415 AMDGPU_GEM_DOMAIN_GTT,
2416 AMDGPU_CSA_SIZE);
2493664f
ML
2417 if (r) {
2418 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 2419 goto init_failed;
2493664f
ML
2420 }
2421 }
d38ceaf9
AD
2422 }
2423 }
2424
c9ffa427 2425 if (amdgpu_sriov_vf(adev))
22c16d25 2426 amdgpu_virt_init_data_exchange(adev);
c9ffa427 2427
533aed27
AG
2428 r = amdgpu_ib_pool_init(adev);
2429 if (r) {
2430 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2431 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2432 goto init_failed;
2433 }
2434
c8963ea4
RZ
2435 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2436 if (r)
72d3f592 2437 goto init_failed;
0a4f2520
RZ
2438
2439 r = amdgpu_device_ip_hw_init_phase1(adev);
2440 if (r)
72d3f592 2441 goto init_failed;
0a4f2520 2442
7a3e0bb2
RZ
2443 r = amdgpu_device_fw_loading(adev);
2444 if (r)
72d3f592 2445 goto init_failed;
7a3e0bb2 2446
0a4f2520
RZ
2447 r = amdgpu_device_ip_hw_init_phase2(adev);
2448 if (r)
72d3f592 2449 goto init_failed;
d38ceaf9 2450
121a2bc6
AG
2451 /*
2452 * retired pages will be loaded from eeprom and reserved here,
2453 * it should be called after amdgpu_device_ip_hw_init_phase2 since
2454 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2455 * for I2C communication, which is only true at this point.
b82e65a9
GC
2456 *
2457 * amdgpu_ras_recovery_init may fail, but the upper layers only care about
2458 * failures caused by a bad gpu situation and stop the amdgpu init process
2459 * accordingly. For other failure cases, it will still release all
2460 * the resources and print an error message, rather than returning a
2461 * negative value to the upper level.
121a2bc6
AG
2462 *
2463 * Note: theoretically, this should be called before all vram allocations
2464 * to protect retired pages from being reused
2465 */
b82e65a9
GC
2466 r = amdgpu_ras_recovery_init(adev);
2467 if (r)
2468 goto init_failed;
121a2bc6 2469
cfbb6b00
AG
2470 /**
2471 * In case of XGMI grab extra reference for reset domain for this device
2472 */
a4c63caf 2473 if (adev->gmc.xgmi.num_physical_nodes > 1) {
cfbb6b00 2474 if (amdgpu_xgmi_add_device(adev) == 0) {
46c67660 2475 if (!amdgpu_sriov_vf(adev)) {
2efc30f0
VC
2476 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2477
dfd0287b
LH
2478 if (WARN_ON(!hive)) {
2479 r = -ENOENT;
2480 goto init_failed;
2481 }
2482
46c67660 2483 if (!hive->reset_domain ||
2484 !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2485 r = -ENOENT;
2486 amdgpu_put_xgmi_hive(hive);
2487 goto init_failed;
2488 }
2489
2490 /* Drop the early temporary reset domain we created for device */
2491 amdgpu_reset_put_reset_domain(adev->reset_domain);
2492 adev->reset_domain = hive->reset_domain;
9dfa4860 2493 amdgpu_put_xgmi_hive(hive);
cfbb6b00 2494 }
a4c63caf
AG
2495 }
2496 }
2497
5fd8518d
AG
2498 r = amdgpu_device_init_schedulers(adev);
2499 if (r)
2500 goto init_failed;
e3c1b071 2501
2502 /* Don't init kfd if whole hive need to be reset during init */
c004d44e 2503 if (!adev->gmc.xgmi.pending_reset)
e3c1b071 2504 amdgpu_amdkfd_device_init(adev);
c6332b97 2505
bd607166
KR
2506 amdgpu_fru_get_product_info(adev);
2507
72d3f592 2508init_failed:
c9ffa427 2509 if (amdgpu_sriov_vf(adev))
c6332b97 2510 amdgpu_virt_release_full_gpu(adev, true);
2511
72d3f592 2512 return r;
d38ceaf9
AD
2513}
2514
e3ecdffa
AD
2515/**
2516 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2517 *
2518 * @adev: amdgpu_device pointer
2519 *
2520 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2521 * this function before a GPU reset. If the value is retained after a
2522 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2523 */
06ec9070 2524static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2525{
2526 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2527}
2528
e3ecdffa
AD
2529/**
2530 * amdgpu_device_check_vram_lost - check if vram is valid
2531 *
2532 * @adev: amdgpu_device pointer
2533 *
2534 * Checks the reset magic value written to the gart pointer in VRAM.
2535 * The driver calls this after a GPU reset to see if the contents of
2536 * VRAM is lost or not.
2537 * returns true if vram is lost, false if not.
2538 */
06ec9070 2539static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2540{
dadce777
EQ
2541 if (memcmp(adev->gart.ptr, adev->reset_magic,
2542 AMDGPU_RESET_MAGIC_NUM))
2543 return true;
2544
53b3f8f4 2545 if (!amdgpu_in_reset(adev))
dadce777
EQ
2546 return false;
2547
2548 /*
2549 * For all ASICs with baco/mode1 reset, the VRAM is
2550 * always assumed to be lost.
2551 */
2552 switch (amdgpu_asic_reset_method(adev)) {
2553 case AMD_RESET_METHOD_BACO:
2554 case AMD_RESET_METHOD_MODE1:
2555 return true;
2556 default:
2557 return false;
2558 }
0c49e0b8
CZ
2559}
2560
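/*
 * Illustrative sketch, not part of this file: a condensed view of how the
 * reset-magic pair above is used.  In the real driver the snapshot is taken
 * at late init and the check runs in the reset/recovery paths;
 * example_reset_asic() is hypothetical.
 */
static int example_reset_asic(struct amdgpu_device *adev)
{
        bool vram_lost;
        int r;

        amdgpu_device_fill_reset_magic(adev);	/* snapshot the GART-backed magic */

        r = amdgpu_asic_reset(adev);
        if (r)
                return r;

        vram_lost = amdgpu_device_check_vram_lost(adev);
        DRM_INFO("VRAM was %s across the reset\n",
                 vram_lost ? "lost" : "preserved");
        /* if lost, callers must restore buffer contents before reuse */
        return 0;
}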
e3ecdffa 2561/**
1112a46b 2562 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2563 *
2564 * @adev: amdgpu_device pointer
b8b72130 2565 * @state: clockgating state (gate or ungate)
e3ecdffa 2566 *
e3ecdffa 2567 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2568 * set_clockgating_state callbacks are run.
2569 * On late init this pass enables clockgating for the hardware IPs;
2570 * on fini or suspend it disables clockgating for them.
e3ecdffa
AD
2571 * Returns 0 on success, negative error code on failure.
2572 */
fdd34271 2573
5d89bb2d
LL
2574int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2575 enum amd_clockgating_state state)
d38ceaf9 2576{
1112a46b 2577 int i, j, r;
d38ceaf9 2578
4a2ba394
SL
2579 if (amdgpu_emu_mode == 1)
2580 return 0;
2581
1112a46b
RZ
2582 for (j = 0; j < adev->num_ip_blocks; j++) {
2583 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2584 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2585 continue;
47198eb7 2586 /* skip CG for GFX, SDMA on S0ix */
5d70a549 2587 if (adev->in_s0ix &&
47198eb7
AD
2588 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2589 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2590 continue;
4a446d55 2591 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2592 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2593 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2594 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2595 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2596 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2597 /* enable clockgating to save power */
a1255107 2598 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2599 state);
4a446d55
AD
2600 if (r) {
2601 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2602 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2603 return r;
2604 }
b0b00ff1 2605 }
d38ceaf9 2606 }
06b18f61 2607
c9f96fd5
RZ
2608 return 0;
2609}
2610
5d89bb2d
LL
2611int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2612 enum amd_powergating_state state)
c9f96fd5 2613{
1112a46b 2614 int i, j, r;
06b18f61 2615
c9f96fd5
RZ
2616 if (amdgpu_emu_mode == 1)
2617 return 0;
2618
1112a46b
RZ
2619 for (j = 0; j < adev->num_ip_blocks; j++) {
2620 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2621 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5 2622 continue;
47198eb7 2623 /* skip PG for GFX, SDMA on S0ix */
5d70a549 2624 if (adev->in_s0ix &&
47198eb7
AD
2625 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2626 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2627 continue;
c9f96fd5
RZ
2628 /* skip CG for VCE/UVD, it's handled specially */
2629 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2630 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2631 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2632 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2633 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2634 /* enable powergating to save power */
2635 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2636 state);
c9f96fd5
RZ
2637 if (r) {
2638 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2639 adev->ip_blocks[i].version->funcs->name, r);
2640 return r;
2641 }
2642 }
2643 }
2dc80b00
S
2644 return 0;
2645}
2646
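/*
 * Illustrative note, not part of this file: the index arithmetic shared by
 * the two walkers above.  With num_ip_blocks == 4, gating visits the blocks
 * in registration order while ungating visits them in reverse:
 *
 *   j           : 0 1 2 3
 *   i (GATE)    : 0 1 2 3    i = j
 *   i (UNGATE)  : 3 2 1 0    i = num_ip_blocks - j - 1
 *
 * so gating is applied bottom-up and removed top-down.
 */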
beff74bc
AD
2647static int amdgpu_device_enable_mgpu_fan_boost(void)
2648{
2649 struct amdgpu_gpu_instance *gpu_ins;
2650 struct amdgpu_device *adev;
2651 int i, ret = 0;
2652
2653 mutex_lock(&mgpu_info.mutex);
2654
2655 /*
2656 * MGPU fan boost feature should be enabled
2657 * only when there are two or more dGPUs in
2658 * the system
2659 */
2660 if (mgpu_info.num_dgpu < 2)
2661 goto out;
2662
2663 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2664 gpu_ins = &(mgpu_info.gpu_ins[i]);
2665 adev = gpu_ins->adev;
2666 if (!(adev->flags & AMD_IS_APU) &&
f10bb940 2667 !gpu_ins->mgpu_fan_enabled) {
beff74bc
AD
2668 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2669 if (ret)
2670 break;
2671
2672 gpu_ins->mgpu_fan_enabled = 1;
2673 }
2674 }
2675
2676out:
2677 mutex_unlock(&mgpu_info.mutex);
2678
2679 return ret;
2680}
2681
e3ecdffa
AD
2682/**
2683 * amdgpu_device_ip_late_init - run late init for hardware IPs
2684 *
2685 * @adev: amdgpu_device pointer
2686 *
2687 * Late initialization pass for hardware IPs. The list of all the hardware
2688 * IPs that make up the asic is walked and the late_init callbacks are run.
2689 * late_init covers any special initialization that an IP requires
2690 * after all of them have been initialized or something that needs to happen
2691 * late in the init process.
2692 * Returns 0 on success, negative error code on failure.
2693 */
06ec9070 2694static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2695{
60599a03 2696 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2697 int i = 0, r;
2698
2699 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2700 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2701 continue;
2702 if (adev->ip_blocks[i].version->funcs->late_init) {
2703 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2704 if (r) {
2705 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2706 adev->ip_blocks[i].version->funcs->name, r);
2707 return r;
2708 }
2dc80b00 2709 }
73f847db 2710 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2711 }
2712
867e24ca 2713 r = amdgpu_ras_late_init(adev);
2714 if (r) {
2715 DRM_ERROR("amdgpu_ras_late_init failed %d", r);
2716 return r;
2717 }
2718
a891d239
DL
2719 amdgpu_ras_set_error_query_ready(adev, true);
2720
1112a46b
RZ
2721 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2722 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2723
06ec9070 2724 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2725
beff74bc
AD
2726 r = amdgpu_device_enable_mgpu_fan_boost();
2727 if (r)
2728 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2729
4da8b639 2730 /* For passthrough configuration on arcturus and aldebaran, enable special handling for SBR */
2731 if (amdgpu_passthrough(adev) && ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1)||
2732 adev->asic_type == CHIP_ALDEBARAN ))
bc143d8b 2733 amdgpu_dpm_handle_passthrough_sbr(adev, true);
60599a03
EQ
2734
2735 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2736 mutex_lock(&mgpu_info.mutex);
2737
2738 /*
2739 * Reset device p-state to low as this was booted with high.
2740 *
2741 * This should be performed only after all devices from the same
2742 * hive get initialized.
2743 *
2744 * However, the number of devices in a hive is not known in advance,
2745 * as they are counted one by one during device initialization.
2746 *
2747 * So, we wait for all XGMI interlinked devices initialized.
2748 * This may bring some delays as those devices may come from
2749 * different hives. But that should be OK.
2750 */
2751 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2752 for (i = 0; i < mgpu_info.num_gpu; i++) {
2753 gpu_instance = &(mgpu_info.gpu_ins[i]);
2754 if (gpu_instance->adev->flags & AMD_IS_APU)
2755 continue;
2756
d84a430d
JK
2757 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2758 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
2759 if (r) {
2760 DRM_ERROR("pstate setting failed (%d).\n", r);
2761 break;
2762 }
2763 }
2764 }
2765
2766 mutex_unlock(&mgpu_info.mutex);
2767 }
2768
d38ceaf9
AD
2769 return 0;
2770}
2771
613aa3ea
LY
2772/**
2773 * amdgpu_device_smu_fini_early - smu hw_fini wrapper
2774 *
2775 * @adev: amdgpu_device pointer
2776 *
2777 * For ASICs need to disable SMC first
2778 */
2779static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
2780{
2781 int i, r;
2782
2783 if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))
2784 return;
2785
2786 for (i = 0; i < adev->num_ip_blocks; i++) {
2787 if (!adev->ip_blocks[i].status.hw)
2788 continue;
2789 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2790 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2791 /* XXX handle errors */
2792 if (r) {
2793 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2794 adev->ip_blocks[i].version->funcs->name, r);
2795 }
2796 adev->ip_blocks[i].status.hw = false;
2797 break;
2798 }
2799 }
2800}
2801
e9669fb7 2802static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
d38ceaf9
AD
2803{
2804 int i, r;
2805
e9669fb7
AG
2806 for (i = 0; i < adev->num_ip_blocks; i++) {
2807 if (!adev->ip_blocks[i].version->funcs->early_fini)
2808 continue;
5278a159 2809
e9669fb7
AG
2810 r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
2811 if (r) {
2812 DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
2813 adev->ip_blocks[i].version->funcs->name, r);
2814 }
2815 }
c030f2e4 2816
05df1f01 2817 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2818 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2819
7270e895
TY
2820 amdgpu_amdkfd_suspend(adev, false);
2821
613aa3ea
LY
2822 /* Workaround for ASICs that need to disable SMC first */
2823 amdgpu_device_smu_fini_early(adev);
3e96dbfd 2824
d38ceaf9 2825 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2826 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2827 continue;
8201a67a 2828
a1255107 2829 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2830 /* XXX handle errors */
2c1a2784 2831 if (r) {
a1255107
AD
2832 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2833 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2834 }
8201a67a 2835
a1255107 2836 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2837 }
2838
6effad8a
GC
2839 if (amdgpu_sriov_vf(adev)) {
2840 if (amdgpu_virt_release_full_gpu(adev, false))
2841 DRM_ERROR("failed to release exclusive mode on fini\n");
2842 }
2843
e9669fb7
AG
2844 return 0;
2845}
2846
2847/**
2848 * amdgpu_device_ip_fini - run fini for hardware IPs
2849 *
2850 * @adev: amdgpu_device pointer
2851 *
2852 * Main teardown pass for hardware IPs. The list of all the hardware
2853 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2854 * are run. hw_fini tears down the hardware associated with each IP
2855 * and sw_fini tears down any software state associated with each IP.
2856 * Returns 0 on success, negative error code on failure.
2857 */
2858static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2859{
2860 int i, r;
2861
2862 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2863 amdgpu_virt_release_ras_err_handler_data(adev);
2864
e9669fb7
AG
2865 if (adev->gmc.xgmi.num_physical_nodes > 1)
2866 amdgpu_xgmi_remove_device(adev);
2867
c004d44e 2868 amdgpu_amdkfd_device_fini_sw(adev);
9950cda2 2869
d38ceaf9 2870 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2871 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2872 continue;
c12aba3a
ML
2873
2874 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2875 amdgpu_ucode_free_bo(adev);
1e256e27 2876 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a 2877 amdgpu_device_wb_fini(adev);
7ccfd79f 2878 amdgpu_device_mem_scratch_fini(adev);
533aed27 2879 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2880 }
2881
a1255107 2882 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2883 /* XXX handle errors */
2c1a2784 2884 if (r) {
a1255107
AD
2885 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2886 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2887 }
a1255107
AD
2888 adev->ip_blocks[i].status.sw = false;
2889 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2890 }
2891
a6dcfd9c 2892 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2893 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2894 continue;
a1255107
AD
2895 if (adev->ip_blocks[i].version->funcs->late_fini)
2896 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2897 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2898 }
2899
c030f2e4 2900 amdgpu_ras_fini(adev);
2901
d38ceaf9
AD
2902 return 0;
2903}
2904
e3ecdffa 2905/**
beff74bc 2906 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2907 *
1112a46b 2908 * @work: work_struct.
e3ecdffa 2909 */
beff74bc 2910static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2911{
2912 struct amdgpu_device *adev =
beff74bc 2913 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2914 int r;
2915
2916 r = amdgpu_ib_ring_tests(adev);
2917 if (r)
2918 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2919}
2920
1e317b99
RZ
2921static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2922{
2923 struct amdgpu_device *adev =
2924 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2925
90a92662
MD
2926 WARN_ON_ONCE(adev->gfx.gfx_off_state);
2927 WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
2928
2929 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2930 adev->gfx.gfx_off_state = true;
1e317b99
RZ
2931}
2932
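/*
 * Illustrative sketch, not part of this file: the work item above is the
 * delayed tail of the GFXOFF reference counting done by
 * amdgpu_gfx_off_ctrl() in amdgpu_gfx.c.  A submission path conceptually
 * brackets GPU work like this; example_submit() is made up.
 */
static void example_submit(struct amdgpu_device *adev)
{
        amdgpu_gfx_off_ctrl(adev, false);	/* keep GFX out of GFXOFF while busy */

        /* ... build, submit and fence the job ... */

        amdgpu_gfx_off_ctrl(adev, true);	/* re-arm the delayed GFXOFF enable */
}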
e3ecdffa 2933/**
e7854a03 2934 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2935 *
2936 * @adev: amdgpu_device pointer
2937 *
2938 * Main suspend function for hardware IPs. The list of all the hardware
2939 * IPs that make up the asic is walked, clockgating is disabled and the
2940 * suspend callbacks are run. suspend puts the hardware and software state
2941 * in each IP into a state suitable for suspend.
2942 * Returns 0 on success, negative error code on failure.
2943 */
e7854a03
AD
2944static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2945{
2946 int i, r;
2947
50ec83f0
AD
2948 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2949 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2950
b31d6ada
EQ
2951 /*
2952 * Per the PMFW team's suggestion, the driver needs to disable the gfxoff
2953 * and df cstate features for gpu reset (e.g. Mode1Reset)
2954 * scenarios. Add the missing df cstate disablement here.
2955 */
2956 if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
2957 dev_warn(adev->dev, "Failed to disallow df cstate");
2958
e7854a03
AD
2959 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2960 if (!adev->ip_blocks[i].status.valid)
2961 continue;
2b9f7848 2962
e7854a03 2963 /* displays are handled separately */
2b9f7848
ND
2964 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
2965 continue;
2966
2967 /* XXX handle errors */
2968 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2969 /* XXX handle errors */
2970 if (r) {
2971 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2972 adev->ip_blocks[i].version->funcs->name, r);
2973 return r;
e7854a03 2974 }
2b9f7848
ND
2975
2976 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
2977 }
2978
e7854a03
AD
2979 return 0;
2980}
2981
2982/**
2983 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2984 *
2985 * @adev: amdgpu_device pointer
2986 *
2987 * Main suspend function for hardware IPs. The list of all the hardware
2988 * IPs that make up the asic is walked, clockgating is disabled and the
2989 * suspend callbacks are run. suspend puts the hardware and software state
2990 * in each IP into a state suitable for suspend.
2991 * Returns 0 on success, negative error code on failure.
2992 */
2993static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2994{
2995 int i, r;
2996
557f42a2 2997 if (adev->in_s0ix)
bc143d8b 2998 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
34416931 2999
d38ceaf9 3000 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3001 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 3002 continue;
e7854a03
AD
3003 /* displays are handled in phase1 */
3004 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3005 continue;
bff77e86
LM
3006 /* PSP lost connection when err_event_athub occurs */
3007 if (amdgpu_ras_intr_triggered() &&
3008 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3009 adev->ip_blocks[i].status.hw = false;
3010 continue;
3011 }
e3c1b071 3012
3013 /* skip unnecessary suspend if we have not initialized them yet */
3014 if (adev->gmc.xgmi.pending_reset &&
3015 !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3016 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
3017 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3018 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
3019 adev->ip_blocks[i].status.hw = false;
3020 continue;
3021 }
557f42a2 3022
5620a188 3023 /* skip suspend of gfx/mes and psp for S0ix
32ff160d
AD
3024 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3025 * like at runtime. PSP is also part of the always on hardware
3026 * so no need to suspend it.
3027 */
557f42a2 3028 if (adev->in_s0ix &&
32ff160d 3029 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
5620a188
AD
3030 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3031 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
557f42a2
AD
3032 continue;
3033
2a7798ea
AD
3034 /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3035 if (adev->in_s0ix &&
3036 (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 0, 0)) &&
3037 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3038 continue;
3039
d38ceaf9 3040 /* XXX handle errors */
a1255107 3041 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 3042 /* XXX handle errors */
2c1a2784 3043 if (r) {
a1255107
AD
3044 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3045 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 3046 }
876923fb 3047 adev->ip_blocks[i].status.hw = false;
a3a09142 3048 /* handle putting the SMC in the appropriate state */
86b93fd6
JZ
3049 if (!amdgpu_sriov_vf(adev)) {
3050 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3051 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3052 if (r) {
3053 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3054 adev->mp1_state, r);
3055 return r;
3056 }
a3a09142
AD
3057 }
3058 }
d38ceaf9
AD
3059 }
3060
3061 return 0;
3062}
3063
e7854a03
AD
3064/**
3065 * amdgpu_device_ip_suspend - run suspend for hardware IPs
3066 *
3067 * @adev: amdgpu_device pointer
3068 *
3069 * Main suspend function for hardware IPs. The list of all the hardware
3070 * IPs that make up the asic is walked, clockgating is disabled and the
3071 * suspend callbacks are run. suspend puts the hardware and software state
3072 * in each IP into a state suitable for suspend.
3073 * Returns 0 on success, negative error code on failure.
3074 */
3075int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3076{
3077 int r;
3078
3c73683c
JC
3079 if (amdgpu_sriov_vf(adev)) {
3080 amdgpu_virt_fini_data_exchange(adev);
e7819644 3081 amdgpu_virt_request_full_gpu(adev, false);
3c73683c 3082 }
e7819644 3083
e7854a03
AD
3084 r = amdgpu_device_ip_suspend_phase1(adev);
3085 if (r)
3086 return r;
3087 r = amdgpu_device_ip_suspend_phase2(adev);
3088
e7819644
YT
3089 if (amdgpu_sriov_vf(adev))
3090 amdgpu_virt_release_full_gpu(adev, false);
3091
e7854a03
AD
3092 return r;
3093}
3094
06ec9070 3095static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3096{
3097 int i, r;
3098
2cb681b6 3099 static enum amd_ip_block_type ip_order[] = {
2cb681b6 3100 AMD_IP_BLOCK_TYPE_COMMON,
c1c39032 3101 AMD_IP_BLOCK_TYPE_GMC,
39186aef 3102 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
3103 AMD_IP_BLOCK_TYPE_IH,
3104 };
a90ad3c2 3105
95ea3dbc 3106 for (i = 0; i < adev->num_ip_blocks; i++) {
2cb681b6
ML
3107 int j;
3108 struct amdgpu_ip_block *block;
a90ad3c2 3109
4cd2a96d
J
3110 block = &adev->ip_blocks[i];
3111 block->status.hw = false;
2cb681b6 3112
4cd2a96d 3113 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
2cb681b6 3114
4cd2a96d 3115 if (block->version->type != ip_order[j] ||
2cb681b6
ML
3116 !block->status.valid)
3117 continue;
3118
3119 r = block->version->funcs->hw_init(adev);
0aaeefcc 3120 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3121 if (r)
3122 return r;
482f0e53 3123 block->status.hw = true;
a90ad3c2
ML
3124 }
3125 }
3126
3127 return 0;
3128}
3129
06ec9070 3130static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3131{
3132 int i, r;
3133
2cb681b6
ML
3134 static enum amd_ip_block_type ip_order[] = {
3135 AMD_IP_BLOCK_TYPE_SMC,
3136 AMD_IP_BLOCK_TYPE_DCE,
3137 AMD_IP_BLOCK_TYPE_GFX,
3138 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c 3139 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07
JJ
3140 AMD_IP_BLOCK_TYPE_VCE,
3141 AMD_IP_BLOCK_TYPE_VCN
2cb681b6 3142 };
a90ad3c2 3143
2cb681b6
ML
3144 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3145 int j;
3146 struct amdgpu_ip_block *block;
a90ad3c2 3147
2cb681b6
ML
3148 for (j = 0; j < adev->num_ip_blocks; j++) {
3149 block = &adev->ip_blocks[j];
3150
3151 if (block->version->type != ip_order[i] ||
482f0e53
ML
3152 !block->status.valid ||
3153 block->status.hw)
2cb681b6
ML
3154 continue;
3155
895bd048
JZ
3156 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3157 r = block->version->funcs->resume(adev);
3158 else
3159 r = block->version->funcs->hw_init(adev);
3160
0aaeefcc 3161 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3162 if (r)
3163 return r;
482f0e53 3164 block->status.hw = true;
a90ad3c2
ML
3165 }
3166 }
3167
3168 return 0;
3169}
3170
e3ecdffa
AD
3171/**
3172 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3173 *
3174 * @adev: amdgpu_device pointer
3175 *
3176 * First resume function for hardware IPs. The list of all the hardware
3177 * IPs that make up the asic is walked and the resume callbacks are run for
3178 * COMMON, GMC, and IH. resume puts the hardware into a functional state
3179 * after a suspend and updates the software state as necessary. This
3180 * function is also used for restoring the GPU after a GPU reset.
3181 * Returns 0 on success, negative error code on failure.
3182 */
06ec9070 3183static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
3184{
3185 int i, r;
3186
a90ad3c2 3187 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3188 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 3189 continue;
a90ad3c2 3190 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3191 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
d7274ec7
BZ
3192 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3193 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
482f0e53 3194
fcf0649f
CZ
3195 r = adev->ip_blocks[i].version->funcs->resume(adev);
3196 if (r) {
3197 DRM_ERROR("resume of IP block <%s> failed %d\n",
3198 adev->ip_blocks[i].version->funcs->name, r);
3199 return r;
3200 }
482f0e53 3201 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
3202 }
3203 }
3204
3205 return 0;
3206}
3207
e3ecdffa
AD
3208/**
3209 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3210 *
3211 * @adev: amdgpu_device pointer
3212 *
3213 * Second resume function for hardware IPs. The list of all the hardware
3214 * IPs that make up the asic is walked and the resume callbacks are run for
3215 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
3216 * functional state after a suspend and updates the software state as
3217 * necessary. This function is also used for restoring the GPU after a GPU
3218 * reset.
3219 * Returns 0 on success, negative error code on failure.
3220 */
06ec9070 3221static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3222{
3223 int i, r;
3224
3225 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3226 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 3227 continue;
fcf0649f 3228 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3229 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
3230 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3231 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 3232 continue;
a1255107 3233 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 3234 if (r) {
a1255107
AD
3235 DRM_ERROR("resume of IP block <%s> failed %d\n",
3236 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 3237 return r;
2c1a2784 3238 }
482f0e53 3239 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
3240 }
3241
3242 return 0;
3243}
3244
e3ecdffa
AD
3245/**
3246 * amdgpu_device_ip_resume - run resume for hardware IPs
3247 *
3248 * @adev: amdgpu_device pointer
3249 *
3250 * Main resume function for hardware IPs. The hardware IPs
3251 * are split into two resume functions because they are
3252 * also used in recovering from a GPU reset and some additional
3253 * steps need to be taken between them. In this case (S3/S4) they are
3254 * run sequentially.
3255 * Returns 0 on success, negative error code on failure.
3256 */
06ec9070 3257static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
3258{
3259 int r;
3260
9cec53c1
JZ
3261 r = amdgpu_amdkfd_resume_iommu(adev);
3262 if (r)
3263 return r;
3264
06ec9070 3265 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
3266 if (r)
3267 return r;
7a3e0bb2
RZ
3268
3269 r = amdgpu_device_fw_loading(adev);
3270 if (r)
3271 return r;
3272
06ec9070 3273 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
3274
3275 return r;
3276}
3277
e3ecdffa
AD
3278/**
3279 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3280 *
3281 * @adev: amdgpu_device pointer
3282 *
3283 * Query the VBIOS data tables to determine if the board supports SR-IOV.
3284 */
4e99a44e 3285static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 3286{
6867e1b5
ML
3287 if (amdgpu_sriov_vf(adev)) {
3288 if (adev->is_atom_fw) {
58ff791a 3289 if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
6867e1b5
ML
3290 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3291 } else {
3292 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3293 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3294 }
3295
3296 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3297 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 3298 }
048765ad
AR
3299}
3300
e3ecdffa
AD
3301/**
3302 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3303 *
3304 * @asic_type: AMD asic type
3305 *
3306 * Check if there is DC (new modesetting infrastructure) support for an asic.
3307 * returns true if DC has support, false if not.
3308 */
4562236b
HW
3309bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3310{
3311 switch (asic_type) {
0637d417
AD
3312#ifdef CONFIG_DRM_AMDGPU_SI
3313 case CHIP_HAINAN:
3314#endif
3315 case CHIP_TOPAZ:
3316 /* chips with no display hardware */
3317 return false;
4562236b 3318#if defined(CONFIG_DRM_AMD_DC)
64200c46
MR
3319 case CHIP_TAHITI:
3320 case CHIP_PITCAIRN:
3321 case CHIP_VERDE:
3322 case CHIP_OLAND:
2d32ffd6
AD
3323 /*
3324 * We have systems in the wild with these ASICs that require
3325 * LVDS and VGA support which is not supported with DC.
3326 *
3327 * Fallback to the non-DC driver here by default so as not to
3328 * cause regressions.
3329 */
3330#if defined(CONFIG_DRM_AMD_DC_SI)
3331 return amdgpu_dc > 0;
3332#else
3333 return false;
64200c46 3334#endif
4562236b 3335 case CHIP_BONAIRE:
0d6fbccb 3336 case CHIP_KAVERI:
367e6687
AD
3337 case CHIP_KABINI:
3338 case CHIP_MULLINS:
d9fda248
HW
3339 /*
3340 * We have systems in the wild with these ASICs that require
b5a0168e 3341 * VGA support which is not supported with DC.
d9fda248
HW
3342 *
3343 * Fallback to the non-DC driver here by default so as not to
3344 * cause regressions.
3345 */
3346 return amdgpu_dc > 0;
f7f12b25 3347 default:
fd187853 3348 return amdgpu_dc != 0;
f7f12b25 3349#else
4562236b 3350 default:
93b09a9a 3351 if (amdgpu_dc > 0)
044a48f4 3352 DRM_INFO_ONCE("Display Core has been requested via kernel parameter "
93b09a9a 3353 "but isn't supported by ASIC, ignoring\n");
4562236b 3354 return false;
f7f12b25 3355#endif
4562236b
HW
3356 }
3357}
3358
3359/**
3360 * amdgpu_device_has_dc_support - check if dc is supported
3361 *
982a820b 3362 * @adev: amdgpu_device pointer
4562236b
HW
3363 *
3364 * Returns true for supported, false for not supported
3365 */
3366bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3367{
25263da3 3368 if (adev->enable_virtual_display ||
abaf210c 3369 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
2555039d
XY
3370 return false;
3371
4562236b
HW
3372 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3373}
3374
d4535e2c
AG
3375static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3376{
3377 struct amdgpu_device *adev =
3378 container_of(__work, struct amdgpu_device, xgmi_reset_work);
d95e8e97 3379 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
d4535e2c 3380
c6a6e2db
AG
3381 /* It's a bug to not have a hive within this function */
3382 if (WARN_ON(!hive))
3383 return;
3384
3385 /*
3386 * Use task barrier to synchronize all xgmi reset works across the
3387 * hive. task_barrier_enter and task_barrier_exit will block
3388 * until all the threads running the xgmi reset works reach
3389 * those points. task_barrier_full will do both blocks.
3390 */
3391 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3392
3393 task_barrier_enter(&hive->tb);
4a580877 3394 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
c6a6e2db
AG
3395
3396 if (adev->asic_reset_res)
3397 goto fail;
3398
3399 task_barrier_exit(&hive->tb);
4a580877 3400 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
c6a6e2db
AG
3401
3402 if (adev->asic_reset_res)
3403 goto fail;
43c4d576 3404
5e67bba3 3405 if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
3406 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
3407 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
c6a6e2db
AG
3408 } else {
3409
3410 task_barrier_full(&hive->tb);
3411 adev->asic_reset_res = amdgpu_asic_reset(adev);
3412 }
ce316fa5 3413
c6a6e2db 3414fail:
d4535e2c 3415 if (adev->asic_reset_res)
fed184e9 3416 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
4a580877 3417 adev->asic_reset_res, adev_to_drm(adev)->unique);
d95e8e97 3418 amdgpu_put_xgmi_hive(hive);
d4535e2c
AG
3419}
3420
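/*
 * The amdgpu.lockup_timeout parameter parsed below is a comma separated
 * list of up to four values in milliseconds, applied in the order
 * gfx, compute, sdma, video (e.g. "10000,60000,10000,10000").
 * A value of 0 keeps the default for that queue type, a negative value
 * disables the timeout (MAX_SCHEDULE_TIMEOUT), and a single value is
 * applied to all non-compute queues (and also to compute jobs under
 * SR-IOV or passthrough).
 */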
71f98027
AD
3421static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3422{
3423 char *input = amdgpu_lockup_timeout;
3424 char *timeout_setting = NULL;
3425 int index = 0;
3426 long timeout;
3427 int ret = 0;
3428
3429 /*
67387dfe
AD
3430 * By default the timeout for non-compute jobs is 10000
3431 * and 60000 for compute jobs.
71f98027 3432 * Under SR-IOV, the compute timeout defaults to 60000 in
b7b2a316 3433 * pp_one_vf mode and 10000 otherwise.
71f98027
AD
3434 */
3435 adev->gfx_timeout = msecs_to_jiffies(10000);
3436 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
9882e278
ED
3437 if (amdgpu_sriov_vf(adev))
3438 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3439 msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
71f98027 3440 else
67387dfe 3441 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027 3442
f440ff44 3443 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 3444 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 3445 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
3446 ret = kstrtol(timeout_setting, 0, &timeout);
3447 if (ret)
3448 return ret;
3449
3450 if (timeout == 0) {
3451 index++;
3452 continue;
3453 } else if (timeout < 0) {
3454 timeout = MAX_SCHEDULE_TIMEOUT;
127aedf9
CK
3455 dev_warn(adev->dev, "lockup timeout disabled");
3456 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
71f98027
AD
3457 } else {
3458 timeout = msecs_to_jiffies(timeout);
3459 }
3460
3461 switch (index++) {
3462 case 0:
3463 adev->gfx_timeout = timeout;
3464 break;
3465 case 1:
3466 adev->compute_timeout = timeout;
3467 break;
3468 case 2:
3469 adev->sdma_timeout = timeout;
3470 break;
3471 case 3:
3472 adev->video_timeout = timeout;
3473 break;
3474 default:
3475 break;
3476 }
3477 }
3478 /*
3479 * There is only one value specified and
3480 * it should apply to all non-compute jobs.
3481 */
bcccee89 3482 if (index == 1) {
71f98027 3483 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
3484 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3485 adev->compute_timeout = adev->gfx_timeout;
3486 }
71f98027
AD
3487 }
3488
3489 return ret;
3490}
d4535e2c 3491
4a74c38c
PY
3492/**
3493 * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3494 *
3495 * @adev: amdgpu_device pointer
3496 *
3497 * RAM is direct mapped to the GPU if the IOMMU is not enabled or is in passthrough (identity) mode
3498 */
3499static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3500{
3501 struct iommu_domain *domain;
3502
3503 domain = iommu_get_domain_for_dev(adev->dev);
3504 if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3505 adev->ram_is_direct_mapped = true;
3506}
3507
77f3a5cd
ND
3508static const struct attribute *amdgpu_dev_attributes[] = {
3509 &dev_attr_product_name.attr,
3510 &dev_attr_product_number.attr,
3511 &dev_attr_serial_number.attr,
3512 &dev_attr_pcie_replay_count.attr,
3513 NULL
3514};
3515
d38ceaf9
AD
3516/**
3517 * amdgpu_device_init - initialize the driver
3518 *
3519 * @adev: amdgpu_device pointer
d38ceaf9
AD
3520 * @flags: driver flags
3521 *
3522 * Initializes the driver info and hw (all asics).
3523 * Returns 0 for success or an error on failure.
3524 * Called at driver startup.
3525 */
3526int amdgpu_device_init(struct amdgpu_device *adev,
d38ceaf9
AD
3527 uint32_t flags)
3528{
8aba21b7
LT
3529 struct drm_device *ddev = adev_to_drm(adev);
3530 struct pci_dev *pdev = adev->pdev;
d38ceaf9 3531 int r, i;
b98c6299 3532 bool px = false;
95844d20 3533 u32 max_MBps;
d38ceaf9
AD
3534
3535 adev->shutdown = false;
d38ceaf9 3536 adev->flags = flags;
4e66d7d2
YZ
3537
3538 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3539 adev->asic_type = amdgpu_force_asic_type;
3540 else
3541 adev->asic_type = flags & AMD_ASIC_MASK;
3542
d38ceaf9 3543 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 3544 if (amdgpu_emu_mode == 1)
8bdab6bb 3545 adev->usec_timeout *= 10;
770d13b1 3546 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
3547 adev->accel_working = false;
3548 adev->num_rings = 0;
68ce8b24 3549 RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
d38ceaf9
AD
3550 adev->mman.buffer_funcs = NULL;
3551 adev->mman.buffer_funcs_ring = NULL;
3552 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 3553 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 3554 adev->gmc.gmc_funcs = NULL;
7bd939d0 3555 adev->harvest_ip_mask = 0x0;
f54d1867 3556 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 3557 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
3558
3559 adev->smc_rreg = &amdgpu_invalid_rreg;
3560 adev->smc_wreg = &amdgpu_invalid_wreg;
3561 adev->pcie_rreg = &amdgpu_invalid_rreg;
3562 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
3563 adev->pciep_rreg = &amdgpu_invalid_rreg;
3564 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
3565 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3566 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
3567 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3568 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3569 adev->didt_rreg = &amdgpu_invalid_rreg;
3570 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
3571 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3572 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
3573 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3574 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3575
3e39ab90
AD
3576 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3577 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3578 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
3579
3580 /* mutex initializations are all done here so we
3581 * can recall functions without having locking issues */
0e5ca0d1 3582 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
3583 mutex_init(&adev->pm.mutex);
3584 mutex_init(&adev->gfx.gpu_clock_mutex);
3585 mutex_init(&adev->srbm_mutex);
b8866c26 3586 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3587 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 3588 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3589 mutex_init(&adev->mn_lock);
e23b74aa 3590 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3591 hash_init(adev->mn_hash);
32eaeae0 3592 mutex_init(&adev->psp.mutex);
bd052211 3593 mutex_init(&adev->notifier_lock);
8cda7a4f 3594 mutex_init(&adev->pm.stable_pstate_ctx_lock);
f113cc32 3595 mutex_init(&adev->benchmark_mutex);
d38ceaf9 3596
ab3b9de6 3597 amdgpu_device_init_apu_flags(adev);
9f6a7857 3598
912dfc84
EQ
3599 r = amdgpu_device_check_arguments(adev);
3600 if (r)
3601 return r;
d38ceaf9 3602
d38ceaf9
AD
3603 spin_lock_init(&adev->mmio_idx_lock);
3604 spin_lock_init(&adev->smc_idx_lock);
3605 spin_lock_init(&adev->pcie_idx_lock);
3606 spin_lock_init(&adev->uvd_ctx_idx_lock);
3607 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3608 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3609 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3610 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3611 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3612
0c4e7fa5
CZ
3613 INIT_LIST_HEAD(&adev->shadow_list);
3614 mutex_init(&adev->shadow_list_lock);
3615
655ce9cb 3616 INIT_LIST_HEAD(&adev->reset_list);
3617
6492e1b0 3618 INIT_LIST_HEAD(&adev->ras_list);
3619
beff74bc
AD
3620 INIT_DELAYED_WORK(&adev->delayed_init_work,
3621 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3622 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3623 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3624
d4535e2c
AG
3625 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3626
d23ee13f 3627 adev->gfx.gfx_off_req_count = 1;
0ad7347a
AA
3628 adev->gfx.gfx_off_residency = 0;
3629 adev->gfx.gfx_off_entrycount = 0;
b6e79d9a 3630 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3631
b265bdbd
EQ
3632 atomic_set(&adev->throttling_logging_enabled, 1);
3633 /*
3634 * If throttling continues, logging will be performed every minute
3635 * to avoid log flooding. "-1" is subtracted since the thermal
3636 * throttling interrupt comes every second. Thus, the total logging
3637 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
3638 * for throttling interrupt) = 60 seconds.
3639 */
3640 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3641 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3642
0fa49558
AX
3643 /* Registers mapping */
3644 /* TODO: block userspace mapping of io register */
da69c161
KW
3645 if (adev->asic_type >= CHIP_BONAIRE) {
3646 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3647 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3648 } else {
3649 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3650 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3651 }
d38ceaf9 3652
6c08e0ef
EQ
3653 for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3654 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3655
d38ceaf9
AD
3656 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3657 if (adev->rmmio == NULL) {
3658 return -ENOMEM;
3659 }
3660 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3661 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3662
5494d864
AD
3663 amdgpu_device_get_pcie_info(adev);
3664
b239c017
JX
3665 if (amdgpu_mcbp)
3666 DRM_INFO("MCBP is enabled\n");
3667
436afdfa
PY
3668 /*
3669 * Reset domain needs to be present early, before the XGMI hive is
3670 * discovered (if any) and initialized, in order to use the reset sem and
3671 * in_gpu reset flag early on during init and before calling RREG32.
3672 */
3673 adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3674 if (!adev->reset_domain)
3675 return -ENOMEM;
3676
3aa0115d
ML
3677 /* detect hw virtualization here */
3678 amdgpu_detect_virtualization(adev);
3679
dffa11b4
ML
3680 r = amdgpu_device_get_job_timeout_settings(adev);
3681 if (r) {
3682 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4ef87d8f 3683 return r;
a190d1c7
XY
3684 }
3685
d38ceaf9 3686 /* early init functions */
06ec9070 3687 r = amdgpu_device_ip_early_init(adev);
d38ceaf9 3688 if (r)
4ef87d8f 3689 return r;
d38ceaf9 3690
b7cdb41e
ML
3691 /* Get rid of things like offb */
3692 r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
3693 if (r)
3694 return r;
3695
4d33e704
SK
3696 /* Enable TMZ based on IP_VERSION */
3697 amdgpu_gmc_tmz_set(adev);
3698
957b0787 3699 amdgpu_gmc_noretry_set(adev);
4a0165f0
VS
3700 /* Need to get xgmi info early to decide the reset behavior */
3701 if (adev->gmc.xgmi.supported) {
3702 r = adev->gfxhub.funcs->get_xgmi_info(adev);
3703 if (r)
3704 return r;
3705 }
3706
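/*
 * PCIe atomics: under SR-IOV the PF advertises completer support through
 * the pf2vf exchange structure, while on bare metal the PCI core is asked
 * to enable 32- and 64-bit AtomicOp requests towards the root port;
 * have_atomics_support records whether both widths are usable.
 */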
8e6d0b69 3707 /* enable PCIE atomic ops */
3708 if (amdgpu_sriov_vf(adev))
3709 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
e15c9d06 3710 adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
8e6d0b69 3711 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3712 else
3713 adev->have_atomics_support =
3714 !pci_enable_atomic_ops_to_root(adev->pdev,
3715 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3716 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3717 if (!adev->have_atomics_support)
3718 dev_info(adev->dev, "PCIE atomic ops is not supported\n");
3719
6585661d
OZ
3720 /* doorbell bar mapping and doorbell index init */
3721 amdgpu_device_doorbell_init(adev);
3722
9475a943
SL
3723 if (amdgpu_emu_mode == 1) {
3724 /* post the asic on emulation mode */
3725 emu_soc_asic_init(adev);
bfca0289 3726 goto fence_driver_init;
9475a943 3727 }
bfca0289 3728
04442bf7
LL
3729 amdgpu_reset_init(adev);
3730
4e99a44e
ML
3731 /* detect if we are with an SRIOV vbios */
3732 amdgpu_device_detect_sriov_bios(adev);
048765ad 3733
95e8e59e
AD
3734 /* check if we need to reset the asic
3735 * E.g., driver was not cleanly unloaded previously, etc.
3736 */
f14899fd 3737 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
e3c1b071 3738 if (adev->gmc.xgmi.num_physical_nodes) {
3739 dev_info(adev->dev, "Pending hive reset.\n");
3740 adev->gmc.xgmi.pending_reset = true;
3741 /* Only need to init necessary block for SMU to handle the reset */
3742 for (i = 0; i < adev->num_ip_blocks; i++) {
3743 if (!adev->ip_blocks[i].status.valid)
3744 continue;
3745 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3746 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3747 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3748 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
751f43e7 3749 DRM_DEBUG("IP %s disabled for hw_init.\n",
e3c1b071 3750 adev->ip_blocks[i].version->funcs->name);
3751 adev->ip_blocks[i].status.hw = true;
3752 }
3753 }
3754 } else {
3755 r = amdgpu_asic_reset(adev);
3756 if (r) {
3757 dev_err(adev->dev, "asic reset on init failed\n");
3758 goto failed;
3759 }
95e8e59e
AD
3760 }
3761 }
3762
8f66090b 3763 pci_enable_pcie_error_reporting(adev->pdev);
c9a6b82f 3764
d38ceaf9 3765 /* Post card if necessary */
39c640c0 3766 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3767 if (!adev->bios) {
bec86378 3768 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3769 r = -EINVAL;
3770 goto failed;
d38ceaf9 3771 }
bec86378 3772 DRM_INFO("GPU posting now...\n");
4d2997ab 3773 r = amdgpu_device_asic_init(adev);
4e99a44e
ML
3774 if (r) {
3775 dev_err(adev->dev, "gpu post error!\n");
3776 goto failed;
3777 }
d38ceaf9
AD
3778 }
3779
88b64e95
AD
3780 if (adev->is_atom_fw) {
3781 /* Initialize clocks */
3782 r = amdgpu_atomfirmware_get_clock_info(adev);
3783 if (r) {
3784 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 3785 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
3786 goto failed;
3787 }
3788 } else {
a5bde2f9
AD
3789 /* Initialize clocks */
3790 r = amdgpu_atombios_get_clock_info(adev);
3791 if (r) {
3792 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 3793 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 3794 goto failed;
a5bde2f9
AD
3795 }
3796 /* init i2c buses */
4562236b
HW
3797 if (!amdgpu_device_has_dc_support(adev))
3798 amdgpu_atombios_i2c_init(adev);
2c1a2784 3799 }
d38ceaf9 3800
bfca0289 3801fence_driver_init:
d38ceaf9 3802 /* Fence driver */
067f44c8 3803 r = amdgpu_fence_driver_sw_init(adev);
2c1a2784 3804 if (r) {
067f44c8 3805 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
e23b74aa 3806 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3807 goto failed;
2c1a2784 3808 }
d38ceaf9
AD
3809
3810 /* init the mode config */
4a580877 3811 drm_mode_config_init(adev_to_drm(adev));
d38ceaf9 3812
06ec9070 3813 r = amdgpu_device_ip_init(adev);
d38ceaf9 3814 if (r) {
8840a387 3815 /* failed in exclusive mode due to timeout */
3816 if (amdgpu_sriov_vf(adev) &&
3817 !amdgpu_sriov_runtime(adev) &&
3818 amdgpu_virt_mmio_blocked(adev) &&
3819 !amdgpu_virt_wait_reset(adev)) {
3820 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
3821 /* Don't send request since VF is inactive. */
3822 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3823 adev->virt.ops = NULL;
8840a387 3824 r = -EAGAIN;
970fd197 3825 goto release_ras_con;
8840a387 3826 }
06ec9070 3827 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3828 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
970fd197 3829 goto release_ras_con;
d38ceaf9
AD
3830 }
3831
8d35a259
LG
3832 amdgpu_fence_driver_hw_init(adev);
3833
d69b8971
YZ
3834 dev_info(adev->dev,
3835 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
3836 adev->gfx.config.max_shader_engines,
3837 adev->gfx.config.max_sh_per_se,
3838 adev->gfx.config.max_cu_per_sh,
3839 adev->gfx.cu_info.number);
3840
d38ceaf9
AD
3841 adev->accel_working = true;
3842
e59c0205
AX
3843 amdgpu_vm_check_compute_bug(adev);
3844
95844d20
MO
3845 /* Initialize the buffer migration limit. */
3846 if (amdgpu_moverate >= 0)
3847 max_MBps = amdgpu_moverate;
3848 else
3849 max_MBps = 8; /* Allow 8 MB/s. */
3850 /* Get a log2 for easy divisions. */
3851 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
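 /* Example: the default 8 MB/s gives log2_max_MBps = 3 (ilog2(8)), so code
  * that consumes mm_stats can turn a rate budget into a byte budget with a
  * shift instead of a divide.
  */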
3852
d2f52ac8 3853 r = amdgpu_pm_sysfs_init(adev);
7c868b59
YT
3854 if (r) {
3855 adev->pm_sysfs_en = false;
d2f52ac8 3856 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
7c868b59
YT
3857 } else
3858 adev->pm_sysfs_en = true;
d2f52ac8 3859
5bb23532 3860 r = amdgpu_ucode_sysfs_init(adev);
7c868b59
YT
3861 if (r) {
3862 adev->ucode_sysfs_en = false;
5bb23532 3863 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
7c868b59
YT
3864 } else
3865 adev->ucode_sysfs_en = true;
5bb23532 3866
8424f2cc
LG
3867 r = amdgpu_psp_sysfs_init(adev);
3868 if (r) {
3869 adev->psp_sysfs_en = false;
3870 if (!amdgpu_sriov_vf(adev))
3871 DRM_ERROR("Creating psp sysfs failed\n");
3872 } else
3873 adev->psp_sysfs_en = true;
3874
b0adca4d
EQ
3875 /*
3876 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3877 * Otherwise the mgpu fan boost feature will be skipped because the
3878 * gpu instance count would be too low.
3879 */
3880 amdgpu_register_gpu_instance(adev);
3881
d38ceaf9
AD
3882 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3883 * explicit gating rather than handling it automatically.
3884 */
e3c1b071 3885 if (!adev->gmc.xgmi.pending_reset) {
3886 r = amdgpu_device_ip_late_init(adev);
3887 if (r) {
3888 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
3889 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
970fd197 3890 goto release_ras_con;
e3c1b071 3891 }
3892 /* must succeed. */
3893 amdgpu_ras_resume(adev);
3894 queue_delayed_work(system_wq, &adev->delayed_init_work,
3895 msecs_to_jiffies(AMDGPU_RESUME_MS));
2c1a2784 3896 }
d38ceaf9 3897
2c738637
ML
3898 if (amdgpu_sriov_vf(adev))
3899 flush_delayed_work(&adev->delayed_init_work);
3900
77f3a5cd 3901 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
5aea5327 3902 if (r)
77f3a5cd 3903 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166 3904
d155bef0
AB
3905 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3906 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
3907 if (r)
3908 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3909
c1dd4aa6
AG
3910 /* Have stored pci confspace at hand for restore in sudden PCI error */
3911 if (amdgpu_device_cache_pci_state(adev->pdev))
3912 pci_restore_state(pdev);
3913
8c3dd61c
KHF
3914 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
3915 /* this will fail for cards that aren't VGA class devices, just
3916 * ignore it */
3917 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
bf44e8ce 3918 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
8c3dd61c
KHF
3919
3920 if (amdgpu_device_supports_px(ddev)) {
3921 px = true;
3922 vga_switcheroo_register_client(adev->pdev,
3923 &amdgpu_switcheroo_ops, px);
3924 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
3925 }
3926
e3c1b071 3927 if (adev->gmc.xgmi.pending_reset)
3928 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
3929 msecs_to_jiffies(AMDGPU_RESUME_MS));
3930
4a74c38c
PY
3931 amdgpu_device_check_iommu_direct_map(adev);
3932
d38ceaf9 3933 return 0;
83ba126a 3934
970fd197
SY
3935release_ras_con:
3936 amdgpu_release_ras_context(adev);
3937
83ba126a 3938failed:
89041940 3939 amdgpu_vf_error_trans_all(adev);
8840a387 3940
83ba126a 3941 return r;
d38ceaf9
AD
3942}
3943
07775fc1
AG
3944static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
3945{
62d5f9f7 3946
07775fc1
AG
3947 /* Clear all CPU mappings pointing to this device */
3948 unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
3949
3950 /* Unmap all mapped bars - Doorbell, registers and VRAM */
3951 amdgpu_device_doorbell_fini(adev);
3952
3953 iounmap(adev->rmmio);
3954 adev->rmmio = NULL;
3955 if (adev->mman.aper_base_kaddr)
3956 iounmap(adev->mman.aper_base_kaddr);
3957 adev->mman.aper_base_kaddr = NULL;
3958
3959 /* Memory manager related */
3960 if (!adev->gmc.xgmi.connected_to_cpu) {
3961 arch_phys_wc_del(adev->gmc.vram_mtrr);
3962 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
3963 }
3964}
3965
d38ceaf9 3966/**
bbe04dec 3967 * amdgpu_device_fini_hw - tear down the driver
d38ceaf9
AD
3968 *
3969 * @adev: amdgpu_device pointer
3970 *
3971 * Tear down the driver info (all asics).
3972 * Called at driver shutdown.
3973 */
72c8c97b 3974void amdgpu_device_fini_hw(struct amdgpu_device *adev)
d38ceaf9 3975{
aac89168 3976 dev_info(adev->dev, "amdgpu: finishing device.\n");
9f875167 3977 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 3978 adev->shutdown = true;
9f875167 3979
752c683d
ML
3980 /* make sure IB test finished before entering exclusive mode
3981 * to avoid preemption on IB test
3982 */
519b8b76 3983 if (amdgpu_sriov_vf(adev)) {
752c683d 3984 amdgpu_virt_request_full_gpu(adev, false);
519b8b76
BZ
3985 amdgpu_virt_fini_data_exchange(adev);
3986 }
752c683d 3987
e5b03032
ML
3988 /* disable all interrupts */
3989 amdgpu_irq_disable_all(adev);
ff97cba8 3990 if (adev->mode_info.mode_config_initialized){
1053b9c9 3991 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4a580877 3992 drm_helper_force_disable_all(adev_to_drm(adev));
ff97cba8 3993 else
4a580877 3994 drm_atomic_helper_shutdown(adev_to_drm(adev));
ff97cba8 3995 }
8d35a259 3996 amdgpu_fence_driver_hw_fini(adev);
72c8c97b 3997
98f56188
YY
3998 if (adev->mman.initialized) {
3999 flush_delayed_work(&adev->mman.bdev.wq);
4000 ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
4001 }
4002
7c868b59
YT
4003 if (adev->pm_sysfs_en)
4004 amdgpu_pm_sysfs_fini(adev);
72c8c97b
AG
4005 if (adev->ucode_sysfs_en)
4006 amdgpu_ucode_sysfs_fini(adev);
8424f2cc
LG
4007 if (adev->psp_sysfs_en)
4008 amdgpu_psp_sysfs_fini(adev);
72c8c97b
AG
4009 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
4010
232d1d43
SY
4011 /* disable ras feature must before hw fini */
4012 amdgpu_ras_pre_fini(adev);
4013
e9669fb7 4014 amdgpu_device_ip_fini_early(adev);
d10d0daa 4015
a3848df6
YW
4016 amdgpu_irq_fini_hw(adev);
4017
b6fd6e0f
SK
4018 if (adev->mman.initialized)
4019 ttm_device_clear_dma_mappings(&adev->mman.bdev);
894c6890 4020
d10d0daa 4021 amdgpu_gart_dummy_page_fini(adev);
07775fc1 4022
fac53471 4023 amdgpu_device_unmap_mmio(adev);
87172e89 4024
72c8c97b
AG
4025}
4026
4027void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4028{
62d5f9f7
LS
4029 int idx;
4030
8d35a259 4031 amdgpu_fence_driver_sw_fini(adev);
a5c5d8d5 4032 amdgpu_device_ip_fini(adev);
b31d3063 4033 amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
d38ceaf9 4034 adev->accel_working = false;
68ce8b24 4035 dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
04442bf7
LL
4036
4037 amdgpu_reset_fini(adev);
4038
d38ceaf9 4039 /* free i2c buses */
4562236b
HW
4040 if (!amdgpu_device_has_dc_support(adev))
4041 amdgpu_i2c_fini(adev);
bfca0289
SL
4042
4043 if (amdgpu_emu_mode != 1)
4044 amdgpu_atombios_fini(adev);
4045
d38ceaf9
AD
4046 kfree(adev->bios);
4047 adev->bios = NULL;
b98c6299 4048 if (amdgpu_device_supports_px(adev_to_drm(adev))) {
84c8b22e 4049 vga_switcheroo_unregister_client(adev->pdev);
83ba126a 4050 vga_switcheroo_fini_domain_pm_ops(adev->dev);
b98c6299 4051 }
38d6be81 4052 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
b8779475 4053 vga_client_unregister(adev->pdev);
e9bc1bf7 4054
62d5f9f7
LS
4055 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4056
4057 iounmap(adev->rmmio);
4058 adev->rmmio = NULL;
4059 amdgpu_device_doorbell_fini(adev);
4060 drm_dev_exit(idx);
4061 }
4062
d155bef0
AB
4063 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4064 amdgpu_pmu_fini(adev);
72de33f8 4065 if (adev->mman.discovery_bin)
a190d1c7 4066 amdgpu_discovery_fini(adev);
72c8c97b 4067
cfbb6b00
AG
4068 amdgpu_reset_put_reset_domain(adev->reset_domain);
4069 adev->reset_domain = NULL;
4070
72c8c97b
AG
4071 kfree(adev->pci_state);
4072
d38ceaf9
AD
4073}
4074
58144d28
ND
4075/**
4076 * amdgpu_device_evict_resources - evict device resources
4077 * @adev: amdgpu device object
4078 *
4079 * Evicts all ttm device resources (vram BOs, gart table) from the lru list
4080 * of the vram memory type. Mainly used for evicting device resources
4081 * at suspend time.
4082 *
4083 */
7863c155 4084static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
58144d28 4085{
7863c155
ML
4086 int ret;
4087
e53d9665
ML
4088 /* No need to evict vram on APUs for suspend to ram or s2idle */
4089 if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
7863c155 4090 return 0;
58144d28 4091
7863c155
ML
4092 ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4093 if (ret)
58144d28 4094 DRM_WARN("evicting device resources failed\n");
7863c155 4095 return ret;
58144d28 4096}
d38ceaf9
AD
4097
4098/*
4099 * Suspend & resume.
4100 */
4101/**
810ddc3a 4102 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 4103 *
87e3f136 4104 * @dev: drm dev pointer
87e3f136 4105 * @fbcon : notify the fbdev of suspend
d38ceaf9
AD
4106 *
4107 * Puts the hw in the suspend state (all asics).
4108 * Returns 0 for success or an error on failure.
4109 * Called at driver suspend.
4110 */
de185019 4111int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9 4112{
a2e15b0e 4113 struct amdgpu_device *adev = drm_to_adev(dev);
d7274ec7 4114 int r = 0;
d38ceaf9 4115
d38ceaf9
AD
4116 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4117 return 0;
4118
44779b43 4119 adev->in_suspend = true;
3fa8f89d 4120
47ea2076
SF
4121 /* Evict the majority of BOs before grabbing the full access */
4122 r = amdgpu_device_evict_resources(adev);
4123 if (r)
4124 return r;
4125
d7274ec7
BZ
4126 if (amdgpu_sriov_vf(adev)) {
4127 amdgpu_virt_fini_data_exchange(adev);
4128 r = amdgpu_virt_request_full_gpu(adev, false);
4129 if (r)
4130 return r;
4131 }
4132
3fa8f89d
S
4133 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4134 DRM_WARN("smart shift update failed\n");
4135
d38ceaf9
AD
4136 drm_kms_helper_poll_disable(dev);
4137
5f818173 4138 if (fbcon)
087451f3 4139 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
5f818173 4140
beff74bc 4141 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 4142
5e6932fe 4143 amdgpu_ras_suspend(adev);
4144
2196927b 4145 amdgpu_device_ip_suspend_phase1(adev);
fe1053b7 4146
c004d44e 4147 if (!adev->in_s0ix)
5d3a2d95 4148 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
94fa5660 4149
7863c155
ML
4150 r = amdgpu_device_evict_resources(adev);
4151 if (r)
4152 return r;
d38ceaf9 4153
8d35a259 4154 amdgpu_fence_driver_hw_fini(adev);
d38ceaf9 4155
2196927b 4156 amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 4157
d7274ec7
BZ
4158 if (amdgpu_sriov_vf(adev))
4159 amdgpu_virt_release_full_gpu(adev, false);
4160
d38ceaf9
AD
4161 return 0;
4162}
4163
4164/**
810ddc3a 4165 * amdgpu_device_resume - initiate device resume
d38ceaf9 4166 *
87e3f136 4167 * @dev: drm dev pointer
87e3f136 4168 * @fbcon : notify the fbdev of resume
d38ceaf9
AD
4169 *
4170 * Bring the hw back to operating state (all asics).
4171 * Returns 0 for success or an error on failure.
4172 * Called at driver resume.
4173 */
de185019 4174int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9 4175{
1348969a 4176 struct amdgpu_device *adev = drm_to_adev(dev);
03161a6e 4177 int r = 0;
d38ceaf9 4178
d7274ec7
BZ
4179 if (amdgpu_sriov_vf(adev)) {
4180 r = amdgpu_virt_request_full_gpu(adev, true);
4181 if (r)
4182 return r;
4183 }
4184
d38ceaf9
AD
4185 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4186 return 0;
4187
62498733 4188 if (adev->in_s0ix)
bc143d8b 4189 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
628c36d7 4190
d38ceaf9 4191 /* post card */
39c640c0 4192 if (amdgpu_device_need_post(adev)) {
4d2997ab 4193 r = amdgpu_device_asic_init(adev);
74b0b157 4194 if (r)
aac89168 4195 dev_err(adev->dev, "amdgpu asic init failed\n");
74b0b157 4196 }
d38ceaf9 4197
06ec9070 4198 r = amdgpu_device_ip_resume(adev);
d7274ec7 4199
e6707218 4200 if (r) {
aac89168 4201 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
3c22c1ea 4202 goto exit;
e6707218 4203 }
8d35a259 4204 amdgpu_fence_driver_hw_init(adev);
5ceb54c6 4205
06ec9070 4206 r = amdgpu_device_ip_late_init(adev);
03161a6e 4207 if (r)
3c22c1ea 4208 goto exit;
d38ceaf9 4209
beff74bc
AD
4210 queue_delayed_work(system_wq, &adev->delayed_init_work,
4211 msecs_to_jiffies(AMDGPU_RESUME_MS));
4212
c004d44e 4213 if (!adev->in_s0ix) {
5d3a2d95
AD
4214 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4215 if (r)
3c22c1ea 4216 goto exit;
5d3a2d95 4217 }
756e6880 4218
3c22c1ea
SF
4219exit:
4220 if (amdgpu_sriov_vf(adev)) {
4221 amdgpu_virt_init_data_exchange(adev);
4222 amdgpu_virt_release_full_gpu(adev, true);
4223 }
4224
4225 if (r)
4226 return r;
4227
96a5d8d4 4228 /* Make sure IB tests flushed */
beff74bc 4229 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 4230
a2e15b0e 4231 if (fbcon)
087451f3 4232 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
d38ceaf9
AD
4233
4234 drm_kms_helper_poll_enable(dev);
23a1a9e5 4235
5e6932fe 4236 amdgpu_ras_resume(adev);
4237
d09ef243
AD
4238 if (adev->mode_info.num_crtc) {
4239 /*
4240 * Most of the connector probing functions try to acquire runtime pm
4241 * refs to ensure that the GPU is powered on when connector polling is
4242 * performed. Since we're calling this from a runtime PM callback,
4243 * trying to acquire rpm refs will cause us to deadlock.
4244 *
4245 * Since we're guaranteed to be holding the rpm lock, it's safe to
4246 * temporarily disable the rpm helpers so this doesn't deadlock us.
4247 */
23a1a9e5 4248#ifdef CONFIG_PM
d09ef243 4249 dev->dev->power.disable_depth++;
23a1a9e5 4250#endif
d09ef243
AD
4251 if (!adev->dc_enabled)
4252 drm_helper_hpd_irq_event(dev);
4253 else
4254 drm_kms_helper_hotplug_event(dev);
23a1a9e5 4255#ifdef CONFIG_PM
d09ef243 4256 dev->dev->power.disable_depth--;
23a1a9e5 4257#endif
d09ef243 4258 }
44779b43
RZ
4259 adev->in_suspend = false;
4260
3fa8f89d
S
4261 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4262 DRM_WARN("smart shift update failed\n");
4263
4d3b9ae5 4264 return 0;
d38ceaf9
AD
4265}
4266
e3ecdffa
AD
4267/**
4268 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4269 *
4270 * @adev: amdgpu_device pointer
4271 *
4272 * The list of all the hardware IPs that make up the asic is walked and
4273 * the check_soft_reset callbacks are run. check_soft_reset determines
4274 * if the asic is still hung or not.
4275 * Returns true if any of the IPs are still in a hung state, false if not.
4276 */
06ec9070 4277static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
4278{
4279 int i;
4280 bool asic_hang = false;
4281
f993d628
ML
4282 if (amdgpu_sriov_vf(adev))
4283 return true;
4284
8bc04c29
AD
4285 if (amdgpu_asic_need_full_reset(adev))
4286 return true;
4287
63fbf42f 4288 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4289 if (!adev->ip_blocks[i].status.valid)
63fbf42f 4290 continue;
a1255107
AD
4291 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4292 adev->ip_blocks[i].status.hang =
4293 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4294 if (adev->ip_blocks[i].status.hang) {
aac89168 4295 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
4296 asic_hang = true;
4297 }
4298 }
4299 return asic_hang;
4300}
4301
e3ecdffa
AD
4302/**
4303 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4304 *
4305 * @adev: amdgpu_device pointer
4306 *
4307 * The list of all the hardware IPs that make up the asic is walked and the
4308 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
4309 * handles any IP specific hardware or software state changes that are
4310 * necessary for a soft reset to succeed.
4311 * Returns 0 on success, negative error code on failure.
4312 */
06ec9070 4313static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
4314{
4315 int i, r = 0;
4316
4317 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4318 if (!adev->ip_blocks[i].status.valid)
d31a501e 4319 continue;
a1255107
AD
4320 if (adev->ip_blocks[i].status.hang &&
4321 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4322 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
4323 if (r)
4324 return r;
4325 }
4326 }
4327
4328 return 0;
4329}
4330
e3ecdffa
AD
4331/**
4332 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4333 *
4334 * @adev: amdgpu_device pointer
4335 *
4336 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
4337 * reset is necessary to recover.
4338 * Returns true if a full asic reset is required, false if not.
4339 */
06ec9070 4340static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 4341{
da146d3b
AD
4342 int i;
4343
8bc04c29
AD
4344 if (amdgpu_asic_need_full_reset(adev))
4345 return true;
4346
da146d3b 4347 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4348 if (!adev->ip_blocks[i].status.valid)
da146d3b 4349 continue;
a1255107
AD
4350 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4351 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4352 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
4353 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4354 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 4355 if (adev->ip_blocks[i].status.hang) {
aac89168 4356 dev_info(adev->dev, "Some block need full reset!\n");
da146d3b
AD
4357 return true;
4358 }
4359 }
35d782fe
CZ
4360 }
4361 return false;
4362}
4363
e3ecdffa
AD
4364/**
4365 * amdgpu_device_ip_soft_reset - do a soft reset
4366 *
4367 * @adev: amdgpu_device pointer
4368 *
4369 * The list of all the hardware IPs that make up the asic is walked and the
4370 * soft_reset callbacks are run if the block is hung. soft_reset handles any
4371 * IP specific hardware or software state changes that are necessary to soft
4372 * reset the IP.
4373 * Returns 0 on success, negative error code on failure.
4374 */
06ec9070 4375static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4376{
4377 int i, r = 0;
4378
4379 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4380 if (!adev->ip_blocks[i].status.valid)
35d782fe 4381 continue;
a1255107
AD
4382 if (adev->ip_blocks[i].status.hang &&
4383 adev->ip_blocks[i].version->funcs->soft_reset) {
4384 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
4385 if (r)
4386 return r;
4387 }
4388 }
4389
4390 return 0;
4391}
4392
e3ecdffa
AD
4393/**
4394 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4395 *
4396 * @adev: amdgpu_device pointer
4397 *
4398 * The list of all the hardware IPs that make up the asic is walked and the
4399 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
4400 * handles any IP specific hardware or software state changes that are
4401 * necessary after the IP has been soft reset.
4402 * Returns 0 on success, negative error code on failure.
4403 */
06ec9070 4404static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4405{
4406 int i, r = 0;
4407
4408 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4409 if (!adev->ip_blocks[i].status.valid)
35d782fe 4410 continue;
a1255107
AD
4411 if (adev->ip_blocks[i].status.hang &&
4412 adev->ip_blocks[i].version->funcs->post_soft_reset)
4413 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
4414 if (r)
4415 return r;
4416 }
4417
4418 return 0;
4419}
4420
e3ecdffa 4421/**
c33adbc7 4422 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
4423 *
4424 * @adev: amdgpu_device pointer
4425 *
4426 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
4427 * restore things like GPUVM page tables after a GPU reset where
4428 * the contents of VRAM might be lost.
403009bf
CK
4429 *
4430 * Returns:
4431 * 0 on success, negative error code on failure.
e3ecdffa 4432 */
c33adbc7 4433static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 4434{
c41d1cf6 4435 struct dma_fence *fence = NULL, *next = NULL;
403009bf 4436 struct amdgpu_bo *shadow;
e18aaea7 4437 struct amdgpu_bo_vm *vmbo;
403009bf 4438 long r = 1, tmo;
c41d1cf6
ML
4439
4440 if (amdgpu_sriov_runtime(adev))
b045d3af 4441 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
4442 else
4443 tmo = msecs_to_jiffies(100);
4444
aac89168 4445 dev_info(adev->dev, "recover vram bo from shadow start\n");
c41d1cf6 4446 mutex_lock(&adev->shadow_list_lock);
e18aaea7
ND
4447 list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4448 shadow = &vmbo->bo;
403009bf 4449 /* No need to recover an evicted BO */
d3116756
CK
4450 if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4451 shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4452 shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
403009bf
CK
4453 continue;
4454
4455 r = amdgpu_bo_restore_shadow(shadow, &next);
4456 if (r)
4457 break;
4458
c41d1cf6 4459 if (fence) {
1712fb1a 4460 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
4461 dma_fence_put(fence);
4462 fence = next;
1712fb1a 4463 if (tmo == 0) {
4464 r = -ETIMEDOUT;
c41d1cf6 4465 break;
1712fb1a 4466 } else if (tmo < 0) {
4467 r = tmo;
4468 break;
4469 }
403009bf
CK
4470 } else {
4471 fence = next;
c41d1cf6 4472 }
c41d1cf6
ML
4473 }
4474 mutex_unlock(&adev->shadow_list_lock);
4475
403009bf
CK
4476 if (fence)
4477 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
4478 dma_fence_put(fence);
4479
1712fb1a 4480 if (r < 0 || tmo <= 0) {
aac89168 4481 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
4482 return -EIO;
4483 }
c41d1cf6 4484
aac89168 4485 dev_info(adev->dev, "recover vram bo from shadow done\n");
403009bf 4486 return 0;
c41d1cf6
ML
4487}
4488
a90ad3c2 4489
e3ecdffa 4490/**
06ec9070 4491 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e 4492 *
982a820b 4493 * @adev: amdgpu_device pointer
87e3f136 4494 * @from_hypervisor: request from hypervisor
5740682e
ML
4495 *
4496 * do VF FLR and reinitialize Asic
3f48c681 4497 * return 0 means succeeded otherwise failed
e3ecdffa
AD
4498 */
4499static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4500 bool from_hypervisor)
5740682e
ML
4501{
4502 int r;
a5f67c93 4503 struct amdgpu_hive_info *hive = NULL;
7258fa31 4504 int retry_limit = 0;
5740682e 4505
7258fa31 4506retry:
c004d44e 4507 amdgpu_amdkfd_pre_reset(adev);
428890a3 4508
5740682e
ML
4509 if (from_hypervisor)
4510 r = amdgpu_virt_request_full_gpu(adev, true);
4511 else
4512 r = amdgpu_virt_reset_gpu(adev);
4513 if (r)
4514 return r;
a90ad3c2
ML
4515
4516 /* Resume IP prior to SMC */
06ec9070 4517 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
4518 if (r)
4519 goto error;
a90ad3c2 4520
c9ffa427 4521 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 4522
7a3e0bb2
RZ
4523 r = amdgpu_device_fw_loading(adev);
4524 if (r)
4525 return r;
4526
a90ad3c2 4527 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 4528 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
4529 if (r)
4530 goto error;
a90ad3c2 4531
a5f67c93
ZL
4532 hive = amdgpu_get_xgmi_hive(adev);
4533 /* Update PSP FW topology after reset */
4534 if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4535 r = amdgpu_xgmi_update_topology(hive, adev);
4536
4537 if (hive)
4538 amdgpu_put_xgmi_hive(hive);
4539
4540 if (!r) {
4541 amdgpu_irq_gpu_reset_resume_helper(adev);
4542 r = amdgpu_ib_ring_tests(adev);
9c12f5cd 4543
c004d44e 4544 amdgpu_amdkfd_post_reset(adev);
a5f67c93 4545 }
a90ad3c2 4546
abc34253 4547error:
c41d1cf6 4548 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 4549 amdgpu_inc_vram_lost(adev);
c33adbc7 4550 r = amdgpu_device_recover_vram(adev);
a90ad3c2 4551 }
437f3e0b 4552 amdgpu_virt_release_full_gpu(adev, true);
a90ad3c2 4553
7258fa31
SK
4554 if (AMDGPU_RETRY_SRIOV_RESET(r)) {
4555 if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
4556 retry_limit++;
4557 goto retry;
4558 } else
4559 DRM_ERROR("GPU reset retry is beyond the retry limit\n");
4560 }
4561
a90ad3c2
ML
4562 return r;
4563}
4564
9a1cddd6 4565/**
4566 * amdgpu_device_has_job_running - check if there is any job in the pending list
4567 *
982a820b 4568 * @adev: amdgpu_device pointer
9a1cddd6 4569 *
4570 * check if there is any job in the pending list
4571 */
4572bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4573{
4574 int i;
4575 struct drm_sched_job *job;
4576
4577 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4578 struct amdgpu_ring *ring = adev->rings[i];
4579
4580 if (!ring || !ring->sched.thread)
4581 continue;
4582
4583 spin_lock(&ring->sched.job_list_lock);
6efa4b46
LT
4584 job = list_first_entry_or_null(&ring->sched.pending_list,
4585 struct drm_sched_job, list);
9a1cddd6 4586 spin_unlock(&ring->sched.job_list_lock);
4587 if (job)
4588 return true;
4589 }
4590 return false;
4591}
4592
12938fad
CK
4593/**
4594 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4595 *
982a820b 4596 * @adev: amdgpu_device pointer
12938fad
CK
4597 *
4598 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4599 * a hung GPU.
4600 */
4601bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4602{
12938fad 4603
3ba7b418
AG
4604 if (amdgpu_gpu_recovery == 0)
4605 goto disabled;
4606
1a11a65d
YC
4607 /* Skip soft reset check in fatal error mode */
4608 if (!amdgpu_ras_is_poison_mode_supported(adev))
4609 return true;
4610
3ba7b418
AG
4611 if (amdgpu_sriov_vf(adev))
4612 return true;
4613
4614 if (amdgpu_gpu_recovery == -1) {
4615 switch (adev->asic_type) {
b3523c45
AD
4616#ifdef CONFIG_DRM_AMDGPU_SI
4617 case CHIP_VERDE:
4618 case CHIP_TAHITI:
4619 case CHIP_PITCAIRN:
4620 case CHIP_OLAND:
4621 case CHIP_HAINAN:
4622#endif
4623#ifdef CONFIG_DRM_AMDGPU_CIK
4624 case CHIP_KAVERI:
4625 case CHIP_KABINI:
4626 case CHIP_MULLINS:
4627#endif
4628 case CHIP_CARRIZO:
4629 case CHIP_STONEY:
4630 case CHIP_CYAN_SKILLFISH:
3ba7b418 4631 goto disabled;
b3523c45
AD
4632 default:
4633 break;
3ba7b418 4634 }
12938fad
CK
4635 }
4636
4637 return true;
3ba7b418
AG
4638
4639disabled:
aac89168 4640 dev_info(adev->dev, "GPU recovery disabled.\n");
3ba7b418 4641 return false;
12938fad
CK
4642}
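/*
 * Net effect of the checks above: amdgpu_gpu_recovery=0 always disables
 * recovery; otherwise recovery is forced on when RAS poison mode is not
 * supported or when running as an SR-IOV VF, and the -1 "auto" default
 * enables it on everything except the legacy SI/CIK/Carrizo/Stoney/
 * Cyan Skillfish parts listed in the switch.
 */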
4643
5c03e584
FX
4644int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
4645{
4646 u32 i;
4647 int ret = 0;
4648
4649 amdgpu_atombios_scratch_regs_engine_hung(adev, true);
4650
4651 dev_info(adev->dev, "GPU mode1 reset\n");
4652
4653 /* disable BM */
4654 pci_clear_master(adev->pdev);
4655
4656 amdgpu_device_cache_pci_state(adev->pdev);
4657
4658 if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
4659 dev_info(adev->dev, "GPU smu mode1 reset\n");
4660 ret = amdgpu_dpm_mode1_reset(adev);
4661 } else {
4662 dev_info(adev->dev, "GPU psp mode1 reset\n");
4663 ret = psp_gpu_reset(adev);
4664 }
4665
4666 if (ret)
4667 dev_err(adev->dev, "GPU mode1 reset failed\n");
4668
4669 amdgpu_device_load_pci_state(adev->pdev);
4670
4671 /* wait for asic to come out of reset */
4672 for (i = 0; i < adev->usec_timeout; i++) {
4673 u32 memsize = adev->nbio.funcs->get_memsize(adev);
4674
4675 if (memsize != 0xffffffff)
4676 break;
4677 udelay(1);
4678 }
4679
4680 amdgpu_atombios_scratch_regs_engine_hung(adev, false);
4681 return ret;
4682}
5c6dd71e 4683
e3c1b071 4684int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
04442bf7 4685 struct amdgpu_reset_context *reset_context)
26bc5340 4686{
5c1e6fa4 4687 int i, r = 0;
04442bf7
LL
4688 struct amdgpu_job *job = NULL;
4689 bool need_full_reset =
4690 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4691
4692 if (reset_context->reset_req_dev == adev)
4693 job = reset_context->job;
71182665 4694
b602ca5f
TZ
4695 if (amdgpu_sriov_vf(adev)) {
4696 /* stop the data exchange thread */
4697 amdgpu_virt_fini_data_exchange(adev);
4698 }
4699
9e225fb9
AG
4700 amdgpu_fence_driver_isr_toggle(adev, true);
4701
71182665 4702 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
4703 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4704 struct amdgpu_ring *ring = adev->rings[i];
4705
51687759 4706 if (!ring || !ring->sched.thread)
0875dc9e 4707 continue;
5740682e 4708
c530b02f
JZ
4709 /* Clear the job fences from the fence drv to avoid force_completion;
4710 * leave NULL and the vm flush fence in the fence drv */
5c1e6fa4 4711 amdgpu_fence_driver_clear_job_fences(ring);
c530b02f 4712
2f9d4084
ML
4713 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
4714 amdgpu_fence_driver_force_completion(ring);
0875dc9e 4715 }
d38ceaf9 4716
9e225fb9
AG
4717 amdgpu_fence_driver_isr_toggle(adev, false);
4718
ff99849b 4719 if (job && job->vm)
222b5f04
AG
4720 drm_sched_increase_karma(&job->base);
4721
04442bf7 4722 r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
404b277b
LL
4723 /* If reset handler not implemented, continue; otherwise return */
4724 if (r == -ENOSYS)
4725 r = 0;
4726 else
04442bf7
LL
4727 return r;
4728
1d721ed6 4729 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
4730 if (!amdgpu_sriov_vf(adev)) {
4731
4732 if (!need_full_reset)
4733 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4734
360cd081
LG
4735 if (!need_full_reset && amdgpu_gpu_recovery &&
4736 amdgpu_device_ip_check_soft_reset(adev)) {
26bc5340
AG
4737 amdgpu_device_ip_pre_soft_reset(adev);
4738 r = amdgpu_device_ip_soft_reset(adev);
4739 amdgpu_device_ip_post_soft_reset(adev);
4740 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 4741 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
26bc5340
AG
4742 need_full_reset = true;
4743 }
4744 }
4745
4746 if (need_full_reset)
4747 r = amdgpu_device_ip_suspend(adev);
04442bf7
LL
4748 if (need_full_reset)
4749 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4750 else
4751 clear_bit(AMDGPU_NEED_FULL_RESET,
4752 &reset_context->flags);
26bc5340
AG
4753 }
4754
4755 return r;
4756}
4757
15fd09a0
SA
4758static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
4759{
15fd09a0
SA
4760 int i;
4761
38a15ad9 4762 lockdep_assert_held(&adev->reset_domain->sem);
15fd09a0
SA
4763
4764 for (i = 0; i < adev->num_regs; i++) {
651d7ee6
SA
4765 adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]);
4766 trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i],
4767 adev->reset_dump_reg_value[i]);
15fd09a0
SA
4768 }
4769
4770 return 0;
4771}
4772
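/*
 * The reset_dump_reg_list snapshotted above is typically populated by the
 * user via the amdgpu_reset_dump_register_list debugfs file (name assumed
 * from this driver generation's debugfs interface); the captured values
 * are reported in the devcoredump emitted below.
 */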
3d8785f6
SA
4773#ifdef CONFIG_DEV_COREDUMP
4774static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
4775 size_t count, void *data, size_t datalen)
4776{
4777 struct drm_printer p;
4778 struct amdgpu_device *adev = data;
4779 struct drm_print_iterator iter;
4780 int i;
4781
4782 iter.data = buffer;
4783 iter.offset = 0;
4784 iter.start = offset;
4785 iter.remain = count;
4786
4787 p = drm_coredump_printer(&iter);
4788
4789 drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
4790 drm_printf(&p, "kernel: " UTS_RELEASE "\n");
4791 drm_printf(&p, "module: " KBUILD_MODNAME "\n");
4792 drm_printf(&p, "time: %lld.%09ld\n", adev->reset_time.tv_sec, adev->reset_time.tv_nsec);
4793 if (adev->reset_task_info.pid)
4794 drm_printf(&p, "process_name: %s PID: %d\n",
4795 adev->reset_task_info.process_name,
4796 adev->reset_task_info.pid);
4797
4798 if (adev->reset_vram_lost)
4799 drm_printf(&p, "VRAM is lost due to GPU reset!\n");
4800 if (adev->num_regs) {
4801 drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n");
4802
4803 for (i = 0; i < adev->num_regs; i++)
4804 drm_printf(&p, "0x%08x: 0x%08x\n",
4805 adev->reset_dump_reg_list[i],
4806 adev->reset_dump_reg_value[i]);
4807 }
4808
4809 return count - iter.remain;
4810}
4811
4812static void amdgpu_devcoredump_free(void *data)
4813{
4814}
4815
4816static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev)
4817{
4818 struct drm_device *dev = adev_to_drm(adev);
4819
4820 ktime_get_ts64(&adev->reset_time);
4821 dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_KERNEL,
4822 amdgpu_devcoredump_read, amdgpu_devcoredump_free);
4823}
4824#endif
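/*
 * Once captured, the devcoredump core exposes the buffer produced by
 * amdgpu_devcoredump_read() under /sys/class/devcoredump/devcd<N>/data
 * until userspace reads and releases it or the entry times out.
 */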
4825
04442bf7
LL
4826int amdgpu_do_asic_reset(struct list_head *device_list_handle,
4827 struct amdgpu_reset_context *reset_context)
26bc5340
AG
4828{
4829 struct amdgpu_device *tmp_adev = NULL;
04442bf7 4830 bool need_full_reset, skip_hw_reset, vram_lost = false;
26bc5340 4831 int r = 0;
f5c7e779 4832 bool gpu_reset_for_dev_remove = 0;
26bc5340 4833
04442bf7
LL
4834 /* Try reset handler method first */
4835 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
4836 reset_list);
15fd09a0 4837 amdgpu_reset_reg_dumps(tmp_adev);
0a83bb35
LL
4838
4839 reset_context->reset_device_list = device_list_handle;
04442bf7 4840 r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
404b277b
LL
4841 /* If reset handler not implemented, continue; otherwise return */
4842 if (r == -ENOSYS)
4843 r = 0;
4844 else
04442bf7
LL
4845 return r;
4846
4847 /* Reset handler not implemented, use the default method */
4848 need_full_reset =
4849 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4850 skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
4851
f5c7e779
YC
4852 gpu_reset_for_dev_remove =
4853 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
4854 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4855
26bc5340 4856 /*
655ce9cb 4857 * ASIC reset has to be done on all XGMI hive nodes ASAP
26bc5340
AG
4858 * to allow proper link negotiation in FW (within 1 sec)
4859 */
7ac71382 4860 if (!skip_hw_reset && need_full_reset) {
655ce9cb 4861 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
041a62bc 4862 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 4863 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
e3c1b071 4864 tmp_adev->gmc.xgmi.pending_reset = false;
c96cf282 4865 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
4866 r = -EALREADY;
4867 } else
4868 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 4869
041a62bc 4870 if (r) {
aac89168 4871 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4a580877 4872 r, adev_to_drm(tmp_adev)->unique);
041a62bc 4873 break;
ce316fa5
LM
4874 }
4875 }
4876
041a62bc
AG
4877 /* For XGMI wait for all resets to complete before proceed */
4878 if (!r) {
655ce9cb 4879 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
ce316fa5
LM
4880 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4881 flush_work(&tmp_adev->xgmi_reset_work);
4882 r = tmp_adev->asic_reset_res;
4883 if (r)
4884 break;
ce316fa5
LM
4885 }
4886 }
4887 }
ce316fa5 4888 }
26bc5340 4889
43c4d576 4890 if (!r && amdgpu_ras_intr_triggered()) {
655ce9cb 4891 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5e67bba3 4892 if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops &&
4893 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
4894 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev);
43c4d576
JC
4895 }
4896
00eaa571 4897 amdgpu_ras_intr_cleared();
43c4d576 4898 }
00eaa571 4899
f5c7e779
YC
4900 /* Since the mode1 reset affects base ip blocks, the
4901 * phase1 ip blocks need to be resumed. Otherwise there
4902 * will be a BIOS signature error and the psp bootloader
4903 * can't load kdb on the next amdgpu install.
4904 */
4905 if (gpu_reset_for_dev_remove) {
4906 list_for_each_entry(tmp_adev, device_list_handle, reset_list)
4907 amdgpu_device_ip_resume_phase1(tmp_adev);
4908
4909 goto end;
4910 }
4911
655ce9cb 4912 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
26bc5340
AG
4913 if (need_full_reset) {
4914 /* post card */
e3c1b071 4915 r = amdgpu_device_asic_init(tmp_adev);
4916 if (r) {
aac89168 4917 dev_warn(tmp_adev->dev, "asic atom init failed!");
e3c1b071 4918 } else {
26bc5340 4919 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
9cec53c1
JZ
4920 r = amdgpu_amdkfd_resume_iommu(tmp_adev);
4921 if (r)
4922 goto out;
4923
26bc5340
AG
4924 r = amdgpu_device_ip_resume_phase1(tmp_adev);
4925 if (r)
4926 goto out;
4927
4928 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
3d8785f6
SA
4929#ifdef CONFIG_DEV_COREDUMP
4930 tmp_adev->reset_vram_lost = vram_lost;
4931 memset(&tmp_adev->reset_task_info, 0,
4932 sizeof(tmp_adev->reset_task_info));
4933 if (reset_context->job && reset_context->job->vm)
4934 tmp_adev->reset_task_info =
4935 reset_context->job->vm->task_info;
4936 amdgpu_reset_capture_coredumpm(tmp_adev);
4937#endif
26bc5340 4938 if (vram_lost) {
77e7f829 4939 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 4940 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
4941 }
4942
26bc5340
AG
4943 r = amdgpu_device_fw_loading(tmp_adev);
4944 if (r)
4945 return r;
4946
4947 r = amdgpu_device_ip_resume_phase2(tmp_adev);
4948 if (r)
4949 goto out;
4950
4951 if (vram_lost)
4952 amdgpu_device_fill_reset_magic(tmp_adev);
4953
fdafb359
EQ
4954 /*
4955 * Add this ASIC as tracked, as the reset already
4956 * completed successfully.
4957 */
4958 amdgpu_register_gpu_instance(tmp_adev);
4959
04442bf7
LL
4960 if (!reset_context->hive &&
4961 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
e3c1b071 4962 amdgpu_xgmi_add_device(tmp_adev);
4963
7c04ca50 4964 r = amdgpu_device_ip_late_init(tmp_adev);
4965 if (r)
4966 goto out;
4967
087451f3 4968 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
565d1941 4969
e8fbaf03
GC
4970 /*
4971 * The GPU enters a bad state once the number of faulty pages
4972 * flagged by ECC has reached the threshold, and ras
4973 * recovery is scheduled next. So add one check
4974 * here to break recovery if it indeed exceeds the
4975 * bad page threshold, and remind the user to
4976 * retire this GPU or set a bigger
4977 * bad_page_threshold value to fix this before
4978 * probing the driver again.
4979 */
11003c68 4980 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
e8fbaf03
GC
4981 /* must succeed. */
4982 amdgpu_ras_resume(tmp_adev);
4983 } else {
4984 r = -EINVAL;
4985 goto out;
4986 }
e79a04d5 4987
26bc5340 4988 /* Update PSP FW topology after reset */
04442bf7
LL
4989 if (reset_context->hive &&
4990 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
4991 r = amdgpu_xgmi_update_topology(
4992 reset_context->hive, tmp_adev);
26bc5340
AG
4993 }
4994 }
4995
26bc5340
AG
4996out:
4997 if (!r) {
4998 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
4999 r = amdgpu_ib_ring_tests(tmp_adev);
5000 if (r) {
5001 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
26bc5340
AG
5002 need_full_reset = true;
5003 r = -EAGAIN;
5004 goto end;
5005 }
5006 }
5007
5008 if (!r)
5009 r = amdgpu_device_recover_vram(tmp_adev);
5010 else
5011 tmp_adev->asic_reset_res = r;
5012 }
5013
5014end:
04442bf7
LL
5015 if (need_full_reset)
5016 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5017 else
5018 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340
AG
5019 return r;
5020}
5021
e923be99 5022static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
26bc5340 5023{
5740682e 5024
a3a09142
AD
5025 switch (amdgpu_asic_reset_method(adev)) {
5026 case AMD_RESET_METHOD_MODE1:
5027 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5028 break;
5029 case AMD_RESET_METHOD_MODE2:
5030 adev->mp1_state = PP_MP1_STATE_RESET;
5031 break;
5032 default:
5033 adev->mp1_state = PP_MP1_STATE_NONE;
5034 break;
5035 }
26bc5340 5036}
d38ceaf9 5037
e923be99 5038static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
26bc5340 5039{
89041940 5040 amdgpu_vf_error_trans_all(adev);
a3a09142 5041 adev->mp1_state = PP_MP1_STATE_NONE;
91fb309d
HC
5042}
5043
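/*
 * Hedged usage sketch (not part of the original file): the two helpers
 * above are meant to bracket a reset critical section, mirroring how the
 * GPU recovery and PCI error paths below use them. The wrapper name is
 * hypothetical.
 */
static void example_mp1_state_bracket(struct amdgpu_device *adev)
{
	amdgpu_device_set_mp1_state(adev);

	/* ... perform the actual reset work here ... */

	amdgpu_device_unset_mp1_state(adev);
}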
3f12acc8
EQ
5044static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5045{
5046 struct pci_dev *p = NULL;
5047
5048 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5049 adev->pdev->bus->number, 1);
5050 if (p) {
5051 pm_runtime_enable(&(p->dev));
5052 pm_runtime_resume(&(p->dev));
5053 }
b85e285e
YY
5054
5055 pci_dev_put(p);
3f12acc8
EQ
5056}
5057
5058static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5059{
5060 enum amd_reset_method reset_method;
5061 struct pci_dev *p = NULL;
5062 u64 expires;
5063
5064 /*
5065 * For now, only BACO and mode1 reset are confirmed
5066 * to suffer the audio issue if not properly suspended.
5067 */
5068 reset_method = amdgpu_asic_reset_method(adev);
5069 if ((reset_method != AMD_RESET_METHOD_BACO) &&
5070 (reset_method != AMD_RESET_METHOD_MODE1))
5071 return -EINVAL;
5072
5073 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5074 adev->pdev->bus->number, 1);
5075 if (!p)
5076 return -ENODEV;
5077
5078 expires = pm_runtime_autosuspend_expiration(&(p->dev));
5079 if (!expires)
5080 /*
5081 * If we cannot get the audio device autosuspend delay,
5082 * a fixed 4s interval will be used. Since 3s is the
5083 * audio controller's default autosuspend delay setting,
5084 * the 4s used here is guaranteed to cover it.
5085 */
54b7feb9 5086 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
5087
5088 while (!pm_runtime_status_suspended(&(p->dev))) {
5089 if (!pm_runtime_suspend(&(p->dev)))
5090 break;
5091
5092 if (expires < ktime_get_mono_fast_ns()) {
5093 dev_warn(adev->dev, "failed to suspend display audio\n");
b85e285e 5094 pci_dev_put(p);
3f12acc8
EQ
5095 /* TODO: abort the succeeding gpu reset? */
5096 return -ETIMEDOUT;
5097 }
5098 }
5099
5100 pm_runtime_disable(&(p->dev));
5101
b85e285e 5102 pci_dev_put(p);
3f12acc8
EQ
5103 return 0;
5104}
5105
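/*
 * Hedged usage sketch (illustrative only): a reset path pairs the audio
 * suspend/resume helpers above the same way amdgpu_device_gpu_recover()
 * below does. The wrapper name is hypothetical.
 */
static void example_audio_bracket(struct amdgpu_device *adev)
{
	/* amdgpu_device_suspend_display_audio() returns 0 on success */
	bool audio_suspended = !amdgpu_device_suspend_display_audio(adev);

	/* ... GPU reset happens here ... */

	if (audio_suspended)
		amdgpu_device_resume_display_audio(adev);
}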
d193b12b 5106static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
247c7b0d
AG
5107{
5108 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5109
5110#if defined(CONFIG_DEBUG_FS)
5111 if (!amdgpu_sriov_vf(adev))
5112 cancel_work(&adev->reset_work);
5113#endif
5114
5115 if (adev->kfd.dev)
5116 cancel_work(&adev->kfd.reset_work);
5117
5118 if (amdgpu_sriov_vf(adev))
5119 cancel_work(&adev->virt.flr_work);
5120
5121 if (con && adev->ras_enabled)
5122 cancel_work(&con->recovery_work);
5123
5124}
5125
26bc5340 5126/**
6e9c65f7 5127 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
26bc5340 5128 *
982a820b 5129 * @adev: amdgpu_device pointer
26bc5340
AG
5130 * @job: the job that triggered the hang
5131 *
5132 * Attempt to reset the GPU if it has hung (all asics).
5133 * Attempt a soft reset or full reset and reinitialize the ASIC.
5134 * Returns 0 for success or an error on failure.
5135 */
5136
cf727044 5137int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
f1549c09
LG
5138 struct amdgpu_job *job,
5139 struct amdgpu_reset_context *reset_context)
26bc5340 5140{
1d721ed6 5141 struct list_head device_list, *device_list_handle = NULL;
7dd8c205 5142 bool job_signaled = false;
26bc5340 5143 struct amdgpu_hive_info *hive = NULL;
26bc5340 5144 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 5145 int i, r = 0;
bb5c7235 5146 bool need_emergency_restart = false;
3f12acc8 5147 bool audio_suspended = false;
f5c7e779
YC
5148 bool gpu_reset_for_dev_remove = false;
5149
5150 gpu_reset_for_dev_remove =
5151 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5152 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340 5153
6e3cd2a9 5154 /*
bb5c7235
WS
5155 * Special case: RAS triggered and full reset isn't supported
5156 */
5157 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5158
d5ea093e
AG
5159 /*
5160 * Flush RAM to disk so that after reboot
5161 * the user can read the log and see why the system rebooted.
5162 */
bb5c7235 5163 if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
5164 DRM_WARN("Emergency reboot.");
5165
5166 ksys_sync_helper();
5167 emergency_restart();
5168 }
5169
b823821f 5170 dev_info(adev->dev, "GPU %s begin!\n",
bb5c7235 5171 need_emergency_restart ? "jobs stop":"reset");
26bc5340 5172
175ac6ec
ZL
5173 if (!amdgpu_sriov_vf(adev))
5174 hive = amdgpu_get_xgmi_hive(adev);
681260df 5175 if (hive)
53b3f8f4 5176 mutex_lock(&hive->hive_lock);
26bc5340 5177
f1549c09
LG
5178 reset_context->job = job;
5179 reset_context->hive = hive;
9e94d22c
EQ
5180 /*
5181 * Build list of devices to reset.
5182 * In case we are in XGMI hive mode, re-sort the device list
5183 * to put adev in the 1st position.
5184 */
5185 INIT_LIST_HEAD(&device_list);
175ac6ec 5186 if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
83d29a5f 5187 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
655ce9cb 5188 list_add_tail(&tmp_adev->reset_list, &device_list);
83d29a5f
YC
5189 if (gpu_reset_for_dev_remove && adev->shutdown)
5190 tmp_adev->shutdown = true;
5191 }
655ce9cb 5192 if (!list_is_first(&adev->reset_list, &device_list))
5193 list_rotate_to_front(&adev->reset_list, &device_list);
5194 device_list_handle = &device_list;
26bc5340 5195 } else {
655ce9cb 5196 list_add_tail(&adev->reset_list, &device_list);
26bc5340
AG
5197 device_list_handle = &device_list;
5198 }
5199
e923be99
AG
5200 /* We need to lock reset domain only once both for XGMI and single device */
5201 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5202 reset_list);
3675c2f2 5203 amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
e923be99 5204
1d721ed6 5205 /* block all schedulers and reset given job's ring */
655ce9cb 5206 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f287a3c5 5207
e923be99 5208 amdgpu_device_set_mp1_state(tmp_adev);
f287a3c5 5209
3f12acc8
EQ
5210 /*
5211 * Try to put the audio codec into suspend state
5212 * before the GPU reset starts.
5213 *
5214 * The power domain of the graphics device is
5215 * shared with the AZ power domain. Without this,
5216 * we may change the audio hardware from behind
5217 * the audio driver's back. That will trigger
5218 * some audio codec errors.
5219 */
5220 if (!amdgpu_device_suspend_display_audio(tmp_adev))
5221 audio_suspended = true;
5222
9e94d22c
EQ
5223 amdgpu_ras_set_error_query_ready(tmp_adev, false);
5224
52fb44cf
EQ
5225 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5226
c004d44e 5227 if (!amdgpu_sriov_vf(tmp_adev))
428890a3 5228 amdgpu_amdkfd_pre_reset(tmp_adev);
9e94d22c 5229
12ffa55d
AG
5230 /*
5231 * Mark these ASICs to be reset as untracked first,
5232 * and add them back after the reset completes.
5233 */
5234 amdgpu_unregister_gpu_instance(tmp_adev);
5235
163d4cd2 5236 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
565d1941 5237
f1c1314b 5238 /* disable ras on ALL IPs */
bb5c7235 5239 if (!need_emergency_restart &&
b823821f 5240 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 5241 amdgpu_ras_suspend(tmp_adev);
5242
1d721ed6
AG
5243 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5244 struct amdgpu_ring *ring = tmp_adev->rings[i];
5245
5246 if (!ring || !ring->sched.thread)
5247 continue;
5248
0b2d2c2e 5249 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 5250
bb5c7235 5251 if (need_emergency_restart)
7c6e68c7 5252 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6 5253 }
8f8c80f4 5254 atomic_inc(&tmp_adev->gpu_reset_counter);
1d721ed6
AG
5255 }
5256
bb5c7235 5257 if (need_emergency_restart)
7c6e68c7
AG
5258 goto skip_sched_resume;
5259
1d721ed6
AG
5260 /*
5261 * Must check guilty signal here since after this point all old
5262 * HW fences are force signaled.
5263 *
5264 * job->base holds a reference to parent fence
5265 */
f6a3f660 5266 if (job && dma_fence_is_signaled(&job->hw_fence)) {
1d721ed6 5267 job_signaled = true;
1d721ed6
AG
5268 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5269 goto skip_hw_reset;
5270 }
5271
26bc5340 5272retry: /* Rest of adevs pre asic reset from XGMI hive. */
655ce9cb 5273 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f5c7e779
YC
5274 if (gpu_reset_for_dev_remove) {
5275 /* Workaround for ASICs that need to disable SMC first */
5276 amdgpu_device_smu_fini_early(tmp_adev);
5277 }
f1549c09 5278 r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
26bc5340
AG
5279 /* TODO: Should we stop? */
5280 if (r) {
aac89168 5281 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
4a580877 5282 r, adev_to_drm(tmp_adev)->unique);
26bc5340
AG
5283 tmp_adev->asic_reset_res = r;
5284 }
247c7b0d
AG
5285
5286 /*
5287 * Drop all pending non-scheduler resets. Scheduler resets
5288 * were already dropped during drm_sched_stop.
5289 */
d193b12b 5290 amdgpu_device_stop_pending_resets(tmp_adev);
26bc5340
AG
5291 }
5292
5293 /* Actual ASIC resets if needed.*/
4f30d920 5294 /* Host driver will handle XGMI hive reset for SRIOV */
26bc5340
AG
5295 if (amdgpu_sriov_vf(adev)) {
5296 r = amdgpu_device_reset_sriov(adev, job ? false : true);
5297 if (r)
5298 adev->asic_reset_res = r;
950d6425
SY
5299
5300 /* Aldebaran supports ras in SRIOV, so need resume ras during reset */
5301 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
5302 amdgpu_ras_resume(adev);
26bc5340 5303 } else {
f1549c09 5304 r = amdgpu_do_asic_reset(device_list_handle, reset_context);
b98a1648 5305 if (r && r == -EAGAIN)
26bc5340 5306 goto retry;
f5c7e779
YC
5307
5308 if (!r && gpu_reset_for_dev_remove)
5309 goto recover_end;
26bc5340
AG
5310 }
5311
1d721ed6
AG
5312skip_hw_reset:
5313
26bc5340 5314 /* Post ASIC reset for all devs .*/
655ce9cb 5315 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
7c6e68c7 5316
1d721ed6
AG
5317 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5318 struct amdgpu_ring *ring = tmp_adev->rings[i];
5319
5320 if (!ring || !ring->sched.thread)
5321 continue;
5322
6868a2c4 5323 drm_sched_start(&ring->sched, true);
1d721ed6
AG
5324 }
5325
693073a0 5326 if (adev->enable_mes && adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3))
ed67f729
JX
5327 amdgpu_mes_self_test(tmp_adev);
5328
1053b9c9 5329 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) {
4a580877 5330 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
1d721ed6
AG
5331 }
5332
7258fa31
SK
5333 if (tmp_adev->asic_reset_res)
5334 r = tmp_adev->asic_reset_res;
5335
1d721ed6 5336 tmp_adev->asic_reset_res = 0;
26bc5340
AG
5337
5338 if (r) {
5339 /* bad news, how to tell it to userspace? */
12ffa55d 5340 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
5341 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5342 } else {
12ffa55d 5343 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
3fa8f89d
S
5344 if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5345 DRM_WARN("smart shift update failed\n");
26bc5340 5346 }
7c6e68c7 5347 }
26bc5340 5348
7c6e68c7 5349skip_sched_resume:
655ce9cb 5350 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
428890a3 5351 /* unlock kfd: SRIOV would do it separately */
c004d44e 5352 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
428890a3 5353 amdgpu_amdkfd_post_reset(tmp_adev);
8e2712e7 5354
5355 /* kfd_post_reset will do nothing if the kfd device is not initialized;
5356 * bring up kfd here if it was not initialized before.
5357 */
5358 if (!adev->kfd.init_complete)
5359 amdgpu_amdkfd_device_init(adev);
5360
3f12acc8
EQ
5361 if (audio_suspended)
5362 amdgpu_device_resume_display_audio(tmp_adev);
e923be99
AG
5363
5364 amdgpu_device_unset_mp1_state(tmp_adev);
d293470e
YC
5365
5366 amdgpu_ras_set_error_query_ready(tmp_adev, true);
26bc5340
AG
5367 }
5368
f5c7e779 5369recover_end:
e923be99
AG
5370 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5371 reset_list);
5372 amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5373
9e94d22c 5374 if (hive) {
9e94d22c 5375 mutex_unlock(&hive->hive_lock);
d95e8e97 5376 amdgpu_put_xgmi_hive(hive);
9e94d22c 5377 }
26bc5340 5378
f287a3c5 5379 if (r)
26bc5340 5380 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
ab9a0b1f
AG
5381
5382 atomic_set(&adev->reset_domain->reset_res, r);
d38ceaf9
AD
5383 return r;
5384}
5385
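/*
 * Hedged sketch of a caller (e.g. a job-timeout handler elsewhere in the
 * driver): fill in a reset context and hand the hung job to
 * amdgpu_device_gpu_recover(). The field choices mirror the PCI slot-reset
 * path below; the function name is hypothetical.
 */
static int example_trigger_gpu_recovery(struct amdgpu_device *adev,
					struct amdgpu_job *job)
{
	struct amdgpu_reset_context reset_context;

	memset(&reset_context, 0, sizeof(reset_context));
	reset_context.method = AMD_RESET_METHOD_NONE;
	reset_context.reset_req_dev = adev;
	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);

	return amdgpu_device_gpu_recover(adev, job, &reset_context);
}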
e3ecdffa
AD
5386/**
5387 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
5388 *
5389 * @adev: amdgpu_device pointer
5390 *
5391 * Fetches and stores in the driver the PCIE capabilities (gen speed
5392 * and lanes) of the slot the device is in. Handles APUs and
5393 * virtualized environments where PCIE config space may not be available.
5394 */
5494d864 5395static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 5396{
5d9a6330 5397 struct pci_dev *pdev;
c5313457
HK
5398 enum pci_bus_speed speed_cap, platform_speed_cap;
5399 enum pcie_link_width platform_link_width;
d0dd7f0c 5400
cd474ba0
AD
5401 if (amdgpu_pcie_gen_cap)
5402 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 5403
cd474ba0
AD
5404 if (amdgpu_pcie_lane_cap)
5405 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 5406
cd474ba0
AD
5407 /* covers APUs as well */
5408 if (pci_is_root_bus(adev->pdev->bus)) {
5409 if (adev->pm.pcie_gen_mask == 0)
5410 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5411 if (adev->pm.pcie_mlw_mask == 0)
5412 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 5413 return;
cd474ba0 5414 }
d0dd7f0c 5415
c5313457
HK
5416 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5417 return;
5418
dbaa922b
AD
5419 pcie_bandwidth_available(adev->pdev, NULL,
5420 &platform_speed_cap, &platform_link_width);
c5313457 5421
cd474ba0 5422 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
5423 /* asic caps */
5424 pdev = adev->pdev;
5425 speed_cap = pcie_get_speed_cap(pdev);
5426 if (speed_cap == PCI_SPEED_UNKNOWN) {
5427 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
5428 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5429 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 5430 } else {
2b3a1f51
FX
5431 if (speed_cap == PCIE_SPEED_32_0GT)
5432 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5433 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5434 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5435 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5436 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5437 else if (speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5438 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5439 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5440 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5441 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5442 else if (speed_cap == PCIE_SPEED_8_0GT)
5443 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5444 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5445 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5446 else if (speed_cap == PCIE_SPEED_5_0GT)
5447 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5448 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5449 else
5450 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5451 }
5452 /* platform caps */
c5313457 5453 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
5454 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5455 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5456 } else {
2b3a1f51
FX
5457 if (platform_speed_cap == PCIE_SPEED_32_0GT)
5458 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5459 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5460 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5461 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5462 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5463 else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5464 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5465 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5466 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5467 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 5468 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
5469 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5470 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5471 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 5472 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
5473 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5474 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5475 else
5476 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5477
cd474ba0
AD
5478 }
5479 }
5480 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 5481 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
5482 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5483 } else {
c5313457 5484 switch (platform_link_width) {
5d9a6330 5485 case PCIE_LNK_X32:
cd474ba0
AD
5486 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5487 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5488 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5489 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5490 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5491 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5492 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5493 break;
5d9a6330 5494 case PCIE_LNK_X16:
cd474ba0
AD
5495 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5496 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5497 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5498 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5499 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5500 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5501 break;
5d9a6330 5502 case PCIE_LNK_X12:
cd474ba0
AD
5503 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5504 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5505 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5506 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5507 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5508 break;
5d9a6330 5509 case PCIE_LNK_X8:
cd474ba0
AD
5510 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5511 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5512 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5513 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5514 break;
5d9a6330 5515 case PCIE_LNK_X4:
cd474ba0
AD
5516 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5517 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5518 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5519 break;
5d9a6330 5520 case PCIE_LNK_X2:
cd474ba0
AD
5521 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5522 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5523 break;
5d9a6330 5524 case PCIE_LNK_X1:
cd474ba0
AD
5525 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5526 break;
5527 default:
5528 break;
5529 }
d0dd7f0c
AD
5530 }
5531 }
5532}
d38ceaf9 5533
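/*
 * Hedged sketch: power-management code can test the masks filled in above,
 * e.g. to check whether PCIe gen3 is supported by both the ASIC and the
 * platform. The helper name is hypothetical; the constants are the ones
 * used above.
 */
static bool example_supports_pcie_gen3(struct amdgpu_device *adev)
{
	return (adev->pm.pcie_gen_mask & CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3) &&
	       (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
}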
08a2fd23
RE
5534/**
5535 * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
5536 *
5537 * @adev: amdgpu_device pointer
5538 * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
5539 *
5540 * Return true if @peer_adev can access (DMA) @adev through the PCIe
5541 * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
5542 * @peer_adev.
5543 */
5544bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
5545 struct amdgpu_device *peer_adev)
5546{
5547#ifdef CONFIG_HSA_AMD_P2P
5548 uint64_t address_mask = peer_adev->dev->dma_mask ?
5549 ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
5550 resource_size_t aper_limit =
5551 adev->gmc.aper_base + adev->gmc.aper_size - 1;
bb66ecbf
LL
5552 bool p2p_access =
5553 !adev->gmc.xgmi.connected_to_cpu &&
5554 !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
08a2fd23
RE
5555
5556 return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
5557 adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
5558 !(adev->gmc.aper_base & address_mask ||
5559 aper_limit & address_mask));
5560#else
5561 return false;
5562#endif
5563}
5564
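/*
 * Hedged sketch: bidirectional P2P DMA is only usable when each device can
 * reach the other through its BAR, so a caller would typically check both
 * directions. The helper name is hypothetical.
 */
static bool example_can_use_p2p(struct amdgpu_device *a,
				struct amdgpu_device *b)
{
	return amdgpu_device_is_peer_accessible(a, b) &&
	       amdgpu_device_is_peer_accessible(b, a);
}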
361dbd01
AD
5565int amdgpu_device_baco_enter(struct drm_device *dev)
5566{
1348969a 5567 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5568 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01 5569
4a580877 5570 if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
361dbd01
AD
5571 return -ENOTSUPP;
5572
8ab0d6f0 5573 if (ras && adev->ras_enabled &&
acdae216 5574 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5575 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
5576
9530273e 5577 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
5578}
5579
5580int amdgpu_device_baco_exit(struct drm_device *dev)
5581{
1348969a 5582 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5583 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 5584 int ret = 0;
361dbd01 5585
4a580877 5586 if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
361dbd01
AD
5587 return -ENOTSUPP;
5588
9530273e
EQ
5589 ret = amdgpu_dpm_baco_exit(adev);
5590 if (ret)
5591 return ret;
7a22677b 5592
8ab0d6f0 5593 if (ras && adev->ras_enabled &&
acdae216 5594 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5595 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
5596
1bece222
CL
5597 if (amdgpu_passthrough(adev) &&
5598 adev->nbio.funcs->clear_doorbell_interrupt)
5599 adev->nbio.funcs->clear_doorbell_interrupt(adev);
5600
7a22677b 5601 return 0;
361dbd01 5602}
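/*
 * Hedged sketch of pairing the BACO helpers above, e.g. from a runtime-PM
 * style path. Error handling is simplified and the function name is
 * hypothetical.
 */
static int example_baco_cycle(struct drm_device *dev)
{
	int r = amdgpu_device_baco_enter(dev);

	if (r)
		return r;

	/* ... the device sits in BACO here ... */

	return amdgpu_device_baco_exit(dev);
}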
c9a6b82f
AG
5603
5604/**
5605 * amdgpu_pci_error_detected - Called when a PCI error is detected.
5606 * @pdev: PCI device struct
5607 * @state: PCI channel state
5608 *
5609 * Description: Called when a PCI error is detected.
5610 *
5611 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
5612 */
5613pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
5614{
5615 struct drm_device *dev = pci_get_drvdata(pdev);
5616 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5617 int i;
c9a6b82f
AG
5618
5619 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
5620
6894305c
AG
5621 if (adev->gmc.xgmi.num_physical_nodes > 1) {
5622 DRM_WARN("No support for XGMI hive yet...");
5623 return PCI_ERS_RESULT_DISCONNECT;
5624 }
5625
e17e27f9
GC
5626 adev->pci_channel_state = state;
5627
c9a6b82f
AG
5628 switch (state) {
5629 case pci_channel_io_normal:
5630 return PCI_ERS_RESULT_CAN_RECOVER;
acd89fca 5631 /* Fatal error, prepare for slot reset */
8a11d283
TZ
5632 case pci_channel_io_frozen:
5633 /*
d0fb18b5 5634 * Locking adev->reset_domain->sem will prevent any external access
acd89fca
AG
5635 * to GPU during PCI error recovery
5636 */
3675c2f2 5637 amdgpu_device_lock_reset_domain(adev->reset_domain);
e923be99 5638 amdgpu_device_set_mp1_state(adev);
acd89fca
AG
5639
5640 /*
5641 * Block any work scheduling as we do for regular GPU reset
5642 * for the duration of the recovery
5643 */
5644 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5645 struct amdgpu_ring *ring = adev->rings[i];
5646
5647 if (!ring || !ring->sched.thread)
5648 continue;
5649
5650 drm_sched_stop(&ring->sched, NULL);
5651 }
8f8c80f4 5652 atomic_inc(&adev->gpu_reset_counter);
c9a6b82f
AG
5653 return PCI_ERS_RESULT_NEED_RESET;
5654 case pci_channel_io_perm_failure:
5655 /* Permanent error, prepare for device removal */
5656 return PCI_ERS_RESULT_DISCONNECT;
5657 }
5658
5659 return PCI_ERS_RESULT_NEED_RESET;
5660}
5661
5662/**
5663 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
5664 * @pdev: pointer to PCI device
5665 */
5666pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
5667{
5668
5669 DRM_INFO("PCI error: mmio enabled callback!!\n");
5670
5671 /* TODO - dump whatever for debugging purposes */
5672
5673 /* This is called only if amdgpu_pci_error_detected returns
5674 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
5675 * works, no need to reset slot.
5676 */
5677
5678 return PCI_ERS_RESULT_RECOVERED;
5679}
5680
5681/**
5682 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
5683 * @pdev: PCI device struct
5684 *
5685 * Description: This routine is called by the pci error recovery
5686 * code after the PCI slot has been reset, just before we
5687 * should resume normal operations.
5688 */
5689pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
5690{
5691 struct drm_device *dev = pci_get_drvdata(pdev);
5692 struct amdgpu_device *adev = drm_to_adev(dev);
362c7b91 5693 int r, i;
04442bf7 5694 struct amdgpu_reset_context reset_context;
362c7b91 5695 u32 memsize;
7ac71382 5696 struct list_head device_list;
c9a6b82f
AG
5697
5698 DRM_INFO("PCI error: slot reset callback!!\n");
5699
04442bf7
LL
5700 memset(&reset_context, 0, sizeof(reset_context));
5701
7ac71382 5702 INIT_LIST_HEAD(&device_list);
655ce9cb 5703 list_add_tail(&adev->reset_list, &device_list);
7ac71382 5704
362c7b91
AG
5705 /* wait for asic to come out of reset */
5706 msleep(500);
5707
7ac71382 5708 /* Restore PCI confspace */
c1dd4aa6 5709 amdgpu_device_load_pci_state(pdev);
c9a6b82f 5710
362c7b91
AG
5711 /* confirm ASIC came out of reset */
5712 for (i = 0; i < adev->usec_timeout; i++) {
5713 memsize = amdgpu_asic_get_config_memsize(adev);
5714
5715 if (memsize != 0xffffffff)
5716 break;
5717 udelay(1);
5718 }
5719 if (memsize == 0xffffffff) {
5720 r = -ETIME;
5721 goto out;
5722 }
5723
04442bf7
LL
5724 reset_context.method = AMD_RESET_METHOD_NONE;
5725 reset_context.reset_req_dev = adev;
5726 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
5727 set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
5728
7afefb81 5729 adev->no_hw_access = true;
04442bf7 5730 r = amdgpu_device_pre_asic_reset(adev, &reset_context);
7afefb81 5731 adev->no_hw_access = false;
c9a6b82f
AG
5732 if (r)
5733 goto out;
5734
04442bf7 5735 r = amdgpu_do_asic_reset(&device_list, &reset_context);
c9a6b82f
AG
5736
5737out:
c9a6b82f 5738 if (!r) {
c1dd4aa6
AG
5739 if (amdgpu_device_cache_pci_state(adev->pdev))
5740 pci_restore_state(adev->pdev);
5741
c9a6b82f
AG
5742 DRM_INFO("PCIe error recovery succeeded\n");
5743 } else {
5744 DRM_ERROR("PCIe error recovery failed, err:%d", r);
e923be99
AG
5745 amdgpu_device_unset_mp1_state(adev);
5746 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f
AG
5747 }
5748
5749 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
5750}
5751
5752/**
5753 * amdgpu_pci_resume() - resume normal ops after PCI reset
5754 * @pdev: pointer to PCI device
5755 *
5756 * Called when the error recovery driver tells us that it's
505199a3 5757 * OK to resume normal operation.
c9a6b82f
AG
5758 */
5759void amdgpu_pci_resume(struct pci_dev *pdev)
5760{
5761 struct drm_device *dev = pci_get_drvdata(pdev);
5762 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5763 int i;
c9a6b82f 5764
c9a6b82f
AG
5765
5766 DRM_INFO("PCI error: resume callback!!\n");
acd89fca 5767
e17e27f9
GC
5768 /* Only continue execution for the case of pci_channel_io_frozen */
5769 if (adev->pci_channel_state != pci_channel_io_frozen)
5770 return;
5771
acd89fca
AG
5772 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5773 struct amdgpu_ring *ring = adev->rings[i];
5774
5775 if (!ring || !ring->sched.thread)
5776 continue;
5777
acd89fca
AG
5778 drm_sched_start(&ring->sched, true);
5779 }
5780
e923be99
AG
5781 amdgpu_device_unset_mp1_state(adev);
5782 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f 5783}
c1dd4aa6
AG
5784
5785bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
5786{
5787 struct drm_device *dev = pci_get_drvdata(pdev);
5788 struct amdgpu_device *adev = drm_to_adev(dev);
5789 int r;
5790
5791 r = pci_save_state(pdev);
5792 if (!r) {
5793 kfree(adev->pci_state);
5794
5795 adev->pci_state = pci_store_saved_state(pdev);
5796
5797 if (!adev->pci_state) {
5798 DRM_ERROR("Failed to store PCI saved state");
5799 return false;
5800 }
5801 } else {
5802 DRM_WARN("Failed to save PCI state, err:%d\n", r);
5803 return false;
5804 }
5805
5806 return true;
5807}
5808
5809bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
5810{
5811 struct drm_device *dev = pci_get_drvdata(pdev);
5812 struct amdgpu_device *adev = drm_to_adev(dev);
5813 int r;
5814
5815 if (!adev->pci_state)
5816 return false;
5817
5818 r = pci_load_saved_state(pdev, adev->pci_state);
5819
5820 if (!r) {
5821 pci_restore_state(pdev);
5822 } else {
5823 DRM_WARN("Failed to load PCI state, err:%d\n", r);
5824 return false;
5825 }
5826
5827 return true;
5828}
5829
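/*
 * Hedged sketch: the two helpers above form a save/restore pair around a
 * disruptive event such as a reset, as the PCI error handlers above do.
 * The wrapper name is hypothetical.
 */
static void example_pci_state_roundtrip(struct amdgpu_device *adev)
{
	if (!amdgpu_device_cache_pci_state(adev->pdev))
		return;		/* nothing cached, nothing to restore */

	/* ... reset or power transition happens here ... */

	/* load_pci_state() restores the state it finds in adev->pci_state */
	amdgpu_device_load_pci_state(adev->pdev);
}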
810085dd
EH
5830void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
5831 struct amdgpu_ring *ring)
5832{
5833#ifdef CONFIG_X86_64
b818a5d3 5834 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
5835 return;
5836#endif
5837 if (adev->gmc.xgmi.connected_to_cpu)
5838 return;
5839
5840 if (ring && ring->funcs->emit_hdp_flush)
5841 amdgpu_ring_emit_hdp_flush(ring);
5842 else
5843 amdgpu_asic_flush_hdp(adev, ring);
5844}
c1dd4aa6 5845
810085dd
EH
5846void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
5847 struct amdgpu_ring *ring)
5848{
5849#ifdef CONFIG_X86_64
b818a5d3 5850 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
5851 return;
5852#endif
5853 if (adev->gmc.xgmi.connected_to_cpu)
5854 return;
c1dd4aa6 5855
810085dd
EH
5856 amdgpu_asic_invalidate_hdp(adev, ring);
5857}
34f3a4a9 5858
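/*
 * Hedged sketch: a CPU->GPU producer flushes HDP after writing system
 * memory the GPU will read, and invalidates HDP before the CPU reads data
 * the GPU produced. The ring may be NULL to use the asic/MMIO path. The
 * wrapper name is hypothetical.
 */
static void example_hdp_sync(struct amdgpu_device *adev,
			     struct amdgpu_ring *ring)
{
	/* after CPU writes, before GPU reads */
	amdgpu_device_flush_hdp(adev, ring);

	/* ... GPU work runs here ... */

	/* before CPU reads what the GPU wrote */
	amdgpu_device_invalidate_hdp(adev, ring);
}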
89a7a870
AG
5859int amdgpu_in_reset(struct amdgpu_device *adev)
5860{
5861 return atomic_read(&adev->reset_domain->in_gpu_reset);
5862}
5863
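/*
 * Hedged sketch: non-essential paths typically bail out while a reset is
 * in flight rather than touching the hardware. The helper name and error
 * code choice are hypothetical.
 */
static int example_skip_while_in_reset(struct amdgpu_device *adev)
{
	if (amdgpu_in_reset(adev))
		return -EBUSY;

	/* ... safe to access the hardware here ... */
	return 0;
}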
34f3a4a9
LY
5864/**
5865 * amdgpu_device_halt() - bring hardware to some kind of halt state
5866 *
5867 * @adev: amdgpu_device pointer
5868 *
5869 * Bring hardware to some kind of halt state so that no one can touch it
5870 * any more. It helps to maintain the error context when an error occurs.
5871 * Compared to a simple hang, the system stays stable at least for SSH
5872 * access, so it should be trivial to inspect the hardware state and
5873 * see what's going on. Implemented as follows:
5874 *
5875 * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
5876 * clears all CPU mappings to device, disallows remappings through page faults
5877 * 2. amdgpu_irq_disable_all() disables all interrupts
5878 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
5879 * 4. set adev->no_hw_access to avoid potential crashes after step 5
5880 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
5881 * 6. pci_disable_device() and pci_wait_for_pending_transaction()
5882 * flush any in flight DMA operations
5883 */
5884void amdgpu_device_halt(struct amdgpu_device *adev)
5885{
5886 struct pci_dev *pdev = adev->pdev;
e0f943b4 5887 struct drm_device *ddev = adev_to_drm(adev);
34f3a4a9
LY
5888
5889 drm_dev_unplug(ddev);
5890
5891 amdgpu_irq_disable_all(adev);
5892
5893 amdgpu_fence_driver_hw_fini(adev);
5894
5895 adev->no_hw_access = true;
5896
5897 amdgpu_device_unmap_mmio(adev);
5898
5899 pci_disable_device(pdev);
5900 pci_wait_for_pending_transaction(pdev);
5901}
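/*
 * Hedged sketch: a fatal, unrecoverable error path could park the device
 * with amdgpu_device_halt() so the rest of the system stays inspectable.
 * The function name is hypothetical.
 */
static void example_fatal_error_path(struct amdgpu_device *adev)
{
	dev_err(adev->dev, "unrecoverable error, halting device\n");
	amdgpu_device_halt(adev);
}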
86700a40
XD
5902
5903u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
5904 u32 reg)
5905{
5906 unsigned long flags, address, data;
5907 u32 r;
5908
5909 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
5910 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
5911
5912 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
5913 WREG32(address, reg * 4);
5914 (void)RREG32(address);
5915 r = RREG32(data);
5916 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
5917 return r;
5918}
5919
5920void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
5921 u32 reg, u32 v)
5922{
5923 unsigned long flags, address, data;
5924
5925 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
5926 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
5927
5928 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
5929 WREG32(address, reg * 4);
5930 (void)RREG32(address);
5931 WREG32(data, v);
5932 (void)RREG32(data);
5933 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
5934}
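/*
 * Hedged sketch: a read-modify-write on a PCIe port register built from
 * the two accessors above. The helper name and mask parameters are
 * hypothetical.
 */
static void example_pcie_port_rmw(struct amdgpu_device *adev,
				  u32 reg, u32 clear, u32 set)
{
	u32 v = amdgpu_device_pcie_port_rreg(adev, reg);

	v &= ~clear;
	v |= set;
	amdgpu_device_pcie_port_wreg(adev, reg, v);
}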
68ce8b24
CK
5935
5936/**
5937 * amdgpu_device_switch_gang - switch to a new gang
5938 * @adev: amdgpu_device pointer
5939 * @gang: the gang to switch to
5940 *
5941 * Try to switch to a new gang.
5942 * Returns: NULL if we switched to the new gang or a reference to the current
5943 * gang leader.
5944 */
5945struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
5946 struct dma_fence *gang)
5947{
5948 struct dma_fence *old = NULL;
5949
5950 do {
5951 dma_fence_put(old);
5952 rcu_read_lock();
5953 old = dma_fence_get_rcu_safe(&adev->gang_submit);
5954 rcu_read_unlock();
5955
5956 if (old == gang)
5957 break;
5958
5959 if (!dma_fence_is_signaled(old))
5960 return old;
5961
5962 } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
5963 old, gang) != old);
5964
5965 dma_fence_put(old);
5966 return NULL;
5967}
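/*
 * Hedged sketch: a submission path can retry the switch until no other
 * gang leader is still running, waiting on whatever fence is returned.
 * The helper name is hypothetical.
 */
static int example_wait_for_gang_switch(struct amdgpu_device *adev,
					struct dma_fence *gang)
{
	struct dma_fence *old;

	while ((old = amdgpu_device_switch_gang(adev, gang))) {
		long r = dma_fence_wait(old, true);

		dma_fence_put(old);
		if (r < 0)
			return r;
	}
	return 0;
}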
220c8cc8
AD
5968
5969bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
5970{
5971 switch (adev->asic_type) {
5972#ifdef CONFIG_DRM_AMDGPU_SI
5973 case CHIP_HAINAN:
5974#endif
5975 case CHIP_TOPAZ:
5976 /* chips with no display hardware */
5977 return false;
5978#ifdef CONFIG_DRM_AMDGPU_SI
5979 case CHIP_TAHITI:
5980 case CHIP_PITCAIRN:
5981 case CHIP_VERDE:
5982 case CHIP_OLAND:
5983#endif
5984#ifdef CONFIG_DRM_AMDGPU_CIK
5985 case CHIP_BONAIRE:
5986 case CHIP_HAWAII:
5987 case CHIP_KAVERI:
5988 case CHIP_KABINI:
5989 case CHIP_MULLINS:
5990#endif
5991 case CHIP_TONGA:
5992 case CHIP_FIJI:
5993 case CHIP_POLARIS10:
5994 case CHIP_POLARIS11:
5995 case CHIP_POLARIS12:
5996 case CHIP_VEGAM:
5997 case CHIP_CARRIZO:
5998 case CHIP_STONEY:
5999 /* chips with display hardware */
6000 return true;
6001 default:
6002 /* IP discovery */
6003 if (!adev->ip_versions[DCE_HWIP][0] ||
6004 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6005 return false;
6006 return true;
6007 }
6008}
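/*
 * Hedged sketch: display-related init elsewhere in the driver can be keyed
 * off the helper above, for example only bringing up display code on bare
 * metal parts that actually have a DCE/DCN block. The helper name is
 * hypothetical.
 */
static bool example_should_init_display(struct amdgpu_device *adev)
{
	return amdgpu_device_has_display_hardware(adev) &&
	       !amdgpu_sriov_vf(adev);
}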