drm/amdgpu: expose more memory stats in fdinfo
[linux-2.6-block.git] / drivers / gpu / drm / amd / amdgpu / amdgpu_device.c
CommitLineData
d38ceaf9
AD
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
b1ddf548 28#include <linux/power_supply.h>
0875dc9e 29#include <linux/kthread.h>
fdf2f6c5 30#include <linux/module.h>
d38ceaf9
AD
31#include <linux/console.h>
32#include <linux/slab.h>
4a74c38c 33#include <linux/iommu.h>
901e2be2 34#include <linux/pci.h>
3d8785f6
SA
35#include <linux/devcoredump.h>
36#include <generated/utsrelease.h>
08a2fd23 37#include <linux/pci-p2pdma.h>
d37a3929 38#include <linux/apple-gmux.h>
fdf2f6c5 39
b7cdb41e 40#include <drm/drm_aperture.h>
4562236b 41#include <drm/drm_atomic_helper.h>
973ad627 42#include <drm/drm_crtc_helper.h>
45b64fd9 43#include <drm/drm_fb_helper.h>
fcd70cd3 44#include <drm/drm_probe_helper.h>
d38ceaf9
AD
45#include <drm/amdgpu_drm.h>
46#include <linux/vgaarb.h>
47#include <linux/vga_switcheroo.h>
48#include <linux/efi.h>
49#include "amdgpu.h"
f4b373f4 50#include "amdgpu_trace.h"
d38ceaf9
AD
51#include "amdgpu_i2c.h"
52#include "atom.h"
53#include "amdgpu_atombios.h"
a5bde2f9 54#include "amdgpu_atomfirmware.h"
d0dd7f0c 55#include "amd_pcie.h"
33f34802
KW
56#ifdef CONFIG_DRM_AMDGPU_SI
57#include "si.h"
58#endif
a2e73f56
AD
59#ifdef CONFIG_DRM_AMDGPU_CIK
60#include "cik.h"
61#endif
aaa36a97 62#include "vi.h"
460826e6 63#include "soc15.h"
0a5b8c7b 64#include "nv.h"
d38ceaf9 65#include "bif/bif_4_1_d.h"
bec86378 66#include <linux/firmware.h>
89041940 67#include "amdgpu_vf_error.h"
d38ceaf9 68
ba997709 69#include "amdgpu_amdkfd.h"
d2f52ac8 70#include "amdgpu_pm.h"
d38ceaf9 71
5183411b 72#include "amdgpu_xgmi.h"
c030f2e4 73#include "amdgpu_ras.h"
9c7c85f7 74#include "amdgpu_pmu.h"
bd607166 75#include "amdgpu_fru_eeprom.h"
04442bf7 76#include "amdgpu_reset.h"
5183411b 77
d5ea093e 78#include <linux/suspend.h>
c6a6e2db 79#include <drm/task_barrier.h>
3f12acc8 80#include <linux/pm_runtime.h>
d5ea093e 81
f89f8c6b
AG
82#include <drm/drm_drv.h>
83
e2a75f88 84MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
3f76dced 85MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
2d2e5e7e 86MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
ad5a67a7 87MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
54c4d17e 88MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
65e60f6e 89MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
42b325e5 90MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
e2a75f88 91
2dc80b00 92#define AMDGPU_RESUME_MS 2000
7258fa31
SK
93#define AMDGPU_MAX_RETRY_LIMIT 2
94#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
2dc80b00 95
b7cdb41e
ML
96static const struct drm_driver amdgpu_kms_driver;
97
050091ab 98const char *amdgpu_asic_name[] = {
da69c161
KW
99 "TAHITI",
100 "PITCAIRN",
101 "VERDE",
102 "OLAND",
103 "HAINAN",
d38ceaf9
AD
104 "BONAIRE",
105 "KAVERI",
106 "KABINI",
107 "HAWAII",
108 "MULLINS",
109 "TOPAZ",
110 "TONGA",
48299f95 111 "FIJI",
d38ceaf9 112 "CARRIZO",
139f4917 113 "STONEY",
2cc0c0b5
FC
114 "POLARIS10",
115 "POLARIS11",
c4642a47 116 "POLARIS12",
48ff108d 117 "VEGAM",
d4196f01 118 "VEGA10",
8fab806a 119 "VEGA12",
956fcddc 120 "VEGA20",
2ca8a5d2 121 "RAVEN",
d6c3b24e 122 "ARCTURUS",
1eee4228 123 "RENOIR",
d46b417a 124 "ALDEBARAN",
852a6626 125 "NAVI10",
d0f56dc2 126 "CYAN_SKILLFISH",
87dbad02 127 "NAVI14",
9802f5d7 128 "NAVI12",
ccaf72d3 129 "SIENNA_CICHLID",
ddd8fbe7 130 "NAVY_FLOUNDER",
4f1e9a76 131 "VANGOGH",
a2468e04 132 "DIMGREY_CAVEFISH",
6f169591 133 "BEIGE_GOBY",
ee9236b7 134 "YELLOW_CARP",
3ae695d6 135 "IP DISCOVERY",
d38ceaf9
AD
136 "LAST",
137};
138
dcea6e65
KR
139/**
140 * DOC: pcie_replay_count
141 *
142 * The amdgpu driver provides a sysfs API for reporting the total number
143 * of PCIe replays (NAKs)
144 * The file pcie_replay_count is used for this and returns the total
145 * number of replays as a sum of the NAKs generated and NAKs received
146 */
147
148static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
149 struct device_attribute *attr, char *buf)
150{
151 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 152 struct amdgpu_device *adev = drm_to_adev(ddev);
dcea6e65
KR
153 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
154
36000c7a 155 return sysfs_emit(buf, "%llu\n", cnt);
dcea6e65
KR
156}
157
158static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
159 amdgpu_device_get_pcie_replay_count, NULL);
160
5494d864
AD
161static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
162
bd607166
KR
163/**
164 * DOC: product_name
165 *
166 * The amdgpu driver provides a sysfs API for reporting the product name
167 * for the device
2c496a6c 168 * The file product_name is used for this and returns the product name
bd607166
KR
169 * as returned from the FRU.
170 * NOTE: This is only available for certain server cards
171 */
172
173static ssize_t amdgpu_device_get_product_name(struct device *dev,
174 struct device_attribute *attr, char *buf)
175{
176 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 177 struct amdgpu_device *adev = drm_to_adev(ddev);
bd607166 178
36000c7a 179 return sysfs_emit(buf, "%s\n", adev->product_name);
bd607166
KR
180}
181
182static DEVICE_ATTR(product_name, S_IRUGO,
183 amdgpu_device_get_product_name, NULL);
184
185/**
186 * DOC: product_number
187 *
188 * The amdgpu driver provides a sysfs API for reporting the part number
189 * for the device
2c496a6c 190 * The file product_number is used for this and returns the part number
bd607166
KR
191 * as returned from the FRU.
192 * NOTE: This is only available for certain server cards
193 */
194
195static ssize_t amdgpu_device_get_product_number(struct device *dev,
196 struct device_attribute *attr, char *buf)
197{
198 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 199 struct amdgpu_device *adev = drm_to_adev(ddev);
bd607166 200
36000c7a 201 return sysfs_emit(buf, "%s\n", adev->product_number);
bd607166
KR
202}
203
204static DEVICE_ATTR(product_number, S_IRUGO,
205 amdgpu_device_get_product_number, NULL);
206
207/**
208 * DOC: serial_number
209 *
210 * The amdgpu driver provides a sysfs API for reporting the serial number
211 * for the device
212 * The file serial_number is used for this and returns the serial number
213 * as returned from the FRU.
214 * NOTE: This is only available for certain server cards
215 */
216
217static ssize_t amdgpu_device_get_serial_number(struct device *dev,
218 struct device_attribute *attr, char *buf)
219{
220 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 221 struct amdgpu_device *adev = drm_to_adev(ddev);
bd607166 222
36000c7a 223 return sysfs_emit(buf, "%s\n", adev->serial);
bd607166
KR
224}
225
226static DEVICE_ATTR(serial_number, S_IRUGO,
227 amdgpu_device_get_serial_number, NULL);
228
fd496ca8 229/**
b98c6299 230 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
fd496ca8
AD
231 *
232 * @dev: drm_device pointer
233 *
b98c6299 234 * Returns true if the device is a dGPU with ATPX power control,
fd496ca8
AD
235 * otherwise return false.
236 */
b98c6299 237bool amdgpu_device_supports_px(struct drm_device *dev)
fd496ca8
AD
238{
239 struct amdgpu_device *adev = drm_to_adev(dev);
240
b98c6299 241 if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
fd496ca8
AD
242 return true;
243 return false;
244}
245
e3ecdffa 246/**
0330b848 247 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
e3ecdffa
AD
248 *
249 * @dev: drm_device pointer
250 *
b98c6299 251 * Returns true if the device is a dGPU with ACPI power control,
e3ecdffa
AD
252 * otherwise return false.
253 */
31af062a 254bool amdgpu_device_supports_boco(struct drm_device *dev)
d38ceaf9 255{
1348969a 256 struct amdgpu_device *adev = drm_to_adev(dev);
d38ceaf9 257
b98c6299
AD
258 if (adev->has_pr3 ||
259 ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
d38ceaf9
AD
260 return true;
261 return false;
262}
263
a69cba42
AD
264/**
265 * amdgpu_device_supports_baco - Does the device support BACO
266 *
267 * @dev: drm_device pointer
268 *
269 * Returns true if the device supporte BACO,
270 * otherwise return false.
271 */
272bool amdgpu_device_supports_baco(struct drm_device *dev)
273{
1348969a 274 struct amdgpu_device *adev = drm_to_adev(dev);
a69cba42
AD
275
276 return amdgpu_asic_supports_baco(adev);
277}
278
3fa8f89d
S
279/**
280 * amdgpu_device_supports_smart_shift - Is the device dGPU with
281 * smart shift support
282 *
283 * @dev: drm_device pointer
284 *
285 * Returns true if the device is a dGPU with Smart Shift support,
286 * otherwise returns false.
287 */
288bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
289{
290 return (amdgpu_device_supports_boco(dev) &&
291 amdgpu_acpi_is_power_shift_control_supported());
292}
293
6e3cd2a9
MCC
294/*
295 * VRAM access helper functions
296 */
297
e35e2b11 298/**
048af66b 299 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
e35e2b11
TY
300 *
301 * @adev: amdgpu_device pointer
302 * @pos: offset of the buffer in vram
303 * @buf: virtual address of the buffer in system memory
304 * @size: read/write size, sizeof(@buf) must > @size
305 * @write: true - write to vram, otherwise - read from vram
306 */
048af66b
KW
307void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
308 void *buf, size_t size, bool write)
e35e2b11 309{
e35e2b11 310 unsigned long flags;
048af66b
KW
311 uint32_t hi = ~0, tmp = 0;
312 uint32_t *data = buf;
ce05ac56 313 uint64_t last;
f89f8c6b 314 int idx;
ce05ac56 315
c58a863b 316 if (!drm_dev_enter(adev_to_drm(adev), &idx))
f89f8c6b 317 return;
9d11eb0d 318
048af66b
KW
319 BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
320
321 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
322 for (last = pos + size; pos < last; pos += 4) {
323 tmp = pos >> 31;
324
325 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
326 if (tmp != hi) {
327 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
328 hi = tmp;
329 }
330 if (write)
331 WREG32_NO_KIQ(mmMM_DATA, *data++);
332 else
333 *data++ = RREG32_NO_KIQ(mmMM_DATA);
334 }
335
336 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
337 drm_dev_exit(idx);
338}
339
340/**
bbe04dec 341 * amdgpu_device_aper_access - access vram by vram aperature
048af66b
KW
342 *
343 * @adev: amdgpu_device pointer
344 * @pos: offset of the buffer in vram
345 * @buf: virtual address of the buffer in system memory
346 * @size: read/write size, sizeof(@buf) must > @size
347 * @write: true - write to vram, otherwise - read from vram
348 *
349 * The return value means how many bytes have been transferred.
350 */
351size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
352 void *buf, size_t size, bool write)
353{
9d11eb0d 354#ifdef CONFIG_64BIT
048af66b
KW
355 void __iomem *addr;
356 size_t count = 0;
357 uint64_t last;
358
359 if (!adev->mman.aper_base_kaddr)
360 return 0;
361
9d11eb0d
CK
362 last = min(pos + size, adev->gmc.visible_vram_size);
363 if (last > pos) {
048af66b
KW
364 addr = adev->mman.aper_base_kaddr + pos;
365 count = last - pos;
9d11eb0d
CK
366
367 if (write) {
368 memcpy_toio(addr, buf, count);
369 mb();
810085dd 370 amdgpu_device_flush_hdp(adev, NULL);
9d11eb0d 371 } else {
810085dd 372 amdgpu_device_invalidate_hdp(adev, NULL);
9d11eb0d
CK
373 mb();
374 memcpy_fromio(buf, addr, count);
375 }
376
9d11eb0d 377 }
048af66b
KW
378
379 return count;
380#else
381 return 0;
9d11eb0d 382#endif
048af66b 383}
9d11eb0d 384
048af66b
KW
385/**
386 * amdgpu_device_vram_access - read/write a buffer in vram
387 *
388 * @adev: amdgpu_device pointer
389 * @pos: offset of the buffer in vram
390 * @buf: virtual address of the buffer in system memory
391 * @size: read/write size, sizeof(@buf) must > @size
392 * @write: true - write to vram, otherwise - read from vram
393 */
394void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
395 void *buf, size_t size, bool write)
396{
397 size_t count;
e35e2b11 398
048af66b
KW
399 /* try to using vram apreature to access vram first */
400 count = amdgpu_device_aper_access(adev, pos, buf, size, write);
401 size -= count;
402 if (size) {
403 /* using MM to access rest vram */
404 pos += count;
405 buf += count;
406 amdgpu_device_mm_access(adev, pos, buf, size, write);
e35e2b11
TY
407 }
408}
409
d38ceaf9 410/*
f7ee1874 411 * register access helper functions.
d38ceaf9 412 */
56b53c0b
DL
413
414/* Check if hw access should be skipped because of hotplug or device error */
415bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
416{
7afefb81 417 if (adev->no_hw_access)
56b53c0b
DL
418 return true;
419
420#ifdef CONFIG_LOCKDEP
421 /*
422 * This is a bit complicated to understand, so worth a comment. What we assert
423 * here is that the GPU reset is not running on another thread in parallel.
424 *
425 * For this we trylock the read side of the reset semaphore, if that succeeds
426 * we know that the reset is not running in paralell.
427 *
428 * If the trylock fails we assert that we are either already holding the read
429 * side of the lock or are the reset thread itself and hold the write side of
430 * the lock.
431 */
432 if (in_task()) {
d0fb18b5
AG
433 if (down_read_trylock(&adev->reset_domain->sem))
434 up_read(&adev->reset_domain->sem);
56b53c0b 435 else
d0fb18b5 436 lockdep_assert_held(&adev->reset_domain->sem);
56b53c0b
DL
437 }
438#endif
439 return false;
440}
441
e3ecdffa 442/**
f7ee1874 443 * amdgpu_device_rreg - read a memory mapped IO or indirect register
e3ecdffa
AD
444 *
445 * @adev: amdgpu_device pointer
446 * @reg: dword aligned register offset
447 * @acc_flags: access flags which require special behavior
448 *
449 * Returns the 32 bit value from the offset specified.
450 */
f7ee1874
HZ
451uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
452 uint32_t reg, uint32_t acc_flags)
d38ceaf9 453{
f4b373f4
TSD
454 uint32_t ret;
455
56b53c0b 456 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
457 return 0;
458
f7ee1874
HZ
459 if ((reg * 4) < adev->rmmio_size) {
460 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
461 amdgpu_sriov_runtime(adev) &&
d0fb18b5 462 down_read_trylock(&adev->reset_domain->sem)) {
f7ee1874 463 ret = amdgpu_kiq_rreg(adev, reg);
d0fb18b5 464 up_read(&adev->reset_domain->sem);
f7ee1874
HZ
465 } else {
466 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
467 }
468 } else {
469 ret = adev->pcie_rreg(adev, reg * 4);
81202807 470 }
bc992ba5 471
f7ee1874 472 trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
e78b579d 473
f4b373f4 474 return ret;
d38ceaf9
AD
475}
476
421a2a30
ML
477/*
478 * MMIO register read with bytes helper functions
479 * @offset:bytes offset from MMIO start
480 *
481*/
482
e3ecdffa
AD
483/**
484 * amdgpu_mm_rreg8 - read a memory mapped IO register
485 *
486 * @adev: amdgpu_device pointer
487 * @offset: byte aligned register offset
488 *
489 * Returns the 8 bit value from the offset specified.
490 */
7cbbc745
AG
491uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
492{
56b53c0b 493 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
494 return 0;
495
421a2a30
ML
496 if (offset < adev->rmmio_size)
497 return (readb(adev->rmmio + offset));
498 BUG();
499}
500
501/*
502 * MMIO register write with bytes helper functions
503 * @offset:bytes offset from MMIO start
504 * @value: the value want to be written to the register
505 *
506*/
e3ecdffa
AD
507/**
508 * amdgpu_mm_wreg8 - read a memory mapped IO register
509 *
510 * @adev: amdgpu_device pointer
511 * @offset: byte aligned register offset
512 * @value: 8 bit value to write
513 *
514 * Writes the value specified to the offset specified.
515 */
7cbbc745
AG
516void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
517{
56b53c0b 518 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
519 return;
520
421a2a30
ML
521 if (offset < adev->rmmio_size)
522 writeb(value, adev->rmmio + offset);
523 else
524 BUG();
525}
526
e3ecdffa 527/**
f7ee1874 528 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
e3ecdffa
AD
529 *
530 * @adev: amdgpu_device pointer
531 * @reg: dword aligned register offset
532 * @v: 32 bit value to write to the register
533 * @acc_flags: access flags which require special behavior
534 *
535 * Writes the value specified to the offset specified.
536 */
f7ee1874
HZ
537void amdgpu_device_wreg(struct amdgpu_device *adev,
538 uint32_t reg, uint32_t v,
539 uint32_t acc_flags)
d38ceaf9 540{
56b53c0b 541 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
542 return;
543
f7ee1874
HZ
544 if ((reg * 4) < adev->rmmio_size) {
545 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
546 amdgpu_sriov_runtime(adev) &&
d0fb18b5 547 down_read_trylock(&adev->reset_domain->sem)) {
f7ee1874 548 amdgpu_kiq_wreg(adev, reg, v);
d0fb18b5 549 up_read(&adev->reset_domain->sem);
f7ee1874
HZ
550 } else {
551 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
552 }
553 } else {
554 adev->pcie_wreg(adev, reg * 4, v);
81202807 555 }
bc992ba5 556
f7ee1874 557 trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
2e0cc4d4 558}
d38ceaf9 559
03f2abb0 560/**
4cc9f86f 561 * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
2e0cc4d4 562 *
71579346
RB
563 * @adev: amdgpu_device pointer
564 * @reg: mmio/rlc register
565 * @v: value to write
566 *
567 * this function is invoked only for the debugfs register access
03f2abb0 568 */
f7ee1874
HZ
569void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
570 uint32_t reg, uint32_t v)
2e0cc4d4 571{
56b53c0b 572 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
573 return;
574
2e0cc4d4 575 if (amdgpu_sriov_fullaccess(adev) &&
f7ee1874
HZ
576 adev->gfx.rlc.funcs &&
577 adev->gfx.rlc.funcs->is_rlcg_access_range) {
2e0cc4d4 578 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
1b2dc99e 579 return amdgpu_sriov_wreg(adev, reg, v, 0, 0);
4cc9f86f
TSD
580 } else if ((reg * 4) >= adev->rmmio_size) {
581 adev->pcie_wreg(adev, reg * 4, v);
f7ee1874
HZ
582 } else {
583 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
47ed4e1c 584 }
d38ceaf9
AD
585}
586
d38ceaf9
AD
587/**
588 * amdgpu_mm_rdoorbell - read a doorbell dword
589 *
590 * @adev: amdgpu_device pointer
591 * @index: doorbell index
592 *
593 * Returns the value in the doorbell aperture at the
594 * requested doorbell index (CIK).
595 */
596u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
597{
56b53c0b 598 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
599 return 0;
600
d38ceaf9
AD
601 if (index < adev->doorbell.num_doorbells) {
602 return readl(adev->doorbell.ptr + index);
603 } else {
604 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
605 return 0;
606 }
607}
608
609/**
610 * amdgpu_mm_wdoorbell - write a doorbell dword
611 *
612 * @adev: amdgpu_device pointer
613 * @index: doorbell index
614 * @v: value to write
615 *
616 * Writes @v to the doorbell aperture at the
617 * requested doorbell index (CIK).
618 */
619void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
620{
56b53c0b 621 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
622 return;
623
d38ceaf9
AD
624 if (index < adev->doorbell.num_doorbells) {
625 writel(v, adev->doorbell.ptr + index);
626 } else {
627 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
628 }
629}
630
832be404
KW
631/**
632 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
633 *
634 * @adev: amdgpu_device pointer
635 * @index: doorbell index
636 *
637 * Returns the value in the doorbell aperture at the
638 * requested doorbell index (VEGA10+).
639 */
640u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
641{
56b53c0b 642 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
643 return 0;
644
832be404
KW
645 if (index < adev->doorbell.num_doorbells) {
646 return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
647 } else {
648 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
649 return 0;
650 }
651}
652
653/**
654 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
655 *
656 * @adev: amdgpu_device pointer
657 * @index: doorbell index
658 * @v: value to write
659 *
660 * Writes @v to the doorbell aperture at the
661 * requested doorbell index (VEGA10+).
662 */
663void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
664{
56b53c0b 665 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
666 return;
667
832be404
KW
668 if (index < adev->doorbell.num_doorbells) {
669 atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
670 } else {
671 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
672 }
673}
674
1bba3683
HZ
675/**
676 * amdgpu_device_indirect_rreg - read an indirect register
677 *
678 * @adev: amdgpu_device pointer
679 * @pcie_index: mmio register offset
680 * @pcie_data: mmio register offset
22f453fb 681 * @reg_addr: indirect register address to read from
1bba3683
HZ
682 *
683 * Returns the value of indirect register @reg_addr
684 */
685u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
686 u32 pcie_index, u32 pcie_data,
687 u32 reg_addr)
688{
689 unsigned long flags;
690 u32 r;
691 void __iomem *pcie_index_offset;
692 void __iomem *pcie_data_offset;
693
694 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
695 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
696 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
697
698 writel(reg_addr, pcie_index_offset);
699 readl(pcie_index_offset);
700 r = readl(pcie_data_offset);
701 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
702
703 return r;
704}
705
706/**
707 * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
708 *
709 * @adev: amdgpu_device pointer
710 * @pcie_index: mmio register offset
711 * @pcie_data: mmio register offset
22f453fb 712 * @reg_addr: indirect register address to read from
1bba3683
HZ
713 *
714 * Returns the value of indirect register @reg_addr
715 */
716u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
717 u32 pcie_index, u32 pcie_data,
718 u32 reg_addr)
719{
720 unsigned long flags;
721 u64 r;
722 void __iomem *pcie_index_offset;
723 void __iomem *pcie_data_offset;
724
725 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
726 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
727 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
728
729 /* read low 32 bits */
730 writel(reg_addr, pcie_index_offset);
731 readl(pcie_index_offset);
732 r = readl(pcie_data_offset);
733 /* read high 32 bits */
734 writel(reg_addr + 4, pcie_index_offset);
735 readl(pcie_index_offset);
736 r |= ((u64)readl(pcie_data_offset) << 32);
737 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
738
739 return r;
740}
741
742/**
743 * amdgpu_device_indirect_wreg - write an indirect register address
744 *
745 * @adev: amdgpu_device pointer
746 * @pcie_index: mmio register offset
747 * @pcie_data: mmio register offset
748 * @reg_addr: indirect register offset
749 * @reg_data: indirect register data
750 *
751 */
752void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
753 u32 pcie_index, u32 pcie_data,
754 u32 reg_addr, u32 reg_data)
755{
756 unsigned long flags;
757 void __iomem *pcie_index_offset;
758 void __iomem *pcie_data_offset;
759
760 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
761 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
762 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
763
764 writel(reg_addr, pcie_index_offset);
765 readl(pcie_index_offset);
766 writel(reg_data, pcie_data_offset);
767 readl(pcie_data_offset);
768 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
769}
770
771/**
772 * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
773 *
774 * @adev: amdgpu_device pointer
775 * @pcie_index: mmio register offset
776 * @pcie_data: mmio register offset
777 * @reg_addr: indirect register offset
778 * @reg_data: indirect register data
779 *
780 */
781void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
782 u32 pcie_index, u32 pcie_data,
783 u32 reg_addr, u64 reg_data)
784{
785 unsigned long flags;
786 void __iomem *pcie_index_offset;
787 void __iomem *pcie_data_offset;
788
789 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
790 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
791 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
792
793 /* write low 32 bits */
794 writel(reg_addr, pcie_index_offset);
795 readl(pcie_index_offset);
796 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
797 readl(pcie_data_offset);
798 /* write high 32 bits */
799 writel(reg_addr + 4, pcie_index_offset);
800 readl(pcie_index_offset);
801 writel((u32)(reg_data >> 32), pcie_data_offset);
802 readl(pcie_data_offset);
803 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
804}
805
d38ceaf9
AD
806/**
807 * amdgpu_invalid_rreg - dummy reg read function
808 *
982a820b 809 * @adev: amdgpu_device pointer
d38ceaf9
AD
810 * @reg: offset of register
811 *
812 * Dummy register read function. Used for register blocks
813 * that certain asics don't have (all asics).
814 * Returns the value in the register.
815 */
816static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
817{
818 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
819 BUG();
820 return 0;
821}
822
823/**
824 * amdgpu_invalid_wreg - dummy reg write function
825 *
982a820b 826 * @adev: amdgpu_device pointer
d38ceaf9
AD
827 * @reg: offset of register
828 * @v: value to write to the register
829 *
830 * Dummy register read function. Used for register blocks
831 * that certain asics don't have (all asics).
832 */
833static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
834{
835 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
836 reg, v);
837 BUG();
838}
839
4fa1c6a6
TZ
840/**
841 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
842 *
982a820b 843 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
844 * @reg: offset of register
845 *
846 * Dummy register read function. Used for register blocks
847 * that certain asics don't have (all asics).
848 * Returns the value in the register.
849 */
850static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
851{
852 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
853 BUG();
854 return 0;
855}
856
857/**
858 * amdgpu_invalid_wreg64 - dummy reg write function
859 *
982a820b 860 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
861 * @reg: offset of register
862 * @v: value to write to the register
863 *
864 * Dummy register read function. Used for register blocks
865 * that certain asics don't have (all asics).
866 */
867static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
868{
869 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
870 reg, v);
871 BUG();
872}
873
d38ceaf9
AD
874/**
875 * amdgpu_block_invalid_rreg - dummy reg read function
876 *
982a820b 877 * @adev: amdgpu_device pointer
d38ceaf9
AD
878 * @block: offset of instance
879 * @reg: offset of register
880 *
881 * Dummy register read function. Used for register blocks
882 * that certain asics don't have (all asics).
883 * Returns the value in the register.
884 */
885static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
886 uint32_t block, uint32_t reg)
887{
888 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
889 reg, block);
890 BUG();
891 return 0;
892}
893
894/**
895 * amdgpu_block_invalid_wreg - dummy reg write function
896 *
982a820b 897 * @adev: amdgpu_device pointer
d38ceaf9
AD
898 * @block: offset of instance
899 * @reg: offset of register
900 * @v: value to write to the register
901 *
902 * Dummy register read function. Used for register blocks
903 * that certain asics don't have (all asics).
904 */
905static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
906 uint32_t block,
907 uint32_t reg, uint32_t v)
908{
909 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
910 reg, block, v);
911 BUG();
912}
913
4d2997ab
AD
914/**
915 * amdgpu_device_asic_init - Wrapper for atom asic_init
916 *
982a820b 917 * @adev: amdgpu_device pointer
4d2997ab
AD
918 *
919 * Does any asic specific work and then calls atom asic init.
920 */
921static int amdgpu_device_asic_init(struct amdgpu_device *adev)
922{
923 amdgpu_asic_pre_asic_init(adev);
924
85d1bcc6
HZ
925 if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0))
926 return amdgpu_atomfirmware_asic_init(adev, true);
927 else
928 return amdgpu_atom_asic_init(adev->mode_info.atom_context);
4d2997ab
AD
929}
930
e3ecdffa 931/**
7ccfd79f 932 * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
e3ecdffa 933 *
982a820b 934 * @adev: amdgpu_device pointer
e3ecdffa
AD
935 *
936 * Allocates a scratch page of VRAM for use by various things in the
937 * driver.
938 */
7ccfd79f 939static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
d38ceaf9 940{
7ccfd79f
CK
941 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
942 AMDGPU_GEM_DOMAIN_VRAM |
943 AMDGPU_GEM_DOMAIN_GTT,
944 &adev->mem_scratch.robj,
945 &adev->mem_scratch.gpu_addr,
946 (void **)&adev->mem_scratch.ptr);
d38ceaf9
AD
947}
948
e3ecdffa 949/**
7ccfd79f 950 * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
e3ecdffa 951 *
982a820b 952 * @adev: amdgpu_device pointer
e3ecdffa
AD
953 *
954 * Frees the VRAM scratch page.
955 */
7ccfd79f 956static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 957{
7ccfd79f 958 amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
d38ceaf9
AD
959}
960
961/**
9c3f2b54 962 * amdgpu_device_program_register_sequence - program an array of registers.
d38ceaf9
AD
963 *
964 * @adev: amdgpu_device pointer
965 * @registers: pointer to the register array
966 * @array_size: size of the register array
967 *
968 * Programs an array or registers with and and or masks.
969 * This is a helper for setting golden registers.
970 */
9c3f2b54
AD
971void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
972 const u32 *registers,
973 const u32 array_size)
d38ceaf9
AD
974{
975 u32 tmp, reg, and_mask, or_mask;
976 int i;
977
978 if (array_size % 3)
979 return;
980
981 for (i = 0; i < array_size; i +=3) {
982 reg = registers[i + 0];
983 and_mask = registers[i + 1];
984 or_mask = registers[i + 2];
985
986 if (and_mask == 0xffffffff) {
987 tmp = or_mask;
988 } else {
989 tmp = RREG32(reg);
990 tmp &= ~and_mask;
e0d07657
HZ
991 if (adev->family >= AMDGPU_FAMILY_AI)
992 tmp |= (or_mask & and_mask);
993 else
994 tmp |= or_mask;
d38ceaf9
AD
995 }
996 WREG32(reg, tmp);
997 }
998}
999
e3ecdffa
AD
1000/**
1001 * amdgpu_device_pci_config_reset - reset the GPU
1002 *
1003 * @adev: amdgpu_device pointer
1004 *
1005 * Resets the GPU using the pci config reset sequence.
1006 * Only applicable to asics prior to vega10.
1007 */
8111c387 1008void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
1009{
1010 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
1011}
1012
af484df8
AD
1013/**
1014 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1015 *
1016 * @adev: amdgpu_device pointer
1017 *
1018 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1019 */
1020int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1021{
1022 return pci_reset_function(adev->pdev);
1023}
1024
d38ceaf9
AD
1025/*
1026 * GPU doorbell aperture helpers function.
1027 */
1028/**
06ec9070 1029 * amdgpu_device_doorbell_init - Init doorbell driver information.
d38ceaf9
AD
1030 *
1031 * @adev: amdgpu_device pointer
1032 *
1033 * Init doorbell driver information (CIK)
1034 * Returns 0 on success, error on failure.
1035 */
06ec9070 1036static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
d38ceaf9 1037{
6585661d 1038
705e519e
CK
1039 /* No doorbell on SI hardware generation */
1040 if (adev->asic_type < CHIP_BONAIRE) {
1041 adev->doorbell.base = 0;
1042 adev->doorbell.size = 0;
1043 adev->doorbell.num_doorbells = 0;
1044 adev->doorbell.ptr = NULL;
1045 return 0;
1046 }
1047
d6895ad3
CK
1048 if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
1049 return -EINVAL;
1050
22357775
AD
1051 amdgpu_asic_init_doorbell_index(adev);
1052
d38ceaf9
AD
1053 /* doorbell bar mapping */
1054 adev->doorbell.base = pci_resource_start(adev->pdev, 2);
1055 adev->doorbell.size = pci_resource_len(adev->pdev, 2);
1056
de33a329
JX
1057 if (adev->enable_mes) {
1058 adev->doorbell.num_doorbells =
1059 adev->doorbell.size / sizeof(u32);
1060 } else {
1061 adev->doorbell.num_doorbells =
1062 min_t(u32, adev->doorbell.size / sizeof(u32),
1063 adev->doorbell_index.max_assignment+1);
1064 if (adev->doorbell.num_doorbells == 0)
1065 return -EINVAL;
1066
1067 /* For Vega, reserve and map two pages on doorbell BAR since SDMA
1068 * paging queue doorbell use the second page. The
1069 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
1070 * doorbells are in the first page. So with paging queue enabled,
1071 * the max num_doorbells should + 1 page (0x400 in dword)
1072 */
1073 if (adev->asic_type >= CHIP_VEGA10)
1074 adev->doorbell.num_doorbells += 0x400;
1075 }
ec3db8a6 1076
8972e5d2
CK
1077 adev->doorbell.ptr = ioremap(adev->doorbell.base,
1078 adev->doorbell.num_doorbells *
1079 sizeof(u32));
1080 if (adev->doorbell.ptr == NULL)
d38ceaf9 1081 return -ENOMEM;
d38ceaf9
AD
1082
1083 return 0;
1084}
1085
1086/**
06ec9070 1087 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
d38ceaf9
AD
1088 *
1089 * @adev: amdgpu_device pointer
1090 *
1091 * Tear down doorbell driver information (CIK)
1092 */
06ec9070 1093static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1094{
1095 iounmap(adev->doorbell.ptr);
1096 adev->doorbell.ptr = NULL;
1097}
1098
22cb0164 1099
d38ceaf9
AD
1100
1101/*
06ec9070 1102 * amdgpu_device_wb_*()
455a7bc2 1103 * Writeback is the method by which the GPU updates special pages in memory
ea81a173 1104 * with the status of certain GPU events (fences, ring pointers,etc.).
d38ceaf9
AD
1105 */
1106
1107/**
06ec9070 1108 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
1109 *
1110 * @adev: amdgpu_device pointer
1111 *
1112 * Disables Writeback and frees the Writeback memory (all asics).
1113 * Used at driver shutdown.
1114 */
06ec9070 1115static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1116{
1117 if (adev->wb.wb_obj) {
a76ed485
AD
1118 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1119 &adev->wb.gpu_addr,
1120 (void **)&adev->wb.wb);
d38ceaf9
AD
1121 adev->wb.wb_obj = NULL;
1122 }
1123}
1124
1125/**
03f2abb0 1126 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
d38ceaf9
AD
1127 *
1128 * @adev: amdgpu_device pointer
1129 *
455a7bc2 1130 * Initializes writeback and allocates writeback memory (all asics).
d38ceaf9
AD
1131 * Used at driver startup.
1132 * Returns 0 on success or an -error on failure.
1133 */
06ec9070 1134static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
1135{
1136 int r;
1137
1138 if (adev->wb.wb_obj == NULL) {
97407b63
AD
1139 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1140 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
1141 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1142 &adev->wb.wb_obj, &adev->wb.gpu_addr,
1143 (void **)&adev->wb.wb);
d38ceaf9
AD
1144 if (r) {
1145 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1146 return r;
1147 }
d38ceaf9
AD
1148
1149 adev->wb.num_wb = AMDGPU_MAX_WB;
1150 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1151
1152 /* clear wb memory */
73469585 1153 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
1154 }
1155
1156 return 0;
1157}
1158
1159/**
131b4b36 1160 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
1161 *
1162 * @adev: amdgpu_device pointer
1163 * @wb: wb index
1164 *
1165 * Allocate a wb slot for use by the driver (all asics).
1166 * Returns 0 on success or -EINVAL on failure.
1167 */
131b4b36 1168int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9
AD
1169{
1170 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 1171
97407b63 1172 if (offset < adev->wb.num_wb) {
7014285a 1173 __set_bit(offset, adev->wb.used);
63ae07ca 1174 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
1175 return 0;
1176 } else {
1177 return -EINVAL;
1178 }
1179}
1180
d38ceaf9 1181/**
131b4b36 1182 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
1183 *
1184 * @adev: amdgpu_device pointer
1185 * @wb: wb index
1186 *
1187 * Free a wb slot allocated for use by the driver (all asics)
1188 */
131b4b36 1189void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 1190{
73469585 1191 wb >>= 3;
d38ceaf9 1192 if (wb < adev->wb.num_wb)
73469585 1193 __clear_bit(wb, adev->wb.used);
d38ceaf9
AD
1194}
1195
d6895ad3
CK
1196/**
1197 * amdgpu_device_resize_fb_bar - try to resize FB BAR
1198 *
1199 * @adev: amdgpu_device pointer
1200 *
1201 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1202 * to fail, but if any of the BARs is not accessible after the size we abort
1203 * driver loading by returning -ENODEV.
1204 */
1205int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1206{
453f617a 1207 int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
31b8adab
CK
1208 struct pci_bus *root;
1209 struct resource *res;
1210 unsigned i;
d6895ad3
CK
1211 u16 cmd;
1212 int r;
1213
0c03b912 1214 /* Bypass for VF */
1215 if (amdgpu_sriov_vf(adev))
1216 return 0;
1217
b7221f2b
AD
1218 /* skip if the bios has already enabled large BAR */
1219 if (adev->gmc.real_vram_size &&
1220 (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1221 return 0;
1222
31b8adab
CK
1223 /* Check if the root BUS has 64bit memory resources */
1224 root = adev->pdev->bus;
1225 while (root->parent)
1226 root = root->parent;
1227
1228 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 1229 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
1230 res->start > 0x100000000ull)
1231 break;
1232 }
1233
1234 /* Trying to resize is pointless without a root hub window above 4GB */
1235 if (!res)
1236 return 0;
1237
453f617a
ND
1238 /* Limit the BAR size to what is available */
1239 rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1240 rbar_size);
1241
d6895ad3
CK
1242 /* Disable memory decoding while we change the BAR addresses and size */
1243 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1244 pci_write_config_word(adev->pdev, PCI_COMMAND,
1245 cmd & ~PCI_COMMAND_MEMORY);
1246
1247 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
06ec9070 1248 amdgpu_device_doorbell_fini(adev);
d6895ad3
CK
1249 if (adev->asic_type >= CHIP_BONAIRE)
1250 pci_release_resource(adev->pdev, 2);
1251
1252 pci_release_resource(adev->pdev, 0);
1253
1254 r = pci_resize_resource(adev->pdev, 0, rbar_size);
1255 if (r == -ENOSPC)
1256 DRM_INFO("Not enough PCI address space for a large BAR.");
1257 else if (r && r != -ENOTSUPP)
1258 DRM_ERROR("Problem resizing BAR0 (%d).", r);
1259
1260 pci_assign_unassigned_bus_resources(adev->pdev->bus);
1261
1262 /* When the doorbell or fb BAR isn't available we have no chance of
1263 * using the device.
1264 */
06ec9070 1265 r = amdgpu_device_doorbell_init(adev);
d6895ad3
CK
1266 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1267 return -ENODEV;
1268
1269 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1270
1271 return 0;
1272}
a05502e5 1273
d38ceaf9
AD
1274/*
1275 * GPU helpers function.
1276 */
1277/**
39c640c0 1278 * amdgpu_device_need_post - check if the hw need post or not
d38ceaf9
AD
1279 *
1280 * @adev: amdgpu_device pointer
1281 *
c836fec5
JQ
1282 * Check if the asic has been initialized (all asics) at driver startup
1283 * or post is needed if hw reset is performed.
1284 * Returns true if need or false if not.
d38ceaf9 1285 */
39c640c0 1286bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
1287{
1288 uint32_t reg;
1289
bec86378
ML
1290 if (amdgpu_sriov_vf(adev))
1291 return false;
1292
1293 if (amdgpu_passthrough(adev)) {
1da2c326
ML
1294 /* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
1295 * some old smc fw still need driver do vPost otherwise gpu hang, while
1296 * those smc fw version above 22.15 doesn't have this flaw, so we force
1297 * vpost executed for smc version below 22.15
bec86378
ML
1298 */
1299 if (adev->asic_type == CHIP_FIJI) {
1300 int err;
1301 uint32_t fw_ver;
1302 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1303 /* force vPost if error occured */
1304 if (err)
1305 return true;
1306
1307 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1da2c326
ML
1308 if (fw_ver < 0x00160e00)
1309 return true;
bec86378 1310 }
bec86378 1311 }
91fe77eb 1312
e3c1b071 1313 /* Don't post if we need to reset whole hive on init */
1314 if (adev->gmc.xgmi.pending_reset)
1315 return false;
1316
91fe77eb 1317 if (adev->has_hw_reset) {
1318 adev->has_hw_reset = false;
1319 return true;
1320 }
1321
1322 /* bios scratch used on CIK+ */
1323 if (adev->asic_type >= CHIP_BONAIRE)
1324 return amdgpu_atombios_scratch_need_asic_init(adev);
1325
1326 /* check MEM_SIZE for older asics */
1327 reg = amdgpu_asic_get_config_memsize(adev);
1328
1329 if ((reg != 0) && (reg != 0xffffffff))
1330 return false;
1331
1332 return true;
bec86378
ML
1333}
1334
0ab5d711
ML
1335/**
1336 * amdgpu_device_should_use_aspm - check if the device should program ASPM
1337 *
1338 * @adev: amdgpu_device pointer
1339 *
1340 * Confirm whether the module parameter and pcie bridge agree that ASPM should
1341 * be set for this device.
1342 *
1343 * Returns true if it should be used or false if not.
1344 */
1345bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1346{
1347 switch (amdgpu_aspm) {
1348 case -1:
1349 break;
1350 case 0:
1351 return false;
1352 case 1:
1353 return true;
1354 default:
1355 return false;
1356 }
1357 return pcie_aspm_enabled(adev->pdev);
1358}
1359
d38ceaf9
AD
1360/* if we get transitioned to only one device, take VGA back */
1361/**
06ec9070 1362 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9 1363 *
bf44e8ce 1364 * @pdev: PCI device pointer
d38ceaf9
AD
1365 * @state: enable/disable vga decode
1366 *
1367 * Enable/disable vga decode (all asics).
1368 * Returns VGA resource flags.
1369 */
bf44e8ce
CH
1370static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1371 bool state)
d38ceaf9 1372{
bf44e8ce 1373 struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
d38ceaf9
AD
1374 amdgpu_asic_set_vga_state(adev, state);
1375 if (state)
1376 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1377 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1378 else
1379 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1380}
1381
e3ecdffa
AD
1382/**
1383 * amdgpu_device_check_block_size - validate the vm block size
1384 *
1385 * @adev: amdgpu_device pointer
1386 *
1387 * Validates the vm block size specified via module parameter.
1388 * The vm block size defines number of bits in page table versus page directory,
1389 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1390 * page table and the remaining bits are in the page directory.
1391 */
06ec9070 1392static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
1393{
1394 /* defines number of bits in page table versus page directory,
1395 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1396 * page table and the remaining bits are in the page directory */
bab4fee7
JZ
1397 if (amdgpu_vm_block_size == -1)
1398 return;
a1adf8be 1399
bab4fee7 1400 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
1401 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1402 amdgpu_vm_block_size);
97489129 1403 amdgpu_vm_block_size = -1;
a1adf8be 1404 }
a1adf8be
CZ
1405}
1406
e3ecdffa
AD
1407/**
1408 * amdgpu_device_check_vm_size - validate the vm size
1409 *
1410 * @adev: amdgpu_device pointer
1411 *
1412 * Validates the vm size in GB specified via module parameter.
1413 * The VM size is the size of the GPU virtual memory space in GB.
1414 */
06ec9070 1415static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 1416{
64dab074
AD
1417 /* no need to check the default value */
1418 if (amdgpu_vm_size == -1)
1419 return;
1420
83ca145d
ZJ
1421 if (amdgpu_vm_size < 1) {
1422 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1423 amdgpu_vm_size);
f3368128 1424 amdgpu_vm_size = -1;
83ca145d 1425 }
83ca145d
ZJ
1426}
1427
7951e376
RZ
1428static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1429{
1430 struct sysinfo si;
a9d4fe2f 1431 bool is_os_64 = (sizeof(void *) == 8);
7951e376
RZ
1432 uint64_t total_memory;
1433 uint64_t dram_size_seven_GB = 0x1B8000000;
1434 uint64_t dram_size_three_GB = 0xB8000000;
1435
1436 if (amdgpu_smu_memory_pool_size == 0)
1437 return;
1438
1439 if (!is_os_64) {
1440 DRM_WARN("Not 64-bit OS, feature not supported\n");
1441 goto def_value;
1442 }
1443 si_meminfo(&si);
1444 total_memory = (uint64_t)si.totalram * si.mem_unit;
1445
1446 if ((amdgpu_smu_memory_pool_size == 1) ||
1447 (amdgpu_smu_memory_pool_size == 2)) {
1448 if (total_memory < dram_size_three_GB)
1449 goto def_value1;
1450 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1451 (amdgpu_smu_memory_pool_size == 8)) {
1452 if (total_memory < dram_size_seven_GB)
1453 goto def_value1;
1454 } else {
1455 DRM_WARN("Smu memory pool size not supported\n");
1456 goto def_value;
1457 }
1458 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1459
1460 return;
1461
1462def_value1:
1463 DRM_WARN("No enough system memory\n");
1464def_value:
1465 adev->pm.smu_prv_buffer_size = 0;
1466}
1467
9f6a7857
HR
1468static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1469{
1470 if (!(adev->flags & AMD_IS_APU) ||
1471 adev->asic_type < CHIP_RAVEN)
1472 return 0;
1473
1474 switch (adev->asic_type) {
1475 case CHIP_RAVEN:
1476 if (adev->pdev->device == 0x15dd)
1477 adev->apu_flags |= AMD_APU_IS_RAVEN;
1478 if (adev->pdev->device == 0x15d8)
1479 adev->apu_flags |= AMD_APU_IS_PICASSO;
1480 break;
1481 case CHIP_RENOIR:
1482 if ((adev->pdev->device == 0x1636) ||
1483 (adev->pdev->device == 0x164c))
1484 adev->apu_flags |= AMD_APU_IS_RENOIR;
1485 else
1486 adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1487 break;
1488 case CHIP_VANGOGH:
1489 adev->apu_flags |= AMD_APU_IS_VANGOGH;
1490 break;
1491 case CHIP_YELLOW_CARP:
1492 break;
d0f56dc2 1493 case CHIP_CYAN_SKILLFISH:
dfcc3e8c
AD
1494 if ((adev->pdev->device == 0x13FE) ||
1495 (adev->pdev->device == 0x143F))
d0f56dc2
TZ
1496 adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1497 break;
9f6a7857 1498 default:
4eaf21b7 1499 break;
9f6a7857
HR
1500 }
1501
1502 return 0;
1503}
1504
d38ceaf9 1505/**
06ec9070 1506 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
1507 *
1508 * @adev: amdgpu_device pointer
1509 *
1510 * Validates certain module parameters and updates
1511 * the associated values used by the driver (all asics).
1512 */
912dfc84 1513static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1514{
5b011235
CZ
1515 if (amdgpu_sched_jobs < 4) {
1516 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1517 amdgpu_sched_jobs);
1518 amdgpu_sched_jobs = 4;
76117507 1519 } else if (!is_power_of_2(amdgpu_sched_jobs)){
5b011235
CZ
1520 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1521 amdgpu_sched_jobs);
1522 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1523 }
d38ceaf9 1524
83e74db6 1525 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
1526 /* gart size must be greater or equal to 32M */
1527 dev_warn(adev->dev, "gart size (%d) too small\n",
1528 amdgpu_gart_size);
83e74db6 1529 amdgpu_gart_size = -1;
d38ceaf9
AD
1530 }
1531
36d38372 1532 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1533 /* gtt size must be greater or equal to 32M */
36d38372
CK
1534 dev_warn(adev->dev, "gtt size (%d) too small\n",
1535 amdgpu_gtt_size);
1536 amdgpu_gtt_size = -1;
d38ceaf9
AD
1537 }
1538
d07f14be
RH
1539 /* valid range is between 4 and 9 inclusive */
1540 if (amdgpu_vm_fragment_size != -1 &&
1541 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1542 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1543 amdgpu_vm_fragment_size = -1;
1544 }
1545
5d5bd5e3
KW
1546 if (amdgpu_sched_hw_submission < 2) {
1547 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1548 amdgpu_sched_hw_submission);
1549 amdgpu_sched_hw_submission = 2;
1550 } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1551 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1552 amdgpu_sched_hw_submission);
1553 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1554 }
1555
2656fd23
AG
1556 if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1557 dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1558 amdgpu_reset_method = -1;
1559 }
1560
7951e376
RZ
1561 amdgpu_device_check_smu_prv_buffer_size(adev);
1562
06ec9070 1563 amdgpu_device_check_vm_size(adev);
d38ceaf9 1564
06ec9070 1565 amdgpu_device_check_block_size(adev);
6a7f76e7 1566
19aede77 1567 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84 1568
e3c00faa 1569 return 0;
d38ceaf9
AD
1570}
1571
1572/**
1573 * amdgpu_switcheroo_set_state - set switcheroo state
1574 *
1575 * @pdev: pci dev pointer
1694467b 1576 * @state: vga_switcheroo state
d38ceaf9 1577 *
12024b17 1578 * Callback for the switcheroo driver. Suspends or resumes
d38ceaf9
AD
1579 * the asics before or after it is powered up using ACPI methods.
1580 */
8aba21b7
LT
1581static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1582 enum vga_switcheroo_state state)
d38ceaf9
AD
1583{
1584 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 1585 int r;
d38ceaf9 1586
b98c6299 1587 if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
1588 return;
1589
1590 if (state == VGA_SWITCHEROO_ON) {
dd4fa6c1 1591 pr_info("switched on\n");
d38ceaf9
AD
1592 /* don't suspend or resume card normally */
1593 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1594
8f66090b
TZ
1595 pci_set_power_state(pdev, PCI_D0);
1596 amdgpu_device_load_pci_state(pdev);
1597 r = pci_enable_device(pdev);
de185019
AD
1598 if (r)
1599 DRM_WARN("pci_enable_device failed (%d)\n", r);
1600 amdgpu_device_resume(dev, true);
d38ceaf9 1601
d38ceaf9 1602 dev->switch_power_state = DRM_SWITCH_POWER_ON;
d38ceaf9 1603 } else {
dd4fa6c1 1604 pr_info("switched off\n");
d38ceaf9 1605 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
de185019 1606 amdgpu_device_suspend(dev, true);
8f66090b 1607 amdgpu_device_cache_pci_state(pdev);
de185019 1608 /* Shut down the device */
8f66090b
TZ
1609 pci_disable_device(pdev);
1610 pci_set_power_state(pdev, PCI_D3cold);
d38ceaf9
AD
1611 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1612 }
1613}
1614
1615/**
1616 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1617 *
1618 * @pdev: pci dev pointer
1619 *
1620 * Callback for the switcheroo driver. Check of the switcheroo
1621 * state can be changed.
1622 * Returns true if the state can be changed, false if not.
1623 */
1624static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1625{
1626 struct drm_device *dev = pci_get_drvdata(pdev);
1627
1628 /*
1629 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1630 * locking inversion with the driver load path. And the access here is
1631 * completely racy anyway. So don't bother with locking for now.
1632 */
7e13ad89 1633 return atomic_read(&dev->open_count) == 0;
d38ceaf9
AD
1634}
1635
1636static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1637 .set_gpu_state = amdgpu_switcheroo_set_state,
1638 .reprobe = NULL,
1639 .can_switch = amdgpu_switcheroo_can_switch,
1640};
1641
e3ecdffa
AD
1642/**
1643 * amdgpu_device_ip_set_clockgating_state - set the CG state
1644 *
87e3f136 1645 * @dev: amdgpu_device pointer
e3ecdffa
AD
1646 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1647 * @state: clockgating state (gate or ungate)
1648 *
1649 * Sets the requested clockgating state for all instances of
1650 * the hardware IP specified.
1651 * Returns the error code from the last instance.
1652 */
43fa561f 1653int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1654 enum amd_ip_block_type block_type,
1655 enum amd_clockgating_state state)
d38ceaf9 1656{
43fa561f 1657 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1658 int i, r = 0;
1659
1660 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1661 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1662 continue;
c722865a
RZ
1663 if (adev->ip_blocks[i].version->type != block_type)
1664 continue;
1665 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1666 continue;
1667 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1668 (void *)adev, state);
1669 if (r)
1670 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1671 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1672 }
1673 return r;
1674}
1675
e3ecdffa
AD
1676/**
1677 * amdgpu_device_ip_set_powergating_state - set the PG state
1678 *
87e3f136 1679 * @dev: amdgpu_device pointer
e3ecdffa
AD
1680 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1681 * @state: powergating state (gate or ungate)
1682 *
1683 * Sets the requested powergating state for all instances of
1684 * the hardware IP specified.
1685 * Returns the error code from the last instance.
1686 */
43fa561f 1687int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1688 enum amd_ip_block_type block_type,
1689 enum amd_powergating_state state)
d38ceaf9 1690{
43fa561f 1691 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1692 int i, r = 0;
1693
1694 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1695 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1696 continue;
c722865a
RZ
1697 if (adev->ip_blocks[i].version->type != block_type)
1698 continue;
1699 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1700 continue;
1701 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1702 (void *)adev, state);
1703 if (r)
1704 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1705 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1706 }
1707 return r;
1708}
1709
e3ecdffa
AD
1710/**
1711 * amdgpu_device_ip_get_clockgating_state - get the CG state
1712 *
1713 * @adev: amdgpu_device pointer
1714 * @flags: clockgating feature flags
1715 *
1716 * Walks the list of IPs on the device and updates the clockgating
1717 * flags for each IP.
1718 * Updates @flags with the feature flags for each hardware IP where
1719 * clockgating is enabled.
1720 */
2990a1fc 1721void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
25faeddc 1722 u64 *flags)
6cb2d4e4
HR
1723{
1724 int i;
1725
1726 for (i = 0; i < adev->num_ip_blocks; i++) {
1727 if (!adev->ip_blocks[i].status.valid)
1728 continue;
1729 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1730 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1731 }
1732}
1733
e3ecdffa
AD
1734/**
1735 * amdgpu_device_ip_wait_for_idle - wait for idle
1736 *
1737 * @adev: amdgpu_device pointer
1738 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1739 *
1740 * Waits for the request hardware IP to be idle.
1741 * Returns 0 for success or a negative error code on failure.
1742 */
2990a1fc
AD
1743int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1744 enum amd_ip_block_type block_type)
5dbbb60b
AD
1745{
1746 int i, r;
1747
1748 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1749 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1750 continue;
a1255107
AD
1751 if (adev->ip_blocks[i].version->type == block_type) {
1752 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1753 if (r)
1754 return r;
1755 break;
1756 }
1757 }
1758 return 0;
1759
1760}
1761
e3ecdffa
AD
1762/**
1763 * amdgpu_device_ip_is_idle - is the hardware IP idle
1764 *
1765 * @adev: amdgpu_device pointer
1766 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1767 *
1768 * Check if the hardware IP is idle or not.
1769 * Returns true if it the IP is idle, false if not.
1770 */
2990a1fc
AD
1771bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1772 enum amd_ip_block_type block_type)
5dbbb60b
AD
1773{
1774 int i;
1775
1776 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1777 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1778 continue;
a1255107
AD
1779 if (adev->ip_blocks[i].version->type == block_type)
1780 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1781 }
1782 return true;
1783
1784}
1785
e3ecdffa
AD
1786/**
1787 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1788 *
1789 * @adev: amdgpu_device pointer
87e3f136 1790 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1791 *
1792 * Returns a pointer to the hardware IP block structure
1793 * if it exists for the asic, otherwise NULL.
1794 */
2990a1fc
AD
1795struct amdgpu_ip_block *
1796amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1797 enum amd_ip_block_type type)
d38ceaf9
AD
1798{
1799 int i;
1800
1801 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1802 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1803 return &adev->ip_blocks[i];
1804
1805 return NULL;
1806}
1807
1808/**
2990a1fc 1809 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1810 *
1811 * @adev: amdgpu_device pointer
5fc3aeeb 1812 * @type: enum amd_ip_block_type
d38ceaf9
AD
1813 * @major: major version
1814 * @minor: minor version
1815 *
1816 * return 0 if equal or greater
1817 * return 1 if smaller or the ip_block doesn't exist
1818 */
2990a1fc
AD
1819int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1820 enum amd_ip_block_type type,
1821 u32 major, u32 minor)
d38ceaf9 1822{
2990a1fc 1823 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1824
a1255107
AD
1825 if (ip_block && ((ip_block->version->major > major) ||
1826 ((ip_block->version->major == major) &&
1827 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1828 return 0;
1829
1830 return 1;
1831}
1832
a1255107 1833/**
2990a1fc 1834 * amdgpu_device_ip_block_add
a1255107
AD
1835 *
1836 * @adev: amdgpu_device pointer
1837 * @ip_block_version: pointer to the IP to add
1838 *
1839 * Adds the IP block driver information to the collection of IPs
1840 * on the asic.
1841 */
2990a1fc
AD
1842int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1843 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1844{
1845 if (!ip_block_version)
1846 return -EINVAL;
1847
7bd939d0
LG
1848 switch (ip_block_version->type) {
1849 case AMD_IP_BLOCK_TYPE_VCN:
1850 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
1851 return 0;
1852 break;
1853 case AMD_IP_BLOCK_TYPE_JPEG:
1854 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
1855 return 0;
1856 break;
1857 default:
1858 break;
1859 }
1860
e966a725 1861 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1862 ip_block_version->funcs->name);
1863
a1255107
AD
1864 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1865
1866 return 0;
1867}
1868
e3ecdffa
AD
1869/**
1870 * amdgpu_device_enable_virtual_display - enable virtual display feature
1871 *
1872 * @adev: amdgpu_device pointer
1873 *
1874 * Enabled the virtual display feature if the user has enabled it via
1875 * the module parameter virtual_display. This feature provides a virtual
1876 * display hardware on headless boards or in virtualized environments.
1877 * This function parses and validates the configuration string specified by
1878 * the user and configues the virtual display configuration (number of
1879 * virtual connectors, crtcs, etc.) specified.
1880 */
483ef985 1881static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1882{
1883 adev->enable_virtual_display = false;
1884
1885 if (amdgpu_virtual_display) {
8f66090b 1886 const char *pci_address_name = pci_name(adev->pdev);
0f66356d 1887 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1888
1889 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1890 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1891 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1892 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1893 if (!strcmp("all", pciaddname)
1894 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1895 long num_crtc;
1896 int res = -1;
1897
9accf2fd 1898 adev->enable_virtual_display = true;
0f66356d
ED
1899
1900 if (pciaddname_tmp)
1901 res = kstrtol(pciaddname_tmp, 10,
1902 &num_crtc);
1903
1904 if (!res) {
1905 if (num_crtc < 1)
1906 num_crtc = 1;
1907 if (num_crtc > 6)
1908 num_crtc = 6;
1909 adev->mode_info.num_crtc = num_crtc;
1910 } else {
1911 adev->mode_info.num_crtc = 1;
1912 }
9accf2fd
ED
1913 break;
1914 }
1915 }
1916
0f66356d
ED
1917 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1918 amdgpu_virtual_display, pci_address_name,
1919 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
1920
1921 kfree(pciaddstr);
1922 }
1923}
1924
25263da3
AD
1925void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
1926{
1927 if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
1928 adev->mode_info.num_crtc = 1;
1929 adev->enable_virtual_display = true;
1930 DRM_INFO("virtual_display:%d, num_crtc:%d\n",
1931 adev->enable_virtual_display, adev->mode_info.num_crtc);
1932 }
1933}
1934
e3ecdffa
AD
1935/**
1936 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1937 *
1938 * @adev: amdgpu_device pointer
1939 *
1940 * Parses the asic configuration parameters specified in the gpu info
1941 * firmware and makes them availale to the driver for use in configuring
1942 * the asic.
1943 * Returns 0 on success, -EINVAL on failure.
1944 */
e2a75f88
AD
1945static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1946{
e2a75f88 1947 const char *chip_name;
c0a43457 1948 char fw_name[40];
e2a75f88
AD
1949 int err;
1950 const struct gpu_info_firmware_header_v1_0 *hdr;
1951
ab4fe3e1
HR
1952 adev->firmware.gpu_info_fw = NULL;
1953
72de33f8 1954 if (adev->mman.discovery_bin) {
cc375d8c
TY
1955 /*
1956 * FIXME: The bounding box is still needed by Navi12, so
e24d0e91 1957 * temporarily read it from gpu_info firmware. Should be dropped
cc375d8c
TY
1958 * when DAL no longer needs it.
1959 */
1960 if (adev->asic_type != CHIP_NAVI12)
1961 return 0;
258620d0
AD
1962 }
1963
e2a75f88 1964 switch (adev->asic_type) {
e2a75f88
AD
1965 default:
1966 return 0;
1967 case CHIP_VEGA10:
1968 chip_name = "vega10";
1969 break;
3f76dced
AD
1970 case CHIP_VEGA12:
1971 chip_name = "vega12";
1972 break;
2d2e5e7e 1973 case CHIP_RAVEN:
54f78a76 1974 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
54c4d17e 1975 chip_name = "raven2";
54f78a76 1976 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
741deade 1977 chip_name = "picasso";
54c4d17e
FX
1978 else
1979 chip_name = "raven";
2d2e5e7e 1980 break;
65e60f6e
LM
1981 case CHIP_ARCTURUS:
1982 chip_name = "arcturus";
1983 break;
42b325e5
XY
1984 case CHIP_NAVI12:
1985 chip_name = "navi12";
1986 break;
e2a75f88
AD
1987 }
1988
1989 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
b31d3063 1990 err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
e2a75f88
AD
1991 if (err) {
1992 dev_err(adev->dev,
b31d3063 1993 "Failed to get gpu_info firmware \"%s\"\n",
e2a75f88
AD
1994 fw_name);
1995 goto out;
1996 }
1997
ab4fe3e1 1998 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
1999 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
2000
2001 switch (hdr->version_major) {
2002 case 1:
2003 {
2004 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 2005 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
2006 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2007
cc375d8c
TY
2008 /*
2009 * Should be droped when DAL no longer needs it.
2010 */
2011 if (adev->asic_type == CHIP_NAVI12)
ec51d3fa
XY
2012 goto parse_soc_bounding_box;
2013
b5ab16bf
AD
2014 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2015 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2016 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2017 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 2018 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
2019 le32_to_cpu(gpu_info_fw->gc_num_tccs);
2020 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2021 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2022 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2023 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 2024 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
2025 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2026 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
2027 adev->gfx.cu_info.max_waves_per_simd =
2028 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2029 adev->gfx.cu_info.max_scratch_slots_per_cu =
2030 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2031 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 2032 if (hdr->version_minor >= 1) {
35c2e910
HZ
2033 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2034 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2035 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2036 adev->gfx.config.num_sc_per_sh =
2037 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2038 adev->gfx.config.num_packer_per_sc =
2039 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2040 }
ec51d3fa
XY
2041
2042parse_soc_bounding_box:
ec51d3fa
XY
2043 /*
2044 * soc bounding box info is not integrated in disocovery table,
258620d0 2045 * we always need to parse it from gpu info firmware if needed.
ec51d3fa 2046 */
48321c3d
HW
2047 if (hdr->version_minor == 2) {
2048 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2049 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2050 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2051 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2052 }
e2a75f88
AD
2053 break;
2054 }
2055 default:
2056 dev_err(adev->dev,
2057 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2058 err = -EINVAL;
2059 goto out;
2060 }
2061out:
e2a75f88
AD
2062 return err;
2063}
2064
e3ecdffa
AD
2065/**
2066 * amdgpu_device_ip_early_init - run early init for hardware IPs
2067 *
2068 * @adev: amdgpu_device pointer
2069 *
2070 * Early initialization pass for hardware IPs. The hardware IPs that make
2071 * up each asic are discovered each IP's early_init callback is run. This
2072 * is the first stage in initializing the asic.
2073 * Returns 0 on success, negative error code on failure.
2074 */
06ec9070 2075static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 2076{
901e2be2
AD
2077 struct drm_device *dev = adev_to_drm(adev);
2078 struct pci_dev *parent;
aaa36a97 2079 int i, r;
ced69502 2080 bool total;
d38ceaf9 2081
483ef985 2082 amdgpu_device_enable_virtual_display(adev);
a6be7570 2083
00a979f3 2084 if (amdgpu_sriov_vf(adev)) {
00a979f3 2085 r = amdgpu_virt_request_full_gpu(adev, true);
aaa36a97
AD
2086 if (r)
2087 return r;
00a979f3
WS
2088 }
2089
d38ceaf9 2090 switch (adev->asic_type) {
33f34802
KW
2091#ifdef CONFIG_DRM_AMDGPU_SI
2092 case CHIP_VERDE:
2093 case CHIP_TAHITI:
2094 case CHIP_PITCAIRN:
2095 case CHIP_OLAND:
2096 case CHIP_HAINAN:
295d0daf 2097 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
2098 r = si_set_ip_blocks(adev);
2099 if (r)
2100 return r;
2101 break;
2102#endif
a2e73f56
AD
2103#ifdef CONFIG_DRM_AMDGPU_CIK
2104 case CHIP_BONAIRE:
2105 case CHIP_HAWAII:
2106 case CHIP_KAVERI:
2107 case CHIP_KABINI:
2108 case CHIP_MULLINS:
e1ad2d53 2109 if (adev->flags & AMD_IS_APU)
a2e73f56 2110 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
2111 else
2112 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
2113
2114 r = cik_set_ip_blocks(adev);
2115 if (r)
2116 return r;
2117 break;
2118#endif
da87c30b
AD
2119 case CHIP_TOPAZ:
2120 case CHIP_TONGA:
2121 case CHIP_FIJI:
2122 case CHIP_POLARIS10:
2123 case CHIP_POLARIS11:
2124 case CHIP_POLARIS12:
2125 case CHIP_VEGAM:
2126 case CHIP_CARRIZO:
2127 case CHIP_STONEY:
2128 if (adev->flags & AMD_IS_APU)
2129 adev->family = AMDGPU_FAMILY_CZ;
2130 else
2131 adev->family = AMDGPU_FAMILY_VI;
2132
2133 r = vi_set_ip_blocks(adev);
2134 if (r)
2135 return r;
2136 break;
d38ceaf9 2137 default:
63352b7f
AD
2138 r = amdgpu_discovery_set_ip_blocks(adev);
2139 if (r)
2140 return r;
2141 break;
d38ceaf9
AD
2142 }
2143
901e2be2
AD
2144 if (amdgpu_has_atpx() &&
2145 (amdgpu_is_atpx_hybrid() ||
2146 amdgpu_has_atpx_dgpu_power_cntl()) &&
2147 ((adev->flags & AMD_IS_APU) == 0) &&
2148 !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
2149 adev->flags |= AMD_IS_PX;
2150
85ac2021
AD
2151 if (!(adev->flags & AMD_IS_APU)) {
2152 parent = pci_upstream_bridge(adev->pdev);
2153 adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2154 }
901e2be2 2155
c004d44e 2156 amdgpu_amdkfd_device_probe(adev);
1884734a 2157
3b94fb10 2158 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 2159 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 2160 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
4215a119
HC
2161 if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2162 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
00f54b97 2163
ced69502 2164 total = true;
d38ceaf9
AD
2165 for (i = 0; i < adev->num_ip_blocks; i++) {
2166 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
ed8cf00c
HR
2167 DRM_ERROR("disabled ip block: %d <%s>\n",
2168 i, adev->ip_blocks[i].version->funcs->name);
a1255107 2169 adev->ip_blocks[i].status.valid = false;
d38ceaf9 2170 } else {
a1255107
AD
2171 if (adev->ip_blocks[i].version->funcs->early_init) {
2172 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 2173 if (r == -ENOENT) {
a1255107 2174 adev->ip_blocks[i].status.valid = false;
2c1a2784 2175 } else if (r) {
a1255107
AD
2176 DRM_ERROR("early_init of IP block <%s> failed %d\n",
2177 adev->ip_blocks[i].version->funcs->name, r);
ced69502 2178 total = false;
2c1a2784 2179 } else {
a1255107 2180 adev->ip_blocks[i].status.valid = true;
2c1a2784 2181 }
974e6b64 2182 } else {
a1255107 2183 adev->ip_blocks[i].status.valid = true;
d38ceaf9 2184 }
d38ceaf9 2185 }
21a249ca
AD
2186 /* get the vbios after the asic_funcs are set up */
2187 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
2188 r = amdgpu_device_parse_gpu_info_fw(adev);
2189 if (r)
2190 return r;
2191
21a249ca
AD
2192 /* Read BIOS */
2193 if (!amdgpu_get_bios(adev))
2194 return -EINVAL;
2195
2196 r = amdgpu_atombios_init(adev);
2197 if (r) {
2198 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2199 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2200 return r;
2201 }
77eabc6f
PJZ
2202
2203 /*get pf2vf msg info at it's earliest time*/
2204 if (amdgpu_sriov_vf(adev))
2205 amdgpu_virt_init_data_exchange(adev);
2206
21a249ca 2207 }
d38ceaf9 2208 }
ced69502
ML
2209 if (!total)
2210 return -ENODEV;
d38ceaf9 2211
395d1fb9
NH
2212 adev->cg_flags &= amdgpu_cg_mask;
2213 adev->pg_flags &= amdgpu_pg_mask;
2214
d38ceaf9
AD
2215 return 0;
2216}
2217
0a4f2520
RZ
2218static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2219{
2220 int i, r;
2221
2222 for (i = 0; i < adev->num_ip_blocks; i++) {
2223 if (!adev->ip_blocks[i].status.sw)
2224 continue;
2225 if (adev->ip_blocks[i].status.hw)
2226 continue;
2227 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 2228 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
2229 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2230 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2231 if (r) {
2232 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2233 adev->ip_blocks[i].version->funcs->name, r);
2234 return r;
2235 }
2236 adev->ip_blocks[i].status.hw = true;
2237 }
2238 }
2239
2240 return 0;
2241}
2242
2243static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2244{
2245 int i, r;
2246
2247 for (i = 0; i < adev->num_ip_blocks; i++) {
2248 if (!adev->ip_blocks[i].status.sw)
2249 continue;
2250 if (adev->ip_blocks[i].status.hw)
2251 continue;
2252 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2253 if (r) {
2254 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2255 adev->ip_blocks[i].version->funcs->name, r);
2256 return r;
2257 }
2258 adev->ip_blocks[i].status.hw = true;
2259 }
2260
2261 return 0;
2262}
2263
7a3e0bb2
RZ
2264static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2265{
2266 int r = 0;
2267 int i;
80f41f84 2268 uint32_t smu_version;
7a3e0bb2
RZ
2269
2270 if (adev->asic_type >= CHIP_VEGA10) {
2271 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
2272 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2273 continue;
2274
e3c1b071 2275 if (!adev->ip_blocks[i].status.sw)
2276 continue;
2277
482f0e53
ML
2278 /* no need to do the fw loading again if already done*/
2279 if (adev->ip_blocks[i].status.hw == true)
2280 break;
2281
53b3f8f4 2282 if (amdgpu_in_reset(adev) || adev->in_suspend) {
482f0e53
ML
2283 r = adev->ip_blocks[i].version->funcs->resume(adev);
2284 if (r) {
2285 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 2286 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
2287 return r;
2288 }
2289 } else {
2290 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2291 if (r) {
2292 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2293 adev->ip_blocks[i].version->funcs->name, r);
2294 return r;
7a3e0bb2 2295 }
7a3e0bb2 2296 }
482f0e53
ML
2297
2298 adev->ip_blocks[i].status.hw = true;
2299 break;
7a3e0bb2
RZ
2300 }
2301 }
482f0e53 2302
8973d9ec
ED
2303 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2304 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 2305
80f41f84 2306 return r;
7a3e0bb2
RZ
2307}
2308
5fd8518d
AG
2309static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2310{
2311 long timeout;
2312 int r, i;
2313
2314 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2315 struct amdgpu_ring *ring = adev->rings[i];
2316
2317 /* No need to setup the GPU scheduler for rings that don't need it */
2318 if (!ring || ring->no_scheduler)
2319 continue;
2320
2321 switch (ring->funcs->type) {
2322 case AMDGPU_RING_TYPE_GFX:
2323 timeout = adev->gfx_timeout;
2324 break;
2325 case AMDGPU_RING_TYPE_COMPUTE:
2326 timeout = adev->compute_timeout;
2327 break;
2328 case AMDGPU_RING_TYPE_SDMA:
2329 timeout = adev->sdma_timeout;
2330 break;
2331 default:
2332 timeout = adev->video_timeout;
2333 break;
2334 }
2335
2336 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
2337 ring->num_hw_submission, amdgpu_job_hang_limit,
8ab62eda
JG
2338 timeout, adev->reset_domain->wq,
2339 ring->sched_score, ring->name,
2340 adev->dev);
5fd8518d
AG
2341 if (r) {
2342 DRM_ERROR("Failed to create scheduler on ring %s.\n",
2343 ring->name);
2344 return r;
2345 }
2346 }
2347
2348 return 0;
2349}
2350
2351
e3ecdffa
AD
2352/**
2353 * amdgpu_device_ip_init - run init for hardware IPs
2354 *
2355 * @adev: amdgpu_device pointer
2356 *
2357 * Main initialization pass for hardware IPs. The list of all the hardware
2358 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2359 * are run. sw_init initializes the software state associated with each IP
2360 * and hw_init initializes the hardware associated with each IP.
2361 * Returns 0 on success, negative error code on failure.
2362 */
06ec9070 2363static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
2364{
2365 int i, r;
2366
c030f2e4 2367 r = amdgpu_ras_init(adev);
2368 if (r)
2369 return r;
2370
d38ceaf9 2371 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2372 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2373 continue;
a1255107 2374 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 2375 if (r) {
a1255107
AD
2376 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2377 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 2378 goto init_failed;
2c1a2784 2379 }
a1255107 2380 adev->ip_blocks[i].status.sw = true;
bfca0289 2381
c1c39032
AD
2382 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2383 /* need to do common hw init early so everything is set up for gmc */
2384 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2385 if (r) {
2386 DRM_ERROR("hw_init %d failed %d\n", i, r);
2387 goto init_failed;
2388 }
2389 adev->ip_blocks[i].status.hw = true;
2390 } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2391 /* need to do gmc hw init early so we can allocate gpu mem */
892deb48
VS
2392 /* Try to reserve bad pages early */
2393 if (amdgpu_sriov_vf(adev))
2394 amdgpu_virt_exchange_data(adev);
2395
7ccfd79f 2396 r = amdgpu_device_mem_scratch_init(adev);
2c1a2784 2397 if (r) {
7ccfd79f 2398 DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
72d3f592 2399 goto init_failed;
2c1a2784 2400 }
a1255107 2401 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
2402 if (r) {
2403 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 2404 goto init_failed;
2c1a2784 2405 }
06ec9070 2406 r = amdgpu_device_wb_init(adev);
2c1a2784 2407 if (r) {
06ec9070 2408 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 2409 goto init_failed;
2c1a2784 2410 }
a1255107 2411 adev->ip_blocks[i].status.hw = true;
2493664f
ML
2412
2413 /* right after GMC hw init, we create CSA */
8a1fbb4a 2414 if (amdgpu_mcbp) {
1e256e27 2415 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
58ab2c08
CK
2416 AMDGPU_GEM_DOMAIN_VRAM |
2417 AMDGPU_GEM_DOMAIN_GTT,
2418 AMDGPU_CSA_SIZE);
2493664f
ML
2419 if (r) {
2420 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 2421 goto init_failed;
2493664f
ML
2422 }
2423 }
d38ceaf9
AD
2424 }
2425 }
2426
c9ffa427 2427 if (amdgpu_sriov_vf(adev))
22c16d25 2428 amdgpu_virt_init_data_exchange(adev);
c9ffa427 2429
533aed27
AG
2430 r = amdgpu_ib_pool_init(adev);
2431 if (r) {
2432 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2433 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2434 goto init_failed;
2435 }
2436
c8963ea4
RZ
2437 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2438 if (r)
72d3f592 2439 goto init_failed;
0a4f2520
RZ
2440
2441 r = amdgpu_device_ip_hw_init_phase1(adev);
2442 if (r)
72d3f592 2443 goto init_failed;
0a4f2520 2444
7a3e0bb2
RZ
2445 r = amdgpu_device_fw_loading(adev);
2446 if (r)
72d3f592 2447 goto init_failed;
7a3e0bb2 2448
0a4f2520
RZ
2449 r = amdgpu_device_ip_hw_init_phase2(adev);
2450 if (r)
72d3f592 2451 goto init_failed;
d38ceaf9 2452
121a2bc6
AG
2453 /*
2454 * retired pages will be loaded from eeprom and reserved here,
2455 * it should be called after amdgpu_device_ip_hw_init_phase2 since
2456 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2457 * for I2C communication which only true at this point.
b82e65a9
GC
2458 *
2459 * amdgpu_ras_recovery_init may fail, but the upper only cares the
2460 * failure from bad gpu situation and stop amdgpu init process
2461 * accordingly. For other failed cases, it will still release all
2462 * the resource and print error message, rather than returning one
2463 * negative value to upper level.
121a2bc6
AG
2464 *
2465 * Note: theoretically, this should be called before all vram allocations
2466 * to protect retired page from abusing
2467 */
b82e65a9
GC
2468 r = amdgpu_ras_recovery_init(adev);
2469 if (r)
2470 goto init_failed;
121a2bc6 2471
cfbb6b00
AG
2472 /**
2473 * In case of XGMI grab extra reference for reset domain for this device
2474 */
a4c63caf 2475 if (adev->gmc.xgmi.num_physical_nodes > 1) {
cfbb6b00 2476 if (amdgpu_xgmi_add_device(adev) == 0) {
46c67660 2477 if (!amdgpu_sriov_vf(adev)) {
2efc30f0
VC
2478 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2479
dfd0287b
LH
2480 if (WARN_ON(!hive)) {
2481 r = -ENOENT;
2482 goto init_failed;
2483 }
2484
46c67660 2485 if (!hive->reset_domain ||
2486 !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2487 r = -ENOENT;
2488 amdgpu_put_xgmi_hive(hive);
2489 goto init_failed;
2490 }
2491
2492 /* Drop the early temporary reset domain we created for device */
2493 amdgpu_reset_put_reset_domain(adev->reset_domain);
2494 adev->reset_domain = hive->reset_domain;
9dfa4860 2495 amdgpu_put_xgmi_hive(hive);
cfbb6b00 2496 }
a4c63caf
AG
2497 }
2498 }
2499
5fd8518d
AG
2500 r = amdgpu_device_init_schedulers(adev);
2501 if (r)
2502 goto init_failed;
e3c1b071 2503
2504 /* Don't init kfd if whole hive need to be reset during init */
c004d44e 2505 if (!adev->gmc.xgmi.pending_reset)
e3c1b071 2506 amdgpu_amdkfd_device_init(adev);
c6332b97 2507
bd607166
KR
2508 amdgpu_fru_get_product_info(adev);
2509
72d3f592 2510init_failed:
c9ffa427 2511 if (amdgpu_sriov_vf(adev))
c6332b97 2512 amdgpu_virt_release_full_gpu(adev, true);
2513
72d3f592 2514 return r;
d38ceaf9
AD
2515}
2516
e3ecdffa
AD
2517/**
2518 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2519 *
2520 * @adev: amdgpu_device pointer
2521 *
2522 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2523 * this function before a GPU reset. If the value is retained after a
2524 * GPU reset, VRAM has not been lost. Some GPU resets may destry VRAM contents.
2525 */
06ec9070 2526static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2527{
2528 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2529}
2530
e3ecdffa
AD
2531/**
2532 * amdgpu_device_check_vram_lost - check if vram is valid
2533 *
2534 * @adev: amdgpu_device pointer
2535 *
2536 * Checks the reset magic value written to the gart pointer in VRAM.
2537 * The driver calls this after a GPU reset to see if the contents of
2538 * VRAM is lost or now.
2539 * returns true if vram is lost, false if not.
2540 */
06ec9070 2541static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2542{
dadce777
EQ
2543 if (memcmp(adev->gart.ptr, adev->reset_magic,
2544 AMDGPU_RESET_MAGIC_NUM))
2545 return true;
2546
53b3f8f4 2547 if (!amdgpu_in_reset(adev))
dadce777
EQ
2548 return false;
2549
2550 /*
2551 * For all ASICs with baco/mode1 reset, the VRAM is
2552 * always assumed to be lost.
2553 */
2554 switch (amdgpu_asic_reset_method(adev)) {
2555 case AMD_RESET_METHOD_BACO:
2556 case AMD_RESET_METHOD_MODE1:
2557 return true;
2558 default:
2559 return false;
2560 }
0c49e0b8
CZ
2561}
2562
e3ecdffa 2563/**
1112a46b 2564 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2565 *
2566 * @adev: amdgpu_device pointer
b8b72130 2567 * @state: clockgating state (gate or ungate)
e3ecdffa 2568 *
e3ecdffa 2569 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2570 * set_clockgating_state callbacks are run.
2571 * Late initialization pass enabling clockgating for hardware IPs.
2572 * Fini or suspend, pass disabling clockgating for hardware IPs.
e3ecdffa
AD
2573 * Returns 0 on success, negative error code on failure.
2574 */
fdd34271 2575
5d89bb2d
LL
2576int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2577 enum amd_clockgating_state state)
d38ceaf9 2578{
1112a46b 2579 int i, j, r;
d38ceaf9 2580
4a2ba394
SL
2581 if (amdgpu_emu_mode == 1)
2582 return 0;
2583
1112a46b
RZ
2584 for (j = 0; j < adev->num_ip_blocks; j++) {
2585 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2586 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2587 continue;
47198eb7 2588 /* skip CG for GFX, SDMA on S0ix */
5d70a549 2589 if (adev->in_s0ix &&
47198eb7
AD
2590 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2591 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2592 continue;
4a446d55 2593 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2594 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2595 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2596 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2597 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2598 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2599 /* enable clockgating to save power */
a1255107 2600 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2601 state);
4a446d55
AD
2602 if (r) {
2603 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2604 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2605 return r;
2606 }
b0b00ff1 2607 }
d38ceaf9 2608 }
06b18f61 2609
c9f96fd5
RZ
2610 return 0;
2611}
2612
5d89bb2d
LL
2613int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2614 enum amd_powergating_state state)
c9f96fd5 2615{
1112a46b 2616 int i, j, r;
06b18f61 2617
c9f96fd5
RZ
2618 if (amdgpu_emu_mode == 1)
2619 return 0;
2620
1112a46b
RZ
2621 for (j = 0; j < adev->num_ip_blocks; j++) {
2622 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2623 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5 2624 continue;
47198eb7 2625 /* skip PG for GFX, SDMA on S0ix */
5d70a549 2626 if (adev->in_s0ix &&
47198eb7
AD
2627 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2628 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
5d70a549 2629 continue;
c9f96fd5
RZ
2630 /* skip CG for VCE/UVD, it's handled specially */
2631 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2632 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2633 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2634 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2635 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2636 /* enable powergating to save power */
2637 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2638 state);
c9f96fd5
RZ
2639 if (r) {
2640 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2641 adev->ip_blocks[i].version->funcs->name, r);
2642 return r;
2643 }
2644 }
2645 }
2dc80b00
S
2646 return 0;
2647}
2648
beff74bc
AD
2649static int amdgpu_device_enable_mgpu_fan_boost(void)
2650{
2651 struct amdgpu_gpu_instance *gpu_ins;
2652 struct amdgpu_device *adev;
2653 int i, ret = 0;
2654
2655 mutex_lock(&mgpu_info.mutex);
2656
2657 /*
2658 * MGPU fan boost feature should be enabled
2659 * only when there are two or more dGPUs in
2660 * the system
2661 */
2662 if (mgpu_info.num_dgpu < 2)
2663 goto out;
2664
2665 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2666 gpu_ins = &(mgpu_info.gpu_ins[i]);
2667 adev = gpu_ins->adev;
2668 if (!(adev->flags & AMD_IS_APU) &&
f10bb940 2669 !gpu_ins->mgpu_fan_enabled) {
beff74bc
AD
2670 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2671 if (ret)
2672 break;
2673
2674 gpu_ins->mgpu_fan_enabled = 1;
2675 }
2676 }
2677
2678out:
2679 mutex_unlock(&mgpu_info.mutex);
2680
2681 return ret;
2682}
2683
e3ecdffa
AD
2684/**
2685 * amdgpu_device_ip_late_init - run late init for hardware IPs
2686 *
2687 * @adev: amdgpu_device pointer
2688 *
2689 * Late initialization pass for hardware IPs. The list of all the hardware
2690 * IPs that make up the asic is walked and the late_init callbacks are run.
2691 * late_init covers any special initialization that an IP requires
2692 * after all of the have been initialized or something that needs to happen
2693 * late in the init process.
2694 * Returns 0 on success, negative error code on failure.
2695 */
06ec9070 2696static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2697{
60599a03 2698 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2699 int i = 0, r;
2700
2701 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2702 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2703 continue;
2704 if (adev->ip_blocks[i].version->funcs->late_init) {
2705 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2706 if (r) {
2707 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2708 adev->ip_blocks[i].version->funcs->name, r);
2709 return r;
2710 }
2dc80b00 2711 }
73f847db 2712 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2713 }
2714
867e24ca 2715 r = amdgpu_ras_late_init(adev);
2716 if (r) {
2717 DRM_ERROR("amdgpu_ras_late_init failed %d", r);
2718 return r;
2719 }
2720
a891d239
DL
2721 amdgpu_ras_set_error_query_ready(adev, true);
2722
1112a46b
RZ
2723 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2724 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2725
06ec9070 2726 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2727
beff74bc
AD
2728 r = amdgpu_device_enable_mgpu_fan_boost();
2729 if (r)
2730 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2731
4da8b639 2732 /* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */
2733 if (amdgpu_passthrough(adev) && ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1)||
2734 adev->asic_type == CHIP_ALDEBARAN ))
bc143d8b 2735 amdgpu_dpm_handle_passthrough_sbr(adev, true);
60599a03
EQ
2736
2737 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2738 mutex_lock(&mgpu_info.mutex);
2739
2740 /*
2741 * Reset device p-state to low as this was booted with high.
2742 *
2743 * This should be performed only after all devices from the same
2744 * hive get initialized.
2745 *
2746 * However, it's unknown how many device in the hive in advance.
2747 * As this is counted one by one during devices initializations.
2748 *
2749 * So, we wait for all XGMI interlinked devices initialized.
2750 * This may bring some delays as those devices may come from
2751 * different hives. But that should be OK.
2752 */
2753 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2754 for (i = 0; i < mgpu_info.num_gpu; i++) {
2755 gpu_instance = &(mgpu_info.gpu_ins[i]);
2756 if (gpu_instance->adev->flags & AMD_IS_APU)
2757 continue;
2758
d84a430d
JK
2759 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2760 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
2761 if (r) {
2762 DRM_ERROR("pstate setting failed (%d).\n", r);
2763 break;
2764 }
2765 }
2766 }
2767
2768 mutex_unlock(&mgpu_info.mutex);
2769 }
2770
d38ceaf9
AD
2771 return 0;
2772}
2773
613aa3ea
LY
2774/**
2775 * amdgpu_device_smu_fini_early - smu hw_fini wrapper
2776 *
2777 * @adev: amdgpu_device pointer
2778 *
2779 * For ASICs need to disable SMC first
2780 */
2781static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
2782{
2783 int i, r;
2784
2785 if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))
2786 return;
2787
2788 for (i = 0; i < adev->num_ip_blocks; i++) {
2789 if (!adev->ip_blocks[i].status.hw)
2790 continue;
2791 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2792 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2793 /* XXX handle errors */
2794 if (r) {
2795 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2796 adev->ip_blocks[i].version->funcs->name, r);
2797 }
2798 adev->ip_blocks[i].status.hw = false;
2799 break;
2800 }
2801 }
2802}
2803
e9669fb7 2804static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
d38ceaf9
AD
2805{
2806 int i, r;
2807
e9669fb7
AG
2808 for (i = 0; i < adev->num_ip_blocks; i++) {
2809 if (!adev->ip_blocks[i].version->funcs->early_fini)
2810 continue;
5278a159 2811
e9669fb7
AG
2812 r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
2813 if (r) {
2814 DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
2815 adev->ip_blocks[i].version->funcs->name, r);
2816 }
2817 }
c030f2e4 2818
05df1f01 2819 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2820 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2821
7270e895
TY
2822 amdgpu_amdkfd_suspend(adev, false);
2823
613aa3ea
LY
2824 /* Workaroud for ASICs need to disable SMC first */
2825 amdgpu_device_smu_fini_early(adev);
3e96dbfd 2826
d38ceaf9 2827 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2828 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2829 continue;
8201a67a 2830
a1255107 2831 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2832 /* XXX handle errors */
2c1a2784 2833 if (r) {
a1255107
AD
2834 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2835 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2836 }
8201a67a 2837
a1255107 2838 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2839 }
2840
6effad8a
GC
2841 if (amdgpu_sriov_vf(adev)) {
2842 if (amdgpu_virt_release_full_gpu(adev, false))
2843 DRM_ERROR("failed to release exclusive mode on fini\n");
2844 }
2845
e9669fb7
AG
2846 return 0;
2847}
2848
2849/**
2850 * amdgpu_device_ip_fini - run fini for hardware IPs
2851 *
2852 * @adev: amdgpu_device pointer
2853 *
2854 * Main teardown pass for hardware IPs. The list of all the hardware
2855 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2856 * are run. hw_fini tears down the hardware associated with each IP
2857 * and sw_fini tears down any software state associated with each IP.
2858 * Returns 0 on success, negative error code on failure.
2859 */
2860static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2861{
2862 int i, r;
2863
2864 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2865 amdgpu_virt_release_ras_err_handler_data(adev);
2866
e9669fb7
AG
2867 if (adev->gmc.xgmi.num_physical_nodes > 1)
2868 amdgpu_xgmi_remove_device(adev);
2869
c004d44e 2870 amdgpu_amdkfd_device_fini_sw(adev);
9950cda2 2871
d38ceaf9 2872 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2873 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2874 continue;
c12aba3a
ML
2875
2876 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2877 amdgpu_ucode_free_bo(adev);
1e256e27 2878 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a 2879 amdgpu_device_wb_fini(adev);
7ccfd79f 2880 amdgpu_device_mem_scratch_fini(adev);
533aed27 2881 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2882 }
2883
a1255107 2884 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2885 /* XXX handle errors */
2c1a2784 2886 if (r) {
a1255107
AD
2887 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2888 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2889 }
a1255107
AD
2890 adev->ip_blocks[i].status.sw = false;
2891 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2892 }
2893
a6dcfd9c 2894 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2895 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2896 continue;
a1255107
AD
2897 if (adev->ip_blocks[i].version->funcs->late_fini)
2898 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2899 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2900 }
2901
c030f2e4 2902 amdgpu_ras_fini(adev);
2903
d38ceaf9
AD
2904 return 0;
2905}
2906
e3ecdffa 2907/**
beff74bc 2908 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2909 *
1112a46b 2910 * @work: work_struct.
e3ecdffa 2911 */
beff74bc 2912static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2913{
2914 struct amdgpu_device *adev =
beff74bc 2915 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2916 int r;
2917
2918 r = amdgpu_ib_ring_tests(adev);
2919 if (r)
2920 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2921}
2922
1e317b99
RZ
2923static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2924{
2925 struct amdgpu_device *adev =
2926 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2927
90a92662
MD
2928 WARN_ON_ONCE(adev->gfx.gfx_off_state);
2929 WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
2930
2931 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2932 adev->gfx.gfx_off_state = true;
1e317b99
RZ
2933}
2934
e3ecdffa 2935/**
e7854a03 2936 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2937 *
2938 * @adev: amdgpu_device pointer
2939 *
2940 * Main suspend function for hardware IPs. The list of all the hardware
2941 * IPs that make up the asic is walked, clockgating is disabled and the
2942 * suspend callbacks are run. suspend puts the hardware and software state
2943 * in each IP into a state suitable for suspend.
2944 * Returns 0 on success, negative error code on failure.
2945 */
e7854a03
AD
2946static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2947{
2948 int i, r;
2949
50ec83f0
AD
2950 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2951 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2952
b31d6ada
EQ
2953 /*
2954 * Per PMFW team's suggestion, driver needs to handle gfxoff
2955 * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
2956 * scenario. Add the missing df cstate disablement here.
2957 */
2958 if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
2959 dev_warn(adev->dev, "Failed to disallow df cstate");
2960
e7854a03
AD
2961 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2962 if (!adev->ip_blocks[i].status.valid)
2963 continue;
2b9f7848 2964
e7854a03 2965 /* displays are handled separately */
2b9f7848
ND
2966 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
2967 continue;
2968
2969 /* XXX handle errors */
2970 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2971 /* XXX handle errors */
2972 if (r) {
2973 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2974 adev->ip_blocks[i].version->funcs->name, r);
2975 return r;
e7854a03 2976 }
2b9f7848
ND
2977
2978 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
2979 }
2980
e7854a03
AD
2981 return 0;
2982}
2983
2984/**
2985 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2986 *
2987 * @adev: amdgpu_device pointer
2988 *
2989 * Main suspend function for hardware IPs. The list of all the hardware
2990 * IPs that make up the asic is walked, clockgating is disabled and the
2991 * suspend callbacks are run. suspend puts the hardware and software state
2992 * in each IP into a state suitable for suspend.
2993 * Returns 0 on success, negative error code on failure.
2994 */
2995static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2996{
2997 int i, r;
2998
557f42a2 2999 if (adev->in_s0ix)
bc143d8b 3000 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
34416931 3001
d38ceaf9 3002 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 3003 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 3004 continue;
e7854a03
AD
3005 /* displays are handled in phase1 */
3006 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3007 continue;
bff77e86
LM
3008 /* PSP lost connection when err_event_athub occurs */
3009 if (amdgpu_ras_intr_triggered() &&
3010 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3011 adev->ip_blocks[i].status.hw = false;
3012 continue;
3013 }
e3c1b071 3014
3015 /* skip unnecessary suspend if we do not initialize them yet */
3016 if (adev->gmc.xgmi.pending_reset &&
3017 !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3018 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
3019 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3020 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
3021 adev->ip_blocks[i].status.hw = false;
3022 continue;
3023 }
557f42a2 3024
afa6646b 3025 /* skip suspend of gfx/mes and psp for S0ix
32ff160d
AD
3026 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3027 * like at runtime. PSP is also part of the always on hardware
3028 * so no need to suspend it.
3029 */
557f42a2 3030 if (adev->in_s0ix &&
32ff160d 3031 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
afa6646b
AD
3032 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3033 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
557f42a2
AD
3034 continue;
3035
2a7798ea
AD
3036 /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3037 if (adev->in_s0ix &&
3038 (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 0, 0)) &&
3039 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3040 continue;
3041
e11c7750
TH
3042 /* Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot.
3043 * These are in TMR, hence are expected to be reused by PSP-TOS to reload
3044 * from this location and RLC Autoload automatically also gets loaded
3045 * from here based on PMFW -> PSP message during re-init sequence.
3046 * Therefore, the psp suspend & resume should be skipped to avoid destroy
3047 * the TMR and reload FWs again for IMU enabled APU ASICs.
3048 */
3049 if (amdgpu_in_reset(adev) &&
3050 (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3051 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3052 continue;
3053
d38ceaf9 3054 /* XXX handle errors */
a1255107 3055 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 3056 /* XXX handle errors */
2c1a2784 3057 if (r) {
a1255107
AD
3058 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3059 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 3060 }
876923fb 3061 adev->ip_blocks[i].status.hw = false;
a3a09142 3062 /* handle putting the SMC in the appropriate state */
86b93fd6
JZ
3063 if(!amdgpu_sriov_vf(adev)){
3064 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3065 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3066 if (r) {
3067 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3068 adev->mp1_state, r);
3069 return r;
3070 }
a3a09142
AD
3071 }
3072 }
d38ceaf9
AD
3073 }
3074
3075 return 0;
3076}
3077
e7854a03
AD
3078/**
3079 * amdgpu_device_ip_suspend - run suspend for hardware IPs
3080 *
3081 * @adev: amdgpu_device pointer
3082 *
3083 * Main suspend function for hardware IPs. The list of all the hardware
3084 * IPs that make up the asic is walked, clockgating is disabled and the
3085 * suspend callbacks are run. suspend puts the hardware and software state
3086 * in each IP into a state suitable for suspend.
3087 * Returns 0 on success, negative error code on failure.
3088 */
3089int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3090{
3091 int r;
3092
3c73683c
JC
3093 if (amdgpu_sriov_vf(adev)) {
3094 amdgpu_virt_fini_data_exchange(adev);
e7819644 3095 amdgpu_virt_request_full_gpu(adev, false);
3c73683c 3096 }
e7819644 3097
e7854a03
AD
3098 r = amdgpu_device_ip_suspend_phase1(adev);
3099 if (r)
3100 return r;
3101 r = amdgpu_device_ip_suspend_phase2(adev);
3102
e7819644
YT
3103 if (amdgpu_sriov_vf(adev))
3104 amdgpu_virt_release_full_gpu(adev, false);
3105
e7854a03
AD
3106 return r;
3107}
3108
06ec9070 3109static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3110{
3111 int i, r;
3112
2cb681b6 3113 static enum amd_ip_block_type ip_order[] = {
2cb681b6 3114 AMD_IP_BLOCK_TYPE_COMMON,
c1c39032 3115 AMD_IP_BLOCK_TYPE_GMC,
39186aef 3116 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
3117 AMD_IP_BLOCK_TYPE_IH,
3118 };
a90ad3c2 3119
95ea3dbc 3120 for (i = 0; i < adev->num_ip_blocks; i++) {
2cb681b6
ML
3121 int j;
3122 struct amdgpu_ip_block *block;
a90ad3c2 3123
4cd2a96d
J
3124 block = &adev->ip_blocks[i];
3125 block->status.hw = false;
2cb681b6 3126
4cd2a96d 3127 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
2cb681b6 3128
4cd2a96d 3129 if (block->version->type != ip_order[j] ||
2cb681b6
ML
3130 !block->status.valid)
3131 continue;
3132
3133 r = block->version->funcs->hw_init(adev);
0aaeefcc 3134 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3135 if (r)
3136 return r;
482f0e53 3137 block->status.hw = true;
a90ad3c2
ML
3138 }
3139 }
3140
3141 return 0;
3142}
3143
06ec9070 3144static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3145{
3146 int i, r;
3147
2cb681b6
ML
3148 static enum amd_ip_block_type ip_order[] = {
3149 AMD_IP_BLOCK_TYPE_SMC,
3150 AMD_IP_BLOCK_TYPE_DCE,
3151 AMD_IP_BLOCK_TYPE_GFX,
3152 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c 3153 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07
JJ
3154 AMD_IP_BLOCK_TYPE_VCE,
3155 AMD_IP_BLOCK_TYPE_VCN
2cb681b6 3156 };
a90ad3c2 3157
2cb681b6
ML
3158 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3159 int j;
3160 struct amdgpu_ip_block *block;
a90ad3c2 3161
2cb681b6
ML
3162 for (j = 0; j < adev->num_ip_blocks; j++) {
3163 block = &adev->ip_blocks[j];
3164
3165 if (block->version->type != ip_order[i] ||
482f0e53
ML
3166 !block->status.valid ||
3167 block->status.hw)
2cb681b6
ML
3168 continue;
3169
895bd048
JZ
3170 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3171 r = block->version->funcs->resume(adev);
3172 else
3173 r = block->version->funcs->hw_init(adev);
3174
0aaeefcc 3175 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3176 if (r)
3177 return r;
482f0e53 3178 block->status.hw = true;
a90ad3c2
ML
3179 }
3180 }
3181
3182 return 0;
3183}
3184
e3ecdffa
AD
3185/**
3186 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3187 *
3188 * @adev: amdgpu_device pointer
3189 *
3190 * First resume function for hardware IPs. The list of all the hardware
3191 * IPs that make up the asic is walked and the resume callbacks are run for
3192 * COMMON, GMC, and IH. resume puts the hardware into a functional state
3193 * after a suspend and updates the software state as necessary. This
3194 * function is also used for restoring the GPU after a GPU reset.
3195 * Returns 0 on success, negative error code on failure.
3196 */
06ec9070 3197static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
3198{
3199 int i, r;
3200
a90ad3c2 3201 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3202 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 3203 continue;
a90ad3c2 3204 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3205 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
d7274ec7
BZ
3206 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3207 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
482f0e53 3208
fcf0649f
CZ
3209 r = adev->ip_blocks[i].version->funcs->resume(adev);
3210 if (r) {
3211 DRM_ERROR("resume of IP block <%s> failed %d\n",
3212 adev->ip_blocks[i].version->funcs->name, r);
3213 return r;
3214 }
482f0e53 3215 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
3216 }
3217 }
3218
3219 return 0;
3220}
3221
e3ecdffa
AD
3222/**
3223 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3224 *
3225 * @adev: amdgpu_device pointer
3226 *
3227 * First resume function for hardware IPs. The list of all the hardware
3228 * IPs that make up the asic is walked and the resume callbacks are run for
3229 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
3230 * functional state after a suspend and updates the software state as
3231 * necessary. This function is also used for restoring the GPU after a GPU
3232 * reset.
3233 * Returns 0 on success, negative error code on failure.
3234 */
06ec9070 3235static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3236{
3237 int i, r;
3238
3239 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3240 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 3241 continue;
fcf0649f 3242 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3243 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
3244 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3245 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 3246 continue;
a1255107 3247 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 3248 if (r) {
a1255107
AD
3249 DRM_ERROR("resume of IP block <%s> failed %d\n",
3250 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 3251 return r;
2c1a2784 3252 }
482f0e53 3253 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
3254 }
3255
3256 return 0;
3257}
3258
e3ecdffa
AD
3259/**
3260 * amdgpu_device_ip_resume - run resume for hardware IPs
3261 *
3262 * @adev: amdgpu_device pointer
3263 *
3264 * Main resume function for hardware IPs. The hardware IPs
3265 * are split into two resume functions because they are
3266 * are also used in in recovering from a GPU reset and some additional
3267 * steps need to be take between them. In this case (S3/S4) they are
3268 * run sequentially.
3269 * Returns 0 on success, negative error code on failure.
3270 */
06ec9070 3271static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
3272{
3273 int r;
3274
9cec53c1
JZ
3275 r = amdgpu_amdkfd_resume_iommu(adev);
3276 if (r)
3277 return r;
3278
06ec9070 3279 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
3280 if (r)
3281 return r;
7a3e0bb2
RZ
3282
3283 r = amdgpu_device_fw_loading(adev);
3284 if (r)
3285 return r;
3286
06ec9070 3287 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
3288
3289 return r;
3290}
3291
e3ecdffa
AD
3292/**
3293 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3294 *
3295 * @adev: amdgpu_device pointer
3296 *
3297 * Query the VBIOS data tables to determine if the board supports SR-IOV.
3298 */
4e99a44e 3299static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 3300{
6867e1b5
ML
3301 if (amdgpu_sriov_vf(adev)) {
3302 if (adev->is_atom_fw) {
58ff791a 3303 if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
6867e1b5
ML
3304 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3305 } else {
3306 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3307 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3308 }
3309
3310 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3311 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 3312 }
048765ad
AR
3313}
3314
e3ecdffa
AD
3315/**
3316 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3317 *
3318 * @asic_type: AMD asic type
3319 *
3320 * Check if there is DC (new modesetting infrastructre) support for an asic.
3321 * returns true if DC has support, false if not.
3322 */
4562236b
HW
3323bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3324{
3325 switch (asic_type) {
0637d417
AD
3326#ifdef CONFIG_DRM_AMDGPU_SI
3327 case CHIP_HAINAN:
3328#endif
3329 case CHIP_TOPAZ:
3330 /* chips with no display hardware */
3331 return false;
4562236b 3332#if defined(CONFIG_DRM_AMD_DC)
64200c46
MR
3333 case CHIP_TAHITI:
3334 case CHIP_PITCAIRN:
3335 case CHIP_VERDE:
3336 case CHIP_OLAND:
2d32ffd6
AD
3337 /*
3338 * We have systems in the wild with these ASICs that require
3339 * LVDS and VGA support which is not supported with DC.
3340 *
3341 * Fallback to the non-DC driver here by default so as not to
3342 * cause regressions.
3343 */
3344#if defined(CONFIG_DRM_AMD_DC_SI)
3345 return amdgpu_dc > 0;
3346#else
3347 return false;
64200c46 3348#endif
4562236b 3349 case CHIP_BONAIRE:
0d6fbccb 3350 case CHIP_KAVERI:
367e6687
AD
3351 case CHIP_KABINI:
3352 case CHIP_MULLINS:
d9fda248
HW
3353 /*
3354 * We have systems in the wild with these ASICs that require
b5a0168e 3355 * VGA support which is not supported with DC.
d9fda248
HW
3356 *
3357 * Fallback to the non-DC driver here by default so as not to
3358 * cause regressions.
3359 */
3360 return amdgpu_dc > 0;
f7f12b25 3361 default:
fd187853 3362 return amdgpu_dc != 0;
f7f12b25 3363#else
4562236b 3364 default:
93b09a9a 3365 if (amdgpu_dc > 0)
044a48f4 3366 DRM_INFO_ONCE("Display Core has been requested via kernel parameter "
93b09a9a 3367 "but isn't supported by ASIC, ignoring\n");
4562236b 3368 return false;
f7f12b25 3369#endif
4562236b
HW
3370 }
3371}
3372
3373/**
3374 * amdgpu_device_has_dc_support - check if dc is supported
3375 *
982a820b 3376 * @adev: amdgpu_device pointer
4562236b
HW
3377 *
3378 * Returns true for supported, false for not supported
3379 */
3380bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3381{
25263da3 3382 if (adev->enable_virtual_display ||
abaf210c 3383 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
2555039d
XY
3384 return false;
3385
4562236b
HW
3386 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3387}
3388
d4535e2c
AG
3389static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3390{
3391 struct amdgpu_device *adev =
3392 container_of(__work, struct amdgpu_device, xgmi_reset_work);
d95e8e97 3393 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
d4535e2c 3394
c6a6e2db
AG
3395 /* It's a bug to not have a hive within this function */
3396 if (WARN_ON(!hive))
3397 return;
3398
3399 /*
3400 * Use task barrier to synchronize all xgmi reset works across the
3401 * hive. task_barrier_enter and task_barrier_exit will block
3402 * until all the threads running the xgmi reset works reach
3403 * those points. task_barrier_full will do both blocks.
3404 */
3405 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3406
3407 task_barrier_enter(&hive->tb);
4a580877 3408 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
c6a6e2db
AG
3409
3410 if (adev->asic_reset_res)
3411 goto fail;
3412
3413 task_barrier_exit(&hive->tb);
4a580877 3414 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
c6a6e2db
AG
3415
3416 if (adev->asic_reset_res)
3417 goto fail;
43c4d576 3418
5e67bba3 3419 if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
3420 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
3421 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
c6a6e2db
AG
3422 } else {
3423
3424 task_barrier_full(&hive->tb);
3425 adev->asic_reset_res = amdgpu_asic_reset(adev);
3426 }
ce316fa5 3427
c6a6e2db 3428fail:
d4535e2c 3429 if (adev->asic_reset_res)
fed184e9 3430 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
4a580877 3431 adev->asic_reset_res, adev_to_drm(adev)->unique);
d95e8e97 3432 amdgpu_put_xgmi_hive(hive);
d4535e2c
AG
3433}
3434
71f98027
AD
3435static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3436{
3437 char *input = amdgpu_lockup_timeout;
3438 char *timeout_setting = NULL;
3439 int index = 0;
3440 long timeout;
3441 int ret = 0;
3442
3443 /*
67387dfe
AD
3444 * By default timeout for non compute jobs is 10000
3445 * and 60000 for compute jobs.
71f98027 3446 * In SR-IOV or passthrough mode, timeout for compute
b7b2a316 3447 * jobs are 60000 by default.
71f98027
AD
3448 */
3449 adev->gfx_timeout = msecs_to_jiffies(10000);
3450 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
9882e278
ED
3451 if (amdgpu_sriov_vf(adev))
3452 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3453 msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
71f98027 3454 else
67387dfe 3455 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027 3456
f440ff44 3457 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 3458 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 3459 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
3460 ret = kstrtol(timeout_setting, 0, &timeout);
3461 if (ret)
3462 return ret;
3463
3464 if (timeout == 0) {
3465 index++;
3466 continue;
3467 } else if (timeout < 0) {
3468 timeout = MAX_SCHEDULE_TIMEOUT;
127aedf9
CK
3469 dev_warn(adev->dev, "lockup timeout disabled");
3470 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
71f98027
AD
3471 } else {
3472 timeout = msecs_to_jiffies(timeout);
3473 }
3474
3475 switch (index++) {
3476 case 0:
3477 adev->gfx_timeout = timeout;
3478 break;
3479 case 1:
3480 adev->compute_timeout = timeout;
3481 break;
3482 case 2:
3483 adev->sdma_timeout = timeout;
3484 break;
3485 case 3:
3486 adev->video_timeout = timeout;
3487 break;
3488 default:
3489 break;
3490 }
3491 }
3492 /*
3493 * There is only one value specified and
3494 * it should apply to all non-compute jobs.
3495 */
bcccee89 3496 if (index == 1) {
71f98027 3497 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
3498 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3499 adev->compute_timeout = adev->gfx_timeout;
3500 }
71f98027
AD
3501 }
3502
3503 return ret;
3504}
d4535e2c 3505
4a74c38c
PY
3506/**
3507 * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3508 *
3509 * @adev: amdgpu_device pointer
3510 *
3511 * RAM direct mapped to GPU if IOMMU is not enabled or is pass through mode
3512 */
3513static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3514{
3515 struct iommu_domain *domain;
3516
3517 domain = iommu_get_domain_for_dev(adev->dev);
3518 if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3519 adev->ram_is_direct_mapped = true;
3520}
3521
77f3a5cd
ND
3522static const struct attribute *amdgpu_dev_attributes[] = {
3523 &dev_attr_product_name.attr,
3524 &dev_attr_product_number.attr,
3525 &dev_attr_serial_number.attr,
3526 &dev_attr_pcie_replay_count.attr,
3527 NULL
3528};
3529
d38ceaf9
AD
3530/**
3531 * amdgpu_device_init - initialize the driver
3532 *
3533 * @adev: amdgpu_device pointer
d38ceaf9
AD
3534 * @flags: driver flags
3535 *
3536 * Initializes the driver info and hw (all asics).
3537 * Returns 0 for success or an error on failure.
3538 * Called at driver startup.
3539 */
3540int amdgpu_device_init(struct amdgpu_device *adev,
d38ceaf9
AD
3541 uint32_t flags)
3542{
8aba21b7
LT
3543 struct drm_device *ddev = adev_to_drm(adev);
3544 struct pci_dev *pdev = adev->pdev;
d38ceaf9 3545 int r, i;
b98c6299 3546 bool px = false;
95844d20 3547 u32 max_MBps;
d38ceaf9
AD
3548
3549 adev->shutdown = false;
d38ceaf9 3550 adev->flags = flags;
4e66d7d2
YZ
3551
3552 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3553 adev->asic_type = amdgpu_force_asic_type;
3554 else
3555 adev->asic_type = flags & AMD_ASIC_MASK;
3556
d38ceaf9 3557 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 3558 if (amdgpu_emu_mode == 1)
8bdab6bb 3559 adev->usec_timeout *= 10;
770d13b1 3560 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
3561 adev->accel_working = false;
3562 adev->num_rings = 0;
68ce8b24 3563 RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
d38ceaf9
AD
3564 adev->mman.buffer_funcs = NULL;
3565 adev->mman.buffer_funcs_ring = NULL;
3566 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 3567 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 3568 adev->gmc.gmc_funcs = NULL;
7bd939d0 3569 adev->harvest_ip_mask = 0x0;
f54d1867 3570 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 3571 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
3572
3573 adev->smc_rreg = &amdgpu_invalid_rreg;
3574 adev->smc_wreg = &amdgpu_invalid_wreg;
3575 adev->pcie_rreg = &amdgpu_invalid_rreg;
3576 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
3577 adev->pciep_rreg = &amdgpu_invalid_rreg;
3578 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
3579 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3580 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
3581 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3582 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3583 adev->didt_rreg = &amdgpu_invalid_rreg;
3584 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
3585 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3586 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
3587 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3588 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3589
3e39ab90
AD
3590 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3591 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3592 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
3593
3594 /* mutex initialization are all done here so we
3595 * can recall function without having locking issues */
0e5ca0d1 3596 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
3597 mutex_init(&adev->pm.mutex);
3598 mutex_init(&adev->gfx.gpu_clock_mutex);
3599 mutex_init(&adev->srbm_mutex);
b8866c26 3600 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3601 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 3602 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3603 mutex_init(&adev->mn_lock);
e23b74aa 3604 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3605 hash_init(adev->mn_hash);
32eaeae0 3606 mutex_init(&adev->psp.mutex);
bd052211 3607 mutex_init(&adev->notifier_lock);
8cda7a4f 3608 mutex_init(&adev->pm.stable_pstate_ctx_lock);
f113cc32 3609 mutex_init(&adev->benchmark_mutex);
d38ceaf9 3610
ab3b9de6 3611 amdgpu_device_init_apu_flags(adev);
9f6a7857 3612
912dfc84
EQ
3613 r = amdgpu_device_check_arguments(adev);
3614 if (r)
3615 return r;
d38ceaf9 3616
d38ceaf9
AD
3617 spin_lock_init(&adev->mmio_idx_lock);
3618 spin_lock_init(&adev->smc_idx_lock);
3619 spin_lock_init(&adev->pcie_idx_lock);
3620 spin_lock_init(&adev->uvd_ctx_idx_lock);
3621 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3622 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3623 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3624 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3625 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3626
0c4e7fa5
CZ
3627 INIT_LIST_HEAD(&adev->shadow_list);
3628 mutex_init(&adev->shadow_list_lock);
3629
655ce9cb 3630 INIT_LIST_HEAD(&adev->reset_list);
3631
6492e1b0 3632 INIT_LIST_HEAD(&adev->ras_list);
3633
beff74bc
AD
3634 INIT_DELAYED_WORK(&adev->delayed_init_work,
3635 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3636 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3637 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3638
d4535e2c
AG
3639 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3640
d23ee13f 3641 adev->gfx.gfx_off_req_count = 1;
0ad7347a
AA
3642 adev->gfx.gfx_off_residency = 0;
3643 adev->gfx.gfx_off_entrycount = 0;
b6e79d9a 3644 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3645
b265bdbd
EQ
3646 atomic_set(&adev->throttling_logging_enabled, 1);
3647 /*
3648 * If throttling continues, logging will be performed every minute
3649 * to avoid log flooding. "-1" is subtracted since the thermal
3650 * throttling interrupt comes every second. Thus, the total logging
3651 * interval is 59 seconds(retelimited printk interval) + 1(waiting
3652 * for throttling interrupt) = 60 seconds.
3653 */
3654 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3655 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3656
0fa49558
AX
3657 /* Registers mapping */
3658 /* TODO: block userspace mapping of io register */
da69c161
KW
3659 if (adev->asic_type >= CHIP_BONAIRE) {
3660 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3661 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3662 } else {
3663 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3664 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3665 }
d38ceaf9 3666
6c08e0ef
EQ
3667 for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3668 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3669
d38ceaf9
AD
3670 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3671 if (adev->rmmio == NULL) {
3672 return -ENOMEM;
3673 }
3674 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3675 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3676
5494d864
AD
3677 amdgpu_device_get_pcie_info(adev);
3678
b239c017
JX
3679 if (amdgpu_mcbp)
3680 DRM_INFO("MCBP is enabled\n");
3681
436afdfa
PY
3682 /*
3683 * Reset domain needs to be present early, before XGMI hive discovered
3684 * (if any) and intitialized to use reset sem and in_gpu reset flag
3685 * early on during init and before calling to RREG32.
3686 */
3687 adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3688 if (!adev->reset_domain)
3689 return -ENOMEM;
3690
3aa0115d
ML
3691 /* detect hw virtualization here */
3692 amdgpu_detect_virtualization(adev);
3693
dffa11b4
ML
3694 r = amdgpu_device_get_job_timeout_settings(adev);
3695 if (r) {
3696 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4ef87d8f 3697 return r;
a190d1c7
XY
3698 }
3699
d38ceaf9 3700 /* early init functions */
06ec9070 3701 r = amdgpu_device_ip_early_init(adev);
d38ceaf9 3702 if (r)
4ef87d8f 3703 return r;
d38ceaf9 3704
b7cdb41e
ML
3705 /* Get rid of things like offb */
3706 r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
3707 if (r)
3708 return r;
3709
4d33e704
SK
3710 /* Enable TMZ based on IP_VERSION */
3711 amdgpu_gmc_tmz_set(adev);
3712
957b0787 3713 amdgpu_gmc_noretry_set(adev);
4a0165f0
VS
3714 /* Need to get xgmi info early to decide the reset behavior*/
3715 if (adev->gmc.xgmi.supported) {
3716 r = adev->gfxhub.funcs->get_xgmi_info(adev);
3717 if (r)
3718 return r;
3719 }
3720
8e6d0b69 3721 /* enable PCIE atomic ops */
3722 if (amdgpu_sriov_vf(adev))
3723 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
e15c9d06 3724 adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
8e6d0b69 3725 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3726 else
3727 adev->have_atomics_support =
3728 !pci_enable_atomic_ops_to_root(adev->pdev,
3729 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3730 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3731 if (!adev->have_atomics_support)
3732 dev_info(adev->dev, "PCIE atomic ops is not supported\n");
3733
6585661d
OZ
3734 /* doorbell bar mapping and doorbell index init*/
3735 amdgpu_device_doorbell_init(adev);
3736
9475a943
SL
3737 if (amdgpu_emu_mode == 1) {
3738 /* post the asic on emulation mode */
3739 emu_soc_asic_init(adev);
bfca0289 3740 goto fence_driver_init;
9475a943 3741 }
bfca0289 3742
04442bf7
LL
3743 amdgpu_reset_init(adev);
3744
4e99a44e
ML
3745 /* detect if we are with an SRIOV vbios */
3746 amdgpu_device_detect_sriov_bios(adev);
048765ad 3747
95e8e59e
AD
3748 /* check if we need to reset the asic
3749 * E.g., driver was not cleanly unloaded previously, etc.
3750 */
f14899fd 3751 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
e3c1b071 3752 if (adev->gmc.xgmi.num_physical_nodes) {
3753 dev_info(adev->dev, "Pending hive reset.\n");
3754 adev->gmc.xgmi.pending_reset = true;
3755 /* Only need to init necessary block for SMU to handle the reset */
3756 for (i = 0; i < adev->num_ip_blocks; i++) {
3757 if (!adev->ip_blocks[i].status.valid)
3758 continue;
3759 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3760 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3761 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3762 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
751f43e7 3763 DRM_DEBUG("IP %s disabled for hw_init.\n",
e3c1b071 3764 adev->ip_blocks[i].version->funcs->name);
3765 adev->ip_blocks[i].status.hw = true;
3766 }
3767 }
3768 } else {
3769 r = amdgpu_asic_reset(adev);
3770 if (r) {
3771 dev_err(adev->dev, "asic reset on init failed\n");
3772 goto failed;
3773 }
95e8e59e
AD
3774 }
3775 }
3776
d38ceaf9 3777 /* Post card if necessary */
39c640c0 3778 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3779 if (!adev->bios) {
bec86378 3780 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3781 r = -EINVAL;
3782 goto failed;
d38ceaf9 3783 }
bec86378 3784 DRM_INFO("GPU posting now...\n");
4d2997ab 3785 r = amdgpu_device_asic_init(adev);
4e99a44e
ML
3786 if (r) {
3787 dev_err(adev->dev, "gpu post error!\n");
3788 goto failed;
3789 }
d38ceaf9
AD
3790 }
3791
88b64e95
AD
3792 if (adev->is_atom_fw) {
3793 /* Initialize clocks */
3794 r = amdgpu_atomfirmware_get_clock_info(adev);
3795 if (r) {
3796 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 3797 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
3798 goto failed;
3799 }
3800 } else {
a5bde2f9
AD
3801 /* Initialize clocks */
3802 r = amdgpu_atombios_get_clock_info(adev);
3803 if (r) {
3804 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 3805 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 3806 goto failed;
a5bde2f9
AD
3807 }
3808 /* init i2c buses */
4562236b
HW
3809 if (!amdgpu_device_has_dc_support(adev))
3810 amdgpu_atombios_i2c_init(adev);
2c1a2784 3811 }
d38ceaf9 3812
bfca0289 3813fence_driver_init:
d38ceaf9 3814 /* Fence driver */
067f44c8 3815 r = amdgpu_fence_driver_sw_init(adev);
2c1a2784 3816 if (r) {
067f44c8 3817 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
e23b74aa 3818 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3819 goto failed;
2c1a2784 3820 }
d38ceaf9
AD
3821
3822 /* init the mode config */
4a580877 3823 drm_mode_config_init(adev_to_drm(adev));
d38ceaf9 3824
06ec9070 3825 r = amdgpu_device_ip_init(adev);
d38ceaf9 3826 if (r) {
8840a387 3827 /* failed in exclusive mode due to timeout */
3828 if (amdgpu_sriov_vf(adev) &&
3829 !amdgpu_sriov_runtime(adev) &&
3830 amdgpu_virt_mmio_blocked(adev) &&
3831 !amdgpu_virt_wait_reset(adev)) {
3832 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
3833 /* Don't send request since VF is inactive. */
3834 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3835 adev->virt.ops = NULL;
8840a387 3836 r = -EAGAIN;
970fd197 3837 goto release_ras_con;
8840a387 3838 }
06ec9070 3839 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3840 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
970fd197 3841 goto release_ras_con;
d38ceaf9
AD
3842 }
3843
8d35a259
LG
3844 amdgpu_fence_driver_hw_init(adev);
3845
d69b8971
YZ
3846 dev_info(adev->dev,
3847 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
3848 adev->gfx.config.max_shader_engines,
3849 adev->gfx.config.max_sh_per_se,
3850 adev->gfx.config.max_cu_per_sh,
3851 adev->gfx.cu_info.number);
3852
d38ceaf9
AD
3853 adev->accel_working = true;
3854
e59c0205
AX
3855 amdgpu_vm_check_compute_bug(adev);
3856
95844d20
MO
3857 /* Initialize the buffer migration limit. */
3858 if (amdgpu_moverate >= 0)
3859 max_MBps = amdgpu_moverate;
3860 else
3861 max_MBps = 8; /* Allow 8 MB/s. */
3862 /* Get a log2 for easy divisions. */
3863 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3864
d2f52ac8 3865 r = amdgpu_pm_sysfs_init(adev);
7c868b59
YT
3866 if (r) {
3867 adev->pm_sysfs_en = false;
d2f52ac8 3868 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
7c868b59
YT
3869 } else
3870 adev->pm_sysfs_en = true;
d2f52ac8 3871
5bb23532 3872 r = amdgpu_ucode_sysfs_init(adev);
7c868b59
YT
3873 if (r) {
3874 adev->ucode_sysfs_en = false;
5bb23532 3875 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
7c868b59
YT
3876 } else
3877 adev->ucode_sysfs_en = true;
5bb23532 3878
8424f2cc
LG
3879 r = amdgpu_psp_sysfs_init(adev);
3880 if (r) {
3881 adev->psp_sysfs_en = false;
3882 if (!amdgpu_sriov_vf(adev))
3883 DRM_ERROR("Creating psp sysfs failed\n");
3884 } else
3885 adev->psp_sysfs_en = true;
3886
b0adca4d
EQ
3887 /*
3888 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3889 * Otherwise the mgpu fan boost feature will be skipped due to the
3890 * gpu instance is counted less.
3891 */
3892 amdgpu_register_gpu_instance(adev);
3893
d38ceaf9
AD
3894 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3895 * explicit gating rather than handling it automatically.
3896 */
e3c1b071 3897 if (!adev->gmc.xgmi.pending_reset) {
3898 r = amdgpu_device_ip_late_init(adev);
3899 if (r) {
3900 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
3901 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
970fd197 3902 goto release_ras_con;
e3c1b071 3903 }
3904 /* must succeed. */
3905 amdgpu_ras_resume(adev);
3906 queue_delayed_work(system_wq, &adev->delayed_init_work,
3907 msecs_to_jiffies(AMDGPU_RESUME_MS));
2c1a2784 3908 }
d38ceaf9 3909
2c738637
ML
3910 if (amdgpu_sriov_vf(adev))
3911 flush_delayed_work(&adev->delayed_init_work);
3912
77f3a5cd 3913 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
5aea5327 3914 if (r)
77f3a5cd 3915 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166 3916
d155bef0
AB
3917 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3918 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
3919 if (r)
3920 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3921
c1dd4aa6
AG
3922 /* Have stored pci confspace at hand for restore in sudden PCI error */
3923 if (amdgpu_device_cache_pci_state(adev->pdev))
3924 pci_restore_state(pdev);
3925
8c3dd61c
KHF
3926 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
3927 /* this will fail for cards that aren't VGA class devices, just
3928 * ignore it */
3929 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
bf44e8ce 3930 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
8c3dd61c 3931
d37a3929
OC
3932 px = amdgpu_device_supports_px(ddev);
3933
3934 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
3935 apple_gmux_detect(NULL, NULL)))
8c3dd61c
KHF
3936 vga_switcheroo_register_client(adev->pdev,
3937 &amdgpu_switcheroo_ops, px);
d37a3929
OC
3938
3939 if (px)
8c3dd61c 3940 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
8c3dd61c 3941
e3c1b071 3942 if (adev->gmc.xgmi.pending_reset)
3943 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
3944 msecs_to_jiffies(AMDGPU_RESUME_MS));
3945
4a74c38c
PY
3946 amdgpu_device_check_iommu_direct_map(adev);
3947
d38ceaf9 3948 return 0;
83ba126a 3949
970fd197
SY
3950release_ras_con:
3951 amdgpu_release_ras_context(adev);
3952
83ba126a 3953failed:
89041940 3954 amdgpu_vf_error_trans_all(adev);
8840a387 3955
83ba126a 3956 return r;
d38ceaf9
AD
3957}
3958
07775fc1
AG
3959static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
3960{
62d5f9f7 3961
07775fc1
AG
3962 /* Clear all CPU mappings pointing to this device */
3963 unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
3964
3965 /* Unmap all mapped bars - Doorbell, registers and VRAM */
3966 amdgpu_device_doorbell_fini(adev);
3967
3968 iounmap(adev->rmmio);
3969 adev->rmmio = NULL;
3970 if (adev->mman.aper_base_kaddr)
3971 iounmap(adev->mman.aper_base_kaddr);
3972 adev->mman.aper_base_kaddr = NULL;
3973
3974 /* Memory manager related */
3975 if (!adev->gmc.xgmi.connected_to_cpu) {
3976 arch_phys_wc_del(adev->gmc.vram_mtrr);
3977 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
3978 }
3979}
3980
d38ceaf9 3981/**
bbe04dec 3982 * amdgpu_device_fini_hw - tear down the driver
d38ceaf9
AD
3983 *
3984 * @adev: amdgpu_device pointer
3985 *
3986 * Tear down the driver info (all asics).
3987 * Called at driver shutdown.
3988 */
72c8c97b 3989void amdgpu_device_fini_hw(struct amdgpu_device *adev)
d38ceaf9 3990{
aac89168 3991 dev_info(adev->dev, "amdgpu: finishing device.\n");
9f875167 3992 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 3993 adev->shutdown = true;
9f875167 3994
752c683d
ML
3995 /* make sure IB test finished before entering exclusive mode
3996 * to avoid preemption on IB test
3997 * */
519b8b76 3998 if (amdgpu_sriov_vf(adev)) {
752c683d 3999 amdgpu_virt_request_full_gpu(adev, false);
519b8b76
BZ
4000 amdgpu_virt_fini_data_exchange(adev);
4001 }
752c683d 4002
e5b03032
ML
4003 /* disable all interrupts */
4004 amdgpu_irq_disable_all(adev);
ff97cba8 4005 if (adev->mode_info.mode_config_initialized){
1053b9c9 4006 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4a580877 4007 drm_helper_force_disable_all(adev_to_drm(adev));
ff97cba8 4008 else
4a580877 4009 drm_atomic_helper_shutdown(adev_to_drm(adev));
ff97cba8 4010 }
8d35a259 4011 amdgpu_fence_driver_hw_fini(adev);
72c8c97b 4012
cd3a8a59 4013 if (adev->mman.initialized)
9bff18d1 4014 drain_workqueue(adev->mman.bdev.wq);
98f56188 4015
7c868b59
YT
4016 if (adev->pm_sysfs_en)
4017 amdgpu_pm_sysfs_fini(adev);
72c8c97b
AG
4018 if (adev->ucode_sysfs_en)
4019 amdgpu_ucode_sysfs_fini(adev);
8424f2cc
LG
4020 if (adev->psp_sysfs_en)
4021 amdgpu_psp_sysfs_fini(adev);
72c8c97b
AG
4022 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
4023
232d1d43
SY
4024 /* disable ras feature must before hw fini */
4025 amdgpu_ras_pre_fini(adev);
4026
e9669fb7 4027 amdgpu_device_ip_fini_early(adev);
d10d0daa 4028
a3848df6
YW
4029 amdgpu_irq_fini_hw(adev);
4030
b6fd6e0f
SK
4031 if (adev->mman.initialized)
4032 ttm_device_clear_dma_mappings(&adev->mman.bdev);
894c6890 4033
d10d0daa 4034 amdgpu_gart_dummy_page_fini(adev);
07775fc1 4035
39934d3e
VP
4036 if (drm_dev_is_unplugged(adev_to_drm(adev)))
4037 amdgpu_device_unmap_mmio(adev);
87172e89 4038
72c8c97b
AG
4039}
4040
4041void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4042{
62d5f9f7 4043 int idx;
d37a3929 4044 bool px;
62d5f9f7 4045
8d35a259 4046 amdgpu_fence_driver_sw_fini(adev);
a5c5d8d5 4047 amdgpu_device_ip_fini(adev);
b31d3063 4048 amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
d38ceaf9 4049 adev->accel_working = false;
68ce8b24 4050 dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
04442bf7
LL
4051
4052 amdgpu_reset_fini(adev);
4053
d38ceaf9 4054 /* free i2c buses */
4562236b
HW
4055 if (!amdgpu_device_has_dc_support(adev))
4056 amdgpu_i2c_fini(adev);
bfca0289
SL
4057
4058 if (amdgpu_emu_mode != 1)
4059 amdgpu_atombios_fini(adev);
4060
d38ceaf9
AD
4061 kfree(adev->bios);
4062 adev->bios = NULL;
d37a3929
OC
4063
4064 px = amdgpu_device_supports_px(adev_to_drm(adev));
4065
4066 if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
4067 apple_gmux_detect(NULL, NULL)))
84c8b22e 4068 vga_switcheroo_unregister_client(adev->pdev);
d37a3929
OC
4069
4070 if (px)
83ba126a 4071 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d37a3929 4072
38d6be81 4073 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
b8779475 4074 vga_client_unregister(adev->pdev);
e9bc1bf7 4075
62d5f9f7
LS
4076 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4077
4078 iounmap(adev->rmmio);
4079 adev->rmmio = NULL;
4080 amdgpu_device_doorbell_fini(adev);
4081 drm_dev_exit(idx);
4082 }
4083
d155bef0
AB
4084 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4085 amdgpu_pmu_fini(adev);
72de33f8 4086 if (adev->mman.discovery_bin)
a190d1c7 4087 amdgpu_discovery_fini(adev);
72c8c97b 4088
cfbb6b00
AG
4089 amdgpu_reset_put_reset_domain(adev->reset_domain);
4090 adev->reset_domain = NULL;
4091
72c8c97b
AG
4092 kfree(adev->pci_state);
4093
d38ceaf9
AD
4094}
4095
58144d28
ND
4096/**
4097 * amdgpu_device_evict_resources - evict device resources
4098 * @adev: amdgpu device object
4099 *
4100 * Evicts all ttm device resources(vram BOs, gart table) from the lru list
4101 * of the vram memory type. Mainly used for evicting device resources
4102 * at suspend time.
4103 *
4104 */
7863c155 4105static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
58144d28 4106{
7863c155
ML
4107 int ret;
4108
e53d9665
ML
4109 /* No need to evict vram on APUs for suspend to ram or s2idle */
4110 if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
7863c155 4111 return 0;
58144d28 4112
7863c155
ML
4113 ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4114 if (ret)
58144d28 4115 DRM_WARN("evicting device resources failed\n");
7863c155 4116 return ret;
58144d28 4117}
d38ceaf9
AD
4118
4119/*
4120 * Suspend & resume.
4121 */
4122/**
810ddc3a 4123 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 4124 *
87e3f136 4125 * @dev: drm dev pointer
87e3f136 4126 * @fbcon : notify the fbdev of suspend
d38ceaf9
AD
4127 *
4128 * Puts the hw in the suspend state (all asics).
4129 * Returns 0 for success or an error on failure.
4130 * Called at driver suspend.
4131 */
de185019 4132int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9 4133{
a2e15b0e 4134 struct amdgpu_device *adev = drm_to_adev(dev);
d7274ec7 4135 int r = 0;
d38ceaf9 4136
d38ceaf9
AD
4137 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4138 return 0;
4139
44779b43 4140 adev->in_suspend = true;
3fa8f89d 4141
47ea2076
SF
4142 /* Evict the majority of BOs before grabbing the full access */
4143 r = amdgpu_device_evict_resources(adev);
4144 if (r)
4145 return r;
4146
d7274ec7
BZ
4147 if (amdgpu_sriov_vf(adev)) {
4148 amdgpu_virt_fini_data_exchange(adev);
4149 r = amdgpu_virt_request_full_gpu(adev, false);
4150 if (r)
4151 return r;
4152 }
4153
3fa8f89d
S
4154 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4155 DRM_WARN("smart shift update failed\n");
4156
d38ceaf9
AD
4157 drm_kms_helper_poll_disable(dev);
4158
5f818173 4159 if (fbcon)
087451f3 4160 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
5f818173 4161
beff74bc 4162 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 4163
5e6932fe 4164 amdgpu_ras_suspend(adev);
4165
2196927b 4166 amdgpu_device_ip_suspend_phase1(adev);
fe1053b7 4167
c004d44e 4168 if (!adev->in_s0ix)
5d3a2d95 4169 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
94fa5660 4170
7863c155
ML
4171 r = amdgpu_device_evict_resources(adev);
4172 if (r)
4173 return r;
d38ceaf9 4174
8d35a259 4175 amdgpu_fence_driver_hw_fini(adev);
d38ceaf9 4176
2196927b 4177 amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 4178
d7274ec7
BZ
4179 if (amdgpu_sriov_vf(adev))
4180 amdgpu_virt_release_full_gpu(adev, false);
4181
d38ceaf9
AD
4182 return 0;
4183}
4184
4185/**
810ddc3a 4186 * amdgpu_device_resume - initiate device resume
d38ceaf9 4187 *
87e3f136 4188 * @dev: drm dev pointer
87e3f136 4189 * @fbcon : notify the fbdev of resume
d38ceaf9
AD
4190 *
4191 * Bring the hw back to operating state (all asics).
4192 * Returns 0 for success or an error on failure.
4193 * Called at driver resume.
4194 */
de185019 4195int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9 4196{
1348969a 4197 struct amdgpu_device *adev = drm_to_adev(dev);
03161a6e 4198 int r = 0;
d38ceaf9 4199
d7274ec7
BZ
4200 if (amdgpu_sriov_vf(adev)) {
4201 r = amdgpu_virt_request_full_gpu(adev, true);
4202 if (r)
4203 return r;
4204 }
4205
d38ceaf9
AD
4206 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4207 return 0;
4208
62498733 4209 if (adev->in_s0ix)
bc143d8b 4210 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
628c36d7 4211
d38ceaf9 4212 /* post card */
39c640c0 4213 if (amdgpu_device_need_post(adev)) {
4d2997ab 4214 r = amdgpu_device_asic_init(adev);
74b0b157 4215 if (r)
aac89168 4216 dev_err(adev->dev, "amdgpu asic init failed\n");
74b0b157 4217 }
d38ceaf9 4218
06ec9070 4219 r = amdgpu_device_ip_resume(adev);
d7274ec7 4220
e6707218 4221 if (r) {
aac89168 4222 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
3c22c1ea 4223 goto exit;
e6707218 4224 }
8d35a259 4225 amdgpu_fence_driver_hw_init(adev);
5ceb54c6 4226
06ec9070 4227 r = amdgpu_device_ip_late_init(adev);
03161a6e 4228 if (r)
3c22c1ea 4229 goto exit;
d38ceaf9 4230
beff74bc
AD
4231 queue_delayed_work(system_wq, &adev->delayed_init_work,
4232 msecs_to_jiffies(AMDGPU_RESUME_MS));
4233
c004d44e 4234 if (!adev->in_s0ix) {
5d3a2d95
AD
4235 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4236 if (r)
3c22c1ea 4237 goto exit;
5d3a2d95 4238 }
756e6880 4239
3c22c1ea
SF
4240exit:
4241 if (amdgpu_sriov_vf(adev)) {
4242 amdgpu_virt_init_data_exchange(adev);
4243 amdgpu_virt_release_full_gpu(adev, true);
4244 }
4245
4246 if (r)
4247 return r;
4248
96a5d8d4 4249 /* Make sure IB tests flushed */
beff74bc 4250 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 4251
a2e15b0e 4252 if (fbcon)
087451f3 4253 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
d38ceaf9
AD
4254
4255 drm_kms_helper_poll_enable(dev);
23a1a9e5 4256
5e6932fe 4257 amdgpu_ras_resume(adev);
4258
d09ef243
AD
4259 if (adev->mode_info.num_crtc) {
4260 /*
4261 * Most of the connector probing functions try to acquire runtime pm
4262 * refs to ensure that the GPU is powered on when connector polling is
4263 * performed. Since we're calling this from a runtime PM callback,
4264 * trying to acquire rpm refs will cause us to deadlock.
4265 *
4266 * Since we're guaranteed to be holding the rpm lock, it's safe to
4267 * temporarily disable the rpm helpers so this doesn't deadlock us.
4268 */
23a1a9e5 4269#ifdef CONFIG_PM
d09ef243 4270 dev->dev->power.disable_depth++;
23a1a9e5 4271#endif
d09ef243
AD
4272 if (!adev->dc_enabled)
4273 drm_helper_hpd_irq_event(dev);
4274 else
4275 drm_kms_helper_hotplug_event(dev);
23a1a9e5 4276#ifdef CONFIG_PM
d09ef243 4277 dev->dev->power.disable_depth--;
23a1a9e5 4278#endif
d09ef243 4279 }
44779b43
RZ
4280 adev->in_suspend = false;
4281
dc907c9d
JX
4282 if (adev->enable_mes)
4283 amdgpu_mes_self_test(adev);
4284
3fa8f89d
S
4285 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4286 DRM_WARN("smart shift update failed\n");
4287
4d3b9ae5 4288 return 0;
d38ceaf9
AD
4289}
4290
e3ecdffa
AD
4291/**
4292 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4293 *
4294 * @adev: amdgpu_device pointer
4295 *
4296 * The list of all the hardware IPs that make up the asic is walked and
4297 * the check_soft_reset callbacks are run. check_soft_reset determines
4298 * if the asic is still hung or not.
4299 * Returns true if any of the IPs are still in a hung state, false if not.
4300 */
06ec9070 4301static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
4302{
4303 int i;
4304 bool asic_hang = false;
4305
f993d628
ML
4306 if (amdgpu_sriov_vf(adev))
4307 return true;
4308
8bc04c29
AD
4309 if (amdgpu_asic_need_full_reset(adev))
4310 return true;
4311
63fbf42f 4312 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4313 if (!adev->ip_blocks[i].status.valid)
63fbf42f 4314 continue;
a1255107
AD
4315 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4316 adev->ip_blocks[i].status.hang =
4317 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4318 if (adev->ip_blocks[i].status.hang) {
aac89168 4319 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
4320 asic_hang = true;
4321 }
4322 }
4323 return asic_hang;
4324}
4325
e3ecdffa
AD
4326/**
4327 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4328 *
4329 * @adev: amdgpu_device pointer
4330 *
4331 * The list of all the hardware IPs that make up the asic is walked and the
4332 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
4333 * handles any IP specific hardware or software state changes that are
4334 * necessary for a soft reset to succeed.
4335 * Returns 0 on success, negative error code on failure.
4336 */
06ec9070 4337static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
4338{
4339 int i, r = 0;
4340
4341 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4342 if (!adev->ip_blocks[i].status.valid)
d31a501e 4343 continue;
a1255107
AD
4344 if (adev->ip_blocks[i].status.hang &&
4345 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4346 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
4347 if (r)
4348 return r;
4349 }
4350 }
4351
4352 return 0;
4353}
4354
e3ecdffa
AD
4355/**
4356 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4357 *
4358 * @adev: amdgpu_device pointer
4359 *
4360 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
4361 * reset is necessary to recover.
4362 * Returns true if a full asic reset is required, false if not.
4363 */
06ec9070 4364static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 4365{
da146d3b
AD
4366 int i;
4367
8bc04c29
AD
4368 if (amdgpu_asic_need_full_reset(adev))
4369 return true;
4370
da146d3b 4371 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4372 if (!adev->ip_blocks[i].status.valid)
da146d3b 4373 continue;
a1255107
AD
4374 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4375 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4376 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
4377 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4378 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 4379 if (adev->ip_blocks[i].status.hang) {
aac89168 4380 dev_info(adev->dev, "Some block need full reset!\n");
da146d3b
AD
4381 return true;
4382 }
4383 }
35d782fe
CZ
4384 }
4385 return false;
4386}
4387
e3ecdffa
AD
4388/**
4389 * amdgpu_device_ip_soft_reset - do a soft reset
4390 *
4391 * @adev: amdgpu_device pointer
4392 *
4393 * The list of all the hardware IPs that make up the asic is walked and the
4394 * soft_reset callbacks are run if the block is hung. soft_reset handles any
4395 * IP specific hardware or software state changes that are necessary to soft
4396 * reset the IP.
4397 * Returns 0 on success, negative error code on failure.
4398 */
06ec9070 4399static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4400{
4401 int i, r = 0;
4402
4403 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4404 if (!adev->ip_blocks[i].status.valid)
35d782fe 4405 continue;
a1255107
AD
4406 if (adev->ip_blocks[i].status.hang &&
4407 adev->ip_blocks[i].version->funcs->soft_reset) {
4408 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
4409 if (r)
4410 return r;
4411 }
4412 }
4413
4414 return 0;
4415}
4416
e3ecdffa
AD
4417/**
4418 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4419 *
4420 * @adev: amdgpu_device pointer
4421 *
4422 * The list of all the hardware IPs that make up the asic is walked and the
4423 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
4424 * handles any IP specific hardware or software state changes that are
4425 * necessary after the IP has been soft reset.
4426 * Returns 0 on success, negative error code on failure.
4427 */
06ec9070 4428static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4429{
4430 int i, r = 0;
4431
4432 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4433 if (!adev->ip_blocks[i].status.valid)
35d782fe 4434 continue;
a1255107
AD
4435 if (adev->ip_blocks[i].status.hang &&
4436 adev->ip_blocks[i].version->funcs->post_soft_reset)
4437 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
4438 if (r)
4439 return r;
4440 }
4441
4442 return 0;
4443}
4444
e3ecdffa 4445/**
c33adbc7 4446 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
4447 *
4448 * @adev: amdgpu_device pointer
4449 *
4450 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
4451 * restore things like GPUVM page tables after a GPU reset where
4452 * the contents of VRAM might be lost.
403009bf
CK
4453 *
4454 * Returns:
4455 * 0 on success, negative error code on failure.
e3ecdffa 4456 */
c33adbc7 4457static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 4458{
c41d1cf6 4459 struct dma_fence *fence = NULL, *next = NULL;
403009bf 4460 struct amdgpu_bo *shadow;
e18aaea7 4461 struct amdgpu_bo_vm *vmbo;
403009bf 4462 long r = 1, tmo;
c41d1cf6
ML
4463
4464 if (amdgpu_sriov_runtime(adev))
b045d3af 4465 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
4466 else
4467 tmo = msecs_to_jiffies(100);
4468
aac89168 4469 dev_info(adev->dev, "recover vram bo from shadow start\n");
c41d1cf6 4470 mutex_lock(&adev->shadow_list_lock);
e18aaea7
ND
4471 list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4472 shadow = &vmbo->bo;
403009bf 4473 /* No need to recover an evicted BO */
d3116756
CK
4474 if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4475 shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4476 shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
403009bf
CK
4477 continue;
4478
4479 r = amdgpu_bo_restore_shadow(shadow, &next);
4480 if (r)
4481 break;
4482
c41d1cf6 4483 if (fence) {
1712fb1a 4484 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
4485 dma_fence_put(fence);
4486 fence = next;
1712fb1a 4487 if (tmo == 0) {
4488 r = -ETIMEDOUT;
c41d1cf6 4489 break;
1712fb1a 4490 } else if (tmo < 0) {
4491 r = tmo;
4492 break;
4493 }
403009bf
CK
4494 } else {
4495 fence = next;
c41d1cf6 4496 }
c41d1cf6
ML
4497 }
4498 mutex_unlock(&adev->shadow_list_lock);
4499
403009bf
CK
4500 if (fence)
4501 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
4502 dma_fence_put(fence);
4503
1712fb1a 4504 if (r < 0 || tmo <= 0) {
aac89168 4505 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
4506 return -EIO;
4507 }
c41d1cf6 4508
aac89168 4509 dev_info(adev->dev, "recover vram bo from shadow done\n");
403009bf 4510 return 0;
c41d1cf6
ML
4511}
4512
a90ad3c2 4513
e3ecdffa 4514/**
06ec9070 4515 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e 4516 *
982a820b 4517 * @adev: amdgpu_device pointer
87e3f136 4518 * @from_hypervisor: request from hypervisor
5740682e
ML
4519 *
4520 * do VF FLR and reinitialize Asic
3f48c681 4521 * return 0 means succeeded otherwise failed
e3ecdffa
AD
4522 */
4523static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4524 bool from_hypervisor)
5740682e
ML
4525{
4526 int r;
a5f67c93 4527 struct amdgpu_hive_info *hive = NULL;
7258fa31 4528 int retry_limit = 0;
5740682e 4529
7258fa31 4530retry:
c004d44e 4531 amdgpu_amdkfd_pre_reset(adev);
428890a3 4532
5740682e
ML
4533 if (from_hypervisor)
4534 r = amdgpu_virt_request_full_gpu(adev, true);
4535 else
4536 r = amdgpu_virt_reset_gpu(adev);
4537 if (r)
4538 return r;
a90ad3c2
ML
4539
4540 /* Resume IP prior to SMC */
06ec9070 4541 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
4542 if (r)
4543 goto error;
a90ad3c2 4544
c9ffa427 4545 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 4546
7a3e0bb2
RZ
4547 r = amdgpu_device_fw_loading(adev);
4548 if (r)
4549 return r;
4550
a90ad3c2 4551 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 4552 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
4553 if (r)
4554 goto error;
a90ad3c2 4555
a5f67c93
ZL
4556 hive = amdgpu_get_xgmi_hive(adev);
4557 /* Update PSP FW topology after reset */
4558 if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4559 r = amdgpu_xgmi_update_topology(hive, adev);
4560
4561 if (hive)
4562 amdgpu_put_xgmi_hive(hive);
4563
4564 if (!r) {
4565 amdgpu_irq_gpu_reset_resume_helper(adev);
4566 r = amdgpu_ib_ring_tests(adev);
9c12f5cd 4567
c004d44e 4568 amdgpu_amdkfd_post_reset(adev);
a5f67c93 4569 }
a90ad3c2 4570
abc34253 4571error:
c41d1cf6 4572 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 4573 amdgpu_inc_vram_lost(adev);
c33adbc7 4574 r = amdgpu_device_recover_vram(adev);
a90ad3c2 4575 }
437f3e0b 4576 amdgpu_virt_release_full_gpu(adev, true);
a90ad3c2 4577
7258fa31
SK
4578 if (AMDGPU_RETRY_SRIOV_RESET(r)) {
4579 if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
4580 retry_limit++;
4581 goto retry;
4582 } else
4583 DRM_ERROR("GPU reset retry is beyond the retry limit\n");
4584 }
4585
a90ad3c2
ML
4586 return r;
4587}
4588
9a1cddd6 4589/**
4590 * amdgpu_device_has_job_running - check if there is any job in mirror list
4591 *
982a820b 4592 * @adev: amdgpu_device pointer
9a1cddd6 4593 *
4594 * check if there is any job in mirror list
4595 */
4596bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4597{
4598 int i;
4599 struct drm_sched_job *job;
4600
4601 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4602 struct amdgpu_ring *ring = adev->rings[i];
4603
4604 if (!ring || !ring->sched.thread)
4605 continue;
4606
4607 spin_lock(&ring->sched.job_list_lock);
6efa4b46
LT
4608 job = list_first_entry_or_null(&ring->sched.pending_list,
4609 struct drm_sched_job, list);
9a1cddd6 4610 spin_unlock(&ring->sched.job_list_lock);
4611 if (job)
4612 return true;
4613 }
4614 return false;
4615}
4616
12938fad
CK
4617/**
4618 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4619 *
982a820b 4620 * @adev: amdgpu_device pointer
12938fad
CK
4621 *
4622 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4623 * a hung GPU.
4624 */
4625bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4626{
12938fad 4627
3ba7b418
AG
4628 if (amdgpu_gpu_recovery == 0)
4629 goto disabled;
4630
1a11a65d
YC
4631 /* Skip soft reset check in fatal error mode */
4632 if (!amdgpu_ras_is_poison_mode_supported(adev))
4633 return true;
4634
3ba7b418
AG
4635 if (amdgpu_sriov_vf(adev))
4636 return true;
4637
4638 if (amdgpu_gpu_recovery == -1) {
4639 switch (adev->asic_type) {
b3523c45
AD
4640#ifdef CONFIG_DRM_AMDGPU_SI
4641 case CHIP_VERDE:
4642 case CHIP_TAHITI:
4643 case CHIP_PITCAIRN:
4644 case CHIP_OLAND:
4645 case CHIP_HAINAN:
4646#endif
4647#ifdef CONFIG_DRM_AMDGPU_CIK
4648 case CHIP_KAVERI:
4649 case CHIP_KABINI:
4650 case CHIP_MULLINS:
4651#endif
4652 case CHIP_CARRIZO:
4653 case CHIP_STONEY:
4654 case CHIP_CYAN_SKILLFISH:
3ba7b418 4655 goto disabled;
b3523c45
AD
4656 default:
4657 break;
3ba7b418 4658 }
12938fad
CK
4659 }
4660
4661 return true;
3ba7b418
AG
4662
4663disabled:
aac89168 4664 dev_info(adev->dev, "GPU recovery disabled.\n");
3ba7b418 4665 return false;
12938fad
CK
4666}
4667
5c03e584
FX
4668int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
4669{
4670 u32 i;
4671 int ret = 0;
4672
4673 amdgpu_atombios_scratch_regs_engine_hung(adev, true);
4674
4675 dev_info(adev->dev, "GPU mode1 reset\n");
4676
4677 /* disable BM */
4678 pci_clear_master(adev->pdev);
4679
4680 amdgpu_device_cache_pci_state(adev->pdev);
4681
4682 if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
4683 dev_info(adev->dev, "GPU smu mode1 reset\n");
4684 ret = amdgpu_dpm_mode1_reset(adev);
4685 } else {
4686 dev_info(adev->dev, "GPU psp mode1 reset\n");
4687 ret = psp_gpu_reset(adev);
4688 }
4689
4690 if (ret)
4691 dev_err(adev->dev, "GPU mode1 reset failed\n");
4692
4693 amdgpu_device_load_pci_state(adev->pdev);
4694
4695 /* wait for asic to come out of reset */
4696 for (i = 0; i < adev->usec_timeout; i++) {
4697 u32 memsize = adev->nbio.funcs->get_memsize(adev);
4698
4699 if (memsize != 0xffffffff)
4700 break;
4701 udelay(1);
4702 }
4703
4704 amdgpu_atombios_scratch_regs_engine_hung(adev, false);
4705 return ret;
4706}
5c6dd71e 4707
e3c1b071 4708int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
04442bf7 4709 struct amdgpu_reset_context *reset_context)
26bc5340 4710{
5c1e6fa4 4711 int i, r = 0;
04442bf7
LL
4712 struct amdgpu_job *job = NULL;
4713 bool need_full_reset =
4714 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4715
4716 if (reset_context->reset_req_dev == adev)
4717 job = reset_context->job;
71182665 4718
b602ca5f
TZ
4719 if (amdgpu_sriov_vf(adev)) {
4720 /* stop the data exchange thread */
4721 amdgpu_virt_fini_data_exchange(adev);
4722 }
4723
9e225fb9
AG
4724 amdgpu_fence_driver_isr_toggle(adev, true);
4725
71182665 4726 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
4727 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4728 struct amdgpu_ring *ring = adev->rings[i];
4729
51687759 4730 if (!ring || !ring->sched.thread)
0875dc9e 4731 continue;
5740682e 4732
c530b02f
JZ
4733 /*clear job fence from fence drv to avoid force_completion
4734 *leave NULL and vm flush fence in fence drv */
5c1e6fa4 4735 amdgpu_fence_driver_clear_job_fences(ring);
c530b02f 4736
2f9d4084
ML
4737 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
4738 amdgpu_fence_driver_force_completion(ring);
0875dc9e 4739 }
d38ceaf9 4740
9e225fb9
AG
4741 amdgpu_fence_driver_isr_toggle(adev, false);
4742
ff99849b 4743 if (job && job->vm)
222b5f04
AG
4744 drm_sched_increase_karma(&job->base);
4745
04442bf7 4746 r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
404b277b
LL
4747 /* If reset handler not implemented, continue; otherwise return */
4748 if (r == -ENOSYS)
4749 r = 0;
4750 else
04442bf7
LL
4751 return r;
4752
1d721ed6 4753 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
4754 if (!amdgpu_sriov_vf(adev)) {
4755
4756 if (!need_full_reset)
4757 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4758
360cd081
LG
4759 if (!need_full_reset && amdgpu_gpu_recovery &&
4760 amdgpu_device_ip_check_soft_reset(adev)) {
26bc5340
AG
4761 amdgpu_device_ip_pre_soft_reset(adev);
4762 r = amdgpu_device_ip_soft_reset(adev);
4763 amdgpu_device_ip_post_soft_reset(adev);
4764 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 4765 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
26bc5340
AG
4766 need_full_reset = true;
4767 }
4768 }
4769
4770 if (need_full_reset)
4771 r = amdgpu_device_ip_suspend(adev);
04442bf7
LL
4772 if (need_full_reset)
4773 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4774 else
4775 clear_bit(AMDGPU_NEED_FULL_RESET,
4776 &reset_context->flags);
26bc5340
AG
4777 }
4778
4779 return r;
4780}
4781
15fd09a0
SA
4782static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
4783{
15fd09a0
SA
4784 int i;
4785
38a15ad9 4786 lockdep_assert_held(&adev->reset_domain->sem);
15fd09a0
SA
4787
4788 for (i = 0; i < adev->num_regs; i++) {
651d7ee6
SA
4789 adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]);
4790 trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i],
4791 adev->reset_dump_reg_value[i]);
15fd09a0
SA
4792 }
4793
4794 return 0;
4795}
4796
3d8785f6
SA
4797#ifdef CONFIG_DEV_COREDUMP
4798static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
4799 size_t count, void *data, size_t datalen)
4800{
4801 struct drm_printer p;
4802 struct amdgpu_device *adev = data;
4803 struct drm_print_iterator iter;
4804 int i;
4805
4806 iter.data = buffer;
4807 iter.offset = 0;
4808 iter.start = offset;
4809 iter.remain = count;
4810
4811 p = drm_coredump_printer(&iter);
4812
4813 drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
4814 drm_printf(&p, "kernel: " UTS_RELEASE "\n");
4815 drm_printf(&p, "module: " KBUILD_MODNAME "\n");
4816 drm_printf(&p, "time: %lld.%09ld\n", adev->reset_time.tv_sec, adev->reset_time.tv_nsec);
4817 if (adev->reset_task_info.pid)
4818 drm_printf(&p, "process_name: %s PID: %d\n",
4819 adev->reset_task_info.process_name,
4820 adev->reset_task_info.pid);
4821
4822 if (adev->reset_vram_lost)
4823 drm_printf(&p, "VRAM is lost due to GPU reset!\n");
4824 if (adev->num_regs) {
4825 drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n");
4826
4827 for (i = 0; i < adev->num_regs; i++)
4828 drm_printf(&p, "0x%08x: 0x%08x\n",
4829 adev->reset_dump_reg_list[i],
4830 adev->reset_dump_reg_value[i]);
4831 }
4832
4833 return count - iter.remain;
4834}
4835
4836static void amdgpu_devcoredump_free(void *data)
4837{
4838}
4839
4840static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev)
4841{
4842 struct drm_device *dev = adev_to_drm(adev);
4843
4844 ktime_get_ts64(&adev->reset_time);
4845 dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_KERNEL,
4846 amdgpu_devcoredump_read, amdgpu_devcoredump_free);
4847}
4848#endif
4849
04442bf7
LL
4850int amdgpu_do_asic_reset(struct list_head *device_list_handle,
4851 struct amdgpu_reset_context *reset_context)
26bc5340
AG
4852{
4853 struct amdgpu_device *tmp_adev = NULL;
04442bf7 4854 bool need_full_reset, skip_hw_reset, vram_lost = false;
26bc5340 4855 int r = 0;
f5c7e779 4856 bool gpu_reset_for_dev_remove = 0;
26bc5340 4857
04442bf7
LL
4858 /* Try reset handler method first */
4859 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
4860 reset_list);
15fd09a0 4861 amdgpu_reset_reg_dumps(tmp_adev);
0a83bb35
LL
4862
4863 reset_context->reset_device_list = device_list_handle;
04442bf7 4864 r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
404b277b
LL
4865 /* If reset handler not implemented, continue; otherwise return */
4866 if (r == -ENOSYS)
4867 r = 0;
4868 else
04442bf7
LL
4869 return r;
4870
4871 /* Reset handler not implemented, use the default method */
4872 need_full_reset =
4873 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4874 skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
4875
f5c7e779
YC
4876 gpu_reset_for_dev_remove =
4877 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
4878 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4879
26bc5340 4880 /*
655ce9cb 4881 * ASIC reset has to be done on all XGMI hive nodes ASAP
26bc5340
AG
4882 * to allow proper links negotiation in FW (within 1 sec)
4883 */
7ac71382 4884 if (!skip_hw_reset && need_full_reset) {
655ce9cb 4885 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
041a62bc 4886 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 4887 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
e3c1b071 4888 tmp_adev->gmc.xgmi.pending_reset = false;
c96cf282 4889 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
4890 r = -EALREADY;
4891 } else
4892 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 4893
041a62bc 4894 if (r) {
aac89168 4895 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4a580877 4896 r, adev_to_drm(tmp_adev)->unique);
041a62bc 4897 break;
ce316fa5
LM
4898 }
4899 }
4900
041a62bc
AG
4901 /* For XGMI wait for all resets to complete before proceed */
4902 if (!r) {
655ce9cb 4903 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
ce316fa5
LM
4904 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4905 flush_work(&tmp_adev->xgmi_reset_work);
4906 r = tmp_adev->asic_reset_res;
4907 if (r)
4908 break;
ce316fa5
LM
4909 }
4910 }
4911 }
ce316fa5 4912 }
26bc5340 4913
43c4d576 4914 if (!r && amdgpu_ras_intr_triggered()) {
655ce9cb 4915 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5e67bba3 4916 if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops &&
4917 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
4918 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev);
43c4d576
JC
4919 }
4920
00eaa571 4921 amdgpu_ras_intr_cleared();
43c4d576 4922 }
00eaa571 4923
f5c7e779
YC
4924 /* Since the mode1 reset affects base ip blocks, the
4925 * phase1 ip blocks need to be resumed. Otherwise there
4926 * will be a BIOS signature error and the psp bootloader
4927 * can't load kdb on the next amdgpu install.
4928 */
4929 if (gpu_reset_for_dev_remove) {
4930 list_for_each_entry(tmp_adev, device_list_handle, reset_list)
4931 amdgpu_device_ip_resume_phase1(tmp_adev);
4932
4933 goto end;
4934 }
4935
655ce9cb 4936 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
26bc5340
AG
4937 if (need_full_reset) {
4938 /* post card */
e3c1b071 4939 r = amdgpu_device_asic_init(tmp_adev);
4940 if (r) {
aac89168 4941 dev_warn(tmp_adev->dev, "asic atom init failed!");
e3c1b071 4942 } else {
26bc5340 4943 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
9cec53c1
JZ
4944 r = amdgpu_amdkfd_resume_iommu(tmp_adev);
4945 if (r)
4946 goto out;
4947
26bc5340
AG
4948 r = amdgpu_device_ip_resume_phase1(tmp_adev);
4949 if (r)
4950 goto out;
4951
4952 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
3d8785f6
SA
4953#ifdef CONFIG_DEV_COREDUMP
4954 tmp_adev->reset_vram_lost = vram_lost;
4955 memset(&tmp_adev->reset_task_info, 0,
4956 sizeof(tmp_adev->reset_task_info));
4957 if (reset_context->job && reset_context->job->vm)
4958 tmp_adev->reset_task_info =
4959 reset_context->job->vm->task_info;
4960 amdgpu_reset_capture_coredumpm(tmp_adev);
4961#endif
26bc5340 4962 if (vram_lost) {
77e7f829 4963 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 4964 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
4965 }
4966
26bc5340
AG
4967 r = amdgpu_device_fw_loading(tmp_adev);
4968 if (r)
4969 return r;
4970
4971 r = amdgpu_device_ip_resume_phase2(tmp_adev);
4972 if (r)
4973 goto out;
4974
4975 if (vram_lost)
4976 amdgpu_device_fill_reset_magic(tmp_adev);
4977
fdafb359
EQ
4978 /*
4979 * Add this ASIC as tracked as reset was already
4980 * complete successfully.
4981 */
4982 amdgpu_register_gpu_instance(tmp_adev);
4983
04442bf7
LL
4984 if (!reset_context->hive &&
4985 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
e3c1b071 4986 amdgpu_xgmi_add_device(tmp_adev);
4987
7c04ca50 4988 r = amdgpu_device_ip_late_init(tmp_adev);
4989 if (r)
4990 goto out;
4991
087451f3 4992 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
565d1941 4993
e8fbaf03
GC
4994 /*
4995 * The GPU enters bad state once faulty pages
4996 * by ECC has reached the threshold, and ras
4997 * recovery is scheduled next. So add one check
4998 * here to break recovery if it indeed exceeds
4999 * bad page threshold, and remind user to
5000 * retire this GPU or setting one bigger
5001 * bad_page_threshold value to fix this once
5002 * probing driver again.
5003 */
11003c68 5004 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
e8fbaf03
GC
5005 /* must succeed. */
5006 amdgpu_ras_resume(tmp_adev);
5007 } else {
5008 r = -EINVAL;
5009 goto out;
5010 }
e79a04d5 5011
26bc5340 5012 /* Update PSP FW topology after reset */
04442bf7
LL
5013 if (reset_context->hive &&
5014 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5015 r = amdgpu_xgmi_update_topology(
5016 reset_context->hive, tmp_adev);
26bc5340
AG
5017 }
5018 }
5019
26bc5340
AG
5020out:
5021 if (!r) {
5022 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5023 r = amdgpu_ib_ring_tests(tmp_adev);
5024 if (r) {
5025 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
26bc5340
AG
5026 need_full_reset = true;
5027 r = -EAGAIN;
5028 goto end;
5029 }
5030 }
5031
5032 if (!r)
5033 r = amdgpu_device_recover_vram(tmp_adev);
5034 else
5035 tmp_adev->asic_reset_res = r;
5036 }
5037
5038end:
04442bf7
LL
5039 if (need_full_reset)
5040 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5041 else
5042 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340
AG
5043 return r;
5044}
5045
e923be99 5046static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
26bc5340 5047{
5740682e 5048
a3a09142
AD
5049 switch (amdgpu_asic_reset_method(adev)) {
5050 case AMD_RESET_METHOD_MODE1:
5051 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5052 break;
5053 case AMD_RESET_METHOD_MODE2:
5054 adev->mp1_state = PP_MP1_STATE_RESET;
5055 break;
5056 default:
5057 adev->mp1_state = PP_MP1_STATE_NONE;
5058 break;
5059 }
26bc5340 5060}
d38ceaf9 5061
e923be99 5062static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
26bc5340 5063{
89041940 5064 amdgpu_vf_error_trans_all(adev);
a3a09142 5065 adev->mp1_state = PP_MP1_STATE_NONE;
91fb309d
HC
5066}
5067
3f12acc8
EQ
5068static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5069{
5070 struct pci_dev *p = NULL;
5071
5072 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5073 adev->pdev->bus->number, 1);
5074 if (p) {
5075 pm_runtime_enable(&(p->dev));
5076 pm_runtime_resume(&(p->dev));
5077 }
b85e285e
YY
5078
5079 pci_dev_put(p);
3f12acc8
EQ
5080}
5081
5082static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5083{
5084 enum amd_reset_method reset_method;
5085 struct pci_dev *p = NULL;
5086 u64 expires;
5087
5088 /*
5089 * For now, only BACO and mode1 reset are confirmed
5090 * to suffer the audio issue without proper suspended.
5091 */
5092 reset_method = amdgpu_asic_reset_method(adev);
5093 if ((reset_method != AMD_RESET_METHOD_BACO) &&
5094 (reset_method != AMD_RESET_METHOD_MODE1))
5095 return -EINVAL;
5096
5097 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5098 adev->pdev->bus->number, 1);
5099 if (!p)
5100 return -ENODEV;
5101
5102 expires = pm_runtime_autosuspend_expiration(&(p->dev));
5103 if (!expires)
5104 /*
5105 * If we cannot get the audio device autosuspend delay,
5106 * a fixed 4S interval will be used. Considering 3S is
5107 * the audio controller default autosuspend delay setting.
5108 * 4S used here is guaranteed to cover that.
5109 */
54b7feb9 5110 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
5111
5112 while (!pm_runtime_status_suspended(&(p->dev))) {
5113 if (!pm_runtime_suspend(&(p->dev)))
5114 break;
5115
5116 if (expires < ktime_get_mono_fast_ns()) {
5117 dev_warn(adev->dev, "failed to suspend display audio\n");
b85e285e 5118 pci_dev_put(p);
3f12acc8
EQ
5119 /* TODO: abort the succeeding gpu reset? */
5120 return -ETIMEDOUT;
5121 }
5122 }
5123
5124 pm_runtime_disable(&(p->dev));
5125
b85e285e 5126 pci_dev_put(p);
3f12acc8
EQ
5127 return 0;
5128}
5129
d193b12b 5130static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
247c7b0d
AG
5131{
5132 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5133
5134#if defined(CONFIG_DEBUG_FS)
5135 if (!amdgpu_sriov_vf(adev))
5136 cancel_work(&adev->reset_work);
5137#endif
5138
5139 if (adev->kfd.dev)
5140 cancel_work(&adev->kfd.reset_work);
5141
5142 if (amdgpu_sriov_vf(adev))
5143 cancel_work(&adev->virt.flr_work);
5144
5145 if (con && adev->ras_enabled)
5146 cancel_work(&con->recovery_work);
5147
5148}
5149
26bc5340 5150/**
6e9c65f7 5151 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
26bc5340 5152 *
982a820b 5153 * @adev: amdgpu_device pointer
26bc5340
AG
5154 * @job: which job trigger hang
5155 *
5156 * Attempt to reset the GPU if it has hung (all asics).
5157 * Attempt to do soft-reset or full-reset and reinitialize Asic
5158 * Returns 0 for success or an error on failure.
5159 */
5160
cf727044 5161int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
f1549c09
LG
5162 struct amdgpu_job *job,
5163 struct amdgpu_reset_context *reset_context)
26bc5340 5164{
1d721ed6 5165 struct list_head device_list, *device_list_handle = NULL;
7dd8c205 5166 bool job_signaled = false;
26bc5340 5167 struct amdgpu_hive_info *hive = NULL;
26bc5340 5168 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 5169 int i, r = 0;
bb5c7235 5170 bool need_emergency_restart = false;
3f12acc8 5171 bool audio_suspended = false;
f5c7e779
YC
5172 bool gpu_reset_for_dev_remove = false;
5173
5174 gpu_reset_for_dev_remove =
5175 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5176 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340 5177
6e3cd2a9 5178 /*
bb5c7235
WS
5179 * Special case: RAS triggered and full reset isn't supported
5180 */
5181 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5182
d5ea093e
AG
5183 /*
5184 * Flush RAM to disk so that after reboot
5185 * the user can read log and see why the system rebooted.
5186 */
bb5c7235 5187 if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
5188 DRM_WARN("Emergency reboot.");
5189
5190 ksys_sync_helper();
5191 emergency_restart();
5192 }
5193
b823821f 5194 dev_info(adev->dev, "GPU %s begin!\n",
bb5c7235 5195 need_emergency_restart ? "jobs stop":"reset");
26bc5340 5196
175ac6ec
ZL
5197 if (!amdgpu_sriov_vf(adev))
5198 hive = amdgpu_get_xgmi_hive(adev);
681260df 5199 if (hive)
53b3f8f4 5200 mutex_lock(&hive->hive_lock);
26bc5340 5201
f1549c09
LG
5202 reset_context->job = job;
5203 reset_context->hive = hive;
9e94d22c
EQ
5204 /*
5205 * Build list of devices to reset.
5206 * In case we are in XGMI hive mode, resort the device list
5207 * to put adev in the 1st position.
5208 */
5209 INIT_LIST_HEAD(&device_list);
175ac6ec 5210 if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
83d29a5f 5211 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
655ce9cb 5212 list_add_tail(&tmp_adev->reset_list, &device_list);
83d29a5f
YC
5213 if (gpu_reset_for_dev_remove && adev->shutdown)
5214 tmp_adev->shutdown = true;
5215 }
655ce9cb 5216 if (!list_is_first(&adev->reset_list, &device_list))
5217 list_rotate_to_front(&adev->reset_list, &device_list);
5218 device_list_handle = &device_list;
26bc5340 5219 } else {
655ce9cb 5220 list_add_tail(&adev->reset_list, &device_list);
26bc5340
AG
5221 device_list_handle = &device_list;
5222 }
5223
e923be99
AG
5224 /* We need to lock reset domain only once both for XGMI and single device */
5225 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5226 reset_list);
3675c2f2 5227 amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
e923be99 5228
1d721ed6 5229 /* block all schedulers and reset given job's ring */
655ce9cb 5230 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f287a3c5 5231
e923be99 5232 amdgpu_device_set_mp1_state(tmp_adev);
f287a3c5 5233
3f12acc8
EQ
5234 /*
5235 * Try to put the audio codec into suspend state
5236 * before gpu reset started.
5237 *
5238 * Due to the power domain of the graphics device
5239 * is shared with AZ power domain. Without this,
5240 * we may change the audio hardware from behind
5241 * the audio driver's back. That will trigger
5242 * some audio codec errors.
5243 */
5244 if (!amdgpu_device_suspend_display_audio(tmp_adev))
5245 audio_suspended = true;
5246
9e94d22c
EQ
5247 amdgpu_ras_set_error_query_ready(tmp_adev, false);
5248
52fb44cf
EQ
5249 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5250
c004d44e 5251 if (!amdgpu_sriov_vf(tmp_adev))
428890a3 5252 amdgpu_amdkfd_pre_reset(tmp_adev);
9e94d22c 5253
12ffa55d
AG
5254 /*
5255 * Mark these ASICs to be reseted as untracked first
5256 * And add them back after reset completed
5257 */
5258 amdgpu_unregister_gpu_instance(tmp_adev);
5259
163d4cd2 5260 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
565d1941 5261
f1c1314b 5262 /* disable ras on ALL IPs */
bb5c7235 5263 if (!need_emergency_restart &&
b823821f 5264 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 5265 amdgpu_ras_suspend(tmp_adev);
5266
1d721ed6
AG
5267 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5268 struct amdgpu_ring *ring = tmp_adev->rings[i];
5269
5270 if (!ring || !ring->sched.thread)
5271 continue;
5272
0b2d2c2e 5273 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 5274
bb5c7235 5275 if (need_emergency_restart)
7c6e68c7 5276 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6 5277 }
8f8c80f4 5278 atomic_inc(&tmp_adev->gpu_reset_counter);
1d721ed6
AG
5279 }
5280
bb5c7235 5281 if (need_emergency_restart)
7c6e68c7
AG
5282 goto skip_sched_resume;
5283
1d721ed6
AG
5284 /*
5285 * Must check guilty signal here since after this point all old
5286 * HW fences are force signaled.
5287 *
5288 * job->base holds a reference to parent fence
5289 */
f6a3f660 5290 if (job && dma_fence_is_signaled(&job->hw_fence)) {
1d721ed6 5291 job_signaled = true;
1d721ed6
AG
5292 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5293 goto skip_hw_reset;
5294 }
5295
26bc5340 5296retry: /* Rest of adevs pre asic reset from XGMI hive. */
655ce9cb 5297 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f5c7e779
YC
5298 if (gpu_reset_for_dev_remove) {
5299 /* Workaroud for ASICs need to disable SMC first */
5300 amdgpu_device_smu_fini_early(tmp_adev);
5301 }
f1549c09 5302 r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
26bc5340
AG
5303 /*TODO Should we stop ?*/
5304 if (r) {
aac89168 5305 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
4a580877 5306 r, adev_to_drm(tmp_adev)->unique);
26bc5340
AG
5307 tmp_adev->asic_reset_res = r;
5308 }
247c7b0d
AG
5309
5310 /*
5311 * Drop all pending non scheduler resets. Scheduler resets
5312 * were already dropped during drm_sched_stop
5313 */
d193b12b 5314 amdgpu_device_stop_pending_resets(tmp_adev);
26bc5340
AG
5315 }
5316
5317 /* Actual ASIC resets if needed.*/
4f30d920 5318 /* Host driver will handle XGMI hive reset for SRIOV */
26bc5340
AG
5319 if (amdgpu_sriov_vf(adev)) {
5320 r = amdgpu_device_reset_sriov(adev, job ? false : true);
5321 if (r)
5322 adev->asic_reset_res = r;
950d6425
SY
5323
5324 /* Aldebaran supports ras in SRIOV, so need resume ras during reset */
5325 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
5326 amdgpu_ras_resume(adev);
26bc5340 5327 } else {
f1549c09 5328 r = amdgpu_do_asic_reset(device_list_handle, reset_context);
b98a1648 5329 if (r && r == -EAGAIN)
26bc5340 5330 goto retry;
f5c7e779
YC
5331
5332 if (!r && gpu_reset_for_dev_remove)
5333 goto recover_end;
26bc5340
AG
5334 }
5335
1d721ed6
AG
5336skip_hw_reset:
5337
26bc5340 5338 /* Post ASIC reset for all devs .*/
655ce9cb 5339 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
7c6e68c7 5340
1d721ed6
AG
5341 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5342 struct amdgpu_ring *ring = tmp_adev->rings[i];
5343
5344 if (!ring || !ring->sched.thread)
5345 continue;
5346
6868a2c4 5347 drm_sched_start(&ring->sched, true);
1d721ed6
AG
5348 }
5349
693073a0 5350 if (adev->enable_mes && adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3))
ed67f729
JX
5351 amdgpu_mes_self_test(tmp_adev);
5352
1053b9c9 5353 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) {
4a580877 5354 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
1d721ed6
AG
5355 }
5356
7258fa31
SK
5357 if (tmp_adev->asic_reset_res)
5358 r = tmp_adev->asic_reset_res;
5359
1d721ed6 5360 tmp_adev->asic_reset_res = 0;
26bc5340
AG
5361
5362 if (r) {
5363 /* bad news, how to tell it to userspace ? */
12ffa55d 5364 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
5365 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5366 } else {
12ffa55d 5367 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
3fa8f89d
S
5368 if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5369 DRM_WARN("smart shift update failed\n");
26bc5340 5370 }
7c6e68c7 5371 }
26bc5340 5372
7c6e68c7 5373skip_sched_resume:
655ce9cb 5374 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
428890a3 5375 /* unlock kfd: SRIOV would do it separately */
c004d44e 5376 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
428890a3 5377 amdgpu_amdkfd_post_reset(tmp_adev);
8e2712e7 5378
5379 /* kfd_post_reset will do nothing if kfd device is not initialized,
5380 * need to bring up kfd here if it's not be initialized before
5381 */
5382 if (!adev->kfd.init_complete)
5383 amdgpu_amdkfd_device_init(adev);
5384
3f12acc8
EQ
5385 if (audio_suspended)
5386 amdgpu_device_resume_display_audio(tmp_adev);
e923be99
AG
5387
5388 amdgpu_device_unset_mp1_state(tmp_adev);
d293470e
YC
5389
5390 amdgpu_ras_set_error_query_ready(tmp_adev, true);
26bc5340
AG
5391 }
5392
f5c7e779 5393recover_end:
e923be99
AG
5394 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5395 reset_list);
5396 amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5397
9e94d22c 5398 if (hive) {
9e94d22c 5399 mutex_unlock(&hive->hive_lock);
d95e8e97 5400 amdgpu_put_xgmi_hive(hive);
9e94d22c 5401 }
26bc5340 5402
f287a3c5 5403 if (r)
26bc5340 5404 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
ab9a0b1f
AG
5405
5406 atomic_set(&adev->reset_domain->reset_res, r);
d38ceaf9
AD
5407 return r;
5408}
5409
e3ecdffa
AD
5410/**
5411 * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
5412 *
5413 * @adev: amdgpu_device pointer
5414 *
5415 * Fetchs and stores in the driver the PCIE capabilities (gen speed
5416 * and lanes) of the slot the device is in. Handles APUs and
5417 * virtualized environments where PCIE config space may not be available.
5418 */
5494d864 5419static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 5420{
5d9a6330 5421 struct pci_dev *pdev;
c5313457
HK
5422 enum pci_bus_speed speed_cap, platform_speed_cap;
5423 enum pcie_link_width platform_link_width;
d0dd7f0c 5424
cd474ba0
AD
5425 if (amdgpu_pcie_gen_cap)
5426 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 5427
cd474ba0
AD
5428 if (amdgpu_pcie_lane_cap)
5429 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 5430
cd474ba0
AD
5431 /* covers APUs as well */
5432 if (pci_is_root_bus(adev->pdev->bus)) {
5433 if (adev->pm.pcie_gen_mask == 0)
5434 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5435 if (adev->pm.pcie_mlw_mask == 0)
5436 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 5437 return;
cd474ba0 5438 }
d0dd7f0c 5439
c5313457
HK
5440 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5441 return;
5442
dbaa922b
AD
5443 pcie_bandwidth_available(adev->pdev, NULL,
5444 &platform_speed_cap, &platform_link_width);
c5313457 5445
cd474ba0 5446 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
5447 /* asic caps */
5448 pdev = adev->pdev;
5449 speed_cap = pcie_get_speed_cap(pdev);
5450 if (speed_cap == PCI_SPEED_UNKNOWN) {
5451 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
5452 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5453 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 5454 } else {
2b3a1f51
FX
5455 if (speed_cap == PCIE_SPEED_32_0GT)
5456 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5457 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5458 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5459 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5460 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5461 else if (speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5462 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5463 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5464 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5465 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5466 else if (speed_cap == PCIE_SPEED_8_0GT)
5467 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5468 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5469 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5470 else if (speed_cap == PCIE_SPEED_5_0GT)
5471 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5472 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5473 else
5474 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5475 }
5476 /* platform caps */
c5313457 5477 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
5478 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5479 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5480 } else {
2b3a1f51
FX
5481 if (platform_speed_cap == PCIE_SPEED_32_0GT)
5482 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5483 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5484 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5485 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5486 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5487 else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5488 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5489 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5490 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5491 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 5492 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
5493 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5494 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5495 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 5496 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
5497 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5498 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5499 else
5500 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5501
cd474ba0
AD
5502 }
5503 }
5504 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 5505 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
5506 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5507 } else {
c5313457 5508 switch (platform_link_width) {
5d9a6330 5509 case PCIE_LNK_X32:
cd474ba0
AD
5510 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5511 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5512 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5513 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5514 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5515 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5516 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5517 break;
5d9a6330 5518 case PCIE_LNK_X16:
cd474ba0
AD
5519 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5520 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5521 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5522 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5523 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5524 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5525 break;
5d9a6330 5526 case PCIE_LNK_X12:
cd474ba0
AD
5527 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5528 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5529 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5530 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5531 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5532 break;
5d9a6330 5533 case PCIE_LNK_X8:
cd474ba0
AD
5534 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5535 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5536 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5537 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5538 break;
5d9a6330 5539 case PCIE_LNK_X4:
cd474ba0
AD
5540 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5541 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5542 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5543 break;
5d9a6330 5544 case PCIE_LNK_X2:
cd474ba0
AD
5545 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5546 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5547 break;
5d9a6330 5548 case PCIE_LNK_X1:
cd474ba0
AD
5549 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5550 break;
5551 default:
5552 break;
5553 }
d0dd7f0c
AD
5554 }
5555 }
5556}
d38ceaf9 5557
08a2fd23
RE
5558/**
5559 * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
5560 *
5561 * @adev: amdgpu_device pointer
5562 * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
5563 *
5564 * Return true if @peer_adev can access (DMA) @adev through the PCIe
5565 * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
5566 * @peer_adev.
5567 */
5568bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
5569 struct amdgpu_device *peer_adev)
5570{
5571#ifdef CONFIG_HSA_AMD_P2P
5572 uint64_t address_mask = peer_adev->dev->dma_mask ?
5573 ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
5574 resource_size_t aper_limit =
5575 adev->gmc.aper_base + adev->gmc.aper_size - 1;
bb66ecbf
LL
5576 bool p2p_access =
5577 !adev->gmc.xgmi.connected_to_cpu &&
5578 !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
08a2fd23
RE
5579
5580 return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
5581 adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
5582 !(adev->gmc.aper_base & address_mask ||
5583 aper_limit & address_mask));
5584#else
5585 return false;
5586#endif
5587}
5588
361dbd01
AD
5589int amdgpu_device_baco_enter(struct drm_device *dev)
5590{
1348969a 5591 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5592 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01 5593
4a580877 5594 if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
361dbd01
AD
5595 return -ENOTSUPP;
5596
8ab0d6f0 5597 if (ras && adev->ras_enabled &&
acdae216 5598 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5599 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
5600
9530273e 5601 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
5602}
5603
5604int amdgpu_device_baco_exit(struct drm_device *dev)
5605{
1348969a 5606 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5607 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 5608 int ret = 0;
361dbd01 5609
4a580877 5610 if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
361dbd01
AD
5611 return -ENOTSUPP;
5612
9530273e
EQ
5613 ret = amdgpu_dpm_baco_exit(adev);
5614 if (ret)
5615 return ret;
7a22677b 5616
8ab0d6f0 5617 if (ras && adev->ras_enabled &&
acdae216 5618 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5619 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
5620
1bece222
CL
5621 if (amdgpu_passthrough(adev) &&
5622 adev->nbio.funcs->clear_doorbell_interrupt)
5623 adev->nbio.funcs->clear_doorbell_interrupt(adev);
5624
7a22677b 5625 return 0;
361dbd01 5626}
c9a6b82f
AG
5627
5628/**
5629 * amdgpu_pci_error_detected - Called when a PCI error is detected.
5630 * @pdev: PCI device struct
5631 * @state: PCI channel state
5632 *
5633 * Description: Called when a PCI error is detected.
5634 *
5635 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
5636 */
5637pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
5638{
5639 struct drm_device *dev = pci_get_drvdata(pdev);
5640 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5641 int i;
c9a6b82f
AG
5642
5643 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
5644
6894305c
AG
5645 if (adev->gmc.xgmi.num_physical_nodes > 1) {
5646 DRM_WARN("No support for XGMI hive yet...");
5647 return PCI_ERS_RESULT_DISCONNECT;
5648 }
5649
e17e27f9
GC
5650 adev->pci_channel_state = state;
5651
c9a6b82f
AG
5652 switch (state) {
5653 case pci_channel_io_normal:
5654 return PCI_ERS_RESULT_CAN_RECOVER;
acd89fca 5655 /* Fatal error, prepare for slot reset */
8a11d283
TZ
5656 case pci_channel_io_frozen:
5657 /*
d0fb18b5 5658 * Locking adev->reset_domain->sem will prevent any external access
acd89fca
AG
5659 * to GPU during PCI error recovery
5660 */
3675c2f2 5661 amdgpu_device_lock_reset_domain(adev->reset_domain);
e923be99 5662 amdgpu_device_set_mp1_state(adev);
acd89fca
AG
5663
5664 /*
5665 * Block any work scheduling as we do for regular GPU reset
5666 * for the duration of the recovery
5667 */
5668 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5669 struct amdgpu_ring *ring = adev->rings[i];
5670
5671 if (!ring || !ring->sched.thread)
5672 continue;
5673
5674 drm_sched_stop(&ring->sched, NULL);
5675 }
8f8c80f4 5676 atomic_inc(&adev->gpu_reset_counter);
c9a6b82f
AG
5677 return PCI_ERS_RESULT_NEED_RESET;
5678 case pci_channel_io_perm_failure:
5679 /* Permanent error, prepare for device removal */
5680 return PCI_ERS_RESULT_DISCONNECT;
5681 }
5682
5683 return PCI_ERS_RESULT_NEED_RESET;
5684}
5685
5686/**
5687 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
5688 * @pdev: pointer to PCI device
5689 */
5690pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
5691{
5692
5693 DRM_INFO("PCI error: mmio enabled callback!!\n");
5694
5695 /* TODO - dump whatever for debugging purposes */
5696
5697 /* This called only if amdgpu_pci_error_detected returns
5698 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
5699 * works, no need to reset slot.
5700 */
5701
5702 return PCI_ERS_RESULT_RECOVERED;
5703}
5704
5705/**
5706 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
5707 * @pdev: PCI device struct
5708 *
5709 * Description: This routine is called by the pci error recovery
5710 * code after the PCI slot has been reset, just before we
5711 * should resume normal operations.
5712 */
5713pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
5714{
5715 struct drm_device *dev = pci_get_drvdata(pdev);
5716 struct amdgpu_device *adev = drm_to_adev(dev);
362c7b91 5717 int r, i;
04442bf7 5718 struct amdgpu_reset_context reset_context;
362c7b91 5719 u32 memsize;
7ac71382 5720 struct list_head device_list;
c9a6b82f
AG
5721
5722 DRM_INFO("PCI error: slot reset callback!!\n");
5723
04442bf7
LL
5724 memset(&reset_context, 0, sizeof(reset_context));
5725
7ac71382 5726 INIT_LIST_HEAD(&device_list);
655ce9cb 5727 list_add_tail(&adev->reset_list, &device_list);
7ac71382 5728
362c7b91
AG
5729 /* wait for asic to come out of reset */
5730 msleep(500);
5731
7ac71382 5732 /* Restore PCI confspace */
c1dd4aa6 5733 amdgpu_device_load_pci_state(pdev);
c9a6b82f 5734
362c7b91
AG
5735 /* confirm ASIC came out of reset */
5736 for (i = 0; i < adev->usec_timeout; i++) {
5737 memsize = amdgpu_asic_get_config_memsize(adev);
5738
5739 if (memsize != 0xffffffff)
5740 break;
5741 udelay(1);
5742 }
5743 if (memsize == 0xffffffff) {
5744 r = -ETIME;
5745 goto out;
5746 }
5747
04442bf7
LL
5748 reset_context.method = AMD_RESET_METHOD_NONE;
5749 reset_context.reset_req_dev = adev;
5750 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
5751 set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
5752
7afefb81 5753 adev->no_hw_access = true;
04442bf7 5754 r = amdgpu_device_pre_asic_reset(adev, &reset_context);
7afefb81 5755 adev->no_hw_access = false;
c9a6b82f
AG
5756 if (r)
5757 goto out;
5758
04442bf7 5759 r = amdgpu_do_asic_reset(&device_list, &reset_context);
c9a6b82f
AG
5760
5761out:
c9a6b82f 5762 if (!r) {
c1dd4aa6
AG
5763 if (amdgpu_device_cache_pci_state(adev->pdev))
5764 pci_restore_state(adev->pdev);
5765
c9a6b82f
AG
5766 DRM_INFO("PCIe error recovery succeeded\n");
5767 } else {
5768 DRM_ERROR("PCIe error recovery failed, err:%d", r);
e923be99
AG
5769 amdgpu_device_unset_mp1_state(adev);
5770 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f
AG
5771 }
5772
5773 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
5774}
5775
5776/**
5777 * amdgpu_pci_resume() - resume normal ops after PCI reset
5778 * @pdev: pointer to PCI device
5779 *
5780 * Called when the error recovery driver tells us that its
505199a3 5781 * OK to resume normal operation.
c9a6b82f
AG
5782 */
5783void amdgpu_pci_resume(struct pci_dev *pdev)
5784{
5785 struct drm_device *dev = pci_get_drvdata(pdev);
5786 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5787 int i;
c9a6b82f 5788
c9a6b82f
AG
5789
5790 DRM_INFO("PCI error: resume callback!!\n");
acd89fca 5791
e17e27f9
GC
5792 /* Only continue execution for the case of pci_channel_io_frozen */
5793 if (adev->pci_channel_state != pci_channel_io_frozen)
5794 return;
5795
acd89fca
AG
5796 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5797 struct amdgpu_ring *ring = adev->rings[i];
5798
5799 if (!ring || !ring->sched.thread)
5800 continue;
5801
acd89fca
AG
5802 drm_sched_start(&ring->sched, true);
5803 }
5804
e923be99
AG
5805 amdgpu_device_unset_mp1_state(adev);
5806 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f 5807}
c1dd4aa6
AG
5808
5809bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
5810{
5811 struct drm_device *dev = pci_get_drvdata(pdev);
5812 struct amdgpu_device *adev = drm_to_adev(dev);
5813 int r;
5814
5815 r = pci_save_state(pdev);
5816 if (!r) {
5817 kfree(adev->pci_state);
5818
5819 adev->pci_state = pci_store_saved_state(pdev);
5820
5821 if (!adev->pci_state) {
5822 DRM_ERROR("Failed to store PCI saved state");
5823 return false;
5824 }
5825 } else {
5826 DRM_WARN("Failed to save PCI state, err:%d\n", r);
5827 return false;
5828 }
5829
5830 return true;
5831}
5832
5833bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
5834{
5835 struct drm_device *dev = pci_get_drvdata(pdev);
5836 struct amdgpu_device *adev = drm_to_adev(dev);
5837 int r;
5838
5839 if (!adev->pci_state)
5840 return false;
5841
5842 r = pci_load_saved_state(pdev, adev->pci_state);
5843
5844 if (!r) {
5845 pci_restore_state(pdev);
5846 } else {
5847 DRM_WARN("Failed to load PCI state, err:%d\n", r);
5848 return false;
5849 }
5850
5851 return true;
5852}
5853
810085dd
EH
5854void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
5855 struct amdgpu_ring *ring)
5856{
5857#ifdef CONFIG_X86_64
b818a5d3 5858 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
5859 return;
5860#endif
5861 if (adev->gmc.xgmi.connected_to_cpu)
5862 return;
5863
5864 if (ring && ring->funcs->emit_hdp_flush)
5865 amdgpu_ring_emit_hdp_flush(ring);
5866 else
5867 amdgpu_asic_flush_hdp(adev, ring);
5868}
c1dd4aa6 5869
810085dd
EH
5870void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
5871 struct amdgpu_ring *ring)
5872{
5873#ifdef CONFIG_X86_64
b818a5d3 5874 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
810085dd
EH
5875 return;
5876#endif
5877 if (adev->gmc.xgmi.connected_to_cpu)
5878 return;
c1dd4aa6 5879
810085dd
EH
5880 amdgpu_asic_invalidate_hdp(adev, ring);
5881}
34f3a4a9 5882
89a7a870
AG
5883int amdgpu_in_reset(struct amdgpu_device *adev)
5884{
5885 return atomic_read(&adev->reset_domain->in_gpu_reset);
53a17b6b
TZ
5886}
5887
34f3a4a9
LY
5888/**
5889 * amdgpu_device_halt() - bring hardware to some kind of halt state
5890 *
5891 * @adev: amdgpu_device pointer
5892 *
5893 * Bring hardware to some kind of halt state so that no one can touch it
5894 * any more. It will help to maintain error context when error occurred.
5895 * Compare to a simple hang, the system will keep stable at least for SSH
5896 * access. Then it should be trivial to inspect the hardware state and
5897 * see what's going on. Implemented as following:
5898 *
5899 * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
5900 * clears all CPU mappings to device, disallows remappings through page faults
5901 * 2. amdgpu_irq_disable_all() disables all interrupts
5902 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
5903 * 4. set adev->no_hw_access to avoid potential crashes after setp 5
5904 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
5905 * 6. pci_disable_device() and pci_wait_for_pending_transaction()
5906 * flush any in flight DMA operations
5907 */
5908void amdgpu_device_halt(struct amdgpu_device *adev)
5909{
5910 struct pci_dev *pdev = adev->pdev;
e0f943b4 5911 struct drm_device *ddev = adev_to_drm(adev);
34f3a4a9
LY
5912
5913 drm_dev_unplug(ddev);
5914
5915 amdgpu_irq_disable_all(adev);
5916
5917 amdgpu_fence_driver_hw_fini(adev);
5918
5919 adev->no_hw_access = true;
5920
5921 amdgpu_device_unmap_mmio(adev);
5922
5923 pci_disable_device(pdev);
5924 pci_wait_for_pending_transaction(pdev);
5925}
86700a40
XD
5926
5927u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
5928 u32 reg)
5929{
5930 unsigned long flags, address, data;
5931 u32 r;
5932
5933 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
5934 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
5935
5936 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
5937 WREG32(address, reg * 4);
5938 (void)RREG32(address);
5939 r = RREG32(data);
5940 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
5941 return r;
5942}
5943
5944void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
5945 u32 reg, u32 v)
5946{
5947 unsigned long flags, address, data;
5948
5949 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
5950 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
5951
5952 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
5953 WREG32(address, reg * 4);
5954 (void)RREG32(address);
5955 WREG32(data, v);
5956 (void)RREG32(data);
5957 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
5958}
68ce8b24
CK
5959
5960/**
5961 * amdgpu_device_switch_gang - switch to a new gang
5962 * @adev: amdgpu_device pointer
5963 * @gang: the gang to switch to
5964 *
5965 * Try to switch to a new gang.
5966 * Returns: NULL if we switched to the new gang or a reference to the current
5967 * gang leader.
5968 */
5969struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
5970 struct dma_fence *gang)
5971{
5972 struct dma_fence *old = NULL;
5973
5974 do {
5975 dma_fence_put(old);
5976 rcu_read_lock();
5977 old = dma_fence_get_rcu_safe(&adev->gang_submit);
5978 rcu_read_unlock();
5979
5980 if (old == gang)
5981 break;
5982
5983 if (!dma_fence_is_signaled(old))
5984 return old;
5985
5986 } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
5987 old, gang) != old);
5988
5989 dma_fence_put(old);
5990 return NULL;
5991}
220c8cc8
AD
5992
5993bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
5994{
5995 switch (adev->asic_type) {
5996#ifdef CONFIG_DRM_AMDGPU_SI
5997 case CHIP_HAINAN:
5998#endif
5999 case CHIP_TOPAZ:
6000 /* chips with no display hardware */
6001 return false;
6002#ifdef CONFIG_DRM_AMDGPU_SI
6003 case CHIP_TAHITI:
6004 case CHIP_PITCAIRN:
6005 case CHIP_VERDE:
6006 case CHIP_OLAND:
6007#endif
6008#ifdef CONFIG_DRM_AMDGPU_CIK
6009 case CHIP_BONAIRE:
6010 case CHIP_HAWAII:
6011 case CHIP_KAVERI:
6012 case CHIP_KABINI:
6013 case CHIP_MULLINS:
6014#endif
6015 case CHIP_TONGA:
6016 case CHIP_FIJI:
6017 case CHIP_POLARIS10:
6018 case CHIP_POLARIS11:
6019 case CHIP_POLARIS12:
6020 case CHIP_VEGAM:
6021 case CHIP_CARRIZO:
6022 case CHIP_STONEY:
6023 /* chips with display hardware */
6024 return true;
6025 default:
6026 /* IP discovery */
6027 if (!adev->ip_versions[DCE_HWIP][0] ||
6028 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6029 return false;
6030 return true;
6031 }
6032}