drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
d38ceaf9
AD
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
b1ddf548 28#include <linux/power_supply.h>
0875dc9e 29#include <linux/kthread.h>
fdf2f6c5 30#include <linux/module.h>
d38ceaf9
AD
31#include <linux/console.h>
32#include <linux/slab.h>
4a74c38c 33#include <linux/iommu.h>
fdf2f6c5 34
4562236b 35#include <drm/drm_atomic_helper.h>
fcd70cd3 36#include <drm/drm_probe_helper.h>
d38ceaf9
AD
37#include <drm/amdgpu_drm.h>
38#include <linux/vgaarb.h>
39#include <linux/vga_switcheroo.h>
40#include <linux/efi.h>
41#include "amdgpu.h"
f4b373f4 42#include "amdgpu_trace.h"
d38ceaf9
AD
43#include "amdgpu_i2c.h"
44#include "atom.h"
45#include "amdgpu_atombios.h"
a5bde2f9 46#include "amdgpu_atomfirmware.h"
d0dd7f0c 47#include "amd_pcie.h"
33f34802
KW
48#ifdef CONFIG_DRM_AMDGPU_SI
49#include "si.h"
50#endif
a2e73f56
AD
51#ifdef CONFIG_DRM_AMDGPU_CIK
52#include "cik.h"
53#endif
aaa36a97 54#include "vi.h"
460826e6 55#include "soc15.h"
0a5b8c7b 56#include "nv.h"
d38ceaf9 57#include "bif/bif_4_1_d.h"
9accf2fd 58#include <linux/pci.h>
bec86378 59#include <linux/firmware.h>
89041940 60#include "amdgpu_vf_error.h"
d38ceaf9 61
ba997709 62#include "amdgpu_amdkfd.h"
d2f52ac8 63#include "amdgpu_pm.h"
d38ceaf9 64
5183411b 65#include "amdgpu_xgmi.h"
c030f2e4 66#include "amdgpu_ras.h"
9c7c85f7 67#include "amdgpu_pmu.h"
bd607166 68#include "amdgpu_fru_eeprom.h"
04442bf7 69#include "amdgpu_reset.h"
5183411b 70
d5ea093e 71#include <linux/suspend.h>
c6a6e2db 72#include <drm/task_barrier.h>
3f12acc8 73#include <linux/pm_runtime.h>
d5ea093e 74
f89f8c6b
AG
75#include <drm/drm_drv.h>
76
e2a75f88 77MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
3f76dced 78MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
2d2e5e7e 79MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
ad5a67a7 80MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
54c4d17e 81MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
65e60f6e 82MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
b51a26a0 83MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
23c6268e 84MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
ed42cfe1 85MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
42b325e5 86MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
4e52a9f8 87MODULE_FIRMWARE("amdgpu/vangogh_gpu_info.bin");
8bf84f60 88MODULE_FIRMWARE("amdgpu/yellow_carp_gpu_info.bin");
e2a75f88 89
2dc80b00
S
90#define AMDGPU_RESUME_MS 2000
91
050091ab 92const char *amdgpu_asic_name[] = {
da69c161
KW
93 "TAHITI",
94 "PITCAIRN",
95 "VERDE",
96 "OLAND",
97 "HAINAN",
d38ceaf9
AD
98 "BONAIRE",
99 "KAVERI",
100 "KABINI",
101 "HAWAII",
102 "MULLINS",
103 "TOPAZ",
104 "TONGA",
48299f95 105 "FIJI",
d38ceaf9 106 "CARRIZO",
139f4917 107 "STONEY",
2cc0c0b5
FC
108 "POLARIS10",
109 "POLARIS11",
c4642a47 110 "POLARIS12",
48ff108d 111 "VEGAM",
d4196f01 112 "VEGA10",
8fab806a 113 "VEGA12",
956fcddc 114 "VEGA20",
2ca8a5d2 115 "RAVEN",
d6c3b24e 116 "ARCTURUS",
1eee4228 117 "RENOIR",
d46b417a 118 "ALDEBARAN",
852a6626 119 "NAVI10",
d0f56dc2 120 "CYAN_SKILLFISH",
87dbad02 121 "NAVI14",
9802f5d7 122 "NAVI12",
ccaf72d3 123 "SIENNA_CICHLID",
ddd8fbe7 124 "NAVY_FLOUNDER",
4f1e9a76 125 "VANGOGH",
a2468e04 126 "DIMGREY_CAVEFISH",
6f169591 127 "BEIGE_GOBY",
ee9236b7 128 "YELLOW_CARP",
3ae695d6 129 "IP DISCOVERY",
d38ceaf9
AD
130 "LAST",
131};
132
dcea6e65
KR
133/**
134 * DOC: pcie_replay_count
135 *
136 * The amdgpu driver provides a sysfs API for reporting the total number
137 * of PCIe replays (NAKs)
138 * The file pcie_replay_count is used for this and returns the total
139 * number of replays as a sum of the NAKs generated and NAKs received
140 */
141
142static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
143 struct device_attribute *attr, char *buf)
144{
145 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 146 struct amdgpu_device *adev = drm_to_adev(ddev);
dcea6e65
KR
147 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
148
36000c7a 149 return sysfs_emit(buf, "%llu\n", cnt);
dcea6e65
KR
150}
151
152static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
153 amdgpu_device_get_pcie_replay_count, NULL);
154
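/*
 * Illustrative userspace sketch (not part of the driver): reading the
 * pcie_replay_count attribute exposed above.  The sysfs path assumes the
 * GPU is card0; adjust for the device actually in use.
 */
#include <stdio.h>

static long read_pcie_replay_count(void)
{
	FILE *f = fopen("/sys/class/drm/card0/device/pcie_replay_count", "r");
	long count = -1;

	if (f) {
		if (fscanf(f, "%ld", &count) != 1)
			count = -1;
		fclose(f);
	}
	return count;
}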
5494d864
AD
155static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
156
bd607166
KR
157/**
158 * DOC: product_name
159 *
160 * The amdgpu driver provides a sysfs API for reporting the product name
161 * for the device
162 * The file product_name is used for this and returns the product name
163 * as returned from the FRU.
164 * NOTE: This is only available for certain server cards
165 */
166
167static ssize_t amdgpu_device_get_product_name(struct device *dev,
168 struct device_attribute *attr, char *buf)
169{
170 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 171 struct amdgpu_device *adev = drm_to_adev(ddev);
bd607166 172
36000c7a 173 return sysfs_emit(buf, "%s\n", adev->product_name);
bd607166
KR
174}
175
176static DEVICE_ATTR(product_name, S_IRUGO,
177 amdgpu_device_get_product_name, NULL);
178
179/**
180 * DOC: product_number
181 *
182 * The amdgpu driver provides a sysfs API for reporting the part number
183 * for the device
184 * The file product_number is used for this and returns the part number
185 * as returned from the FRU.
186 * NOTE: This is only available for certain server cards
187 */
188
189static ssize_t amdgpu_device_get_product_number(struct device *dev,
190 struct device_attribute *attr, char *buf)
191{
192 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 193 struct amdgpu_device *adev = drm_to_adev(ddev);
bd607166 194
36000c7a 195 return sysfs_emit(buf, "%s\n", adev->product_number);
bd607166
KR
196}
197
198static DEVICE_ATTR(product_number, S_IRUGO,
199 amdgpu_device_get_product_number, NULL);
200
201/**
202 * DOC: serial_number
203 *
204 * The amdgpu driver provides a sysfs API for reporting the serial number
205 * for the device
206 * The file serial_number is used for this and returns the serial number
207 * as returned from the FRU.
208 * NOTE: This is only available for certain server cards
209 */
210
211static ssize_t amdgpu_device_get_serial_number(struct device *dev,
212 struct device_attribute *attr, char *buf)
213{
214 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 215 struct amdgpu_device *adev = drm_to_adev(ddev);
bd607166 216
36000c7a 217 return sysfs_emit(buf, "%s\n", adev->serial);
bd607166
KR
218}
219
220static DEVICE_ATTR(serial_number, S_IRUGO,
221 amdgpu_device_get_serial_number, NULL);
222
fd496ca8 223/**
b98c6299 224 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
fd496ca8
AD
225 *
226 * @dev: drm_device pointer
227 *
b98c6299 228 * Returns true if the device is a dGPU with ATPX power control,
fd496ca8
AD
229 * otherwise return false.
230 */
b98c6299 231bool amdgpu_device_supports_px(struct drm_device *dev)
fd496ca8
AD
232{
233 struct amdgpu_device *adev = drm_to_adev(dev);
234
b98c6299 235 if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
fd496ca8
AD
236 return true;
237 return false;
238}
239
e3ecdffa 240/**
0330b848 241 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
e3ecdffa
AD
242 *
243 * @dev: drm_device pointer
244 *
b98c6299 245 * Returns true if the device is a dGPU with ACPI power control,
e3ecdffa
AD
246 * otherwise return false.
247 */
31af062a 248bool amdgpu_device_supports_boco(struct drm_device *dev)
d38ceaf9 249{
1348969a 250 struct amdgpu_device *adev = drm_to_adev(dev);
d38ceaf9 251
b98c6299
AD
252 if (adev->has_pr3 ||
253 ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
d38ceaf9
AD
254 return true;
255 return false;
256}
257
a69cba42
AD
258/**
259 * amdgpu_device_supports_baco - Does the device support BACO
260 *
261 * @dev: drm_device pointer
262 *
263 * Returns true if the device supports BACO,
264 * otherwise return false.
265 */
266bool amdgpu_device_supports_baco(struct drm_device *dev)
267{
1348969a 268 struct amdgpu_device *adev = drm_to_adev(dev);
a69cba42
AD
269
270 return amdgpu_asic_supports_baco(adev);
271}
272
3fa8f89d
S
273/**
274 * amdgpu_device_supports_smart_shift - Is the device dGPU with
275 * smart shift support
276 *
277 * @dev: drm_device pointer
278 *
279 * Returns true if the device is a dGPU with Smart Shift support,
280 * otherwise returns false.
281 */
282bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
283{
284 return (amdgpu_device_supports_boco(dev) &&
285 amdgpu_acpi_is_power_shift_control_supported());
286}
287
6e3cd2a9
MCC
288/*
289 * VRAM access helper functions
290 */
291
e35e2b11 292/**
048af66b 293 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
e35e2b11
TY
294 *
295 * @adev: amdgpu_device pointer
296 * @pos: offset of the buffer in vram
297 * @buf: virtual address of the buffer in system memory
298 * @size: read/write size, sizeof(@buf) must be >= @size
299 * @write: true - write to vram, otherwise - read from vram
300 */
048af66b
KW
301void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
302 void *buf, size_t size, bool write)
e35e2b11 303{
e35e2b11 304 unsigned long flags;
048af66b
KW
305 uint32_t hi = ~0, tmp = 0;
306 uint32_t *data = buf;
ce05ac56 307 uint64_t last;
f89f8c6b 308 int idx;
ce05ac56 309
c58a863b 310 if (!drm_dev_enter(adev_to_drm(adev), &idx))
f89f8c6b 311 return;
9d11eb0d 312
048af66b
KW
313 BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
314
315 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
316 for (last = pos + size; pos < last; pos += 4) {
317 tmp = pos >> 31;
318
319 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
320 if (tmp != hi) {
321 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
322 hi = tmp;
323 }
324 if (write)
325 WREG32_NO_KIQ(mmMM_DATA, *data++);
326 else
327 *data++ = RREG32_NO_KIQ(mmMM_DATA);
328 }
329
330 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
331 drm_dev_exit(idx);
332}
333
334/**
bbe04dec 335 * amdgpu_device_aper_access - access vram by vram aperture
048af66b
KW
336 *
337 * @adev: amdgpu_device pointer
338 * @pos: offset of the buffer in vram
339 * @buf: virtual address of the buffer in system memory
340 * @size: read/write size, sizeof(@buf) must be >= @size
341 * @write: true - write to vram, otherwise - read from vram
342 *
343 * The return value means how many bytes have been transferred.
344 */
345size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
346 void *buf, size_t size, bool write)
347{
9d11eb0d 348#ifdef CONFIG_64BIT
048af66b
KW
349 void __iomem *addr;
350 size_t count = 0;
351 uint64_t last;
352
353 if (!adev->mman.aper_base_kaddr)
354 return 0;
355
9d11eb0d
CK
356 last = min(pos + size, adev->gmc.visible_vram_size);
357 if (last > pos) {
048af66b
KW
358 addr = adev->mman.aper_base_kaddr + pos;
359 count = last - pos;
9d11eb0d
CK
360
361 if (write) {
362 memcpy_toio(addr, buf, count);
363 mb();
810085dd 364 amdgpu_device_flush_hdp(adev, NULL);
9d11eb0d 365 } else {
810085dd 366 amdgpu_device_invalidate_hdp(adev, NULL);
9d11eb0d
CK
367 mb();
368 memcpy_fromio(buf, addr, count);
369 }
370
9d11eb0d 371 }
048af66b
KW
372
373 return count;
374#else
375 return 0;
9d11eb0d 376#endif
048af66b 377}
9d11eb0d 378
048af66b
KW
379/**
380 * amdgpu_device_vram_access - read/write a buffer in vram
381 *
382 * @adev: amdgpu_device pointer
383 * @pos: offset of the buffer in vram
384 * @buf: virtual address of the buffer in system memory
385 * @size: read/write size, sizeof(@buf) must be >= @size
386 * @write: true - write to vram, otherwise - read from vram
387 */
388void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
389 void *buf, size_t size, bool write)
390{
391 size_t count;
e35e2b11 392
048af66b
KW
393 /* try using the vram aperture to access vram first */
394 count = amdgpu_device_aper_access(adev, pos, buf, size, write);
395 size -= count;
396 if (size) {
397 /* use MM_INDEX/MM_DATA to access the rest of vram */
398 pos += count;
399 buf += count;
400 amdgpu_device_mm_access(adev, pos, buf, size, write);
e35e2b11
TY
401 }
402}
403
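/*
 * Illustrative sketch (not part of the driver): copying a small, dword-aligned
 * buffer out of VRAM with amdgpu_device_vram_access().  The VRAM offset used
 * here is a placeholder.
 */
static void __maybe_unused example_dump_vram_words(struct amdgpu_device *adev)
{
	uint32_t words[4];

	/* read 16 bytes starting at VRAM offset 0x1000 into system memory */
	amdgpu_device_vram_access(adev, 0x1000, words, sizeof(words), false);

	/* writing works the same way, with the last argument set to true */
	amdgpu_device_vram_access(adev, 0x1000, words, sizeof(words), true);
}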
d38ceaf9 404/*
f7ee1874 405 * register access helper functions.
d38ceaf9 406 */
56b53c0b
DL
407
408/* Check if hw access should be skipped because of hotplug or device error */
409bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
410{
7afefb81 411 if (adev->no_hw_access)
56b53c0b
DL
412 return true;
413
414#ifdef CONFIG_LOCKDEP
415 /*
416 * This is a bit complicated to understand, so worth a comment. What we assert
417 * here is that the GPU reset is not running on another thread in parallel.
418 *
419 * For this we trylock the read side of the reset semaphore, if that succeeds
420 * we know that the reset is not running in parallel.
421 *
422 * If the trylock fails we assert that we are either already holding the read
423 * side of the lock or are the reset thread itself and hold the write side of
424 * the lock.
425 */
426 if (in_task()) {
d0fb18b5
AG
427 if (down_read_trylock(&adev->reset_domain->sem))
428 up_read(&adev->reset_domain->sem);
56b53c0b 429 else
d0fb18b5 430 lockdep_assert_held(&adev->reset_domain->sem);
56b53c0b
DL
431 }
432#endif
433 return false;
434}
435
e3ecdffa 436/**
f7ee1874 437 * amdgpu_device_rreg - read a memory mapped IO or indirect register
e3ecdffa
AD
438 *
439 * @adev: amdgpu_device pointer
440 * @reg: dword aligned register offset
441 * @acc_flags: access flags which require special behavior
442 *
443 * Returns the 32 bit value from the offset specified.
444 */
f7ee1874
HZ
445uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
446 uint32_t reg, uint32_t acc_flags)
d38ceaf9 447{
f4b373f4
TSD
448 uint32_t ret;
449
56b53c0b 450 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
451 return 0;
452
f7ee1874
HZ
453 if ((reg * 4) < adev->rmmio_size) {
454 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
455 amdgpu_sriov_runtime(adev) &&
d0fb18b5 456 down_read_trylock(&adev->reset_domain->sem)) {
f7ee1874 457 ret = amdgpu_kiq_rreg(adev, reg);
d0fb18b5 458 up_read(&adev->reset_domain->sem);
f7ee1874
HZ
459 } else {
460 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
461 }
462 } else {
463 ret = adev->pcie_rreg(adev, reg * 4);
81202807 464 }
bc992ba5 465
f7ee1874 466 trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
e78b579d 467
f4b373f4 468 return ret;
d38ceaf9
AD
469}
470
421a2a30
ML
471/*
472 * MMIO register read with bytes helper functions
473 * @offset: byte offset from MMIO start
474 *
475*/
476
e3ecdffa
AD
477/**
478 * amdgpu_mm_rreg8 - read a memory mapped IO register
479 *
480 * @adev: amdgpu_device pointer
481 * @offset: byte aligned register offset
482 *
483 * Returns the 8 bit value from the offset specified.
484 */
7cbbc745
AG
485uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
486{
56b53c0b 487 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
488 return 0;
489
421a2a30
ML
490 if (offset < adev->rmmio_size)
491 return (readb(adev->rmmio + offset));
492 BUG();
493}
494
495/*
496 * MMIO register write with bytes helper functions
497 * @offset: byte offset from MMIO start
498 * @value: the value to be written to the register
499 *
500*/
e3ecdffa
AD
501/**
502 * amdgpu_mm_wreg8 - write a memory mapped IO register
503 *
504 * @adev: amdgpu_device pointer
505 * @offset: byte aligned register offset
506 * @value: 8 bit value to write
507 *
508 * Writes the value specified to the offset specified.
509 */
7cbbc745
AG
510void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
511{
56b53c0b 512 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
513 return;
514
421a2a30
ML
515 if (offset < adev->rmmio_size)
516 writeb(value, adev->rmmio + offset);
517 else
518 BUG();
519}
520
e3ecdffa 521/**
f7ee1874 522 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
e3ecdffa
AD
523 *
524 * @adev: amdgpu_device pointer
525 * @reg: dword aligned register offset
526 * @v: 32 bit value to write to the register
527 * @acc_flags: access flags which require special behavior
528 *
529 * Writes the value specified to the offset specified.
530 */
f7ee1874
HZ
531void amdgpu_device_wreg(struct amdgpu_device *adev,
532 uint32_t reg, uint32_t v,
533 uint32_t acc_flags)
d38ceaf9 534{
56b53c0b 535 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
536 return;
537
f7ee1874
HZ
538 if ((reg * 4) < adev->rmmio_size) {
539 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
540 amdgpu_sriov_runtime(adev) &&
d0fb18b5 541 down_read_trylock(&adev->reset_domain->sem)) {
f7ee1874 542 amdgpu_kiq_wreg(adev, reg, v);
d0fb18b5 543 up_read(&adev->reset_domain->sem);
f7ee1874
HZ
544 } else {
545 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
546 }
547 } else {
548 adev->pcie_wreg(adev, reg * 4, v);
81202807 549 }
bc992ba5 550
f7ee1874 551 trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
2e0cc4d4 552}
d38ceaf9 553
03f2abb0 554/**
4cc9f86f 555 * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
2e0cc4d4
ML
556 *
557 * this function is invoked only for debugfs register access
03f2abb0 558 */
f7ee1874
HZ
559void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
560 uint32_t reg, uint32_t v)
2e0cc4d4 561{
56b53c0b 562 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
563 return;
564
2e0cc4d4 565 if (amdgpu_sriov_fullaccess(adev) &&
f7ee1874
HZ
566 adev->gfx.rlc.funcs &&
567 adev->gfx.rlc.funcs->is_rlcg_access_range) {
2e0cc4d4 568 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
1a4772d9 569 return adev->gfx.rlc.funcs->sriov_wreg(adev, reg, v, 0, 0);
4cc9f86f
TSD
570 } else if ((reg * 4) >= adev->rmmio_size) {
571 adev->pcie_wreg(adev, reg * 4, v);
f7ee1874
HZ
572 } else {
573 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
47ed4e1c 574 }
d38ceaf9
AD
575}
576
d38ceaf9
AD
577/**
578 * amdgpu_mm_rdoorbell - read a doorbell dword
579 *
580 * @adev: amdgpu_device pointer
581 * @index: doorbell index
582 *
583 * Returns the value in the doorbell aperture at the
584 * requested doorbell index (CIK).
585 */
586u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
587{
56b53c0b 588 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
589 return 0;
590
d38ceaf9
AD
591 if (index < adev->doorbell.num_doorbells) {
592 return readl(adev->doorbell.ptr + index);
593 } else {
594 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
595 return 0;
596 }
597}
598
599/**
600 * amdgpu_mm_wdoorbell - write a doorbell dword
601 *
602 * @adev: amdgpu_device pointer
603 * @index: doorbell index
604 * @v: value to write
605 *
606 * Writes @v to the doorbell aperture at the
607 * requested doorbell index (CIK).
608 */
609void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
610{
56b53c0b 611 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
612 return;
613
d38ceaf9
AD
614 if (index < adev->doorbell.num_doorbells) {
615 writel(v, adev->doorbell.ptr + index);
616 } else {
617 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
618 }
619}
620
832be404
KW
621/**
622 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
623 *
624 * @adev: amdgpu_device pointer
625 * @index: doorbell index
626 *
627 * Returns the value in the doorbell aperture at the
628 * requested doorbell index (VEGA10+).
629 */
630u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
631{
56b53c0b 632 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
633 return 0;
634
832be404
KW
635 if (index < adev->doorbell.num_doorbells) {
636 return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
637 } else {
638 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
639 return 0;
640 }
641}
642
643/**
644 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
645 *
646 * @adev: amdgpu_device pointer
647 * @index: doorbell index
648 * @v: value to write
649 *
650 * Writes @v to the doorbell aperture at the
651 * requested doorbell index (VEGA10+).
652 */
653void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
654{
56b53c0b 655 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
656 return;
657
832be404
KW
658 if (index < adev->doorbell.num_doorbells) {
659 atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
660 } else {
661 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
662 }
663}
664
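/*
 * Illustrative sketch (not part of the driver): publishing a ring write
 * pointer through the doorbell helpers above.  Pre-VEGA10 parts use 32 bit
 * doorbells, newer parts use 64 bit ones.
 */
static void __maybe_unused example_ring_set_wptr(struct amdgpu_device *adev,
						 u32 doorbell_index, u64 wptr)
{
	if (adev->asic_type >= CHIP_VEGA10)
		amdgpu_mm_wdoorbell64(adev, doorbell_index, wptr);
	else
		amdgpu_mm_wdoorbell(adev, doorbell_index, (u32)wptr);
}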
1bba3683
HZ
665/**
666 * amdgpu_device_indirect_rreg - read an indirect register
667 *
668 * @adev: amdgpu_device pointer
669 * @pcie_index: mmio register offset
670 * @pcie_data: mmio register offset
22f453fb 671 * @reg_addr: indirect register address to read from
1bba3683
HZ
672 *
673 * Returns the value of indirect register @reg_addr
674 */
675u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
676 u32 pcie_index, u32 pcie_data,
677 u32 reg_addr)
678{
679 unsigned long flags;
680 u32 r;
681 void __iomem *pcie_index_offset;
682 void __iomem *pcie_data_offset;
683
684 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
685 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
686 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
687
688 writel(reg_addr, pcie_index_offset);
689 readl(pcie_index_offset);
690 r = readl(pcie_data_offset);
691 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
692
693 return r;
694}
695
696/**
697 * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
698 *
699 * @adev: amdgpu_device pointer
700 * @pcie_index: mmio register offset
701 * @pcie_data: mmio register offset
22f453fb 702 * @reg_addr: indirect register address to read from
1bba3683
HZ
703 *
704 * Returns the value of indirect register @reg_addr
705 */
706u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
707 u32 pcie_index, u32 pcie_data,
708 u32 reg_addr)
709{
710 unsigned long flags;
711 u64 r;
712 void __iomem *pcie_index_offset;
713 void __iomem *pcie_data_offset;
714
715 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
716 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
717 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
718
719 /* read low 32 bits */
720 writel(reg_addr, pcie_index_offset);
721 readl(pcie_index_offset);
722 r = readl(pcie_data_offset);
723 /* read high 32 bits */
724 writel(reg_addr + 4, pcie_index_offset);
725 readl(pcie_index_offset);
726 r |= ((u64)readl(pcie_data_offset) << 32);
727 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
728
729 return r;
730}
731
732/**
733 * amdgpu_device_indirect_wreg - write an indirect register address
734 *
735 * @adev: amdgpu_device pointer
736 * @pcie_index: mmio register offset
737 * @pcie_data: mmio register offset
738 * @reg_addr: indirect register offset
739 * @reg_data: indirect register data
740 *
741 */
742void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
743 u32 pcie_index, u32 pcie_data,
744 u32 reg_addr, u32 reg_data)
745{
746 unsigned long flags;
747 void __iomem *pcie_index_offset;
748 void __iomem *pcie_data_offset;
749
750 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
751 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
752 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
753
754 writel(reg_addr, pcie_index_offset);
755 readl(pcie_index_offset);
756 writel(reg_data, pcie_data_offset);
757 readl(pcie_data_offset);
758 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
759}
760
761/**
762 * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
763 *
764 * @adev: amdgpu_device pointer
765 * @pcie_index: mmio register offset
766 * @pcie_data: mmio register offset
767 * @reg_addr: indirect register offset
768 * @reg_data: indirect register data
769 *
770 */
771void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
772 u32 pcie_index, u32 pcie_data,
773 u32 reg_addr, u64 reg_data)
774{
775 unsigned long flags;
776 void __iomem *pcie_index_offset;
777 void __iomem *pcie_data_offset;
778
779 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
780 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
781 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
782
783 /* write low 32 bits */
784 writel(reg_addr, pcie_index_offset);
785 readl(pcie_index_offset);
786 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
787 readl(pcie_data_offset);
788 /* write high 32 bits */
789 writel(reg_addr + 4, pcie_index_offset);
790 readl(pcie_index_offset);
791 writel((u32)(reg_data >> 32), pcie_data_offset);
792 readl(pcie_data_offset);
793 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
794}
795
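/*
 * Illustrative sketch (not part of the driver): a read-modify-write of an
 * indirect register through the helpers above.  The pcie_index/pcie_data
 * offsets and the register address are placeholders supplied by the caller.
 */
static void __maybe_unused example_indirect_rmw(struct amdgpu_device *adev,
						u32 pcie_index, u32 pcie_data,
						u32 reg_addr, u32 set_bits)
{
	u32 val;

	val = amdgpu_device_indirect_rreg(adev, pcie_index, pcie_data, reg_addr);
	val |= set_bits;
	amdgpu_device_indirect_wreg(adev, pcie_index, pcie_data, reg_addr, val);
}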
d38ceaf9
AD
796/**
797 * amdgpu_invalid_rreg - dummy reg read function
798 *
982a820b 799 * @adev: amdgpu_device pointer
d38ceaf9
AD
800 * @reg: offset of register
801 *
802 * Dummy register read function. Used for register blocks
803 * that certain asics don't have (all asics).
804 * Returns the value in the register.
805 */
806static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
807{
808 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
809 BUG();
810 return 0;
811}
812
813/**
814 * amdgpu_invalid_wreg - dummy reg write function
815 *
982a820b 816 * @adev: amdgpu_device pointer
d38ceaf9
AD
817 * @reg: offset of register
818 * @v: value to write to the register
819 *
820 * Dummy register write function. Used for register blocks
821 * that certain asics don't have (all asics).
822 */
823static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
824{
825 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
826 reg, v);
827 BUG();
828}
829
4fa1c6a6
TZ
830/**
831 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
832 *
982a820b 833 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
834 * @reg: offset of register
835 *
836 * Dummy register read function. Used for register blocks
837 * that certain asics don't have (all asics).
838 * Returns the value in the register.
839 */
840static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
841{
842 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
843 BUG();
844 return 0;
845}
846
847/**
848 * amdgpu_invalid_wreg64 - dummy reg write function
849 *
982a820b 850 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
851 * @reg: offset of register
852 * @v: value to write to the register
853 *
854 * Dummy register write function. Used for register blocks
855 * that certain asics don't have (all asics).
856 */
857static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
858{
859 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
860 reg, v);
861 BUG();
862}
863
d38ceaf9
AD
864/**
865 * amdgpu_block_invalid_rreg - dummy reg read function
866 *
982a820b 867 * @adev: amdgpu_device pointer
d38ceaf9
AD
868 * @block: offset of instance
869 * @reg: offset of register
870 *
871 * Dummy register read function. Used for register blocks
872 * that certain asics don't have (all asics).
873 * Returns the value in the register.
874 */
875static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
876 uint32_t block, uint32_t reg)
877{
878 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
879 reg, block);
880 BUG();
881 return 0;
882}
883
884/**
885 * amdgpu_block_invalid_wreg - dummy reg write function
886 *
982a820b 887 * @adev: amdgpu_device pointer
d38ceaf9
AD
888 * @block: offset of instance
889 * @reg: offset of register
890 * @v: value to write to the register
891 *
892 * Dummy block register write function. Used for register blocks
893 * that certain asics don't have (all asics).
894 */
895static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
896 uint32_t block,
897 uint32_t reg, uint32_t v)
898{
899 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
900 reg, block, v);
901 BUG();
902}
903
4d2997ab
AD
904/**
905 * amdgpu_device_asic_init - Wrapper for atom asic_init
906 *
982a820b 907 * @adev: amdgpu_device pointer
4d2997ab
AD
908 *
909 * Does any asic specific work and then calls atom asic init.
910 */
911static int amdgpu_device_asic_init(struct amdgpu_device *adev)
912{
913 amdgpu_asic_pre_asic_init(adev);
914
915 return amdgpu_atom_asic_init(adev->mode_info.atom_context);
916}
917
e3ecdffa
AD
918/**
919 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
920 *
982a820b 921 * @adev: amdgpu_device pointer
e3ecdffa
AD
922 *
923 * Allocates a scratch page of VRAM for use by various things in the
924 * driver.
925 */
06ec9070 926static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
d38ceaf9 927{
a4a02777
CK
928 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
929 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
930 &adev->vram_scratch.robj,
931 &adev->vram_scratch.gpu_addr,
932 (void **)&adev->vram_scratch.ptr);
d38ceaf9
AD
933}
934
e3ecdffa
AD
935/**
936 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
937 *
982a820b 938 * @adev: amdgpu_device pointer
e3ecdffa
AD
939 *
940 * Frees the VRAM scratch page.
941 */
06ec9070 942static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 943{
078af1a3 944 amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
d38ceaf9
AD
945}
946
947/**
9c3f2b54 948 * amdgpu_device_program_register_sequence - program an array of registers.
d38ceaf9
AD
949 *
950 * @adev: amdgpu_device pointer
951 * @registers: pointer to the register array
952 * @array_size: size of the register array
953 *
954 * Programs an array of registers with and/or masks.
955 * This is a helper for setting golden registers.
956 */
9c3f2b54
AD
957void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
958 const u32 *registers,
959 const u32 array_size)
d38ceaf9
AD
960{
961 u32 tmp, reg, and_mask, or_mask;
962 int i;
963
964 if (array_size % 3)
965 return;
966
967 for (i = 0; i < array_size; i +=3) {
968 reg = registers[i + 0];
969 and_mask = registers[i + 1];
970 or_mask = registers[i + 2];
971
972 if (and_mask == 0xffffffff) {
973 tmp = or_mask;
974 } else {
975 tmp = RREG32(reg);
976 tmp &= ~and_mask;
e0d07657
HZ
977 if (adev->family >= AMDGPU_FAMILY_AI)
978 tmp |= (or_mask & and_mask);
979 else
980 tmp |= or_mask;
d38ceaf9
AD
981 }
982 WREG32(reg, tmp);
983 }
984}
985
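/*
 * Illustrative sketch (not part of the driver): the register array consumed
 * by amdgpu_device_program_register_sequence() is a flat list of
 * {offset, and_mask, or_mask} triples.  The offsets and masks below are
 * placeholders, not real golden settings.
 */
static const u32 __maybe_unused example_golden_settings[] = {
	/* reg offset, and_mask, or_mask */
	0x1234, 0xffffffff, 0x00000001,	/* and_mask of all ones: plain overwrite */
	0x5678, 0x0000ff00, 0x00001200,	/* read-modify-write of one byte field */
};

/*
 * A caller would then apply it with:
 * amdgpu_device_program_register_sequence(adev, example_golden_settings,
 *					   ARRAY_SIZE(example_golden_settings));
 */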
e3ecdffa
AD
986/**
987 * amdgpu_device_pci_config_reset - reset the GPU
988 *
989 * @adev: amdgpu_device pointer
990 *
991 * Resets the GPU using the pci config reset sequence.
992 * Only applicable to asics prior to vega10.
993 */
8111c387 994void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
995{
996 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
997}
998
af484df8
AD
999/**
1000 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1001 *
1002 * @adev: amdgpu_device pointer
1003 *
1004 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1005 */
1006int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1007{
1008 return pci_reset_function(adev->pdev);
1009}
1010
d38ceaf9
AD
1011/*
1012 * GPU doorbell aperture helpers function.
1013 */
1014/**
06ec9070 1015 * amdgpu_device_doorbell_init - Init doorbell driver information.
d38ceaf9
AD
1016 *
1017 * @adev: amdgpu_device pointer
1018 *
1019 * Init doorbell driver information (CIK)
1020 * Returns 0 on success, error on failure.
1021 */
06ec9070 1022static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
d38ceaf9 1023{
6585661d 1024
705e519e
CK
1025 /* No doorbell on SI hardware generation */
1026 if (adev->asic_type < CHIP_BONAIRE) {
1027 adev->doorbell.base = 0;
1028 adev->doorbell.size = 0;
1029 adev->doorbell.num_doorbells = 0;
1030 adev->doorbell.ptr = NULL;
1031 return 0;
1032 }
1033
d6895ad3
CK
1034 if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
1035 return -EINVAL;
1036
22357775
AD
1037 amdgpu_asic_init_doorbell_index(adev);
1038
d38ceaf9
AD
1039 /* doorbell bar mapping */
1040 adev->doorbell.base = pci_resource_start(adev->pdev, 2);
1041 adev->doorbell.size = pci_resource_len(adev->pdev, 2);
1042
edf600da 1043 adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
9564f192 1044 adev->doorbell_index.max_assignment+1);
d38ceaf9
AD
1045 if (adev->doorbell.num_doorbells == 0)
1046 return -EINVAL;
1047
ec3db8a6 1048 /* For Vega, reserve and map two pages on doorbell BAR since SDMA
88dc26e4
OZ
1049 * paging queue doorbells use the second page. The
1050 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
1051 * doorbells are in the first page. So with the paging queue enabled,
1052 * the max num_doorbells should be increased by one page (0x400 in dwords).
ec3db8a6
PY
1053 */
1054 if (adev->asic_type >= CHIP_VEGA10)
88dc26e4 1055 adev->doorbell.num_doorbells += 0x400;
ec3db8a6 1056
8972e5d2
CK
1057 adev->doorbell.ptr = ioremap(adev->doorbell.base,
1058 adev->doorbell.num_doorbells *
1059 sizeof(u32));
1060 if (adev->doorbell.ptr == NULL)
d38ceaf9 1061 return -ENOMEM;
d38ceaf9
AD
1062
1063 return 0;
1064}
1065
1066/**
06ec9070 1067 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
d38ceaf9
AD
1068 *
1069 * @adev: amdgpu_device pointer
1070 *
1071 * Tear down doorbell driver information (CIK)
1072 */
06ec9070 1073static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1074{
1075 iounmap(adev->doorbell.ptr);
1076 adev->doorbell.ptr = NULL;
1077}
1078
22cb0164 1079
d38ceaf9
AD
1080
1081/*
06ec9070 1082 * amdgpu_device_wb_*()
455a7bc2 1083 * Writeback is the method by which the GPU updates special pages in memory
ea81a173 1084 * with the status of certain GPU events (fences, ring pointers,etc.).
d38ceaf9
AD
1085 */
1086
1087/**
06ec9070 1088 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
1089 *
1090 * @adev: amdgpu_device pointer
1091 *
1092 * Disables Writeback and frees the Writeback memory (all asics).
1093 * Used at driver shutdown.
1094 */
06ec9070 1095static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1096{
1097 if (adev->wb.wb_obj) {
a76ed485
AD
1098 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1099 &adev->wb.gpu_addr,
1100 (void **)&adev->wb.wb);
d38ceaf9
AD
1101 adev->wb.wb_obj = NULL;
1102 }
1103}
1104
1105/**
03f2abb0 1106 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
d38ceaf9
AD
1107 *
1108 * @adev: amdgpu_device pointer
1109 *
455a7bc2 1110 * Initializes writeback and allocates writeback memory (all asics).
d38ceaf9
AD
1111 * Used at driver startup.
1112 * Returns 0 on success or a negative error code on failure.
1113 */
06ec9070 1114static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
1115{
1116 int r;
1117
1118 if (adev->wb.wb_obj == NULL) {
97407b63
AD
1119 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1120 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
1121 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1122 &adev->wb.wb_obj, &adev->wb.gpu_addr,
1123 (void **)&adev->wb.wb);
d38ceaf9
AD
1124 if (r) {
1125 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1126 return r;
1127 }
d38ceaf9
AD
1128
1129 adev->wb.num_wb = AMDGPU_MAX_WB;
1130 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1131
1132 /* clear wb memory */
73469585 1133 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
1134 }
1135
1136 return 0;
1137}
1138
1139/**
131b4b36 1140 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
1141 *
1142 * @adev: amdgpu_device pointer
1143 * @wb: wb index
1144 *
1145 * Allocate a wb slot for use by the driver (all asics).
1146 * Returns 0 on success or -EINVAL on failure.
1147 */
131b4b36 1148int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9
AD
1149{
1150 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 1151
97407b63 1152 if (offset < adev->wb.num_wb) {
7014285a 1153 __set_bit(offset, adev->wb.used);
63ae07ca 1154 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
1155 return 0;
1156 } else {
1157 return -EINVAL;
1158 }
1159}
1160
d38ceaf9 1161/**
131b4b36 1162 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
1163 *
1164 * @adev: amdgpu_device pointer
1165 * @wb: wb index
1166 *
1167 * Free a wb slot allocated for use by the driver (all asics)
1168 */
131b4b36 1169void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 1170{
73469585 1171 wb >>= 3;
d38ceaf9 1172 if (wb < adev->wb.num_wb)
73469585 1173 __clear_bit(wb, adev->wb.used);
d38ceaf9
AD
1174}
1175
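/*
 * Illustrative sketch (not part of the driver): allocating a writeback slot,
 * touching it through its CPU mapping, and freeing it again.  The GPU address
 * of the slot would be adev->wb.gpu_addr + wb * 4.
 */
static int __maybe_unused example_use_wb_slot(struct amdgpu_device *adev)
{
	u32 wb;
	int r;

	r = amdgpu_device_wb_get(adev, &wb);	/* wb is returned as a dword offset */
	if (r)
		return r;

	adev->wb.wb[wb] = 0;			/* clear the slot via the CPU mapping */

	amdgpu_device_wb_free(adev, wb);
	return 0;
}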
d6895ad3
CK
1176/**
1177 * amdgpu_device_resize_fb_bar - try to resize FB BAR
1178 *
1179 * @adev: amdgpu_device pointer
1180 *
1181 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1182 * to fail, but if any of the BARs is not accessible after the resize we abort
1183 * driver loading by returning -ENODEV.
1184 */
1185int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1186{
453f617a 1187 int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
31b8adab
CK
1188 struct pci_bus *root;
1189 struct resource *res;
1190 unsigned i;
d6895ad3
CK
1191 u16 cmd;
1192 int r;
1193
0c03b912 1194 /* Bypass for VF */
1195 if (amdgpu_sriov_vf(adev))
1196 return 0;
1197
b7221f2b
AD
1198 /* skip if the bios has already enabled large BAR */
1199 if (adev->gmc.real_vram_size &&
1200 (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1201 return 0;
1202
31b8adab
CK
1203 /* Check if the root BUS has 64bit memory resources */
1204 root = adev->pdev->bus;
1205 while (root->parent)
1206 root = root->parent;
1207
1208 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 1209 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
1210 res->start > 0x100000000ull)
1211 break;
1212 }
1213
1214 /* Trying to resize is pointless without a root hub window above 4GB */
1215 if (!res)
1216 return 0;
1217
453f617a
ND
1218 /* Limit the BAR size to what is available */
1219 rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1220 rbar_size);
1221
d6895ad3
CK
1222 /* Disable memory decoding while we change the BAR addresses and size */
1223 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1224 pci_write_config_word(adev->pdev, PCI_COMMAND,
1225 cmd & ~PCI_COMMAND_MEMORY);
1226
1227 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
06ec9070 1228 amdgpu_device_doorbell_fini(adev);
d6895ad3
CK
1229 if (adev->asic_type >= CHIP_BONAIRE)
1230 pci_release_resource(adev->pdev, 2);
1231
1232 pci_release_resource(adev->pdev, 0);
1233
1234 r = pci_resize_resource(adev->pdev, 0, rbar_size);
1235 if (r == -ENOSPC)
1236 DRM_INFO("Not enough PCI address space for a large BAR.");
1237 else if (r && r != -ENOTSUPP)
1238 DRM_ERROR("Problem resizing BAR0 (%d).", r);
1239
1240 pci_assign_unassigned_bus_resources(adev->pdev->bus);
1241
1242 /* When the doorbell or fb BAR isn't available we have no chance of
1243 * using the device.
1244 */
06ec9070 1245 r = amdgpu_device_doorbell_init(adev);
d6895ad3
CK
1246 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1247 return -ENODEV;
1248
1249 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1250
1251 return 0;
1252}
a05502e5 1253
d38ceaf9
AD
1254/*
1255 * GPU helpers function.
1256 */
1257/**
39c640c0 1258 * amdgpu_device_need_post - check whether the hw needs to be posted or not
d38ceaf9
AD
1259 *
1260 * @adev: amdgpu_device pointer
1261 *
c836fec5
JQ
1262 * Check whether the asic has been initialized (all asics) at driver startup,
1263 * or whether a post is needed because a hw reset was performed.
1264 * Returns true if a post is needed, false if not.
d38ceaf9 1265 */
39c640c0 1266bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
1267{
1268 uint32_t reg;
1269
bec86378
ML
1270 if (amdgpu_sriov_vf(adev))
1271 return false;
1272
1273 if (amdgpu_passthrough(adev)) {
1da2c326
ML
1274 /* for FIJI: in the whole-GPU pass-through virtualization case, after a VM reboot
1275 * some old SMC firmware still needs the driver to do a vPost, otherwise the GPU hangs.
1276 * SMC firmware versions above 22.15 don't have this flaw, so force
1277 * vPost for SMC versions below 22.15
bec86378
ML
1278 */
1279 if (adev->asic_type == CHIP_FIJI) {
1280 int err;
1281 uint32_t fw_ver;
1282 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1283 /* force vPost if an error occurred */
1284 if (err)
1285 return true;
1286
1287 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1da2c326
ML
1288 if (fw_ver < 0x00160e00)
1289 return true;
bec86378 1290 }
bec86378 1291 }
91fe77eb 1292
e3c1b071 1293 /* Don't post if we need to reset whole hive on init */
1294 if (adev->gmc.xgmi.pending_reset)
1295 return false;
1296
91fe77eb 1297 if (adev->has_hw_reset) {
1298 adev->has_hw_reset = false;
1299 return true;
1300 }
1301
1302 /* bios scratch used on CIK+ */
1303 if (adev->asic_type >= CHIP_BONAIRE)
1304 return amdgpu_atombios_scratch_need_asic_init(adev);
1305
1306 /* check MEM_SIZE for older asics */
1307 reg = amdgpu_asic_get_config_memsize(adev);
1308
1309 if ((reg != 0) && (reg != 0xffffffff))
1310 return false;
1311
1312 return true;
bec86378
ML
1313}
1314
d38ceaf9
AD
1315/* if we get transitioned to only one device, take VGA back */
1316/**
06ec9070 1317 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9 1318 *
bf44e8ce 1319 * @pdev: PCI device pointer
d38ceaf9
AD
1320 * @state: enable/disable vga decode
1321 *
1322 * Enable/disable vga decode (all asics).
1323 * Returns VGA resource flags.
1324 */
bf44e8ce
CH
1325static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1326 bool state)
d38ceaf9 1327{
bf44e8ce 1328 struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
d38ceaf9
AD
1329 amdgpu_asic_set_vga_state(adev, state);
1330 if (state)
1331 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1332 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1333 else
1334 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1335}
1336
e3ecdffa
AD
1337/**
1338 * amdgpu_device_check_block_size - validate the vm block size
1339 *
1340 * @adev: amdgpu_device pointer
1341 *
1342 * Validates the vm block size specified via module parameter.
1343 * The vm block size defines number of bits in page table versus page directory,
1344 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1345 * page table and the remaining bits are in the page directory.
1346 */
06ec9070 1347static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
1348{
1349 /* defines number of bits in page table versus page directory,
1350 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1351 * page table and the remaining bits are in the page directory */
bab4fee7
JZ
1352 if (amdgpu_vm_block_size == -1)
1353 return;
a1adf8be 1354
bab4fee7 1355 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
1356 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1357 amdgpu_vm_block_size);
97489129 1358 amdgpu_vm_block_size = -1;
a1adf8be 1359 }
a1adf8be
CZ
1360}
1361
e3ecdffa
AD
1362/**
1363 * amdgpu_device_check_vm_size - validate the vm size
1364 *
1365 * @adev: amdgpu_device pointer
1366 *
1367 * Validates the vm size in GB specified via module parameter.
1368 * The VM size is the size of the GPU virtual memory space in GB.
1369 */
06ec9070 1370static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 1371{
64dab074
AD
1372 /* no need to check the default value */
1373 if (amdgpu_vm_size == -1)
1374 return;
1375
83ca145d
ZJ
1376 if (amdgpu_vm_size < 1) {
1377 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1378 amdgpu_vm_size);
f3368128 1379 amdgpu_vm_size = -1;
83ca145d 1380 }
83ca145d
ZJ
1381}
1382
7951e376
RZ
1383static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1384{
1385 struct sysinfo si;
a9d4fe2f 1386 bool is_os_64 = (sizeof(void *) == 8);
7951e376
RZ
1387 uint64_t total_memory;
1388 uint64_t dram_size_seven_GB = 0x1B8000000;
1389 uint64_t dram_size_three_GB = 0xB8000000;
1390
1391 if (amdgpu_smu_memory_pool_size == 0)
1392 return;
1393
1394 if (!is_os_64) {
1395 DRM_WARN("Not 64-bit OS, feature not supported\n");
1396 goto def_value;
1397 }
1398 si_meminfo(&si);
1399 total_memory = (uint64_t)si.totalram * si.mem_unit;
1400
1401 if ((amdgpu_smu_memory_pool_size == 1) ||
1402 (amdgpu_smu_memory_pool_size == 2)) {
1403 if (total_memory < dram_size_three_GB)
1404 goto def_value1;
1405 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1406 (amdgpu_smu_memory_pool_size == 8)) {
1407 if (total_memory < dram_size_seven_GB)
1408 goto def_value1;
1409 } else {
1410 DRM_WARN("Smu memory pool size not supported\n");
1411 goto def_value;
1412 }
1413 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1414
1415 return;
1416
1417def_value1:
1418 DRM_WARN("Not enough system memory\n");
1419def_value:
1420 adev->pm.smu_prv_buffer_size = 0;
1421}
1422
9f6a7857
HR
1423static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1424{
1425 if (!(adev->flags & AMD_IS_APU) ||
1426 adev->asic_type < CHIP_RAVEN)
1427 return 0;
1428
1429 switch (adev->asic_type) {
1430 case CHIP_RAVEN:
1431 if (adev->pdev->device == 0x15dd)
1432 adev->apu_flags |= AMD_APU_IS_RAVEN;
1433 if (adev->pdev->device == 0x15d8)
1434 adev->apu_flags |= AMD_APU_IS_PICASSO;
1435 break;
1436 case CHIP_RENOIR:
1437 if ((adev->pdev->device == 0x1636) ||
1438 (adev->pdev->device == 0x164c))
1439 adev->apu_flags |= AMD_APU_IS_RENOIR;
1440 else
1441 adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1442 break;
1443 case CHIP_VANGOGH:
1444 adev->apu_flags |= AMD_APU_IS_VANGOGH;
1445 break;
1446 case CHIP_YELLOW_CARP:
1447 break;
d0f56dc2
TZ
1448 case CHIP_CYAN_SKILLFISH:
1449 if (adev->pdev->device == 0x13FE)
1450 adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1451 break;
9f6a7857 1452 default:
4eaf21b7 1453 break;
9f6a7857
HR
1454 }
1455
1456 return 0;
1457}
1458
d38ceaf9 1459/**
06ec9070 1460 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
1461 *
1462 * @adev: amdgpu_device pointer
1463 *
1464 * Validates certain module parameters and updates
1465 * the associated values used by the driver (all asics).
1466 */
912dfc84 1467static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1468{
5b011235
CZ
1469 if (amdgpu_sched_jobs < 4) {
1470 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1471 amdgpu_sched_jobs);
1472 amdgpu_sched_jobs = 4;
76117507 1473 } else if (!is_power_of_2(amdgpu_sched_jobs)){
5b011235
CZ
1474 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1475 amdgpu_sched_jobs);
1476 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1477 }
d38ceaf9 1478
83e74db6 1479 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
1480 /* gart size must be greater than or equal to 32M */
1481 dev_warn(adev->dev, "gart size (%d) too small\n",
1482 amdgpu_gart_size);
83e74db6 1483 amdgpu_gart_size = -1;
d38ceaf9
AD
1484 }
1485
36d38372 1486 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1487 /* gtt size must be greater than or equal to 32M */
36d38372
CK
1488 dev_warn(adev->dev, "gtt size (%d) too small\n",
1489 amdgpu_gtt_size);
1490 amdgpu_gtt_size = -1;
d38ceaf9
AD
1491 }
1492
d07f14be
RH
1493 /* valid range is between 4 and 9 inclusive */
1494 if (amdgpu_vm_fragment_size != -1 &&
1495 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1496 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1497 amdgpu_vm_fragment_size = -1;
1498 }
1499
5d5bd5e3
KW
1500 if (amdgpu_sched_hw_submission < 2) {
1501 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1502 amdgpu_sched_hw_submission);
1503 amdgpu_sched_hw_submission = 2;
1504 } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1505 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1506 amdgpu_sched_hw_submission);
1507 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1508 }
1509
7951e376
RZ
1510 amdgpu_device_check_smu_prv_buffer_size(adev);
1511
06ec9070 1512 amdgpu_device_check_vm_size(adev);
d38ceaf9 1513
06ec9070 1514 amdgpu_device_check_block_size(adev);
6a7f76e7 1515
19aede77 1516 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84 1517
c6252390 1518 amdgpu_gmc_tmz_set(adev);
01a8dcec 1519
9b498efa
AD
1520 amdgpu_gmc_noretry_set(adev);
1521
e3c00faa 1522 return 0;
d38ceaf9
AD
1523}
1524
1525/**
1526 * amdgpu_switcheroo_set_state - set switcheroo state
1527 *
1528 * @pdev: pci dev pointer
1694467b 1529 * @state: vga_switcheroo state
d38ceaf9
AD
1530 *
1531 * Callback for the switcheroo driver. Suspends or resumes
1532 * the asics before or after it is powered up using ACPI methods.
1533 */
8aba21b7
LT
1534static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1535 enum vga_switcheroo_state state)
d38ceaf9
AD
1536{
1537 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 1538 int r;
d38ceaf9 1539
b98c6299 1540 if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
1541 return;
1542
1543 if (state == VGA_SWITCHEROO_ON) {
dd4fa6c1 1544 pr_info("switched on\n");
d38ceaf9
AD
1545 /* don't suspend or resume card normally */
1546 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1547
8f66090b
TZ
1548 pci_set_power_state(pdev, PCI_D0);
1549 amdgpu_device_load_pci_state(pdev);
1550 r = pci_enable_device(pdev);
de185019
AD
1551 if (r)
1552 DRM_WARN("pci_enable_device failed (%d)\n", r);
1553 amdgpu_device_resume(dev, true);
d38ceaf9 1554
d38ceaf9 1555 dev->switch_power_state = DRM_SWITCH_POWER_ON;
d38ceaf9 1556 } else {
dd4fa6c1 1557 pr_info("switched off\n");
d38ceaf9 1558 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
de185019 1559 amdgpu_device_suspend(dev, true);
8f66090b 1560 amdgpu_device_cache_pci_state(pdev);
de185019 1561 /* Shut down the device */
8f66090b
TZ
1562 pci_disable_device(pdev);
1563 pci_set_power_state(pdev, PCI_D3cold);
d38ceaf9
AD
1564 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1565 }
1566}
1567
1568/**
1569 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1570 *
1571 * @pdev: pci dev pointer
1572 *
1573 * Callback for the switcheroo driver. Check if the switcheroo
1574 * state can be changed.
1575 * Returns true if the state can be changed, false if not.
1576 */
1577static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1578{
1579 struct drm_device *dev = pci_get_drvdata(pdev);
1580
1581 /*
1582 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1583 * locking inversion with the driver load path. And the access here is
1584 * completely racy anyway. So don't bother with locking for now.
1585 */
7e13ad89 1586 return atomic_read(&dev->open_count) == 0;
d38ceaf9
AD
1587}
1588
1589static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1590 .set_gpu_state = amdgpu_switcheroo_set_state,
1591 .reprobe = NULL,
1592 .can_switch = amdgpu_switcheroo_can_switch,
1593};
1594
e3ecdffa
AD
1595/**
1596 * amdgpu_device_ip_set_clockgating_state - set the CG state
1597 *
87e3f136 1598 * @dev: amdgpu_device pointer
e3ecdffa
AD
1599 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1600 * @state: clockgating state (gate or ungate)
1601 *
1602 * Sets the requested clockgating state for all instances of
1603 * the hardware IP specified.
1604 * Returns the error code from the last instance.
1605 */
43fa561f 1606int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1607 enum amd_ip_block_type block_type,
1608 enum amd_clockgating_state state)
d38ceaf9 1609{
43fa561f 1610 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1611 int i, r = 0;
1612
1613 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1614 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1615 continue;
c722865a
RZ
1616 if (adev->ip_blocks[i].version->type != block_type)
1617 continue;
1618 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1619 continue;
1620 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1621 (void *)adev, state);
1622 if (r)
1623 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1624 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1625 }
1626 return r;
1627}
1628
e3ecdffa
AD
1629/**
1630 * amdgpu_device_ip_set_powergating_state - set the PG state
1631 *
87e3f136 1632 * @dev: amdgpu_device pointer
e3ecdffa
AD
1633 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1634 * @state: powergating state (gate or ungate)
1635 *
1636 * Sets the requested powergating state for all instances of
1637 * the hardware IP specified.
1638 * Returns the error code from the last instance.
1639 */
43fa561f 1640int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1641 enum amd_ip_block_type block_type,
1642 enum amd_powergating_state state)
d38ceaf9 1643{
43fa561f 1644 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1645 int i, r = 0;
1646
1647 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1648 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1649 continue;
c722865a
RZ
1650 if (adev->ip_blocks[i].version->type != block_type)
1651 continue;
1652 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1653 continue;
1654 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1655 (void *)adev, state);
1656 if (r)
1657 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1658 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1659 }
1660 return r;
1661}
1662
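/*
 * Illustrative sketch (not part of the driver): gating or ungating power for a
 * single IP type with the helper above.  The choice of VCN here is arbitrary.
 */
static void __maybe_unused example_toggle_vcn_powergating(struct amdgpu_device *adev,
							   bool gate)
{
	amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
					       gate ? AMD_PG_STATE_GATE :
						      AMD_PG_STATE_UNGATE);
}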
e3ecdffa
AD
1663/**
1664 * amdgpu_device_ip_get_clockgating_state - get the CG state
1665 *
1666 * @adev: amdgpu_device pointer
1667 * @flags: clockgating feature flags
1668 *
1669 * Walks the list of IPs on the device and updates the clockgating
1670 * flags for each IP.
1671 * Updates @flags with the feature flags for each hardware IP where
1672 * clockgating is enabled.
1673 */
2990a1fc
AD
1674void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1675 u32 *flags)
6cb2d4e4
HR
1676{
1677 int i;
1678
1679 for (i = 0; i < adev->num_ip_blocks; i++) {
1680 if (!adev->ip_blocks[i].status.valid)
1681 continue;
1682 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1683 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1684 }
1685}
1686
e3ecdffa
AD
1687/**
1688 * amdgpu_device_ip_wait_for_idle - wait for idle
1689 *
1690 * @adev: amdgpu_device pointer
1691 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1692 *
1693 * Waits for the requested hardware IP to be idle.
1694 * Returns 0 for success or a negative error code on failure.
1695 */
2990a1fc
AD
1696int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1697 enum amd_ip_block_type block_type)
5dbbb60b
AD
1698{
1699 int i, r;
1700
1701 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1702 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1703 continue;
a1255107
AD
1704 if (adev->ip_blocks[i].version->type == block_type) {
1705 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1706 if (r)
1707 return r;
1708 break;
1709 }
1710 }
1711 return 0;
1712
1713}
1714
e3ecdffa
AD
1715/**
1716 * amdgpu_device_ip_is_idle - is the hardware IP idle
1717 *
1718 * @adev: amdgpu_device pointer
1719 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1720 *
1721 * Check if the hardware IP is idle or not.
1722 * Returns true if the IP is idle, false if not.
1723 */
2990a1fc
AD
1724bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1725 enum amd_ip_block_type block_type)
5dbbb60b
AD
1726{
1727 int i;
1728
1729 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1730 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1731 continue;
a1255107
AD
1732 if (adev->ip_blocks[i].version->type == block_type)
1733 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1734 }
1735 return true;
1736
1737}
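/*
 * Illustrative sketch (not from the driver itself): a caller that must not
 * touch GFX state while the block is busy could combine the two helpers
 * above as shown; the wrapper name is hypothetical.
 */
#if 0	/* example only */
static int example_quiesce_gfx(struct amdgpu_device *adev)
{
	if (amdgpu_device_ip_is_idle(adev, AMD_IP_BLOCK_TYPE_GFX))
		return 0;
	/* Block until the GFX IP reports idle or an error is returned. */
	return amdgpu_device_ip_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GFX);
}
#endif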
1738
e3ecdffa
AD
1739/**
1740 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1741 *
1742 * @adev: amdgpu_device pointer
87e3f136 1743 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1744 *
1745 * Returns a pointer to the hardware IP block structure
1746 * if it exists for the asic, otherwise NULL.
1747 */
2990a1fc
AD
1748struct amdgpu_ip_block *
1749amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1750 enum amd_ip_block_type type)
d38ceaf9
AD
1751{
1752 int i;
1753
1754 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1755 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1756 return &adev->ip_blocks[i];
1757
1758 return NULL;
1759}
1760
1761/**
2990a1fc 1762 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1763 *
1764 * @adev: amdgpu_device pointer
5fc3aeeb 1765 * @type: enum amd_ip_block_type
d38ceaf9
AD
1766 * @major: major version
1767 * @minor: minor version
1768 *
1769 * Returns 0 if the IP block's version is equal to or greater than the one specified,
1770 * or 1 if it is smaller or the ip_block doesn't exist.
1771 */
2990a1fc
AD
1772int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1773 enum amd_ip_block_type type,
1774 u32 major, u32 minor)
d38ceaf9 1775{
2990a1fc 1776 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1777
a1255107
AD
1778 if (ip_block && ((ip_block->version->major > major) ||
1779 ((ip_block->version->major == major) &&
1780 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1781 return 0;
1782
1783 return 1;
1784}
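/*
 * Illustrative sketch (not from the driver itself): note the inverted sense
 * of the return value above - 0 means "equal or newer". A hypothetical
 * feature gate on SMC 7.0 or later would therefore look like this:
 */
#if 0	/* example only */
	if (amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_SMC,
					       7, 0) == 0)
		use_new_smc_feature = true;	/* hypothetical flag */
#endif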
1785
a1255107 1786/**
2990a1fc 1787 * amdgpu_device_ip_block_add
a1255107
AD
1788 *
1789 * @adev: amdgpu_device pointer
1790 * @ip_block_version: pointer to the IP to add
1791 *
1792 * Adds the IP block driver information to the collection of IPs
1793 * on the asic.
1794 */
2990a1fc
AD
1795int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1796 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1797{
1798 if (!ip_block_version)
1799 return -EINVAL;
1800
7bd939d0
LG
1801 switch (ip_block_version->type) {
1802 case AMD_IP_BLOCK_TYPE_VCN:
1803 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
1804 return 0;
1805 break;
1806 case AMD_IP_BLOCK_TYPE_JPEG:
1807 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
1808 return 0;
1809 break;
1810 default:
1811 break;
1812 }
1813
e966a725 1814 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1815 ip_block_version->funcs->name);
1816
a1255107
AD
1817 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1818
1819 return 0;
1820}
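/*
 * Illustrative sketch (not from the driver itself): an asic-specific
 * set_ip_blocks() routine registers its blocks in initialization order
 * roughly as below; the *_ip_block symbols are placeholders for whatever
 * the asic actually provides.
 */
#if 0	/* example only */
	r = amdgpu_device_ip_block_add(adev, &example_common_ip_block);
	if (r)
		return r;
	r = amdgpu_device_ip_block_add(adev, &example_gmc_ip_block);
	if (r)
		return r;
	/* ... IH, PSP, SMU, GFX, SDMA, display and multimedia blocks follow ... */
#endif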
1821
e3ecdffa
AD
1822/**
1823 * amdgpu_device_enable_virtual_display - enable virtual display feature
1824 *
1825 * @adev: amdgpu_device pointer
1826 *
1827 * Enables the virtual display feature if the user has enabled it via
1828 * the module parameter virtual_display. This feature provides virtual
1829 * display hardware on headless boards or in virtualized environments.
1830 * This function parses and validates the configuration string specified by
1831 * the user and configures the virtual display configuration (number of
1832 * virtual connectors, crtcs, etc.) specified.
1833 */
483ef985 1834static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1835{
1836 adev->enable_virtual_display = false;
1837
1838 if (amdgpu_virtual_display) {
8f66090b 1839 const char *pci_address_name = pci_name(adev->pdev);
0f66356d 1840 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1841
1842 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1843 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1844 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1845 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1846 if (!strcmp("all", pciaddname)
1847 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1848 long num_crtc;
1849 int res = -1;
1850
9accf2fd 1851 adev->enable_virtual_display = true;
0f66356d
ED
1852
1853 if (pciaddname_tmp)
1854 res = kstrtol(pciaddname_tmp, 10,
1855 &num_crtc);
1856
1857 if (!res) {
1858 if (num_crtc < 1)
1859 num_crtc = 1;
1860 if (num_crtc > 6)
1861 num_crtc = 6;
1862 adev->mode_info.num_crtc = num_crtc;
1863 } else {
1864 adev->mode_info.num_crtc = 1;
1865 }
9accf2fd
ED
1866 break;
1867 }
1868 }
1869
0f66356d
ED
1870 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1871 amdgpu_virtual_display, pci_address_name,
1872 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
1873
1874 kfree(pciaddstr);
1875 }
1876}
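/*
 * Illustrative note (not from the driver itself): based on the parsing above,
 * the virtual_display module parameter is a semicolon-separated list of
 * "<pci address>[,<crtc count>]" entries, with "all" matching every device,
 * e.g.:
 *
 *	modprobe amdgpu virtual_display=0000:01:00.0,2
 *	modprobe amdgpu virtual_display=all,1
 *
 * The crtc count is clamped to the 1..6 range handled above.
 */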
1877
e3ecdffa
AD
1878/**
1879 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1880 *
1881 * @adev: amdgpu_device pointer
1882 *
1883 * Parses the asic configuration parameters specified in the gpu info
1884 * firmware and makes them available to the driver for use in configuring
1885 * the asic.
1886 * Returns 0 on success, -EINVAL on failure.
1887 */
e2a75f88
AD
1888static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1889{
e2a75f88 1890 const char *chip_name;
c0a43457 1891 char fw_name[40];
e2a75f88
AD
1892 int err;
1893 const struct gpu_info_firmware_header_v1_0 *hdr;
1894
ab4fe3e1
HR
1895 adev->firmware.gpu_info_fw = NULL;
1896
72de33f8 1897 if (adev->mman.discovery_bin) {
258620d0 1898 amdgpu_discovery_get_gfx_info(adev);
cc375d8c
TY
1899
1900 /*
1901 * FIXME: The bounding box is still needed by Navi12, so
1902 * temporarily read it from gpu_info firmware. Should be dropped
1903 * when DAL no longer needs it.
1904 */
1905 if (adev->asic_type != CHIP_NAVI12)
1906 return 0;
258620d0
AD
1907 }
1908
e2a75f88 1909 switch (adev->asic_type) {
e2a75f88
AD
1910#ifdef CONFIG_DRM_AMDGPU_SI
1911 case CHIP_VERDE:
1912 case CHIP_TAHITI:
1913 case CHIP_PITCAIRN:
1914 case CHIP_OLAND:
1915 case CHIP_HAINAN:
1916#endif
1917#ifdef CONFIG_DRM_AMDGPU_CIK
1918 case CHIP_BONAIRE:
1919 case CHIP_HAWAII:
1920 case CHIP_KAVERI:
1921 case CHIP_KABINI:
1922 case CHIP_MULLINS:
1923#endif
da87c30b
AD
1924 case CHIP_TOPAZ:
1925 case CHIP_TONGA:
1926 case CHIP_FIJI:
1927 case CHIP_POLARIS10:
1928 case CHIP_POLARIS11:
1929 case CHIP_POLARIS12:
1930 case CHIP_VEGAM:
1931 case CHIP_CARRIZO:
1932 case CHIP_STONEY:
27c0bc71 1933 case CHIP_VEGA20:
44b3253a 1934 case CHIP_ALDEBARAN:
84d244a3
JC
1935 case CHIP_SIENNA_CICHLID:
1936 case CHIP_NAVY_FLOUNDER:
eac88a5f 1937 case CHIP_DIMGREY_CAVEFISH:
0e5f4b09 1938 case CHIP_BEIGE_GOBY:
e2a75f88
AD
1939 default:
1940 return 0;
1941 case CHIP_VEGA10:
1942 chip_name = "vega10";
1943 break;
3f76dced
AD
1944 case CHIP_VEGA12:
1945 chip_name = "vega12";
1946 break;
2d2e5e7e 1947 case CHIP_RAVEN:
54f78a76 1948 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
54c4d17e 1949 chip_name = "raven2";
54f78a76 1950 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
741deade 1951 chip_name = "picasso";
54c4d17e
FX
1952 else
1953 chip_name = "raven";
2d2e5e7e 1954 break;
65e60f6e
LM
1955 case CHIP_ARCTURUS:
1956 chip_name = "arcturus";
1957 break;
b51a26a0 1958 case CHIP_RENOIR:
2e62f0b5
PL
1959 if (adev->apu_flags & AMD_APU_IS_RENOIR)
1960 chip_name = "renoir";
1961 else
1962 chip_name = "green_sardine";
b51a26a0 1963 break;
23c6268e
HR
1964 case CHIP_NAVI10:
1965 chip_name = "navi10";
1966 break;
ed42cfe1
XY
1967 case CHIP_NAVI14:
1968 chip_name = "navi14";
1969 break;
42b325e5
XY
1970 case CHIP_NAVI12:
1971 chip_name = "navi12";
1972 break;
4e52a9f8
HR
1973 case CHIP_VANGOGH:
1974 chip_name = "vangogh";
1975 break;
8bf84f60
AL
1976 case CHIP_YELLOW_CARP:
1977 chip_name = "yellow_carp";
1978 break;
e2a75f88
AD
1979 }
1980
1981 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
ab4fe3e1 1982 err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
e2a75f88
AD
1983 if (err) {
1984 dev_err(adev->dev,
1985 "Failed to load gpu_info firmware \"%s\"\n",
1986 fw_name);
1987 goto out;
1988 }
ab4fe3e1 1989 err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
e2a75f88
AD
1990 if (err) {
1991 dev_err(adev->dev,
1992 "Failed to validate gpu_info firmware \"%s\"\n",
1993 fw_name);
1994 goto out;
1995 }
1996
ab4fe3e1 1997 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
1998 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1999
2000 switch (hdr->version_major) {
2001 case 1:
2002 {
2003 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 2004 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
2005 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2006
cc375d8c
TY
2007 /*
2008 * Should be dropped when DAL no longer needs it.
2009 */
2010 if (adev->asic_type == CHIP_NAVI12)
ec51d3fa
XY
2011 goto parse_soc_bounding_box;
2012
b5ab16bf
AD
2013 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2014 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2015 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2016 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 2017 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
2018 le32_to_cpu(gpu_info_fw->gc_num_tccs);
2019 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2020 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2021 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2022 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 2023 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
2024 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2025 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
2026 adev->gfx.cu_info.max_waves_per_simd =
2027 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2028 adev->gfx.cu_info.max_scratch_slots_per_cu =
2029 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2030 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 2031 if (hdr->version_minor >= 1) {
35c2e910
HZ
2032 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2033 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2034 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2035 adev->gfx.config.num_sc_per_sh =
2036 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2037 adev->gfx.config.num_packer_per_sc =
2038 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2039 }
ec51d3fa
XY
2040
2041parse_soc_bounding_box:
ec51d3fa
XY
2042 /*
2043 * soc bounding box info is not integrated into the discovery table,
258620d0 2044 * so we still need to parse it from the gpu info firmware when needed.
ec51d3fa 2045 */
48321c3d
HW
2046 if (hdr->version_minor == 2) {
2047 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2048 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2049 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2050 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2051 }
e2a75f88
AD
2052 break;
2053 }
2054 default:
2055 dev_err(adev->dev,
2056 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2057 err = -EINVAL;
2058 goto out;
2059 }
2060out:
e2a75f88
AD
2061 return err;
2062}
2063
e3ecdffa
AD
2064/**
2065 * amdgpu_device_ip_early_init - run early init for hardware IPs
2066 *
2067 * @adev: amdgpu_device pointer
2068 *
2069 * Early initialization pass for hardware IPs. The hardware IPs that make
2070 * up each asic are discovered and each IP's early_init callback is run. This
2071 * is the first stage in initializing the asic.
2072 * Returns 0 on success, negative error code on failure.
2073 */
06ec9070 2074static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 2075{
aaa36a97 2076 int i, r;
d38ceaf9 2077
483ef985 2078 amdgpu_device_enable_virtual_display(adev);
a6be7570 2079
00a979f3 2080 if (amdgpu_sriov_vf(adev)) {
00a979f3 2081 r = amdgpu_virt_request_full_gpu(adev, true);
aaa36a97
AD
2082 if (r)
2083 return r;
00a979f3
WS
2084 }
2085
d38ceaf9 2086 switch (adev->asic_type) {
33f34802
KW
2087#ifdef CONFIG_DRM_AMDGPU_SI
2088 case CHIP_VERDE:
2089 case CHIP_TAHITI:
2090 case CHIP_PITCAIRN:
2091 case CHIP_OLAND:
2092 case CHIP_HAINAN:
295d0daf 2093 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
2094 r = si_set_ip_blocks(adev);
2095 if (r)
2096 return r;
2097 break;
2098#endif
a2e73f56
AD
2099#ifdef CONFIG_DRM_AMDGPU_CIK
2100 case CHIP_BONAIRE:
2101 case CHIP_HAWAII:
2102 case CHIP_KAVERI:
2103 case CHIP_KABINI:
2104 case CHIP_MULLINS:
e1ad2d53 2105 if (adev->flags & AMD_IS_APU)
a2e73f56 2106 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
2107 else
2108 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
2109
2110 r = cik_set_ip_blocks(adev);
2111 if (r)
2112 return r;
2113 break;
2114#endif
da87c30b
AD
2115 case CHIP_TOPAZ:
2116 case CHIP_TONGA:
2117 case CHIP_FIJI:
2118 case CHIP_POLARIS10:
2119 case CHIP_POLARIS11:
2120 case CHIP_POLARIS12:
2121 case CHIP_VEGAM:
2122 case CHIP_CARRIZO:
2123 case CHIP_STONEY:
2124 if (adev->flags & AMD_IS_APU)
2125 adev->family = AMDGPU_FAMILY_CZ;
2126 else
2127 adev->family = AMDGPU_FAMILY_VI;
2128
2129 r = vi_set_ip_blocks(adev);
2130 if (r)
2131 return r;
2132 break;
d38ceaf9 2133 default:
63352b7f
AD
2134 r = amdgpu_discovery_set_ip_blocks(adev);
2135 if (r)
2136 return r;
2137 break;
d38ceaf9
AD
2138 }
2139
1884734a 2140 amdgpu_amdkfd_device_probe(adev);
2141
3b94fb10 2142 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 2143 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 2144 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
4215a119
HC
2145 if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2146 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
00f54b97 2147
d38ceaf9
AD
2148 for (i = 0; i < adev->num_ip_blocks; i++) {
2149 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
ed8cf00c
HR
2150 DRM_ERROR("disabled ip block: %d <%s>\n",
2151 i, adev->ip_blocks[i].version->funcs->name);
a1255107 2152 adev->ip_blocks[i].status.valid = false;
d38ceaf9 2153 } else {
a1255107
AD
2154 if (adev->ip_blocks[i].version->funcs->early_init) {
2155 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 2156 if (r == -ENOENT) {
a1255107 2157 adev->ip_blocks[i].status.valid = false;
2c1a2784 2158 } else if (r) {
a1255107
AD
2159 DRM_ERROR("early_init of IP block <%s> failed %d\n",
2160 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2161 return r;
2c1a2784 2162 } else {
a1255107 2163 adev->ip_blocks[i].status.valid = true;
2c1a2784 2164 }
974e6b64 2165 } else {
a1255107 2166 adev->ip_blocks[i].status.valid = true;
d38ceaf9 2167 }
d38ceaf9 2168 }
21a249ca
AD
2169 /* get the vbios after the asic_funcs are set up */
2170 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
2171 r = amdgpu_device_parse_gpu_info_fw(adev);
2172 if (r)
2173 return r;
2174
21a249ca
AD
2175 /* Read BIOS */
2176 if (!amdgpu_get_bios(adev))
2177 return -EINVAL;
2178
2179 r = amdgpu_atombios_init(adev);
2180 if (r) {
2181 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2182 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2183 return r;
2184 }
77eabc6f
PJZ
2185
2186 /* get pf2vf msg info at its earliest time */
2187 if (amdgpu_sriov_vf(adev))
2188 amdgpu_virt_init_data_exchange(adev);
2189
21a249ca 2190 }
d38ceaf9
AD
2191 }
2192
395d1fb9
NH
2193 adev->cg_flags &= amdgpu_cg_mask;
2194 adev->pg_flags &= amdgpu_pg_mask;
2195
d38ceaf9
AD
2196 return 0;
2197}
2198
0a4f2520
RZ
2199static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2200{
2201 int i, r;
2202
2203 for (i = 0; i < adev->num_ip_blocks; i++) {
2204 if (!adev->ip_blocks[i].status.sw)
2205 continue;
2206 if (adev->ip_blocks[i].status.hw)
2207 continue;
2208 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 2209 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
2210 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2211 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2212 if (r) {
2213 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2214 adev->ip_blocks[i].version->funcs->name, r);
2215 return r;
2216 }
2217 adev->ip_blocks[i].status.hw = true;
2218 }
2219 }
2220
2221 return 0;
2222}
2223
2224static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2225{
2226 int i, r;
2227
2228 for (i = 0; i < adev->num_ip_blocks; i++) {
2229 if (!adev->ip_blocks[i].status.sw)
2230 continue;
2231 if (adev->ip_blocks[i].status.hw)
2232 continue;
2233 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2234 if (r) {
2235 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2236 adev->ip_blocks[i].version->funcs->name, r);
2237 return r;
2238 }
2239 adev->ip_blocks[i].status.hw = true;
2240 }
2241
2242 return 0;
2243}
2244
7a3e0bb2
RZ
2245static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2246{
2247 int r = 0;
2248 int i;
80f41f84 2249 uint32_t smu_version;
7a3e0bb2
RZ
2250
2251 if (adev->asic_type >= CHIP_VEGA10) {
2252 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
2253 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2254 continue;
2255
e3c1b071 2256 if (!adev->ip_blocks[i].status.sw)
2257 continue;
2258
482f0e53
ML
2259 /* no need to do the fw loading again if already done */
2260 if (adev->ip_blocks[i].status.hw == true)
2261 break;
2262
53b3f8f4 2263 if (amdgpu_in_reset(adev) || adev->in_suspend) {
482f0e53
ML
2264 r = adev->ip_blocks[i].version->funcs->resume(adev);
2265 if (r) {
2266 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 2267 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
2268 return r;
2269 }
2270 } else {
2271 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2272 if (r) {
2273 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2274 adev->ip_blocks[i].version->funcs->name, r);
2275 return r;
7a3e0bb2 2276 }
7a3e0bb2 2277 }
482f0e53
ML
2278
2279 adev->ip_blocks[i].status.hw = true;
2280 break;
7a3e0bb2
RZ
2281 }
2282 }
482f0e53 2283
8973d9ec
ED
2284 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2285 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 2286
80f41f84 2287 return r;
7a3e0bb2
RZ
2288}
2289
5fd8518d
AG
2290static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2291{
2292 long timeout;
2293 int r, i;
2294
2295 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2296 struct amdgpu_ring *ring = adev->rings[i];
2297
2298 /* No need to set up the GPU scheduler for rings that don't need it */
2299 if (!ring || ring->no_scheduler)
2300 continue;
2301
2302 switch (ring->funcs->type) {
2303 case AMDGPU_RING_TYPE_GFX:
2304 timeout = adev->gfx_timeout;
2305 break;
2306 case AMDGPU_RING_TYPE_COMPUTE:
2307 timeout = adev->compute_timeout;
2308 break;
2309 case AMDGPU_RING_TYPE_SDMA:
2310 timeout = adev->sdma_timeout;
2311 break;
2312 default:
2313 timeout = adev->video_timeout;
2314 break;
2315 }
2316
2317 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
2318 ring->num_hw_submission, amdgpu_job_hang_limit,
cfbb6b00 2319 timeout, adev->reset_domain->wq, ring->sched_score, ring->name);
5fd8518d
AG
2320 if (r) {
2321 DRM_ERROR("Failed to create scheduler on ring %s.\n",
2322 ring->name);
2323 return r;
2324 }
2325 }
2326
2327 return 0;
2328}
2329
2330
e3ecdffa
AD
2331/**
2332 * amdgpu_device_ip_init - run init for hardware IPs
2333 *
2334 * @adev: amdgpu_device pointer
2335 *
2336 * Main initialization pass for hardware IPs. The list of all the hardware
2337 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2338 * are run. sw_init initializes the software state associated with each IP
2339 * and hw_init initializes the hardware associated with each IP.
2340 * Returns 0 on success, negative error code on failure.
2341 */
06ec9070 2342static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
2343{
2344 int i, r;
2345
c030f2e4 2346 r = amdgpu_ras_init(adev);
2347 if (r)
2348 return r;
2349
d38ceaf9 2350 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2351 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2352 continue;
a1255107 2353 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 2354 if (r) {
a1255107
AD
2355 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2356 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 2357 goto init_failed;
2c1a2784 2358 }
a1255107 2359 adev->ip_blocks[i].status.sw = true;
bfca0289 2360
d38ceaf9 2361 /* need to do gmc hw init early so we can allocate gpu mem */
a1255107 2362 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
892deb48
VS
2363 /* Try to reserve bad pages early */
2364 if (amdgpu_sriov_vf(adev))
2365 amdgpu_virt_exchange_data(adev);
2366
06ec9070 2367 r = amdgpu_device_vram_scratch_init(adev);
2c1a2784
AD
2368 if (r) {
2369 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
72d3f592 2370 goto init_failed;
2c1a2784 2371 }
a1255107 2372 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
2373 if (r) {
2374 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 2375 goto init_failed;
2c1a2784 2376 }
06ec9070 2377 r = amdgpu_device_wb_init(adev);
2c1a2784 2378 if (r) {
06ec9070 2379 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 2380 goto init_failed;
2c1a2784 2381 }
a1255107 2382 adev->ip_blocks[i].status.hw = true;
2493664f
ML
2383
2384 /* right after GMC hw init, we create CSA */
f92d5c61 2385 if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
1e256e27
RZ
2386 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2387 AMDGPU_GEM_DOMAIN_VRAM,
2388 AMDGPU_CSA_SIZE);
2493664f
ML
2389 if (r) {
2390 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 2391 goto init_failed;
2493664f
ML
2392 }
2393 }
d38ceaf9
AD
2394 }
2395 }
2396
c9ffa427 2397 if (amdgpu_sriov_vf(adev))
9a458402 2398 amdgpu_virt_init_data_exchange(adev);
c9ffa427 2399
533aed27
AG
2400 r = amdgpu_ib_pool_init(adev);
2401 if (r) {
2402 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2403 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2404 goto init_failed;
2405 }
2406
c8963ea4
RZ
2407 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2408 if (r)
72d3f592 2409 goto init_failed;
0a4f2520
RZ
2410
2411 r = amdgpu_device_ip_hw_init_phase1(adev);
2412 if (r)
72d3f592 2413 goto init_failed;
0a4f2520 2414
7a3e0bb2
RZ
2415 r = amdgpu_device_fw_loading(adev);
2416 if (r)
72d3f592 2417 goto init_failed;
7a3e0bb2 2418
0a4f2520
RZ
2419 r = amdgpu_device_ip_hw_init_phase2(adev);
2420 if (r)
72d3f592 2421 goto init_failed;
d38ceaf9 2422
121a2bc6
AG
2423 /*
2424 * retired pages will be loaded from eeprom and reserved here,
2425 * it should be called after amdgpu_device_ip_hw_init_phase2 since
2426 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2427 * for I2C communication, which is only true at this point.
b82e65a9
GC
2428 *
2429 * amdgpu_ras_recovery_init may fail, but the upper layers only care about
2430 * failures caused by a bad gpu situation and stop the amdgpu init process
2431 * accordingly. For other failed cases, it will still release all
2432 * the resources and print an error message, rather than returning a
2433 * negative value to the upper level.
121a2bc6
AG
2434 *
2435 * Note: theoretically, this should be called before all vram allocations
2436 * to protect retired pages from being abused
2437 */
b82e65a9
GC
2438 r = amdgpu_ras_recovery_init(adev);
2439 if (r)
2440 goto init_failed;
121a2bc6 2441
cfbb6b00
AG
2442 /**
2443 * In case of XGMI, grab an extra reference on the reset domain for this device
2444 */
a4c63caf 2445 if (adev->gmc.xgmi.num_physical_nodes > 1) {
cfbb6b00
AG
2446 if (amdgpu_xgmi_add_device(adev) == 0) {
2447 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
a4c63caf 2448
cfbb6b00
AG
2449 if (!hive->reset_domain ||
2450 !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2451 r = -ENOENT;
2452 goto init_failed;
2453 }
e3c1b071 2454
cfbb6b00
AG
2455 /* Drop the early temporary reset domain we created for device */
2456 amdgpu_reset_put_reset_domain(adev->reset_domain);
2457 adev->reset_domain = hive->reset_domain;
a4c63caf
AG
2458 }
2459 }
2460
5fd8518d
AG
2461 r = amdgpu_device_init_schedulers(adev);
2462 if (r)
2463 goto init_failed;
2464
e3c1b071 2465 /* Don't init kfd if the whole hive needs to be reset during init */
2466 if (!adev->gmc.xgmi.pending_reset)
2467 amdgpu_amdkfd_device_init(adev);
c6332b97 2468
bd607166
KR
2469 amdgpu_fru_get_product_info(adev);
2470
72d3f592 2471init_failed:
c9ffa427 2472 if (amdgpu_sriov_vf(adev))
c6332b97 2473 amdgpu_virt_release_full_gpu(adev, true);
2474
72d3f592 2475 return r;
d38ceaf9
AD
2476}
2477
e3ecdffa
AD
2478/**
2479 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2480 *
2481 * @adev: amdgpu_device pointer
2482 *
2483 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2484 * this function before a GPU reset. If the value is retained after a
2485 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2486 */
06ec9070 2487static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2488{
2489 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2490}
2491
e3ecdffa
AD
2492/**
2493 * amdgpu_device_check_vram_lost - check if vram is valid
2494 *
2495 * @adev: amdgpu_device pointer
2496 *
2497 * Checks the reset magic value written to the gart pointer in VRAM.
2498 * The driver calls this after a GPU reset to see if the contents of
2499 * VRAM are lost or not.
2500 * returns true if vram is lost, false if not.
2501 */
06ec9070 2502static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2503{
dadce777
EQ
2504 if (memcmp(adev->gart.ptr, adev->reset_magic,
2505 AMDGPU_RESET_MAGIC_NUM))
2506 return true;
2507
53b3f8f4 2508 if (!amdgpu_in_reset(adev))
dadce777
EQ
2509 return false;
2510
2511 /*
2512 * For all ASICs with baco/mode1 reset, the VRAM is
2513 * always assumed to be lost.
2514 */
2515 switch (amdgpu_asic_reset_method(adev)) {
2516 case AMD_RESET_METHOD_BACO:
2517 case AMD_RESET_METHOD_MODE1:
2518 return true;
2519 default:
2520 return false;
2521 }
0c49e0b8
CZ
2522}
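/*
 * Illustrative sketch (not from the driver itself): the fill/check pair above
 * is used around a GPU reset roughly as below, so the reset path can decide
 * whether buffer contents must be treated as lost.
 */
#if 0	/* example only */
	bool vram_lost;

	amdgpu_device_fill_reset_magic(adev);	/* record the magic before reset */
	/* ... asic reset happens here ... */
	vram_lost = amdgpu_device_check_vram_lost(adev);
	if (vram_lost)
		DRM_INFO("VRAM is lost due to GPU reset!\n");
#endif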
2523
e3ecdffa 2524/**
1112a46b 2525 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2526 *
2527 * @adev: amdgpu_device pointer
b8b72130 2528 * @state: clockgating state (gate or ungate)
e3ecdffa 2529 *
e3ecdffa 2530 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2531 * set_clockgating_state callbacks are run.
2532 * The late initialization pass enables clockgating for the hardware IPs;
2533 * the fini or suspend pass disables it.
e3ecdffa
AD
2534 * Returns 0 on success, negative error code on failure.
2535 */
fdd34271 2536
5d89bb2d
LL
2537int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2538 enum amd_clockgating_state state)
d38ceaf9 2539{
1112a46b 2540 int i, j, r;
d38ceaf9 2541
4a2ba394
SL
2542 if (amdgpu_emu_mode == 1)
2543 return 0;
2544
1112a46b
RZ
2545 for (j = 0; j < adev->num_ip_blocks; j++) {
2546 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2547 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2548 continue;
5d70a549
PV
2549 /* skip CG for GFX on S0ix */
2550 if (adev->in_s0ix &&
2551 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX)
2552 continue;
4a446d55 2553 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2554 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2555 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2556 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2557 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2558 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2559 /* enable clockgating to save power */
a1255107 2560 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2561 state);
4a446d55
AD
2562 if (r) {
2563 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2564 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2565 return r;
2566 }
b0b00ff1 2567 }
d38ceaf9 2568 }
06b18f61 2569
c9f96fd5
RZ
2570 return 0;
2571}
2572
5d89bb2d
LL
2573int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2574 enum amd_powergating_state state)
c9f96fd5 2575{
1112a46b 2576 int i, j, r;
06b18f61 2577
c9f96fd5
RZ
2578 if (amdgpu_emu_mode == 1)
2579 return 0;
2580
1112a46b
RZ
2581 for (j = 0; j < adev->num_ip_blocks; j++) {
2582 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2583 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5 2584 continue;
5d70a549
PV
2585 /* skip PG for GFX on S0ix */
2586 if (adev->in_s0ix &&
2587 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX)
2588 continue;
c9f96fd5
RZ
2589 /* skip CG for VCE/UVD, it's handled specially */
2590 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2591 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2592 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2593 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2594 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2595 /* enable powergating to save power */
2596 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2597 state);
c9f96fd5
RZ
2598 if (r) {
2599 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2600 adev->ip_blocks[i].version->funcs->name, r);
2601 return r;
2602 }
2603 }
2604 }
2dc80b00
S
2605 return 0;
2606}
2607
beff74bc
AD
2608static int amdgpu_device_enable_mgpu_fan_boost(void)
2609{
2610 struct amdgpu_gpu_instance *gpu_ins;
2611 struct amdgpu_device *adev;
2612 int i, ret = 0;
2613
2614 mutex_lock(&mgpu_info.mutex);
2615
2616 /*
2617 * MGPU fan boost feature should be enabled
2618 * only when there are two or more dGPUs in
2619 * the system
2620 */
2621 if (mgpu_info.num_dgpu < 2)
2622 goto out;
2623
2624 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2625 gpu_ins = &(mgpu_info.gpu_ins[i]);
2626 adev = gpu_ins->adev;
2627 if (!(adev->flags & AMD_IS_APU) &&
f10bb940 2628 !gpu_ins->mgpu_fan_enabled) {
beff74bc
AD
2629 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2630 if (ret)
2631 break;
2632
2633 gpu_ins->mgpu_fan_enabled = 1;
2634 }
2635 }
2636
2637out:
2638 mutex_unlock(&mgpu_info.mutex);
2639
2640 return ret;
2641}
2642
e3ecdffa
AD
2643/**
2644 * amdgpu_device_ip_late_init - run late init for hardware IPs
2645 *
2646 * @adev: amdgpu_device pointer
2647 *
2648 * Late initialization pass for hardware IPs. The list of all the hardware
2649 * IPs that make up the asic is walked and the late_init callbacks are run.
2650 * late_init covers any special initialization that an IP requires
2652 * after all of the IPs have been initialized or something that needs to happen
2652 * late in the init process.
2653 * Returns 0 on success, negative error code on failure.
2654 */
06ec9070 2655static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2656{
60599a03 2657 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2658 int i = 0, r;
2659
2660 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2661 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2662 continue;
2663 if (adev->ip_blocks[i].version->funcs->late_init) {
2664 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2665 if (r) {
2666 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2667 adev->ip_blocks[i].version->funcs->name, r);
2668 return r;
2669 }
2dc80b00 2670 }
73f847db 2671 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2672 }
2673
a891d239
DL
2674 amdgpu_ras_set_error_query_ready(adev, true);
2675
1112a46b
RZ
2676 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2677 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2678
06ec9070 2679 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2680
beff74bc
AD
2681 r = amdgpu_device_enable_mgpu_fan_boost();
2682 if (r)
2683 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2684
4da8b639 2685 /* For passthrough configurations on arcturus and aldebaran, enable special SBR handling */
2686 if (amdgpu_passthrough(adev) && ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
2687 adev->asic_type == CHIP_ALDEBARAN))
2688 smu_handle_passthrough_sbr(&adev->smu, true);
60599a03
EQ
2689
2690 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2691 mutex_lock(&mgpu_info.mutex);
2692
2693 /*
2694 * Reset device p-state to low as this was booted with high.
2695 *
2696 * This should be performed only after all devices from the same
2697 * hive get initialized.
2698 *
2699 * However, the number of devices in the hive is not known in advance;
2700 * it is counted one by one as the devices are initialized.
2701 *
2702 * So we wait until all XGMI interlinked devices are initialized.
2703 * This may bring some delays as those devices may come from
2704 * different hives. But that should be OK.
2705 */
2706 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2707 for (i = 0; i < mgpu_info.num_gpu; i++) {
2708 gpu_instance = &(mgpu_info.gpu_ins[i]);
2709 if (gpu_instance->adev->flags & AMD_IS_APU)
2710 continue;
2711
d84a430d
JK
2712 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2713 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
2714 if (r) {
2715 DRM_ERROR("pstate setting failed (%d).\n", r);
2716 break;
2717 }
2718 }
2719 }
2720
2721 mutex_unlock(&mgpu_info.mutex);
2722 }
2723
d38ceaf9
AD
2724 return 0;
2725}
2726
613aa3ea
LY
2727/**
2728 * amdgpu_device_smu_fini_early - smu hw_fini wrapper
2729 *
2730 * @adev: amdgpu_device pointer
2731 *
2732 * For ASICs that need to disable SMC first
2733 */
2734static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
2735{
2736 int i, r;
2737
2738 if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))
2739 return;
2740
2741 for (i = 0; i < adev->num_ip_blocks; i++) {
2742 if (!adev->ip_blocks[i].status.hw)
2743 continue;
2744 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2745 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2746 /* XXX handle errors */
2747 if (r) {
2748 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2749 adev->ip_blocks[i].version->funcs->name, r);
2750 }
2751 adev->ip_blocks[i].status.hw = false;
2752 break;
2753 }
2754 }
2755}
2756
e9669fb7 2757static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
d38ceaf9
AD
2758{
2759 int i, r;
2760
e9669fb7
AG
2761 for (i = 0; i < adev->num_ip_blocks; i++) {
2762 if (!adev->ip_blocks[i].version->funcs->early_fini)
2763 continue;
5278a159 2764
e9669fb7
AG
2765 r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
2766 if (r) {
2767 DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
2768 adev->ip_blocks[i].version->funcs->name, r);
2769 }
2770 }
c030f2e4 2771
e9669fb7 2772 amdgpu_amdkfd_suspend(adev, false);
a82400b5 2773
05df1f01 2774 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2775 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2776
613aa3ea
LY
2777 /* Workaround for ASICs that need to disable SMC first */
2778 amdgpu_device_smu_fini_early(adev);
3e96dbfd 2779
d38ceaf9 2780 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2781 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2782 continue;
8201a67a 2783
a1255107 2784 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2785 /* XXX handle errors */
2c1a2784 2786 if (r) {
a1255107
AD
2787 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2788 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2789 }
8201a67a 2790
a1255107 2791 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2792 }
2793
6effad8a
GC
2794 if (amdgpu_sriov_vf(adev)) {
2795 if (amdgpu_virt_release_full_gpu(adev, false))
2796 DRM_ERROR("failed to release exclusive mode on fini\n");
2797 }
2798
e9669fb7
AG
2799 return 0;
2800}
2801
2802/**
2803 * amdgpu_device_ip_fini - run fini for hardware IPs
2804 *
2805 * @adev: amdgpu_device pointer
2806 *
2807 * Main teardown pass for hardware IPs. The list of all the hardware
2808 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2809 * are run. hw_fini tears down the hardware associated with each IP
2810 * and sw_fini tears down any software state associated with each IP.
2811 * Returns 0 on success, negative error code on failure.
2812 */
2813static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2814{
2815 int i, r;
2816
2817 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2818 amdgpu_virt_release_ras_err_handler_data(adev);
2819
e9669fb7
AG
2820 if (adev->gmc.xgmi.num_physical_nodes > 1)
2821 amdgpu_xgmi_remove_device(adev);
2822
2823 amdgpu_amdkfd_device_fini_sw(adev);
9950cda2 2824
d38ceaf9 2825 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2826 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2827 continue;
c12aba3a
ML
2828
2829 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2830 amdgpu_ucode_free_bo(adev);
1e256e27 2831 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a
ML
2832 amdgpu_device_wb_fini(adev);
2833 amdgpu_device_vram_scratch_fini(adev);
533aed27 2834 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2835 }
2836
a1255107 2837 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2838 /* XXX handle errors */
2c1a2784 2839 if (r) {
a1255107
AD
2840 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2841 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2842 }
a1255107
AD
2843 adev->ip_blocks[i].status.sw = false;
2844 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2845 }
2846
a6dcfd9c 2847 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2848 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2849 continue;
a1255107
AD
2850 if (adev->ip_blocks[i].version->funcs->late_fini)
2851 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2852 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2853 }
2854
c030f2e4 2855 amdgpu_ras_fini(adev);
2856
d38ceaf9
AD
2857 return 0;
2858}
2859
e3ecdffa 2860/**
beff74bc 2861 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2862 *
1112a46b 2863 * @work: work_struct.
e3ecdffa 2864 */
beff74bc 2865static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2866{
2867 struct amdgpu_device *adev =
beff74bc 2868 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2869 int r;
2870
2871 r = amdgpu_ib_ring_tests(adev);
2872 if (r)
2873 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2874}
2875
1e317b99
RZ
2876static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2877{
2878 struct amdgpu_device *adev =
2879 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2880
90a92662
MD
2881 WARN_ON_ONCE(adev->gfx.gfx_off_state);
2882 WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
2883
2884 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2885 adev->gfx.gfx_off_state = true;
1e317b99
RZ
2886}
2887
e3ecdffa 2888/**
e7854a03 2889 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2890 *
2891 * @adev: amdgpu_device pointer
2892 *
2893 * Main suspend function for hardware IPs. The list of all the hardware
2894 * IPs that make up the asic is walked, clockgating is disabled and the
2895 * suspend callbacks are run. suspend puts the hardware and software state
2896 * in each IP into a state suitable for suspend.
2897 * Returns 0 on success, negative error code on failure.
2898 */
e7854a03
AD
2899static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2900{
2901 int i, r;
2902
50ec83f0
AD
2903 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2904 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2905
e7854a03
AD
2906 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2907 if (!adev->ip_blocks[i].status.valid)
2908 continue;
2b9f7848 2909
e7854a03 2910 /* displays are handled separately */
2b9f7848
ND
2911 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
2912 continue;
2913
2914 /* XXX handle errors */
2915 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2916 /* XXX handle errors */
2917 if (r) {
2918 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2919 adev->ip_blocks[i].version->funcs->name, r);
2920 return r;
e7854a03 2921 }
2b9f7848
ND
2922
2923 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
2924 }
2925
e7854a03
AD
2926 return 0;
2927}
2928
2929/**
2930 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2931 *
2932 * @adev: amdgpu_device pointer
2933 *
2934 * Main suspend function for hardware IPs. The list of all the hardware
2935 * IPs that make up the asic is walked, clockgating is disabled and the
2936 * suspend callbacks are run. suspend puts the hardware and software state
2937 * in each IP into a state suitable for suspend.
2938 * Returns 0 on success, negative error code on failure.
2939 */
2940static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2941{
2942 int i, r;
2943
557f42a2 2944 if (adev->in_s0ix)
34416931 2945 amdgpu_gfx_state_change_set(adev, sGpuChangeState_D3Entry);
34416931 2946
d38ceaf9 2947 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2948 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2949 continue;
e7854a03
AD
2950 /* displays are handled in phase1 */
2951 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2952 continue;
bff77e86
LM
2953 /* PSP lost connection when err_event_athub occurs */
2954 if (amdgpu_ras_intr_triggered() &&
2955 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2956 adev->ip_blocks[i].status.hw = false;
2957 continue;
2958 }
e3c1b071 2959
2960 /* skip unnecessary suspend if we have not initialized them yet */
2961 if (adev->gmc.xgmi.pending_reset &&
2962 !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2963 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
2964 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2965 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
2966 adev->ip_blocks[i].status.hw = false;
2967 continue;
2968 }
557f42a2 2969
32ff160d
AD
2970 /* skip suspend of gfx and psp for S0ix
2971 * gfx is in gfxoff state, so on resume it will exit gfxoff just
2972 * like at runtime. PSP is also part of the always on hardware
2973 * so no need to suspend it.
2974 */
557f42a2 2975 if (adev->in_s0ix &&
32ff160d
AD
2976 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
2977 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX))
557f42a2
AD
2978 continue;
2979
d38ceaf9 2980 /* XXX handle errors */
a1255107 2981 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 2982 /* XXX handle errors */
2c1a2784 2983 if (r) {
a1255107
AD
2984 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2985 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2986 }
876923fb 2987 adev->ip_blocks[i].status.hw = false;
a3a09142 2988 /* handle putting the SMC in the appropriate state */
86b93fd6
JZ
2989 if (!amdgpu_sriov_vf(adev)) {
2990 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2991 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
2992 if (r) {
2993 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
2994 adev->mp1_state, r);
2995 return r;
2996 }
a3a09142
AD
2997 }
2998 }
d38ceaf9
AD
2999 }
3000
3001 return 0;
3002}
3003
e7854a03
AD
3004/**
3005 * amdgpu_device_ip_suspend - run suspend for hardware IPs
3006 *
3007 * @adev: amdgpu_device pointer
3008 *
3009 * Main suspend function for hardware IPs. The list of all the hardware
3010 * IPs that make up the asic is walked, clockgating is disabled and the
3011 * suspend callbacks are run. suspend puts the hardware and software state
3012 * in each IP into a state suitable for suspend.
3013 * Returns 0 on success, negative error code on failure.
3014 */
3015int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3016{
3017 int r;
3018
3c73683c
JC
3019 if (amdgpu_sriov_vf(adev)) {
3020 amdgpu_virt_fini_data_exchange(adev);
e7819644 3021 amdgpu_virt_request_full_gpu(adev, false);
3c73683c 3022 }
e7819644 3023
e7854a03
AD
3024 r = amdgpu_device_ip_suspend_phase1(adev);
3025 if (r)
3026 return r;
3027 r = amdgpu_device_ip_suspend_phase2(adev);
3028
e7819644
YT
3029 if (amdgpu_sriov_vf(adev))
3030 amdgpu_virt_release_full_gpu(adev, false);
3031
e7854a03
AD
3032 return r;
3033}
3034
06ec9070 3035static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3036{
3037 int i, r;
3038
2cb681b6
ML
3039 static enum amd_ip_block_type ip_order[] = {
3040 AMD_IP_BLOCK_TYPE_GMC,
3041 AMD_IP_BLOCK_TYPE_COMMON,
39186aef 3042 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
3043 AMD_IP_BLOCK_TYPE_IH,
3044 };
a90ad3c2 3045
95ea3dbc 3046 for (i = 0; i < adev->num_ip_blocks; i++) {
2cb681b6
ML
3047 int j;
3048 struct amdgpu_ip_block *block;
a90ad3c2 3049
4cd2a96d
J
3050 block = &adev->ip_blocks[i];
3051 block->status.hw = false;
2cb681b6 3052
4cd2a96d 3053 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
2cb681b6 3054
4cd2a96d 3055 if (block->version->type != ip_order[j] ||
2cb681b6
ML
3056 !block->status.valid)
3057 continue;
3058
3059 r = block->version->funcs->hw_init(adev);
0aaeefcc 3060 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3061 if (r)
3062 return r;
482f0e53 3063 block->status.hw = true;
a90ad3c2
ML
3064 }
3065 }
3066
3067 return 0;
3068}
3069
06ec9070 3070static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3071{
3072 int i, r;
3073
2cb681b6
ML
3074 static enum amd_ip_block_type ip_order[] = {
3075 AMD_IP_BLOCK_TYPE_SMC,
3076 AMD_IP_BLOCK_TYPE_DCE,
3077 AMD_IP_BLOCK_TYPE_GFX,
3078 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c 3079 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07
JJ
3080 AMD_IP_BLOCK_TYPE_VCE,
3081 AMD_IP_BLOCK_TYPE_VCN
2cb681b6 3082 };
a90ad3c2 3083
2cb681b6
ML
3084 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3085 int j;
3086 struct amdgpu_ip_block *block;
a90ad3c2 3087
2cb681b6
ML
3088 for (j = 0; j < adev->num_ip_blocks; j++) {
3089 block = &adev->ip_blocks[j];
3090
3091 if (block->version->type != ip_order[i] ||
482f0e53
ML
3092 !block->status.valid ||
3093 block->status.hw)
2cb681b6
ML
3094 continue;
3095
895bd048
JZ
3096 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3097 r = block->version->funcs->resume(adev);
3098 else
3099 r = block->version->funcs->hw_init(adev);
3100
0aaeefcc 3101 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3102 if (r)
3103 return r;
482f0e53 3104 block->status.hw = true;
a90ad3c2
ML
3105 }
3106 }
3107
3108 return 0;
3109}
3110
e3ecdffa
AD
3111/**
3112 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3113 *
3114 * @adev: amdgpu_device pointer
3115 *
3116 * First resume function for hardware IPs. The list of all the hardware
3117 * IPs that make up the asic is walked and the resume callbacks are run for
3118 * COMMON, GMC, and IH. resume puts the hardware into a functional state
3119 * after a suspend and updates the software state as necessary. This
3120 * function is also used for restoring the GPU after a GPU reset.
3121 * Returns 0 on success, negative error code on failure.
3122 */
06ec9070 3123static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
3124{
3125 int i, r;
3126
a90ad3c2 3127 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3128 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 3129 continue;
a90ad3c2 3130 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa
AD
3131 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3132 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
482f0e53 3133
fcf0649f
CZ
3134 r = adev->ip_blocks[i].version->funcs->resume(adev);
3135 if (r) {
3136 DRM_ERROR("resume of IP block <%s> failed %d\n",
3137 adev->ip_blocks[i].version->funcs->name, r);
3138 return r;
3139 }
482f0e53 3140 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
3141 }
3142 }
3143
3144 return 0;
3145}
3146
e3ecdffa
AD
3147/**
3148 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3149 *
3150 * @adev: amdgpu_device pointer
3151 *
3152 * Second resume function for hardware IPs. The list of all the hardware
3153 * IPs that make up the asic is walked and the resume callbacks are run for
3154 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
3155 * functional state after a suspend and updates the software state as
3156 * necessary. This function is also used for restoring the GPU after a GPU
3157 * reset.
3158 * Returns 0 on success, negative error code on failure.
3159 */
06ec9070 3160static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3161{
3162 int i, r;
3163
3164 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3165 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 3166 continue;
fcf0649f 3167 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3168 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
3169 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3170 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 3171 continue;
a1255107 3172 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 3173 if (r) {
a1255107
AD
3174 DRM_ERROR("resume of IP block <%s> failed %d\n",
3175 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 3176 return r;
2c1a2784 3177 }
482f0e53 3178 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
3179 }
3180
3181 return 0;
3182}
3183
e3ecdffa
AD
3184/**
3185 * amdgpu_device_ip_resume - run resume for hardware IPs
3186 *
3187 * @adev: amdgpu_device pointer
3188 *
3189 * Main resume function for hardware IPs. The hardware IPs
3190 * are split into two resume functions because they are
3191 * also used in recovering from a GPU reset and some additional
3192 * steps need to be taken between them. In this case (S3/S4) they are
3193 * run sequentially.
3194 * Returns 0 on success, negative error code on failure.
3195 */
06ec9070 3196static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
3197{
3198 int r;
3199
9cec53c1
JZ
3200 r = amdgpu_amdkfd_resume_iommu(adev);
3201 if (r)
3202 return r;
3203
06ec9070 3204 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
3205 if (r)
3206 return r;
7a3e0bb2
RZ
3207
3208 r = amdgpu_device_fw_loading(adev);
3209 if (r)
3210 return r;
3211
06ec9070 3212 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
3213
3214 return r;
3215}
3216
e3ecdffa
AD
3217/**
3218 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3219 *
3220 * @adev: amdgpu_device pointer
3221 *
3222 * Query the VBIOS data tables to determine if the board supports SR-IOV.
3223 */
4e99a44e 3224static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 3225{
6867e1b5
ML
3226 if (amdgpu_sriov_vf(adev)) {
3227 if (adev->is_atom_fw) {
58ff791a 3228 if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
6867e1b5
ML
3229 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3230 } else {
3231 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3232 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3233 }
3234
3235 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3236 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 3237 }
048765ad
AR
3238}
3239
e3ecdffa
AD
3240/**
3241 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3242 *
3243 * @asic_type: AMD asic type
3244 *
3245 * Check if there is DC (new modesetting infrastructure) support for an asic.
3246 * returns true if DC has support, false if not.
3247 */
4562236b
HW
3248bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3249{
3250 switch (asic_type) {
0637d417
AD
3251#ifdef CONFIG_DRM_AMDGPU_SI
3252 case CHIP_HAINAN:
3253#endif
3254 case CHIP_TOPAZ:
3255 /* chips with no display hardware */
3256 return false;
4562236b 3257#if defined(CONFIG_DRM_AMD_DC)
64200c46
MR
3258 case CHIP_TAHITI:
3259 case CHIP_PITCAIRN:
3260 case CHIP_VERDE:
3261 case CHIP_OLAND:
2d32ffd6
AD
3262 /*
3263 * We have systems in the wild with these ASICs that require
3264 * LVDS and VGA support which is not supported with DC.
3265 *
3266 * Fallback to the non-DC driver here by default so as not to
3267 * cause regressions.
3268 */
3269#if defined(CONFIG_DRM_AMD_DC_SI)
3270 return amdgpu_dc > 0;
3271#else
3272 return false;
64200c46 3273#endif
4562236b 3274 case CHIP_BONAIRE:
0d6fbccb 3275 case CHIP_KAVERI:
367e6687
AD
3276 case CHIP_KABINI:
3277 case CHIP_MULLINS:
d9fda248
HW
3278 /*
3279 * We have systems in the wild with these ASICs that require
3280 * LVDS and VGA support which is not supported with DC.
3281 *
3282 * Fallback to the non-DC driver here by default so as not to
3283 * cause regressions.
3284 */
3285 return amdgpu_dc > 0;
3286 case CHIP_HAWAII:
4562236b
HW
3287 case CHIP_CARRIZO:
3288 case CHIP_STONEY:
4562236b 3289 case CHIP_POLARIS10:
675fd32b 3290 case CHIP_POLARIS11:
2c8ad2d5 3291 case CHIP_POLARIS12:
675fd32b 3292 case CHIP_VEGAM:
4562236b
HW
3293 case CHIP_TONGA:
3294 case CHIP_FIJI:
42f8ffa1 3295 case CHIP_VEGA10:
dca7b401 3296 case CHIP_VEGA12:
c6034aa2 3297 case CHIP_VEGA20:
b86a1aa3 3298#if defined(CONFIG_DRM_AMD_DC_DCN)
fd187853 3299 case CHIP_RAVEN:
b4f199c7 3300 case CHIP_NAVI10:
8fceceb6 3301 case CHIP_NAVI14:
078655d9 3302 case CHIP_NAVI12:
e1c14c43 3303 case CHIP_RENOIR:
3f68c01b 3304 case CHIP_CYAN_SKILLFISH:
81d9bfb8 3305 case CHIP_SIENNA_CICHLID:
a6c5308f 3306 case CHIP_NAVY_FLOUNDER:
7cc656e2 3307 case CHIP_DIMGREY_CAVEFISH:
ddaed58b 3308 case CHIP_BEIGE_GOBY:
84b934bc 3309 case CHIP_VANGOGH:
c8b73f7f 3310 case CHIP_YELLOW_CARP:
42f8ffa1 3311#endif
f7f12b25 3312 default:
fd187853 3313 return amdgpu_dc != 0;
f7f12b25 3314#else
4562236b 3315 default:
93b09a9a 3316 if (amdgpu_dc > 0)
044a48f4 3317 DRM_INFO_ONCE("Display Core has been requested via kernel parameter "
93b09a9a 3318 "but isn't supported by ASIC, ignoring\n");
4562236b 3319 return false;
f7f12b25 3320#endif
4562236b
HW
3321 }
3322}
3323
3324/**
3325 * amdgpu_device_has_dc_support - check if dc is supported
3326 *
982a820b 3327 * @adev: amdgpu_device pointer
4562236b
HW
3328 *
3329 * Returns true for supported, false for not supported
3330 */
3331bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3332{
abaf210c
AS
3333 if (amdgpu_sriov_vf(adev) ||
3334 adev->enable_virtual_display ||
3335 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
2555039d
XY
3336 return false;
3337
4562236b
HW
3338 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3339}
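/*
 * Illustrative note (not from the driver itself): per the checks above,
 * amdgpu.dc=0 disables the DC display path even where it is the default,
 * while amdgpu.dc=1 opts in on asics (e.g. the SI/CIK boards with LVDS/VGA
 * mentioned above) where the non-DC driver remains the default.
 */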
3340
d4535e2c
AG
3341static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3342{
3343 struct amdgpu_device *adev =
3344 container_of(__work, struct amdgpu_device, xgmi_reset_work);
d95e8e97 3345 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
d4535e2c 3346
c6a6e2db
AG
3347 /* It's a bug to not have a hive within this function */
3348 if (WARN_ON(!hive))
3349 return;
3350
3351 /*
3352 * Use task barrier to synchronize all xgmi reset works across the
3353 * hive. task_barrier_enter and task_barrier_exit will block
3354 * until all the threads running the xgmi reset works reach
3355 * those points. task_barrier_full will do both blocks.
3356 */
3357 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3358
3359 task_barrier_enter(&hive->tb);
4a580877 3360 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
c6a6e2db
AG
3361
3362 if (adev->asic_reset_res)
3363 goto fail;
3364
3365 task_barrier_exit(&hive->tb);
4a580877 3366 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
c6a6e2db
AG
3367
3368 if (adev->asic_reset_res)
3369 goto fail;
43c4d576 3370
8bc7b360
HZ
3371 if (adev->mmhub.ras_funcs &&
3372 adev->mmhub.ras_funcs->reset_ras_error_count)
3373 adev->mmhub.ras_funcs->reset_ras_error_count(adev);
c6a6e2db
AG
3374 } else {
3375
3376 task_barrier_full(&hive->tb);
3377 adev->asic_reset_res = amdgpu_asic_reset(adev);
3378 }
ce316fa5 3379
c6a6e2db 3380fail:
d4535e2c 3381 if (adev->asic_reset_res)
fed184e9 3382 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
4a580877 3383 adev->asic_reset_res, adev_to_drm(adev)->unique);
d95e8e97 3384 amdgpu_put_xgmi_hive(hive);
d4535e2c
AG
3385}
3386
71f98027
AD
3387static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3388{
3389 char *input = amdgpu_lockup_timeout;
3390 char *timeout_setting = NULL;
3391 int index = 0;
3392 long timeout;
3393 int ret = 0;
3394
3395 /*
67387dfe
AD
3396 * By default the timeout for non-compute jobs is 10000
3397 * and 60000 for compute jobs.
71f98027 3398 * In SR-IOV or passthrough mode, the timeout for compute
b7b2a316 3399 * jobs is 60000 by default.
71f98027
AD
3400 */
3401 adev->gfx_timeout = msecs_to_jiffies(10000);
3402 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
9882e278
ED
3403 if (amdgpu_sriov_vf(adev))
3404 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3405 msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
71f98027 3406 else
67387dfe 3407 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027 3408
f440ff44 3409 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 3410 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 3411 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
3412 ret = kstrtol(timeout_setting, 0, &timeout);
3413 if (ret)
3414 return ret;
3415
3416 if (timeout == 0) {
3417 index++;
3418 continue;
3419 } else if (timeout < 0) {
3420 timeout = MAX_SCHEDULE_TIMEOUT;
127aedf9
CK
3421 dev_warn(adev->dev, "lockup timeout disabled");
3422 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
71f98027
AD
3423 } else {
3424 timeout = msecs_to_jiffies(timeout);
3425 }
3426
3427 switch (index++) {
3428 case 0:
3429 adev->gfx_timeout = timeout;
3430 break;
3431 case 1:
3432 adev->compute_timeout = timeout;
3433 break;
3434 case 2:
3435 adev->sdma_timeout = timeout;
3436 break;
3437 case 3:
3438 adev->video_timeout = timeout;
3439 break;
3440 default:
3441 break;
3442 }
3443 }
3444 /*
3445 * There is only one value specified and
3446 * it should apply to all non-compute jobs.
3447 */
bcccee89 3448 if (index == 1) {
71f98027 3449 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
3450 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3451 adev->compute_timeout = adev->gfx_timeout;
3452 }
71f98027
AD
3453 }
3454
3455 return ret;
3456}
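As a usage note (values below are hypothetical, not defaults): the comma-separated amdgpu.lockup_timeout string is parsed positionally as gfx,compute,sdma,video, so an example boot parameter would map as sketched here:

/*
 * amdgpu.lockup_timeout=10000,60000,-1,0   (hypothetical example)
 *   index 0 -> adev->gfx_timeout     = msecs_to_jiffies(10000)
 *   index 1 -> adev->compute_timeout = msecs_to_jiffies(60000)
 *   index 2 -> adev->sdma_timeout    = MAX_SCHEDULE_TIMEOUT  (negative disables the timeout)
 *   index 3 -> video timeout unchanged                       (a "0" entry skips the field)
 */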
d4535e2c 3457
4a74c38c
PY
3458/**
3459 * amdgpu_device_check_iommu_direct_map - check if RAM is directly mapped to the GPU
3460 *
3461 * @adev: amdgpu_device pointer
3462 *
3463 * RAM is direct mapped to the GPU if the IOMMU is disabled or in pass-through mode
3464 */
3465static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3466{
3467 struct iommu_domain *domain;
3468
3469 domain = iommu_get_domain_for_dev(adev->dev);
3470 if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3471 adev->ram_is_direct_mapped = true;
3472}
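The cases distinguished above, summarized as a sketch (assumed IOMMU domain semantics):

/*
 * iommu_get_domain_for_dev() == NULL          -> IOMMU absent/disabled -> direct mapped
 * domain->type == IOMMU_DOMAIN_IDENTITY       -> pass-through mode     -> direct mapped
 * translating domain (e.g. IOMMU_DOMAIN_DMA)  -> ram_is_direct_mapped stays false
 */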
3473
77f3a5cd
ND
3474static const struct attribute *amdgpu_dev_attributes[] = {
3475 &dev_attr_product_name.attr,
3476 &dev_attr_product_number.attr,
3477 &dev_attr_serial_number.attr,
3478 &dev_attr_pcie_replay_count.attr,
3479 NULL
3480};
3481
d38ceaf9
AD
3482/**
3483 * amdgpu_device_init - initialize the driver
3484 *
3485 * @adev: amdgpu_device pointer
d38ceaf9
AD
3486 * @flags: driver flags
3487 *
3488 * Initializes the driver info and hw (all asics).
3489 * Returns 0 for success or an error on failure.
3490 * Called at driver startup.
3491 */
3492int amdgpu_device_init(struct amdgpu_device *adev,
d38ceaf9
AD
3493 uint32_t flags)
3494{
8aba21b7
LT
3495 struct drm_device *ddev = adev_to_drm(adev);
3496 struct pci_dev *pdev = adev->pdev;
d38ceaf9 3497 int r, i;
b98c6299 3498 bool px = false;
95844d20 3499 u32 max_MBps;
d38ceaf9
AD
3500
3501 adev->shutdown = false;
d38ceaf9 3502 adev->flags = flags;
4e66d7d2
YZ
3503
3504 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3505 adev->asic_type = amdgpu_force_asic_type;
3506 else
3507 adev->asic_type = flags & AMD_ASIC_MASK;
3508
d38ceaf9 3509 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 3510 if (amdgpu_emu_mode == 1)
8bdab6bb 3511 adev->usec_timeout *= 10;
770d13b1 3512 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
3513 adev->accel_working = false;
3514 adev->num_rings = 0;
3515 adev->mman.buffer_funcs = NULL;
3516 adev->mman.buffer_funcs_ring = NULL;
3517 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 3518 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 3519 adev->gmc.gmc_funcs = NULL;
7bd939d0 3520 adev->harvest_ip_mask = 0x0;
f54d1867 3521 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 3522 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
3523
3524 adev->smc_rreg = &amdgpu_invalid_rreg;
3525 adev->smc_wreg = &amdgpu_invalid_wreg;
3526 adev->pcie_rreg = &amdgpu_invalid_rreg;
3527 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
3528 adev->pciep_rreg = &amdgpu_invalid_rreg;
3529 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
3530 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3531 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
3532 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3533 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3534 adev->didt_rreg = &amdgpu_invalid_rreg;
3535 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
3536 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3537 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
3538 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3539 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3540
3e39ab90
AD
3541 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3542 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3543 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
3544
3545 /* mutex initializations are all done here so we
3546 * can call functions without running into locking issues */
0e5ca0d1 3547 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
3548 mutex_init(&adev->pm.mutex);
3549 mutex_init(&adev->gfx.gpu_clock_mutex);
3550 mutex_init(&adev->srbm_mutex);
b8866c26 3551 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3552 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 3553 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3554 mutex_init(&adev->mn_lock);
e23b74aa 3555 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3556 hash_init(adev->mn_hash);
32eaeae0 3557 mutex_init(&adev->psp.mutex);
bd052211 3558 mutex_init(&adev->notifier_lock);
d38ceaf9 3559
4eaf21b7 3560 amdgpu_device_init_apu_flags(adev);
9f6a7857 3561
912dfc84
EQ
3562 r = amdgpu_device_check_arguments(adev);
3563 if (r)
3564 return r;
d38ceaf9 3565
d38ceaf9
AD
3566 spin_lock_init(&adev->mmio_idx_lock);
3567 spin_lock_init(&adev->smc_idx_lock);
3568 spin_lock_init(&adev->pcie_idx_lock);
3569 spin_lock_init(&adev->uvd_ctx_idx_lock);
3570 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3571 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3572 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3573 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3574 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3575
0c4e7fa5
CZ
3576 INIT_LIST_HEAD(&adev->shadow_list);
3577 mutex_init(&adev->shadow_list_lock);
3578
655ce9cb 3579 INIT_LIST_HEAD(&adev->reset_list);
3580
beff74bc
AD
3581 INIT_DELAYED_WORK(&adev->delayed_init_work,
3582 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3583 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3584 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3585
d4535e2c
AG
3586 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3587
d23ee13f 3588 adev->gfx.gfx_off_req_count = 1;
b6e79d9a 3589 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3590
b265bdbd
EQ
3591 atomic_set(&adev->throttling_logging_enabled, 1);
3592 /*
3593 * If throttling continues, logging will be performed every minute
3594 * to avoid log flooding. "-1" is subtracted since the thermal
3595 * throttling interrupt comes every second. Thus, the total logging
3596 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
3597 * for throttling interrupt) = 60 seconds.
3598 */
3599 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3600 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3601
0fa49558
AX
3602 /* Registers mapping */
3603 /* TODO: block userspace mapping of io register */
da69c161
KW
3604 if (adev->asic_type >= CHIP_BONAIRE) {
3605 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3606 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3607 } else {
3608 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3609 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3610 }
d38ceaf9 3611
6c08e0ef
EQ
3612 for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3613 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3614
d38ceaf9
AD
3615 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3616 if (adev->rmmio == NULL) {
3617 return -ENOMEM;
3618 }
3619 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3620 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3621
5494d864
AD
3622 amdgpu_device_get_pcie_info(adev);
3623
b239c017
JX
3624 if (amdgpu_mcbp)
3625 DRM_INFO("MCBP is enabled\n");
3626
5f84cc63
JX
3627 if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
3628 adev->enable_mes = true;
3629
3aa0115d
ML
3630 /* detect hw virtualization here */
3631 amdgpu_detect_virtualization(adev);
3632
dffa11b4
ML
3633 r = amdgpu_device_get_job_timeout_settings(adev);
3634 if (r) {
3635 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4ef87d8f 3636 return r;
a190d1c7
XY
3637 }
3638
cfbb6b00
AG
3639 /*
3640 * Reset domain needs to be present early, before XGMI hive discovered
3641 * (if any) and initialized to use reset sem and in_gpu reset flag
3642 * early on during init.
3643 */
3644 adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3645 if (!adev->reset_domain)
3646 return -ENOMEM;
3647
d38ceaf9 3648 /* early init functions */
06ec9070 3649 r = amdgpu_device_ip_early_init(adev);
d38ceaf9 3650 if (r)
4ef87d8f 3651 return r;
d38ceaf9 3652
4a0165f0
VS
3653 /* Need to get xgmi info early to decide the reset behavior */
3654 if (adev->gmc.xgmi.supported) {
3655 r = adev->gfxhub.funcs->get_xgmi_info(adev);
3656 if (r)
3657 return r;
3658 }
3659
8e6d0b69 3660 /* enable PCIE atomic ops */
3661 if (amdgpu_sriov_vf(adev))
3662 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
3663 adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_enabled_flags ==
3664 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3665 else
3666 adev->have_atomics_support =
3667 !pci_enable_atomic_ops_to_root(adev->pdev,
3668 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3669 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3670 if (!adev->have_atomics_support)
3671 dev_info(adev->dev, "PCIE atomic ops is not supported\n");
3672
6585661d
OZ
3673 /* doorbell bar mapping and doorbell index init */
3674 amdgpu_device_doorbell_init(adev);
3675
9475a943
SL
3676 if (amdgpu_emu_mode == 1) {
3677 /* post the asic on emulation mode */
3678 emu_soc_asic_init(adev);
bfca0289 3679 goto fence_driver_init;
9475a943 3680 }
bfca0289 3681
04442bf7
LL
3682 amdgpu_reset_init(adev);
3683
4e99a44e
ML
3684 /* detect if we are with an SRIOV vbios */
3685 amdgpu_device_detect_sriov_bios(adev);
048765ad 3686
95e8e59e
AD
3687 /* check if we need to reset the asic
3688 * E.g., driver was not cleanly unloaded previously, etc.
3689 */
f14899fd 3690 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
e3c1b071 3691 if (adev->gmc.xgmi.num_physical_nodes) {
3692 dev_info(adev->dev, "Pending hive reset.\n");
3693 adev->gmc.xgmi.pending_reset = true;
3694 /* Only need to init necessary block for SMU to handle the reset */
3695 for (i = 0; i < adev->num_ip_blocks; i++) {
3696 if (!adev->ip_blocks[i].status.valid)
3697 continue;
3698 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3699 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3700 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3701 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
751f43e7 3702 DRM_DEBUG("IP %s disabled for hw_init.\n",
e3c1b071 3703 adev->ip_blocks[i].version->funcs->name);
3704 adev->ip_blocks[i].status.hw = true;
3705 }
3706 }
3707 } else {
3708 r = amdgpu_asic_reset(adev);
3709 if (r) {
3710 dev_err(adev->dev, "asic reset on init failed\n");
3711 goto failed;
3712 }
95e8e59e
AD
3713 }
3714 }
3715
8f66090b 3716 pci_enable_pcie_error_reporting(adev->pdev);
c9a6b82f 3717
d38ceaf9 3718 /* Post card if necessary */
39c640c0 3719 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3720 if (!adev->bios) {
bec86378 3721 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3722 r = -EINVAL;
3723 goto failed;
d38ceaf9 3724 }
bec86378 3725 DRM_INFO("GPU posting now...\n");
4d2997ab 3726 r = amdgpu_device_asic_init(adev);
4e99a44e
ML
3727 if (r) {
3728 dev_err(adev->dev, "gpu post error!\n");
3729 goto failed;
3730 }
d38ceaf9
AD
3731 }
3732
88b64e95
AD
3733 if (adev->is_atom_fw) {
3734 /* Initialize clocks */
3735 r = amdgpu_atomfirmware_get_clock_info(adev);
3736 if (r) {
3737 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 3738 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
3739 goto failed;
3740 }
3741 } else {
a5bde2f9
AD
3742 /* Initialize clocks */
3743 r = amdgpu_atombios_get_clock_info(adev);
3744 if (r) {
3745 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 3746 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 3747 goto failed;
a5bde2f9
AD
3748 }
3749 /* init i2c buses */
4562236b
HW
3750 if (!amdgpu_device_has_dc_support(adev))
3751 amdgpu_atombios_i2c_init(adev);
2c1a2784 3752 }
d38ceaf9 3753
bfca0289 3754fence_driver_init:
d38ceaf9 3755 /* Fence driver */
067f44c8 3756 r = amdgpu_fence_driver_sw_init(adev);
2c1a2784 3757 if (r) {
067f44c8 3758 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
e23b74aa 3759 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3760 goto failed;
2c1a2784 3761 }
d38ceaf9
AD
3762
3763 /* init the mode config */
4a580877 3764 drm_mode_config_init(adev_to_drm(adev));
d38ceaf9 3765
06ec9070 3766 r = amdgpu_device_ip_init(adev);
d38ceaf9 3767 if (r) {
8840a387 3768 /* failed in exclusive mode due to timeout */
3769 if (amdgpu_sriov_vf(adev) &&
3770 !amdgpu_sriov_runtime(adev) &&
3771 amdgpu_virt_mmio_blocked(adev) &&
3772 !amdgpu_virt_wait_reset(adev)) {
3773 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
3774 /* Don't send request since VF is inactive. */
3775 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3776 adev->virt.ops = NULL;
8840a387 3777 r = -EAGAIN;
970fd197 3778 goto release_ras_con;
8840a387 3779 }
06ec9070 3780 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3781 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
970fd197 3782 goto release_ras_con;
d38ceaf9
AD
3783 }
3784
8d35a259
LG
3785 amdgpu_fence_driver_hw_init(adev);
3786
d69b8971
YZ
3787 dev_info(adev->dev,
3788 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
3789 adev->gfx.config.max_shader_engines,
3790 adev->gfx.config.max_sh_per_se,
3791 adev->gfx.config.max_cu_per_sh,
3792 adev->gfx.cu_info.number);
3793
d38ceaf9
AD
3794 adev->accel_working = true;
3795
e59c0205
AX
3796 amdgpu_vm_check_compute_bug(adev);
3797
95844d20
MO
3798 /* Initialize the buffer migration limit. */
3799 if (amdgpu_moverate >= 0)
3800 max_MBps = amdgpu_moverate;
3801 else
3802 max_MBps = 8; /* Allow 8 MB/s. */
3803 /* Get a log2 for easy divisions. */
3804 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3805
d2f52ac8 3806 r = amdgpu_pm_sysfs_init(adev);
7c868b59
YT
3807 if (r) {
3808 adev->pm_sysfs_en = false;
d2f52ac8 3809 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
7c868b59
YT
3810 } else
3811 adev->pm_sysfs_en = true;
d2f52ac8 3812
5bb23532 3813 r = amdgpu_ucode_sysfs_init(adev);
7c868b59
YT
3814 if (r) {
3815 adev->ucode_sysfs_en = false;
5bb23532 3816 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
7c868b59
YT
3817 } else
3818 adev->ucode_sysfs_en = true;
5bb23532 3819
d38ceaf9
AD
3820 if ((amdgpu_testing & 1)) {
3821 if (adev->accel_working)
3822 amdgpu_test_moves(adev);
3823 else
3824 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
3825 }
d38ceaf9
AD
3826 if (amdgpu_benchmarking) {
3827 if (adev->accel_working)
3828 amdgpu_benchmark(adev, amdgpu_benchmarking);
3829 else
3830 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
3831 }
3832
b0adca4d
EQ
3833 /*
3834 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3835 * Otherwise the mgpu fan boost feature will be skipped because the
3836 * gpu instance count would be too low.
3837 */
3838 amdgpu_register_gpu_instance(adev);
3839
d38ceaf9
AD
3840 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3841 * explicit gating rather than handling it automatically.
3842 */
e3c1b071 3843 if (!adev->gmc.xgmi.pending_reset) {
3844 r = amdgpu_device_ip_late_init(adev);
3845 if (r) {
3846 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
3847 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
970fd197 3848 goto release_ras_con;
e3c1b071 3849 }
3850 /* must succeed. */
3851 amdgpu_ras_resume(adev);
3852 queue_delayed_work(system_wq, &adev->delayed_init_work,
3853 msecs_to_jiffies(AMDGPU_RESUME_MS));
2c1a2784 3854 }
d38ceaf9 3855
2c738637
ML
3856 if (amdgpu_sriov_vf(adev))
3857 flush_delayed_work(&adev->delayed_init_work);
3858
77f3a5cd 3859 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
5aea5327 3860 if (r)
77f3a5cd 3861 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166 3862
d155bef0
AB
3863 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3864 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
3865 if (r)
3866 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3867
c1dd4aa6
AG
3868 /* Keep the stored PCI config space at hand for restore after a sudden PCI error */
3869 if (amdgpu_device_cache_pci_state(adev->pdev))
3870 pci_restore_state(pdev);
3871
8c3dd61c
KHF
3872 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
3873 /* this will fail for cards that aren't VGA class devices, just
3874 * ignore it */
3875 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
bf44e8ce 3876 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
8c3dd61c
KHF
3877
3878 if (amdgpu_device_supports_px(ddev)) {
3879 px = true;
3880 vga_switcheroo_register_client(adev->pdev,
3881 &amdgpu_switcheroo_ops, px);
3882 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
3883 }
3884
e3c1b071 3885 if (adev->gmc.xgmi.pending_reset)
3886 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
3887 msecs_to_jiffies(AMDGPU_RESUME_MS));
3888
4a74c38c
PY
3889 amdgpu_device_check_iommu_direct_map(adev);
3890
d38ceaf9 3891 return 0;
83ba126a 3892
970fd197
SY
3893release_ras_con:
3894 amdgpu_release_ras_context(adev);
3895
83ba126a 3896failed:
89041940 3897 amdgpu_vf_error_trans_all(adev);
8840a387 3898
83ba126a 3899 return r;
d38ceaf9
AD
3900}
3901
07775fc1
AG
3902static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
3903{
62d5f9f7 3904
07775fc1
AG
3905 /* Clear all CPU mappings pointing to this device */
3906 unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
3907
3908 /* Unmap all mapped bars - Doorbell, registers and VRAM */
3909 amdgpu_device_doorbell_fini(adev);
3910
3911 iounmap(adev->rmmio);
3912 adev->rmmio = NULL;
3913 if (adev->mman.aper_base_kaddr)
3914 iounmap(adev->mman.aper_base_kaddr);
3915 adev->mman.aper_base_kaddr = NULL;
3916
3917 /* Memory manager related */
3918 if (!adev->gmc.xgmi.connected_to_cpu) {
3919 arch_phys_wc_del(adev->gmc.vram_mtrr);
3920 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
3921 }
3922}
3923
d38ceaf9 3924/**
bbe04dec 3925 * amdgpu_device_fini_hw - tear down the driver
d38ceaf9
AD
3926 *
3927 * @adev: amdgpu_device pointer
3928 *
3929 * Tear down the driver info (all asics).
3930 * Called at driver shutdown.
3931 */
72c8c97b 3932void amdgpu_device_fini_hw(struct amdgpu_device *adev)
d38ceaf9 3933{
aac89168 3934 dev_info(adev->dev, "amdgpu: finishing device.\n");
9f875167 3935 flush_delayed_work(&adev->delayed_init_work);
691191a2
YW
3936 if (adev->mman.initialized) {
3937 flush_delayed_work(&adev->mman.bdev.wq);
e78b3197 3938 ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
691191a2 3939 }
d0d13fe8 3940 adev->shutdown = true;
9f875167 3941
752c683d
ML
3942 /* make sure IB test finished before entering exclusive mode
3943 * to avoid preemption on IB test
3944 */
519b8b76 3945 if (amdgpu_sriov_vf(adev)) {
752c683d 3946 amdgpu_virt_request_full_gpu(adev, false);
519b8b76
BZ
3947 amdgpu_virt_fini_data_exchange(adev);
3948 }
752c683d 3949
e5b03032
ML
3950 /* disable all interrupts */
3951 amdgpu_irq_disable_all(adev);
ff97cba8 3952 if (adev->mode_info.mode_config_initialized) {
1053b9c9 3953 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4a580877 3954 drm_helper_force_disable_all(adev_to_drm(adev));
ff97cba8 3955 else
4a580877 3956 drm_atomic_helper_shutdown(adev_to_drm(adev));
ff97cba8 3957 }
8d35a259 3958 amdgpu_fence_driver_hw_fini(adev);
72c8c97b 3959
7c868b59
YT
3960 if (adev->pm_sysfs_en)
3961 amdgpu_pm_sysfs_fini(adev);
72c8c97b
AG
3962 if (adev->ucode_sysfs_en)
3963 amdgpu_ucode_sysfs_fini(adev);
3964 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
3965
232d1d43
SY
3966 /* RAS features must be disabled before hw fini */
3967 amdgpu_ras_pre_fini(adev);
3968
e9669fb7 3969 amdgpu_device_ip_fini_early(adev);
d10d0daa 3970
a3848df6
YW
3971 amdgpu_irq_fini_hw(adev);
3972
b6fd6e0f
SK
3973 if (adev->mman.initialized)
3974 ttm_device_clear_dma_mappings(&adev->mman.bdev);
894c6890 3975
d10d0daa 3976 amdgpu_gart_dummy_page_fini(adev);
07775fc1 3977
87172e89
LS
3978 if (drm_dev_is_unplugged(adev_to_drm(adev)))
3979 amdgpu_device_unmap_mmio(adev);
3980
72c8c97b
AG
3981}
3982
3983void amdgpu_device_fini_sw(struct amdgpu_device *adev)
3984{
62d5f9f7
LS
3985 int idx;
3986
8d35a259 3987 amdgpu_fence_driver_sw_fini(adev);
a5c5d8d5 3988 amdgpu_device_ip_fini(adev);
75e1658e
ND
3989 release_firmware(adev->firmware.gpu_info_fw);
3990 adev->firmware.gpu_info_fw = NULL;
d38ceaf9 3991 adev->accel_working = false;
04442bf7
LL
3992
3993 amdgpu_reset_fini(adev);
3994
d38ceaf9 3995 /* free i2c buses */
4562236b
HW
3996 if (!amdgpu_device_has_dc_support(adev))
3997 amdgpu_i2c_fini(adev);
bfca0289
SL
3998
3999 if (amdgpu_emu_mode != 1)
4000 amdgpu_atombios_fini(adev);
4001
d38ceaf9
AD
4002 kfree(adev->bios);
4003 adev->bios = NULL;
b98c6299 4004 if (amdgpu_device_supports_px(adev_to_drm(adev))) {
84c8b22e 4005 vga_switcheroo_unregister_client(adev->pdev);
83ba126a 4006 vga_switcheroo_fini_domain_pm_ops(adev->dev);
b98c6299 4007 }
38d6be81 4008 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
b8779475 4009 vga_client_unregister(adev->pdev);
e9bc1bf7 4010
62d5f9f7
LS
4011 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4012
4013 iounmap(adev->rmmio);
4014 adev->rmmio = NULL;
4015 amdgpu_device_doorbell_fini(adev);
4016 drm_dev_exit(idx);
4017 }
4018
d155bef0
AB
4019 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4020 amdgpu_pmu_fini(adev);
72de33f8 4021 if (adev->mman.discovery_bin)
a190d1c7 4022 amdgpu_discovery_fini(adev);
72c8c97b 4023
cfbb6b00
AG
4024 amdgpu_reset_put_reset_domain(adev->reset_domain);
4025 adev->reset_domain = NULL;
4026
72c8c97b
AG
4027 kfree(adev->pci_state);
4028
d38ceaf9
AD
4029}
4030
58144d28
ND
4031/**
4032 * amdgpu_device_evict_resources - evict device resources
4033 * @adev: amdgpu device object
4034 *
4035 * Evicts all ttm device resources (vram BOs, gart table) from the lru list
4036 * of the vram memory type. Mainly used for evicting device resources
4037 * at suspend time.
4038 *
4039 */
4040static void amdgpu_device_evict_resources(struct amdgpu_device *adev)
4041{
e53d9665
ML
4042 /* No need to evict vram on APUs for suspend to ram or s2idle */
4043 if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
58144d28
ND
4044 return;
4045
4046 if (amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM))
4047 DRM_WARN("evicting device resources failed\n");
4048
4049}
d38ceaf9
AD
4050
4051/*
4052 * Suspend & resume.
4053 */
4054/**
810ddc3a 4055 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 4056 *
87e3f136 4057 * @dev: drm dev pointer
87e3f136 4058 * @fbcon: notify the fbdev of suspend
d38ceaf9
AD
4059 *
4060 * Puts the hw in the suspend state (all asics).
4061 * Returns 0 for success or an error on failure.
4062 * Called at driver suspend.
4063 */
de185019 4064int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9 4065{
a2e15b0e 4066 struct amdgpu_device *adev = drm_to_adev(dev);
d38ceaf9 4067
d38ceaf9
AD
4068 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4069 return 0;
4070
44779b43 4071 adev->in_suspend = true;
3fa8f89d
S
4072
4073 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4074 DRM_WARN("smart shift update failed\n");
4075
d38ceaf9
AD
4076 drm_kms_helper_poll_disable(dev);
4077
5f818173 4078 if (fbcon)
087451f3 4079 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
5f818173 4080
beff74bc 4081 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 4082
5e6932fe 4083 amdgpu_ras_suspend(adev);
4084
2196927b 4085 amdgpu_device_ip_suspend_phase1(adev);
fe1053b7 4086
5d3a2d95
AD
4087 if (!adev->in_s0ix)
4088 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
94fa5660 4089
58144d28 4090 amdgpu_device_evict_resources(adev);
d38ceaf9 4091
8d35a259 4092 amdgpu_fence_driver_hw_fini(adev);
d38ceaf9 4093
2196927b 4094 amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 4095
d38ceaf9
AD
4096 return 0;
4097}
4098
4099/**
810ddc3a 4100 * amdgpu_device_resume - initiate device resume
d38ceaf9 4101 *
87e3f136 4102 * @dev: drm dev pointer
87e3f136 4103 * @fbcon: notify the fbdev of resume
d38ceaf9
AD
4104 *
4105 * Bring the hw back to operating state (all asics).
4106 * Returns 0 for success or an error on failure.
4107 * Called at driver resume.
4108 */
de185019 4109int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9 4110{
1348969a 4111 struct amdgpu_device *adev = drm_to_adev(dev);
03161a6e 4112 int r = 0;
d38ceaf9
AD
4113
4114 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4115 return 0;
4116
62498733 4117 if (adev->in_s0ix)
628c36d7
PL
4118 amdgpu_gfx_state_change_set(adev, sGpuChangeState_D0Entry);
4119
d38ceaf9 4120 /* post card */
39c640c0 4121 if (amdgpu_device_need_post(adev)) {
4d2997ab 4122 r = amdgpu_device_asic_init(adev);
74b0b157 4123 if (r)
aac89168 4124 dev_err(adev->dev, "amdgpu asic init failed\n");
74b0b157 4125 }
d38ceaf9 4126
06ec9070 4127 r = amdgpu_device_ip_resume(adev);
e6707218 4128 if (r) {
aac89168 4129 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
4d3b9ae5 4130 return r;
e6707218 4131 }
8d35a259 4132 amdgpu_fence_driver_hw_init(adev);
5ceb54c6 4133
06ec9070 4134 r = amdgpu_device_ip_late_init(adev);
03161a6e 4135 if (r)
4d3b9ae5 4136 return r;
d38ceaf9 4137
beff74bc
AD
4138 queue_delayed_work(system_wq, &adev->delayed_init_work,
4139 msecs_to_jiffies(AMDGPU_RESUME_MS));
4140
5d3a2d95
AD
4141 if (!adev->in_s0ix) {
4142 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4143 if (r)
4144 return r;
4145 }
756e6880 4146
96a5d8d4 4147 /* Make sure IB tests flushed */
beff74bc 4148 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 4149
a2e15b0e 4150 if (fbcon)
087451f3 4151 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
d38ceaf9
AD
4152
4153 drm_kms_helper_poll_enable(dev);
23a1a9e5 4154
5e6932fe 4155 amdgpu_ras_resume(adev);
4156
23a1a9e5
L
4157 /*
4158 * Most of the connector probing functions try to acquire runtime pm
4159 * refs to ensure that the GPU is powered on when connector polling is
4160 * performed. Since we're calling this from a runtime PM callback,
4161 * trying to acquire rpm refs will cause us to deadlock.
4162 *
4163 * Since we're guaranteed to be holding the rpm lock, it's safe to
4164 * temporarily disable the rpm helpers so this doesn't deadlock us.
4165 */
4166#ifdef CONFIG_PM
4167 dev->dev->power.disable_depth++;
4168#endif
4562236b
HW
4169 if (!amdgpu_device_has_dc_support(adev))
4170 drm_helper_hpd_irq_event(dev);
4171 else
4172 drm_kms_helper_hotplug_event(dev);
23a1a9e5
L
4173#ifdef CONFIG_PM
4174 dev->dev->power.disable_depth--;
4175#endif
44779b43
RZ
4176 adev->in_suspend = false;
4177
3fa8f89d
S
4178 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4179 DRM_WARN("smart shift update failed\n");
4180
4d3b9ae5 4181 return 0;
d38ceaf9
AD
4182}
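For context, these suspend/resume entry points are normally reached from the driver's dev_pm_ops callbacks; a hypothetical wrapper (a sketch only, not the actual amdgpu_drv.c code) would look like:

static int example_pmops_suspend(struct device *dev)
{
	struct drm_device *drm_dev = dev_get_drvdata(dev);

	return amdgpu_device_suspend(drm_dev, true);	/* notify fbcon of suspend */
}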
4183
e3ecdffa
AD
4184/**
4185 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4186 *
4187 * @adev: amdgpu_device pointer
4188 *
4189 * The list of all the hardware IPs that make up the asic is walked and
4190 * the check_soft_reset callbacks are run. check_soft_reset determines
4191 * if the asic is still hung or not.
4192 * Returns true if any of the IPs are still in a hung state, false if not.
4193 */
06ec9070 4194static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
4195{
4196 int i;
4197 bool asic_hang = false;
4198
f993d628
ML
4199 if (amdgpu_sriov_vf(adev))
4200 return true;
4201
8bc04c29
AD
4202 if (amdgpu_asic_need_full_reset(adev))
4203 return true;
4204
63fbf42f 4205 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4206 if (!adev->ip_blocks[i].status.valid)
63fbf42f 4207 continue;
a1255107
AD
4208 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4209 adev->ip_blocks[i].status.hang =
4210 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4211 if (adev->ip_blocks[i].status.hang) {
aac89168 4212 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
4213 asic_hang = true;
4214 }
4215 }
4216 return asic_hang;
4217}
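To make the callback contract concrete, a hypothetical IP block's .check_soft_reset implementation (a sketch, not a real block) would inspect its own status registers and report whether it is hung; the loop above stores that result in status.hang:

static bool example_ip_check_soft_reset(void *handle)
{
	/* a real callback would cast handle to struct amdgpu_device * and
	 * read the block's busy/hang status registers */
	return false;	/* this sketch never reports a hang */
}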
4218
e3ecdffa
AD
4219/**
4220 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4221 *
4222 * @adev: amdgpu_device pointer
4223 *
4224 * The list of all the hardware IPs that make up the asic is walked and the
4225 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
4226 * handles any IP specific hardware or software state changes that are
4227 * necessary for a soft reset to succeed.
4228 * Returns 0 on success, negative error code on failure.
4229 */
06ec9070 4230static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
4231{
4232 int i, r = 0;
4233
4234 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4235 if (!adev->ip_blocks[i].status.valid)
d31a501e 4236 continue;
a1255107
AD
4237 if (adev->ip_blocks[i].status.hang &&
4238 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4239 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
4240 if (r)
4241 return r;
4242 }
4243 }
4244
4245 return 0;
4246}
4247
e3ecdffa
AD
4248/**
4249 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4250 *
4251 * @adev: amdgpu_device pointer
4252 *
4253 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
4254 * reset is necessary to recover.
4255 * Returns true if a full asic reset is required, false if not.
4256 */
06ec9070 4257static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 4258{
da146d3b
AD
4259 int i;
4260
8bc04c29
AD
4261 if (amdgpu_asic_need_full_reset(adev))
4262 return true;
4263
da146d3b 4264 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4265 if (!adev->ip_blocks[i].status.valid)
da146d3b 4266 continue;
a1255107
AD
4267 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4268 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4269 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
4270 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4271 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 4272 if (adev->ip_blocks[i].status.hang) {
aac89168 4273 dev_info(adev->dev, "Some block need full reset!\n");
da146d3b
AD
4274 return true;
4275 }
4276 }
35d782fe
CZ
4277 }
4278 return false;
4279}
4280
e3ecdffa
AD
4281/**
4282 * amdgpu_device_ip_soft_reset - do a soft reset
4283 *
4284 * @adev: amdgpu_device pointer
4285 *
4286 * The list of all the hardware IPs that make up the asic is walked and the
4287 * soft_reset callbacks are run if the block is hung. soft_reset handles any
4288 * IP specific hardware or software state changes that are necessary to soft
4289 * reset the IP.
4290 * Returns 0 on success, negative error code on failure.
4291 */
06ec9070 4292static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4293{
4294 int i, r = 0;
4295
4296 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4297 if (!adev->ip_blocks[i].status.valid)
35d782fe 4298 continue;
a1255107
AD
4299 if (adev->ip_blocks[i].status.hang &&
4300 adev->ip_blocks[i].version->funcs->soft_reset) {
4301 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
4302 if (r)
4303 return r;
4304 }
4305 }
4306
4307 return 0;
4308}
4309
e3ecdffa
AD
4310/**
4311 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4312 *
4313 * @adev: amdgpu_device pointer
4314 *
4315 * The list of all the hardware IPs that make up the asic is walked and the
4316 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
4317 * handles any IP specific hardware or software state changes that are
4318 * necessary after the IP has been soft reset.
4319 * Returns 0 on success, negative error code on failure.
4320 */
06ec9070 4321static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4322{
4323 int i, r = 0;
4324
4325 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4326 if (!adev->ip_blocks[i].status.valid)
35d782fe 4327 continue;
a1255107
AD
4328 if (adev->ip_blocks[i].status.hang &&
4329 adev->ip_blocks[i].version->funcs->post_soft_reset)
4330 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
4331 if (r)
4332 return r;
4333 }
4334
4335 return 0;
4336}
4337
e3ecdffa 4338/**
c33adbc7 4339 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
4340 *
4341 * @adev: amdgpu_device pointer
4342 *
4343 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
4344 * restore things like GPUVM page tables after a GPU reset where
4345 * the contents of VRAM might be lost.
403009bf
CK
4346 *
4347 * Returns:
4348 * 0 on success, negative error code on failure.
e3ecdffa 4349 */
c33adbc7 4350static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 4351{
c41d1cf6 4352 struct dma_fence *fence = NULL, *next = NULL;
403009bf 4353 struct amdgpu_bo *shadow;
e18aaea7 4354 struct amdgpu_bo_vm *vmbo;
403009bf 4355 long r = 1, tmo;
c41d1cf6
ML
4356
4357 if (amdgpu_sriov_runtime(adev))
b045d3af 4358 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
4359 else
4360 tmo = msecs_to_jiffies(100);
4361
aac89168 4362 dev_info(adev->dev, "recover vram bo from shadow start\n");
c41d1cf6 4363 mutex_lock(&adev->shadow_list_lock);
e18aaea7
ND
4364 list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4365 shadow = &vmbo->bo;
403009bf 4366 /* No need to recover an evicted BO */
d3116756
CK
4367 if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4368 shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4369 shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
403009bf
CK
4370 continue;
4371
4372 r = amdgpu_bo_restore_shadow(shadow, &next);
4373 if (r)
4374 break;
4375
c41d1cf6 4376 if (fence) {
1712fb1a 4377 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
4378 dma_fence_put(fence);
4379 fence = next;
1712fb1a 4380 if (tmo == 0) {
4381 r = -ETIMEDOUT;
c41d1cf6 4382 break;
1712fb1a 4383 } else if (tmo < 0) {
4384 r = tmo;
4385 break;
4386 }
403009bf
CK
4387 } else {
4388 fence = next;
c41d1cf6 4389 }
c41d1cf6
ML
4390 }
4391 mutex_unlock(&adev->shadow_list_lock);
4392
403009bf
CK
4393 if (fence)
4394 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
4395 dma_fence_put(fence);
4396
1712fb1a 4397 if (r < 0 || tmo <= 0) {
aac89168 4398 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
4399 return -EIO;
4400 }
c41d1cf6 4401
aac89168 4402 dev_info(adev->dev, "recover vram bo from shadow done\n");
403009bf 4403 return 0;
c41d1cf6
ML
4404}
4405
a90ad3c2 4406
e3ecdffa 4407/**
06ec9070 4408 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e 4409 *
982a820b 4410 * @adev: amdgpu_device pointer
87e3f136 4411 * @from_hypervisor: request from hypervisor
5740682e
ML
4412 *
4413 * Do a VF FLR and reinitialize the ASIC.
3f48c681 4414 * Returns 0 on success, negative error code on failure.
e3ecdffa
AD
4415 */
4416static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4417 bool from_hypervisor)
5740682e
ML
4418{
4419 int r;
a5f67c93 4420 struct amdgpu_hive_info *hive = NULL;
5740682e 4421
992110d7 4422 amdgpu_amdkfd_pre_reset(adev);
5740682e 4423
428890a3 4424
4425
5740682e
ML
4426 if (from_hypervisor)
4427 r = amdgpu_virt_request_full_gpu(adev, true);
4428 else
4429 r = amdgpu_virt_reset_gpu(adev);
4430 if (r)
4431 return r;
a90ad3c2
ML
4432
4433 /* Resume IP prior to SMC */
06ec9070 4434 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
4435 if (r)
4436 goto error;
a90ad3c2 4437
c9ffa427 4438 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 4439
7a3e0bb2
RZ
4440 r = amdgpu_device_fw_loading(adev);
4441 if (r)
4442 return r;
4443
a90ad3c2 4444 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 4445 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
4446 if (r)
4447 goto error;
a90ad3c2 4448
a5f67c93
ZL
4449 hive = amdgpu_get_xgmi_hive(adev);
4450 /* Update PSP FW topology after reset */
4451 if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4452 r = amdgpu_xgmi_update_topology(hive, adev);
4453
4454 if (hive)
4455 amdgpu_put_xgmi_hive(hive);
4456
4457 if (!r) {
4458 amdgpu_irq_gpu_reset_resume_helper(adev);
4459 r = amdgpu_ib_ring_tests(adev);
4460 amdgpu_amdkfd_post_reset(adev);
4461 }
a90ad3c2 4462
abc34253 4463error:
c41d1cf6 4464 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 4465 amdgpu_inc_vram_lost(adev);
c33adbc7 4466 r = amdgpu_device_recover_vram(adev);
a90ad3c2 4467 }
437f3e0b 4468 amdgpu_virt_release_full_gpu(adev, true);
a90ad3c2
ML
4469
4470 return r;
4471}
4472
9a1cddd6 4473/**
4474 * amdgpu_device_has_job_running - check if there is any job in the pending list
4475 *
982a820b 4476 * @adev: amdgpu_device pointer
9a1cddd6 4477 *
4478 * check if there is any job in the pending list
4479 */
4480bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4481{
4482 int i;
4483 struct drm_sched_job *job;
4484
4485 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4486 struct amdgpu_ring *ring = adev->rings[i];
4487
4488 if (!ring || !ring->sched.thread)
4489 continue;
4490
4491 spin_lock(&ring->sched.job_list_lock);
6efa4b46
LT
4492 job = list_first_entry_or_null(&ring->sched.pending_list,
4493 struct drm_sched_job, list);
9a1cddd6 4494 spin_unlock(&ring->sched.job_list_lock);
4495 if (job)
4496 return true;
4497 }
4498 return false;
4499}
4500
12938fad
CK
4501/**
4502 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4503 *
982a820b 4504 * @adev: amdgpu_device pointer
12938fad
CK
4505 *
4506 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4507 * a hung GPU.
4508 */
4509bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4510{
4511 if (!amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 4512 dev_info(adev->dev, "Timeout, but no hardware hang detected.\n");
12938fad
CK
4513 return false;
4514 }
4515
3ba7b418
AG
4516 if (amdgpu_gpu_recovery == 0)
4517 goto disabled;
4518
4519 if (amdgpu_sriov_vf(adev))
4520 return true;
4521
4522 if (amdgpu_gpu_recovery == -1) {
4523 switch (adev->asic_type) {
0ffb1fd1
AD
4524#ifdef CONFIG_DRM_AMDGPU_SI
4525 case CHIP_VERDE:
4526 case CHIP_TAHITI:
4527 case CHIP_PITCAIRN:
4528 case CHIP_OLAND:
4529 case CHIP_HAINAN:
4530#endif
4531#ifdef CONFIG_DRM_AMDGPU_CIK
4532 case CHIP_KAVERI:
4533 case CHIP_KABINI:
4534 case CHIP_MULLINS:
4535#endif
4536 case CHIP_CARRIZO:
4537 case CHIP_STONEY:
4538 case CHIP_CYAN_SKILLFISH:
3ba7b418 4539 goto disabled;
0ffb1fd1
AD
4540 default:
4541 break;
3ba7b418 4542 }
12938fad
CK
4543 }
4544
4545 return true;
3ba7b418
AG
4546
4547disabled:
aac89168 4548 dev_info(adev->dev, "GPU recovery disabled.\n");
3ba7b418 4549 return false;
12938fad
CK
4550}
4551
5c03e584
FX
4552int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
4553{
4554 u32 i;
4555 int ret = 0;
4556
4557 amdgpu_atombios_scratch_regs_engine_hung(adev, true);
4558
4559 dev_info(adev->dev, "GPU mode1 reset\n");
4560
4561 /* disable BM */
4562 pci_clear_master(adev->pdev);
4563
4564 amdgpu_device_cache_pci_state(adev->pdev);
4565
4566 if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
4567 dev_info(adev->dev, "GPU smu mode1 reset\n");
4568 ret = amdgpu_dpm_mode1_reset(adev);
4569 } else {
4570 dev_info(adev->dev, "GPU psp mode1 reset\n");
4571 ret = psp_gpu_reset(adev);
4572 }
4573
4574 if (ret)
4575 dev_err(adev->dev, "GPU mode1 reset failed\n");
4576
4577 amdgpu_device_load_pci_state(adev->pdev);
4578
4579 /* wait for asic to come out of reset */
4580 for (i = 0; i < adev->usec_timeout; i++) {
4581 u32 memsize = adev->nbio.funcs->get_memsize(adev);
4582
4583 if (memsize != 0xffffffff)
4584 break;
4585 udelay(1);
4586 }
4587
4588 amdgpu_atombios_scratch_regs_engine_hung(adev, false);
4589 return ret;
4590}
5c6dd71e 4591
e3c1b071 4592int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
04442bf7 4593 struct amdgpu_reset_context *reset_context)
26bc5340 4594{
5c1e6fa4 4595 int i, r = 0;
04442bf7
LL
4596 struct amdgpu_job *job = NULL;
4597 bool need_full_reset =
4598 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4599
4600 if (reset_context->reset_req_dev == adev)
4601 job = reset_context->job;
71182665 4602
b602ca5f
TZ
4603 if (amdgpu_sriov_vf(adev)) {
4604 /* stop the data exchange thread */
4605 amdgpu_virt_fini_data_exchange(adev);
4606 }
4607
71182665 4608 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
4609 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4610 struct amdgpu_ring *ring = adev->rings[i];
4611
51687759 4612 if (!ring || !ring->sched.thread)
0875dc9e 4613 continue;
5740682e 4614
c530b02f
JZ
4615 /* Clear the job fences from the fence driver to avoid force_completion;
4616 * leave the NULL and vm flush fences in the fence driver */
5c1e6fa4 4617 amdgpu_fence_driver_clear_job_fences(ring);
c530b02f 4618
2f9d4084
ML
4619 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
4620 amdgpu_fence_driver_force_completion(ring);
0875dc9e 4621 }
d38ceaf9 4622
ff99849b 4623 if (job && job->vm)
222b5f04
AG
4624 drm_sched_increase_karma(&job->base);
4625
04442bf7 4626 r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
404b277b
LL
4627 /* If reset handler not implemented, continue; otherwise return */
4628 if (r == -ENOSYS)
4629 r = 0;
4630 else
04442bf7
LL
4631 return r;
4632
1d721ed6 4633 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
4634 if (!amdgpu_sriov_vf(adev)) {
4635
4636 if (!need_full_reset)
4637 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4638
4639 if (!need_full_reset) {
4640 amdgpu_device_ip_pre_soft_reset(adev);
4641 r = amdgpu_device_ip_soft_reset(adev);
4642 amdgpu_device_ip_post_soft_reset(adev);
4643 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 4644 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
26bc5340
AG
4645 need_full_reset = true;
4646 }
4647 }
4648
4649 if (need_full_reset)
4650 r = amdgpu_device_ip_suspend(adev);
04442bf7
LL
4651 if (need_full_reset)
4652 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4653 else
4654 clear_bit(AMDGPU_NEED_FULL_RESET,
4655 &reset_context->flags);
26bc5340
AG
4656 }
4657
4658 return r;
4659}
4660
04442bf7
LL
4661int amdgpu_do_asic_reset(struct list_head *device_list_handle,
4662 struct amdgpu_reset_context *reset_context)
26bc5340
AG
4663{
4664 struct amdgpu_device *tmp_adev = NULL;
04442bf7 4665 bool need_full_reset, skip_hw_reset, vram_lost = false;
26bc5340
AG
4666 int r = 0;
4667
04442bf7
LL
4668 /* Try reset handler method first */
4669 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
4670 reset_list);
4671 r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
404b277b
LL
4672 /* If reset handler not implemented, continue; otherwise return */
4673 if (r == -ENOSYS)
4674 r = 0;
4675 else
04442bf7
LL
4676 return r;
4677
4678 /* Reset handler not implemented, use the default method */
4679 need_full_reset =
4680 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4681 skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
4682
26bc5340 4683 /*
655ce9cb 4684 * ASIC reset has to be done on all XGMI hive nodes ASAP
26bc5340
AG
4685 * to allow proper links negotiation in FW (within 1 sec)
4686 */
7ac71382 4687 if (!skip_hw_reset && need_full_reset) {
655ce9cb 4688 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
041a62bc 4689 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 4690 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
e3c1b071 4691 tmp_adev->gmc.xgmi.pending_reset = false;
c96cf282 4692 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
4693 r = -EALREADY;
4694 } else
4695 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 4696
041a62bc 4697 if (r) {
aac89168 4698 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4a580877 4699 r, adev_to_drm(tmp_adev)->unique);
041a62bc 4700 break;
ce316fa5
LM
4701 }
4702 }
4703
041a62bc
AG
4704 /* For XGMI wait for all resets to complete before proceed */
4705 if (!r) {
655ce9cb 4706 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
ce316fa5
LM
4707 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4708 flush_work(&tmp_adev->xgmi_reset_work);
4709 r = tmp_adev->asic_reset_res;
4710 if (r)
4711 break;
ce316fa5
LM
4712 }
4713 }
4714 }
ce316fa5 4715 }
26bc5340 4716
43c4d576 4717 if (!r && amdgpu_ras_intr_triggered()) {
655ce9cb 4718 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
8bc7b360
HZ
4719 if (tmp_adev->mmhub.ras_funcs &&
4720 tmp_adev->mmhub.ras_funcs->reset_ras_error_count)
4721 tmp_adev->mmhub.ras_funcs->reset_ras_error_count(tmp_adev);
43c4d576
JC
4722 }
4723
00eaa571 4724 amdgpu_ras_intr_cleared();
43c4d576 4725 }
00eaa571 4726
655ce9cb 4727 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
26bc5340
AG
4728 if (need_full_reset) {
4729 /* post card */
e3c1b071 4730 r = amdgpu_device_asic_init(tmp_adev);
4731 if (r) {
aac89168 4732 dev_warn(tmp_adev->dev, "asic atom init failed!");
e3c1b071 4733 } else {
26bc5340 4734 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
9cec53c1
JZ
4735 r = amdgpu_amdkfd_resume_iommu(tmp_adev);
4736 if (r)
4737 goto out;
4738
26bc5340
AG
4739 r = amdgpu_device_ip_resume_phase1(tmp_adev);
4740 if (r)
4741 goto out;
4742
4743 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
4744 if (vram_lost) {
77e7f829 4745 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 4746 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
4747 }
4748
26bc5340
AG
4749 r = amdgpu_device_fw_loading(tmp_adev);
4750 if (r)
4751 return r;
4752
4753 r = amdgpu_device_ip_resume_phase2(tmp_adev);
4754 if (r)
4755 goto out;
4756
4757 if (vram_lost)
4758 amdgpu_device_fill_reset_magic(tmp_adev);
4759
fdafb359
EQ
4760 /*
4761 * Add this ASIC back as tracked since the reset
4762 * already completed successfully.
4763 */
4764 amdgpu_register_gpu_instance(tmp_adev);
4765
04442bf7
LL
4766 if (!reset_context->hive &&
4767 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
e3c1b071 4768 amdgpu_xgmi_add_device(tmp_adev);
4769
7c04ca50 4770 r = amdgpu_device_ip_late_init(tmp_adev);
4771 if (r)
4772 goto out;
4773
087451f3 4774 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
565d1941 4775
e8fbaf03
GC
4776 /*
4777 * The GPU enters a bad state once the number of faulty pages
4778 * detected by ECC has reached the threshold, and RAS
4779 * recovery is scheduled next. So add one check
4780 * here to break recovery if it indeed exceeds the
4781 * bad page threshold, and remind the user to either
4782 * retire this GPU or set a bigger
4783 * bad_page_threshold value to fix this when
4784 * probing the driver again.
4785 */
11003c68 4786 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
e8fbaf03
GC
4787 /* must succeed. */
4788 amdgpu_ras_resume(tmp_adev);
4789 } else {
4790 r = -EINVAL;
4791 goto out;
4792 }
e79a04d5 4793
26bc5340 4794 /* Update PSP FW topology after reset */
04442bf7
LL
4795 if (reset_context->hive &&
4796 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
4797 r = amdgpu_xgmi_update_topology(
4798 reset_context->hive, tmp_adev);
26bc5340
AG
4799 }
4800 }
4801
26bc5340
AG
4802out:
4803 if (!r) {
4804 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
4805 r = amdgpu_ib_ring_tests(tmp_adev);
4806 if (r) {
4807 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
26bc5340
AG
4808 need_full_reset = true;
4809 r = -EAGAIN;
4810 goto end;
4811 }
4812 }
4813
4814 if (!r)
4815 r = amdgpu_device_recover_vram(tmp_adev);
4816 else
4817 tmp_adev->asic_reset_res = r;
4818 }
4819
4820end:
04442bf7
LL
4821 if (need_full_reset)
4822 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4823 else
4824 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340
AG
4825 return r;
4826}
4827
e923be99 4828static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
26bc5340 4829{
5740682e 4830
a3a09142
AD
4831 switch (amdgpu_asic_reset_method(adev)) {
4832 case AMD_RESET_METHOD_MODE1:
4833 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
4834 break;
4835 case AMD_RESET_METHOD_MODE2:
4836 adev->mp1_state = PP_MP1_STATE_RESET;
4837 break;
4838 default:
4839 adev->mp1_state = PP_MP1_STATE_NONE;
4840 break;
4841 }
26bc5340 4842}
d38ceaf9 4843
e923be99 4844static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
26bc5340 4845{
89041940 4846 amdgpu_vf_error_trans_all(adev);
a3a09142 4847 adev->mp1_state = PP_MP1_STATE_NONE;
26bc5340
AG
4848}
4849
3f12acc8
EQ
4850static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
4851{
4852 struct pci_dev *p = NULL;
4853
4854 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4855 adev->pdev->bus->number, 1);
4856 if (p) {
4857 pm_runtime_enable(&(p->dev));
4858 pm_runtime_resume(&(p->dev));
4859 }
4860}
4861
4862static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
4863{
4864 enum amd_reset_method reset_method;
4865 struct pci_dev *p = NULL;
4866 u64 expires;
4867
4868 /*
4869 * For now, only BACO and mode1 reset are confirmed
4870 * to suffer the audio issue if the audio device is not properly suspended.
4871 */
4872 reset_method = amdgpu_asic_reset_method(adev);
4873 if ((reset_method != AMD_RESET_METHOD_BACO) &&
4874 (reset_method != AMD_RESET_METHOD_MODE1))
4875 return -EINVAL;
4876
4877 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4878 adev->pdev->bus->number, 1);
4879 if (!p)
4880 return -ENODEV;
4881
4882 expires = pm_runtime_autosuspend_expiration(&(p->dev));
4883 if (!expires)
4884 /*
4885 * If we cannot get the audio device autosuspend delay,
4886 * a fixed 4S interval will be used. Considering that 3S is
4887 * the audio controller's default autosuspend delay setting,
4888 * the 4S used here is guaranteed to cover it.
4889 */
54b7feb9 4890 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
4891
4892 while (!pm_runtime_status_suspended(&(p->dev))) {
4893 if (!pm_runtime_suspend(&(p->dev)))
4894 break;
4895
4896 if (expires < ktime_get_mono_fast_ns()) {
4897 dev_warn(adev->dev, "failed to suspend display audio\n");
4898 /* TODO: abort the succeeding gpu reset? */
4899 return -ETIMEDOUT;
4900 }
4901 }
4902
4903 pm_runtime_disable(&(p->dev));
4904
4905 return 0;
4906}
4907
9d8d96be 4908static void amdgpu_device_recheck_guilty_jobs(
04442bf7
LL
4909 struct amdgpu_device *adev, struct list_head *device_list_handle,
4910 struct amdgpu_reset_context *reset_context)
e6c6338f
JZ
4911{
4912 int i, r = 0;
4913
4914 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4915 struct amdgpu_ring *ring = adev->rings[i];
4916 int ret = 0;
4917 struct drm_sched_job *s_job;
4918
4919 if (!ring || !ring->sched.thread)
4920 continue;
4921
4922 s_job = list_first_entry_or_null(&ring->sched.pending_list,
4923 struct drm_sched_job, list);
4924 if (s_job == NULL)
4925 continue;
4926
4927 /* clear job's guilty and rely on the following step to decide the real one */
4928 drm_sched_reset_karma(s_job);
38d4e463
JC
4929 /* the real bad job will be resubmitted twice; take a dma_fence_get here
4930 * to keep the fence refcount balanced */
4931 dma_fence_get(s_job->s_fence->parent);
e6c6338f
JZ
4932 drm_sched_resubmit_jobs_ext(&ring->sched, 1);
4933
4934 ret = dma_fence_wait_timeout(s_job->s_fence->parent, false, ring->sched.timeout);
4935 if (ret == 0) { /* timeout */
4936 DRM_ERROR("Found the real bad job! ring:%s, job_id:%llx\n",
4937 ring->sched.name, s_job->id);
4938
4939 /* set guilty */
4940 drm_sched_increase_karma(s_job);
4941retry:
4942 /* do hw reset */
4943 if (amdgpu_sriov_vf(adev)) {
4944 amdgpu_virt_fini_data_exchange(adev);
4945 r = amdgpu_device_reset_sriov(adev, false);
4946 if (r)
4947 adev->asic_reset_res = r;
4948 } else {
04442bf7
LL
4949 clear_bit(AMDGPU_SKIP_HW_RESET,
4950 &reset_context->flags);
4951 r = amdgpu_do_asic_reset(device_list_handle,
4952 reset_context);
e6c6338f
JZ
4953 if (r && r == -EAGAIN)
4954 goto retry;
4955 }
4956
4957 /*
4958 * add reset counter so that the following
4959 * resubmitted job could flush vmid
4960 */
4961 atomic_inc(&adev->gpu_reset_counter);
4962 continue;
4963 }
4964
4965 /* got the hw fence, signal finished fence */
4966 atomic_dec(ring->sched.score);
38d4e463 4967 dma_fence_put(s_job->s_fence->parent);
e6c6338f
JZ
4968 dma_fence_get(&s_job->s_fence->finished);
4969 dma_fence_signal(&s_job->s_fence->finished);
4970 dma_fence_put(&s_job->s_fence->finished);
4971
4972 /* remove node from list and free the job */
4973 spin_lock(&ring->sched.job_list_lock);
4974 list_del_init(&s_job->list);
4975 spin_unlock(&ring->sched.job_list_lock);
4976 ring->sched.ops->free_job(s_job);
4977 }
4978}
4979
26bc5340 4980/**
c7703ce3 4981 * amdgpu_device_gpu_recover_imp - reset the asic and recover scheduler
26bc5340 4982 *
982a820b 4983 * @adev: amdgpu_device pointer
26bc5340
AG
4984 * @job: which job trigger hang
4985 *
4986 * Attempt to reset the GPU if it has hung (all asics).
4987 * Attempt to do soft-reset or full-reset and reinitialize Asic
4988 * Returns 0 for success or an error on failure.
4989 */
4990
54f329cc 4991int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
26bc5340
AG
4992 struct amdgpu_job *job)
4993{
1d721ed6 4994 struct list_head device_list, *device_list_handle = NULL;
7dd8c205 4995 bool job_signaled = false;
26bc5340 4996 struct amdgpu_hive_info *hive = NULL;
26bc5340 4997 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 4998 int i, r = 0;
bb5c7235 4999 bool need_emergency_restart = false;
3f12acc8 5000 bool audio_suspended = false;
e6c6338f 5001 int tmp_vram_lost_counter;
04442bf7
LL
5002 struct amdgpu_reset_context reset_context;
5003
5004 memset(&reset_context, 0, sizeof(reset_context));
26bc5340 5005
6e3cd2a9 5006 /*
bb5c7235
WS
5007 * Special case: RAS triggered and full reset isn't supported
5008 */
5009 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5010
d5ea093e
AG
5011 /*
5012 * Flush RAM to disk so that after reboot
5013 * the user can read the log and see why the system rebooted.
5014 */
bb5c7235 5015 if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
5016 DRM_WARN("Emergency reboot.");
5017
5018 ksys_sync_helper();
5019 emergency_restart();
5020 }
5021
b823821f 5022 dev_info(adev->dev, "GPU %s begin!\n",
bb5c7235 5023 need_emergency_restart ? "jobs stop":"reset");
26bc5340 5024
175ac6ec
ZL
5025 if (!amdgpu_sriov_vf(adev))
5026 hive = amdgpu_get_xgmi_hive(adev);
681260df 5027 if (hive)
53b3f8f4 5028 mutex_lock(&hive->hive_lock);
26bc5340 5029
04442bf7
LL
5030 reset_context.method = AMD_RESET_METHOD_NONE;
5031 reset_context.reset_req_dev = adev;
5032 reset_context.job = job;
5033 reset_context.hive = hive;
5034 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
5035
9e94d22c
EQ
5036 /*
5037 * Build list of devices to reset.
5038 * In case we are in XGMI hive mode, resort the device list
5039 * to put adev in the 1st position.
5040 */
5041 INIT_LIST_HEAD(&device_list);
175ac6ec 5042 if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
655ce9cb 5043 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
5044 list_add_tail(&tmp_adev->reset_list, &device_list);
5045 if (!list_is_first(&adev->reset_list, &device_list))
5046 list_rotate_to_front(&adev->reset_list, &device_list);
5047 device_list_handle = &device_list;
26bc5340 5048 } else {
655ce9cb 5049 list_add_tail(&adev->reset_list, &device_list);
26bc5340
AG
5050 device_list_handle = &device_list;
5051 }
5052
e923be99
AG
5053 /* We need to lock the reset domain only once, for both XGMI and single-device resets */
5054 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5055 reset_list);
3675c2f2 5056 amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
e923be99 5057
1d721ed6 5058 /* block all schedulers and reset given job's ring */
655ce9cb 5059 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f287a3c5 5060
e923be99 5061 amdgpu_device_set_mp1_state(tmp_adev);
f287a3c5 5062
3f12acc8
EQ
5063 /*
5064 * Try to put the audio codec into suspend state
5065 * before the GPU reset starts.
5066 *
5067 * The power domain of the graphics device is
5068 * shared with the AZ power domain. Without this,
5069 * we may change the audio hardware behind the
5070 * audio driver's back, which will trigger
5071 * audio codec errors.
5072 */
5073 if (!amdgpu_device_suspend_display_audio(tmp_adev))
5074 audio_suspended = true;
5075
9e94d22c
EQ
5076 amdgpu_ras_set_error_query_ready(tmp_adev, false);
5077
52fb44cf
EQ
5078 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5079
428890a3 5080 if (!amdgpu_sriov_vf(tmp_adev))
5081 amdgpu_amdkfd_pre_reset(tmp_adev);
9e94d22c 5082
12ffa55d
AG
5083 /*
5084 * Mark these ASICs to be reset as untracked first,
5085 * and add them back after the reset completes
5086 */
5087 amdgpu_unregister_gpu_instance(tmp_adev);
5088
087451f3 5089 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
565d1941 5090
f1c1314b 5091 /* disable ras on ALL IPs */
bb5c7235 5092 if (!need_emergency_restart &&
b823821f 5093 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 5094 amdgpu_ras_suspend(tmp_adev);
5095
1d721ed6
AG
5096 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5097 struct amdgpu_ring *ring = tmp_adev->rings[i];
5098
5099 if (!ring || !ring->sched.thread)
5100 continue;
5101
0b2d2c2e 5102 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 5103
bb5c7235 5104 if (need_emergency_restart)
7c6e68c7 5105 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6 5106 }
8f8c80f4 5107 atomic_inc(&tmp_adev->gpu_reset_counter);
1d721ed6
AG
5108 }
5109
bb5c7235 5110 if (need_emergency_restart)
7c6e68c7
AG
5111 goto skip_sched_resume;
5112
1d721ed6
AG
5113 /*
5114 * Must check guilty signal here since after this point all old
5115 * HW fences are force signaled.
5116 *
5117 * job->base holds a reference to parent fence
5118 */
5119 if (job && job->base.s_fence->parent &&
7dd8c205 5120 dma_fence_is_signaled(job->base.s_fence->parent)) {
1d721ed6 5121 job_signaled = true;
1d721ed6
AG
5122 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5123 goto skip_hw_reset;
5124 }
5125
26bc5340 5126retry: /* Rest of adevs pre asic reset from XGMI hive. */
655ce9cb 5127 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
04442bf7 5128 r = amdgpu_device_pre_asic_reset(tmp_adev, &reset_context);
26bc5340
AG
5129 /* TODO: Should we stop? */
5130 if (r) {
aac89168 5131 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
4a580877 5132 r, adev_to_drm(tmp_adev)->unique);
26bc5340
AG
5133 tmp_adev->asic_reset_res = r;
5134 }
5135 }
5136
e6c6338f 5137 tmp_vram_lost_counter = atomic_read(&((adev)->vram_lost_counter));
26bc5340 5138 /* Actual ASIC resets if needed.*/
4f30d920 5139 /* Host driver will handle XGMI hive reset for SRIOV */
26bc5340
AG
5140 if (amdgpu_sriov_vf(adev)) {
5141 r = amdgpu_device_reset_sriov(adev, job ? false : true);
5142 if (r)
5143 adev->asic_reset_res = r;
5144 } else {
04442bf7 5145 r = amdgpu_do_asic_reset(device_list_handle, &reset_context);
26bc5340
AG
5146 if (r && r == -EAGAIN)
5147 goto retry;
5148 }
5149
1d721ed6
AG
5150skip_hw_reset:
5151
26bc5340 5152 /* Post ASIC reset for all devs. */
655ce9cb 5153 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
7c6e68c7 5154
e6c6338f
JZ
5155 /*
5156 * Sometimes a later bad compute job can block a good gfx job because the
5157 * gfx and compute rings share internal GC hardware. We add an additional
5158 * guilty-job recheck step to find the real guilty job: it synchronously
5159 * resubmits each job and waits for it to be signaled. If a job times out,
5160 * we identify it as the real guilty job.
5161 */
5162 if (amdgpu_gpu_recovery == 2 &&
5163 !(tmp_vram_lost_counter < atomic_read(&adev->vram_lost_counter)))
04442bf7
LL
5164 amdgpu_device_recheck_guilty_jobs(
5165 tmp_adev, device_list_handle, &reset_context);
e6c6338f 5166
1d721ed6
AG
5167 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5168 struct amdgpu_ring *ring = tmp_adev->rings[i];
5169
5170 if (!ring || !ring->sched.thread)
5171 continue;
5172
5173 /* No point in resubmitting jobs if we didn't do a HW reset */
5174 if (!tmp_adev->asic_reset_res && !job_signaled)
5175 drm_sched_resubmit_jobs(&ring->sched);
5176
5177 drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
5178 }
5179
1053b9c9 5180 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) {
4a580877 5181 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
1d721ed6
AG
5182 }
5183
5184 tmp_adev->asic_reset_res = 0;
26bc5340
AG
5185
5186 if (r) {
5187 /* bad news, how to tell it to userspace? */
12ffa55d 5188 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
5189 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5190 } else {
12ffa55d 5191 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
3fa8f89d
S
5192 if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5193 DRM_WARN("smart shift update failed\n");
26bc5340 5194 }
7c6e68c7 5195 }
26bc5340 5196
7c6e68c7 5197skip_sched_resume:
655ce9cb 5198 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
428890a3 5199 /* unlock kfd: SRIOV would do it separately */
5200 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
5201 amdgpu_amdkfd_post_reset(tmp_adev);
8e2712e7 5202
5203 /* kfd_post_reset will do nothing if the kfd device is not initialized,
5204 * so bring up kfd here if it was not initialized before
5205 */
5206 if (!adev->kfd.init_complete)
5207 amdgpu_amdkfd_device_init(adev);
5208
3f12acc8
EQ
5209 if (audio_suspended)
5210 amdgpu_device_resume_display_audio(tmp_adev);
e923be99
AG
5211
5212 amdgpu_device_unset_mp1_state(tmp_adev);
26bc5340
AG
5213 }
5214
e923be99
AG
5215 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5216 reset_list);
5217 amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5218
9e94d22c 5219 if (hive) {
9e94d22c 5220 mutex_unlock(&hive->hive_lock);
d95e8e97 5221 amdgpu_put_xgmi_hive(hive);
9e94d22c 5222 }
26bc5340 5223
f287a3c5 5224 if (r)
26bc5340 5225 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
d38ceaf9
AD
5226 return r;
5227}
5228
54f329cc
AG
5229struct amdgpu_recover_work_struct {
5230 struct work_struct base;
5231 struct amdgpu_device *adev;
5232 struct amdgpu_job *job;
5233 int ret;
5234};
5235
5236static void amdgpu_device_queue_gpu_recover_work(struct work_struct *work)
5237{
5238 struct amdgpu_recover_work_struct *recover_work = container_of(work, struct amdgpu_recover_work_struct, base);
5239
5240 recover_work->ret = amdgpu_device_gpu_recover_imp(recover_work->adev, recover_work->job);
5241}
5242/*
5243 * Serialize gpu recover into reset domain single threaded wq
5244 */
5245int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
5246 struct amdgpu_job *job)
5247{
5248 struct amdgpu_recover_work_struct work = {.adev = adev, .job = job};
5249
5250 INIT_WORK(&work.base, amdgpu_device_queue_gpu_recover_work);
5251
cfbb6b00 5252 if (!amdgpu_reset_domain_schedule(adev->reset_domain, &work.base))
54f329cc
AG
5253 return -EAGAIN;
5254
5255 flush_work(&work.base);
5256
5257 return work.ret;
5258}
5259
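A minimal caller-side sketch of the serialized entry point above: a DRM
scheduler timeout callback hands the hung job to amdgpu_device_gpu_recover(),
which queues the reset on the reset domain's ordered workqueue and waits for
the result. The helper name example_timedout_job() is an assumption made for
illustration; only amdgpu_device_gpu_recover() itself comes from this file.

static enum drm_gpu_sched_stat example_timedout_job(struct drm_sched_job *s_job)
{
	struct amdgpu_job *job = to_amdgpu_job(s_job);
	struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);

	/* Illustrative only: queue recovery on the reset domain's single
	 * threaded wq and wait for the result; a non-zero return includes
	 * the -EAGAIN case where the reset domain refused the work.
	 */
	if (amdgpu_device_gpu_recover(ring->adev, job))
		dev_err(ring->adev->dev, "GPU recovery failed\n");

	return DRM_GPU_SCHED_STAT_NOMINAL;
}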
e3ecdffa
AD
5260/**
5261 * amdgpu_device_get_pcie_info - fetch PCIe info about the PCIe slot
5262 *
5263 * @adev: amdgpu_device pointer
5264 *
5265 * Fetches and stores in the driver the PCIe capabilities (gen speed
5266 * and lanes) of the slot the device is in. Handles APUs and
5267 * virtualized environments where PCIE config space may not be available.
5268 */
5494d864 5269static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 5270{
5d9a6330 5271 struct pci_dev *pdev;
c5313457
HK
5272 enum pci_bus_speed speed_cap, platform_speed_cap;
5273 enum pcie_link_width platform_link_width;
d0dd7f0c 5274
cd474ba0
AD
5275 if (amdgpu_pcie_gen_cap)
5276 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 5277
cd474ba0
AD
5278 if (amdgpu_pcie_lane_cap)
5279 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 5280
cd474ba0
AD
5281 /* covers APUs as well */
5282 if (pci_is_root_bus(adev->pdev->bus)) {
5283 if (adev->pm.pcie_gen_mask == 0)
5284 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5285 if (adev->pm.pcie_mlw_mask == 0)
5286 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 5287 return;
cd474ba0 5288 }
d0dd7f0c 5289
c5313457
HK
5290 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5291 return;
5292
dbaa922b
AD
5293 pcie_bandwidth_available(adev->pdev, NULL,
5294 &platform_speed_cap, &platform_link_width);
c5313457 5295
cd474ba0 5296 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
5297 /* asic caps */
5298 pdev = adev->pdev;
5299 speed_cap = pcie_get_speed_cap(pdev);
5300 if (speed_cap == PCI_SPEED_UNKNOWN) {
5301 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
5302 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5303 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 5304 } else {
2b3a1f51
FX
5305 if (speed_cap == PCIE_SPEED_32_0GT)
5306 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5307 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5308 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5309 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5310 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5311 else if (speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5312 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5313 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5314 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5315 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5316 else if (speed_cap == PCIE_SPEED_8_0GT)
5317 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5318 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5319 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5320 else if (speed_cap == PCIE_SPEED_5_0GT)
5321 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5322 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5323 else
5324 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5325 }
5326 /* platform caps */
c5313457 5327 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
5328 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5329 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5330 } else {
2b3a1f51
FX
5331 if (platform_speed_cap == PCIE_SPEED_32_0GT)
5332 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5333 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5334 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5335 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5336 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5337 else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5338 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5339 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5340 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5341 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 5342 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
5343 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5344 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5345 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 5346 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
5347 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5348 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5349 else
5350 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5351
cd474ba0
AD
5352 }
5353 }
5354 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 5355 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
5356 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5357 } else {
c5313457 5358 switch (platform_link_width) {
5d9a6330 5359 case PCIE_LNK_X32:
cd474ba0
AD
5360 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5361 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5362 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5363 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5364 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5365 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5366 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5367 break;
5d9a6330 5368 case PCIE_LNK_X16:
cd474ba0
AD
5369 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5370 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5371 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5372 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5373 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5374 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5375 break;
5d9a6330 5376 case PCIE_LNK_X12:
cd474ba0
AD
5377 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5378 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5379 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5380 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5381 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5382 break;
5d9a6330 5383 case PCIE_LNK_X8:
cd474ba0
AD
5384 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5385 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5386 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5387 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5388 break;
5d9a6330 5389 case PCIE_LNK_X4:
cd474ba0
AD
5390 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5391 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5392 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5393 break;
5d9a6330 5394 case PCIE_LNK_X2:
cd474ba0
AD
5395 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5396 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5397 break;
5d9a6330 5398 case PCIE_LNK_X1:
cd474ba0
AD
5399 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5400 break;
5401 default:
5402 break;
5403 }
d0dd7f0c
AD
5404 }
5405 }
5406}
d38ceaf9 5407
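A hedged sketch of how the masks cached above might be consulted by a power
management path; example_supports_pcie_gen3() is an assumed helper name, while
the pcie_gen_mask field and the CAIL_* flags are the ones set in this function.

static bool example_supports_pcie_gen3(struct amdgpu_device *adev)
{
	/* Illustrative only: both the ASIC and the platform side of the
	 * link must advertise Gen3 for it to be usable.
	 */
	return (adev->pm.pcie_gen_mask & CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3) &&
	       (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
}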
361dbd01
AD
5408int amdgpu_device_baco_enter(struct drm_device *dev)
5409{
1348969a 5410 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5411 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01 5412
4a580877 5413 if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
361dbd01
AD
5414 return -ENOTSUPP;
5415
8ab0d6f0 5416 if (ras && adev->ras_enabled &&
acdae216 5417 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5418 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
5419
9530273e 5420 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
5421}
5422
5423int amdgpu_device_baco_exit(struct drm_device *dev)
5424{
1348969a 5425 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5426 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 5427 int ret = 0;
361dbd01 5428
4a580877 5429 if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
361dbd01
AD
5430 return -ENOTSUPP;
5431
9530273e
EQ
5432 ret = amdgpu_dpm_baco_exit(adev);
5433 if (ret)
5434 return ret;
7a22677b 5435
8ab0d6f0 5436 if (ras && adev->ras_enabled &&
acdae216 5437 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5438 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
5439
1bece222
CL
5440 if (amdgpu_passthrough(adev) &&
5441 adev->nbio.funcs->clear_doorbell_interrupt)
5442 adev->nbio.funcs->clear_doorbell_interrupt(adev);
5443
7a22677b 5444 return 0;
361dbd01 5445}
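A hedged sketch of how the two BACO helpers above could be paired by a
runtime power-management path; example_runtime_baco_cycle() is an assumed name
and the error handling is reduced to the essentials.

static int example_runtime_baco_cycle(struct drm_device *dev)
{
	int r;

	/* Illustrative only: enter BACO; returns -ENOTSUPP if the ASIC
	 * does not support it.
	 */
	r = amdgpu_device_baco_enter(dev);
	if (r)
		return r;

	/* ... the device idles in BACO until it is needed again ... */

	return amdgpu_device_baco_exit(dev);
}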
c9a6b82f
AG
5446
5447/**
5448 * amdgpu_pci_error_detected - Called when a PCI error is detected.
5449 * @pdev: PCI device struct
5450 * @state: PCI channel state
5451 *
5452 * Description: Called when a PCI error is detected.
5453 *
5454 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
5455 */
5456pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
5457{
5458 struct drm_device *dev = pci_get_drvdata(pdev);
5459 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5460 int i;
c9a6b82f
AG
5461
5462 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
5463
6894305c
AG
5464 if (adev->gmc.xgmi.num_physical_nodes > 1) {
5465 DRM_WARN("No support for XGMI hive yet...");
5466 return PCI_ERS_RESULT_DISCONNECT;
5467 }
5468
e17e27f9
GC
5469 adev->pci_channel_state = state;
5470
c9a6b82f
AG
5471 switch (state) {
5472 case pci_channel_io_normal:
5473 return PCI_ERS_RESULT_CAN_RECOVER;
acd89fca 5474 /* Fatal error, prepare for slot reset */
8a11d283
TZ
5475 case pci_channel_io_frozen:
5476 /*
d0fb18b5 5477 * Locking adev->reset_domain->sem will prevent any external access
acd89fca
AG
5478 * to GPU during PCI error recovery
5479 */
3675c2f2 5480 amdgpu_device_lock_reset_domain(adev->reset_domain);
e923be99 5481 amdgpu_device_set_mp1_state(adev);
acd89fca
AG
5482
5483 /*
5484 * Block any work scheduling as we do for regular GPU reset
5485 * for the duration of the recovery
5486 */
5487 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5488 struct amdgpu_ring *ring = adev->rings[i];
5489
5490 if (!ring || !ring->sched.thread)
5491 continue;
5492
5493 drm_sched_stop(&ring->sched, NULL);
5494 }
8f8c80f4 5495 atomic_inc(&adev->gpu_reset_counter);
c9a6b82f
AG
5496 return PCI_ERS_RESULT_NEED_RESET;
5497 case pci_channel_io_perm_failure:
5498 /* Permanent error, prepare for device removal */
5499 return PCI_ERS_RESULT_DISCONNECT;
5500 }
5501
5502 return PCI_ERS_RESULT_NEED_RESET;
5503}
5504
5505/**
5506 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
5507 * @pdev: pointer to PCI device
5508 */
5509pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
5510{
5511
5512 DRM_INFO("PCI error: mmio enabled callback!!\n");
5513
5514 /* TODO - dump whatever for debugging purposes */
5515
5516 /* This is called only if amdgpu_pci_error_detected returns
5517 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
5518 * works, so there is no need to reset the slot.
5519 */
5520
5521 return PCI_ERS_RESULT_RECOVERED;
5522}
5523
5524/**
5525 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
5526 * @pdev: PCI device struct
5527 *
5528 * Description: This routine is called by the pci error recovery
5529 * code after the PCI slot has been reset, just before we
5530 * should resume normal operations.
5531 */
5532pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
5533{
5534 struct drm_device *dev = pci_get_drvdata(pdev);
5535 struct amdgpu_device *adev = drm_to_adev(dev);
362c7b91 5536 int r, i;
04442bf7 5537 struct amdgpu_reset_context reset_context;
362c7b91 5538 u32 memsize;
7ac71382 5539 struct list_head device_list;
c9a6b82f
AG
5540
5541 DRM_INFO("PCI error: slot reset callback!!\n");
5542
04442bf7
LL
5543 memset(&reset_context, 0, sizeof(reset_context));
5544
7ac71382 5545 INIT_LIST_HEAD(&device_list);
655ce9cb 5546 list_add_tail(&adev->reset_list, &device_list);
7ac71382 5547
362c7b91
AG
5548 /* wait for asic to come out of reset */
5549 msleep(500);
5550
7ac71382 5551 /* Restore PCI confspace */
c1dd4aa6 5552 amdgpu_device_load_pci_state(pdev);
c9a6b82f 5553
362c7b91
AG
5554 /* confirm ASIC came out of reset */
5555 for (i = 0; i < adev->usec_timeout; i++) {
5556 memsize = amdgpu_asic_get_config_memsize(adev);
5557
5558 if (memsize != 0xffffffff)
5559 break;
5560 udelay(1);
5561 }
5562 if (memsize == 0xffffffff) {
5563 r = -ETIME;
5564 goto out;
5565 }
5566
04442bf7
LL
5567 reset_context.method = AMD_RESET_METHOD_NONE;
5568 reset_context.reset_req_dev = adev;
5569 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
5570 set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
5571
7afefb81 5572 adev->no_hw_access = true;
04442bf7 5573 r = amdgpu_device_pre_asic_reset(adev, &reset_context);
7afefb81 5574 adev->no_hw_access = false;
c9a6b82f
AG
5575 if (r)
5576 goto out;
5577
04442bf7 5578 r = amdgpu_do_asic_reset(&device_list, &reset_context);
c9a6b82f
AG
5579
5580out:
c9a6b82f 5581 if (!r) {
c1dd4aa6
AG
5582 if (amdgpu_device_cache_pci_state(adev->pdev))
5583 pci_restore_state(adev->pdev);
5584
c9a6b82f
AG
5585 DRM_INFO("PCIe error recovery succeeded\n");
5586 } else {
5587 DRM_ERROR("PCIe error recovery failed, err:%d", r);
e923be99
AG
5588 amdgpu_device_unset_mp1_state(adev);
5589 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f
AG
5590 }
5591
5592 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
5593}
5594
5595/**
5596 * amdgpu_pci_resume() - resume normal ops after PCI reset
5597 * @pdev: pointer to PCI device
5598 *
5599 * Called when the error recovery driver tells us that it's
505199a3 5600 * OK to resume normal operation.
c9a6b82f
AG
5601 */
5602void amdgpu_pci_resume(struct pci_dev *pdev)
5603{
5604 struct drm_device *dev = pci_get_drvdata(pdev);
5605 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5606 int i;
c9a6b82f 5607
c9a6b82f
AG
5608
5609 DRM_INFO("PCI error: resume callback!!\n");
acd89fca 5610
e17e27f9
GC
5611 /* Only continue execution for the case of pci_channel_io_frozen */
5612 if (adev->pci_channel_state != pci_channel_io_frozen)
5613 return;
5614
acd89fca
AG
5615 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5616 struct amdgpu_ring *ring = adev->rings[i];
5617
5618 if (!ring || !ring->sched.thread)
5619 continue;
5620
5621
5622 drm_sched_resubmit_jobs(&ring->sched);
5623 drm_sched_start(&ring->sched, true);
5624 }
5625
e923be99
AG
5626 amdgpu_device_unset_mp1_state(adev);
5627 amdgpu_device_unlock_reset_domain(adev->reset_domain);
c9a6b82f 5628}
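For context, the four callbacks above form the driver's PCI error-recovery
entry points. A sketch of how they would be wired into a struct
pci_error_handlers from the PCI driver registration path is shown below; the
variable name example_pci_err_handler is an assumption for illustration.

static const struct pci_error_handlers example_pci_err_handler = {
	.error_detected	= amdgpu_pci_error_detected,
	.mmio_enabled	= amdgpu_pci_mmio_enabled,
	.slot_reset	= amdgpu_pci_slot_reset,
	.resume		= amdgpu_pci_resume,
};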
c1dd4aa6
AG
5629
5630bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
5631{
5632 struct drm_device *dev = pci_get_drvdata(pdev);
5633 struct amdgpu_device *adev = drm_to_adev(dev);
5634 int r;
5635
5636 r = pci_save_state(pdev);
5637 if (!r) {
5638 kfree(adev->pci_state);
5639
5640 adev->pci_state = pci_store_saved_state(pdev);
5641
5642 if (!adev->pci_state) {
5643 DRM_ERROR("Failed to store PCI saved state");
5644 return false;
5645 }
5646 } else {
5647 DRM_WARN("Failed to save PCI state, err:%d\n", r);
5648 return false;
5649 }
5650
5651 return true;
5652}
5653
5654bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
5655{
5656 struct drm_device *dev = pci_get_drvdata(pdev);
5657 struct amdgpu_device *adev = drm_to_adev(dev);
5658 int r;
5659
5660 if (!adev->pci_state)
5661 return false;
5662
5663 r = pci_load_saved_state(pdev, adev->pci_state);
5664
5665 if (!r) {
5666 pci_restore_state(pdev);
5667 } else {
5668 DRM_WARN("Failed to load PCI state, err:%d\n", r);
5669 return false;
5670 }
5671
5672 return true;
5673}
5674
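A hedged sketch of how the cache/load pair above is meant to bracket a reset:
snapshot config space while the device is healthy, reset, then restore the
snapshot so the device is reachable on the bus again.
example_reset_with_pci_state() is an assumed helper name and the actual reset
step is elided.

static int example_reset_with_pci_state(struct amdgpu_device *adev)
{
	/* Illustrative only: snapshot config space up front */
	if (!amdgpu_device_cache_pci_state(adev->pdev))
		return -EINVAL;

	/* ... perform the actual ASIC reset here ... */

	/* Restore the snapshot taken above */
	if (!amdgpu_device_load_pci_state(adev->pdev))
		return -EIO;

	return 0;
}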
810085dd
EH
5675void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
5676 struct amdgpu_ring *ring)
5677{
5678#ifdef CONFIG_X86_64
5679 if (adev->flags & AMD_IS_APU)
5680 return;
5681#endif
5682 if (adev->gmc.xgmi.connected_to_cpu)
5683 return;
5684
5685 if (ring && ring->funcs->emit_hdp_flush)
5686 amdgpu_ring_emit_hdp_flush(ring);
5687 else
5688 amdgpu_asic_flush_hdp(adev, ring);
5689}
c1dd4aa6 5690
810085dd
EH
5691void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
5692 struct amdgpu_ring *ring)
5693{
5694#ifdef CONFIG_X86_64
5695 if (adev->flags & AMD_IS_APU)
5696 return;
5697#endif
5698 if (adev->gmc.xgmi.connected_to_cpu)
5699 return;
c1dd4aa6 5700
810085dd
EH
5701 amdgpu_asic_invalidate_hdp(adev, ring);
5702}
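A hedged sketch of the ordering the two HDP helpers above exist for: flush the
HDP cache after a CPU write to VRAM so the GPU observes the new value.
example_cpu_write_vram() and its parameters are assumptions for illustration.

static void example_cpu_write_vram(struct amdgpu_device *adev,
				   void __iomem *vram_ptr, u32 value)
{
	writel(value, vram_ptr);		/* CPU write into a VRAM mapping */
	amdgpu_device_flush_hdp(adev, NULL);	/* make it visible to the GPU */
}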
34f3a4a9 5703
89a7a870
AG
5704int amdgpu_in_reset(struct amdgpu_device *adev)
5705{
5706 return atomic_read(&adev->reset_domain->in_gpu_reset);
5707}
5708
34f3a4a9
LY
5709/**
5710 * amdgpu_device_halt() - bring hardware to some kind of halt state
5711 *
5712 * @adev: amdgpu_device pointer
5713 *
5714 * Bring hardware to some kind of halt state so that no one can touch it
5715 * any more. It helps to preserve the error context when an error occurs.
5716 * Compared to a simple hang, the system will stay stable at least for SSH
5717 * access. It should then be trivial to inspect the hardware state and
5718 * see what's going on. Implemented as follows:
5719 *
5720 * 1. drm_dev_unplug() makes device inaccessible to user space (IOCTLs, etc),
5721 * clears all CPU mappings to device, disallows remappings through page faults
5722 * 2. amdgpu_irq_disable_all() disables all interrupts
5723 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
5724 * 4. set adev->no_hw_access to avoid potential crashes after step 5
5725 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
5726 * 6. pci_disable_device() and pci_wait_for_pending_transaction()
5727 * flush any in flight DMA operations
5728 */
5729void amdgpu_device_halt(struct amdgpu_device *adev)
5730{
5731 struct pci_dev *pdev = adev->pdev;
e0f943b4 5732 struct drm_device *ddev = adev_to_drm(adev);
34f3a4a9
LY
5733
5734 drm_dev_unplug(ddev);
5735
5736 amdgpu_irq_disable_all(adev);
5737
5738 amdgpu_fence_driver_hw_fini(adev);
5739
5740 adev->no_hw_access = true;
5741
5742 amdgpu_device_unmap_mmio(adev);
5743
5744 pci_disable_device(pdev);
5745 pci_wait_for_pending_transaction(pdev);
5746}