drm/amdgpu: Move reset sem into reset_domain
[linux-2.6-block.git] / drivers / gpu / drm / amd / amdgpu / amdgpu_device.c
CommitLineData
d38ceaf9
AD
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
b1ddf548 28#include <linux/power_supply.h>
0875dc9e 29#include <linux/kthread.h>
fdf2f6c5 30#include <linux/module.h>
d38ceaf9
AD
31#include <linux/console.h>
32#include <linux/slab.h>
4a74c38c 33#include <linux/iommu.h>
fdf2f6c5 34
4562236b 35#include <drm/drm_atomic_helper.h>
fcd70cd3 36#include <drm/drm_probe_helper.h>
d38ceaf9
AD
37#include <drm/amdgpu_drm.h>
38#include <linux/vgaarb.h>
39#include <linux/vga_switcheroo.h>
40#include <linux/efi.h>
41#include "amdgpu.h"
f4b373f4 42#include "amdgpu_trace.h"
d38ceaf9
AD
43#include "amdgpu_i2c.h"
44#include "atom.h"
45#include "amdgpu_atombios.h"
a5bde2f9 46#include "amdgpu_atomfirmware.h"
d0dd7f0c 47#include "amd_pcie.h"
33f34802
KW
48#ifdef CONFIG_DRM_AMDGPU_SI
49#include "si.h"
50#endif
a2e73f56
AD
51#ifdef CONFIG_DRM_AMDGPU_CIK
52#include "cik.h"
53#endif
aaa36a97 54#include "vi.h"
460826e6 55#include "soc15.h"
0a5b8c7b 56#include "nv.h"
d38ceaf9 57#include "bif/bif_4_1_d.h"
9accf2fd 58#include <linux/pci.h>
bec86378 59#include <linux/firmware.h>
89041940 60#include "amdgpu_vf_error.h"
d38ceaf9 61
ba997709 62#include "amdgpu_amdkfd.h"
d2f52ac8 63#include "amdgpu_pm.h"
d38ceaf9 64
5183411b 65#include "amdgpu_xgmi.h"
c030f2e4 66#include "amdgpu_ras.h"
9c7c85f7 67#include "amdgpu_pmu.h"
bd607166 68#include "amdgpu_fru_eeprom.h"
04442bf7 69#include "amdgpu_reset.h"
5183411b 70
d5ea093e 71#include <linux/suspend.h>
c6a6e2db 72#include <drm/task_barrier.h>
3f12acc8 73#include <linux/pm_runtime.h>
d5ea093e 74
f89f8c6b
AG
75#include <drm/drm_drv.h>
76
e2a75f88 77MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
3f76dced 78MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
2d2e5e7e 79MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
ad5a67a7 80MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
54c4d17e 81MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
65e60f6e 82MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
b51a26a0 83MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
23c6268e 84MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
ed42cfe1 85MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
42b325e5 86MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
4e52a9f8 87MODULE_FIRMWARE("amdgpu/vangogh_gpu_info.bin");
8bf84f60 88MODULE_FIRMWARE("amdgpu/yellow_carp_gpu_info.bin");
e2a75f88 89
2dc80b00
S
90#define AMDGPU_RESUME_MS 2000
91
050091ab 92const char *amdgpu_asic_name[] = {
da69c161
KW
93 "TAHITI",
94 "PITCAIRN",
95 "VERDE",
96 "OLAND",
97 "HAINAN",
d38ceaf9
AD
98 "BONAIRE",
99 "KAVERI",
100 "KABINI",
101 "HAWAII",
102 "MULLINS",
103 "TOPAZ",
104 "TONGA",
48299f95 105 "FIJI",
d38ceaf9 106 "CARRIZO",
139f4917 107 "STONEY",
2cc0c0b5
FC
108 "POLARIS10",
109 "POLARIS11",
c4642a47 110 "POLARIS12",
48ff108d 111 "VEGAM",
d4196f01 112 "VEGA10",
8fab806a 113 "VEGA12",
956fcddc 114 "VEGA20",
2ca8a5d2 115 "RAVEN",
d6c3b24e 116 "ARCTURUS",
1eee4228 117 "RENOIR",
d46b417a 118 "ALDEBARAN",
852a6626 119 "NAVI10",
d0f56dc2 120 "CYAN_SKILLFISH",
87dbad02 121 "NAVI14",
9802f5d7 122 "NAVI12",
ccaf72d3 123 "SIENNA_CICHLID",
ddd8fbe7 124 "NAVY_FLOUNDER",
4f1e9a76 125 "VANGOGH",
a2468e04 126 "DIMGREY_CAVEFISH",
6f169591 127 "BEIGE_GOBY",
ee9236b7 128 "YELLOW_CARP",
3ae695d6 129 "IP DISCOVERY",
d38ceaf9
AD
130 "LAST",
131};
132
dcea6e65
KR
133/**
134 * DOC: pcie_replay_count
135 *
136 * The amdgpu driver provides a sysfs API for reporting the total number
137 * of PCIe replays (NAKs)
138 * The file pcie_replay_count is used for this and returns the total
139 * number of replays as a sum of the NAKs generated and NAKs received
140 */
141
142static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
143 struct device_attribute *attr, char *buf)
144{
145 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 146 struct amdgpu_device *adev = drm_to_adev(ddev);
dcea6e65
KR
147 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
148
36000c7a 149 return sysfs_emit(buf, "%llu\n", cnt);
dcea6e65
KR
150}
151
152static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
153 amdgpu_device_get_pcie_replay_count, NULL);
154
5494d864
AD
155static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
156
bd607166
KR
157/**
158 * DOC: product_name
159 *
160 * The amdgpu driver provides a sysfs API for reporting the product name
161 * for the device
162 * The file serial_number is used for this and returns the product name
163 * as returned from the FRU.
164 * NOTE: This is only available for certain server cards
165 */
166
167static ssize_t amdgpu_device_get_product_name(struct device *dev,
168 struct device_attribute *attr, char *buf)
169{
170 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 171 struct amdgpu_device *adev = drm_to_adev(ddev);
bd607166 172
36000c7a 173 return sysfs_emit(buf, "%s\n", adev->product_name);
bd607166
KR
174}
175
176static DEVICE_ATTR(product_name, S_IRUGO,
177 amdgpu_device_get_product_name, NULL);
178
179/**
180 * DOC: product_number
181 *
182 * The amdgpu driver provides a sysfs API for reporting the part number
183 * for the device
184 * The file serial_number is used for this and returns the part number
185 * as returned from the FRU.
186 * NOTE: This is only available for certain server cards
187 */
188
189static ssize_t amdgpu_device_get_product_number(struct device *dev,
190 struct device_attribute *attr, char *buf)
191{
192 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 193 struct amdgpu_device *adev = drm_to_adev(ddev);
bd607166 194
36000c7a 195 return sysfs_emit(buf, "%s\n", adev->product_number);
bd607166
KR
196}
197
198static DEVICE_ATTR(product_number, S_IRUGO,
199 amdgpu_device_get_product_number, NULL);
200
201/**
202 * DOC: serial_number
203 *
204 * The amdgpu driver provides a sysfs API for reporting the serial number
205 * for the device
206 * The file serial_number is used for this and returns the serial number
207 * as returned from the FRU.
208 * NOTE: This is only available for certain server cards
209 */
210
211static ssize_t amdgpu_device_get_serial_number(struct device *dev,
212 struct device_attribute *attr, char *buf)
213{
214 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 215 struct amdgpu_device *adev = drm_to_adev(ddev);
bd607166 216
36000c7a 217 return sysfs_emit(buf, "%s\n", adev->serial);
bd607166
KR
218}
219
220static DEVICE_ATTR(serial_number, S_IRUGO,
221 amdgpu_device_get_serial_number, NULL);
222
fd496ca8 223/**
b98c6299 224 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
fd496ca8
AD
225 *
226 * @dev: drm_device pointer
227 *
b98c6299 228 * Returns true if the device is a dGPU with ATPX power control,
fd496ca8
AD
229 * otherwise return false.
230 */
b98c6299 231bool amdgpu_device_supports_px(struct drm_device *dev)
fd496ca8
AD
232{
233 struct amdgpu_device *adev = drm_to_adev(dev);
234
b98c6299 235 if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
fd496ca8
AD
236 return true;
237 return false;
238}
239
e3ecdffa 240/**
0330b848 241 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
e3ecdffa
AD
242 *
243 * @dev: drm_device pointer
244 *
b98c6299 245 * Returns true if the device is a dGPU with ACPI power control,
e3ecdffa
AD
246 * otherwise return false.
247 */
31af062a 248bool amdgpu_device_supports_boco(struct drm_device *dev)
d38ceaf9 249{
1348969a 250 struct amdgpu_device *adev = drm_to_adev(dev);
d38ceaf9 251
b98c6299
AD
252 if (adev->has_pr3 ||
253 ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
d38ceaf9
AD
254 return true;
255 return false;
256}
257
a69cba42
AD
258/**
259 * amdgpu_device_supports_baco - Does the device support BACO
260 *
261 * @dev: drm_device pointer
262 *
263 * Returns true if the device supporte BACO,
264 * otherwise return false.
265 */
266bool amdgpu_device_supports_baco(struct drm_device *dev)
267{
1348969a 268 struct amdgpu_device *adev = drm_to_adev(dev);
a69cba42
AD
269
270 return amdgpu_asic_supports_baco(adev);
271}
272
3fa8f89d
S
273/**
274 * amdgpu_device_supports_smart_shift - Is the device dGPU with
275 * smart shift support
276 *
277 * @dev: drm_device pointer
278 *
279 * Returns true if the device is a dGPU with Smart Shift support,
280 * otherwise returns false.
281 */
282bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
283{
284 return (amdgpu_device_supports_boco(dev) &&
285 amdgpu_acpi_is_power_shift_control_supported());
286}
287
6e3cd2a9
MCC
288/*
289 * VRAM access helper functions
290 */
291
e35e2b11 292/**
048af66b 293 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
e35e2b11
TY
294 *
295 * @adev: amdgpu_device pointer
296 * @pos: offset of the buffer in vram
297 * @buf: virtual address of the buffer in system memory
298 * @size: read/write size, sizeof(@buf) must > @size
299 * @write: true - write to vram, otherwise - read from vram
300 */
048af66b
KW
301void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
302 void *buf, size_t size, bool write)
e35e2b11 303{
e35e2b11 304 unsigned long flags;
048af66b
KW
305 uint32_t hi = ~0, tmp = 0;
306 uint32_t *data = buf;
ce05ac56 307 uint64_t last;
f89f8c6b 308 int idx;
ce05ac56 309
c58a863b 310 if (!drm_dev_enter(adev_to_drm(adev), &idx))
f89f8c6b 311 return;
9d11eb0d 312
048af66b
KW
313 BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
314
315 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
316 for (last = pos + size; pos < last; pos += 4) {
317 tmp = pos >> 31;
318
319 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
320 if (tmp != hi) {
321 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
322 hi = tmp;
323 }
324 if (write)
325 WREG32_NO_KIQ(mmMM_DATA, *data++);
326 else
327 *data++ = RREG32_NO_KIQ(mmMM_DATA);
328 }
329
330 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
331 drm_dev_exit(idx);
332}
333
334/**
bbe04dec 335 * amdgpu_device_aper_access - access vram by vram aperature
048af66b
KW
336 *
337 * @adev: amdgpu_device pointer
338 * @pos: offset of the buffer in vram
339 * @buf: virtual address of the buffer in system memory
340 * @size: read/write size, sizeof(@buf) must > @size
341 * @write: true - write to vram, otherwise - read from vram
342 *
343 * The return value means how many bytes have been transferred.
344 */
345size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
346 void *buf, size_t size, bool write)
347{
9d11eb0d 348#ifdef CONFIG_64BIT
048af66b
KW
349 void __iomem *addr;
350 size_t count = 0;
351 uint64_t last;
352
353 if (!adev->mman.aper_base_kaddr)
354 return 0;
355
9d11eb0d
CK
356 last = min(pos + size, adev->gmc.visible_vram_size);
357 if (last > pos) {
048af66b
KW
358 addr = adev->mman.aper_base_kaddr + pos;
359 count = last - pos;
9d11eb0d
CK
360
361 if (write) {
362 memcpy_toio(addr, buf, count);
363 mb();
810085dd 364 amdgpu_device_flush_hdp(adev, NULL);
9d11eb0d 365 } else {
810085dd 366 amdgpu_device_invalidate_hdp(adev, NULL);
9d11eb0d
CK
367 mb();
368 memcpy_fromio(buf, addr, count);
369 }
370
9d11eb0d 371 }
048af66b
KW
372
373 return count;
374#else
375 return 0;
9d11eb0d 376#endif
048af66b 377}
9d11eb0d 378
048af66b
KW
379/**
380 * amdgpu_device_vram_access - read/write a buffer in vram
381 *
382 * @adev: amdgpu_device pointer
383 * @pos: offset of the buffer in vram
384 * @buf: virtual address of the buffer in system memory
385 * @size: read/write size, sizeof(@buf) must > @size
386 * @write: true - write to vram, otherwise - read from vram
387 */
388void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
389 void *buf, size_t size, bool write)
390{
391 size_t count;
e35e2b11 392
048af66b
KW
393 /* try to using vram apreature to access vram first */
394 count = amdgpu_device_aper_access(adev, pos, buf, size, write);
395 size -= count;
396 if (size) {
397 /* using MM to access rest vram */
398 pos += count;
399 buf += count;
400 amdgpu_device_mm_access(adev, pos, buf, size, write);
e35e2b11
TY
401 }
402}
403
d38ceaf9 404/*
f7ee1874 405 * register access helper functions.
d38ceaf9 406 */
56b53c0b
DL
407
408/* Check if hw access should be skipped because of hotplug or device error */
409bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
410{
7afefb81 411 if (adev->no_hw_access)
56b53c0b
DL
412 return true;
413
414#ifdef CONFIG_LOCKDEP
415 /*
416 * This is a bit complicated to understand, so worth a comment. What we assert
417 * here is that the GPU reset is not running on another thread in parallel.
418 *
419 * For this we trylock the read side of the reset semaphore, if that succeeds
420 * we know that the reset is not running in paralell.
421 *
422 * If the trylock fails we assert that we are either already holding the read
423 * side of the lock or are the reset thread itself and hold the write side of
424 * the lock.
425 */
426 if (in_task()) {
d0fb18b5
AG
427 if (down_read_trylock(&adev->reset_domain->sem))
428 up_read(&adev->reset_domain->sem);
56b53c0b 429 else
d0fb18b5 430 lockdep_assert_held(&adev->reset_domain->sem);
56b53c0b
DL
431 }
432#endif
433 return false;
434}
435
e3ecdffa 436/**
f7ee1874 437 * amdgpu_device_rreg - read a memory mapped IO or indirect register
e3ecdffa
AD
438 *
439 * @adev: amdgpu_device pointer
440 * @reg: dword aligned register offset
441 * @acc_flags: access flags which require special behavior
442 *
443 * Returns the 32 bit value from the offset specified.
444 */
f7ee1874
HZ
445uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
446 uint32_t reg, uint32_t acc_flags)
d38ceaf9 447{
f4b373f4
TSD
448 uint32_t ret;
449
56b53c0b 450 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
451 return 0;
452
f7ee1874
HZ
453 if ((reg * 4) < adev->rmmio_size) {
454 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
455 amdgpu_sriov_runtime(adev) &&
d0fb18b5 456 down_read_trylock(&adev->reset_domain->sem)) {
f7ee1874 457 ret = amdgpu_kiq_rreg(adev, reg);
d0fb18b5 458 up_read(&adev->reset_domain->sem);
f7ee1874
HZ
459 } else {
460 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
461 }
462 } else {
463 ret = adev->pcie_rreg(adev, reg * 4);
81202807 464 }
bc992ba5 465
f7ee1874 466 trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
e78b579d 467
f4b373f4 468 return ret;
d38ceaf9
AD
469}
470
421a2a30
ML
471/*
472 * MMIO register read with bytes helper functions
473 * @offset:bytes offset from MMIO start
474 *
475*/
476
e3ecdffa
AD
477/**
478 * amdgpu_mm_rreg8 - read a memory mapped IO register
479 *
480 * @adev: amdgpu_device pointer
481 * @offset: byte aligned register offset
482 *
483 * Returns the 8 bit value from the offset specified.
484 */
7cbbc745
AG
485uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
486{
56b53c0b 487 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
488 return 0;
489
421a2a30
ML
490 if (offset < adev->rmmio_size)
491 return (readb(adev->rmmio + offset));
492 BUG();
493}
494
495/*
496 * MMIO register write with bytes helper functions
497 * @offset:bytes offset from MMIO start
498 * @value: the value want to be written to the register
499 *
500*/
e3ecdffa
AD
501/**
502 * amdgpu_mm_wreg8 - read a memory mapped IO register
503 *
504 * @adev: amdgpu_device pointer
505 * @offset: byte aligned register offset
506 * @value: 8 bit value to write
507 *
508 * Writes the value specified to the offset specified.
509 */
7cbbc745
AG
510void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
511{
56b53c0b 512 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
513 return;
514
421a2a30
ML
515 if (offset < adev->rmmio_size)
516 writeb(value, adev->rmmio + offset);
517 else
518 BUG();
519}
520
e3ecdffa 521/**
f7ee1874 522 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
e3ecdffa
AD
523 *
524 * @adev: amdgpu_device pointer
525 * @reg: dword aligned register offset
526 * @v: 32 bit value to write to the register
527 * @acc_flags: access flags which require special behavior
528 *
529 * Writes the value specified to the offset specified.
530 */
f7ee1874
HZ
531void amdgpu_device_wreg(struct amdgpu_device *adev,
532 uint32_t reg, uint32_t v,
533 uint32_t acc_flags)
d38ceaf9 534{
56b53c0b 535 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
536 return;
537
f7ee1874
HZ
538 if ((reg * 4) < adev->rmmio_size) {
539 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
540 amdgpu_sriov_runtime(adev) &&
d0fb18b5 541 down_read_trylock(&adev->reset_domain->sem)) {
f7ee1874 542 amdgpu_kiq_wreg(adev, reg, v);
d0fb18b5 543 up_read(&adev->reset_domain->sem);
f7ee1874
HZ
544 } else {
545 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
546 }
547 } else {
548 adev->pcie_wreg(adev, reg * 4, v);
81202807 549 }
bc992ba5 550
f7ee1874 551 trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
2e0cc4d4 552}
d38ceaf9 553
03f2abb0 554/**
4cc9f86f 555 * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
2e0cc4d4
ML
556 *
557 * this function is invoked only the debugfs register access
03f2abb0 558 */
f7ee1874
HZ
559void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
560 uint32_t reg, uint32_t v)
2e0cc4d4 561{
56b53c0b 562 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
563 return;
564
2e0cc4d4 565 if (amdgpu_sriov_fullaccess(adev) &&
f7ee1874
HZ
566 adev->gfx.rlc.funcs &&
567 adev->gfx.rlc.funcs->is_rlcg_access_range) {
2e0cc4d4 568 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
1a4772d9 569 return adev->gfx.rlc.funcs->sriov_wreg(adev, reg, v, 0, 0);
4cc9f86f
TSD
570 } else if ((reg * 4) >= adev->rmmio_size) {
571 adev->pcie_wreg(adev, reg * 4, v);
f7ee1874
HZ
572 } else {
573 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
47ed4e1c 574 }
d38ceaf9
AD
575}
576
d38ceaf9
AD
577/**
578 * amdgpu_mm_rdoorbell - read a doorbell dword
579 *
580 * @adev: amdgpu_device pointer
581 * @index: doorbell index
582 *
583 * Returns the value in the doorbell aperture at the
584 * requested doorbell index (CIK).
585 */
586u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
587{
56b53c0b 588 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
589 return 0;
590
d38ceaf9
AD
591 if (index < adev->doorbell.num_doorbells) {
592 return readl(adev->doorbell.ptr + index);
593 } else {
594 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
595 return 0;
596 }
597}
598
599/**
600 * amdgpu_mm_wdoorbell - write a doorbell dword
601 *
602 * @adev: amdgpu_device pointer
603 * @index: doorbell index
604 * @v: value to write
605 *
606 * Writes @v to the doorbell aperture at the
607 * requested doorbell index (CIK).
608 */
609void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
610{
56b53c0b 611 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
612 return;
613
d38ceaf9
AD
614 if (index < adev->doorbell.num_doorbells) {
615 writel(v, adev->doorbell.ptr + index);
616 } else {
617 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
618 }
619}
620
832be404
KW
621/**
622 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
623 *
624 * @adev: amdgpu_device pointer
625 * @index: doorbell index
626 *
627 * Returns the value in the doorbell aperture at the
628 * requested doorbell index (VEGA10+).
629 */
630u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
631{
56b53c0b 632 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
633 return 0;
634
832be404
KW
635 if (index < adev->doorbell.num_doorbells) {
636 return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
637 } else {
638 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
639 return 0;
640 }
641}
642
643/**
644 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
645 *
646 * @adev: amdgpu_device pointer
647 * @index: doorbell index
648 * @v: value to write
649 *
650 * Writes @v to the doorbell aperture at the
651 * requested doorbell index (VEGA10+).
652 */
653void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
654{
56b53c0b 655 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
656 return;
657
832be404
KW
658 if (index < adev->doorbell.num_doorbells) {
659 atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
660 } else {
661 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
662 }
663}
664
1bba3683
HZ
665/**
666 * amdgpu_device_indirect_rreg - read an indirect register
667 *
668 * @adev: amdgpu_device pointer
669 * @pcie_index: mmio register offset
670 * @pcie_data: mmio register offset
22f453fb 671 * @reg_addr: indirect register address to read from
1bba3683
HZ
672 *
673 * Returns the value of indirect register @reg_addr
674 */
675u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
676 u32 pcie_index, u32 pcie_data,
677 u32 reg_addr)
678{
679 unsigned long flags;
680 u32 r;
681 void __iomem *pcie_index_offset;
682 void __iomem *pcie_data_offset;
683
684 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
685 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
686 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
687
688 writel(reg_addr, pcie_index_offset);
689 readl(pcie_index_offset);
690 r = readl(pcie_data_offset);
691 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
692
693 return r;
694}
695
696/**
697 * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
698 *
699 * @adev: amdgpu_device pointer
700 * @pcie_index: mmio register offset
701 * @pcie_data: mmio register offset
22f453fb 702 * @reg_addr: indirect register address to read from
1bba3683
HZ
703 *
704 * Returns the value of indirect register @reg_addr
705 */
706u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
707 u32 pcie_index, u32 pcie_data,
708 u32 reg_addr)
709{
710 unsigned long flags;
711 u64 r;
712 void __iomem *pcie_index_offset;
713 void __iomem *pcie_data_offset;
714
715 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
716 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
717 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
718
719 /* read low 32 bits */
720 writel(reg_addr, pcie_index_offset);
721 readl(pcie_index_offset);
722 r = readl(pcie_data_offset);
723 /* read high 32 bits */
724 writel(reg_addr + 4, pcie_index_offset);
725 readl(pcie_index_offset);
726 r |= ((u64)readl(pcie_data_offset) << 32);
727 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
728
729 return r;
730}
731
732/**
733 * amdgpu_device_indirect_wreg - write an indirect register address
734 *
735 * @adev: amdgpu_device pointer
736 * @pcie_index: mmio register offset
737 * @pcie_data: mmio register offset
738 * @reg_addr: indirect register offset
739 * @reg_data: indirect register data
740 *
741 */
742void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
743 u32 pcie_index, u32 pcie_data,
744 u32 reg_addr, u32 reg_data)
745{
746 unsigned long flags;
747 void __iomem *pcie_index_offset;
748 void __iomem *pcie_data_offset;
749
750 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
751 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
752 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
753
754 writel(reg_addr, pcie_index_offset);
755 readl(pcie_index_offset);
756 writel(reg_data, pcie_data_offset);
757 readl(pcie_data_offset);
758 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
759}
760
761/**
762 * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
763 *
764 * @adev: amdgpu_device pointer
765 * @pcie_index: mmio register offset
766 * @pcie_data: mmio register offset
767 * @reg_addr: indirect register offset
768 * @reg_data: indirect register data
769 *
770 */
771void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
772 u32 pcie_index, u32 pcie_data,
773 u32 reg_addr, u64 reg_data)
774{
775 unsigned long flags;
776 void __iomem *pcie_index_offset;
777 void __iomem *pcie_data_offset;
778
779 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
780 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
781 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
782
783 /* write low 32 bits */
784 writel(reg_addr, pcie_index_offset);
785 readl(pcie_index_offset);
786 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
787 readl(pcie_data_offset);
788 /* write high 32 bits */
789 writel(reg_addr + 4, pcie_index_offset);
790 readl(pcie_index_offset);
791 writel((u32)(reg_data >> 32), pcie_data_offset);
792 readl(pcie_data_offset);
793 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
794}
795
d38ceaf9
AD
796/**
797 * amdgpu_invalid_rreg - dummy reg read function
798 *
982a820b 799 * @adev: amdgpu_device pointer
d38ceaf9
AD
800 * @reg: offset of register
801 *
802 * Dummy register read function. Used for register blocks
803 * that certain asics don't have (all asics).
804 * Returns the value in the register.
805 */
806static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
807{
808 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
809 BUG();
810 return 0;
811}
812
813/**
814 * amdgpu_invalid_wreg - dummy reg write function
815 *
982a820b 816 * @adev: amdgpu_device pointer
d38ceaf9
AD
817 * @reg: offset of register
818 * @v: value to write to the register
819 *
820 * Dummy register read function. Used for register blocks
821 * that certain asics don't have (all asics).
822 */
823static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
824{
825 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
826 reg, v);
827 BUG();
828}
829
4fa1c6a6
TZ
830/**
831 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
832 *
982a820b 833 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
834 * @reg: offset of register
835 *
836 * Dummy register read function. Used for register blocks
837 * that certain asics don't have (all asics).
838 * Returns the value in the register.
839 */
840static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
841{
842 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
843 BUG();
844 return 0;
845}
846
847/**
848 * amdgpu_invalid_wreg64 - dummy reg write function
849 *
982a820b 850 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
851 * @reg: offset of register
852 * @v: value to write to the register
853 *
854 * Dummy register read function. Used for register blocks
855 * that certain asics don't have (all asics).
856 */
857static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
858{
859 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
860 reg, v);
861 BUG();
862}
863
d38ceaf9
AD
864/**
865 * amdgpu_block_invalid_rreg - dummy reg read function
866 *
982a820b 867 * @adev: amdgpu_device pointer
d38ceaf9
AD
868 * @block: offset of instance
869 * @reg: offset of register
870 *
871 * Dummy register read function. Used for register blocks
872 * that certain asics don't have (all asics).
873 * Returns the value in the register.
874 */
875static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
876 uint32_t block, uint32_t reg)
877{
878 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
879 reg, block);
880 BUG();
881 return 0;
882}
883
884/**
885 * amdgpu_block_invalid_wreg - dummy reg write function
886 *
982a820b 887 * @adev: amdgpu_device pointer
d38ceaf9
AD
888 * @block: offset of instance
889 * @reg: offset of register
890 * @v: value to write to the register
891 *
892 * Dummy register read function. Used for register blocks
893 * that certain asics don't have (all asics).
894 */
895static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
896 uint32_t block,
897 uint32_t reg, uint32_t v)
898{
899 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
900 reg, block, v);
901 BUG();
902}
903
4d2997ab
AD
904/**
905 * amdgpu_device_asic_init - Wrapper for atom asic_init
906 *
982a820b 907 * @adev: amdgpu_device pointer
4d2997ab
AD
908 *
909 * Does any asic specific work and then calls atom asic init.
910 */
911static int amdgpu_device_asic_init(struct amdgpu_device *adev)
912{
913 amdgpu_asic_pre_asic_init(adev);
914
915 return amdgpu_atom_asic_init(adev->mode_info.atom_context);
916}
917
e3ecdffa
AD
918/**
919 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
920 *
982a820b 921 * @adev: amdgpu_device pointer
e3ecdffa
AD
922 *
923 * Allocates a scratch page of VRAM for use by various things in the
924 * driver.
925 */
06ec9070 926static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
d38ceaf9 927{
a4a02777
CK
928 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
929 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
930 &adev->vram_scratch.robj,
931 &adev->vram_scratch.gpu_addr,
932 (void **)&adev->vram_scratch.ptr);
d38ceaf9
AD
933}
934
e3ecdffa
AD
935/**
936 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
937 *
982a820b 938 * @adev: amdgpu_device pointer
e3ecdffa
AD
939 *
940 * Frees the VRAM scratch page.
941 */
06ec9070 942static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 943{
078af1a3 944 amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
d38ceaf9
AD
945}
946
947/**
9c3f2b54 948 * amdgpu_device_program_register_sequence - program an array of registers.
d38ceaf9
AD
949 *
950 * @adev: amdgpu_device pointer
951 * @registers: pointer to the register array
952 * @array_size: size of the register array
953 *
954 * Programs an array or registers with and and or masks.
955 * This is a helper for setting golden registers.
956 */
9c3f2b54
AD
957void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
958 const u32 *registers,
959 const u32 array_size)
d38ceaf9
AD
960{
961 u32 tmp, reg, and_mask, or_mask;
962 int i;
963
964 if (array_size % 3)
965 return;
966
967 for (i = 0; i < array_size; i +=3) {
968 reg = registers[i + 0];
969 and_mask = registers[i + 1];
970 or_mask = registers[i + 2];
971
972 if (and_mask == 0xffffffff) {
973 tmp = or_mask;
974 } else {
975 tmp = RREG32(reg);
976 tmp &= ~and_mask;
e0d07657
HZ
977 if (adev->family >= AMDGPU_FAMILY_AI)
978 tmp |= (or_mask & and_mask);
979 else
980 tmp |= or_mask;
d38ceaf9
AD
981 }
982 WREG32(reg, tmp);
983 }
984}
985
e3ecdffa
AD
986/**
987 * amdgpu_device_pci_config_reset - reset the GPU
988 *
989 * @adev: amdgpu_device pointer
990 *
991 * Resets the GPU using the pci config reset sequence.
992 * Only applicable to asics prior to vega10.
993 */
8111c387 994void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
995{
996 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
997}
998
af484df8
AD
999/**
1000 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1001 *
1002 * @adev: amdgpu_device pointer
1003 *
1004 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1005 */
1006int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1007{
1008 return pci_reset_function(adev->pdev);
1009}
1010
d38ceaf9
AD
1011/*
1012 * GPU doorbell aperture helpers function.
1013 */
1014/**
06ec9070 1015 * amdgpu_device_doorbell_init - Init doorbell driver information.
d38ceaf9
AD
1016 *
1017 * @adev: amdgpu_device pointer
1018 *
1019 * Init doorbell driver information (CIK)
1020 * Returns 0 on success, error on failure.
1021 */
06ec9070 1022static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
d38ceaf9 1023{
6585661d 1024
705e519e
CK
1025 /* No doorbell on SI hardware generation */
1026 if (adev->asic_type < CHIP_BONAIRE) {
1027 adev->doorbell.base = 0;
1028 adev->doorbell.size = 0;
1029 adev->doorbell.num_doorbells = 0;
1030 adev->doorbell.ptr = NULL;
1031 return 0;
1032 }
1033
d6895ad3
CK
1034 if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
1035 return -EINVAL;
1036
22357775
AD
1037 amdgpu_asic_init_doorbell_index(adev);
1038
d38ceaf9
AD
1039 /* doorbell bar mapping */
1040 adev->doorbell.base = pci_resource_start(adev->pdev, 2);
1041 adev->doorbell.size = pci_resource_len(adev->pdev, 2);
1042
edf600da 1043 adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
9564f192 1044 adev->doorbell_index.max_assignment+1);
d38ceaf9
AD
1045 if (adev->doorbell.num_doorbells == 0)
1046 return -EINVAL;
1047
ec3db8a6 1048 /* For Vega, reserve and map two pages on doorbell BAR since SDMA
88dc26e4
OZ
1049 * paging queue doorbell use the second page. The
1050 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
1051 * doorbells are in the first page. So with paging queue enabled,
1052 * the max num_doorbells should + 1 page (0x400 in dword)
ec3db8a6
PY
1053 */
1054 if (adev->asic_type >= CHIP_VEGA10)
88dc26e4 1055 adev->doorbell.num_doorbells += 0x400;
ec3db8a6 1056
8972e5d2
CK
1057 adev->doorbell.ptr = ioremap(adev->doorbell.base,
1058 adev->doorbell.num_doorbells *
1059 sizeof(u32));
1060 if (adev->doorbell.ptr == NULL)
d38ceaf9 1061 return -ENOMEM;
d38ceaf9
AD
1062
1063 return 0;
1064}
1065
1066/**
06ec9070 1067 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
d38ceaf9
AD
1068 *
1069 * @adev: amdgpu_device pointer
1070 *
1071 * Tear down doorbell driver information (CIK)
1072 */
06ec9070 1073static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1074{
1075 iounmap(adev->doorbell.ptr);
1076 adev->doorbell.ptr = NULL;
1077}
1078
22cb0164 1079
d38ceaf9
AD
1080
1081/*
06ec9070 1082 * amdgpu_device_wb_*()
455a7bc2 1083 * Writeback is the method by which the GPU updates special pages in memory
ea81a173 1084 * with the status of certain GPU events (fences, ring pointers,etc.).
d38ceaf9
AD
1085 */
1086
1087/**
06ec9070 1088 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
1089 *
1090 * @adev: amdgpu_device pointer
1091 *
1092 * Disables Writeback and frees the Writeback memory (all asics).
1093 * Used at driver shutdown.
1094 */
06ec9070 1095static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1096{
1097 if (adev->wb.wb_obj) {
a76ed485
AD
1098 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1099 &adev->wb.gpu_addr,
1100 (void **)&adev->wb.wb);
d38ceaf9
AD
1101 adev->wb.wb_obj = NULL;
1102 }
1103}
1104
1105/**
03f2abb0 1106 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
d38ceaf9
AD
1107 *
1108 * @adev: amdgpu_device pointer
1109 *
455a7bc2 1110 * Initializes writeback and allocates writeback memory (all asics).
d38ceaf9
AD
1111 * Used at driver startup.
1112 * Returns 0 on success or an -error on failure.
1113 */
06ec9070 1114static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
1115{
1116 int r;
1117
1118 if (adev->wb.wb_obj == NULL) {
97407b63
AD
1119 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1120 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
1121 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1122 &adev->wb.wb_obj, &adev->wb.gpu_addr,
1123 (void **)&adev->wb.wb);
d38ceaf9
AD
1124 if (r) {
1125 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1126 return r;
1127 }
d38ceaf9
AD
1128
1129 adev->wb.num_wb = AMDGPU_MAX_WB;
1130 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1131
1132 /* clear wb memory */
73469585 1133 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
1134 }
1135
1136 return 0;
1137}
1138
1139/**
131b4b36 1140 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
1141 *
1142 * @adev: amdgpu_device pointer
1143 * @wb: wb index
1144 *
1145 * Allocate a wb slot for use by the driver (all asics).
1146 * Returns 0 on success or -EINVAL on failure.
1147 */
131b4b36 1148int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9
AD
1149{
1150 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 1151
97407b63 1152 if (offset < adev->wb.num_wb) {
7014285a 1153 __set_bit(offset, adev->wb.used);
63ae07ca 1154 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
1155 return 0;
1156 } else {
1157 return -EINVAL;
1158 }
1159}
1160
d38ceaf9 1161/**
131b4b36 1162 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
1163 *
1164 * @adev: amdgpu_device pointer
1165 * @wb: wb index
1166 *
1167 * Free a wb slot allocated for use by the driver (all asics)
1168 */
131b4b36 1169void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 1170{
73469585 1171 wb >>= 3;
d38ceaf9 1172 if (wb < adev->wb.num_wb)
73469585 1173 __clear_bit(wb, adev->wb.used);
d38ceaf9
AD
1174}
1175
d6895ad3
CK
1176/**
1177 * amdgpu_device_resize_fb_bar - try to resize FB BAR
1178 *
1179 * @adev: amdgpu_device pointer
1180 *
1181 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1182 * to fail, but if any of the BARs is not accessible after the size we abort
1183 * driver loading by returning -ENODEV.
1184 */
1185int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1186{
453f617a 1187 int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
31b8adab
CK
1188 struct pci_bus *root;
1189 struct resource *res;
1190 unsigned i;
d6895ad3
CK
1191 u16 cmd;
1192 int r;
1193
0c03b912 1194 /* Bypass for VF */
1195 if (amdgpu_sriov_vf(adev))
1196 return 0;
1197
b7221f2b
AD
1198 /* skip if the bios has already enabled large BAR */
1199 if (adev->gmc.real_vram_size &&
1200 (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1201 return 0;
1202
31b8adab
CK
1203 /* Check if the root BUS has 64bit memory resources */
1204 root = adev->pdev->bus;
1205 while (root->parent)
1206 root = root->parent;
1207
1208 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 1209 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
1210 res->start > 0x100000000ull)
1211 break;
1212 }
1213
1214 /* Trying to resize is pointless without a root hub window above 4GB */
1215 if (!res)
1216 return 0;
1217
453f617a
ND
1218 /* Limit the BAR size to what is available */
1219 rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1220 rbar_size);
1221
d6895ad3
CK
1222 /* Disable memory decoding while we change the BAR addresses and size */
1223 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1224 pci_write_config_word(adev->pdev, PCI_COMMAND,
1225 cmd & ~PCI_COMMAND_MEMORY);
1226
1227 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
06ec9070 1228 amdgpu_device_doorbell_fini(adev);
d6895ad3
CK
1229 if (adev->asic_type >= CHIP_BONAIRE)
1230 pci_release_resource(adev->pdev, 2);
1231
1232 pci_release_resource(adev->pdev, 0);
1233
1234 r = pci_resize_resource(adev->pdev, 0, rbar_size);
1235 if (r == -ENOSPC)
1236 DRM_INFO("Not enough PCI address space for a large BAR.");
1237 else if (r && r != -ENOTSUPP)
1238 DRM_ERROR("Problem resizing BAR0 (%d).", r);
1239
1240 pci_assign_unassigned_bus_resources(adev->pdev->bus);
1241
1242 /* When the doorbell or fb BAR isn't available we have no chance of
1243 * using the device.
1244 */
06ec9070 1245 r = amdgpu_device_doorbell_init(adev);
d6895ad3
CK
1246 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1247 return -ENODEV;
1248
1249 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1250
1251 return 0;
1252}
a05502e5 1253
d38ceaf9
AD
1254/*
1255 * GPU helpers function.
1256 */
1257/**
39c640c0 1258 * amdgpu_device_need_post - check if the hw need post or not
d38ceaf9
AD
1259 *
1260 * @adev: amdgpu_device pointer
1261 *
c836fec5
JQ
1262 * Check if the asic has been initialized (all asics) at driver startup
1263 * or post is needed if hw reset is performed.
1264 * Returns true if need or false if not.
d38ceaf9 1265 */
39c640c0 1266bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
1267{
1268 uint32_t reg;
1269
bec86378
ML
1270 if (amdgpu_sriov_vf(adev))
1271 return false;
1272
1273 if (amdgpu_passthrough(adev)) {
1da2c326
ML
1274 /* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
1275 * some old smc fw still need driver do vPost otherwise gpu hang, while
1276 * those smc fw version above 22.15 doesn't have this flaw, so we force
1277 * vpost executed for smc version below 22.15
bec86378
ML
1278 */
1279 if (adev->asic_type == CHIP_FIJI) {
1280 int err;
1281 uint32_t fw_ver;
1282 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1283 /* force vPost if error occured */
1284 if (err)
1285 return true;
1286
1287 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1da2c326
ML
1288 if (fw_ver < 0x00160e00)
1289 return true;
bec86378 1290 }
bec86378 1291 }
91fe77eb 1292
e3c1b071 1293 /* Don't post if we need to reset whole hive on init */
1294 if (adev->gmc.xgmi.pending_reset)
1295 return false;
1296
91fe77eb 1297 if (adev->has_hw_reset) {
1298 adev->has_hw_reset = false;
1299 return true;
1300 }
1301
1302 /* bios scratch used on CIK+ */
1303 if (adev->asic_type >= CHIP_BONAIRE)
1304 return amdgpu_atombios_scratch_need_asic_init(adev);
1305
1306 /* check MEM_SIZE for older asics */
1307 reg = amdgpu_asic_get_config_memsize(adev);
1308
1309 if ((reg != 0) && (reg != 0xffffffff))
1310 return false;
1311
1312 return true;
bec86378
ML
1313}
1314
d38ceaf9
AD
1315/* if we get transitioned to only one device, take VGA back */
1316/**
06ec9070 1317 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9 1318 *
bf44e8ce 1319 * @pdev: PCI device pointer
d38ceaf9
AD
1320 * @state: enable/disable vga decode
1321 *
1322 * Enable/disable vga decode (all asics).
1323 * Returns VGA resource flags.
1324 */
bf44e8ce
CH
1325static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1326 bool state)
d38ceaf9 1327{
bf44e8ce 1328 struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
d38ceaf9
AD
1329 amdgpu_asic_set_vga_state(adev, state);
1330 if (state)
1331 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1332 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1333 else
1334 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1335}
1336
e3ecdffa
AD
1337/**
1338 * amdgpu_device_check_block_size - validate the vm block size
1339 *
1340 * @adev: amdgpu_device pointer
1341 *
1342 * Validates the vm block size specified via module parameter.
1343 * The vm block size defines number of bits in page table versus page directory,
1344 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1345 * page table and the remaining bits are in the page directory.
1346 */
06ec9070 1347static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
1348{
1349 /* defines number of bits in page table versus page directory,
1350 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1351 * page table and the remaining bits are in the page directory */
bab4fee7
JZ
1352 if (amdgpu_vm_block_size == -1)
1353 return;
a1adf8be 1354
bab4fee7 1355 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
1356 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1357 amdgpu_vm_block_size);
97489129 1358 amdgpu_vm_block_size = -1;
a1adf8be 1359 }
a1adf8be
CZ
1360}
1361
e3ecdffa
AD
1362/**
1363 * amdgpu_device_check_vm_size - validate the vm size
1364 *
1365 * @adev: amdgpu_device pointer
1366 *
1367 * Validates the vm size in GB specified via module parameter.
1368 * The VM size is the size of the GPU virtual memory space in GB.
1369 */
06ec9070 1370static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 1371{
64dab074
AD
1372 /* no need to check the default value */
1373 if (amdgpu_vm_size == -1)
1374 return;
1375
83ca145d
ZJ
1376 if (amdgpu_vm_size < 1) {
1377 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1378 amdgpu_vm_size);
f3368128 1379 amdgpu_vm_size = -1;
83ca145d 1380 }
83ca145d
ZJ
1381}
1382
7951e376
RZ
1383static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1384{
1385 struct sysinfo si;
a9d4fe2f 1386 bool is_os_64 = (sizeof(void *) == 8);
7951e376
RZ
1387 uint64_t total_memory;
1388 uint64_t dram_size_seven_GB = 0x1B8000000;
1389 uint64_t dram_size_three_GB = 0xB8000000;
1390
1391 if (amdgpu_smu_memory_pool_size == 0)
1392 return;
1393
1394 if (!is_os_64) {
1395 DRM_WARN("Not 64-bit OS, feature not supported\n");
1396 goto def_value;
1397 }
1398 si_meminfo(&si);
1399 total_memory = (uint64_t)si.totalram * si.mem_unit;
1400
1401 if ((amdgpu_smu_memory_pool_size == 1) ||
1402 (amdgpu_smu_memory_pool_size == 2)) {
1403 if (total_memory < dram_size_three_GB)
1404 goto def_value1;
1405 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1406 (amdgpu_smu_memory_pool_size == 8)) {
1407 if (total_memory < dram_size_seven_GB)
1408 goto def_value1;
1409 } else {
1410 DRM_WARN("Smu memory pool size not supported\n");
1411 goto def_value;
1412 }
1413 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1414
1415 return;
1416
1417def_value1:
1418 DRM_WARN("No enough system memory\n");
1419def_value:
1420 adev->pm.smu_prv_buffer_size = 0;
1421}
1422
9f6a7857
HR
1423static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1424{
1425 if (!(adev->flags & AMD_IS_APU) ||
1426 adev->asic_type < CHIP_RAVEN)
1427 return 0;
1428
1429 switch (adev->asic_type) {
1430 case CHIP_RAVEN:
1431 if (adev->pdev->device == 0x15dd)
1432 adev->apu_flags |= AMD_APU_IS_RAVEN;
1433 if (adev->pdev->device == 0x15d8)
1434 adev->apu_flags |= AMD_APU_IS_PICASSO;
1435 break;
1436 case CHIP_RENOIR:
1437 if ((adev->pdev->device == 0x1636) ||
1438 (adev->pdev->device == 0x164c))
1439 adev->apu_flags |= AMD_APU_IS_RENOIR;
1440 else
1441 adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1442 break;
1443 case CHIP_VANGOGH:
1444 adev->apu_flags |= AMD_APU_IS_VANGOGH;
1445 break;
1446 case CHIP_YELLOW_CARP:
1447 break;
d0f56dc2
TZ
1448 case CHIP_CYAN_SKILLFISH:
1449 if (adev->pdev->device == 0x13FE)
1450 adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1451 break;
9f6a7857 1452 default:
4eaf21b7 1453 break;
9f6a7857
HR
1454 }
1455
1456 return 0;
1457}
1458
d38ceaf9 1459/**
06ec9070 1460 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
1461 *
1462 * @adev: amdgpu_device pointer
1463 *
1464 * Validates certain module parameters and updates
1465 * the associated values used by the driver (all asics).
1466 */
912dfc84 1467static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1468{
5b011235
CZ
1469 if (amdgpu_sched_jobs < 4) {
1470 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1471 amdgpu_sched_jobs);
1472 amdgpu_sched_jobs = 4;
76117507 1473 } else if (!is_power_of_2(amdgpu_sched_jobs)){
5b011235
CZ
1474 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1475 amdgpu_sched_jobs);
1476 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1477 }
d38ceaf9 1478
83e74db6 1479 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
1480 /* gart size must be greater or equal to 32M */
1481 dev_warn(adev->dev, "gart size (%d) too small\n",
1482 amdgpu_gart_size);
83e74db6 1483 amdgpu_gart_size = -1;
d38ceaf9
AD
1484 }
1485
36d38372 1486 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1487 /* gtt size must be greater or equal to 32M */
36d38372
CK
1488 dev_warn(adev->dev, "gtt size (%d) too small\n",
1489 amdgpu_gtt_size);
1490 amdgpu_gtt_size = -1;
d38ceaf9
AD
1491 }
1492
d07f14be
RH
1493 /* valid range is between 4 and 9 inclusive */
1494 if (amdgpu_vm_fragment_size != -1 &&
1495 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1496 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1497 amdgpu_vm_fragment_size = -1;
1498 }
1499
5d5bd5e3
KW
1500 if (amdgpu_sched_hw_submission < 2) {
1501 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1502 amdgpu_sched_hw_submission);
1503 amdgpu_sched_hw_submission = 2;
1504 } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1505 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1506 amdgpu_sched_hw_submission);
1507 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1508 }
1509
7951e376
RZ
1510 amdgpu_device_check_smu_prv_buffer_size(adev);
1511
06ec9070 1512 amdgpu_device_check_vm_size(adev);
d38ceaf9 1513
06ec9070 1514 amdgpu_device_check_block_size(adev);
6a7f76e7 1515
19aede77 1516 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84 1517
c6252390 1518 amdgpu_gmc_tmz_set(adev);
01a8dcec 1519
9b498efa
AD
1520 amdgpu_gmc_noretry_set(adev);
1521
e3c00faa 1522 return 0;
d38ceaf9
AD
1523}
1524
1525/**
1526 * amdgpu_switcheroo_set_state - set switcheroo state
1527 *
1528 * @pdev: pci dev pointer
1694467b 1529 * @state: vga_switcheroo state
d38ceaf9
AD
1530 *
1531 * Callback for the switcheroo driver. Suspends or resumes the
1532 * the asics before or after it is powered up using ACPI methods.
1533 */
8aba21b7
LT
1534static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1535 enum vga_switcheroo_state state)
d38ceaf9
AD
1536{
1537 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 1538 int r;
d38ceaf9 1539
b98c6299 1540 if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
1541 return;
1542
1543 if (state == VGA_SWITCHEROO_ON) {
dd4fa6c1 1544 pr_info("switched on\n");
d38ceaf9
AD
1545 /* don't suspend or resume card normally */
1546 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1547
8f66090b
TZ
1548 pci_set_power_state(pdev, PCI_D0);
1549 amdgpu_device_load_pci_state(pdev);
1550 r = pci_enable_device(pdev);
de185019
AD
1551 if (r)
1552 DRM_WARN("pci_enable_device failed (%d)\n", r);
1553 amdgpu_device_resume(dev, true);
d38ceaf9 1554
d38ceaf9 1555 dev->switch_power_state = DRM_SWITCH_POWER_ON;
d38ceaf9 1556 } else {
dd4fa6c1 1557 pr_info("switched off\n");
d38ceaf9 1558 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
de185019 1559 amdgpu_device_suspend(dev, true);
8f66090b 1560 amdgpu_device_cache_pci_state(pdev);
de185019 1561 /* Shut down the device */
8f66090b
TZ
1562 pci_disable_device(pdev);
1563 pci_set_power_state(pdev, PCI_D3cold);
d38ceaf9
AD
1564 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1565 }
1566}
1567
1568/**
1569 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1570 *
1571 * @pdev: pci dev pointer
1572 *
1573 * Callback for the switcheroo driver. Check of the switcheroo
1574 * state can be changed.
1575 * Returns true if the state can be changed, false if not.
1576 */
1577static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1578{
1579 struct drm_device *dev = pci_get_drvdata(pdev);
1580
1581 /*
1582 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1583 * locking inversion with the driver load path. And the access here is
1584 * completely racy anyway. So don't bother with locking for now.
1585 */
7e13ad89 1586 return atomic_read(&dev->open_count) == 0;
d38ceaf9
AD
1587}
1588
1589static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1590 .set_gpu_state = amdgpu_switcheroo_set_state,
1591 .reprobe = NULL,
1592 .can_switch = amdgpu_switcheroo_can_switch,
1593};
1594
e3ecdffa
AD
1595/**
1596 * amdgpu_device_ip_set_clockgating_state - set the CG state
1597 *
87e3f136 1598 * @dev: amdgpu_device pointer
e3ecdffa
AD
1599 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1600 * @state: clockgating state (gate or ungate)
1601 *
1602 * Sets the requested clockgating state for all instances of
1603 * the hardware IP specified.
1604 * Returns the error code from the last instance.
1605 */
43fa561f 1606int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1607 enum amd_ip_block_type block_type,
1608 enum amd_clockgating_state state)
d38ceaf9 1609{
43fa561f 1610 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1611 int i, r = 0;
1612
1613 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1614 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1615 continue;
c722865a
RZ
1616 if (adev->ip_blocks[i].version->type != block_type)
1617 continue;
1618 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1619 continue;
1620 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1621 (void *)adev, state);
1622 if (r)
1623 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1624 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1625 }
1626 return r;
1627}
1628
e3ecdffa
AD
1629/**
1630 * amdgpu_device_ip_set_powergating_state - set the PG state
1631 *
87e3f136 1632 * @dev: amdgpu_device pointer
e3ecdffa
AD
1633 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1634 * @state: powergating state (gate or ungate)
1635 *
1636 * Sets the requested powergating state for all instances of
1637 * the hardware IP specified.
1638 * Returns the error code from the last instance.
1639 */
43fa561f 1640int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1641 enum amd_ip_block_type block_type,
1642 enum amd_powergating_state state)
d38ceaf9 1643{
43fa561f 1644 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1645 int i, r = 0;
1646
1647 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1648 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1649 continue;
c722865a
RZ
1650 if (adev->ip_blocks[i].version->type != block_type)
1651 continue;
1652 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1653 continue;
1654 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1655 (void *)adev, state);
1656 if (r)
1657 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1658 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1659 }
1660 return r;
1661}
1662
e3ecdffa
AD
1663/**
1664 * amdgpu_device_ip_get_clockgating_state - get the CG state
1665 *
1666 * @adev: amdgpu_device pointer
1667 * @flags: clockgating feature flags
1668 *
1669 * Walks the list of IPs on the device and updates the clockgating
1670 * flags for each IP.
1671 * Updates @flags with the feature flags for each hardware IP where
1672 * clockgating is enabled.
1673 */
2990a1fc
AD
1674void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1675 u32 *flags)
6cb2d4e4
HR
1676{
1677 int i;
1678
1679 for (i = 0; i < adev->num_ip_blocks; i++) {
1680 if (!adev->ip_blocks[i].status.valid)
1681 continue;
1682 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1683 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1684 }
1685}
1686
e3ecdffa
AD
1687/**
1688 * amdgpu_device_ip_wait_for_idle - wait for idle
1689 *
1690 * @adev: amdgpu_device pointer
1691 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1692 *
1693 * Waits for the request hardware IP to be idle.
1694 * Returns 0 for success or a negative error code on failure.
1695 */
2990a1fc
AD
1696int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1697 enum amd_ip_block_type block_type)
5dbbb60b
AD
1698{
1699 int i, r;
1700
1701 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1702 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1703 continue;
a1255107
AD
1704 if (adev->ip_blocks[i].version->type == block_type) {
1705 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1706 if (r)
1707 return r;
1708 break;
1709 }
1710 }
1711 return 0;
1712
1713}
1714
e3ecdffa
AD
1715/**
1716 * amdgpu_device_ip_is_idle - is the hardware IP idle
1717 *
1718 * @adev: amdgpu_device pointer
1719 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1720 *
1721 * Check if the hardware IP is idle or not.
1722 * Returns true if it the IP is idle, false if not.
1723 */
2990a1fc
AD
1724bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1725 enum amd_ip_block_type block_type)
5dbbb60b
AD
1726{
1727 int i;
1728
1729 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1730 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1731 continue;
a1255107
AD
1732 if (adev->ip_blocks[i].version->type == block_type)
1733 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1734 }
1735 return true;
1736
1737}
1738
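/*
 * Illustrative sketch, not part of the upstream file: combining the two
 * helpers above to quiesce the GFX block before touching shared state.
 * amdgpu_example_quiesce_gfx() is a hypothetical name.
 */
static int __maybe_unused amdgpu_example_quiesce_gfx(struct amdgpu_device *adev)
{
	int r;

	r = amdgpu_device_ip_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GFX);
	if (r)
		return r;

	return amdgpu_device_ip_is_idle(adev, AMD_IP_BLOCK_TYPE_GFX) ? 0 : -EBUSY;
}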
e3ecdffa
AD
1739/**
1740 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1741 *
1742 * @adev: amdgpu_device pointer
87e3f136 1743 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1744 *
1745 * Returns a pointer to the hardware IP block structure
1746 * if it exists for the asic, otherwise NULL.
1747 */
2990a1fc
AD
1748struct amdgpu_ip_block *
1749amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1750 enum amd_ip_block_type type)
d38ceaf9
AD
1751{
1752 int i;
1753
1754 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1755 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1756 return &adev->ip_blocks[i];
1757
1758 return NULL;
1759}
1760
1761/**
2990a1fc 1762 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1763 *
1764 * @adev: amdgpu_device pointer
5fc3aeeb 1765 * @type: enum amd_ip_block_type
d38ceaf9
AD
1766 * @major: major version
1767 * @minor: minor version
1768 *
1769 * Returns 0 if the registered version is equal to or greater than the
1770 * requested one, 1 if it is smaller or the ip_block doesn't exist.
1771 */
2990a1fc
AD
1772int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1773 enum amd_ip_block_type type,
1774 u32 major, u32 minor)
d38ceaf9 1775{
2990a1fc 1776 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1777
a1255107
AD
1778 if (ip_block && ((ip_block->version->major > major) ||
1779 ((ip_block->version->major == major) &&
1780 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1781 return 0;
1782
1783 return 1;
1784}
1785
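/*
 * Illustrative sketch, not part of the upstream file: looking up the
 * registered GMC block and checking that it is at least version 9.0 using
 * the two helpers above. amdgpu_example_gmc_at_least_v9() is a
 * hypothetical name.
 */
static bool __maybe_unused amdgpu_example_gmc_at_least_v9(struct amdgpu_device *adev)
{
	struct amdgpu_ip_block *gmc =
		amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GMC);

	if (!gmc)
		return false;

	/* cmp returns 0 when the installed version is >= the requested one */
	return amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GMC,
						  9, 0) == 0;
}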
a1255107 1786/**
2990a1fc 1787 * amdgpu_device_ip_block_add
a1255107
AD
1788 *
1789 * @adev: amdgpu_device pointer
1790 * @ip_block_version: pointer to the IP to add
1791 *
1792 * Adds the IP block driver information to the collection of IPs
1793 * on the asic.
1794 */
2990a1fc
AD
1795int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1796 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1797{
1798 if (!ip_block_version)
1799 return -EINVAL;
1800
7bd939d0
LG
1801 switch (ip_block_version->type) {
1802 case AMD_IP_BLOCK_TYPE_VCN:
1803 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
1804 return 0;
1805 break;
1806 case AMD_IP_BLOCK_TYPE_JPEG:
1807 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
1808 return 0;
1809 break;
1810 default:
1811 break;
1812 }
1813
e966a725 1814 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1815 ip_block_version->funcs->name);
1816
a1255107
AD
1817 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1818
1819 return 0;
1820}
1821
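/*
 * Illustrative example, not normative: an ASIC specific set_ip_blocks()
 * routine registers its blocks with the helper above in initialization
 * order. The block names below come from soc15.h/gmc_v9_0.h and are used
 * here only as an assumption for illustration:
 *
 *	amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
 *	amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
 *	amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
 */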
e3ecdffa
AD
1822/**
1823 * amdgpu_device_enable_virtual_display - enable virtual display feature
1824 *
1825 * @adev: amdgpu_device pointer
1826 *
1827 * Enables the virtual display feature if the user has enabled it via
1828 * the module parameter virtual_display. This feature provides virtual
1829 * display hardware on headless boards or in virtualized environments.
1830 * This function parses and validates the configuration string specified by
1831 * the user and configures the virtual display accordingly (number of
1832 * virtual connectors, crtcs, etc.).
1833 */
483ef985 1834static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1835{
1836 adev->enable_virtual_display = false;
1837
1838 if (amdgpu_virtual_display) {
8f66090b 1839 const char *pci_address_name = pci_name(adev->pdev);
0f66356d 1840 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1841
1842 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1843 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1844 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1845 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1846 if (!strcmp("all", pciaddname)
1847 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1848 long num_crtc;
1849 int res = -1;
1850
9accf2fd 1851 adev->enable_virtual_display = true;
0f66356d
ED
1852
1853 if (pciaddname_tmp)
1854 res = kstrtol(pciaddname_tmp, 10,
1855 &num_crtc);
1856
1857 if (!res) {
1858 if (num_crtc < 1)
1859 num_crtc = 1;
1860 if (num_crtc > 6)
1861 num_crtc = 6;
1862 adev->mode_info.num_crtc = num_crtc;
1863 } else {
1864 adev->mode_info.num_crtc = 1;
1865 }
9accf2fd
ED
1866 break;
1867 }
1868 }
1869
0f66356d
ED
1870 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1871 amdgpu_virtual_display, pci_address_name,
1872 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
1873
1874 kfree(pciaddstr);
1875 }
1876}
1877
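/*
 * Illustrative example, derived from the parsing above and not normative:
 * the virtual_display module parameter is a semicolon separated list of
 * "<pci address>,<crtc count>" entries, with "all" matching every device
 * and the crtc count clamped to 1..6, e.g.:
 *
 *	modprobe amdgpu virtual_display=0000:26:00.0,2
 *	modprobe amdgpu virtual_display=all,1
 *
 * The PCI address shown is a placeholder.
 */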
e3ecdffa
AD
1878/**
1879 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1880 *
1881 * @adev: amdgpu_device pointer
1882 *
1883 * Parses the asic configuration parameters specified in the gpu info
1884 * firmware and makes them available to the driver for use in configuring
1885 * the asic.
1886 * Returns 0 on success, -EINVAL on failure.
1887 */
e2a75f88
AD
1888static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1889{
e2a75f88 1890 const char *chip_name;
c0a43457 1891 char fw_name[40];
e2a75f88
AD
1892 int err;
1893 const struct gpu_info_firmware_header_v1_0 *hdr;
1894
ab4fe3e1
HR
1895 adev->firmware.gpu_info_fw = NULL;
1896
72de33f8 1897 if (adev->mman.discovery_bin) {
258620d0 1898 amdgpu_discovery_get_gfx_info(adev);
cc375d8c
TY
1899
1900 /*
1901 * FIXME: The bounding box is still needed by Navi12, so
1902 * temporarily read it from gpu_info firmware. Should be dropped
1903 * when DAL no longer needs it.
1904 */
1905 if (adev->asic_type != CHIP_NAVI12)
1906 return 0;
258620d0
AD
1907 }
1908
e2a75f88 1909 switch (adev->asic_type) {
e2a75f88
AD
1910#ifdef CONFIG_DRM_AMDGPU_SI
1911 case CHIP_VERDE:
1912 case CHIP_TAHITI:
1913 case CHIP_PITCAIRN:
1914 case CHIP_OLAND:
1915 case CHIP_HAINAN:
1916#endif
1917#ifdef CONFIG_DRM_AMDGPU_CIK
1918 case CHIP_BONAIRE:
1919 case CHIP_HAWAII:
1920 case CHIP_KAVERI:
1921 case CHIP_KABINI:
1922 case CHIP_MULLINS:
1923#endif
da87c30b
AD
1924 case CHIP_TOPAZ:
1925 case CHIP_TONGA:
1926 case CHIP_FIJI:
1927 case CHIP_POLARIS10:
1928 case CHIP_POLARIS11:
1929 case CHIP_POLARIS12:
1930 case CHIP_VEGAM:
1931 case CHIP_CARRIZO:
1932 case CHIP_STONEY:
27c0bc71 1933 case CHIP_VEGA20:
44b3253a 1934 case CHIP_ALDEBARAN:
84d244a3
JC
1935 case CHIP_SIENNA_CICHLID:
1936 case CHIP_NAVY_FLOUNDER:
eac88a5f 1937 case CHIP_DIMGREY_CAVEFISH:
0e5f4b09 1938 case CHIP_BEIGE_GOBY:
e2a75f88
AD
1939 default:
1940 return 0;
1941 case CHIP_VEGA10:
1942 chip_name = "vega10";
1943 break;
3f76dced
AD
1944 case CHIP_VEGA12:
1945 chip_name = "vega12";
1946 break;
2d2e5e7e 1947 case CHIP_RAVEN:
54f78a76 1948 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
54c4d17e 1949 chip_name = "raven2";
54f78a76 1950 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
741deade 1951 chip_name = "picasso";
54c4d17e
FX
1952 else
1953 chip_name = "raven";
2d2e5e7e 1954 break;
65e60f6e
LM
1955 case CHIP_ARCTURUS:
1956 chip_name = "arcturus";
1957 break;
b51a26a0 1958 case CHIP_RENOIR:
2e62f0b5
PL
1959 if (adev->apu_flags & AMD_APU_IS_RENOIR)
1960 chip_name = "renoir";
1961 else
1962 chip_name = "green_sardine";
b51a26a0 1963 break;
23c6268e
HR
1964 case CHIP_NAVI10:
1965 chip_name = "navi10";
1966 break;
ed42cfe1
XY
1967 case CHIP_NAVI14:
1968 chip_name = "navi14";
1969 break;
42b325e5
XY
1970 case CHIP_NAVI12:
1971 chip_name = "navi12";
1972 break;
4e52a9f8
HR
1973 case CHIP_VANGOGH:
1974 chip_name = "vangogh";
1975 break;
8bf84f60
AL
1976 case CHIP_YELLOW_CARP:
1977 chip_name = "yellow_carp";
1978 break;
e2a75f88
AD
1979 }
1980
1981 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
ab4fe3e1 1982 err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
e2a75f88
AD
1983 if (err) {
1984 dev_err(adev->dev,
1985 "Failed to load gpu_info firmware \"%s\"\n",
1986 fw_name);
1987 goto out;
1988 }
ab4fe3e1 1989 err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
e2a75f88
AD
1990 if (err) {
1991 dev_err(adev->dev,
1992 "Failed to validate gpu_info firmware \"%s\"\n",
1993 fw_name);
1994 goto out;
1995 }
1996
ab4fe3e1 1997 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
1998 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1999
2000 switch (hdr->version_major) {
2001 case 1:
2002 {
2003 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 2004 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
2005 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2006
cc375d8c
TY
2007 /*
2008 * Should be dropped when DAL no longer needs it.
2009 */
2010 if (adev->asic_type == CHIP_NAVI12)
ec51d3fa
XY
2011 goto parse_soc_bounding_box;
2012
b5ab16bf
AD
2013 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2014 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2015 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2016 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 2017 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
2018 le32_to_cpu(gpu_info_fw->gc_num_tccs);
2019 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2020 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2021 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2022 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 2023 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
2024 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2025 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
2026 adev->gfx.cu_info.max_waves_per_simd =
2027 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2028 adev->gfx.cu_info.max_scratch_slots_per_cu =
2029 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2030 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 2031 if (hdr->version_minor >= 1) {
35c2e910
HZ
2032 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2033 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2034 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2035 adev->gfx.config.num_sc_per_sh =
2036 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2037 adev->gfx.config.num_packer_per_sc =
2038 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2039 }
ec51d3fa
XY
2040
2041parse_soc_bounding_box:
ec51d3fa
XY
2042 /*
2043 * soc bounding box info is not integrated into the discovery table,
258620d0 2044 * so it still needs to be parsed from the gpu info firmware when required.
ec51d3fa 2045 */
48321c3d
HW
2046 if (hdr->version_minor == 2) {
2047 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2048 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2049 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2050 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2051 }
e2a75f88
AD
2052 break;
2053 }
2054 default:
2055 dev_err(adev->dev,
2056 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2057 err = -EINVAL;
2058 goto out;
2059 }
2060out:
e2a75f88
AD
2061 return err;
2062}
2063
e3ecdffa
AD
2064/**
2065 * amdgpu_device_ip_early_init - run early init for hardware IPs
2066 *
2067 * @adev: amdgpu_device pointer
2068 *
2069 * Early initialization pass for hardware IPs. The hardware IPs that make
2070 * up each asic are discovered and each IP's early_init callback is run. This
2071 * is the first stage in initializing the asic.
2072 * Returns 0 on success, negative error code on failure.
2073 */
06ec9070 2074static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 2075{
aaa36a97 2076 int i, r;
d38ceaf9 2077
483ef985 2078 amdgpu_device_enable_virtual_display(adev);
a6be7570 2079
00a979f3 2080 if (amdgpu_sriov_vf(adev)) {
00a979f3 2081 r = amdgpu_virt_request_full_gpu(adev, true);
aaa36a97
AD
2082 if (r)
2083 return r;
00a979f3
WS
2084 }
2085
d38ceaf9 2086 switch (adev->asic_type) {
33f34802
KW
2087#ifdef CONFIG_DRM_AMDGPU_SI
2088 case CHIP_VERDE:
2089 case CHIP_TAHITI:
2090 case CHIP_PITCAIRN:
2091 case CHIP_OLAND:
2092 case CHIP_HAINAN:
295d0daf 2093 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
2094 r = si_set_ip_blocks(adev);
2095 if (r)
2096 return r;
2097 break;
2098#endif
a2e73f56
AD
2099#ifdef CONFIG_DRM_AMDGPU_CIK
2100 case CHIP_BONAIRE:
2101 case CHIP_HAWAII:
2102 case CHIP_KAVERI:
2103 case CHIP_KABINI:
2104 case CHIP_MULLINS:
e1ad2d53 2105 if (adev->flags & AMD_IS_APU)
a2e73f56 2106 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
2107 else
2108 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
2109
2110 r = cik_set_ip_blocks(adev);
2111 if (r)
2112 return r;
2113 break;
2114#endif
da87c30b
AD
2115 case CHIP_TOPAZ:
2116 case CHIP_TONGA:
2117 case CHIP_FIJI:
2118 case CHIP_POLARIS10:
2119 case CHIP_POLARIS11:
2120 case CHIP_POLARIS12:
2121 case CHIP_VEGAM:
2122 case CHIP_CARRIZO:
2123 case CHIP_STONEY:
2124 if (adev->flags & AMD_IS_APU)
2125 adev->family = AMDGPU_FAMILY_CZ;
2126 else
2127 adev->family = AMDGPU_FAMILY_VI;
2128
2129 r = vi_set_ip_blocks(adev);
2130 if (r)
2131 return r;
2132 break;
d38ceaf9 2133 default:
63352b7f
AD
2134 r = amdgpu_discovery_set_ip_blocks(adev);
2135 if (r)
2136 return r;
2137 break;
d38ceaf9
AD
2138 }
2139
1884734a 2140 amdgpu_amdkfd_device_probe(adev);
2141
3b94fb10 2142 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 2143 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 2144 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
4215a119
HC
2145 if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2146 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
00f54b97 2147
d38ceaf9
AD
2148 for (i = 0; i < adev->num_ip_blocks; i++) {
2149 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
ed8cf00c
HR
2150 DRM_ERROR("disabled ip block: %d <%s>\n",
2151 i, adev->ip_blocks[i].version->funcs->name);
a1255107 2152 adev->ip_blocks[i].status.valid = false;
d38ceaf9 2153 } else {
a1255107
AD
2154 if (adev->ip_blocks[i].version->funcs->early_init) {
2155 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 2156 if (r == -ENOENT) {
a1255107 2157 adev->ip_blocks[i].status.valid = false;
2c1a2784 2158 } else if (r) {
a1255107
AD
2159 DRM_ERROR("early_init of IP block <%s> failed %d\n",
2160 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2161 return r;
2c1a2784 2162 } else {
a1255107 2163 adev->ip_blocks[i].status.valid = true;
2c1a2784 2164 }
974e6b64 2165 } else {
a1255107 2166 adev->ip_blocks[i].status.valid = true;
d38ceaf9 2167 }
d38ceaf9 2168 }
21a249ca
AD
2169 /* get the vbios after the asic_funcs are set up */
2170 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
2171 r = amdgpu_device_parse_gpu_info_fw(adev);
2172 if (r)
2173 return r;
2174
21a249ca
AD
2175 /* Read BIOS */
2176 if (!amdgpu_get_bios(adev))
2177 return -EINVAL;
2178
2179 r = amdgpu_atombios_init(adev);
2180 if (r) {
2181 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2182 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2183 return r;
2184 }
77eabc6f
PJZ
2185
2186 /*get pf2vf msg info at it's earliest time*/
2187 if (amdgpu_sriov_vf(adev))
2188 amdgpu_virt_init_data_exchange(adev);
2189
21a249ca 2190 }
d38ceaf9
AD
2191 }
2192
395d1fb9
NH
2193 adev->cg_flags &= amdgpu_cg_mask;
2194 adev->pg_flags &= amdgpu_pg_mask;
2195
d38ceaf9
AD
2196 return 0;
2197}
2198
0a4f2520
RZ
2199static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2200{
2201 int i, r;
2202
2203 for (i = 0; i < adev->num_ip_blocks; i++) {
2204 if (!adev->ip_blocks[i].status.sw)
2205 continue;
2206 if (adev->ip_blocks[i].status.hw)
2207 continue;
2208 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 2209 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
2210 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2211 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2212 if (r) {
2213 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2214 adev->ip_blocks[i].version->funcs->name, r);
2215 return r;
2216 }
2217 adev->ip_blocks[i].status.hw = true;
2218 }
2219 }
2220
2221 return 0;
2222}
2223
2224static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2225{
2226 int i, r;
2227
2228 for (i = 0; i < adev->num_ip_blocks; i++) {
2229 if (!adev->ip_blocks[i].status.sw)
2230 continue;
2231 if (adev->ip_blocks[i].status.hw)
2232 continue;
2233 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2234 if (r) {
2235 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2236 adev->ip_blocks[i].version->funcs->name, r);
2237 return r;
2238 }
2239 adev->ip_blocks[i].status.hw = true;
2240 }
2241
2242 return 0;
2243}
2244
7a3e0bb2
RZ
2245static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2246{
2247 int r = 0;
2248 int i;
80f41f84 2249 uint32_t smu_version;
7a3e0bb2
RZ
2250
2251 if (adev->asic_type >= CHIP_VEGA10) {
2252 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
2253 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2254 continue;
2255
e3c1b071 2256 if (!adev->ip_blocks[i].status.sw)
2257 continue;
2258
482f0e53
ML
2259 /* no need to do the fw loading again if already done*/
2260 if (adev->ip_blocks[i].status.hw == true)
2261 break;
2262
53b3f8f4 2263 if (amdgpu_in_reset(adev) || adev->in_suspend) {
482f0e53
ML
2264 r = adev->ip_blocks[i].version->funcs->resume(adev);
2265 if (r) {
2266 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 2267 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
2268 return r;
2269 }
2270 } else {
2271 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2272 if (r) {
2273 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2274 adev->ip_blocks[i].version->funcs->name, r);
2275 return r;
7a3e0bb2 2276 }
7a3e0bb2 2277 }
482f0e53
ML
2278
2279 adev->ip_blocks[i].status.hw = true;
2280 break;
7a3e0bb2
RZ
2281 }
2282 }
482f0e53 2283
8973d9ec
ED
2284 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2285 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 2286
80f41f84 2287 return r;
7a3e0bb2
RZ
2288}
2289
5fd8518d
AG
2290static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2291{
2292 long timeout;
2293 int r, i;
2294
2295 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2296 struct amdgpu_ring *ring = adev->rings[i];
2297
2298 /* No need to setup the GPU scheduler for rings that don't need it */
2299 if (!ring || ring->no_scheduler)
2300 continue;
2301
2302 switch (ring->funcs->type) {
2303 case AMDGPU_RING_TYPE_GFX:
2304 timeout = adev->gfx_timeout;
2305 break;
2306 case AMDGPU_RING_TYPE_COMPUTE:
2307 timeout = adev->compute_timeout;
2308 break;
2309 case AMDGPU_RING_TYPE_SDMA:
2310 timeout = adev->sdma_timeout;
2311 break;
2312 default:
2313 timeout = adev->video_timeout;
2314 break;
2315 }
2316
2317 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
2318 ring->num_hw_submission, amdgpu_job_hang_limit,
cfbb6b00 2319 timeout, adev->reset_domain->wq, ring->sched_score, ring->name);
5fd8518d
AG
2320 if (r) {
2321 DRM_ERROR("Failed to create scheduler on ring %s.\n",
2322 ring->name);
2323 return r;
2324 }
2325 }
2326
2327 return 0;
2328}
2329
2330
e3ecdffa
AD
2331/**
2332 * amdgpu_device_ip_init - run init for hardware IPs
2333 *
2334 * @adev: amdgpu_device pointer
2335 *
2336 * Main initialization pass for hardware IPs. The list of all the hardware
2337 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2338 * are run. sw_init initializes the software state associated with each IP
2339 * and hw_init initializes the hardware associated with each IP.
2340 * Returns 0 on success, negative error code on failure.
2341 */
06ec9070 2342static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
2343{
2344 int i, r;
2345
c030f2e4 2346 r = amdgpu_ras_init(adev);
2347 if (r)
2348 return r;
2349
d38ceaf9 2350 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2351 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2352 continue;
a1255107 2353 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 2354 if (r) {
a1255107
AD
2355 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2356 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 2357 goto init_failed;
2c1a2784 2358 }
a1255107 2359 adev->ip_blocks[i].status.sw = true;
bfca0289 2360
d38ceaf9 2361 /* need to do gmc hw init early so we can allocate gpu mem */
a1255107 2362 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
892deb48
VS
2363 /* Try to reserve bad pages early */
2364 if (amdgpu_sriov_vf(adev))
2365 amdgpu_virt_exchange_data(adev);
2366
06ec9070 2367 r = amdgpu_device_vram_scratch_init(adev);
2c1a2784
AD
2368 if (r) {
2369 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
72d3f592 2370 goto init_failed;
2c1a2784 2371 }
a1255107 2372 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
2373 if (r) {
2374 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 2375 goto init_failed;
2c1a2784 2376 }
06ec9070 2377 r = amdgpu_device_wb_init(adev);
2c1a2784 2378 if (r) {
06ec9070 2379 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 2380 goto init_failed;
2c1a2784 2381 }
a1255107 2382 adev->ip_blocks[i].status.hw = true;
2493664f
ML
2383
2384 /* right after GMC hw init, we create CSA */
f92d5c61 2385 if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
1e256e27
RZ
2386 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2387 AMDGPU_GEM_DOMAIN_VRAM,
2388 AMDGPU_CSA_SIZE);
2493664f
ML
2389 if (r) {
2390 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 2391 goto init_failed;
2493664f
ML
2392 }
2393 }
d38ceaf9
AD
2394 }
2395 }
2396
c9ffa427 2397 if (amdgpu_sriov_vf(adev))
9a458402 2398 amdgpu_virt_init_data_exchange(adev);
c9ffa427 2399
533aed27
AG
2400 r = amdgpu_ib_pool_init(adev);
2401 if (r) {
2402 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2403 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2404 goto init_failed;
2405 }
2406
c8963ea4
RZ
2407 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2408 if (r)
72d3f592 2409 goto init_failed;
0a4f2520
RZ
2410
2411 r = amdgpu_device_ip_hw_init_phase1(adev);
2412 if (r)
72d3f592 2413 goto init_failed;
0a4f2520 2414
7a3e0bb2
RZ
2415 r = amdgpu_device_fw_loading(adev);
2416 if (r)
72d3f592 2417 goto init_failed;
7a3e0bb2 2418
0a4f2520
RZ
2419 r = amdgpu_device_ip_hw_init_phase2(adev);
2420 if (r)
72d3f592 2421 goto init_failed;
d38ceaf9 2422
121a2bc6
AG
2423 /*
2424 * retired pages will be loaded from eeprom and reserved here;
2425 * it should be called after amdgpu_device_ip_hw_init_phase2 since
2426 * for some ASICs the RAS EEPROM code relies on the SMU being fully
2427 * functional for I2C communication, which is only true at this point.
b82e65a9
GC
2428 *
2429 * amdgpu_ras_recovery_init may fail, but the upper layers only care
2430 * about failures caused by a bad gpu state and stop the amdgpu init
2431 * process accordingly. For other failure cases it still releases all
2432 * the resources and prints an error message, rather than returning a
2433 * negative value to the upper level.
121a2bc6
AG
2434 *
2435 * Note: theoretically, this should be called before all vram allocations
2436 * to prevent the retired pages from being allocated and reused.
2437 */
b82e65a9
GC
2438 r = amdgpu_ras_recovery_init(adev);
2439 if (r)
2440 goto init_failed;
121a2bc6 2441
cfbb6b00
AG
2442 /**
2443 * In case of XGMI grab extra reference for reset domain for this device
2444 */
a4c63caf 2445 if (adev->gmc.xgmi.num_physical_nodes > 1) {
cfbb6b00
AG
2446 if (amdgpu_xgmi_add_device(adev) == 0) {
2447 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
a4c63caf 2448
cfbb6b00
AG
2449 if (!hive->reset_domain ||
2450 !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2451 r = -ENOENT;
2452 goto init_failed;
2453 }
e3c1b071 2454
cfbb6b00
AG
2455 /* Drop the early temporary reset domain we created for device */
2456 amdgpu_reset_put_reset_domain(adev->reset_domain);
2457 adev->reset_domain = hive->reset_domain;
a4c63caf
AG
2458 }
2459 }
2460
5fd8518d
AG
2461 r = amdgpu_device_init_schedulers(adev);
2462 if (r)
2463 goto init_failed;
2464
e3c1b071 2465 /* Don't init kfd if the whole hive needs to be reset during init */
2466 if (!adev->gmc.xgmi.pending_reset)
2467 amdgpu_amdkfd_device_init(adev);
c6332b97 2468
bd607166
KR
2469 amdgpu_fru_get_product_info(adev);
2470
72d3f592 2471init_failed:
c9ffa427 2472 if (amdgpu_sriov_vf(adev))
c6332b97 2473 amdgpu_virt_release_full_gpu(adev, true);
2474
72d3f592 2475 return r;
d38ceaf9
AD
2476}
2477
e3ecdffa
AD
2478/**
2479 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2480 *
2481 * @adev: amdgpu_device pointer
2482 *
2483 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2484 * this function before a GPU reset. If the value is retained after a
2485 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2486 */
06ec9070 2487static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2488{
2489 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2490}
2491
e3ecdffa
AD
2492/**
2493 * amdgpu_device_check_vram_lost - check if vram is valid
2494 *
2495 * @adev: amdgpu_device pointer
2496 *
2497 * Checks the reset magic value written to the gart pointer in VRAM.
2498 * The driver calls this after a GPU reset to see if the contents of
2499 * VRAM have been lost or not.
2500 * Returns true if vram is lost, false if not.
2501 */
06ec9070 2502static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2503{
dadce777
EQ
2504 if (memcmp(adev->gart.ptr, adev->reset_magic,
2505 AMDGPU_RESET_MAGIC_NUM))
2506 return true;
2507
53b3f8f4 2508 if (!amdgpu_in_reset(adev))
dadce777
EQ
2509 return false;
2510
2511 /*
2512 * For all ASICs with baco/mode1 reset, the VRAM is
2513 * always assumed to be lost.
2514 */
2515 switch (amdgpu_asic_reset_method(adev)) {
2516 case AMD_RESET_METHOD_BACO:
2517 case AMD_RESET_METHOD_MODE1:
2518 return true;
2519 default:
2520 return false;
2521 }
0c49e0b8
CZ
2522}
2523
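/*
 * Illustrative sketch, not part of the upstream file: how the two helpers
 * above pair up around a reset. The wrapper name is hypothetical; the real
 * driver performs these steps from its late-init and reset paths.
 */
static void __maybe_unused amdgpu_example_post_reset_vram_check(struct amdgpu_device *adev)
{
	if (amdgpu_device_check_vram_lost(adev))
		dev_warn(adev->dev, "VRAM contents lost across reset\n");

	/* re-arm the magic so the next reset can be checked as well */
	amdgpu_device_fill_reset_magic(adev);
}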
e3ecdffa 2524/**
1112a46b 2525 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2526 *
2527 * @adev: amdgpu_device pointer
b8b72130 2528 * @state: clockgating state (gate or ungate)
e3ecdffa 2529 *
e3ecdffa 2530 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2531 * set_clockgating_state callbacks are run.
2532 * Late initialization pass enabling clockgating for hardware IPs.
2533 * Fini or suspend, pass disabling clockgating for hardware IPs.
e3ecdffa
AD
2534 * Returns 0 on success, negative error code on failure.
2535 */
fdd34271 2536
5d89bb2d
LL
2537int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2538 enum amd_clockgating_state state)
d38ceaf9 2539{
1112a46b 2540 int i, j, r;
d38ceaf9 2541
4a2ba394
SL
2542 if (amdgpu_emu_mode == 1)
2543 return 0;
2544
1112a46b
RZ
2545 for (j = 0; j < adev->num_ip_blocks; j++) {
2546 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2547 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2548 continue;
5d70a549
PV
2549 /* skip CG for GFX on S0ix */
2550 if (adev->in_s0ix &&
2551 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX)
2552 continue;
4a446d55 2553 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2554 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2555 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2556 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2557 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2558 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2559 /* enable clockgating to save power */
a1255107 2560 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2561 state);
4a446d55
AD
2562 if (r) {
2563 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2564 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2565 return r;
2566 }
b0b00ff1 2567 }
d38ceaf9 2568 }
06b18f61 2569
c9f96fd5
RZ
2570 return 0;
2571}
2572
5d89bb2d
LL
2573int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2574 enum amd_powergating_state state)
c9f96fd5 2575{
1112a46b 2576 int i, j, r;
06b18f61 2577
c9f96fd5
RZ
2578 if (amdgpu_emu_mode == 1)
2579 return 0;
2580
1112a46b
RZ
2581 for (j = 0; j < adev->num_ip_blocks; j++) {
2582 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2583 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5 2584 continue;
5d70a549
PV
2585 /* skip PG for GFX on S0ix */
2586 if (adev->in_s0ix &&
2587 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX)
2588 continue;
c9f96fd5
RZ
2589 /* skip CG for VCE/UVD, it's handled specially */
2590 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2591 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2592 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2593 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2594 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2595 /* enable powergating to save power */
2596 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2597 state);
c9f96fd5
RZ
2598 if (r) {
2599 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2600 adev->ip_blocks[i].version->funcs->name, r);
2601 return r;
2602 }
2603 }
2604 }
2dc80b00
S
2605 return 0;
2606}
2607
beff74bc
AD
2608static int amdgpu_device_enable_mgpu_fan_boost(void)
2609{
2610 struct amdgpu_gpu_instance *gpu_ins;
2611 struct amdgpu_device *adev;
2612 int i, ret = 0;
2613
2614 mutex_lock(&mgpu_info.mutex);
2615
2616 /*
2617 * MGPU fan boost feature should be enabled
2618 * only when there are two or more dGPUs in
2619 * the system
2620 */
2621 if (mgpu_info.num_dgpu < 2)
2622 goto out;
2623
2624 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2625 gpu_ins = &(mgpu_info.gpu_ins[i]);
2626 adev = gpu_ins->adev;
2627 if (!(adev->flags & AMD_IS_APU) &&
f10bb940 2628 !gpu_ins->mgpu_fan_enabled) {
beff74bc
AD
2629 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2630 if (ret)
2631 break;
2632
2633 gpu_ins->mgpu_fan_enabled = 1;
2634 }
2635 }
2636
2637out:
2638 mutex_unlock(&mgpu_info.mutex);
2639
2640 return ret;
2641}
2642
e3ecdffa
AD
2643/**
2644 * amdgpu_device_ip_late_init - run late init for hardware IPs
2645 *
2646 * @adev: amdgpu_device pointer
2647 *
2648 * Late initialization pass for hardware IPs. The list of all the hardware
2649 * IPs that make up the asic is walked and the late_init callbacks are run.
2650 * late_init covers any special initialization that an IP requires
2651 * after all of the IPs have been initialized or something that needs to happen
2652 * late in the init process.
2653 * Returns 0 on success, negative error code on failure.
2654 */
06ec9070 2655static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2656{
60599a03 2657 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2658 int i = 0, r;
2659
2660 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2661 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2662 continue;
2663 if (adev->ip_blocks[i].version->funcs->late_init) {
2664 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2665 if (r) {
2666 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2667 adev->ip_blocks[i].version->funcs->name, r);
2668 return r;
2669 }
2dc80b00 2670 }
73f847db 2671 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2672 }
2673
a891d239
DL
2674 amdgpu_ras_set_error_query_ready(adev, true);
2675
1112a46b
RZ
2676 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2677 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2678
06ec9070 2679 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2680
beff74bc
AD
2681 r = amdgpu_device_enable_mgpu_fan_boost();
2682 if (r)
2683 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2684
4da8b639 2685 /* For passthrough configuration on arcturus and aldebaran, enable special handling of SBR */
2686 if (amdgpu_passthrough(adev) && ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1)||
2687 adev->asic_type == CHIP_ALDEBARAN ))
2688 smu_handle_passthrough_sbr(&adev->smu, true);
60599a03
EQ
2689
2690 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2691 mutex_lock(&mgpu_info.mutex);
2692
2693 /*
2694 * Reset device p-state to low as this was booted with high.
2695 *
2696 * This should be performed only after all devices from the same
2697 * hive get initialized.
2698 *
2699 * However, it's unknown in advance how many devices are in the hive,
2700 * as they are counted one by one during device initialization.
2701 *
2702 * So, we wait for all XGMI interlinked devices initialized.
2703 * This may bring some delays as those devices may come from
2704 * different hives. But that should be OK.
2705 */
2706 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2707 for (i = 0; i < mgpu_info.num_gpu; i++) {
2708 gpu_instance = &(mgpu_info.gpu_ins[i]);
2709 if (gpu_instance->adev->flags & AMD_IS_APU)
2710 continue;
2711
d84a430d
JK
2712 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2713 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
2714 if (r) {
2715 DRM_ERROR("pstate setting failed (%d).\n", r);
2716 break;
2717 }
2718 }
2719 }
2720
2721 mutex_unlock(&mgpu_info.mutex);
2722 }
2723
d38ceaf9
AD
2724 return 0;
2725}
2726
613aa3ea
LY
2727/**
2728 * amdgpu_device_smu_fini_early - smu hw_fini wrapper
2729 *
2730 * @adev: amdgpu_device pointer
2731 *
2732 * For ASICs that need to disable the SMC first
2733 */
2734static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
2735{
2736 int i, r;
2737
2738 if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))
2739 return;
2740
2741 for (i = 0; i < adev->num_ip_blocks; i++) {
2742 if (!adev->ip_blocks[i].status.hw)
2743 continue;
2744 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2745 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2746 /* XXX handle errors */
2747 if (r) {
2748 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2749 adev->ip_blocks[i].version->funcs->name, r);
2750 }
2751 adev->ip_blocks[i].status.hw = false;
2752 break;
2753 }
2754 }
2755}
2756
e9669fb7 2757static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
d38ceaf9
AD
2758{
2759 int i, r;
2760
e9669fb7
AG
2761 for (i = 0; i < adev->num_ip_blocks; i++) {
2762 if (!adev->ip_blocks[i].version->funcs->early_fini)
2763 continue;
5278a159 2764
e9669fb7
AG
2765 r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
2766 if (r) {
2767 DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
2768 adev->ip_blocks[i].version->funcs->name, r);
2769 }
2770 }
c030f2e4 2771
e9669fb7 2772 amdgpu_amdkfd_suspend(adev, false);
a82400b5 2773
05df1f01 2774 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2775 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2776
613aa3ea
LY
2777 /* Workaround for ASICs that need to disable the SMC first */
2778 amdgpu_device_smu_fini_early(adev);
3e96dbfd 2779
d38ceaf9 2780 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2781 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2782 continue;
8201a67a 2783
a1255107 2784 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2785 /* XXX handle errors */
2c1a2784 2786 if (r) {
a1255107
AD
2787 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2788 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2789 }
8201a67a 2790
a1255107 2791 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2792 }
2793
6effad8a
GC
2794 if (amdgpu_sriov_vf(adev)) {
2795 if (amdgpu_virt_release_full_gpu(adev, false))
2796 DRM_ERROR("failed to release exclusive mode on fini\n");
2797 }
2798
e9669fb7
AG
2799 return 0;
2800}
2801
2802/**
2803 * amdgpu_device_ip_fini - run fini for hardware IPs
2804 *
2805 * @adev: amdgpu_device pointer
2806 *
2807 * Main teardown pass for hardware IPs. The list of all the hardware
2808 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2809 * are run. hw_fini tears down the hardware associated with each IP
2810 * and sw_fini tears down any software state associated with each IP.
2811 * Returns 0 on success, negative error code on failure.
2812 */
2813static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2814{
2815 int i, r;
2816
2817 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2818 amdgpu_virt_release_ras_err_handler_data(adev);
2819
e9669fb7
AG
2820 if (adev->gmc.xgmi.num_physical_nodes > 1)
2821 amdgpu_xgmi_remove_device(adev);
2822
2823 amdgpu_amdkfd_device_fini_sw(adev);
9950cda2 2824
d38ceaf9 2825 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2826 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2827 continue;
c12aba3a
ML
2828
2829 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2830 amdgpu_ucode_free_bo(adev);
1e256e27 2831 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a
ML
2832 amdgpu_device_wb_fini(adev);
2833 amdgpu_device_vram_scratch_fini(adev);
533aed27 2834 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2835 }
2836
a1255107 2837 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2838 /* XXX handle errors */
2c1a2784 2839 if (r) {
a1255107
AD
2840 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2841 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2842 }
a1255107
AD
2843 adev->ip_blocks[i].status.sw = false;
2844 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2845 }
2846
a6dcfd9c 2847 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2848 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2849 continue;
a1255107
AD
2850 if (adev->ip_blocks[i].version->funcs->late_fini)
2851 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2852 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2853 }
2854
c030f2e4 2855 amdgpu_ras_fini(adev);
2856
d38ceaf9
AD
2857 return 0;
2858}
2859
e3ecdffa 2860/**
beff74bc 2861 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2862 *
1112a46b 2863 * @work: work_struct.
e3ecdffa 2864 */
beff74bc 2865static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2866{
2867 struct amdgpu_device *adev =
beff74bc 2868 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2869 int r;
2870
2871 r = amdgpu_ib_ring_tests(adev);
2872 if (r)
2873 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2874}
2875
1e317b99
RZ
2876static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2877{
2878 struct amdgpu_device *adev =
2879 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2880
90a92662
MD
2881 WARN_ON_ONCE(adev->gfx.gfx_off_state);
2882 WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
2883
2884 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2885 adev->gfx.gfx_off_state = true;
1e317b99
RZ
2886}
2887
e3ecdffa 2888/**
e7854a03 2889 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2890 *
2891 * @adev: amdgpu_device pointer
2892 *
2893 * Main suspend function for hardware IPs. The list of all the hardware
2894 * IPs that make up the asic is walked, clockgating is disabled and the
2895 * suspend callbacks are run. suspend puts the hardware and software state
2896 * in each IP into a state suitable for suspend.
2897 * Returns 0 on success, negative error code on failure.
2898 */
e7854a03
AD
2899static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2900{
2901 int i, r;
2902
50ec83f0
AD
2903 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2904 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2905
e7854a03
AD
2906 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2907 if (!adev->ip_blocks[i].status.valid)
2908 continue;
2b9f7848 2909
e7854a03 2910 /* displays are handled separately */
2b9f7848
ND
2911 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
2912 continue;
2913
2914 /* XXX handle errors */
2915 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2916 /* XXX handle errors */
2917 if (r) {
2918 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2919 adev->ip_blocks[i].version->funcs->name, r);
2920 return r;
e7854a03 2921 }
2b9f7848
ND
2922
2923 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
2924 }
2925
e7854a03
AD
2926 return 0;
2927}
2928
2929/**
2930 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2931 *
2932 * @adev: amdgpu_device pointer
2933 *
2934 * Main suspend function for hardware IPs. The list of all the hardware
2935 * IPs that make up the asic is walked, clockgating is disabled and the
2936 * suspend callbacks are run. suspend puts the hardware and software state
2937 * in each IP into a state suitable for suspend.
2938 * Returns 0 on success, negative error code on failure.
2939 */
2940static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2941{
2942 int i, r;
2943
557f42a2 2944 if (adev->in_s0ix)
34416931 2945 amdgpu_gfx_state_change_set(adev, sGpuChangeState_D3Entry);
34416931 2946
d38ceaf9 2947 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2948 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2949 continue;
e7854a03
AD
2950 /* displays are handled in phase1 */
2951 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2952 continue;
bff77e86
LM
2953 /* PSP lost connection when err_event_athub occurs */
2954 if (amdgpu_ras_intr_triggered() &&
2955 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2956 adev->ip_blocks[i].status.hw = false;
2957 continue;
2958 }
e3c1b071 2959
2960 /* skip unnecessary suspend if we do not initialize them yet */
2961 if (adev->gmc.xgmi.pending_reset &&
2962 !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2963 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
2964 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2965 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
2966 adev->ip_blocks[i].status.hw = false;
2967 continue;
2968 }
557f42a2 2969
32ff160d
AD
2970 /* skip suspend of gfx and psp for S0ix
2971 * gfx is in gfxoff state, so on resume it will exit gfxoff just
2972 * like at runtime. PSP is also part of the always on hardware
2973 * so no need to suspend it.
2974 */
557f42a2 2975 if (adev->in_s0ix &&
32ff160d
AD
2976 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
2977 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX))
557f42a2
AD
2978 continue;
2979
d38ceaf9 2980 /* XXX handle errors */
a1255107 2981 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 2982 /* XXX handle errors */
2c1a2784 2983 if (r) {
a1255107
AD
2984 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2985 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2986 }
876923fb 2987 adev->ip_blocks[i].status.hw = false;
a3a09142 2988 /* handle putting the SMC in the appropriate state */
86b93fd6
JZ
2989 if (!amdgpu_sriov_vf(adev)) {
2990 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2991 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
2992 if (r) {
2993 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
2994 adev->mp1_state, r);
2995 return r;
2996 }
a3a09142
AD
2997 }
2998 }
d38ceaf9
AD
2999 }
3000
3001 return 0;
3002}
3003
e7854a03
AD
3004/**
3005 * amdgpu_device_ip_suspend - run suspend for hardware IPs
3006 *
3007 * @adev: amdgpu_device pointer
3008 *
3009 * Main suspend function for hardware IPs. The list of all the hardware
3010 * IPs that make up the asic is walked, clockgating is disabled and the
3011 * suspend callbacks are run. suspend puts the hardware and software state
3012 * in each IP into a state suitable for suspend.
3013 * Returns 0 on success, negative error code on failure.
3014 */
3015int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3016{
3017 int r;
3018
3c73683c
JC
3019 if (amdgpu_sriov_vf(adev)) {
3020 amdgpu_virt_fini_data_exchange(adev);
e7819644 3021 amdgpu_virt_request_full_gpu(adev, false);
3c73683c 3022 }
e7819644 3023
e7854a03
AD
3024 r = amdgpu_device_ip_suspend_phase1(adev);
3025 if (r)
3026 return r;
3027 r = amdgpu_device_ip_suspend_phase2(adev);
3028
e7819644
YT
3029 if (amdgpu_sriov_vf(adev))
3030 amdgpu_virt_release_full_gpu(adev, false);
3031
e7854a03
AD
3032 return r;
3033}
3034
06ec9070 3035static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3036{
3037 int i, r;
3038
2cb681b6
ML
3039 static enum amd_ip_block_type ip_order[] = {
3040 AMD_IP_BLOCK_TYPE_GMC,
3041 AMD_IP_BLOCK_TYPE_COMMON,
39186aef 3042 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
3043 AMD_IP_BLOCK_TYPE_IH,
3044 };
a90ad3c2 3045
95ea3dbc 3046 for (i = 0; i < adev->num_ip_blocks; i++) {
2cb681b6
ML
3047 int j;
3048 struct amdgpu_ip_block *block;
a90ad3c2 3049
4cd2a96d
J
3050 block = &adev->ip_blocks[i];
3051 block->status.hw = false;
2cb681b6 3052
4cd2a96d 3053 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
2cb681b6 3054
4cd2a96d 3055 if (block->version->type != ip_order[j] ||
2cb681b6
ML
3056 !block->status.valid)
3057 continue;
3058
3059 r = block->version->funcs->hw_init(adev);
0aaeefcc 3060 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3061 if (r)
3062 return r;
482f0e53 3063 block->status.hw = true;
a90ad3c2
ML
3064 }
3065 }
3066
3067 return 0;
3068}
3069
06ec9070 3070static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
3071{
3072 int i, r;
3073
2cb681b6
ML
3074 static enum amd_ip_block_type ip_order[] = {
3075 AMD_IP_BLOCK_TYPE_SMC,
3076 AMD_IP_BLOCK_TYPE_DCE,
3077 AMD_IP_BLOCK_TYPE_GFX,
3078 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c 3079 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07
JJ
3080 AMD_IP_BLOCK_TYPE_VCE,
3081 AMD_IP_BLOCK_TYPE_VCN
2cb681b6 3082 };
a90ad3c2 3083
2cb681b6
ML
3084 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3085 int j;
3086 struct amdgpu_ip_block *block;
a90ad3c2 3087
2cb681b6
ML
3088 for (j = 0; j < adev->num_ip_blocks; j++) {
3089 block = &adev->ip_blocks[j];
3090
3091 if (block->version->type != ip_order[i] ||
482f0e53
ML
3092 !block->status.valid ||
3093 block->status.hw)
2cb681b6
ML
3094 continue;
3095
895bd048
JZ
3096 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3097 r = block->version->funcs->resume(adev);
3098 else
3099 r = block->version->funcs->hw_init(adev);
3100
0aaeefcc 3101 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
3102 if (r)
3103 return r;
482f0e53 3104 block->status.hw = true;
a90ad3c2
ML
3105 }
3106 }
3107
3108 return 0;
3109}
3110
e3ecdffa
AD
3111/**
3112 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3113 *
3114 * @adev: amdgpu_device pointer
3115 *
3116 * First resume function for hardware IPs. The list of all the hardware
3117 * IPs that make up the asic is walked and the resume callbacks are run for
3118 * COMMON, GMC, and IH. resume puts the hardware into a functional state
3119 * after a suspend and updates the software state as necessary. This
3120 * function is also used for restoring the GPU after a GPU reset.
3121 * Returns 0 on success, negative error code on failure.
3122 */
06ec9070 3123static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
3124{
3125 int i, r;
3126
a90ad3c2 3127 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3128 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 3129 continue;
a90ad3c2 3130 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa
AD
3131 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3132 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
482f0e53 3133
fcf0649f
CZ
3134 r = adev->ip_blocks[i].version->funcs->resume(adev);
3135 if (r) {
3136 DRM_ERROR("resume of IP block <%s> failed %d\n",
3137 adev->ip_blocks[i].version->funcs->name, r);
3138 return r;
3139 }
482f0e53 3140 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
3141 }
3142 }
3143
3144 return 0;
3145}
3146
e3ecdffa
AD
3147/**
3148 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3149 *
3150 * @adev: amdgpu_device pointer
3151 *
3152 * Second resume function for hardware IPs. The list of all the hardware
3153 * IPs that make up the asic is walked and the resume callbacks are run for
3154 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
3155 * functional state after a suspend and updates the software state as
3156 * necessary. This function is also used for restoring the GPU after a GPU
3157 * reset.
3158 * Returns 0 on success, negative error code on failure.
3159 */
06ec9070 3160static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3161{
3162 int i, r;
3163
3164 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3165 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 3166 continue;
fcf0649f 3167 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3168 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
3169 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3170 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 3171 continue;
a1255107 3172 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 3173 if (r) {
a1255107
AD
3174 DRM_ERROR("resume of IP block <%s> failed %d\n",
3175 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 3176 return r;
2c1a2784 3177 }
482f0e53 3178 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
3179 }
3180
3181 return 0;
3182}
3183
e3ecdffa
AD
3184/**
3185 * amdgpu_device_ip_resume - run resume for hardware IPs
3186 *
3187 * @adev: amdgpu_device pointer
3188 *
3189 * Main resume function for hardware IPs. The hardware IPs
3190 * are split into two resume functions because they are
3191 * also used in recovering from a GPU reset and some additional
3192 * steps need to be taken between them. In this case (S3/S4) they are
3193 * run sequentially.
3194 * Returns 0 on success, negative error code on failure.
3195 */
06ec9070 3196static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
3197{
3198 int r;
3199
9cec53c1
JZ
3200 r = amdgpu_amdkfd_resume_iommu(adev);
3201 if (r)
3202 return r;
3203
06ec9070 3204 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
3205 if (r)
3206 return r;
7a3e0bb2
RZ
3207
3208 r = amdgpu_device_fw_loading(adev);
3209 if (r)
3210 return r;
3211
06ec9070 3212 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
3213
3214 return r;
3215}
3216
e3ecdffa
AD
3217/**
3218 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3219 *
3220 * @adev: amdgpu_device pointer
3221 *
3222 * Query the VBIOS data tables to determine if the board supports SR-IOV.
3223 */
4e99a44e 3224static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 3225{
6867e1b5
ML
3226 if (amdgpu_sriov_vf(adev)) {
3227 if (adev->is_atom_fw) {
58ff791a 3228 if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
6867e1b5
ML
3229 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3230 } else {
3231 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3232 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3233 }
3234
3235 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3236 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 3237 }
048765ad
AR
3238}
3239
e3ecdffa
AD
3240/**
3241 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3242 *
3243 * @asic_type: AMD asic type
3244 *
3245 * Check if there is DC (new modesetting infrastructre) support for an asic.
3246 * returns true if DC has support, false if not.
3247 */
4562236b
HW
3248bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3249{
3250 switch (asic_type) {
0637d417
AD
3251#ifdef CONFIG_DRM_AMDGPU_SI
3252 case CHIP_HAINAN:
3253#endif
3254 case CHIP_TOPAZ:
3255 /* chips with no display hardware */
3256 return false;
4562236b 3257#if defined(CONFIG_DRM_AMD_DC)
64200c46
MR
3258 case CHIP_TAHITI:
3259 case CHIP_PITCAIRN:
3260 case CHIP_VERDE:
3261 case CHIP_OLAND:
2d32ffd6
AD
3262 /*
3263 * We have systems in the wild with these ASICs that require
3264 * LVDS and VGA support which is not supported with DC.
3265 *
3266 * Fallback to the non-DC driver here by default so as not to
3267 * cause regressions.
3268 */
3269#if defined(CONFIG_DRM_AMD_DC_SI)
3270 return amdgpu_dc > 0;
3271#else
3272 return false;
64200c46 3273#endif
4562236b 3274 case CHIP_BONAIRE:
0d6fbccb 3275 case CHIP_KAVERI:
367e6687
AD
3276 case CHIP_KABINI:
3277 case CHIP_MULLINS:
d9fda248
HW
3278 /*
3279 * We have systems in the wild with these ASICs that require
3280 * LVDS and VGA support which is not supported with DC.
3281 *
3282 * Fallback to the non-DC driver here by default so as not to
3283 * cause regressions.
3284 */
3285 return amdgpu_dc > 0;
3286 case CHIP_HAWAII:
4562236b
HW
3287 case CHIP_CARRIZO:
3288 case CHIP_STONEY:
4562236b 3289 case CHIP_POLARIS10:
675fd32b 3290 case CHIP_POLARIS11:
2c8ad2d5 3291 case CHIP_POLARIS12:
675fd32b 3292 case CHIP_VEGAM:
4562236b
HW
3293 case CHIP_TONGA:
3294 case CHIP_FIJI:
42f8ffa1 3295 case CHIP_VEGA10:
dca7b401 3296 case CHIP_VEGA12:
c6034aa2 3297 case CHIP_VEGA20:
b86a1aa3 3298#if defined(CONFIG_DRM_AMD_DC_DCN)
fd187853 3299 case CHIP_RAVEN:
b4f199c7 3300 case CHIP_NAVI10:
8fceceb6 3301 case CHIP_NAVI14:
078655d9 3302 case CHIP_NAVI12:
e1c14c43 3303 case CHIP_RENOIR:
3f68c01b 3304 case CHIP_CYAN_SKILLFISH:
81d9bfb8 3305 case CHIP_SIENNA_CICHLID:
a6c5308f 3306 case CHIP_NAVY_FLOUNDER:
7cc656e2 3307 case CHIP_DIMGREY_CAVEFISH:
ddaed58b 3308 case CHIP_BEIGE_GOBY:
84b934bc 3309 case CHIP_VANGOGH:
c8b73f7f 3310 case CHIP_YELLOW_CARP:
42f8ffa1 3311#endif
f7f12b25 3312 default:
fd187853 3313 return amdgpu_dc != 0;
f7f12b25 3314#else
4562236b 3315 default:
93b09a9a 3316 if (amdgpu_dc > 0)
044a48f4 3317 DRM_INFO_ONCE("Display Core has been requested via kernel parameter "
93b09a9a 3318 "but isn't supported by ASIC, ignoring\n");
4562236b 3319 return false;
f7f12b25 3320#endif
4562236b
HW
3321 }
3322}
3323
3324/**
3325 * amdgpu_device_has_dc_support - check if dc is supported
3326 *
982a820b 3327 * @adev: amdgpu_device pointer
4562236b
HW
3328 *
3329 * Returns true for supported, false for not supported
3330 */
3331bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3332{
abaf210c
AS
3333 if (amdgpu_sriov_vf(adev) ||
3334 adev->enable_virtual_display ||
3335 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
2555039d
XY
3336 return false;
3337
4562236b
HW
3338 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3339}
3340
d4535e2c
AG
3341static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3342{
3343 struct amdgpu_device *adev =
3344 container_of(__work, struct amdgpu_device, xgmi_reset_work);
d95e8e97 3345 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
d4535e2c 3346
c6a6e2db
AG
3347 /* It's a bug to not have a hive within this function */
3348 if (WARN_ON(!hive))
3349 return;
3350
3351 /*
3352 * Use task barrier to synchronize all xgmi reset works across the
3353 * hive. task_barrier_enter and task_barrier_exit will block
3354 * until all the threads running the xgmi reset works reach
3355 * those points. task_barrier_full will do both blocks.
3356 */
3357 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3358
3359 task_barrier_enter(&hive->tb);
4a580877 3360 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
c6a6e2db
AG
3361
3362 if (adev->asic_reset_res)
3363 goto fail;
3364
3365 task_barrier_exit(&hive->tb);
4a580877 3366 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
c6a6e2db
AG
3367
3368 if (adev->asic_reset_res)
3369 goto fail;
43c4d576 3370
8bc7b360
HZ
3371 if (adev->mmhub.ras_funcs &&
3372 adev->mmhub.ras_funcs->reset_ras_error_count)
3373 adev->mmhub.ras_funcs->reset_ras_error_count(adev);
c6a6e2db
AG
3374 } else {
3375
3376 task_barrier_full(&hive->tb);
3377 adev->asic_reset_res = amdgpu_asic_reset(adev);
3378 }
ce316fa5 3379
c6a6e2db 3380fail:
d4535e2c 3381 if (adev->asic_reset_res)
fed184e9 3382 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
4a580877 3383 adev->asic_reset_res, adev_to_drm(adev)->unique);
d95e8e97 3384 amdgpu_put_xgmi_hive(hive);
d4535e2c
AG
3385}
3386
71f98027
AD
3387static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3388{
3389 char *input = amdgpu_lockup_timeout;
3390 char *timeout_setting = NULL;
3391 int index = 0;
3392 long timeout;
3393 int ret = 0;
3394
3395 /*
67387dfe
AD
3396 * By default the timeout for non-compute jobs is 10000
3397 * and 60000 for compute jobs.
71f98027 3398 * In SR-IOV or passthrough mode, the timeout for compute
b7b2a316 3399 * jobs is 60000 by default.
71f98027
AD
3400 */
3401 adev->gfx_timeout = msecs_to_jiffies(10000);
3402 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
9882e278
ED
3403 if (amdgpu_sriov_vf(adev))
3404 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3405 msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
71f98027 3406 else
67387dfe 3407 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027 3408
f440ff44 3409 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 3410 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 3411 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
3412 ret = kstrtol(timeout_setting, 0, &timeout);
3413 if (ret)
3414 return ret;
3415
3416 if (timeout == 0) {
3417 index++;
3418 continue;
3419 } else if (timeout < 0) {
3420 timeout = MAX_SCHEDULE_TIMEOUT;
127aedf9
CK
3421 dev_warn(adev->dev, "lockup timeout disabled");
3422 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
71f98027
AD
3423 } else {
3424 timeout = msecs_to_jiffies(timeout);
3425 }
3426
3427 switch (index++) {
3428 case 0:
3429 adev->gfx_timeout = timeout;
3430 break;
3431 case 1:
3432 adev->compute_timeout = timeout;
3433 break;
3434 case 2:
3435 adev->sdma_timeout = timeout;
3436 break;
3437 case 3:
3438 adev->video_timeout = timeout;
3439 break;
3440 default:
3441 break;
3442 }
3443 }
3444 /*
3445 * There is only one value specified and
3446 * it should apply to all non-compute jobs.
3447 */
bcccee89 3448 if (index == 1) {
71f98027 3449 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
3450 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3451 adev->compute_timeout = adev->gfx_timeout;
3452 }
71f98027
AD
3453 }
3454
3455 return ret;
3456}
d4535e2c 3457
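/*
 * Example (illustrative only): with the parsing above, a module parameter
 * such as
 *
 *	amdgpu.lockup_timeout=10000,60000,10000,10000
 *
 * sets the gfx, compute, sdma and video timeouts (in ms, in that order),
 * while a single value such as amdgpu.lockup_timeout=5000 applies to all
 * non-compute jobs (and also to compute jobs on SR-IOV/passthrough).
 * A value of 0 keeps the default and a negative value disables the timeout.
 */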
4a74c38c
PY
3458/**
3459 * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3460 *
3461 * @adev: amdgpu_device pointer
3462 *
 3463 * RAM is direct mapped to the GPU if the IOMMU is not enabled or is in passthrough mode; see the note after this function
3464 */
3465static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3466{
3467 struct iommu_domain *domain;
3468
3469 domain = iommu_get_domain_for_dev(adev->dev);
3470 if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3471 adev->ram_is_direct_mapped = true;
3472}
3473
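/*
 * Informational note: an identity (passthrough) domain or a missing IOMMU
 * means device DMA addresses match physical RAM addresses, which is what
 * ram_is_direct_mapped records; other parts of the driver can test this
 * flag to skip work that is only needed when an IOMMU remaps system RAM.
 */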
77f3a5cd
ND
3474static const struct attribute *amdgpu_dev_attributes[] = {
3475 &dev_attr_product_name.attr,
3476 &dev_attr_product_number.attr,
3477 &dev_attr_serial_number.attr,
3478 &dev_attr_pcie_replay_count.attr,
3479 NULL
3480};
3481
d38ceaf9
AD
3482/**
3483 * amdgpu_device_init - initialize the driver
3484 *
3485 * @adev: amdgpu_device pointer
d38ceaf9
AD
3486 * @flags: driver flags
3487 *
3488 * Initializes the driver info and hw (all asics).
3489 * Returns 0 for success or an error on failure.
3490 * Called at driver startup.
3491 */
3492int amdgpu_device_init(struct amdgpu_device *adev,
d38ceaf9
AD
3493 uint32_t flags)
3494{
8aba21b7
LT
3495 struct drm_device *ddev = adev_to_drm(adev);
3496 struct pci_dev *pdev = adev->pdev;
d38ceaf9 3497 int r, i;
b98c6299 3498 bool px = false;
95844d20 3499 u32 max_MBps;
d38ceaf9
AD
3500
3501 adev->shutdown = false;
d38ceaf9 3502 adev->flags = flags;
4e66d7d2
YZ
3503
3504 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3505 adev->asic_type = amdgpu_force_asic_type;
3506 else
3507 adev->asic_type = flags & AMD_ASIC_MASK;
3508
d38ceaf9 3509 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 3510 if (amdgpu_emu_mode == 1)
8bdab6bb 3511 adev->usec_timeout *= 10;
770d13b1 3512 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
3513 adev->accel_working = false;
3514 adev->num_rings = 0;
3515 adev->mman.buffer_funcs = NULL;
3516 adev->mman.buffer_funcs_ring = NULL;
3517 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 3518 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 3519 adev->gmc.gmc_funcs = NULL;
7bd939d0 3520 adev->harvest_ip_mask = 0x0;
f54d1867 3521 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 3522 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
3523
3524 adev->smc_rreg = &amdgpu_invalid_rreg;
3525 adev->smc_wreg = &amdgpu_invalid_wreg;
3526 adev->pcie_rreg = &amdgpu_invalid_rreg;
3527 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
3528 adev->pciep_rreg = &amdgpu_invalid_rreg;
3529 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
3530 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3531 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
3532 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3533 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3534 adev->didt_rreg = &amdgpu_invalid_rreg;
3535 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
3536 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3537 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
3538 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3539 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3540
3e39ab90
AD
3541 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3542 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3543 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
3544
3545 /* mutex initialization are all done here so we
3546 * can recall function without having locking issues */
0e5ca0d1 3547 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
3548 mutex_init(&adev->pm.mutex);
3549 mutex_init(&adev->gfx.gpu_clock_mutex);
3550 mutex_init(&adev->srbm_mutex);
b8866c26 3551 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3552 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 3553 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3554 mutex_init(&adev->mn_lock);
e23b74aa 3555 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3556 hash_init(adev->mn_hash);
53b3f8f4 3557 atomic_set(&adev->in_gpu_reset, 0);
32eaeae0 3558 mutex_init(&adev->psp.mutex);
bd052211 3559 mutex_init(&adev->notifier_lock);
d38ceaf9 3560
4eaf21b7 3561 amdgpu_device_init_apu_flags(adev);
9f6a7857 3562
912dfc84
EQ
3563 r = amdgpu_device_check_arguments(adev);
3564 if (r)
3565 return r;
d38ceaf9 3566
d38ceaf9
AD
3567 spin_lock_init(&adev->mmio_idx_lock);
3568 spin_lock_init(&adev->smc_idx_lock);
3569 spin_lock_init(&adev->pcie_idx_lock);
3570 spin_lock_init(&adev->uvd_ctx_idx_lock);
3571 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3572 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3573 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3574 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3575 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3576
0c4e7fa5
CZ
3577 INIT_LIST_HEAD(&adev->shadow_list);
3578 mutex_init(&adev->shadow_list_lock);
3579
655ce9cb 3580 INIT_LIST_HEAD(&adev->reset_list);
3581
beff74bc
AD
3582 INIT_DELAYED_WORK(&adev->delayed_init_work,
3583 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3584 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3585 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3586
d4535e2c
AG
3587 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3588
d23ee13f 3589 adev->gfx.gfx_off_req_count = 1;
b6e79d9a 3590 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3591
b265bdbd
EQ
3592 atomic_set(&adev->throttling_logging_enabled, 1);
3593 /*
3594 * If throttling continues, logging will be performed every minute
3595 * to avoid log flooding. "-1" is subtracted since the thermal
3596 * throttling interrupt comes every second. Thus, the total logging
 3597 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
3598 * for throttling interrupt) = 60 seconds.
3599 */
3600 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3601 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3602
0fa49558
AX
3603 /* Registers mapping */
3604 /* TODO: block userspace mapping of io register */
da69c161
KW
3605 if (adev->asic_type >= CHIP_BONAIRE) {
3606 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3607 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3608 } else {
3609 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3610 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3611 }
d38ceaf9 3612
6c08e0ef
EQ
3613 for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3614 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3615
d38ceaf9
AD
3616 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3617 if (adev->rmmio == NULL) {
3618 return -ENOMEM;
3619 }
3620 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3621 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3622
5494d864
AD
3623 amdgpu_device_get_pcie_info(adev);
3624
b239c017
JX
3625 if (amdgpu_mcbp)
3626 DRM_INFO("MCBP is enabled\n");
3627
5f84cc63
JX
3628 if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
3629 adev->enable_mes = true;
3630
3aa0115d
ML
3631 /* detect hw virtualization here */
3632 amdgpu_detect_virtualization(adev);
3633
dffa11b4
ML
3634 r = amdgpu_device_get_job_timeout_settings(adev);
3635 if (r) {
3636 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4ef87d8f 3637 return r;
a190d1c7
XY
3638 }
3639
cfbb6b00
AG
3640 /*
 3641 * The reset domain needs to be present early, before the XGMI hive is
 3642 * discovered (if any) and initialized, so the reset sem and in_gpu_reset
 3643 * flag can be used early on during init.
3644 */
 3645 adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3646 if (!adev->reset_domain)
3647 return -ENOMEM;
3648
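/*
 * Illustrative sketch (assumed usage, not code from this function): with
 * the reset semaphore now living in the reset domain, paths that must not
 * race with a GPU reset are expected to take the read side, roughly:
 *
 *	if (down_read_trylock(&adev->reset_domain->sem)) {
 *		// ... touch hardware ...
 *		up_read(&adev->reset_domain->sem);
 *	}
 *
 * while the reset path takes the write side (see amdgpu_device_lock_adev()
 * later in this file).
 */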
d38ceaf9 3649 /* early init functions */
06ec9070 3650 r = amdgpu_device_ip_early_init(adev);
d38ceaf9 3651 if (r)
4ef87d8f 3652 return r;
d38ceaf9 3653
4a0165f0
VS
3654 /* Need to get xgmi info early to decide the reset behavior*/
3655 if (adev->gmc.xgmi.supported) {
3656 r = adev->gfxhub.funcs->get_xgmi_info(adev);
3657 if (r)
3658 return r;
3659 }
3660
8e6d0b69 3661 /* enable PCIE atomic ops */
3662 if (amdgpu_sriov_vf(adev))
3663 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
3664 adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_enabled_flags ==
3665 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3666 else
3667 adev->have_atomics_support =
3668 !pci_enable_atomic_ops_to_root(adev->pdev,
3669 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3670 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3671 if (!adev->have_atomics_support)
3672 dev_info(adev->dev, "PCIE atomic ops is not supported\n");
3673
6585661d
OZ
3674 /* doorbell bar mapping and doorbell index init*/
3675 amdgpu_device_doorbell_init(adev);
3676
9475a943
SL
3677 if (amdgpu_emu_mode == 1) {
3678 /* post the asic on emulation mode */
3679 emu_soc_asic_init(adev);
bfca0289 3680 goto fence_driver_init;
9475a943 3681 }
bfca0289 3682
04442bf7
LL
3683 amdgpu_reset_init(adev);
3684
4e99a44e
ML
3685 /* detect if we are with an SRIOV vbios */
3686 amdgpu_device_detect_sriov_bios(adev);
048765ad 3687
95e8e59e
AD
3688 /* check if we need to reset the asic
3689 * E.g., driver was not cleanly unloaded previously, etc.
3690 */
f14899fd 3691 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
e3c1b071 3692 if (adev->gmc.xgmi.num_physical_nodes) {
3693 dev_info(adev->dev, "Pending hive reset.\n");
3694 adev->gmc.xgmi.pending_reset = true;
3695 /* Only need to init necessary block for SMU to handle the reset */
3696 for (i = 0; i < adev->num_ip_blocks; i++) {
3697 if (!adev->ip_blocks[i].status.valid)
3698 continue;
3699 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3700 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3701 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3702 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
751f43e7 3703 DRM_DEBUG("IP %s disabled for hw_init.\n",
e3c1b071 3704 adev->ip_blocks[i].version->funcs->name);
3705 adev->ip_blocks[i].status.hw = true;
3706 }
3707 }
3708 } else {
3709 r = amdgpu_asic_reset(adev);
3710 if (r) {
3711 dev_err(adev->dev, "asic reset on init failed\n");
3712 goto failed;
3713 }
95e8e59e
AD
3714 }
3715 }
3716
8f66090b 3717 pci_enable_pcie_error_reporting(adev->pdev);
c9a6b82f 3718
d38ceaf9 3719 /* Post card if necessary */
39c640c0 3720 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3721 if (!adev->bios) {
bec86378 3722 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3723 r = -EINVAL;
3724 goto failed;
d38ceaf9 3725 }
bec86378 3726 DRM_INFO("GPU posting now...\n");
4d2997ab 3727 r = amdgpu_device_asic_init(adev);
4e99a44e
ML
3728 if (r) {
3729 dev_err(adev->dev, "gpu post error!\n");
3730 goto failed;
3731 }
d38ceaf9
AD
3732 }
3733
88b64e95
AD
3734 if (adev->is_atom_fw) {
3735 /* Initialize clocks */
3736 r = amdgpu_atomfirmware_get_clock_info(adev);
3737 if (r) {
3738 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 3739 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
3740 goto failed;
3741 }
3742 } else {
a5bde2f9
AD
3743 /* Initialize clocks */
3744 r = amdgpu_atombios_get_clock_info(adev);
3745 if (r) {
3746 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 3747 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 3748 goto failed;
a5bde2f9
AD
3749 }
3750 /* init i2c buses */
4562236b
HW
3751 if (!amdgpu_device_has_dc_support(adev))
3752 amdgpu_atombios_i2c_init(adev);
2c1a2784 3753 }
d38ceaf9 3754
bfca0289 3755fence_driver_init:
d38ceaf9 3756 /* Fence driver */
067f44c8 3757 r = amdgpu_fence_driver_sw_init(adev);
2c1a2784 3758 if (r) {
067f44c8 3759 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
e23b74aa 3760 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3761 goto failed;
2c1a2784 3762 }
d38ceaf9
AD
3763
3764 /* init the mode config */
4a580877 3765 drm_mode_config_init(adev_to_drm(adev));
d38ceaf9 3766
06ec9070 3767 r = amdgpu_device_ip_init(adev);
d38ceaf9 3768 if (r) {
8840a387 3769 /* failed in exclusive mode due to timeout */
3770 if (amdgpu_sriov_vf(adev) &&
3771 !amdgpu_sriov_runtime(adev) &&
3772 amdgpu_virt_mmio_blocked(adev) &&
3773 !amdgpu_virt_wait_reset(adev)) {
3774 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
3775 /* Don't send request since VF is inactive. */
3776 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3777 adev->virt.ops = NULL;
8840a387 3778 r = -EAGAIN;
970fd197 3779 goto release_ras_con;
8840a387 3780 }
06ec9070 3781 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3782 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
970fd197 3783 goto release_ras_con;
d38ceaf9
AD
3784 }
3785
8d35a259
LG
3786 amdgpu_fence_driver_hw_init(adev);
3787
d69b8971
YZ
3788 dev_info(adev->dev,
3789 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
3790 adev->gfx.config.max_shader_engines,
3791 adev->gfx.config.max_sh_per_se,
3792 adev->gfx.config.max_cu_per_sh,
3793 adev->gfx.cu_info.number);
3794
d38ceaf9
AD
3795 adev->accel_working = true;
3796
e59c0205
AX
3797 amdgpu_vm_check_compute_bug(adev);
3798
95844d20
MO
3799 /* Initialize the buffer migration limit. */
3800 if (amdgpu_moverate >= 0)
3801 max_MBps = amdgpu_moverate;
3802 else
3803 max_MBps = 8; /* Allow 8 MB/s. */
3804 /* Get a log2 for easy divisions. */
3805 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3806
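/*
 * Worked example (informational): with the default of 8 MB/s,
 * log2_max_MBps = ilog2(8) = 3, so later accounting can divide by the
 * rate with a shift (e.g. bytes >> log2_max_MBps) instead of a division,
 * which is the point of storing the log2 here.
 */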
d2f52ac8 3807 r = amdgpu_pm_sysfs_init(adev);
7c868b59
YT
3808 if (r) {
3809 adev->pm_sysfs_en = false;
d2f52ac8 3810 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
7c868b59
YT
3811 } else
3812 adev->pm_sysfs_en = true;
d2f52ac8 3813
5bb23532 3814 r = amdgpu_ucode_sysfs_init(adev);
7c868b59
YT
3815 if (r) {
3816 adev->ucode_sysfs_en = false;
5bb23532 3817 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
7c868b59
YT
3818 } else
3819 adev->ucode_sysfs_en = true;
5bb23532 3820
d38ceaf9
AD
3821 if ((amdgpu_testing & 1)) {
3822 if (adev->accel_working)
3823 amdgpu_test_moves(adev);
3824 else
3825 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
3826 }
d38ceaf9
AD
3827 if (amdgpu_benchmarking) {
3828 if (adev->accel_working)
3829 amdgpu_benchmark(adev, amdgpu_benchmarking);
3830 else
3831 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
3832 }
3833
b0adca4d
EQ
3834 /*
3835 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
 3836 * Otherwise the mgpu fan boost feature will be skipped because the
 3837 * gpu instance count would be too low.
3838 */
3839 amdgpu_register_gpu_instance(adev);
3840
d38ceaf9
AD
3841 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3842 * explicit gating rather than handling it automatically.
3843 */
e3c1b071 3844 if (!adev->gmc.xgmi.pending_reset) {
3845 r = amdgpu_device_ip_late_init(adev);
3846 if (r) {
3847 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
3848 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
970fd197 3849 goto release_ras_con;
e3c1b071 3850 }
3851 /* must succeed. */
3852 amdgpu_ras_resume(adev);
3853 queue_delayed_work(system_wq, &adev->delayed_init_work,
3854 msecs_to_jiffies(AMDGPU_RESUME_MS));
2c1a2784 3855 }
d38ceaf9 3856
2c738637
ML
3857 if (amdgpu_sriov_vf(adev))
3858 flush_delayed_work(&adev->delayed_init_work);
3859
77f3a5cd 3860 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
5aea5327 3861 if (r)
77f3a5cd 3862 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166 3863
d155bef0
AB
3864 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3865 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
3866 if (r)
3867 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3868
c1dd4aa6
AG
3869 /* Have stored pci confspace at hand for restore in sudden PCI error */
3870 if (amdgpu_device_cache_pci_state(adev->pdev))
3871 pci_restore_state(pdev);
3872
8c3dd61c
KHF
 3873 /* if we have more than one VGA card, then disable the amdgpu VGA resources */
3874 /* this will fail for cards that aren't VGA class devices, just
3875 * ignore it */
3876 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
bf44e8ce 3877 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
8c3dd61c
KHF
3878
3879 if (amdgpu_device_supports_px(ddev)) {
3880 px = true;
3881 vga_switcheroo_register_client(adev->pdev,
3882 &amdgpu_switcheroo_ops, px);
3883 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
3884 }
3885
e3c1b071 3886 if (adev->gmc.xgmi.pending_reset)
3887 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
3888 msecs_to_jiffies(AMDGPU_RESUME_MS));
3889
4a74c38c
PY
3890 amdgpu_device_check_iommu_direct_map(adev);
3891
d38ceaf9 3892 return 0;
83ba126a 3893
970fd197
SY
3894release_ras_con:
3895 amdgpu_release_ras_context(adev);
3896
83ba126a 3897failed:
89041940 3898 amdgpu_vf_error_trans_all(adev);
8840a387 3899
83ba126a 3900 return r;
d38ceaf9
AD
3901}
3902
07775fc1
AG
3903static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
3904{
62d5f9f7 3905
07775fc1
AG
3906 /* Clear all CPU mappings pointing to this device */
3907 unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
3908
3909 /* Unmap all mapped bars - Doorbell, registers and VRAM */
3910 amdgpu_device_doorbell_fini(adev);
3911
3912 iounmap(adev->rmmio);
3913 adev->rmmio = NULL;
3914 if (adev->mman.aper_base_kaddr)
3915 iounmap(adev->mman.aper_base_kaddr);
3916 adev->mman.aper_base_kaddr = NULL;
3917
3918 /* Memory manager related */
3919 if (!adev->gmc.xgmi.connected_to_cpu) {
3920 arch_phys_wc_del(adev->gmc.vram_mtrr);
3921 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
3922 }
3923}
3924
d38ceaf9 3925/**
bbe04dec 3926 * amdgpu_device_fini_hw - tear down the driver
d38ceaf9
AD
3927 *
3928 * @adev: amdgpu_device pointer
3929 *
3930 * Tear down the driver info (all asics).
3931 * Called at driver shutdown.
3932 */
72c8c97b 3933void amdgpu_device_fini_hw(struct amdgpu_device *adev)
d38ceaf9 3934{
aac89168 3935 dev_info(adev->dev, "amdgpu: finishing device.\n");
9f875167 3936 flush_delayed_work(&adev->delayed_init_work);
691191a2
YW
3937 if (adev->mman.initialized) {
3938 flush_delayed_work(&adev->mman.bdev.wq);
e78b3197 3939 ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
691191a2 3940 }
d0d13fe8 3941 adev->shutdown = true;
9f875167 3942
752c683d
ML
 3943 /* make sure the IB test has finished before entering exclusive mode
 3944 * to avoid preemption on the IB test
 3945 */
519b8b76 3946 if (amdgpu_sriov_vf(adev)) {
752c683d 3947 amdgpu_virt_request_full_gpu(adev, false);
519b8b76
BZ
3948 amdgpu_virt_fini_data_exchange(adev);
3949 }
752c683d 3950
e5b03032
ML
3951 /* disable all interrupts */
3952 amdgpu_irq_disable_all(adev);
ff97cba8 3953 if (adev->mode_info.mode_config_initialized){
1053b9c9 3954 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4a580877 3955 drm_helper_force_disable_all(adev_to_drm(adev));
ff97cba8 3956 else
4a580877 3957 drm_atomic_helper_shutdown(adev_to_drm(adev));
ff97cba8 3958 }
8d35a259 3959 amdgpu_fence_driver_hw_fini(adev);
72c8c97b 3960
7c868b59
YT
3961 if (adev->pm_sysfs_en)
3962 amdgpu_pm_sysfs_fini(adev);
72c8c97b
AG
3963 if (adev->ucode_sysfs_en)
3964 amdgpu_ucode_sysfs_fini(adev);
3965 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
3966
232d1d43
SY
3967 /* disable ras feature must before hw fini */
3968 amdgpu_ras_pre_fini(adev);
3969
e9669fb7 3970 amdgpu_device_ip_fini_early(adev);
d10d0daa 3971
a3848df6
YW
3972 amdgpu_irq_fini_hw(adev);
3973
b6fd6e0f
SK
3974 if (adev->mman.initialized)
3975 ttm_device_clear_dma_mappings(&adev->mman.bdev);
894c6890 3976
d10d0daa 3977 amdgpu_gart_dummy_page_fini(adev);
07775fc1 3978
87172e89
LS
3979 if (drm_dev_is_unplugged(adev_to_drm(adev)))
3980 amdgpu_device_unmap_mmio(adev);
3981
72c8c97b
AG
3982}
3983
3984void amdgpu_device_fini_sw(struct amdgpu_device *adev)
3985{
62d5f9f7
LS
3986 int idx;
3987
8d35a259 3988 amdgpu_fence_driver_sw_fini(adev);
a5c5d8d5 3989 amdgpu_device_ip_fini(adev);
75e1658e
ND
3990 release_firmware(adev->firmware.gpu_info_fw);
3991 adev->firmware.gpu_info_fw = NULL;
d38ceaf9 3992 adev->accel_working = false;
04442bf7
LL
3993
3994 amdgpu_reset_fini(adev);
3995
d38ceaf9 3996 /* free i2c buses */
4562236b
HW
3997 if (!amdgpu_device_has_dc_support(adev))
3998 amdgpu_i2c_fini(adev);
bfca0289
SL
3999
4000 if (amdgpu_emu_mode != 1)
4001 amdgpu_atombios_fini(adev);
4002
d38ceaf9
AD
4003 kfree(adev->bios);
4004 adev->bios = NULL;
b98c6299 4005 if (amdgpu_device_supports_px(adev_to_drm(adev))) {
84c8b22e 4006 vga_switcheroo_unregister_client(adev->pdev);
83ba126a 4007 vga_switcheroo_fini_domain_pm_ops(adev->dev);
b98c6299 4008 }
38d6be81 4009 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
b8779475 4010 vga_client_unregister(adev->pdev);
e9bc1bf7 4011
62d5f9f7
LS
4012 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4013
4014 iounmap(adev->rmmio);
4015 adev->rmmio = NULL;
4016 amdgpu_device_doorbell_fini(adev);
4017 drm_dev_exit(idx);
4018 }
4019
d155bef0
AB
4020 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4021 amdgpu_pmu_fini(adev);
72de33f8 4022 if (adev->mman.discovery_bin)
a190d1c7 4023 amdgpu_discovery_fini(adev);
72c8c97b 4024
cfbb6b00
AG
4025 amdgpu_reset_put_reset_domain(adev->reset_domain);
4026 adev->reset_domain = NULL;
4027
72c8c97b
AG
4028 kfree(adev->pci_state);
4029
d38ceaf9
AD
4030}
4031
58144d28
ND
4032/**
4033 * amdgpu_device_evict_resources - evict device resources
4034 * @adev: amdgpu device object
4035 *
 4036 * Evicts all ttm device resources (vram BOs, gart table) from the lru list
4037 * of the vram memory type. Mainly used for evicting device resources
4038 * at suspend time.
4039 *
4040 */
4041static void amdgpu_device_evict_resources(struct amdgpu_device *adev)
4042{
e53d9665
ML
4043 /* No need to evict vram on APUs for suspend to ram or s2idle */
4044 if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
58144d28
ND
4045 return;
4046
4047 if (amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM))
4048 DRM_WARN("evicting device resources failed\n");
4049
4050}
d38ceaf9
AD
4051
4052/*
4053 * Suspend & resume.
4054 */
4055/**
810ddc3a 4056 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 4057 *
87e3f136 4058 * @dev: drm dev pointer
87e3f136 4059 * @fbcon : notify the fbdev of suspend
d38ceaf9
AD
4060 *
4061 * Puts the hw in the suspend state (all asics).
4062 * Returns 0 for success or an error on failure.
4063 * Called at driver suspend.
4064 */
de185019 4065int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9 4066{
a2e15b0e 4067 struct amdgpu_device *adev = drm_to_adev(dev);
d38ceaf9 4068
d38ceaf9
AD
4069 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4070 return 0;
4071
44779b43 4072 adev->in_suspend = true;
3fa8f89d
S
4073
4074 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4075 DRM_WARN("smart shift update failed\n");
4076
d38ceaf9
AD
4077 drm_kms_helper_poll_disable(dev);
4078
5f818173 4079 if (fbcon)
087451f3 4080 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
5f818173 4081
beff74bc 4082 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 4083
5e6932fe 4084 amdgpu_ras_suspend(adev);
4085
2196927b 4086 amdgpu_device_ip_suspend_phase1(adev);
fe1053b7 4087
5d3a2d95
AD
4088 if (!adev->in_s0ix)
4089 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
94fa5660 4090
58144d28 4091 amdgpu_device_evict_resources(adev);
d38ceaf9 4092
8d35a259 4093 amdgpu_fence_driver_hw_fini(adev);
d38ceaf9 4094
2196927b 4095 amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 4096
d38ceaf9
AD
4097 return 0;
4098}
4099
4100/**
810ddc3a 4101 * amdgpu_device_resume - initiate device resume
d38ceaf9 4102 *
87e3f136 4103 * @dev: drm dev pointer
87e3f136 4104 * @fbcon : notify the fbdev of resume
d38ceaf9
AD
4105 *
4106 * Bring the hw back to operating state (all asics).
4107 * Returns 0 for success or an error on failure.
4108 * Called at driver resume.
4109 */
de185019 4110int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9 4111{
1348969a 4112 struct amdgpu_device *adev = drm_to_adev(dev);
03161a6e 4113 int r = 0;
d38ceaf9
AD
4114
4115 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4116 return 0;
4117
62498733 4118 if (adev->in_s0ix)
628c36d7
PL
4119 amdgpu_gfx_state_change_set(adev, sGpuChangeState_D0Entry);
4120
d38ceaf9 4121 /* post card */
39c640c0 4122 if (amdgpu_device_need_post(adev)) {
4d2997ab 4123 r = amdgpu_device_asic_init(adev);
74b0b157 4124 if (r)
aac89168 4125 dev_err(adev->dev, "amdgpu asic init failed\n");
74b0b157 4126 }
d38ceaf9 4127
06ec9070 4128 r = amdgpu_device_ip_resume(adev);
e6707218 4129 if (r) {
aac89168 4130 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
4d3b9ae5 4131 return r;
e6707218 4132 }
8d35a259 4133 amdgpu_fence_driver_hw_init(adev);
5ceb54c6 4134
06ec9070 4135 r = amdgpu_device_ip_late_init(adev);
03161a6e 4136 if (r)
4d3b9ae5 4137 return r;
d38ceaf9 4138
beff74bc
AD
4139 queue_delayed_work(system_wq, &adev->delayed_init_work,
4140 msecs_to_jiffies(AMDGPU_RESUME_MS));
4141
5d3a2d95
AD
4142 if (!adev->in_s0ix) {
4143 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4144 if (r)
4145 return r;
4146 }
756e6880 4147
96a5d8d4 4148 /* Make sure IB tests flushed */
beff74bc 4149 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 4150
a2e15b0e 4151 if (fbcon)
087451f3 4152 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
d38ceaf9
AD
4153
4154 drm_kms_helper_poll_enable(dev);
23a1a9e5 4155
5e6932fe 4156 amdgpu_ras_resume(adev);
4157
23a1a9e5
L
4158 /*
4159 * Most of the connector probing functions try to acquire runtime pm
4160 * refs to ensure that the GPU is powered on when connector polling is
4161 * performed. Since we're calling this from a runtime PM callback,
4162 * trying to acquire rpm refs will cause us to deadlock.
4163 *
4164 * Since we're guaranteed to be holding the rpm lock, it's safe to
4165 * temporarily disable the rpm helpers so this doesn't deadlock us.
4166 */
4167#ifdef CONFIG_PM
4168 dev->dev->power.disable_depth++;
4169#endif
4562236b
HW
4170 if (!amdgpu_device_has_dc_support(adev))
4171 drm_helper_hpd_irq_event(dev);
4172 else
4173 drm_kms_helper_hotplug_event(dev);
23a1a9e5
L
4174#ifdef CONFIG_PM
4175 dev->dev->power.disable_depth--;
4176#endif
44779b43
RZ
4177 adev->in_suspend = false;
4178
3fa8f89d
S
4179 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4180 DRM_WARN("smart shift update failed\n");
4181
4d3b9ae5 4182 return 0;
d38ceaf9
AD
4183}
4184
e3ecdffa
AD
4185/**
4186 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4187 *
4188 * @adev: amdgpu_device pointer
4189 *
4190 * The list of all the hardware IPs that make up the asic is walked and
4191 * the check_soft_reset callbacks are run. check_soft_reset determines
4192 * if the asic is still hung or not.
4193 * Returns true if any of the IPs are still in a hung state, false if not.
4194 */
06ec9070 4195static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
4196{
4197 int i;
4198 bool asic_hang = false;
4199
f993d628
ML
4200 if (amdgpu_sriov_vf(adev))
4201 return true;
4202
8bc04c29
AD
4203 if (amdgpu_asic_need_full_reset(adev))
4204 return true;
4205
63fbf42f 4206 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4207 if (!adev->ip_blocks[i].status.valid)
63fbf42f 4208 continue;
a1255107
AD
4209 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4210 adev->ip_blocks[i].status.hang =
4211 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4212 if (adev->ip_blocks[i].status.hang) {
aac89168 4213 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
4214 asic_hang = true;
4215 }
4216 }
4217 return asic_hang;
4218}
4219
e3ecdffa
AD
4220/**
4221 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4222 *
4223 * @adev: amdgpu_device pointer
4224 *
4225 * The list of all the hardware IPs that make up the asic is walked and the
4226 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
4227 * handles any IP specific hardware or software state changes that are
4228 * necessary for a soft reset to succeed.
4229 * Returns 0 on success, negative error code on failure.
4230 */
06ec9070 4231static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
4232{
4233 int i, r = 0;
4234
4235 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4236 if (!adev->ip_blocks[i].status.valid)
d31a501e 4237 continue;
a1255107
AD
4238 if (adev->ip_blocks[i].status.hang &&
4239 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4240 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
4241 if (r)
4242 return r;
4243 }
4244 }
4245
4246 return 0;
4247}
4248
e3ecdffa
AD
4249/**
4250 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4251 *
4252 * @adev: amdgpu_device pointer
4253 *
4254 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
4255 * reset is necessary to recover.
4256 * Returns true if a full asic reset is required, false if not.
4257 */
06ec9070 4258static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 4259{
da146d3b
AD
4260 int i;
4261
8bc04c29
AD
4262 if (amdgpu_asic_need_full_reset(adev))
4263 return true;
4264
da146d3b 4265 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4266 if (!adev->ip_blocks[i].status.valid)
da146d3b 4267 continue;
a1255107
AD
4268 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4269 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4270 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
4271 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4272 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 4273 if (adev->ip_blocks[i].status.hang) {
aac89168 4274 dev_info(adev->dev, "Some block need full reset!\n");
da146d3b
AD
4275 return true;
4276 }
4277 }
35d782fe
CZ
4278 }
4279 return false;
4280}
4281
e3ecdffa
AD
4282/**
4283 * amdgpu_device_ip_soft_reset - do a soft reset
4284 *
4285 * @adev: amdgpu_device pointer
4286 *
4287 * The list of all the hardware IPs that make up the asic is walked and the
4288 * soft_reset callbacks are run if the block is hung. soft_reset handles any
4289 * IP specific hardware or software state changes that are necessary to soft
4290 * reset the IP.
4291 * Returns 0 on success, negative error code on failure.
4292 */
06ec9070 4293static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4294{
4295 int i, r = 0;
4296
4297 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4298 if (!adev->ip_blocks[i].status.valid)
35d782fe 4299 continue;
a1255107
AD
4300 if (adev->ip_blocks[i].status.hang &&
4301 adev->ip_blocks[i].version->funcs->soft_reset) {
4302 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
4303 if (r)
4304 return r;
4305 }
4306 }
4307
4308 return 0;
4309}
4310
e3ecdffa
AD
4311/**
4312 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4313 *
4314 * @adev: amdgpu_device pointer
4315 *
4316 * The list of all the hardware IPs that make up the asic is walked and the
4317 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
4318 * handles any IP specific hardware or software state changes that are
4319 * necessary after the IP has been soft reset.
4320 * Returns 0 on success, negative error code on failure.
4321 */
06ec9070 4322static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4323{
4324 int i, r = 0;
4325
4326 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4327 if (!adev->ip_blocks[i].status.valid)
35d782fe 4328 continue;
a1255107
AD
4329 if (adev->ip_blocks[i].status.hang &&
4330 adev->ip_blocks[i].version->funcs->post_soft_reset)
4331 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
4332 if (r)
4333 return r;
4334 }
4335
4336 return 0;
4337}
4338
e3ecdffa 4339/**
c33adbc7 4340 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
4341 *
4342 * @adev: amdgpu_device pointer
4343 *
4344 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
4345 * restore things like GPUVM page tables after a GPU reset where
4346 * the contents of VRAM might be lost.
403009bf
CK
4347 *
4348 * Returns:
4349 * 0 on success, negative error code on failure.
e3ecdffa 4350 */
c33adbc7 4351static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 4352{
c41d1cf6 4353 struct dma_fence *fence = NULL, *next = NULL;
403009bf 4354 struct amdgpu_bo *shadow;
e18aaea7 4355 struct amdgpu_bo_vm *vmbo;
403009bf 4356 long r = 1, tmo;
c41d1cf6
ML
4357
4358 if (amdgpu_sriov_runtime(adev))
b045d3af 4359 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
4360 else
4361 tmo = msecs_to_jiffies(100);
4362
aac89168 4363 dev_info(adev->dev, "recover vram bo from shadow start\n");
c41d1cf6 4364 mutex_lock(&adev->shadow_list_lock);
e18aaea7
ND
4365 list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4366 shadow = &vmbo->bo;
403009bf 4367 /* No need to recover an evicted BO */
d3116756
CK
4368 if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4369 shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4370 shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
403009bf
CK
4371 continue;
4372
4373 r = amdgpu_bo_restore_shadow(shadow, &next);
4374 if (r)
4375 break;
4376
c41d1cf6 4377 if (fence) {
1712fb1a 4378 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
4379 dma_fence_put(fence);
4380 fence = next;
1712fb1a 4381 if (tmo == 0) {
4382 r = -ETIMEDOUT;
c41d1cf6 4383 break;
1712fb1a 4384 } else if (tmo < 0) {
4385 r = tmo;
4386 break;
4387 }
403009bf
CK
4388 } else {
4389 fence = next;
c41d1cf6 4390 }
c41d1cf6
ML
4391 }
4392 mutex_unlock(&adev->shadow_list_lock);
4393
403009bf
CK
4394 if (fence)
4395 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
4396 dma_fence_put(fence);
4397
1712fb1a 4398 if (r < 0 || tmo <= 0) {
aac89168 4399 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
4400 return -EIO;
4401 }
c41d1cf6 4402
aac89168 4403 dev_info(adev->dev, "recover vram bo from shadow done\n");
403009bf 4404 return 0;
c41d1cf6
ML
4405}
4406
a90ad3c2 4407
e3ecdffa 4408/**
06ec9070 4409 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e 4410 *
982a820b 4411 * @adev: amdgpu_device pointer
87e3f136 4412 * @from_hypervisor: request from hypervisor
5740682e
ML
4413 *
 4414 * do VF FLR and reinitialize the ASIC
3f48c681 4415 * Returns 0 on success, negative error code on failure.
e3ecdffa
AD
4416 */
4417static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4418 bool from_hypervisor)
5740682e
ML
4419{
4420 int r;
a5f67c93 4421 struct amdgpu_hive_info *hive = NULL;
5740682e 4422
992110d7 4423 amdgpu_amdkfd_pre_reset(adev);
5740682e 4424
428890a3 4425 amdgpu_amdkfd_pre_reset(adev);
4426
5740682e
ML
4427 if (from_hypervisor)
4428 r = amdgpu_virt_request_full_gpu(adev, true);
4429 else
4430 r = amdgpu_virt_reset_gpu(adev);
4431 if (r)
4432 return r;
a90ad3c2
ML
4433
4434 /* Resume IP prior to SMC */
06ec9070 4435 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
4436 if (r)
4437 goto error;
a90ad3c2 4438
c9ffa427 4439 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 4440
7a3e0bb2
RZ
4441 r = amdgpu_device_fw_loading(adev);
4442 if (r)
4443 return r;
4444
a90ad3c2 4445 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 4446 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
4447 if (r)
4448 goto error;
a90ad3c2 4449
a5f67c93
ZL
4450 hive = amdgpu_get_xgmi_hive(adev);
4451 /* Update PSP FW topology after reset */
4452 if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4453 r = amdgpu_xgmi_update_topology(hive, adev);
4454
4455 if (hive)
4456 amdgpu_put_xgmi_hive(hive);
4457
4458 if (!r) {
4459 amdgpu_irq_gpu_reset_resume_helper(adev);
4460 r = amdgpu_ib_ring_tests(adev);
4461 amdgpu_amdkfd_post_reset(adev);
4462 }
a90ad3c2 4463
abc34253 4464error:
c41d1cf6 4465 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 4466 amdgpu_inc_vram_lost(adev);
c33adbc7 4467 r = amdgpu_device_recover_vram(adev);
a90ad3c2 4468 }
437f3e0b 4469 amdgpu_virt_release_full_gpu(adev, true);
a90ad3c2
ML
4470
4471 return r;
4472}
4473
9a1cddd6 4474/**
 4475 * amdgpu_device_has_job_running - check if there is any job in the pending list
4476 *
982a820b 4477 * @adev: amdgpu_device pointer
9a1cddd6 4478 *
 4479 * check if there is any job in the pending list
4480 */
4481bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4482{
4483 int i;
4484 struct drm_sched_job *job;
4485
4486 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4487 struct amdgpu_ring *ring = adev->rings[i];
4488
4489 if (!ring || !ring->sched.thread)
4490 continue;
4491
4492 spin_lock(&ring->sched.job_list_lock);
6efa4b46
LT
4493 job = list_first_entry_or_null(&ring->sched.pending_list,
4494 struct drm_sched_job, list);
9a1cddd6 4495 spin_unlock(&ring->sched.job_list_lock);
4496 if (job)
4497 return true;
4498 }
4499 return false;
4500}
4501
12938fad
CK
4502/**
4503 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4504 *
982a820b 4505 * @adev: amdgpu_device pointer
12938fad
CK
4506 *
4507 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4508 * a hung GPU.
4509 */
4510bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4511{
4512 if (!amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 4513 dev_info(adev->dev, "Timeout, but no hardware hang detected.\n");
12938fad
CK
4514 return false;
4515 }
4516
3ba7b418
AG
4517 if (amdgpu_gpu_recovery == 0)
4518 goto disabled;
4519
4520 if (amdgpu_sriov_vf(adev))
4521 return true;
4522
4523 if (amdgpu_gpu_recovery == -1) {
4524 switch (adev->asic_type) {
0ffb1fd1
AD
4525#ifdef CONFIG_DRM_AMDGPU_SI
4526 case CHIP_VERDE:
4527 case CHIP_TAHITI:
4528 case CHIP_PITCAIRN:
4529 case CHIP_OLAND:
4530 case CHIP_HAINAN:
4531#endif
4532#ifdef CONFIG_DRM_AMDGPU_CIK
4533 case CHIP_KAVERI:
4534 case CHIP_KABINI:
4535 case CHIP_MULLINS:
4536#endif
4537 case CHIP_CARRIZO:
4538 case CHIP_STONEY:
4539 case CHIP_CYAN_SKILLFISH:
3ba7b418 4540 goto disabled;
0ffb1fd1
AD
4541 default:
4542 break;
3ba7b418 4543 }
12938fad
CK
4544 }
4545
4546 return true;
3ba7b418
AG
4547
4548disabled:
aac89168 4549 dev_info(adev->dev, "GPU recovery disabled.\n");
3ba7b418 4550 return false;
12938fad
CK
4551}
4552
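/*
 * Informational note on the logic above: amdgpu.gpu_recovery=0 always
 * disables recovery, a positive value always enables it, and the default
 * of -1 enables it everywhere except on the chips listed in the switch;
 * SR-IOV VFs attempt recovery whenever it is not explicitly disabled.
 */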
5c03e584
FX
4553int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
4554{
4555 u32 i;
4556 int ret = 0;
4557
4558 amdgpu_atombios_scratch_regs_engine_hung(adev, true);
4559
4560 dev_info(adev->dev, "GPU mode1 reset\n");
4561
4562 /* disable BM */
4563 pci_clear_master(adev->pdev);
4564
4565 amdgpu_device_cache_pci_state(adev->pdev);
4566
4567 if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
4568 dev_info(adev->dev, "GPU smu mode1 reset\n");
4569 ret = amdgpu_dpm_mode1_reset(adev);
4570 } else {
4571 dev_info(adev->dev, "GPU psp mode1 reset\n");
4572 ret = psp_gpu_reset(adev);
4573 }
4574
4575 if (ret)
4576 dev_err(adev->dev, "GPU mode1 reset failed\n");
4577
4578 amdgpu_device_load_pci_state(adev->pdev);
4579
4580 /* wait for asic to come out of reset */
4581 for (i = 0; i < adev->usec_timeout; i++) {
4582 u32 memsize = adev->nbio.funcs->get_memsize(adev);
4583
4584 if (memsize != 0xffffffff)
4585 break;
4586 udelay(1);
4587 }
4588
4589 amdgpu_atombios_scratch_regs_engine_hung(adev, false);
4590 return ret;
4591}
5c6dd71e 4592
e3c1b071 4593int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
04442bf7 4594 struct amdgpu_reset_context *reset_context)
26bc5340 4595{
5c1e6fa4 4596 int i, r = 0;
04442bf7
LL
4597 struct amdgpu_job *job = NULL;
4598 bool need_full_reset =
4599 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4600
4601 if (reset_context->reset_req_dev == adev)
4602 job = reset_context->job;
71182665 4603
b602ca5f
TZ
4604 if (amdgpu_sriov_vf(adev)) {
4605 /* stop the data exchange thread */
4606 amdgpu_virt_fini_data_exchange(adev);
4607 }
4608
71182665 4609 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
4610 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4611 struct amdgpu_ring *ring = adev->rings[i];
4612
51687759 4613 if (!ring || !ring->sched.thread)
0875dc9e 4614 continue;
5740682e 4615
c530b02f
JZ
 4616 /* Clear job fences from the fence driver to avoid force_completion
 4617 * on them; leave only NULL and VM flush fences in the fence driver. */
5c1e6fa4 4618 amdgpu_fence_driver_clear_job_fences(ring);
c530b02f 4619
2f9d4084
ML
4620 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
4621 amdgpu_fence_driver_force_completion(ring);
0875dc9e 4622 }
d38ceaf9 4623
ff99849b 4624 if (job && job->vm)
222b5f04
AG
4625 drm_sched_increase_karma(&job->base);
4626
04442bf7 4627 r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
404b277b
LL
4628 /* If reset handler not implemented, continue; otherwise return */
4629 if (r == -ENOSYS)
4630 r = 0;
4631 else
04442bf7
LL
4632 return r;
4633
1d721ed6 4634 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
4635 if (!amdgpu_sriov_vf(adev)) {
4636
4637 if (!need_full_reset)
4638 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4639
4640 if (!need_full_reset) {
4641 amdgpu_device_ip_pre_soft_reset(adev);
4642 r = amdgpu_device_ip_soft_reset(adev);
4643 amdgpu_device_ip_post_soft_reset(adev);
4644 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 4645 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
26bc5340
AG
4646 need_full_reset = true;
4647 }
4648 }
4649
4650 if (need_full_reset)
4651 r = amdgpu_device_ip_suspend(adev);
04442bf7
LL
4652 if (need_full_reset)
4653 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4654 else
4655 clear_bit(AMDGPU_NEED_FULL_RESET,
4656 &reset_context->flags);
26bc5340
AG
4657 }
4658
4659 return r;
4660}
4661
04442bf7
LL
4662int amdgpu_do_asic_reset(struct list_head *device_list_handle,
4663 struct amdgpu_reset_context *reset_context)
26bc5340
AG
4664{
4665 struct amdgpu_device *tmp_adev = NULL;
04442bf7 4666 bool need_full_reset, skip_hw_reset, vram_lost = false;
26bc5340
AG
4667 int r = 0;
4668
04442bf7
LL
4669 /* Try reset handler method first */
4670 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
4671 reset_list);
4672 r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
404b277b
LL
4673 /* If reset handler not implemented, continue; otherwise return */
4674 if (r == -ENOSYS)
4675 r = 0;
4676 else
04442bf7
LL
4677 return r;
4678
4679 /* Reset handler not implemented, use the default method */
4680 need_full_reset =
4681 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4682 skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
4683
26bc5340 4684 /*
655ce9cb 4685 * ASIC reset has to be done on all XGMI hive nodes ASAP
26bc5340
AG
4686 * to allow proper links negotiation in FW (within 1 sec)
4687 */
7ac71382 4688 if (!skip_hw_reset && need_full_reset) {
655ce9cb 4689 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
041a62bc 4690 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 4691 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
e3c1b071 4692 tmp_adev->gmc.xgmi.pending_reset = false;
c96cf282 4693 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
4694 r = -EALREADY;
4695 } else
4696 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 4697
041a62bc 4698 if (r) {
aac89168 4699 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4a580877 4700 r, adev_to_drm(tmp_adev)->unique);
041a62bc 4701 break;
ce316fa5
LM
4702 }
4703 }
4704
041a62bc
AG
4705 /* For XGMI wait for all resets to complete before proceed */
4706 if (!r) {
655ce9cb 4707 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
ce316fa5
LM
4708 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4709 flush_work(&tmp_adev->xgmi_reset_work);
4710 r = tmp_adev->asic_reset_res;
4711 if (r)
4712 break;
ce316fa5
LM
4713 }
4714 }
4715 }
ce316fa5 4716 }
26bc5340 4717
43c4d576 4718 if (!r && amdgpu_ras_intr_triggered()) {
655ce9cb 4719 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
8bc7b360
HZ
4720 if (tmp_adev->mmhub.ras_funcs &&
4721 tmp_adev->mmhub.ras_funcs->reset_ras_error_count)
4722 tmp_adev->mmhub.ras_funcs->reset_ras_error_count(tmp_adev);
43c4d576
JC
4723 }
4724
00eaa571 4725 amdgpu_ras_intr_cleared();
43c4d576 4726 }
00eaa571 4727
655ce9cb 4728 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
26bc5340
AG
4729 if (need_full_reset) {
4730 /* post card */
e3c1b071 4731 r = amdgpu_device_asic_init(tmp_adev);
4732 if (r) {
aac89168 4733 dev_warn(tmp_adev->dev, "asic atom init failed!");
e3c1b071 4734 } else {
26bc5340 4735 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
9cec53c1
JZ
4736 r = amdgpu_amdkfd_resume_iommu(tmp_adev);
4737 if (r)
4738 goto out;
4739
26bc5340
AG
4740 r = amdgpu_device_ip_resume_phase1(tmp_adev);
4741 if (r)
4742 goto out;
4743
4744 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
4745 if (vram_lost) {
77e7f829 4746 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 4747 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
4748 }
4749
26bc5340
AG
4750 r = amdgpu_device_fw_loading(tmp_adev);
4751 if (r)
4752 return r;
4753
4754 r = amdgpu_device_ip_resume_phase2(tmp_adev);
4755 if (r)
4756 goto out;
4757
4758 if (vram_lost)
4759 amdgpu_device_fill_reset_magic(tmp_adev);
4760
fdafb359
EQ
4761 /*
 4762 * Add this ASIC back as tracked since the reset
 4763 * has already completed successfully.
4764 */
4765 amdgpu_register_gpu_instance(tmp_adev);
4766
04442bf7
LL
4767 if (!reset_context->hive &&
4768 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
e3c1b071 4769 amdgpu_xgmi_add_device(tmp_adev);
4770
7c04ca50 4771 r = amdgpu_device_ip_late_init(tmp_adev);
4772 if (r)
4773 goto out;
4774
087451f3 4775 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
565d1941 4776
e8fbaf03
GC
4777 /*
 4778 * The GPU enters a bad state once the number of faulty
 4779 * pages reported by ECC reaches the threshold, and RAS
 4780 * recovery is scheduled next. So add one check here to
 4781 * break recovery if the bad page threshold is indeed
 4782 * exceeded, and remind the user to either retire this
 4783 * GPU or set a bigger bad_page_threshold value, so the
 4784 * problem can be fixed the next time the driver is
 4785 * probed.
4786 */
11003c68 4787 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
e8fbaf03
GC
4788 /* must succeed. */
4789 amdgpu_ras_resume(tmp_adev);
4790 } else {
4791 r = -EINVAL;
4792 goto out;
4793 }
e79a04d5 4794
26bc5340 4795 /* Update PSP FW topology after reset */
04442bf7
LL
4796 if (reset_context->hive &&
4797 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
4798 r = amdgpu_xgmi_update_topology(
4799 reset_context->hive, tmp_adev);
26bc5340
AG
4800 }
4801 }
4802
26bc5340
AG
4803out:
4804 if (!r) {
4805 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
4806 r = amdgpu_ib_ring_tests(tmp_adev);
4807 if (r) {
4808 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
26bc5340
AG
4809 need_full_reset = true;
4810 r = -EAGAIN;
4811 goto end;
4812 }
4813 }
4814
4815 if (!r)
4816 r = amdgpu_device_recover_vram(tmp_adev);
4817 else
4818 tmp_adev->asic_reset_res = r;
4819 }
4820
4821end:
04442bf7
LL
4822 if (need_full_reset)
4823 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4824 else
4825 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340
AG
4826 return r;
4827}
4828
f287a3c5 4829static void amdgpu_device_lock_adev(struct amdgpu_device *adev,
08ebb485 4830 struct amdgpu_hive_info *hive)
26bc5340 4831{
f287a3c5 4832 atomic_set(&adev->in_gpu_reset, 1);
53b3f8f4 4833
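	/*
	 * Take the write side of the reset domain semaphore. When the device
	 * is part of an XGMI hive, the acquisition is annotated with the hive
	 * lock (down_write_nest_lock) so lockdep knows the per-device write
	 * locks taken across the hive are serialized by hive_lock.
	 */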
08ebb485 4834 if (hive) {
d0fb18b5 4835 down_write_nest_lock(&adev->reset_domain->sem, &hive->hive_lock);
08ebb485 4836 } else {
d0fb18b5 4837 down_write(&adev->reset_domain->sem);
08ebb485 4838 }
5740682e 4839
a3a09142
AD
4840 switch (amdgpu_asic_reset_method(adev)) {
4841 case AMD_RESET_METHOD_MODE1:
4842 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
4843 break;
4844 case AMD_RESET_METHOD_MODE2:
4845 adev->mp1_state = PP_MP1_STATE_RESET;
4846 break;
4847 default:
4848 adev->mp1_state = PP_MP1_STATE_NONE;
4849 break;
4850 }
26bc5340 4851}
d38ceaf9 4852
26bc5340
AG
4853static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
4854{
89041940 4855 amdgpu_vf_error_trans_all(adev);
a3a09142 4856 adev->mp1_state = PP_MP1_STATE_NONE;
53b3f8f4 4857 atomic_set(&adev->in_gpu_reset, 0);
d0fb18b5 4858 up_write(&adev->reset_domain->sem);
26bc5340
AG
4859}
4860
3f12acc8
EQ
4861static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
4862{
4863 struct pci_dev *p = NULL;
4864
4865 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4866 adev->pdev->bus->number, 1);
4867 if (p) {
4868 pm_runtime_enable(&(p->dev));
4869 pm_runtime_resume(&(p->dev));
4870 }
4871}
4872
4873static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
4874{
4875 enum amd_reset_method reset_method;
4876 struct pci_dev *p = NULL;
4877 u64 expires;
4878
4879 /*
4880 * For now, only BACO and mode1 reset are confirmed
 4881 * to suffer the audio issue when not properly suspended.
4882 */
4883 reset_method = amdgpu_asic_reset_method(adev);
4884 if ((reset_method != AMD_RESET_METHOD_BACO) &&
4885 (reset_method != AMD_RESET_METHOD_MODE1))
4886 return -EINVAL;
4887
4888 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4889 adev->pdev->bus->number, 1);
4890 if (!p)
4891 return -ENODEV;
4892
4893 expires = pm_runtime_autosuspend_expiration(&(p->dev));
4894 if (!expires)
4895 /*
4896 * If we cannot get the audio device autosuspend delay,
 4897 * a fixed 4S interval will be used. Since 3S is the
 4898 * audio controller's default autosuspend delay setting,
 4899 * the 4S used here is guaranteed to cover it.
4900 */
54b7feb9 4901 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
4902
4903 while (!pm_runtime_status_suspended(&(p->dev))) {
4904 if (!pm_runtime_suspend(&(p->dev)))
4905 break;
4906
4907 if (expires < ktime_get_mono_fast_ns()) {
4908 dev_warn(adev->dev, "failed to suspend display audio\n");
4909 /* TODO: abort the succeeding gpu reset? */
4910 return -ETIMEDOUT;
4911 }
4912 }
4913
4914 pm_runtime_disable(&(p->dev));
4915
4916 return 0;
4917}
4918
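/*
 * Summary: for each ring, the first pending job has its guilty/karma state
 * cleared and is resubmitted on its own; if its hardware fence times out
 * again, that job is marked guilty and a hardware reset is performed,
 * otherwise the job is signaled and freed. This is how the real bad job is
 * separated from innocent jobs that were merely queued behind it.
 */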
9d8d96be 4919static void amdgpu_device_recheck_guilty_jobs(
04442bf7
LL
4920 struct amdgpu_device *adev, struct list_head *device_list_handle,
4921 struct amdgpu_reset_context *reset_context)
e6c6338f
JZ
4922{
4923 int i, r = 0;
4924
4925 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4926 struct amdgpu_ring *ring = adev->rings[i];
4927 int ret = 0;
4928 struct drm_sched_job *s_job;
4929
4930 if (!ring || !ring->sched.thread)
4931 continue;
4932
4933 s_job = list_first_entry_or_null(&ring->sched.pending_list,
4934 struct drm_sched_job, list);
4935 if (s_job == NULL)
4936 continue;
4937
 4938 /* clear the job's guilty flag and rely on the following step to decide the real one */
4939 drm_sched_reset_karma(s_job);
38d4e463
JC
4940 /* for the real bad job, it will be resubmitted twice, adding a dma_fence_get
 4941 * to make sure the fence refcount stays balanced */
4942 dma_fence_get(s_job->s_fence->parent);
e6c6338f
JZ
4943 drm_sched_resubmit_jobs_ext(&ring->sched, 1);
4944
4945 ret = dma_fence_wait_timeout(s_job->s_fence->parent, false, ring->sched.timeout);
4946 if (ret == 0) { /* timeout */
4947 DRM_ERROR("Found the real bad job! ring:%s, job_id:%llx\n",
4948 ring->sched.name, s_job->id);
4949
4950 /* set guilty */
4951 drm_sched_increase_karma(s_job);
4952retry:
4953 /* do hw reset */
4954 if (amdgpu_sriov_vf(adev)) {
4955 amdgpu_virt_fini_data_exchange(adev);
4956 r = amdgpu_device_reset_sriov(adev, false);
4957 if (r)
4958 adev->asic_reset_res = r;
4959 } else {
04442bf7
LL
4960 clear_bit(AMDGPU_SKIP_HW_RESET,
4961 &reset_context->flags);
4962 r = amdgpu_do_asic_reset(device_list_handle,
4963 reset_context);
e6c6338f
JZ
4964 if (r && r == -EAGAIN)
4965 goto retry;
4966 }
4967
4968 /*
4969 * add reset counter so that the following
4970 * resubmitted job could flush vmid
4971 */
4972 atomic_inc(&adev->gpu_reset_counter);
4973 continue;
4974 }
4975
4976 /* got the hw fence, signal finished fence */
4977 atomic_dec(ring->sched.score);
38d4e463 4978 dma_fence_put(s_job->s_fence->parent);
e6c6338f
JZ
4979 dma_fence_get(&s_job->s_fence->finished);
4980 dma_fence_signal(&s_job->s_fence->finished);
4981 dma_fence_put(&s_job->s_fence->finished);
4982
4983 /* remove node from list and free the job */
4984 spin_lock(&ring->sched.job_list_lock);
4985 list_del_init(&s_job->list);
4986 spin_unlock(&ring->sched.job_list_lock);
4987 ring->sched.ops->free_job(s_job);
4988 }
4989}
4990
26bc5340
AG
4991/**
4992 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
4993 *
982a820b 4994 * @adev: amdgpu_device pointer
26bc5340
AG
4995 * @job: which job trigger hang
4996 *
4997 * Attempt to reset the GPU if it has hung (all asics).
4998 * Attempt to do soft-reset or full-reset and reinitialize Asic
4999 * Returns 0 for success or an error on failure.
5000 */
5001
54f329cc 5002int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
26bc5340
AG
5003 struct amdgpu_job *job)
5004{
1d721ed6 5005 struct list_head device_list, *device_list_handle = NULL;
7dd8c205 5006 bool job_signaled = false;
26bc5340 5007 struct amdgpu_hive_info *hive = NULL;
26bc5340 5008 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 5009 int i, r = 0;
bb5c7235 5010 bool need_emergency_restart = false;
3f12acc8 5011 bool audio_suspended = false;
e6c6338f 5012 int tmp_vram_lost_counter;
04442bf7
LL
5013 struct amdgpu_reset_context reset_context;
5014
5015 memset(&reset_context, 0, sizeof(reset_context));
26bc5340 5016
6e3cd2a9 5017 /*
bb5c7235
WS
5018 * Special case: RAS triggered and full reset isn't supported
5019 */
5020 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5021
d5ea093e
AG
5022 /*
5023 * Flush RAM to disk so that after reboot
5024 * the user can read log and see why the system rebooted.
5025 */
bb5c7235 5026 if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
5027 DRM_WARN("Emergency reboot.");
5028
5029 ksys_sync_helper();
5030 emergency_restart();
5031 }
5032
b823821f 5033 dev_info(adev->dev, "GPU %s begin!\n",
bb5c7235 5034 need_emergency_restart ? "jobs stop":"reset");
26bc5340 5035
175ac6ec
ZL
5036 if (!amdgpu_sriov_vf(adev))
5037 hive = amdgpu_get_xgmi_hive(adev);
681260df 5038 if (hive)
53b3f8f4 5039 mutex_lock(&hive->hive_lock);
26bc5340 5040
04442bf7
LL
5041 reset_context.method = AMD_RESET_METHOD_NONE;
5042 reset_context.reset_req_dev = adev;
5043 reset_context.job = job;
5044 reset_context.hive = hive;
5045 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
5046
9e94d22c
EQ
5047 /*
5048 * Build list of devices to reset.
5049 * In case we are in XGMI hive mode, resort the device list
5050 * to put adev in the 1st position.
5051 */
5052 INIT_LIST_HEAD(&device_list);
175ac6ec 5053 if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
655ce9cb 5054 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
5055 list_add_tail(&tmp_adev->reset_list, &device_list);
5056 if (!list_is_first(&adev->reset_list, &device_list))
5057 list_rotate_to_front(&adev->reset_list, &device_list);
5058 device_list_handle = &device_list;
26bc5340 5059 } else {
655ce9cb 5060 list_add_tail(&adev->reset_list, &device_list);
26bc5340
AG
5061 device_list_handle = &device_list;
5062 }
5063
1d721ed6 5064 /* block all schedulers and reset given job's ring */
655ce9cb 5065 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
f287a3c5
AG
5066
5067 amdgpu_device_lock_adev(tmp_adev, hive);
5068
3f12acc8
EQ
5069 /*
5070 * Try to put the audio codec into suspend state
5071 * before the GPU reset starts.
5072 *
5073 * The power domain of the graphics device is
5074 * shared with the AZ power domain. Without this,
5075 * we may change the audio hardware behind
5076 * the audio driver's back, which will trigger
5077 * audio codec errors.
5078 */
5079 if (!amdgpu_device_suspend_display_audio(tmp_adev))
5080 audio_suspended = true;
5081
9e94d22c
EQ
5082 amdgpu_ras_set_error_query_ready(tmp_adev, false);
5083
52fb44cf
EQ
5084 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5085
428890a3 5086 if (!amdgpu_sriov_vf(tmp_adev))
5087 amdgpu_amdkfd_pre_reset(tmp_adev);
9e94d22c 5088
12ffa55d
AG
5089 /*
5090 * Mark the ASICs to be reset as untracked first,
5091 * and add them back after the reset completes.
5092 */
5093 amdgpu_unregister_gpu_instance(tmp_adev);
5094
087451f3 5095 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
565d1941 5096
f1c1314b 5097 /* disable ras on ALL IPs */
bb5c7235 5098 if (!need_emergency_restart &&
b823821f 5099 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 5100 amdgpu_ras_suspend(tmp_adev);
5101
1d721ed6
AG
5102 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5103 struct amdgpu_ring *ring = tmp_adev->rings[i];
5104
5105 if (!ring || !ring->sched.thread)
5106 continue;
5107
0b2d2c2e 5108 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 5109
bb5c7235 5110 if (need_emergency_restart)
7c6e68c7 5111 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6 5112 }
8f8c80f4 5113 atomic_inc(&tmp_adev->gpu_reset_counter);
1d721ed6
AG
5114 }
5115
bb5c7235 5116 if (need_emergency_restart)
7c6e68c7
AG
5117 goto skip_sched_resume;
5118
1d721ed6
AG
5119 /*
5120 * Must check the guilty signal here, since after this point all old
5121 * HW fences are force-signaled.
5122 *
5123 * job->base holds a reference to parent fence
5124 */
5125 if (job && job->base.s_fence->parent &&
7dd8c205 5126 dma_fence_is_signaled(job->base.s_fence->parent)) {
1d721ed6 5127 job_signaled = true;
1d721ed6
AG
5128 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5129 goto skip_hw_reset;
5130 }
5131
26bc5340 5132retry: /* Rest of adevs pre asic reset from XGMI hive. */
655ce9cb 5133 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
04442bf7 5134 r = amdgpu_device_pre_asic_reset(tmp_adev, &reset_context);
26bc5340
AG
5135 /* TODO: Should we stop? */
5136 if (r) {
aac89168 5137 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
4a580877 5138 r, adev_to_drm(tmp_adev)->unique);
26bc5340
AG
5139 tmp_adev->asic_reset_res = r;
5140 }
5141 }
5142
e6c6338f 5143 tmp_vram_lost_counter = atomic_read(&((adev)->vram_lost_counter));
26bc5340 5144 /* Actual ASIC resets if needed.*/
4f30d920 5145 /* Host driver will handle XGMI hive reset for SRIOV */
26bc5340
AG
5146 if (amdgpu_sriov_vf(adev)) {
5147 r = amdgpu_device_reset_sriov(adev, job ? false : true);
5148 if (r)
5149 adev->asic_reset_res = r;
5150 } else {
04442bf7 5151 r = amdgpu_do_asic_reset(device_list_handle, &reset_context);
26bc5340
AG
5152 if (r && r == -EAGAIN)
5153 goto retry;
5154 }
5155
1d721ed6
AG
5156skip_hw_reset:
5157
26bc5340 5158 /* Post ASIC reset for all devs. */
655ce9cb 5159 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
7c6e68c7 5160
e6c6338f
JZ
5161 /*
5162 * Sometimes a later bad compute job can block a good gfx job, since the gfx
5163 * and compute rings share internal GC hardware. We add an additional
5164 * guilty-job recheck step to find the real guilty job: it synchronously
5165 * resubmits each job and waits for it to signal. If the wait times out,
5166 * we identify that job as the real guilty one.
5167 */
5168 if (amdgpu_gpu_recovery == 2 &&
5169 !(tmp_vram_lost_counter < atomic_read(&adev->vram_lost_counter)))
04442bf7
LL
5170 amdgpu_device_recheck_guilty_jobs(
5171 tmp_adev, device_list_handle, &reset_context);
e6c6338f 5172
1d721ed6
AG
5173 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5174 struct amdgpu_ring *ring = tmp_adev->rings[i];
5175
5176 if (!ring || !ring->sched.thread)
5177 continue;
5178
5179 /* No point in resubmitting jobs if we didn't do a HW reset */
5180 if (!tmp_adev->asic_reset_res && !job_signaled)
5181 drm_sched_resubmit_jobs(&ring->sched);
5182
5183 drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
5184 }
5185
1053b9c9 5186 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) {
4a580877 5187 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
1d721ed6
AG
5188 }
5189
5190 tmp_adev->asic_reset_res = 0;
26bc5340
AG
5191
5192 if (r) {
5193 /* bad news, how do we tell userspace? */
12ffa55d 5194 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
5195 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5196 } else {
12ffa55d 5197 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
3fa8f89d
S
5198 if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5199 DRM_WARN("smart shift update failed\n");
26bc5340 5200 }
7c6e68c7 5201 }
26bc5340 5202
7c6e68c7 5203skip_sched_resume:
655ce9cb 5204 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
428890a3 5205 /* unlock kfd: SRIOV would do it separately */
5206 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
5207 amdgpu_amdkfd_post_reset(tmp_adev);
8e2712e7 5208
5209 /* kfd_post_reset will do nothing if the kfd device is not initialized,
5210 * so bring up kfd here if it was not initialized before.
5211 */
5212 if (!adev->kfd.init_complete)
5213 amdgpu_amdkfd_device_init(adev);
5214
3f12acc8
EQ
5215 if (audio_suspended)
5216 amdgpu_device_resume_display_audio(tmp_adev);
26bc5340
AG
5217 amdgpu_device_unlock_adev(tmp_adev);
5218 }
5219
9e94d22c 5220 if (hive) {
9e94d22c 5221 mutex_unlock(&hive->hive_lock);
d95e8e97 5222 amdgpu_put_xgmi_hive(hive);
9e94d22c 5223 }
26bc5340 5224
f287a3c5 5225 if (r)
26bc5340 5226 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
d38ceaf9
AD
5227 return r;
5228}
5229
54f329cc
AG
5230struct amdgpu_recover_work_struct {
5231 struct work_struct base;
5232 struct amdgpu_device *adev;
5233 struct amdgpu_job *job;
5234 int ret;
5235};
5236
5237static void amdgpu_device_queue_gpu_recover_work(struct work_struct *work)
5238{
5239 struct amdgpu_recover_work_struct *recover_work = container_of(work, struct amdgpu_recover_work_struct, base);
5240
5241 recover_work->ret = amdgpu_device_gpu_recover_imp(recover_work->adev, recover_work->job);
5242}
5243/*
5244 * Serialize GPU recovery into the reset domain's single-threaded workqueue.
5245 */
5246int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
5247 struct amdgpu_job *job)
5248{
5249 struct amdgpu_recover_work_struct work = {.adev = adev, .job = job};
5250
5251 INIT_WORK(&work.base, amdgpu_device_queue_gpu_recover_work);
5252
cfbb6b00 5253 if (!amdgpu_reset_domain_schedule(adev->reset_domain, &work.base))
54f329cc
AG
5254 return -EAGAIN;
5255
5256 flush_work(&work.base);
5257
5258 return work.ret;
5259}
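/*
 * Illustrative sketch: the usual caller of amdgpu_device_gpu_recover() is a
 * drm_sched timeout handler, which hands the hung job to the recovery path
 * above and lets the reset domain's ordered workqueue serialize the actual
 * reset.  The handler name and exact flow below are assumptions for
 * illustration, not the driver's verbatim timeout handler:
 *
 *	static enum drm_gpu_sched_stat example_job_timedout(struct drm_sched_job *s_job)
 *	{
 *		struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
 *		struct amdgpu_job *job = to_amdgpu_job(s_job);
 *
 *		if (amdgpu_device_should_recover_gpu(ring->adev))
 *			amdgpu_device_gpu_recover(ring->adev, job);
 *
 *		return DRM_GPU_SCHED_STAT_NOMINAL;
 *	}
 */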
5260
e3ecdffa
AD
5261/**
5262 * amdgpu_device_get_pcie_info - fetch PCIe info about the PCIe slot
5263 *
5264 * @adev: amdgpu_device pointer
5265 *
5266 * Fetches and stores in the driver the PCIE capabilities (gen speed
5267 * and lanes) of the slot the device is in. Handles APUs and
5268 * virtualized environments where PCIE config space may not be available.
5269 */
5494d864 5270static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 5271{
5d9a6330 5272 struct pci_dev *pdev;
c5313457
HK
5273 enum pci_bus_speed speed_cap, platform_speed_cap;
5274 enum pcie_link_width platform_link_width;
d0dd7f0c 5275
cd474ba0
AD
5276 if (amdgpu_pcie_gen_cap)
5277 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 5278
cd474ba0
AD
5279 if (amdgpu_pcie_lane_cap)
5280 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 5281
cd474ba0
AD
5282 /* covers APUs as well */
5283 if (pci_is_root_bus(adev->pdev->bus)) {
5284 if (adev->pm.pcie_gen_mask == 0)
5285 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5286 if (adev->pm.pcie_mlw_mask == 0)
5287 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 5288 return;
cd474ba0 5289 }
d0dd7f0c 5290
c5313457
HK
5291 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5292 return;
5293
dbaa922b
AD
5294 pcie_bandwidth_available(adev->pdev, NULL,
5295 &platform_speed_cap, &platform_link_width);
c5313457 5296
cd474ba0 5297 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
5298 /* asic caps */
5299 pdev = adev->pdev;
5300 speed_cap = pcie_get_speed_cap(pdev);
5301 if (speed_cap == PCI_SPEED_UNKNOWN) {
5302 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
5303 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5304 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 5305 } else {
2b3a1f51
FX
5306 if (speed_cap == PCIE_SPEED_32_0GT)
5307 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5308 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5309 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5310 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5311 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5312 else if (speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5313 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5314 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5315 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5316 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5317 else if (speed_cap == PCIE_SPEED_8_0GT)
5318 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5319 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5320 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5321 else if (speed_cap == PCIE_SPEED_5_0GT)
5322 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5323 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5324 else
5325 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5326 }
5327 /* platform caps */
c5313457 5328 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
5329 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5330 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5331 } else {
2b3a1f51
FX
5332 if (platform_speed_cap == PCIE_SPEED_32_0GT)
5333 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5334 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5335 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5336 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5337 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5338 else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5339 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5340 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5341 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5342 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 5343 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
5344 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5345 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5346 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 5347 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
5348 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5349 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5350 else
5351 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5352
cd474ba0
AD
5353 }
5354 }
5355 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 5356 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
5357 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5358 } else {
c5313457 5359 switch (platform_link_width) {
5d9a6330 5360 case PCIE_LNK_X32:
cd474ba0
AD
5361 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5362 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5363 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5364 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5365 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5366 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5367 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5368 break;
5d9a6330 5369 case PCIE_LNK_X16:
cd474ba0
AD
5370 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5371 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5372 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5373 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5374 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5375 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5376 break;
5d9a6330 5377 case PCIE_LNK_X12:
cd474ba0
AD
5378 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5379 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5380 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5381 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5382 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5383 break;
5d9a6330 5384 case PCIE_LNK_X8:
cd474ba0
AD
5385 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5386 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5387 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5388 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5389 break;
5d9a6330 5390 case PCIE_LNK_X4:
cd474ba0
AD
5391 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5392 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5393 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5394 break;
5d9a6330 5395 case PCIE_LNK_X2:
cd474ba0
AD
5396 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5397 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5398 break;
5d9a6330 5399 case PCIE_LNK_X1:
cd474ba0
AD
5400 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5401 break;
5402 default:
5403 break;
5404 }
d0dd7f0c
AD
5405 }
5406 }
5407}
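/*
 * Illustrative sketch: the pcie_gen_mask/pcie_mlw_mask values filled in above
 * are consumed by the power-management code to clamp the link speed and width
 * it requests.  The checks below are an assumed, minimal example of reading
 * those masks, not code taken from elsewhere in the driver:
 *
 *	bool gen3_supported = adev->pm.pcie_gen_mask &
 *			      CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3;
 *	bool x16_supported  = adev->pm.pcie_mlw_mask &
 *			      CAIL_PCIE_LINK_WIDTH_SUPPORT_X16;
 */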
d38ceaf9 5408
361dbd01
AD
5409int amdgpu_device_baco_enter(struct drm_device *dev)
5410{
1348969a 5411 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5412 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01 5413
4a580877 5414 if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
361dbd01
AD
5415 return -ENOTSUPP;
5416
8ab0d6f0 5417 if (ras && adev->ras_enabled &&
acdae216 5418 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5419 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
5420
9530273e 5421 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
5422}
5423
5424int amdgpu_device_baco_exit(struct drm_device *dev)
5425{
1348969a 5426 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5427 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 5428 int ret = 0;
361dbd01 5429
4a580877 5430 if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
361dbd01
AD
5431 return -ENOTSUPP;
5432
9530273e
EQ
5433 ret = amdgpu_dpm_baco_exit(adev);
5434 if (ret)
5435 return ret;
7a22677b 5436
8ab0d6f0 5437 if (ras && adev->ras_enabled &&
acdae216 5438 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5439 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
5440
1bece222
CL
5441 if (amdgpu_passthrough(adev) &&
5442 adev->nbio.funcs->clear_doorbell_interrupt)
5443 adev->nbio.funcs->clear_doorbell_interrupt(adev);
5444
7a22677b 5445 return 0;
361dbd01 5446}
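/*
 * Illustrative sketch: the two BACO helpers above are meant to be used as a
 * pair around a runtime power transition, entering BACO when the device goes
 * into runtime suspend and leaving it again on runtime resume.  The pairing
 * below is an assumption-level outline, not the exact runtime-PM code:
 *
 *	r = amdgpu_device_baco_enter(drm_dev);	(runtime suspend path)
 *	...
 *	r = amdgpu_device_baco_exit(drm_dev);	(runtime resume path)
 */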
c9a6b82f
AG
5447
5448/**
5449 * amdgpu_pci_error_detected - Called when a PCI error is detected.
5450 * @pdev: PCI device struct
5451 * @state: PCI channel state
5452 *
5453 * Description: Called when a PCI error is detected.
5454 *
5455 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
5456 */
5457pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
5458{
5459 struct drm_device *dev = pci_get_drvdata(pdev);
5460 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5461 int i;
c9a6b82f
AG
5462
5463 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
5464
6894305c
AG
5465 if (adev->gmc.xgmi.num_physical_nodes > 1) {
5466 DRM_WARN("No support for XGMI hive yet...");
5467 return PCI_ERS_RESULT_DISCONNECT;
5468 }
5469
e17e27f9
GC
5470 adev->pci_channel_state = state;
5471
c9a6b82f
AG
5472 switch (state) {
5473 case pci_channel_io_normal:
5474 return PCI_ERS_RESULT_CAN_RECOVER;
acd89fca 5475 /* Fatal error, prepare for slot reset */
8a11d283
TZ
5476 case pci_channel_io_frozen:
5477 /*
d0fb18b5 5478 * Locking adev->reset_domain->sem will prevent any external access
acd89fca
AG
5479 * to GPU during PCI error recovery
5480 */
f287a3c5 5481 amdgpu_device_lock_adev(adev, NULL);
acd89fca
AG
5482
5483 /*
5484 * Block any work scheduling as we do for regular GPU reset
5485 * for the duration of the recovery
5486 */
5487 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5488 struct amdgpu_ring *ring = adev->rings[i];
5489
5490 if (!ring || !ring->sched.thread)
5491 continue;
5492
5493 drm_sched_stop(&ring->sched, NULL);
5494 }
8f8c80f4 5495 atomic_inc(&adev->gpu_reset_counter);
c9a6b82f
AG
5496 return PCI_ERS_RESULT_NEED_RESET;
5497 case pci_channel_io_perm_failure:
5498 /* Permanent error, prepare for device removal */
5499 return PCI_ERS_RESULT_DISCONNECT;
5500 }
5501
5502 return PCI_ERS_RESULT_NEED_RESET;
5503}
5504
5505/**
5506 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
5507 * @pdev: pointer to PCI device
5508 */
5509pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
5510{
5511
5512 DRM_INFO("PCI error: mmio enabled callback!!\n");
5513
5514 /* TODO - dump whatever for debugging purposes */
5515
5516 /* This is called only if amdgpu_pci_error_detected returns
5517 * PCI_ERS_RESULT_CAN_RECOVER. Reads/writes to the device still
5518 * work, so there is no need to reset the slot.
5519 */
5520
5521 return PCI_ERS_RESULT_RECOVERED;
5522}
5523
5524/**
5525 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
5526 * @pdev: PCI device struct
5527 *
5528 * Description: This routine is called by the pci error recovery
5529 * code after the PCI slot has been reset, just before we
5530 * should resume normal operations.
5531 */
5532pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
5533{
5534 struct drm_device *dev = pci_get_drvdata(pdev);
5535 struct amdgpu_device *adev = drm_to_adev(dev);
362c7b91 5536 int r, i;
04442bf7 5537 struct amdgpu_reset_context reset_context;
362c7b91 5538 u32 memsize;
7ac71382 5539 struct list_head device_list;
c9a6b82f
AG
5540
5541 DRM_INFO("PCI error: slot reset callback!!\n");
5542
04442bf7
LL
5543 memset(&reset_context, 0, sizeof(reset_context));
5544
7ac71382 5545 INIT_LIST_HEAD(&device_list);
655ce9cb 5546 list_add_tail(&adev->reset_list, &device_list);
7ac71382 5547
362c7b91
AG
5548 /* wait for asic to come out of reset */
5549 msleep(500);
5550
7ac71382 5551 /* Restore PCI confspace */
c1dd4aa6 5552 amdgpu_device_load_pci_state(pdev);
c9a6b82f 5553
362c7b91
AG
5554 /* confirm ASIC came out of reset */
5555 for (i = 0; i < adev->usec_timeout; i++) {
5556 memsize = amdgpu_asic_get_config_memsize(adev);
5557
5558 if (memsize != 0xffffffff)
5559 break;
5560 udelay(1);
5561 }
5562 if (memsize == 0xffffffff) {
5563 r = -ETIME;
5564 goto out;
5565 }
5566
04442bf7
LL
5567 reset_context.method = AMD_RESET_METHOD_NONE;
5568 reset_context.reset_req_dev = adev;
5569 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
5570 set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
5571
7afefb81 5572 adev->no_hw_access = true;
04442bf7 5573 r = amdgpu_device_pre_asic_reset(adev, &reset_context);
7afefb81 5574 adev->no_hw_access = false;
c9a6b82f
AG
5575 if (r)
5576 goto out;
5577
04442bf7 5578 r = amdgpu_do_asic_reset(&device_list, &reset_context);
c9a6b82f
AG
5579
5580out:
c9a6b82f 5581 if (!r) {
c1dd4aa6
AG
5582 if (amdgpu_device_cache_pci_state(adev->pdev))
5583 pci_restore_state(adev->pdev);
5584
c9a6b82f
AG
5585 DRM_INFO("PCIe error recovery succeeded\n");
5586 } else {
5587 DRM_ERROR("PCIe error recovery failed, err:%d", r);
5588 amdgpu_device_unlock_adev(adev);
5589 }
5590
5591 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
5592}
5593
5594/**
5595 * amdgpu_pci_resume() - resume normal ops after PCI reset
5596 * @pdev: pointer to PCI device
5597 *
5598 * Called when the error recovery driver tells us that it's
505199a3 5599 * OK to resume normal operation.
c9a6b82f
AG
5600 */
5601void amdgpu_pci_resume(struct pci_dev *pdev)
5602{
5603 struct drm_device *dev = pci_get_drvdata(pdev);
5604 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5605 int i;
c9a6b82f 5606
c9a6b82f
AG
5607
5608 DRM_INFO("PCI error: resume callback!!\n");
acd89fca 5609
e17e27f9
GC
5610 /* Only continue execution for the case of pci_channel_io_frozen */
5611 if (adev->pci_channel_state != pci_channel_io_frozen)
5612 return;
5613
acd89fca
AG
5614 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5615 struct amdgpu_ring *ring = adev->rings[i];
5616
5617 if (!ring || !ring->sched.thread)
5618 continue;
5619
5620
5621 drm_sched_resubmit_jobs(&ring->sched);
5622 drm_sched_start(&ring->sched, true);
5623 }
5624
5625 amdgpu_device_unlock_adev(adev);
c9a6b82f 5626}
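/*
 * Illustrative sketch: the four PCI error callbacks above are wired into the
 * PCI core's error-recovery machinery through a pci_error_handlers table on
 * the driver.  The structure name below is a placeholder; the real hookup
 * lives in the PCI driver registration code:
 *
 *	static struct pci_error_handlers example_pci_err_handler = {
 *		.error_detected	= amdgpu_pci_error_detected,
 *		.mmio_enabled	= amdgpu_pci_mmio_enabled,
 *		.slot_reset	= amdgpu_pci_slot_reset,
 *		.resume		= amdgpu_pci_resume,
 *	};
 */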
c1dd4aa6
AG
5627
5628bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
5629{
5630 struct drm_device *dev = pci_get_drvdata(pdev);
5631 struct amdgpu_device *adev = drm_to_adev(dev);
5632 int r;
5633
5634 r = pci_save_state(pdev);
5635 if (!r) {
5636 kfree(adev->pci_state);
5637
5638 adev->pci_state = pci_store_saved_state(pdev);
5639
5640 if (!adev->pci_state) {
5641 DRM_ERROR("Failed to store PCI saved state");
5642 return false;
5643 }
5644 } else {
5645 DRM_WARN("Failed to save PCI state, err:%d\n", r);
5646 return false;
5647 }
5648
5649 return true;
5650}
5651
5652bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
5653{
5654 struct drm_device *dev = pci_get_drvdata(pdev);
5655 struct amdgpu_device *adev = drm_to_adev(dev);
5656 int r;
5657
5658 if (!adev->pci_state)
5659 return false;
5660
5661 r = pci_load_saved_state(pdev, adev->pci_state);
5662
5663 if (!r) {
5664 pci_restore_state(pdev);
5665 } else {
5666 DRM_WARN("Failed to load PCI state, err:%d\n", r);
5667 return false;
5668 }
5669
5670 return true;
5671}
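/*
 * Illustrative pairing (assumed usage, not a verbatim call site): the PCI
 * config space is cached while the device is healthy so that it can be
 * restored during recovery, as the slot_reset handler above does:
 *
 *	amdgpu_device_cache_pci_state(adev->pdev);	(device healthy)
 *	...
 *	amdgpu_device_load_pci_state(adev->pdev);	(after the slot reset)
 */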
5672
810085dd
EH
5673void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
5674 struct amdgpu_ring *ring)
5675{
5676#ifdef CONFIG_X86_64
5677 if (adev->flags & AMD_IS_APU)
5678 return;
5679#endif
5680 if (adev->gmc.xgmi.connected_to_cpu)
5681 return;
5682
5683 if (ring && ring->funcs->emit_hdp_flush)
5684 amdgpu_ring_emit_hdp_flush(ring);
5685 else
5686 amdgpu_asic_flush_hdp(adev, ring);
5687}
c1dd4aa6 5688
810085dd
EH
5689void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
5690 struct amdgpu_ring *ring)
5691{
5692#ifdef CONFIG_X86_64
5693 if (adev->flags & AMD_IS_APU)
5694 return;
5695#endif
5696 if (adev->gmc.xgmi.connected_to_cpu)
5697 return;
c1dd4aa6 5698
810085dd
EH
5699 amdgpu_asic_invalidate_hdp(adev, ring);
5700}
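/*
 * Illustrative sketch: the HDP helpers above bracket CPU access to memory the
 * GPU also touches through the host data path.  Flush after the CPU writes
 * data the GPU will read, invalidate before the CPU reads data the GPU has
 * written.  The write/read helpers named below are placeholders:
 *
 *	example_cpu_write_buffer(adev, ...);
 *	amdgpu_device_flush_hdp(adev, ring);
 *	...
 *	amdgpu_device_invalidate_hdp(adev, ring);
 *	example_cpu_read_buffer(adev, ...);
 */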
34f3a4a9
LY
5701
5702/**
5703 * amdgpu_device_halt() - bring hardware to some kind of halt state
5704 *
5705 * @adev: amdgpu_device pointer
5706 *
5707 * Bring hardware to some kind of halt state so that no one can touch it
5708 * any more. This helps to maintain the error context when an error occurs.
5709 * Compared to a simple hang, the system will stay stable at least for SSH
5710 * access. It should then be trivial to inspect the hardware state and
5711 * see what is going on. Implemented as follows:
5712 *
5713 * 1. drm_dev_unplug() makes the device inaccessible to user space (IOCTLs, etc),
5714 * clears all CPU mappings to the device, and disallows remappings through page faults
5715 * 2. amdgpu_irq_disable_all() disables all interrupts
5716 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
5717 * 4. set adev->no_hw_access to avoid potential crashes after step 5
5718 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
5719 * 6. pci_disable_device() and pci_wait_for_pending_transaction()
5720 * flush any in-flight DMA operations
5721 */
5722void amdgpu_device_halt(struct amdgpu_device *adev)
5723{
5724 struct pci_dev *pdev = adev->pdev;
e0f943b4 5725 struct drm_device *ddev = adev_to_drm(adev);
34f3a4a9
LY
5726
5727 drm_dev_unplug(ddev);
5728
5729 amdgpu_irq_disable_all(adev);
5730
5731 amdgpu_fence_driver_hw_fini(adev);
5732
5733 adev->no_hw_access = true;
5734
5735 amdgpu_device_unmap_mmio(adev);
5736
5737 pci_disable_device(pdev);
5738 pci_wait_for_pending_transaction(pdev);
5739}