drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/console.h>
#include <linux/slab.h>

#include <drm/drm_atomic_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "nv.h"
#include "bif/bif_4_1_d.h"
#include <linux/pci.h>
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
#include "amdgpu_pmu.h"
#include "amdgpu_fru_eeprom.h"
#include "amdgpu_reset.h"

#include <linux/suspend.h>
#include <drm/task_barrier.h>
#include <linux/pm_runtime.h>

#include <drm/drm_drv.h>
MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vangogh_gpu_info.bin");

#define AMDGPU_RESUME_MS		2000

const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"ARCTURUS",
	"RENOIR",
	"ALDEBARAN",
	"NAVI10",
	"NAVI14",
	"NAVI12",
	"SIENNA_CICHLID",
	"NAVY_FLOUNDER",
	"VANGOGH",
	"DIMGREY_CAVEFISH",
	"BEIGE_GOBY",
	"LAST",
};

/**
 * DOC: pcie_replay_count
 *
 * The amdgpu driver provides a sysfs API for reporting the total number
 * of PCIe replays (NAKs).
 * The file pcie_replay_count is used for this and returns the total
 * number of replays as a sum of the NAKs generated and NAKs received.
 */
136
137static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
138 struct device_attribute *attr, char *buf)
139{
140 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 141 struct amdgpu_device *adev = drm_to_adev(ddev);
dcea6e65
KR
142 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
143
36000c7a 144 return sysfs_emit(buf, "%llu\n", cnt);
dcea6e65
KR
145}
146
147static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
148 amdgpu_device_get_pcie_replay_count, NULL);
149
5494d864
AD
150static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
151
/**
 * DOC: product_name
 *
 * The amdgpu driver provides a sysfs API for reporting the product name
 * for the device.
 * The file product_name is used for this and returns the product name
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */
161
162static ssize_t amdgpu_device_get_product_name(struct device *dev,
163 struct device_attribute *attr, char *buf)
164{
165 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 166 struct amdgpu_device *adev = drm_to_adev(ddev);
bd607166 167
36000c7a 168 return sysfs_emit(buf, "%s\n", adev->product_name);
bd607166
KR
169}
170
171static DEVICE_ATTR(product_name, S_IRUGO,
172 amdgpu_device_get_product_name, NULL);
173
/**
 * DOC: product_number
 *
 * The amdgpu driver provides a sysfs API for reporting the part number
 * for the device.
 * The file product_number is used for this and returns the part number
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */
183
184static ssize_t amdgpu_device_get_product_number(struct device *dev,
185 struct device_attribute *attr, char *buf)
186{
187 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 188 struct amdgpu_device *adev = drm_to_adev(ddev);
bd607166 189
36000c7a 190 return sysfs_emit(buf, "%s\n", adev->product_number);
bd607166
KR
191}
192
193static DEVICE_ATTR(product_number, S_IRUGO,
194 amdgpu_device_get_product_number, NULL);
195
/**
 * DOC: serial_number
 *
 * The amdgpu driver provides a sysfs API for reporting the serial number
 * for the device.
 * The file serial_number is used for this and returns the serial number
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */
205
206static ssize_t amdgpu_device_get_serial_number(struct device *dev,
207 struct device_attribute *attr, char *buf)
208{
209 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 210 struct amdgpu_device *adev = drm_to_adev(ddev);
bd607166 211
36000c7a 212 return sysfs_emit(buf, "%s\n", adev->serial);
bd607166
KR
213}
214
215static DEVICE_ATTR(serial_number, S_IRUGO,
216 amdgpu_device_get_serial_number, NULL);
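/*
 * Usage note (illustrative, not part of the driver): these attributes hang
 * off the PCI device, so on a typical single-GPU system they can be read
 * with something like (paths assumed):
 *
 *	$ cat /sys/class/drm/card0/device/product_name
 *	$ cat /sys/class/drm/card0/device/serial_number
 *	$ cat /sys/class/drm/card0/device/pcie_replay_count
 */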
217
fd496ca8 218/**
b98c6299 219 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
fd496ca8
AD
220 *
221 * @dev: drm_device pointer
222 *
b98c6299 223 * Returns true if the device is a dGPU with ATPX power control,
fd496ca8
AD
224 * otherwise return false.
225 */
b98c6299 226bool amdgpu_device_supports_px(struct drm_device *dev)
fd496ca8
AD
227{
228 struct amdgpu_device *adev = drm_to_adev(dev);
229
b98c6299 230 if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
fd496ca8
AD
231 return true;
232 return false;
233}
234
e3ecdffa 235/**
0330b848 236 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
e3ecdffa
AD
237 *
238 * @dev: drm_device pointer
239 *
b98c6299 240 * Returns true if the device is a dGPU with ACPI power control,
e3ecdffa
AD
241 * otherwise return false.
242 */
31af062a 243bool amdgpu_device_supports_boco(struct drm_device *dev)
d38ceaf9 244{
1348969a 245 struct amdgpu_device *adev = drm_to_adev(dev);
d38ceaf9 246
b98c6299
AD
247 if (adev->has_pr3 ||
248 ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
d38ceaf9
AD
249 return true;
250 return false;
251}
252
/**
 * amdgpu_device_supports_baco - Does the device support BACO
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device supports BACO,
 * otherwise returns false.
 */
261bool amdgpu_device_supports_baco(struct drm_device *dev)
262{
1348969a 263 struct amdgpu_device *adev = drm_to_adev(dev);
a69cba42
AD
264
265 return amdgpu_asic_supports_baco(adev);
266}
267
3fa8f89d
S
268/**
269 * amdgpu_device_supports_smart_shift - Is the device dGPU with
270 * smart shift support
271 *
272 * @dev: drm_device pointer
273 *
274 * Returns true if the device is a dGPU with Smart Shift support,
275 * otherwise returns false.
276 */
277bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
278{
279 return (amdgpu_device_supports_boco(dev) &&
280 amdgpu_acpi_is_power_shift_control_supported());
281}
282
6e3cd2a9
MCC
283/*
284 * VRAM access helper functions
285 */
286
/**
 * amdgpu_device_vram_access - read/write a buffer in vram
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size in bytes; the size of @buf must be >= @size
 * @write: true - write to vram, otherwise - read from vram
 */
296void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
297 uint32_t *buf, size_t size, bool write)
298{
e35e2b11 299 unsigned long flags;
ce05ac56
CK
300 uint32_t hi = ~0;
301 uint64_t last;
f89f8c6b 302 int idx;
ce05ac56 303
f89f8c6b
AG
304 if (!drm_dev_enter(&adev->ddev, &idx))
305 return;
9d11eb0d
CK
306
307#ifdef CONFIG_64BIT
308 last = min(pos + size, adev->gmc.visible_vram_size);
309 if (last > pos) {
310 void __iomem *addr = adev->mman.aper_base_kaddr + pos;
311 size_t count = last - pos;
312
313 if (write) {
314 memcpy_toio(addr, buf, count);
315 mb();
316 amdgpu_asic_flush_hdp(adev, NULL);
317 } else {
318 amdgpu_asic_invalidate_hdp(adev, NULL);
319 mb();
320 memcpy_fromio(buf, addr, count);
321 }
322
323 if (count == size)
f89f8c6b 324 goto exit;
9d11eb0d
CK
325
326 pos += count;
327 buf += count / 4;
328 size -= count;
329 }
330#endif
331
ce05ac56
CK
332 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
333 for (last = pos + size; pos < last; pos += 4) {
334 uint32_t tmp = pos >> 31;
e35e2b11 335
e35e2b11 336 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
ce05ac56
CK
337 if (tmp != hi) {
338 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
339 hi = tmp;
340 }
e35e2b11
TY
341 if (write)
342 WREG32_NO_KIQ(mmMM_DATA, *buf++);
343 else
344 *buf++ = RREG32_NO_KIQ(mmMM_DATA);
e35e2b11 345 }
ce05ac56 346 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
f89f8c6b 347
8eca89a1 348#ifdef CONFIG_64BIT
f89f8c6b 349exit:
8eca89a1 350#endif
f89f8c6b 351 drm_dev_exit(idx);
e35e2b11
TY
352}
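/*
 * Usage sketch (illustrative only, offset and size are arbitrary): copy the
 * first 256 bytes of VRAM into a CPU buffer.
 *
 *	uint32_t buf[64];
 *
 *	amdgpu_device_vram_access(adev, 0, buf, sizeof(buf), false);
 */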
353
d38ceaf9 354/*
f7ee1874 355 * register access helper functions.
d38ceaf9 356 */
56b53c0b
DL
357
358/* Check if hw access should be skipped because of hotplug or device error */
359bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
360{
7afefb81 361 if (adev->no_hw_access)
56b53c0b
DL
362 return true;
363
364#ifdef CONFIG_LOCKDEP
	/*
	 * This is a bit complicated to understand, so worth a comment. What we assert
	 * here is that the GPU reset is not running on another thread in parallel.
	 *
	 * For this we trylock the read side of the reset semaphore, if that succeeds
	 * we know that the reset is not running in parallel.
	 *
	 * If the trylock fails we assert that we are either already holding the read
	 * side of the lock or are the reset thread itself and hold the write side of
	 * the lock.
	 */
376 if (in_task()) {
377 if (down_read_trylock(&adev->reset_sem))
378 up_read(&adev->reset_sem);
379 else
380 lockdep_assert_held(&adev->reset_sem);
381 }
382#endif
383 return false;
384}
385
e3ecdffa 386/**
f7ee1874 387 * amdgpu_device_rreg - read a memory mapped IO or indirect register
e3ecdffa
AD
388 *
389 * @adev: amdgpu_device pointer
390 * @reg: dword aligned register offset
391 * @acc_flags: access flags which require special behavior
392 *
393 * Returns the 32 bit value from the offset specified.
394 */
f7ee1874
HZ
395uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
396 uint32_t reg, uint32_t acc_flags)
d38ceaf9 397{
f4b373f4
TSD
398 uint32_t ret;
399
56b53c0b 400 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
401 return 0;
402
f7ee1874
HZ
403 if ((reg * 4) < adev->rmmio_size) {
404 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
405 amdgpu_sriov_runtime(adev) &&
406 down_read_trylock(&adev->reset_sem)) {
407 ret = amdgpu_kiq_rreg(adev, reg);
408 up_read(&adev->reset_sem);
409 } else {
410 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
411 }
412 } else {
413 ret = adev->pcie_rreg(adev, reg * 4);
81202807 414 }
bc992ba5 415
f7ee1874 416 trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
e78b579d 417
f4b373f4 418 return ret;
d38ceaf9
AD
419}
420
/*
 * MMIO register read with bytes helper functions
 * @offset: byte offset from MMIO start
 */
426
e3ecdffa
AD
427/**
428 * amdgpu_mm_rreg8 - read a memory mapped IO register
429 *
430 * @adev: amdgpu_device pointer
431 * @offset: byte aligned register offset
432 *
433 * Returns the 8 bit value from the offset specified.
434 */
7cbbc745
AG
435uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
436{
56b53c0b 437 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
438 return 0;
439
421a2a30
ML
440 if (offset < adev->rmmio_size)
441 return (readb(adev->rmmio + offset));
442 BUG();
443}
444
/*
 * MMIO register write with bytes helper functions
 * @offset: byte offset from MMIO start
 * @value: the value to be written to the register
 */
/**
 * amdgpu_mm_wreg8 - write a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
7cbbc745
AG
460void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
461{
56b53c0b 462 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
463 return;
464
421a2a30
ML
465 if (offset < adev->rmmio_size)
466 writeb(value, adev->rmmio + offset);
467 else
468 BUG();
469}
470
e3ecdffa 471/**
f7ee1874 472 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
e3ecdffa
AD
473 *
474 * @adev: amdgpu_device pointer
475 * @reg: dword aligned register offset
476 * @v: 32 bit value to write to the register
477 * @acc_flags: access flags which require special behavior
478 *
479 * Writes the value specified to the offset specified.
480 */
f7ee1874
HZ
481void amdgpu_device_wreg(struct amdgpu_device *adev,
482 uint32_t reg, uint32_t v,
483 uint32_t acc_flags)
d38ceaf9 484{
56b53c0b 485 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
486 return;
487
f7ee1874
HZ
488 if ((reg * 4) < adev->rmmio_size) {
489 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
490 amdgpu_sriov_runtime(adev) &&
491 down_read_trylock(&adev->reset_sem)) {
492 amdgpu_kiq_wreg(adev, reg, v);
493 up_read(&adev->reset_sem);
494 } else {
495 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
496 }
497 } else {
498 adev->pcie_wreg(adev, reg * 4, v);
81202807 499 }
bc992ba5 500
f7ee1874 501 trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
2e0cc4d4 502}
d38ceaf9 503
/*
 * amdgpu_mm_wreg_mmio_rlc - write register either with mmio or with RLC path if in range
 *
 * This function is invoked only for debugfs register access.
 */
f7ee1874
HZ
509void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
510 uint32_t reg, uint32_t v)
2e0cc4d4 511{
56b53c0b 512 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
513 return;
514
2e0cc4d4 515 if (amdgpu_sriov_fullaccess(adev) &&
f7ee1874
HZ
516 adev->gfx.rlc.funcs &&
517 adev->gfx.rlc.funcs->is_rlcg_access_range) {
2e0cc4d4 518 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
a5504e9a 519 return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v, 0, 0);
f7ee1874
HZ
520 } else {
521 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
47ed4e1c 522 }
d38ceaf9
AD
523}
524
d38ceaf9
AD
525/**
526 * amdgpu_mm_rdoorbell - read a doorbell dword
527 *
528 * @adev: amdgpu_device pointer
529 * @index: doorbell index
530 *
531 * Returns the value in the doorbell aperture at the
532 * requested doorbell index (CIK).
533 */
534u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
535{
56b53c0b 536 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
537 return 0;
538
d38ceaf9
AD
539 if (index < adev->doorbell.num_doorbells) {
540 return readl(adev->doorbell.ptr + index);
541 } else {
542 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
543 return 0;
544 }
545}
546
547/**
548 * amdgpu_mm_wdoorbell - write a doorbell dword
549 *
550 * @adev: amdgpu_device pointer
551 * @index: doorbell index
552 * @v: value to write
553 *
554 * Writes @v to the doorbell aperture at the
555 * requested doorbell index (CIK).
556 */
557void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
558{
56b53c0b 559 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
560 return;
561
d38ceaf9
AD
562 if (index < adev->doorbell.num_doorbells) {
563 writel(v, adev->doorbell.ptr + index);
564 } else {
565 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
566 }
567}
568
832be404
KW
569/**
570 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
571 *
572 * @adev: amdgpu_device pointer
573 * @index: doorbell index
574 *
575 * Returns the value in the doorbell aperture at the
576 * requested doorbell index (VEGA10+).
577 */
578u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
579{
56b53c0b 580 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
581 return 0;
582
832be404
KW
583 if (index < adev->doorbell.num_doorbells) {
584 return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
585 } else {
586 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
587 return 0;
588 }
589}
590
591/**
592 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
593 *
594 * @adev: amdgpu_device pointer
595 * @index: doorbell index
596 * @v: value to write
597 *
598 * Writes @v to the doorbell aperture at the
599 * requested doorbell index (VEGA10+).
600 */
601void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
602{
56b53c0b 603 if (amdgpu_device_skip_hw_access(adev))
bf36b52e
AG
604 return;
605
832be404
KW
606 if (index < adev->doorbell.num_doorbells) {
607 atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
608 } else {
609 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
610 }
611}
612
1bba3683
HZ
613/**
614 * amdgpu_device_indirect_rreg - read an indirect register
615 *
616 * @adev: amdgpu_device pointer
617 * @pcie_index: mmio register offset
618 * @pcie_data: mmio register offset
22f453fb 619 * @reg_addr: indirect register address to read from
1bba3683
HZ
620 *
621 * Returns the value of indirect register @reg_addr
622 */
623u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
624 u32 pcie_index, u32 pcie_data,
625 u32 reg_addr)
626{
627 unsigned long flags;
628 u32 r;
629 void __iomem *pcie_index_offset;
630 void __iomem *pcie_data_offset;
631
632 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
633 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
634 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
635
636 writel(reg_addr, pcie_index_offset);
637 readl(pcie_index_offset);
638 r = readl(pcie_data_offset);
639 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
640
641 return r;
642}
643
644/**
645 * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
646 *
647 * @adev: amdgpu_device pointer
648 * @pcie_index: mmio register offset
649 * @pcie_data: mmio register offset
22f453fb 650 * @reg_addr: indirect register address to read from
1bba3683
HZ
651 *
652 * Returns the value of indirect register @reg_addr
653 */
654u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
655 u32 pcie_index, u32 pcie_data,
656 u32 reg_addr)
657{
658 unsigned long flags;
659 u64 r;
660 void __iomem *pcie_index_offset;
661 void __iomem *pcie_data_offset;
662
663 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
664 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
665 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
666
667 /* read low 32 bits */
668 writel(reg_addr, pcie_index_offset);
669 readl(pcie_index_offset);
670 r = readl(pcie_data_offset);
671 /* read high 32 bits */
672 writel(reg_addr + 4, pcie_index_offset);
673 readl(pcie_index_offset);
674 r |= ((u64)readl(pcie_data_offset) << 32);
675 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
676
677 return r;
678}
679
680/**
681 * amdgpu_device_indirect_wreg - write an indirect register address
682 *
683 * @adev: amdgpu_device pointer
684 * @pcie_index: mmio register offset
685 * @pcie_data: mmio register offset
686 * @reg_addr: indirect register offset
687 * @reg_data: indirect register data
688 *
689 */
690void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
691 u32 pcie_index, u32 pcie_data,
692 u32 reg_addr, u32 reg_data)
693{
694 unsigned long flags;
695 void __iomem *pcie_index_offset;
696 void __iomem *pcie_data_offset;
697
698 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
699 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
700 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
701
702 writel(reg_addr, pcie_index_offset);
703 readl(pcie_index_offset);
704 writel(reg_data, pcie_data_offset);
705 readl(pcie_data_offset);
706 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
707}
708
709/**
710 * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
711 *
712 * @adev: amdgpu_device pointer
713 * @pcie_index: mmio register offset
714 * @pcie_data: mmio register offset
715 * @reg_addr: indirect register offset
716 * @reg_data: indirect register data
717 *
718 */
719void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
720 u32 pcie_index, u32 pcie_data,
721 u32 reg_addr, u64 reg_data)
722{
723 unsigned long flags;
724 void __iomem *pcie_index_offset;
725 void __iomem *pcie_data_offset;
726
727 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
728 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
729 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
730
731 /* write low 32 bits */
732 writel(reg_addr, pcie_index_offset);
733 readl(pcie_index_offset);
734 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
735 readl(pcie_data_offset);
736 /* write high 32 bits */
737 writel(reg_addr + 4, pcie_index_offset);
738 readl(pcie_index_offset);
739 writel((u32)(reg_data >> 32), pcie_data_offset);
740 readl(pcie_data_offset);
741 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
742}
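/*
 * ASIC code typically wraps these helpers in its pcie_rreg/pcie_wreg
 * callbacks.  A sketch (how the index/data offsets are obtained is ASIC
 * specific and only assumed here):
 *
 *	static u32 soc_pcie_rreg(struct amdgpu_device *adev, u32 reg)
 *	{
 *		u32 index = adev->nbio.funcs->get_pcie_index_offset(adev);
 *		u32 data = adev->nbio.funcs->get_pcie_data_offset(adev);
 *
 *		return amdgpu_device_indirect_rreg(adev, index, data, reg);
 *	}
 */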
743
d38ceaf9
AD
744/**
745 * amdgpu_invalid_rreg - dummy reg read function
746 *
982a820b 747 * @adev: amdgpu_device pointer
d38ceaf9
AD
748 * @reg: offset of register
749 *
750 * Dummy register read function. Used for register blocks
751 * that certain asics don't have (all asics).
752 * Returns the value in the register.
753 */
754static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
755{
756 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
757 BUG();
758 return 0;
759}
760
/**
 * amdgpu_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
771static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
772{
773 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
774 reg, v);
775 BUG();
776}
777
4fa1c6a6
TZ
778/**
779 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
780 *
982a820b 781 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
782 * @reg: offset of register
783 *
784 * Dummy register read function. Used for register blocks
785 * that certain asics don't have (all asics).
786 * Returns the value in the register.
787 */
788static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
789{
790 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
791 BUG();
792 return 0;
793}
794
/**
 * amdgpu_invalid_wreg64 - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
805static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
806{
807 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
808 reg, v);
809 BUG();
810}
811
d38ceaf9
AD
812/**
813 * amdgpu_block_invalid_rreg - dummy reg read function
814 *
982a820b 815 * @adev: amdgpu_device pointer
d38ceaf9
AD
816 * @block: offset of instance
817 * @reg: offset of register
818 *
819 * Dummy register read function. Used for register blocks
820 * that certain asics don't have (all asics).
821 * Returns the value in the register.
822 */
823static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
824 uint32_t block, uint32_t reg)
825{
826 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
827 reg, block);
828 BUG();
829 return 0;
830}
831
/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
843static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
844 uint32_t block,
845 uint32_t reg, uint32_t v)
846{
847 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
848 reg, block, v);
849 BUG();
850}
851
4d2997ab
AD
852/**
853 * amdgpu_device_asic_init - Wrapper for atom asic_init
854 *
982a820b 855 * @adev: amdgpu_device pointer
4d2997ab
AD
856 *
857 * Does any asic specific work and then calls atom asic init.
858 */
859static int amdgpu_device_asic_init(struct amdgpu_device *adev)
860{
861 amdgpu_asic_pre_asic_init(adev);
862
863 return amdgpu_atom_asic_init(adev->mode_info.atom_context);
864}
865
e3ecdffa
AD
866/**
867 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
868 *
982a820b 869 * @adev: amdgpu_device pointer
e3ecdffa
AD
870 *
871 * Allocates a scratch page of VRAM for use by various things in the
872 * driver.
873 */
06ec9070 874static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
d38ceaf9 875{
a4a02777
CK
876 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
877 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
878 &adev->vram_scratch.robj,
879 &adev->vram_scratch.gpu_addr,
880 (void **)&adev->vram_scratch.ptr);
d38ceaf9
AD
881}
882
e3ecdffa
AD
883/**
884 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
885 *
982a820b 886 * @adev: amdgpu_device pointer
e3ecdffa
AD
887 *
888 * Frees the VRAM scratch page.
889 */
06ec9070 890static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 891{
078af1a3 892 amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
d38ceaf9
AD
893}
894
/**
 * amdgpu_device_program_register_sequence - program an array of registers.
 *
 * @adev: amdgpu_device pointer
 * @registers: pointer to the register array
 * @array_size: size of the register array
 *
 * Programs an array of registers with and/or masks.
 * This is a helper for setting golden registers.
 */
9c3f2b54
AD
905void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
906 const u32 *registers,
907 const u32 array_size)
d38ceaf9
AD
908{
909 u32 tmp, reg, and_mask, or_mask;
910 int i;
911
912 if (array_size % 3)
913 return;
914
915 for (i = 0; i < array_size; i +=3) {
916 reg = registers[i + 0];
917 and_mask = registers[i + 1];
918 or_mask = registers[i + 2];
919
920 if (and_mask == 0xffffffff) {
921 tmp = or_mask;
922 } else {
923 tmp = RREG32(reg);
924 tmp &= ~and_mask;
e0d07657
HZ
925 if (adev->family >= AMDGPU_FAMILY_AI)
926 tmp |= (or_mask & and_mask);
927 else
928 tmp |= or_mask;
d38ceaf9
AD
929 }
930 WREG32(reg, tmp);
931 }
932}
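/*
 * Example of the expected layout (register names and values are made up):
 * each triplet is { offset, and_mask, or_mask }.  An and_mask of 0xffffffff
 * writes or_mask directly, anything else does a read-modify-write.
 *
 *	static const u32 golden_settings_example[] = {
 *		mmSOME_REG,  0xffffffff, 0x00000100,
 *		mmOTHER_REG, 0x0000ff00, 0x00003400,
 *	};
 *
 *	amdgpu_device_program_register_sequence(adev, golden_settings_example,
 *						ARRAY_SIZE(golden_settings_example));
 */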
933
e3ecdffa
AD
934/**
935 * amdgpu_device_pci_config_reset - reset the GPU
936 *
937 * @adev: amdgpu_device pointer
938 *
939 * Resets the GPU using the pci config reset sequence.
940 * Only applicable to asics prior to vega10.
941 */
8111c387 942void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
943{
944 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
945}
946
af484df8
AD
947/**
948 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
949 *
950 * @adev: amdgpu_device pointer
951 *
952 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
953 */
954int amdgpu_device_pci_reset(struct amdgpu_device *adev)
955{
956 return pci_reset_function(adev->pdev);
957}
958
d38ceaf9
AD
959/*
960 * GPU doorbell aperture helpers function.
961 */
962/**
06ec9070 963 * amdgpu_device_doorbell_init - Init doorbell driver information.
d38ceaf9
AD
964 *
965 * @adev: amdgpu_device pointer
966 *
967 * Init doorbell driver information (CIK)
968 * Returns 0 on success, error on failure.
969 */
06ec9070 970static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
d38ceaf9 971{
6585661d 972
705e519e
CK
973 /* No doorbell on SI hardware generation */
974 if (adev->asic_type < CHIP_BONAIRE) {
975 adev->doorbell.base = 0;
976 adev->doorbell.size = 0;
977 adev->doorbell.num_doorbells = 0;
978 adev->doorbell.ptr = NULL;
979 return 0;
980 }
981
d6895ad3
CK
982 if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
983 return -EINVAL;
984
22357775
AD
985 amdgpu_asic_init_doorbell_index(adev);
986
d38ceaf9
AD
987 /* doorbell bar mapping */
988 adev->doorbell.base = pci_resource_start(adev->pdev, 2);
989 adev->doorbell.size = pci_resource_len(adev->pdev, 2);
990
edf600da 991 adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
9564f192 992 adev->doorbell_index.max_assignment+1);
d38ceaf9
AD
993 if (adev->doorbell.num_doorbells == 0)
994 return -EINVAL;
995
	/* For Vega, reserve and map two pages on the doorbell BAR since the SDMA
	 * paging queue doorbell uses the second page. The
	 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
	 * doorbells are in the first page. So with the paging queue enabled,
	 * the max num_doorbells should be incremented by one page (0x400 in dwords).
	 */
1002 if (adev->asic_type >= CHIP_VEGA10)
88dc26e4 1003 adev->doorbell.num_doorbells += 0x400;
ec3db8a6 1004
8972e5d2
CK
1005 adev->doorbell.ptr = ioremap(adev->doorbell.base,
1006 adev->doorbell.num_doorbells *
1007 sizeof(u32));
1008 if (adev->doorbell.ptr == NULL)
d38ceaf9 1009 return -ENOMEM;
d38ceaf9
AD
1010
1011 return 0;
1012}
1013
1014/**
06ec9070 1015 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
d38ceaf9
AD
1016 *
1017 * @adev: amdgpu_device pointer
1018 *
1019 * Tear down doorbell driver information (CIK)
1020 */
06ec9070 1021static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1022{
1023 iounmap(adev->doorbell.ptr);
1024 adev->doorbell.ptr = NULL;
1025}
1026
22cb0164 1027
d38ceaf9
AD
1028
/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */
1034
1035/**
06ec9070 1036 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
1037 *
1038 * @adev: amdgpu_device pointer
1039 *
1040 * Disables Writeback and frees the Writeback memory (all asics).
1041 * Used at driver shutdown.
1042 */
06ec9070 1043static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1044{
1045 if (adev->wb.wb_obj) {
a76ed485
AD
1046 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1047 &adev->wb.gpu_addr,
1048 (void **)&adev->wb.wb);
d38ceaf9
AD
1049 adev->wb.wb_obj = NULL;
1050 }
1051}
1052
1053/**
06ec9070 1054 * amdgpu_device_wb_init- Init Writeback driver info and allocate memory
d38ceaf9
AD
1055 *
1056 * @adev: amdgpu_device pointer
1057 *
455a7bc2 1058 * Initializes writeback and allocates writeback memory (all asics).
d38ceaf9
AD
1059 * Used at driver startup.
1060 * Returns 0 on success or an -error on failure.
1061 */
06ec9070 1062static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
1063{
1064 int r;
1065
1066 if (adev->wb.wb_obj == NULL) {
97407b63
AD
1067 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1068 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
1069 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1070 &adev->wb.wb_obj, &adev->wb.gpu_addr,
1071 (void **)&adev->wb.wb);
d38ceaf9
AD
1072 if (r) {
1073 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1074 return r;
1075 }
d38ceaf9
AD
1076
1077 adev->wb.num_wb = AMDGPU_MAX_WB;
1078 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1079
1080 /* clear wb memory */
73469585 1081 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
1082 }
1083
1084 return 0;
1085}
1086
1087/**
131b4b36 1088 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
1089 *
1090 * @adev: amdgpu_device pointer
1091 * @wb: wb index
1092 *
1093 * Allocate a wb slot for use by the driver (all asics).
1094 * Returns 0 on success or -EINVAL on failure.
1095 */
131b4b36 1096int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9
AD
1097{
1098 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 1099
97407b63 1100 if (offset < adev->wb.num_wb) {
7014285a 1101 __set_bit(offset, adev->wb.used);
63ae07ca 1102 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
1103 return 0;
1104 } else {
1105 return -EINVAL;
1106 }
1107}
1108
d38ceaf9 1109/**
131b4b36 1110 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
1111 *
1112 * @adev: amdgpu_device pointer
1113 * @wb: wb index
1114 *
1115 * Free a wb slot allocated for use by the driver (all asics)
1116 */
131b4b36 1117void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 1118{
73469585 1119 wb >>= 3;
d38ceaf9 1120 if (wb < adev->wb.num_wb)
73469585 1121 __clear_bit(wb, adev->wb.used);
d38ceaf9
AD
1122}
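/*
 * Typical pairing (sketch): a ring allocates a slot at init time and frees it
 * at teardown.  The value returned by amdgpu_device_wb_get() is a dword
 * offset into adev->wb.wb.
 *
 *	u32 wb;
 *	int r = amdgpu_device_wb_get(adev, &wb);
 *
 *	if (r)
 *		return r;
 *	adev->wb.wb[wb] = 0;	// CPU view of the slot
 *	...
 *	amdgpu_device_wb_free(adev, wb);
 */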
1123
/**
 * amdgpu_device_resize_fb_bar - try to resize FB BAR
 *
 * @adev: amdgpu_device pointer
 *
 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 * to fail, but if any of the BARs is not accessible after the resize we abort
 * driver loading by returning -ENODEV.
 */
1133int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1134{
453f617a 1135 int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
31b8adab
CK
1136 struct pci_bus *root;
1137 struct resource *res;
1138 unsigned i;
d6895ad3
CK
1139 u16 cmd;
1140 int r;
1141
0c03b912 1142 /* Bypass for VF */
1143 if (amdgpu_sriov_vf(adev))
1144 return 0;
1145
b7221f2b
AD
1146 /* skip if the bios has already enabled large BAR */
1147 if (adev->gmc.real_vram_size &&
1148 (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1149 return 0;
1150
31b8adab
CK
1151 /* Check if the root BUS has 64bit memory resources */
1152 root = adev->pdev->bus;
1153 while (root->parent)
1154 root = root->parent;
1155
1156 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 1157 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
1158 res->start > 0x100000000ull)
1159 break;
1160 }
1161
1162 /* Trying to resize is pointless without a root hub window above 4GB */
1163 if (!res)
1164 return 0;
1165
453f617a
ND
1166 /* Limit the BAR size to what is available */
1167 rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1168 rbar_size);
1169
d6895ad3
CK
1170 /* Disable memory decoding while we change the BAR addresses and size */
1171 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1172 pci_write_config_word(adev->pdev, PCI_COMMAND,
1173 cmd & ~PCI_COMMAND_MEMORY);
1174
1175 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
06ec9070 1176 amdgpu_device_doorbell_fini(adev);
d6895ad3
CK
1177 if (adev->asic_type >= CHIP_BONAIRE)
1178 pci_release_resource(adev->pdev, 2);
1179
1180 pci_release_resource(adev->pdev, 0);
1181
1182 r = pci_resize_resource(adev->pdev, 0, rbar_size);
1183 if (r == -ENOSPC)
1184 DRM_INFO("Not enough PCI address space for a large BAR.");
1185 else if (r && r != -ENOTSUPP)
1186 DRM_ERROR("Problem resizing BAR0 (%d).", r);
1187
1188 pci_assign_unassigned_bus_resources(adev->pdev->bus);
1189
1190 /* When the doorbell or fb BAR isn't available we have no chance of
1191 * using the device.
1192 */
06ec9070 1193 r = amdgpu_device_doorbell_init(adev);
d6895ad3
CK
1194 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1195 return -ENODEV;
1196
1197 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1198
1199 return 0;
1200}
a05502e5 1201
d38ceaf9
AD
1202/*
1203 * GPU helpers function.
1204 */
/**
 * amdgpu_device_need_post - check if the hw needs post or not
 *
 * @adev: amdgpu_device pointer
 *
 * Check if the asic has been initialized (all asics) at driver startup
 * or if post is needed after a hw reset.
 * Returns true if post is needed, false if not.
 */
39c640c0 1214bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
1215{
1216 uint32_t reg;
1217
bec86378
ML
1218 if (amdgpu_sriov_vf(adev))
1219 return false;
1220
1221 if (amdgpu_passthrough(adev)) {
1da2c326
ML
1222 /* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
1223 * some old smc fw still need driver do vPost otherwise gpu hang, while
1224 * those smc fw version above 22.15 doesn't have this flaw, so we force
1225 * vpost executed for smc version below 22.15
bec86378
ML
1226 */
1227 if (adev->asic_type == CHIP_FIJI) {
1228 int err;
1229 uint32_t fw_ver;
1230 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
			/* force vPost if error occurred */
1232 if (err)
1233 return true;
1234
1235 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1da2c326
ML
1236 if (fw_ver < 0x00160e00)
1237 return true;
bec86378 1238 }
bec86378 1239 }
91fe77eb 1240
e3c1b071 1241 /* Don't post if we need to reset whole hive on init */
1242 if (adev->gmc.xgmi.pending_reset)
1243 return false;
1244
91fe77eb 1245 if (adev->has_hw_reset) {
1246 adev->has_hw_reset = false;
1247 return true;
1248 }
1249
1250 /* bios scratch used on CIK+ */
1251 if (adev->asic_type >= CHIP_BONAIRE)
1252 return amdgpu_atombios_scratch_need_asic_init(adev);
1253
1254 /* check MEM_SIZE for older asics */
1255 reg = amdgpu_asic_get_config_memsize(adev);
1256
1257 if ((reg != 0) && (reg != 0xffffffff))
1258 return false;
1259
1260 return true;
bec86378
ML
1261}
1262
d38ceaf9
AD
1263/* if we get transitioned to only one device, take VGA back */
1264/**
06ec9070 1265 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9
AD
1266 *
1267 * @cookie: amdgpu_device pointer
1268 * @state: enable/disable vga decode
1269 *
1270 * Enable/disable vga decode (all asics).
1271 * Returns VGA resource flags.
1272 */
06ec9070 1273static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
d38ceaf9
AD
1274{
1275 struct amdgpu_device *adev = cookie;
1276 amdgpu_asic_set_vga_state(adev, state);
1277 if (state)
1278 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1279 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1280 else
1281 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1282}
1283
e3ecdffa
AD
1284/**
1285 * amdgpu_device_check_block_size - validate the vm block size
1286 *
1287 * @adev: amdgpu_device pointer
1288 *
1289 * Validates the vm block size specified via module parameter.
1290 * The vm block size defines number of bits in page table versus page directory,
1291 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1292 * page table and the remaining bits are in the page directory.
1293 */
06ec9070 1294static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
1295{
1296 /* defines number of bits in page table versus page directory,
1297 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1298 * page table and the remaining bits are in the page directory */
bab4fee7
JZ
1299 if (amdgpu_vm_block_size == -1)
1300 return;
a1adf8be 1301
bab4fee7 1302 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
1303 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1304 amdgpu_vm_block_size);
97489129 1305 amdgpu_vm_block_size = -1;
a1adf8be 1306 }
a1adf8be
CZ
1307}
1308
e3ecdffa
AD
1309/**
1310 * amdgpu_device_check_vm_size - validate the vm size
1311 *
1312 * @adev: amdgpu_device pointer
1313 *
1314 * Validates the vm size in GB specified via module parameter.
1315 * The VM size is the size of the GPU virtual memory space in GB.
1316 */
06ec9070 1317static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 1318{
64dab074
AD
1319 /* no need to check the default value */
1320 if (amdgpu_vm_size == -1)
1321 return;
1322
83ca145d
ZJ
1323 if (amdgpu_vm_size < 1) {
1324 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1325 amdgpu_vm_size);
f3368128 1326 amdgpu_vm_size = -1;
83ca145d 1327 }
83ca145d
ZJ
1328}
1329
7951e376
RZ
1330static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1331{
1332 struct sysinfo si;
a9d4fe2f 1333 bool is_os_64 = (sizeof(void *) == 8);
7951e376
RZ
1334 uint64_t total_memory;
1335 uint64_t dram_size_seven_GB = 0x1B8000000;
1336 uint64_t dram_size_three_GB = 0xB8000000;
1337
1338 if (amdgpu_smu_memory_pool_size == 0)
1339 return;
1340
1341 if (!is_os_64) {
1342 DRM_WARN("Not 64-bit OS, feature not supported\n");
1343 goto def_value;
1344 }
1345 si_meminfo(&si);
1346 total_memory = (uint64_t)si.totalram * si.mem_unit;
1347
1348 if ((amdgpu_smu_memory_pool_size == 1) ||
1349 (amdgpu_smu_memory_pool_size == 2)) {
1350 if (total_memory < dram_size_three_GB)
1351 goto def_value1;
1352 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1353 (amdgpu_smu_memory_pool_size == 8)) {
1354 if (total_memory < dram_size_seven_GB)
1355 goto def_value1;
1356 } else {
1357 DRM_WARN("Smu memory pool size not supported\n");
1358 goto def_value;
1359 }
1360 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1361
1362 return;
1363
1364def_value1:
	DRM_WARN("Not enough system memory\n");
1366def_value:
1367 adev->pm.smu_prv_buffer_size = 0;
1368}
1369
d38ceaf9 1370/**
06ec9070 1371 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
1372 *
1373 * @adev: amdgpu_device pointer
1374 *
1375 * Validates certain module parameters and updates
1376 * the associated values used by the driver (all asics).
1377 */
912dfc84 1378static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1379{
5b011235
CZ
1380 if (amdgpu_sched_jobs < 4) {
1381 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1382 amdgpu_sched_jobs);
1383 amdgpu_sched_jobs = 4;
76117507 1384 } else if (!is_power_of_2(amdgpu_sched_jobs)){
5b011235
CZ
1385 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1386 amdgpu_sched_jobs);
1387 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1388 }
d38ceaf9 1389
83e74db6 1390 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
1391 /* gart size must be greater or equal to 32M */
1392 dev_warn(adev->dev, "gart size (%d) too small\n",
1393 amdgpu_gart_size);
83e74db6 1394 amdgpu_gart_size = -1;
d38ceaf9
AD
1395 }
1396
36d38372 1397 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1398 /* gtt size must be greater or equal to 32M */
36d38372
CK
1399 dev_warn(adev->dev, "gtt size (%d) too small\n",
1400 amdgpu_gtt_size);
1401 amdgpu_gtt_size = -1;
d38ceaf9
AD
1402 }
1403
d07f14be
RH
1404 /* valid range is between 4 and 9 inclusive */
1405 if (amdgpu_vm_fragment_size != -1 &&
1406 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1407 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1408 amdgpu_vm_fragment_size = -1;
1409 }
1410
5d5bd5e3
KW
1411 if (amdgpu_sched_hw_submission < 2) {
1412 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1413 amdgpu_sched_hw_submission);
1414 amdgpu_sched_hw_submission = 2;
1415 } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1416 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1417 amdgpu_sched_hw_submission);
1418 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1419 }
1420
7951e376
RZ
1421 amdgpu_device_check_smu_prv_buffer_size(adev);
1422
06ec9070 1423 amdgpu_device_check_vm_size(adev);
d38ceaf9 1424
06ec9070 1425 amdgpu_device_check_block_size(adev);
6a7f76e7 1426
19aede77 1427 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84 1428
c6252390 1429 amdgpu_gmc_tmz_set(adev);
01a8dcec 1430
9b498efa
AD
1431 amdgpu_gmc_noretry_set(adev);
1432
e3c00faa 1433 return 0;
d38ceaf9
AD
1434}
1435
/**
 * amdgpu_switcheroo_set_state - set switcheroo state
 *
 * @pdev: pci dev pointer
 * @state: vga_switcheroo state
 *
 * Callback for the switcheroo driver. Suspends or resumes
 * the asics before or after it is powered up using ACPI methods.
 */
8aba21b7
LT
1445static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1446 enum vga_switcheroo_state state)
d38ceaf9
AD
1447{
1448 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 1449 int r;
d38ceaf9 1450
b98c6299 1451 if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
1452 return;
1453
1454 if (state == VGA_SWITCHEROO_ON) {
dd4fa6c1 1455 pr_info("switched on\n");
d38ceaf9
AD
1456 /* don't suspend or resume card normally */
1457 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1458
8f66090b
TZ
1459 pci_set_power_state(pdev, PCI_D0);
1460 amdgpu_device_load_pci_state(pdev);
1461 r = pci_enable_device(pdev);
de185019
AD
1462 if (r)
1463 DRM_WARN("pci_enable_device failed (%d)\n", r);
1464 amdgpu_device_resume(dev, true);
d38ceaf9 1465
d38ceaf9 1466 dev->switch_power_state = DRM_SWITCH_POWER_ON;
d38ceaf9 1467 } else {
dd4fa6c1 1468 pr_info("switched off\n");
d38ceaf9 1469 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
de185019 1470 amdgpu_device_suspend(dev, true);
8f66090b 1471 amdgpu_device_cache_pci_state(pdev);
de185019 1472 /* Shut down the device */
8f66090b
TZ
1473 pci_disable_device(pdev);
1474 pci_set_power_state(pdev, PCI_D3cold);
d38ceaf9
AD
1475 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1476 }
1477}
1478
/**
 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
 *
 * @pdev: pci dev pointer
 *
 * Callback for the switcheroo driver. Check if the switcheroo
 * state can be changed.
 * Returns true if the state can be changed, false if not.
 */
1488static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1489{
1490 struct drm_device *dev = pci_get_drvdata(pdev);
1491
1492 /*
1493 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1494 * locking inversion with the driver load path. And the access here is
1495 * completely racy anyway. So don't bother with locking for now.
1496 */
7e13ad89 1497 return atomic_read(&dev->open_count) == 0;
d38ceaf9
AD
1498}
1499
1500static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1501 .set_gpu_state = amdgpu_switcheroo_set_state,
1502 .reprobe = NULL,
1503 .can_switch = amdgpu_switcheroo_can_switch,
1504};
1505
e3ecdffa
AD
1506/**
1507 * amdgpu_device_ip_set_clockgating_state - set the CG state
1508 *
87e3f136 1509 * @dev: amdgpu_device pointer
e3ecdffa
AD
1510 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1511 * @state: clockgating state (gate or ungate)
1512 *
1513 * Sets the requested clockgating state for all instances of
1514 * the hardware IP specified.
1515 * Returns the error code from the last instance.
1516 */
43fa561f 1517int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1518 enum amd_ip_block_type block_type,
1519 enum amd_clockgating_state state)
d38ceaf9 1520{
43fa561f 1521 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1522 int i, r = 0;
1523
1524 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1525 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1526 continue;
c722865a
RZ
1527 if (adev->ip_blocks[i].version->type != block_type)
1528 continue;
1529 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1530 continue;
1531 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1532 (void *)adev, state);
1533 if (r)
1534 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1535 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1536 }
1537 return r;
1538}
1539
e3ecdffa
AD
1540/**
1541 * amdgpu_device_ip_set_powergating_state - set the PG state
1542 *
87e3f136 1543 * @dev: amdgpu_device pointer
e3ecdffa
AD
1544 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1545 * @state: powergating state (gate or ungate)
1546 *
1547 * Sets the requested powergating state for all instances of
1548 * the hardware IP specified.
1549 * Returns the error code from the last instance.
1550 */
43fa561f 1551int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1552 enum amd_ip_block_type block_type,
1553 enum amd_powergating_state state)
d38ceaf9 1554{
43fa561f 1555 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1556 int i, r = 0;
1557
1558 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1559 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1560 continue;
c722865a
RZ
1561 if (adev->ip_blocks[i].version->type != block_type)
1562 continue;
1563 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1564 continue;
1565 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1566 (void *)adev, state);
1567 if (r)
1568 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1569 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1570 }
1571 return r;
1572}
1573
e3ecdffa
AD
1574/**
1575 * amdgpu_device_ip_get_clockgating_state - get the CG state
1576 *
1577 * @adev: amdgpu_device pointer
1578 * @flags: clockgating feature flags
1579 *
1580 * Walks the list of IPs on the device and updates the clockgating
1581 * flags for each IP.
1582 * Updates @flags with the feature flags for each hardware IP where
1583 * clockgating is enabled.
1584 */
2990a1fc
AD
1585void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1586 u32 *flags)
6cb2d4e4
HR
1587{
1588 int i;
1589
1590 for (i = 0; i < adev->num_ip_blocks; i++) {
1591 if (!adev->ip_blocks[i].status.valid)
1592 continue;
1593 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1594 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1595 }
1596}
1597
e3ecdffa
AD
/**
 * amdgpu_device_ip_wait_for_idle - wait for idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Waits for the requested hardware IP to be idle.
 * Returns 0 for success or a negative error code on failure.
 */
2990a1fc
AD
1607int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1608 enum amd_ip_block_type block_type)
5dbbb60b
AD
1609{
1610 int i, r;
1611
1612 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1613 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1614 continue;
a1255107
AD
1615 if (adev->ip_blocks[i].version->type == block_type) {
1616 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1617 if (r)
1618 return r;
1619 break;
1620 }
1621 }
1622 return 0;
1623
1624}
1625
e3ecdffa
AD
/**
 * amdgpu_device_ip_is_idle - is the hardware IP idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Check if the hardware IP is idle or not.
 * Returns true if the IP is idle, false if not.
 */
2990a1fc
AD
1635bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1636 enum amd_ip_block_type block_type)
5dbbb60b
AD
1637{
1638 int i;
1639
1640 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1641 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1642 continue;
a1255107
AD
1643 if (adev->ip_blocks[i].version->type == block_type)
1644 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1645 }
1646 return true;
1647
1648}
1649
e3ecdffa
AD
1650/**
1651 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1652 *
1653 * @adev: amdgpu_device pointer
87e3f136 1654 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1655 *
1656 * Returns a pointer to the hardware IP block structure
1657 * if it exists for the asic, otherwise NULL.
1658 */
2990a1fc
AD
1659struct amdgpu_ip_block *
1660amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1661 enum amd_ip_block_type type)
d38ceaf9
AD
1662{
1663 int i;
1664
1665 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1666 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1667 return &adev->ip_blocks[i];
1668
1669 return NULL;
1670}
1671
1672/**
2990a1fc 1673 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1674 *
1675 * @adev: amdgpu_device pointer
5fc3aeeb 1676 * @type: enum amd_ip_block_type
d38ceaf9
AD
1677 * @major: major version
1678 * @minor: minor version
1679 *
1680 * return 0 if equal or greater
1681 * return 1 if smaller or the ip_block doesn't exist
1682 */
2990a1fc
AD
1683int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1684 enum amd_ip_block_type type,
1685 u32 major, u32 minor)
d38ceaf9 1686{
2990a1fc 1687 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1688
a1255107
AD
1689 if (ip_block && ((ip_block->version->major > major) ||
1690 ((ip_block->version->major == major) &&
1691 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1692 return 0;
1693
1694 return 1;
1695}
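/*
 * Example (sketch): gate a workaround on a minimum IP version.
 *
 *	if (amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_SMC,
 *					       7, 1) == 0) {
 *		// SMC 7.1 or newer
 *	}
 */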
1696
a1255107 1697/**
2990a1fc 1698 * amdgpu_device_ip_block_add
a1255107
AD
1699 *
1700 * @adev: amdgpu_device pointer
1701 * @ip_block_version: pointer to the IP to add
1702 *
1703 * Adds the IP block driver information to the collection of IPs
1704 * on the asic.
1705 */
2990a1fc
AD
1706int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1707 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1708{
1709 if (!ip_block_version)
1710 return -EINVAL;
1711
7bd939d0
LG
1712 switch (ip_block_version->type) {
1713 case AMD_IP_BLOCK_TYPE_VCN:
1714 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
1715 return 0;
1716 break;
1717 case AMD_IP_BLOCK_TYPE_JPEG:
1718 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
1719 return 0;
1720 break;
1721 default:
1722 break;
1723 }
1724
e966a725 1725 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1726 ip_block_version->funcs->name);
1727
a1255107
AD
1728 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1729
1730 return 0;
1731}
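/*
 * ASIC setup code calls this once per IP in initialization order, e.g.
 * (sketch, the exact block list depends on the ASIC):
 *
 *	amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
 *	amdgpu_device_ip_block_add(adev, &gmc_v8_0_ip_block);
 *	amdgpu_device_ip_block_add(adev, &tonga_ih_ip_block);
 */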
1732
e3ecdffa
AD
/**
 * amdgpu_device_enable_virtual_display - enable virtual display feature
 *
 * @adev: amdgpu_device pointer
 *
 * Enables the virtual display feature if the user has enabled it via
 * the module parameter virtual_display. This feature provides a virtual
 * display hardware on headless boards or in virtualized environments.
 * This function parses and validates the configuration string specified by
 * the user and configures the virtual display configuration (number of
 * virtual connectors, crtcs, etc.) specified.
 */
483ef985 1745static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1746{
1747 adev->enable_virtual_display = false;
1748
1749 if (amdgpu_virtual_display) {
8f66090b 1750 const char *pci_address_name = pci_name(adev->pdev);
0f66356d 1751 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1752
1753 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1754 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1755 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1756 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1757 if (!strcmp("all", pciaddname)
1758 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1759 long num_crtc;
1760 int res = -1;
1761
9accf2fd 1762 adev->enable_virtual_display = true;
0f66356d
ED
1763
1764 if (pciaddname_tmp)
1765 res = kstrtol(pciaddname_tmp, 10,
1766 &num_crtc);
1767
1768 if (!res) {
1769 if (num_crtc < 1)
1770 num_crtc = 1;
1771 if (num_crtc > 6)
1772 num_crtc = 6;
1773 adev->mode_info.num_crtc = num_crtc;
1774 } else {
1775 adev->mode_info.num_crtc = 1;
1776 }
9accf2fd
ED
1777 break;
1778 }
1779 }
1780
0f66356d
ED
1781 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1782 amdgpu_virtual_display, pci_address_name,
1783 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
1784
1785 kfree(pciaddstr);
1786 }
1787}
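/*
 * Illustrative (hypothetical) values for the virtual_display module
 * parameter parsed above; entries are "<pci address>,<num_crtc>" separated
 * by ';', and "all" matches every device:
 *
 *	amdgpu.virtual_display=0000:26:00.0,2
 *	amdgpu.virtual_display=all,1
 */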
1788
e3ecdffa
AD
1789/**
1790 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1791 *
1792 * @adev: amdgpu_device pointer
1793 *
1794 * Parses the asic configuration parameters specified in the gpu info
1795 * firmware and makes them available to the driver for use in configuring
1796 * the asic.
1797 * Returns 0 on success, -EINVAL on failure.
1798 */
e2a75f88
AD
1799static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1800{
e2a75f88 1801 const char *chip_name;
c0a43457 1802 char fw_name[40];
e2a75f88
AD
1803 int err;
1804 const struct gpu_info_firmware_header_v1_0 *hdr;
1805
ab4fe3e1
HR
1806 adev->firmware.gpu_info_fw = NULL;
1807
72de33f8 1808 if (adev->mman.discovery_bin) {
258620d0 1809 amdgpu_discovery_get_gfx_info(adev);
cc375d8c
TY
1810
1811 /*
1812 * FIXME: The bounding box is still needed by Navi12, so
1813 * temporarily read it from gpu_info firmware. Should be dropped
1814 * when DAL no longer needs it.
1815 */
1816 if (adev->asic_type != CHIP_NAVI12)
1817 return 0;
258620d0
AD
1818 }
1819
e2a75f88 1820 switch (adev->asic_type) {
e2a75f88
AD
1821#ifdef CONFIG_DRM_AMDGPU_SI
1822 case CHIP_VERDE:
1823 case CHIP_TAHITI:
1824 case CHIP_PITCAIRN:
1825 case CHIP_OLAND:
1826 case CHIP_HAINAN:
1827#endif
1828#ifdef CONFIG_DRM_AMDGPU_CIK
1829 case CHIP_BONAIRE:
1830 case CHIP_HAWAII:
1831 case CHIP_KAVERI:
1832 case CHIP_KABINI:
1833 case CHIP_MULLINS:
1834#endif
da87c30b
AD
1835 case CHIP_TOPAZ:
1836 case CHIP_TONGA:
1837 case CHIP_FIJI:
1838 case CHIP_POLARIS10:
1839 case CHIP_POLARIS11:
1840 case CHIP_POLARIS12:
1841 case CHIP_VEGAM:
1842 case CHIP_CARRIZO:
1843 case CHIP_STONEY:
27c0bc71 1844 case CHIP_VEGA20:
44b3253a 1845 case CHIP_ALDEBARAN:
84d244a3
JC
1846 case CHIP_SIENNA_CICHLID:
1847 case CHIP_NAVY_FLOUNDER:
eac88a5f 1848 case CHIP_DIMGREY_CAVEFISH:
0e5f4b09 1849 case CHIP_BEIGE_GOBY:
e2a75f88
AD
1850 default:
1851 return 0;
1852 case CHIP_VEGA10:
1853 chip_name = "vega10";
1854 break;
3f76dced
AD
1855 case CHIP_VEGA12:
1856 chip_name = "vega12";
1857 break;
2d2e5e7e 1858 case CHIP_RAVEN:
54f78a76 1859 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
54c4d17e 1860 chip_name = "raven2";
54f78a76 1861 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
741deade 1862 chip_name = "picasso";
54c4d17e
FX
1863 else
1864 chip_name = "raven";
2d2e5e7e 1865 break;
65e60f6e
LM
1866 case CHIP_ARCTURUS:
1867 chip_name = "arcturus";
1868 break;
b51a26a0 1869 case CHIP_RENOIR:
2e62f0b5
PL
1870 if (adev->apu_flags & AMD_APU_IS_RENOIR)
1871 chip_name = "renoir";
1872 else
1873 chip_name = "green_sardine";
b51a26a0 1874 break;
23c6268e
HR
1875 case CHIP_NAVI10:
1876 chip_name = "navi10";
1877 break;
ed42cfe1
XY
1878 case CHIP_NAVI14:
1879 chip_name = "navi14";
1880 break;
42b325e5
XY
1881 case CHIP_NAVI12:
1882 chip_name = "navi12";
1883 break;
4e52a9f8
HR
1884 case CHIP_VANGOGH:
1885 chip_name = "vangogh";
1886 break;
e2a75f88
AD
1887 }
1888
1889 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
ab4fe3e1 1890 err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
e2a75f88
AD
1891 if (err) {
1892 dev_err(adev->dev,
1893 "Failed to load gpu_info firmware \"%s\"\n",
1894 fw_name);
1895 goto out;
1896 }
ab4fe3e1 1897 err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
e2a75f88
AD
1898 if (err) {
1899 dev_err(adev->dev,
1900 "Failed to validate gpu_info firmware \"%s\"\n",
1901 fw_name);
1902 goto out;
1903 }
1904
ab4fe3e1 1905 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
1906 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1907
1908 switch (hdr->version_major) {
1909 case 1:
1910 {
1911 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 1912 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
1913 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1914
cc375d8c
TY
1915 /*
1916 * Should be dropped when DAL no longer needs it.
1917 */
1918 if (adev->asic_type == CHIP_NAVI12)
ec51d3fa
XY
1919 goto parse_soc_bounding_box;
1920
b5ab16bf
AD
1921 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1922 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1923 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1924 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 1925 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
1926 le32_to_cpu(gpu_info_fw->gc_num_tccs);
1927 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1928 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1929 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1930 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 1931 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
1932 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1933 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
1934 adev->gfx.cu_info.max_waves_per_simd =
1935 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1936 adev->gfx.cu_info.max_scratch_slots_per_cu =
1937 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1938 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 1939 if (hdr->version_minor >= 1) {
35c2e910
HZ
1940 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1941 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1942 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1943 adev->gfx.config.num_sc_per_sh =
1944 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1945 adev->gfx.config.num_packer_per_sc =
1946 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1947 }
ec51d3fa
XY
1948
1949parse_soc_bounding_box:
ec51d3fa
XY
1950 /*
1951 * soc bounding box info is not integrated in the discovery table,
258620d0 1952 * so we always need to parse it from gpu info firmware if needed.
ec51d3fa 1953 */
48321c3d
HW
1954 if (hdr->version_minor == 2) {
1955 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1956 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1957 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1958 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1959 }
e2a75f88
AD
1960 break;
1961 }
1962 default:
1963 dev_err(adev->dev,
1964 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1965 err = -EINVAL;
1966 goto out;
1967 }
1968out:
e2a75f88
AD
1969 return err;
1970}
1971
e3ecdffa
AD
1972/**
1973 * amdgpu_device_ip_early_init - run early init for hardware IPs
1974 *
1975 * @adev: amdgpu_device pointer
1976 *
1977 * Early initialization pass for hardware IPs. The hardware IPs that make
1978 * up each asic are discovered and each IP's early_init callback is run. This
1979 * is the first stage in initializing the asic.
1980 * Returns 0 on success, negative error code on failure.
1981 */
06ec9070 1982static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 1983{
aaa36a97 1984 int i, r;
d38ceaf9 1985
483ef985 1986 amdgpu_device_enable_virtual_display(adev);
a6be7570 1987
00a979f3 1988 if (amdgpu_sriov_vf(adev)) {
00a979f3 1989 r = amdgpu_virt_request_full_gpu(adev, true);
aaa36a97
AD
1990 if (r)
1991 return r;
00a979f3
WS
1992 }
1993
d38ceaf9 1994 switch (adev->asic_type) {
33f34802
KW
1995#ifdef CONFIG_DRM_AMDGPU_SI
1996 case CHIP_VERDE:
1997 case CHIP_TAHITI:
1998 case CHIP_PITCAIRN:
1999 case CHIP_OLAND:
2000 case CHIP_HAINAN:
295d0daf 2001 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
2002 r = si_set_ip_blocks(adev);
2003 if (r)
2004 return r;
2005 break;
2006#endif
a2e73f56
AD
2007#ifdef CONFIG_DRM_AMDGPU_CIK
2008 case CHIP_BONAIRE:
2009 case CHIP_HAWAII:
2010 case CHIP_KAVERI:
2011 case CHIP_KABINI:
2012 case CHIP_MULLINS:
e1ad2d53 2013 if (adev->flags & AMD_IS_APU)
a2e73f56 2014 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
2015 else
2016 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
2017
2018 r = cik_set_ip_blocks(adev);
2019 if (r)
2020 return r;
2021 break;
2022#endif
da87c30b
AD
2023 case CHIP_TOPAZ:
2024 case CHIP_TONGA:
2025 case CHIP_FIJI:
2026 case CHIP_POLARIS10:
2027 case CHIP_POLARIS11:
2028 case CHIP_POLARIS12:
2029 case CHIP_VEGAM:
2030 case CHIP_CARRIZO:
2031 case CHIP_STONEY:
2032 if (adev->flags & AMD_IS_APU)
2033 adev->family = AMDGPU_FAMILY_CZ;
2034 else
2035 adev->family = AMDGPU_FAMILY_VI;
2036
2037 r = vi_set_ip_blocks(adev);
2038 if (r)
2039 return r;
2040 break;
e48a3cd9
AD
2041 case CHIP_VEGA10:
2042 case CHIP_VEGA12:
e4bd8170 2043 case CHIP_VEGA20:
e48a3cd9 2044 case CHIP_RAVEN:
61cf44c1 2045 case CHIP_ARCTURUS:
b51a26a0 2046 case CHIP_RENOIR:
c00a18ec 2047 case CHIP_ALDEBARAN:
70534d1e 2048 if (adev->flags & AMD_IS_APU)
2ca8a5d2
CZ
2049 adev->family = AMDGPU_FAMILY_RV;
2050 else
2051 adev->family = AMDGPU_FAMILY_AI;
460826e6
KW
2052
2053 r = soc15_set_ip_blocks(adev);
2054 if (r)
2055 return r;
2056 break;
0a5b8c7b 2057 case CHIP_NAVI10:
7ecb5cd4 2058 case CHIP_NAVI14:
4808cf9c 2059 case CHIP_NAVI12:
11e8aef5 2060 case CHIP_SIENNA_CICHLID:
41f446bf 2061 case CHIP_NAVY_FLOUNDER:
144722fa 2062 case CHIP_DIMGREY_CAVEFISH:
b41f5b7a 2063 case CHIP_BEIGE_GOBY:
4e52a9f8
HR
2064 case CHIP_VANGOGH:
2065 if (adev->asic_type == CHIP_VANGOGH)
2066 adev->family = AMDGPU_FAMILY_VGH;
2067 else
2068 adev->family = AMDGPU_FAMILY_NV;
0a5b8c7b
HR
2069
2070 r = nv_set_ip_blocks(adev);
2071 if (r)
2072 return r;
2073 break;
d38ceaf9
AD
2074 default:
2075 /* FIXME: not supported yet */
2076 return -EINVAL;
2077 }
2078
1884734a 2079 amdgpu_amdkfd_device_probe(adev);
2080
3b94fb10 2081 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 2082 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 2083 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
4215a119
HC
2084 if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2085 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
00f54b97 2086
d38ceaf9
AD
2087 for (i = 0; i < adev->num_ip_blocks; i++) {
2088 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
ed8cf00c
HR
2089 DRM_ERROR("disabled ip block: %d <%s>\n",
2090 i, adev->ip_blocks[i].version->funcs->name);
a1255107 2091 adev->ip_blocks[i].status.valid = false;
d38ceaf9 2092 } else {
a1255107
AD
2093 if (adev->ip_blocks[i].version->funcs->early_init) {
2094 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 2095 if (r == -ENOENT) {
a1255107 2096 adev->ip_blocks[i].status.valid = false;
2c1a2784 2097 } else if (r) {
a1255107
AD
2098 DRM_ERROR("early_init of IP block <%s> failed %d\n",
2099 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2100 return r;
2c1a2784 2101 } else {
a1255107 2102 adev->ip_blocks[i].status.valid = true;
2c1a2784 2103 }
974e6b64 2104 } else {
a1255107 2105 adev->ip_blocks[i].status.valid = true;
d38ceaf9 2106 }
d38ceaf9 2107 }
21a249ca
AD
2108 /* get the vbios after the asic_funcs are set up */
2109 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
2110 r = amdgpu_device_parse_gpu_info_fw(adev);
2111 if (r)
2112 return r;
2113
21a249ca
AD
2114 /* Read BIOS */
2115 if (!amdgpu_get_bios(adev))
2116 return -EINVAL;
2117
2118 r = amdgpu_atombios_init(adev);
2119 if (r) {
2120 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2121 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2122 return r;
2123 }
77eabc6f
PJZ
2124
2125 /* get pf2vf msg info at its earliest time */
2126 if (amdgpu_sriov_vf(adev))
2127 amdgpu_virt_init_data_exchange(adev);
2128
21a249ca 2129 }
d38ceaf9
AD
2130 }
2131
395d1fb9
NH
2132 adev->cg_flags &= amdgpu_cg_mask;
2133 adev->pg_flags &= amdgpu_pg_mask;
2134
d38ceaf9
AD
2135 return 0;
2136}
2137
0a4f2520
RZ
2138static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2139{
2140 int i, r;
2141
2142 for (i = 0; i < adev->num_ip_blocks; i++) {
2143 if (!adev->ip_blocks[i].status.sw)
2144 continue;
2145 if (adev->ip_blocks[i].status.hw)
2146 continue;
2147 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 2148 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
2149 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2150 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2151 if (r) {
2152 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2153 adev->ip_blocks[i].version->funcs->name, r);
2154 return r;
2155 }
2156 adev->ip_blocks[i].status.hw = true;
2157 }
2158 }
2159
2160 return 0;
2161}
2162
2163static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2164{
2165 int i, r;
2166
2167 for (i = 0; i < adev->num_ip_blocks; i++) {
2168 if (!adev->ip_blocks[i].status.sw)
2169 continue;
2170 if (adev->ip_blocks[i].status.hw)
2171 continue;
2172 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2173 if (r) {
2174 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2175 adev->ip_blocks[i].version->funcs->name, r);
2176 return r;
2177 }
2178 adev->ip_blocks[i].status.hw = true;
2179 }
2180
2181 return 0;
2182}
2183
7a3e0bb2
RZ
2184static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2185{
2186 int r = 0;
2187 int i;
80f41f84 2188 uint32_t smu_version;
7a3e0bb2
RZ
2189
2190 if (adev->asic_type >= CHIP_VEGA10) {
2191 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
2192 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2193 continue;
2194
e3c1b071 2195 if (!adev->ip_blocks[i].status.sw)
2196 continue;
2197
482f0e53
ML
2198 /* no need to do the fw loading again if already done */
2199 if (adev->ip_blocks[i].status.hw == true)
2200 break;
2201
53b3f8f4 2202 if (amdgpu_in_reset(adev) || adev->in_suspend) {
482f0e53
ML
2203 r = adev->ip_blocks[i].version->funcs->resume(adev);
2204 if (r) {
2205 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 2206 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
2207 return r;
2208 }
2209 } else {
2210 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2211 if (r) {
2212 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2213 adev->ip_blocks[i].version->funcs->name, r);
2214 return r;
7a3e0bb2 2215 }
7a3e0bb2 2216 }
482f0e53
ML
2217
2218 adev->ip_blocks[i].status.hw = true;
2219 break;
7a3e0bb2
RZ
2220 }
2221 }
482f0e53 2222
8973d9ec
ED
2223 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2224 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 2225
80f41f84 2226 return r;
7a3e0bb2
RZ
2227}
2228
e3ecdffa
AD
2229/**
2230 * amdgpu_device_ip_init - run init for hardware IPs
2231 *
2232 * @adev: amdgpu_device pointer
2233 *
2234 * Main initialization pass for hardware IPs. The list of all the hardware
2235 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2236 * are run. sw_init initializes the software state associated with each IP
2237 * and hw_init initializes the hardware associated with each IP.
2238 * Returns 0 on success, negative error code on failure.
2239 */
06ec9070 2240static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
2241{
2242 int i, r;
2243
c030f2e4 2244 r = amdgpu_ras_init(adev);
2245 if (r)
2246 return r;
2247
d38ceaf9 2248 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2249 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2250 continue;
a1255107 2251 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 2252 if (r) {
a1255107
AD
2253 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2254 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 2255 goto init_failed;
2c1a2784 2256 }
a1255107 2257 adev->ip_blocks[i].status.sw = true;
bfca0289 2258
d38ceaf9 2259 /* need to do gmc hw init early so we can allocate gpu mem */
a1255107 2260 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
06ec9070 2261 r = amdgpu_device_vram_scratch_init(adev);
2c1a2784
AD
2262 if (r) {
2263 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
72d3f592 2264 goto init_failed;
2c1a2784 2265 }
a1255107 2266 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
2267 if (r) {
2268 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 2269 goto init_failed;
2c1a2784 2270 }
06ec9070 2271 r = amdgpu_device_wb_init(adev);
2c1a2784 2272 if (r) {
06ec9070 2273 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 2274 goto init_failed;
2c1a2784 2275 }
a1255107 2276 adev->ip_blocks[i].status.hw = true;
2493664f
ML
2277
2278 /* right after GMC hw init, we create CSA */
f92d5c61 2279 if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
1e256e27
RZ
2280 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2281 AMDGPU_GEM_DOMAIN_VRAM,
2282 AMDGPU_CSA_SIZE);
2493664f
ML
2283 if (r) {
2284 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 2285 goto init_failed;
2493664f
ML
2286 }
2287 }
d38ceaf9
AD
2288 }
2289 }
2290
c9ffa427
YT
2291 if (amdgpu_sriov_vf(adev))
2292 amdgpu_virt_init_data_exchange(adev);
2293
533aed27
AG
2294 r = amdgpu_ib_pool_init(adev);
2295 if (r) {
2296 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2297 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2298 goto init_failed;
2299 }
2300
c8963ea4
RZ
2301 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2302 if (r)
72d3f592 2303 goto init_failed;
0a4f2520
RZ
2304
2305 r = amdgpu_device_ip_hw_init_phase1(adev);
2306 if (r)
72d3f592 2307 goto init_failed;
0a4f2520 2308
7a3e0bb2
RZ
2309 r = amdgpu_device_fw_loading(adev);
2310 if (r)
72d3f592 2311 goto init_failed;
7a3e0bb2 2312
0a4f2520
RZ
2313 r = amdgpu_device_ip_hw_init_phase2(adev);
2314 if (r)
72d3f592 2315 goto init_failed;
d38ceaf9 2316
121a2bc6
AG
2317 /*
2318 * retired pages will be loaded from eeprom and reserved here,
2319 * it should be called after amdgpu_device_ip_hw_init_phase2 since
2320 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2321 * for I2C communication, which is only true at this point.
b82e65a9
GC
2322 *
2323 * amdgpu_ras_recovery_init may fail, but the upper layers only care about
2324 * failures caused by a bad gpu situation and stop the amdgpu init process
2325 * accordingly. For other failures, it still releases all
2326 * the resources and prints an error message, rather than returning a
2327 * negative value to the upper level.
121a2bc6
AG
2328 *
2329 * Note: theoretically, this should be called before all vram allocations
2330 * to protect retired pages from being misused.
2331 */
b82e65a9
GC
2332 r = amdgpu_ras_recovery_init(adev);
2333 if (r)
2334 goto init_failed;
121a2bc6 2335
3e2e2ab5
HZ
2336 if (adev->gmc.xgmi.num_physical_nodes > 1)
2337 amdgpu_xgmi_add_device(adev);
e3c1b071 2338
2339 /* Don't init kfd if the whole hive needs to be reset during init */
2340 if (!adev->gmc.xgmi.pending_reset)
2341 amdgpu_amdkfd_device_init(adev);
c6332b97 2342
bd607166
KR
2343 amdgpu_fru_get_product_info(adev);
2344
72d3f592 2345init_failed:
c9ffa427 2346 if (amdgpu_sriov_vf(adev))
c6332b97 2347 amdgpu_virt_release_full_gpu(adev, true);
2348
72d3f592 2349 return r;
d38ceaf9
AD
2350}
2351
e3ecdffa
AD
2352/**
2353 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2354 *
2355 * @adev: amdgpu_device pointer
2356 *
2357 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2358 * this function before a GPU reset. If the value is retained after a
2359 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2360 */
06ec9070 2361static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2362{
2363 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2364}
2365
e3ecdffa
AD
2366/**
2367 * amdgpu_device_check_vram_lost - check if vram is valid
2368 *
2369 * @adev: amdgpu_device pointer
2370 *
2371 * Checks the reset magic value written to the gart pointer in VRAM.
2372 * The driver calls this after a GPU reset to see if the contents of
2373 * VRAM have been lost or not.
2374 * Returns true if vram is lost, false if not.
2375 */
06ec9070 2376static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2377{
dadce777
EQ
2378 if (memcmp(adev->gart.ptr, adev->reset_magic,
2379 AMDGPU_RESET_MAGIC_NUM))
2380 return true;
2381
53b3f8f4 2382 if (!amdgpu_in_reset(adev))
dadce777
EQ
2383 return false;
2384
2385 /*
2386 * For all ASICs with baco/mode1 reset, the VRAM is
2387 * always assumed to be lost.
2388 */
2389 switch (amdgpu_asic_reset_method(adev)) {
2390 case AMD_RESET_METHOD_BACO:
2391 case AMD_RESET_METHOD_MODE1:
2392 return true;
2393 default:
2394 return false;
2395 }
0c49e0b8
CZ
2396}
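/*
 * Illustrative (hypothetical) use of the two helpers above around a reset:
 *
 *	amdgpu_device_fill_reset_magic(adev);            - before the reset
 *	... perform the ASIC reset ...
 *	vram_lost = amdgpu_device_check_vram_lost(adev); - after the reset
 */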
2397
e3ecdffa 2398/**
1112a46b 2399 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2400 *
2401 * @adev: amdgpu_device pointer
b8b72130 2402 * @state: clockgating state (gate or ungate)
e3ecdffa 2403 *
e3ecdffa 2404 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2405 * set_clockgating_state callbacks are run.
2406 * During late init this pass enables clockgating for hardware IPs;
2407 * during fini or suspend it disables clockgating for hardware IPs.
e3ecdffa
AD
2408 * Returns 0 on success, negative error code on failure.
2409 */
fdd34271 2410
5d89bb2d
LL
2411int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2412 enum amd_clockgating_state state)
d38ceaf9 2413{
1112a46b 2414 int i, j, r;
d38ceaf9 2415
4a2ba394
SL
2416 if (amdgpu_emu_mode == 1)
2417 return 0;
2418
1112a46b
RZ
2419 for (j = 0; j < adev->num_ip_blocks; j++) {
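		/* walk the IP list in discovery order when gating, in reverse when ungating */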
2420 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2421 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2422 continue;
5d70a549
PV
2423 /* skip CG for GFX on S0ix */
2424 if (adev->in_s0ix &&
2425 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX)
2426 continue;
4a446d55 2427 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2428 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2429 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2430 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2431 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2432 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2433 /* enable clockgating to save power */
a1255107 2434 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2435 state);
4a446d55
AD
2436 if (r) {
2437 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2438 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2439 return r;
2440 }
b0b00ff1 2441 }
d38ceaf9 2442 }
06b18f61 2443
c9f96fd5
RZ
2444 return 0;
2445}
2446
5d89bb2d
LL
2447int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2448 enum amd_powergating_state state)
c9f96fd5 2449{
1112a46b 2450 int i, j, r;
06b18f61 2451
c9f96fd5
RZ
2452 if (amdgpu_emu_mode == 1)
2453 return 0;
2454
1112a46b
RZ
2455 for (j = 0; j < adev->num_ip_blocks; j++) {
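		/* walk the IP list in discovery order when gating, in reverse when ungating */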
2456 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2457 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5 2458 continue;
5d70a549
PV
2459 /* skip PG for GFX on S0ix */
2460 if (adev->in_s0ix &&
2461 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX)
2462 continue;
c9f96fd5
RZ
2463 /* skip CG for VCE/UVD, it's handled specially */
2464 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2465 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2466 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2467 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2468 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2469 /* enable powergating to save power */
2470 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2471 state);
c9f96fd5
RZ
2472 if (r) {
2473 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2474 adev->ip_blocks[i].version->funcs->name, r);
2475 return r;
2476 }
2477 }
2478 }
2dc80b00
S
2479 return 0;
2480}
2481
beff74bc
AD
2482static int amdgpu_device_enable_mgpu_fan_boost(void)
2483{
2484 struct amdgpu_gpu_instance *gpu_ins;
2485 struct amdgpu_device *adev;
2486 int i, ret = 0;
2487
2488 mutex_lock(&mgpu_info.mutex);
2489
2490 /*
2491 * MGPU fan boost feature should be enabled
2492 * only when there are two or more dGPUs in
2493 * the system
2494 */
2495 if (mgpu_info.num_dgpu < 2)
2496 goto out;
2497
2498 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2499 gpu_ins = &(mgpu_info.gpu_ins[i]);
2500 adev = gpu_ins->adev;
2501 if (!(adev->flags & AMD_IS_APU) &&
f10bb940 2502 !gpu_ins->mgpu_fan_enabled) {
beff74bc
AD
2503 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2504 if (ret)
2505 break;
2506
2507 gpu_ins->mgpu_fan_enabled = 1;
2508 }
2509 }
2510
2511out:
2512 mutex_unlock(&mgpu_info.mutex);
2513
2514 return ret;
2515}
2516
e3ecdffa
AD
2517/**
2518 * amdgpu_device_ip_late_init - run late init for hardware IPs
2519 *
2520 * @adev: amdgpu_device pointer
2521 *
2522 * Late initialization pass for hardware IPs. The list of all the hardware
2523 * IPs that make up the asic is walked and the late_init callbacks are run.
2524 * late_init covers any special initialization that an IP requires
2525 * after all of them have been initialized or something that needs to happen
2526 * late in the init process.
2527 * Returns 0 on success, negative error code on failure.
2528 */
06ec9070 2529static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2530{
60599a03 2531 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2532 int i = 0, r;
2533
2534 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2535 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2536 continue;
2537 if (adev->ip_blocks[i].version->funcs->late_init) {
2538 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2539 if (r) {
2540 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2541 adev->ip_blocks[i].version->funcs->name, r);
2542 return r;
2543 }
2dc80b00 2544 }
73f847db 2545 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2546 }
2547
a891d239
DL
2548 amdgpu_ras_set_error_query_ready(adev, true);
2549
1112a46b
RZ
2550 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2551 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2552
06ec9070 2553 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2554
beff74bc
AD
2555 r = amdgpu_device_enable_mgpu_fan_boost();
2556 if (r)
2557 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2558
2d02893f 2559 /* For XGMI + passthrough configuration on arcturus, enable light SBR */
2560 if (adev->asic_type == CHIP_ARCTURUS &&
2561 amdgpu_passthrough(adev) &&
2562 adev->gmc.xgmi.num_physical_nodes > 1)
2563 smu_set_light_sbr(&adev->smu, true);
60599a03
EQ
2564
2565 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2566 mutex_lock(&mgpu_info.mutex);
2567
2568 /*
2569 * Reset device p-state to low as this was booted with high.
2570 *
2571 * This should be performed only after all devices from the same
2572 * hive get initialized.
2573 *
2574 * However, it's unknown in advance how many devices are in the hive,
2575 * as they are counted one by one during device initialization.
2576 *
2577 * So, we wait until all XGMI interlinked devices are initialized.
2578 * This may bring some delays as those devices may come from
2579 * different hives. But that should be OK.
2580 */
2581 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2582 for (i = 0; i < mgpu_info.num_gpu; i++) {
2583 gpu_instance = &(mgpu_info.gpu_ins[i]);
2584 if (gpu_instance->adev->flags & AMD_IS_APU)
2585 continue;
2586
d84a430d
JK
2587 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2588 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
2589 if (r) {
2590 DRM_ERROR("pstate setting failed (%d).\n", r);
2591 break;
2592 }
2593 }
2594 }
2595
2596 mutex_unlock(&mgpu_info.mutex);
2597 }
2598
d38ceaf9
AD
2599 return 0;
2600}
2601
e9669fb7 2602static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
d38ceaf9
AD
2603{
2604 int i, r;
2605
e9669fb7
AG
2606 for (i = 0; i < adev->num_ip_blocks; i++) {
2607 if (!adev->ip_blocks[i].version->funcs->early_fini)
2608 continue;
5278a159 2609
e9669fb7
AG
2610 r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
2611 if (r) {
2612 DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
2613 adev->ip_blocks[i].version->funcs->name, r);
2614 }
2615 }
c030f2e4 2616
e9669fb7 2617 amdgpu_amdkfd_suspend(adev, false);
a82400b5 2618
05df1f01 2619 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2620 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2621
3e96dbfd
AD
2622 /* need to disable SMC first */
2623 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2624 if (!adev->ip_blocks[i].status.hw)
3e96dbfd 2625 continue;
fdd34271 2626 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
a1255107 2627 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3e96dbfd
AD
2628 /* XXX handle errors */
2629 if (r) {
2630 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
a1255107 2631 adev->ip_blocks[i].version->funcs->name, r);
3e96dbfd 2632 }
a1255107 2633 adev->ip_blocks[i].status.hw = false;
3e96dbfd
AD
2634 break;
2635 }
2636 }
2637
d38ceaf9 2638 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2639 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2640 continue;
8201a67a 2641
a1255107 2642 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2643 /* XXX handle errors */
2c1a2784 2644 if (r) {
a1255107
AD
2645 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2646 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2647 }
8201a67a 2648
a1255107 2649 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2650 }
2651
e9669fb7
AG
2652 return 0;
2653}
2654
2655/**
2656 * amdgpu_device_ip_fini - run fini for hardware IPs
2657 *
2658 * @adev: amdgpu_device pointer
2659 *
2660 * Main teardown pass for hardware IPs. The list of all the hardware
2661 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2662 * are run. hw_fini tears down the hardware associated with each IP
2663 * and sw_fini tears down any software state associated with each IP.
2664 * Returns 0 on success, negative error code on failure.
2665 */
2666static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2667{
2668 int i, r;
2669
2670 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2671 amdgpu_virt_release_ras_err_handler_data(adev);
2672
2673 amdgpu_ras_pre_fini(adev);
2674
2675 if (adev->gmc.xgmi.num_physical_nodes > 1)
2676 amdgpu_xgmi_remove_device(adev);
2677
2678 amdgpu_amdkfd_device_fini_sw(adev);
9950cda2 2679
d38ceaf9 2680 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2681 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2682 continue;
c12aba3a
ML
2683
2684 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2685 amdgpu_ucode_free_bo(adev);
1e256e27 2686 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a
ML
2687 amdgpu_device_wb_fini(adev);
2688 amdgpu_device_vram_scratch_fini(adev);
533aed27 2689 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2690 }
2691
a1255107 2692 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2693 /* XXX handle errors */
2c1a2784 2694 if (r) {
a1255107
AD
2695 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2696 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2697 }
a1255107
AD
2698 adev->ip_blocks[i].status.sw = false;
2699 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2700 }
2701
a6dcfd9c 2702 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2703 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2704 continue;
a1255107
AD
2705 if (adev->ip_blocks[i].version->funcs->late_fini)
2706 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2707 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2708 }
2709
c030f2e4 2710 amdgpu_ras_fini(adev);
2711
030308fc 2712 if (amdgpu_sriov_vf(adev))
24136135
ML
2713 if (amdgpu_virt_release_full_gpu(adev, false))
2714 DRM_ERROR("failed to release exclusive mode on fini\n");
2493664f 2715
d38ceaf9
AD
2716 return 0;
2717}
2718
e3ecdffa 2719/**
beff74bc 2720 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2721 *
1112a46b 2722 * @work: work_struct.
e3ecdffa 2723 */
beff74bc 2724static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2725{
2726 struct amdgpu_device *adev =
beff74bc 2727 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2728 int r;
2729
2730 r = amdgpu_ib_ring_tests(adev);
2731 if (r)
2732 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2733}
2734
1e317b99
RZ
2735static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2736{
2737 struct amdgpu_device *adev =
2738 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2739
2740 mutex_lock(&adev->gfx.gfx_off_mutex);
2741 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2742 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2743 adev->gfx.gfx_off_state = true;
2744 }
2745 mutex_unlock(&adev->gfx.gfx_off_mutex);
2746}
2747
e3ecdffa 2748/**
e7854a03 2749 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2750 *
2751 * @adev: amdgpu_device pointer
2752 *
2753 * Main suspend function for hardware IPs. The list of all the hardware
2754 * IPs that make up the asic is walked, clockgating is disabled and the
2755 * suspend callbacks are run. suspend puts the hardware and software state
2756 * in each IP into a state suitable for suspend.
2757 * Returns 0 on success, negative error code on failure.
2758 */
e7854a03
AD
2759static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2760{
2761 int i, r;
2762
50ec83f0
AD
2763 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2764 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2765
e7854a03
AD
2766 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2767 if (!adev->ip_blocks[i].status.valid)
2768 continue;
2b9f7848 2769
e7854a03 2770 /* displays are handled separately */
2b9f7848
ND
2771 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
2772 continue;
2773
2774 /* XXX handle errors */
2775 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2776 /* XXX handle errors */
2777 if (r) {
2778 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2779 adev->ip_blocks[i].version->funcs->name, r);
2780 return r;
e7854a03 2781 }
2b9f7848
ND
2782
2783 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
2784 }
2785
e7854a03
AD
2786 return 0;
2787}
2788
2789/**
2790 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2791 *
2792 * @adev: amdgpu_device pointer
2793 *
2794 * Main suspend function for hardware IPs. The list of all the hardware
2795 * IPs that make up the asic is walked, clockgating is disabled and the
2796 * suspend callbacks are run. suspend puts the hardware and software state
2797 * in each IP into a state suitable for suspend.
2798 * Returns 0 on success, negative error code on failure.
2799 */
2800static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2801{
2802 int i, r;
2803
557f42a2 2804 if (adev->in_s0ix)
34416931 2805 amdgpu_gfx_state_change_set(adev, sGpuChangeState_D3Entry);
34416931 2806
d38ceaf9 2807 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2808 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2809 continue;
e7854a03
AD
2810 /* displays are handled in phase1 */
2811 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2812 continue;
bff77e86
LM
2813 /* PSP lost connection when err_event_athub occurs */
2814 if (amdgpu_ras_intr_triggered() &&
2815 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2816 adev->ip_blocks[i].status.hw = false;
2817 continue;
2818 }
e3c1b071 2819
2820 /* skip unnecessary suspend if we do not initialize them yet */
2821 if (adev->gmc.xgmi.pending_reset &&
2822 !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2823 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
2824 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2825 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
2826 adev->ip_blocks[i].status.hw = false;
2827 continue;
2828 }
557f42a2 2829
32ff160d
AD
2830 /* skip suspend of gfx and psp for S0ix
2831 * gfx is in gfxoff state, so on resume it will exit gfxoff just
2832 * like at runtime. PSP is also part of the always on hardware
2833 * so no need to suspend it.
2834 */
557f42a2 2835 if (adev->in_s0ix &&
32ff160d
AD
2836 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
2837 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX))
557f42a2
AD
2838 continue;
2839
d38ceaf9 2840 /* XXX handle errors */
a1255107 2841 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 2842 /* XXX handle errors */
2c1a2784 2843 if (r) {
a1255107
AD
2844 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2845 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2846 }
876923fb 2847 adev->ip_blocks[i].status.hw = false;
a3a09142 2848 /* handle putting the SMC in the appropriate state */
86b93fd6
JZ
2849 if(!amdgpu_sriov_vf(adev)){
2850 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2851 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
2852 if (r) {
2853 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
2854 adev->mp1_state, r);
2855 return r;
2856 }
a3a09142
AD
2857 }
2858 }
d38ceaf9
AD
2859 }
2860
2861 return 0;
2862}
2863
e7854a03
AD
2864/**
2865 * amdgpu_device_ip_suspend - run suspend for hardware IPs
2866 *
2867 * @adev: amdgpu_device pointer
2868 *
2869 * Main suspend function for hardware IPs. The list of all the hardware
2870 * IPs that make up the asic is walked, clockgating is disabled and the
2871 * suspend callbacks are run. suspend puts the hardware and software state
2872 * in each IP into a state suitable for suspend.
2873 * Returns 0 on success, negative error code on failure.
2874 */
2875int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2876{
2877 int r;
2878
3c73683c
JC
2879 if (amdgpu_sriov_vf(adev)) {
2880 amdgpu_virt_fini_data_exchange(adev);
e7819644 2881 amdgpu_virt_request_full_gpu(adev, false);
3c73683c 2882 }
e7819644 2883
e7854a03
AD
2884 r = amdgpu_device_ip_suspend_phase1(adev);
2885 if (r)
2886 return r;
2887 r = amdgpu_device_ip_suspend_phase2(adev);
2888
e7819644
YT
2889 if (amdgpu_sriov_vf(adev))
2890 amdgpu_virt_release_full_gpu(adev, false);
2891
e7854a03
AD
2892 return r;
2893}
2894
06ec9070 2895static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2896{
2897 int i, r;
2898
2cb681b6
ML
2899 static enum amd_ip_block_type ip_order[] = {
2900 AMD_IP_BLOCK_TYPE_GMC,
2901 AMD_IP_BLOCK_TYPE_COMMON,
39186aef 2902 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
2903 AMD_IP_BLOCK_TYPE_IH,
2904 };
a90ad3c2 2905
95ea3dbc 2906 for (i = 0; i < adev->num_ip_blocks; i++) {
2cb681b6
ML
2907 int j;
2908 struct amdgpu_ip_block *block;
a90ad3c2 2909
4cd2a96d
J
2910 block = &adev->ip_blocks[i];
2911 block->status.hw = false;
2cb681b6 2912
4cd2a96d 2913 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
2cb681b6 2914
4cd2a96d 2915 if (block->version->type != ip_order[j] ||
2cb681b6
ML
2916 !block->status.valid)
2917 continue;
2918
2919 r = block->version->funcs->hw_init(adev);
0aaeefcc 2920 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2921 if (r)
2922 return r;
482f0e53 2923 block->status.hw = true;
a90ad3c2
ML
2924 }
2925 }
2926
2927 return 0;
2928}
2929
06ec9070 2930static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2931{
2932 int i, r;
2933
2cb681b6
ML
2934 static enum amd_ip_block_type ip_order[] = {
2935 AMD_IP_BLOCK_TYPE_SMC,
2936 AMD_IP_BLOCK_TYPE_DCE,
2937 AMD_IP_BLOCK_TYPE_GFX,
2938 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c 2939 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07
JJ
2940 AMD_IP_BLOCK_TYPE_VCE,
2941 AMD_IP_BLOCK_TYPE_VCN
2cb681b6 2942 };
a90ad3c2 2943
2cb681b6
ML
2944 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2945 int j;
2946 struct amdgpu_ip_block *block;
a90ad3c2 2947
2cb681b6
ML
2948 for (j = 0; j < adev->num_ip_blocks; j++) {
2949 block = &adev->ip_blocks[j];
2950
2951 if (block->version->type != ip_order[i] ||
482f0e53
ML
2952 !block->status.valid ||
2953 block->status.hw)
2cb681b6
ML
2954 continue;
2955
895bd048
JZ
2956 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
2957 r = block->version->funcs->resume(adev);
2958 else
2959 r = block->version->funcs->hw_init(adev);
2960
0aaeefcc 2961 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2962 if (r)
2963 return r;
482f0e53 2964 block->status.hw = true;
a90ad3c2
ML
2965 }
2966 }
2967
2968 return 0;
2969}
2970
e3ecdffa
AD
2971/**
2972 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2973 *
2974 * @adev: amdgpu_device pointer
2975 *
2976 * First resume function for hardware IPs. The list of all the hardware
2977 * IPs that make up the asic is walked and the resume callbacks are run for
2978 * COMMON, GMC, and IH. resume puts the hardware into a functional state
2979 * after a suspend and updates the software state as necessary. This
2980 * function is also used for restoring the GPU after a GPU reset.
2981 * Returns 0 on success, negative error code on failure.
2982 */
06ec9070 2983static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
2984{
2985 int i, r;
2986
a90ad3c2 2987 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2988 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 2989 continue;
a90ad3c2 2990 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa
AD
2991 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2992 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
482f0e53 2993
fcf0649f
CZ
2994 r = adev->ip_blocks[i].version->funcs->resume(adev);
2995 if (r) {
2996 DRM_ERROR("resume of IP block <%s> failed %d\n",
2997 adev->ip_blocks[i].version->funcs->name, r);
2998 return r;
2999 }
482f0e53 3000 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
3001 }
3002 }
3003
3004 return 0;
3005}
3006
e3ecdffa
AD
3007/**
3008 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3009 *
3010 * @adev: amdgpu_device pointer
3011 *
3012 * Second resume function for hardware IPs. The list of all the hardware
3013 * IPs that make up the asic is walked and the resume callbacks are run for
3014 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
3015 * functional state after a suspend and updates the software state as
3016 * necessary. This function is also used for restoring the GPU after a GPU
3017 * reset.
3018 * Returns 0 on success, negative error code on failure.
3019 */
06ec9070 3020static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
3021{
3022 int i, r;
3023
3024 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 3025 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 3026 continue;
fcf0649f 3027 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 3028 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
3029 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3030 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 3031 continue;
a1255107 3032 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 3033 if (r) {
a1255107
AD
3034 DRM_ERROR("resume of IP block <%s> failed %d\n",
3035 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 3036 return r;
2c1a2784 3037 }
482f0e53 3038 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
3039 }
3040
3041 return 0;
3042}
3043
e3ecdffa
AD
3044/**
3045 * amdgpu_device_ip_resume - run resume for hardware IPs
3046 *
3047 * @adev: amdgpu_device pointer
3048 *
3049 * Main resume function for hardware IPs. The hardware IPs
3050 * are split into two resume functions because they are
3051 * also used in recovering from a GPU reset and some additional
3052 * steps need to be taken between them. In this case (S3/S4) they are
3053 * run sequentially.
3054 * Returns 0 on success, negative error code on failure.
3055 */
06ec9070 3056static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
3057{
3058 int r;
3059
06ec9070 3060 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
3061 if (r)
3062 return r;
7a3e0bb2
RZ
3063
3064 r = amdgpu_device_fw_loading(adev);
3065 if (r)
3066 return r;
3067
06ec9070 3068 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
3069
3070 return r;
3071}
3072
e3ecdffa
AD
3073/**
3074 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3075 *
3076 * @adev: amdgpu_device pointer
3077 *
3078 * Query the VBIOS data tables to determine if the board supports SR-IOV.
3079 */
4e99a44e 3080static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 3081{
6867e1b5
ML
3082 if (amdgpu_sriov_vf(adev)) {
3083 if (adev->is_atom_fw) {
58ff791a 3084 if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
6867e1b5
ML
3085 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3086 } else {
3087 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3088 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3089 }
3090
3091 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3092 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 3093 }
048765ad
AR
3094}
3095
e3ecdffa
AD
3096/**
3097 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3098 *
3099 * @asic_type: AMD asic type
3100 *
3101 * Check if there is DC (new modesetting infrastructure) support for an asic.
3102 * returns true if DC has support, false if not.
3103 */
4562236b
HW
3104bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3105{
3106 switch (asic_type) {
3107#if defined(CONFIG_DRM_AMD_DC)
64200c46
MR
3108#if defined(CONFIG_DRM_AMD_DC_SI)
3109 case CHIP_TAHITI:
3110 case CHIP_PITCAIRN:
3111 case CHIP_VERDE:
3112 case CHIP_OLAND:
3113#endif
4562236b 3114 case CHIP_BONAIRE:
0d6fbccb 3115 case CHIP_KAVERI:
367e6687
AD
3116 case CHIP_KABINI:
3117 case CHIP_MULLINS:
d9fda248
HW
3118 /*
3119 * We have systems in the wild with these ASICs that require
3120 * LVDS and VGA support which is not supported with DC.
3121 *
3122 * Fallback to the non-DC driver here by default so as not to
3123 * cause regressions.
3124 */
3125 return amdgpu_dc > 0;
3126 case CHIP_HAWAII:
4562236b
HW
3127 case CHIP_CARRIZO:
3128 case CHIP_STONEY:
4562236b 3129 case CHIP_POLARIS10:
675fd32b 3130 case CHIP_POLARIS11:
2c8ad2d5 3131 case CHIP_POLARIS12:
675fd32b 3132 case CHIP_VEGAM:
4562236b
HW
3133 case CHIP_TONGA:
3134 case CHIP_FIJI:
42f8ffa1 3135 case CHIP_VEGA10:
dca7b401 3136 case CHIP_VEGA12:
c6034aa2 3137 case CHIP_VEGA20:
b86a1aa3 3138#if defined(CONFIG_DRM_AMD_DC_DCN)
fd187853 3139 case CHIP_RAVEN:
b4f199c7 3140 case CHIP_NAVI10:
8fceceb6 3141 case CHIP_NAVI14:
078655d9 3142 case CHIP_NAVI12:
e1c14c43 3143 case CHIP_RENOIR:
81d9bfb8 3144 case CHIP_SIENNA_CICHLID:
a6c5308f 3145 case CHIP_NAVY_FLOUNDER:
7cc656e2 3146 case CHIP_DIMGREY_CAVEFISH:
ddaed58b 3147 case CHIP_BEIGE_GOBY:
84b934bc 3148 case CHIP_VANGOGH:
42f8ffa1 3149#endif
fd187853 3150 return amdgpu_dc != 0;
4562236b
HW
3151#endif
3152 default:
93b09a9a 3153 if (amdgpu_dc > 0)
044a48f4 3154 DRM_INFO_ONCE("Display Core has been requested via kernel parameter "
93b09a9a 3155 "but isn't supported by ASIC, ignoring\n");
4562236b
HW
3156 return false;
3157 }
3158}
3159
3160/**
3161 * amdgpu_device_has_dc_support - check if dc is supported
3162 *
982a820b 3163 * @adev: amdgpu_device pointer
4562236b
HW
3164 *
3165 * Returns true for supported, false for not supported
3166 */
3167bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3168{
abaf210c
AS
3169 if (amdgpu_sriov_vf(adev) ||
3170 adev->enable_virtual_display ||
3171 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
2555039d
XY
3172 return false;
3173
4562236b
HW
3174 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3175}
3176
d4535e2c
AG
3177static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3178{
3179 struct amdgpu_device *adev =
3180 container_of(__work, struct amdgpu_device, xgmi_reset_work);
d95e8e97 3181 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
d4535e2c 3182
c6a6e2db
AG
3183 /* It's a bug to not have a hive within this function */
3184 if (WARN_ON(!hive))
3185 return;
3186
3187 /*
3188 * Use task barrier to synchronize all xgmi reset works across the
3189 * hive. task_barrier_enter and task_barrier_exit will block
3190 * until all the threads running the xgmi reset works reach
3191 * those points. task_barrier_full will do both blocks.
3192 */
3193 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3194
3195 task_barrier_enter(&hive->tb);
4a580877 3196 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
c6a6e2db
AG
3197
3198 if (adev->asic_reset_res)
3199 goto fail;
3200
3201 task_barrier_exit(&hive->tb);
4a580877 3202 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
c6a6e2db
AG
3203
3204 if (adev->asic_reset_res)
3205 goto fail;
43c4d576 3206
8bc7b360
HZ
3207 if (adev->mmhub.ras_funcs &&
3208 adev->mmhub.ras_funcs->reset_ras_error_count)
3209 adev->mmhub.ras_funcs->reset_ras_error_count(adev);
c6a6e2db
AG
3210 } else {
3211
3212 task_barrier_full(&hive->tb);
3213 adev->asic_reset_res = amdgpu_asic_reset(adev);
3214 }
ce316fa5 3215
c6a6e2db 3216fail:
d4535e2c 3217 if (adev->asic_reset_res)
fed184e9 3218 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
4a580877 3219 adev->asic_reset_res, adev_to_drm(adev)->unique);
d95e8e97 3220 amdgpu_put_xgmi_hive(hive);
d4535e2c
AG
3221}
3222
71f98027
AD
3223static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3224{
3225 char *input = amdgpu_lockup_timeout;
3226 char *timeout_setting = NULL;
3227 int index = 0;
3228 long timeout;
3229 int ret = 0;
3230
3231 /*
67387dfe
AD
3232 * By default the timeout for non-compute jobs is 10000
3233 * and 60000 for compute jobs.
71f98027 3234 * In SR-IOV or passthrough mode, the timeout for compute
b7b2a316 3235 * jobs is 60000 by default.
71f98027
AD
3236 */
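	/*
	 * Illustrative (hypothetical) module parameter value handled by the
	 * parsing loop below, in the order gfx,compute,sdma,video:
	 *
	 *	amdgpu.lockup_timeout=10000,60000,10000,10000
	 *
	 * A single value applies to all non-compute jobs; 0 keeps the default
	 * and a negative value means no timeout (MAX_SCHEDULE_TIMEOUT).
	 */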
3237 adev->gfx_timeout = msecs_to_jiffies(10000);
3238 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
9882e278
ED
3239 if (amdgpu_sriov_vf(adev))
3240 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3241 msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
71f98027 3242 else
67387dfe 3243 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027 3244
f440ff44 3245 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 3246 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 3247 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
3248 ret = kstrtol(timeout_setting, 0, &timeout);
3249 if (ret)
3250 return ret;
3251
3252 if (timeout == 0) {
3253 index++;
3254 continue;
3255 } else if (timeout < 0) {
3256 timeout = MAX_SCHEDULE_TIMEOUT;
3257 } else {
3258 timeout = msecs_to_jiffies(timeout);
3259 }
3260
3261 switch (index++) {
3262 case 0:
3263 adev->gfx_timeout = timeout;
3264 break;
3265 case 1:
3266 adev->compute_timeout = timeout;
3267 break;
3268 case 2:
3269 adev->sdma_timeout = timeout;
3270 break;
3271 case 3:
3272 adev->video_timeout = timeout;
3273 break;
3274 default:
3275 break;
3276 }
3277 }
3278 /*
3279 * There is only one value specified and
3280 * it should apply to all non-compute jobs.
3281 */
bcccee89 3282 if (index == 1) {
71f98027 3283 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
3284 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3285 adev->compute_timeout = adev->gfx_timeout;
3286 }
71f98027
AD
3287 }
3288
3289 return ret;
3290}
d4535e2c 3291
77f3a5cd
ND
3292static const struct attribute *amdgpu_dev_attributes[] = {
3293 &dev_attr_product_name.attr,
3294 &dev_attr_product_number.attr,
3295 &dev_attr_serial_number.attr,
3296 &dev_attr_pcie_replay_count.attr,
3297 NULL
3298};
3299
d38ceaf9
AD
3300/**
3301 * amdgpu_device_init - initialize the driver
3302 *
3303 * @adev: amdgpu_device pointer
d38ceaf9
AD
3304 * @flags: driver flags
3305 *
3306 * Initializes the driver info and hw (all asics).
3307 * Returns 0 for success or an error on failure.
3308 * Called at driver startup.
3309 */
3310int amdgpu_device_init(struct amdgpu_device *adev,
d38ceaf9
AD
3311 uint32_t flags)
3312{
8aba21b7
LT
3313 struct drm_device *ddev = adev_to_drm(adev);
3314 struct pci_dev *pdev = adev->pdev;
d38ceaf9 3315 int r, i;
b98c6299 3316 bool px = false;
95844d20 3317 u32 max_MBps;
d38ceaf9
AD
3318
3319 adev->shutdown = false;
d38ceaf9 3320 adev->flags = flags;
4e66d7d2
YZ
3321
3322 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3323 adev->asic_type = amdgpu_force_asic_type;
3324 else
3325 adev->asic_type = flags & AMD_ASIC_MASK;
3326
d38ceaf9 3327 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 3328 if (amdgpu_emu_mode == 1)
8bdab6bb 3329 adev->usec_timeout *= 10;
770d13b1 3330 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
3331 adev->accel_working = false;
3332 adev->num_rings = 0;
3333 adev->mman.buffer_funcs = NULL;
3334 adev->mman.buffer_funcs_ring = NULL;
3335 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 3336 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 3337 adev->gmc.gmc_funcs = NULL;
7bd939d0 3338 adev->harvest_ip_mask = 0x0;
f54d1867 3339 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 3340 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
3341
3342 adev->smc_rreg = &amdgpu_invalid_rreg;
3343 adev->smc_wreg = &amdgpu_invalid_wreg;
3344 adev->pcie_rreg = &amdgpu_invalid_rreg;
3345 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
3346 adev->pciep_rreg = &amdgpu_invalid_rreg;
3347 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
3348 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3349 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
3350 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3351 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3352 adev->didt_rreg = &amdgpu_invalid_rreg;
3353 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
3354 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3355 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
3356 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3357 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3358
3e39ab90
AD
3359 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3360 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3361 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
3362
3363 /* mutex initialization are all done here so we
3364 * can recall function without having locking issues */
0e5ca0d1 3365 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
3366 mutex_init(&adev->pm.mutex);
3367 mutex_init(&adev->gfx.gpu_clock_mutex);
3368 mutex_init(&adev->srbm_mutex);
b8866c26 3369 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3370 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 3371 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3372 mutex_init(&adev->mn_lock);
e23b74aa 3373 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3374 hash_init(adev->mn_hash);
53b3f8f4 3375 atomic_set(&adev->in_gpu_reset, 0);
6049db43 3376 init_rwsem(&adev->reset_sem);
32eaeae0 3377 mutex_init(&adev->psp.mutex);
bd052211 3378 mutex_init(&adev->notifier_lock);
d38ceaf9 3379
912dfc84
EQ
3380 r = amdgpu_device_check_arguments(adev);
3381 if (r)
3382 return r;
d38ceaf9 3383
d38ceaf9
AD
3384 spin_lock_init(&adev->mmio_idx_lock);
3385 spin_lock_init(&adev->smc_idx_lock);
3386 spin_lock_init(&adev->pcie_idx_lock);
3387 spin_lock_init(&adev->uvd_ctx_idx_lock);
3388 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3389 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3390 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3391 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3392 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3393
0c4e7fa5
CZ
3394 INIT_LIST_HEAD(&adev->shadow_list);
3395 mutex_init(&adev->shadow_list_lock);
3396
655ce9cb 3397 INIT_LIST_HEAD(&adev->reset_list);
3398
beff74bc
AD
3399 INIT_DELAYED_WORK(&adev->delayed_init_work,
3400 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3401 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3402 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3403
d4535e2c
AG
3404 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3405
d23ee13f 3406 adev->gfx.gfx_off_req_count = 1;
b6e79d9a 3407 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3408
b265bdbd
EQ
3409 atomic_set(&adev->throttling_logging_enabled, 1);
3410 /*
3411 * If throttling continues, logging will be performed every minute
3412 * to avoid log flooding. "-1" is subtracted since the thermal
3413 * throttling interrupt comes every second. Thus, the total logging
3414 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
3415 * for throttling interrupt) = 60 seconds.
3416 */
3417 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3418 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3419
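/* Editor's note -- illustrative sketch only, not part of this file: the
 * ratelimit state initialized above is meant to be consumed at the print
 * site with __ratelimit() (linux/ratelimit.h), which returns true when a
 * message is allowed through.  A hypothetical throttling-interrupt handler
 * would then do something along these lines:
 *
 *	if (__ratelimit(&adev->throttling_logging_rs))
 *		dev_warn(adev->dev, "GPU is thermally throttled!\n");
 *
 * so at most one warning escapes per 59-second window while the interrupt
 * keeps firing every second.
 */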
0fa49558
AX
3420 /* Registers mapping */
3421 /* TODO: block userspace mapping of io register */
da69c161
KW
3422 if (adev->asic_type >= CHIP_BONAIRE) {
3423 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3424 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3425 } else {
3426 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3427 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3428 }
d38ceaf9 3429
d38ceaf9
AD
3430 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3431 if (adev->rmmio == NULL) {
3432 return -ENOMEM;
3433 }
3434 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3435 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3436
b2109d8e
JX
3437 /* enable PCIE atomic ops */
3438 r = pci_enable_atomic_ops_to_root(adev->pdev,
3439 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3440 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3441 if (r) {
3442 adev->have_atomics_support = false;
3443 DRM_INFO("PCIE atomic ops is not supported\n");
3444 } else {
3445 adev->have_atomics_support = true;
3446 }
3447
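/* Editor's note -- illustrative sketch only: have_atomics_support is the
 * cached result that later consumers (the KFD compute stack, for example)
 * are expected to test before relying on PCIe atomics; a hypothetical
 * caller-side check would look like:
 *
 *	if (!adev->have_atomics_support)
 *		dev_dbg(adev->dev, "skipping a feature that needs PCIe atomics\n");
 */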
5494d864
AD
3448 amdgpu_device_get_pcie_info(adev);
3449
b239c017
JX
3450 if (amdgpu_mcbp)
3451 DRM_INFO("MCBP is enabled\n");
3452
5f84cc63
JX
3453 if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
3454 adev->enable_mes = true;
3455
3aa0115d
ML
3456 /* detect hw virtualization here */
3457 amdgpu_detect_virtualization(adev);
3458
dffa11b4
ML
3459 r = amdgpu_device_get_job_timeout_settings(adev);
3460 if (r) {
3461 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4192f7b5 3462 goto failed_unmap;
a190d1c7
XY
3463 }
3464
d38ceaf9 3465 /* early init functions */
06ec9070 3466 r = amdgpu_device_ip_early_init(adev);
d38ceaf9 3467 if (r)
4192f7b5 3468 goto failed_unmap;
d38ceaf9 3469
6585661d
OZ
3470 /* doorbell bar mapping and doorbell index init*/
3471 amdgpu_device_doorbell_init(adev);
3472
9475a943
SL
3473 if (amdgpu_emu_mode == 1) {
3474 /* post the asic on emulation mode */
3475 emu_soc_asic_init(adev);
bfca0289 3476 goto fence_driver_init;
9475a943 3477 }
bfca0289 3478
04442bf7
LL
3479 amdgpu_reset_init(adev);
3480
4e99a44e
ML
3481 /* detect if we are with an SRIOV vbios */
3482 amdgpu_device_detect_sriov_bios(adev);
048765ad 3483
95e8e59e
AD
3484 /* check if we need to reset the asic
3485 * E.g., driver was not cleanly unloaded previously, etc.
3486 */
f14899fd 3487 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
e3c1b071 3488 if (adev->gmc.xgmi.num_physical_nodes) {
3489 dev_info(adev->dev, "Pending hive reset.\n");
3490 adev->gmc.xgmi.pending_reset = true;
3491 /* Only need to init necessary block for SMU to handle the reset */
3492 for (i = 0; i < adev->num_ip_blocks; i++) {
3493 if (!adev->ip_blocks[i].status.valid)
3494 continue;
3495 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3496 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3497 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3498 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
751f43e7 3499 DRM_DEBUG("IP %s disabled for hw_init.\n",
e3c1b071 3500 adev->ip_blocks[i].version->funcs->name);
3501 adev->ip_blocks[i].status.hw = true;
3502 }
3503 }
3504 } else {
3505 r = amdgpu_asic_reset(adev);
3506 if (r) {
3507 dev_err(adev->dev, "asic reset on init failed\n");
3508 goto failed;
3509 }
95e8e59e
AD
3510 }
3511 }
3512
8f66090b 3513 pci_enable_pcie_error_reporting(adev->pdev);
c9a6b82f 3514
d38ceaf9 3515 /* Post card if necessary */
39c640c0 3516 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3517 if (!adev->bios) {
bec86378 3518 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3519 r = -EINVAL;
3520 goto failed;
d38ceaf9 3521 }
bec86378 3522 DRM_INFO("GPU posting now...\n");
4d2997ab 3523 r = amdgpu_device_asic_init(adev);
4e99a44e
ML
3524 if (r) {
3525 dev_err(adev->dev, "gpu post error!\n");
3526 goto failed;
3527 }
d38ceaf9
AD
3528 }
3529
88b64e95
AD
3530 if (adev->is_atom_fw) {
3531 /* Initialize clocks */
3532 r = amdgpu_atomfirmware_get_clock_info(adev);
3533 if (r) {
3534 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 3535 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
3536 goto failed;
3537 }
3538 } else {
a5bde2f9
AD
3539 /* Initialize clocks */
3540 r = amdgpu_atombios_get_clock_info(adev);
3541 if (r) {
3542 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 3543 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 3544 goto failed;
a5bde2f9
AD
3545 }
3546 /* init i2c buses */
4562236b
HW
3547 if (!amdgpu_device_has_dc_support(adev))
3548 amdgpu_atombios_i2c_init(adev);
2c1a2784 3549 }
d38ceaf9 3550
bfca0289 3551fence_driver_init:
d38ceaf9
AD
3552 /* Fence driver */
3553 r = amdgpu_fence_driver_init(adev);
2c1a2784
AD
3554 if (r) {
3555 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
e23b74aa 3556 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3557 goto failed;
2c1a2784 3558 }
d38ceaf9
AD
3559
3560 /* init the mode config */
4a580877 3561 drm_mode_config_init(adev_to_drm(adev));
d38ceaf9 3562
06ec9070 3563 r = amdgpu_device_ip_init(adev);
d38ceaf9 3564 if (r) {
8840a387 3565 /* failed in exclusive mode due to timeout */
3566 if (amdgpu_sriov_vf(adev) &&
3567 !amdgpu_sriov_runtime(adev) &&
3568 amdgpu_virt_mmio_blocked(adev) &&
3569 !amdgpu_virt_wait_reset(adev)) {
3570 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
3571 /* Don't send request since VF is inactive. */
3572 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3573 adev->virt.ops = NULL;
8840a387 3574 r = -EAGAIN;
970fd197 3575 goto release_ras_con;
8840a387 3576 }
06ec9070 3577 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3578 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
970fd197 3579 goto release_ras_con;
d38ceaf9
AD
3580 }
3581
d69b8971
YZ
3582 dev_info(adev->dev,
3583 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
3584 adev->gfx.config.max_shader_engines,
3585 adev->gfx.config.max_sh_per_se,
3586 adev->gfx.config.max_cu_per_sh,
3587 adev->gfx.cu_info.number);
3588
d38ceaf9
AD
3589 adev->accel_working = true;
3590
e59c0205
AX
3591 amdgpu_vm_check_compute_bug(adev);
3592
95844d20
MO
3593 /* Initialize the buffer migration limit. */
3594 if (amdgpu_moverate >= 0)
3595 max_MBps = amdgpu_moverate;
3596 else
3597 max_MBps = 8; /* Allow 8 MB/s. */
3598 /* Get a log2 for easy divisions. */
3599 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3600
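/* Editor's note -- illustrative sketch only: storing the rate as a log2
 * turns the byte budget into a simple shift, because 1 MB/s is one byte
 * per microsecond.  A consumer of mm_stats (the CS migration throttling
 * is assumed here) can convert an accumulated time credit into a byte
 * budget without a division, e.g.:
 *
 *	u64 budget_bytes = (u64)time_us << adev->mm_stats.log2_max_MBps;
 */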
9bc92b9c
ML
3601 amdgpu_fbdev_init(adev);
3602
d2f52ac8 3603 r = amdgpu_pm_sysfs_init(adev);
7c868b59
YT
3604 if (r) {
3605 adev->pm_sysfs_en = false;
d2f52ac8 3606 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
7c868b59
YT
3607 } else
3608 adev->pm_sysfs_en = true;
d2f52ac8 3609
5bb23532 3610 r = amdgpu_ucode_sysfs_init(adev);
7c868b59
YT
3611 if (r) {
3612 adev->ucode_sysfs_en = false;
5bb23532 3613 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
7c868b59
YT
3614 } else
3615 adev->ucode_sysfs_en = true;
5bb23532 3616
d38ceaf9
AD
3617 if ((amdgpu_testing & 1)) {
3618 if (adev->accel_working)
3619 amdgpu_test_moves(adev);
3620 else
3621 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
3622 }
d38ceaf9
AD
3623 if (amdgpu_benchmarking) {
3624 if (adev->accel_working)
3625 amdgpu_benchmark(adev, amdgpu_benchmarking);
3626 else
3627 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
3628 }
3629
b0adca4d
EQ
3630 /*
3631 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3632 * Otherwise the mgpu fan boost feature will be skipped because the
3633 * gpu instance count would be too low.
3634 */
3635 amdgpu_register_gpu_instance(adev);
3636
d38ceaf9
AD
3637 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3638 * explicit gating rather than handling it automatically.
3639 */
e3c1b071 3640 if (!adev->gmc.xgmi.pending_reset) {
3641 r = amdgpu_device_ip_late_init(adev);
3642 if (r) {
3643 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
3644 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
970fd197 3645 goto release_ras_con;
e3c1b071 3646 }
3647 /* must succeed. */
3648 amdgpu_ras_resume(adev);
3649 queue_delayed_work(system_wq, &adev->delayed_init_work,
3650 msecs_to_jiffies(AMDGPU_RESUME_MS));
2c1a2784 3651 }
d38ceaf9 3652
2c738637
ML
3653 if (amdgpu_sriov_vf(adev))
3654 flush_delayed_work(&adev->delayed_init_work);
3655
77f3a5cd 3656 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
5aea5327 3657 if (r)
77f3a5cd 3658 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166 3659
d155bef0
AB
3660 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3661 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
3662 if (r)
3663 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3664
c1dd4aa6
AG
3665 /* Have stored pci confspace at hand for restore in sudden PCI error */
3666 if (amdgpu_device_cache_pci_state(adev->pdev))
3667 pci_restore_state(pdev);
3668
8c3dd61c
KHF
3669 /* if we have > 1 VGA card, then disable the amdgpu VGA resources */
3670 /* this will fail for cards that aren't VGA class devices, just
3671 * ignore it */
3672 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
3673 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
3674
3675 if (amdgpu_device_supports_px(ddev)) {
3676 px = true;
3677 vga_switcheroo_register_client(adev->pdev,
3678 &amdgpu_switcheroo_ops, px);
3679 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
3680 }
3681
e3c1b071 3682 if (adev->gmc.xgmi.pending_reset)
3683 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
3684 msecs_to_jiffies(AMDGPU_RESUME_MS));
3685
d38ceaf9 3686 return 0;
83ba126a 3687
970fd197
SY
3688release_ras_con:
3689 amdgpu_release_ras_context(adev);
3690
83ba126a 3691failed:
89041940 3692 amdgpu_vf_error_trans_all(adev);
8840a387 3693
4192f7b5
AD
3694failed_unmap:
3695 iounmap(adev->rmmio);
3696 adev->rmmio = NULL;
3697
83ba126a 3698 return r;
d38ceaf9
AD
3699}
3700
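/* Editor's note -- hedged sketch of the expected call site, not part of
 * this file: the KMS load path is assumed to allocate the drm/amdgpu
 * device and then hand the asic flags to amdgpu_device_init(), roughly:
 *
 *	r = amdgpu_device_init(adev, flags);
 *	if (r)
 *		return r;	// probe fails; the fini paths below never run
 */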
07775fc1
AG
3701static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
3702{
3703 /* Clear all CPU mappings pointing to this device */
3704 unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
3705
3706 /* Unmap all mapped bars - Doorbell, registers and VRAM */
3707 amdgpu_device_doorbell_fini(adev);
3708
3709 iounmap(adev->rmmio);
3710 adev->rmmio = NULL;
3711 if (adev->mman.aper_base_kaddr)
3712 iounmap(adev->mman.aper_base_kaddr);
3713 adev->mman.aper_base_kaddr = NULL;
3714
3715 /* Memory manager related */
3716 if (!adev->gmc.xgmi.connected_to_cpu) {
3717 arch_phys_wc_del(adev->gmc.vram_mtrr);
3718 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
3719 }
3720}
3721
d38ceaf9
AD
3722/**
3723 * amdgpu_device_fini - tear down the driver
3724 *
3725 * @adev: amdgpu_device pointer
3726 *
3727 * Tear down the driver info (all asics).
3728 * Called at driver shutdown.
3729 */
72c8c97b 3730void amdgpu_device_fini_hw(struct amdgpu_device *adev)
d38ceaf9 3731{
aac89168 3732 dev_info(adev->dev, "amdgpu: finishing device.\n");
9f875167 3733 flush_delayed_work(&adev->delayed_init_work);
bb0cd09b 3734 ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
d0d13fe8 3735 adev->shutdown = true;
9f875167 3736
752c683d
ML
3737 /* make sure IB tests have finished before entering exclusive mode
3738 * to avoid preemption during the IB tests
3739 */
519b8b76 3740 if (amdgpu_sriov_vf(adev)) {
752c683d 3741 amdgpu_virt_request_full_gpu(adev, false);
519b8b76
BZ
3742 amdgpu_virt_fini_data_exchange(adev);
3743 }
752c683d 3744
e5b03032
ML
3745 /* disable all interrupts */
3746 amdgpu_irq_disable_all(adev);
ff97cba8
ML
3747 if (adev->mode_info.mode_config_initialized){
3748 if (!amdgpu_device_has_dc_support(adev))
4a580877 3749 drm_helper_force_disable_all(adev_to_drm(adev));
ff97cba8 3750 else
4a580877 3751 drm_atomic_helper_shutdown(adev_to_drm(adev));
ff97cba8 3752 }
72c8c97b
AG
3753 amdgpu_fence_driver_fini_hw(adev);
3754
7c868b59
YT
3755 if (adev->pm_sysfs_en)
3756 amdgpu_pm_sysfs_fini(adev);
72c8c97b
AG
3757 if (adev->ucode_sysfs_en)
3758 amdgpu_ucode_sysfs_fini(adev);
3759 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
3760
d38ceaf9 3761 amdgpu_fbdev_fini(adev);
72c8c97b
AG
3762
3763 amdgpu_irq_fini_hw(adev);
e9669fb7
AG
3764
3765 amdgpu_device_ip_fini_early(adev);
d10d0daa
AG
3766
3767 amdgpu_gart_dummy_page_fini(adev);
07775fc1
AG
3768
3769 amdgpu_device_unmap_mmio(adev);
72c8c97b
AG
3770}
3771
3772void amdgpu_device_fini_sw(struct amdgpu_device *adev)
3773{
e230ac11 3774 amdgpu_device_ip_fini(adev);
72c8c97b 3775 amdgpu_fence_driver_fini_sw(adev);
75e1658e
ND
3776 release_firmware(adev->firmware.gpu_info_fw);
3777 adev->firmware.gpu_info_fw = NULL;
d38ceaf9 3778 adev->accel_working = false;
04442bf7
LL
3779
3780 amdgpu_reset_fini(adev);
3781
d38ceaf9 3782 /* free i2c buses */
4562236b
HW
3783 if (!amdgpu_device_has_dc_support(adev))
3784 amdgpu_i2c_fini(adev);
bfca0289
SL
3785
3786 if (amdgpu_emu_mode != 1)
3787 amdgpu_atombios_fini(adev);
3788
d38ceaf9
AD
3789 kfree(adev->bios);
3790 adev->bios = NULL;
b98c6299 3791 if (amdgpu_device_supports_px(adev_to_drm(adev))) {
84c8b22e 3792 vga_switcheroo_unregister_client(adev->pdev);
83ba126a 3793 vga_switcheroo_fini_domain_pm_ops(adev->dev);
b98c6299 3794 }
38d6be81
AD
3795 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
3796 vga_client_register(adev->pdev, NULL, NULL, NULL);
e9bc1bf7 3797
d155bef0
AB
3798 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3799 amdgpu_pmu_fini(adev);
72de33f8 3800 if (adev->mman.discovery_bin)
a190d1c7 3801 amdgpu_discovery_fini(adev);
72c8c97b
AG
3802
3803 kfree(adev->pci_state);
3804
d38ceaf9
AD
3805}
3806
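/* Editor's note -- hedged sketch of the intended teardown ordering, not
 * part of this file: the hw/sw split above is assumed to be driven by the
 * DRM unplug and release paths, i.e. hardware is quiesced first and the
 * software state is freed only when the last reference drops:
 *
 *	amdgpu_device_fini_hw(adev);	// on driver unload / device unplug
 *	...
 *	amdgpu_device_fini_sw(adev);	// later, from the drm release path
 */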
3807
3808/*
3809 * Suspend & resume.
3810 */
3811/**
810ddc3a 3812 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 3813 *
87e3f136 3814 * @dev: drm dev pointer
87e3f136 3815 * @fbcon : notify the fbdev of suspend
d38ceaf9
AD
3816 *
3817 * Puts the hw in the suspend state (all asics).
3818 * Returns 0 for success or an error on failure.
3819 * Called at driver suspend.
3820 */
de185019 3821int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9 3822{
a2e15b0e 3823 struct amdgpu_device *adev = drm_to_adev(dev);
d38ceaf9 3824
d38ceaf9
AD
3825 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3826 return 0;
3827
44779b43 3828 adev->in_suspend = true;
3fa8f89d
S
3829
3830 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
3831 DRM_WARN("smart shift update failed\n");
3832
d38ceaf9
AD
3833 drm_kms_helper_poll_disable(dev);
3834
5f818173
S
3835 if (fbcon)
3836 amdgpu_fbdev_set_suspend(adev, 1);
3837
beff74bc 3838 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 3839
5e6932fe 3840 amdgpu_ras_suspend(adev);
3841
2196927b 3842 amdgpu_device_ip_suspend_phase1(adev);
fe1053b7 3843
5d3a2d95
AD
3844 if (!adev->in_s0ix)
3845 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
94fa5660 3846
d38ceaf9
AD
3847 /* evict vram memory */
3848 amdgpu_bo_evict_vram(adev);
3849
5ceb54c6 3850 amdgpu_fence_driver_suspend(adev);
d38ceaf9 3851
2196927b 3852 amdgpu_device_ip_suspend_phase2(adev);
a0a71e49
AD
3853 /* evict remaining vram memory
3854 * This second call to evict vram is to evict the gart page table
3855 * using the CPU.
3856 */
d38ceaf9
AD
3857 amdgpu_bo_evict_vram(adev);
3858
d38ceaf9
AD
3859 return 0;
3860}
3861
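/* Editor's note -- hedged sketch only: amdgpu_device_suspend() and
 * amdgpu_device_resume() are assumed to be paired by the PM callbacks in
 * amdgpu_drv.c, with fbcon=true for ordinary system sleep, e.g.:
 *
 *	r = amdgpu_device_suspend(drm_dev, true);	// on system suspend
 *	...
 *	r = amdgpu_device_resume(drm_dev, true);	// on system resume
 */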
3862/**
810ddc3a 3863 * amdgpu_device_resume - initiate device resume
d38ceaf9 3864 *
87e3f136 3865 * @dev: drm dev pointer
87e3f136 3866 * @fbcon : notify the fbdev of resume
d38ceaf9
AD
3867 *
3868 * Bring the hw back to operating state (all asics).
3869 * Returns 0 for success or an error on failure.
3870 * Called at driver resume.
3871 */
de185019 3872int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9 3873{
1348969a 3874 struct amdgpu_device *adev = drm_to_adev(dev);
03161a6e 3875 int r = 0;
d38ceaf9
AD
3876
3877 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3878 return 0;
3879
62498733 3880 if (adev->in_s0ix)
628c36d7
PL
3881 amdgpu_gfx_state_change_set(adev, sGpuChangeState_D0Entry);
3882
d38ceaf9 3883 /* post card */
39c640c0 3884 if (amdgpu_device_need_post(adev)) {
4d2997ab 3885 r = amdgpu_device_asic_init(adev);
74b0b157 3886 if (r)
aac89168 3887 dev_err(adev->dev, "amdgpu asic init failed\n");
74b0b157 3888 }
d38ceaf9 3889
06ec9070 3890 r = amdgpu_device_ip_resume(adev);
e6707218 3891 if (r) {
aac89168 3892 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
4d3b9ae5 3893 return r;
e6707218 3894 }
5ceb54c6
AD
3895 amdgpu_fence_driver_resume(adev);
3896
d38ceaf9 3897
06ec9070 3898 r = amdgpu_device_ip_late_init(adev);
03161a6e 3899 if (r)
4d3b9ae5 3900 return r;
d38ceaf9 3901
beff74bc
AD
3902 queue_delayed_work(system_wq, &adev->delayed_init_work,
3903 msecs_to_jiffies(AMDGPU_RESUME_MS));
3904
5d3a2d95
AD
3905 if (!adev->in_s0ix) {
3906 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
3907 if (r)
3908 return r;
3909 }
756e6880 3910
96a5d8d4 3911 /* Make sure IB tests flushed */
beff74bc 3912 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 3913
a2e15b0e 3914 if (fbcon)
4d3b9ae5 3915 amdgpu_fbdev_set_suspend(adev, 0);
d38ceaf9
AD
3916
3917 drm_kms_helper_poll_enable(dev);
23a1a9e5 3918
5e6932fe 3919 amdgpu_ras_resume(adev);
3920
23a1a9e5
L
3921 /*
3922 * Most of the connector probing functions try to acquire runtime pm
3923 * refs to ensure that the GPU is powered on when connector polling is
3924 * performed. Since we're calling this from a runtime PM callback,
3925 * trying to acquire rpm refs will cause us to deadlock.
3926 *
3927 * Since we're guaranteed to be holding the rpm lock, it's safe to
3928 * temporarily disable the rpm helpers so this doesn't deadlock us.
3929 */
3930#ifdef CONFIG_PM
3931 dev->dev->power.disable_depth++;
3932#endif
4562236b
HW
3933 if (!amdgpu_device_has_dc_support(adev))
3934 drm_helper_hpd_irq_event(dev);
3935 else
3936 drm_kms_helper_hotplug_event(dev);
23a1a9e5
L
3937#ifdef CONFIG_PM
3938 dev->dev->power.disable_depth--;
3939#endif
44779b43
RZ
3940 adev->in_suspend = false;
3941
3fa8f89d
S
3942 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
3943 DRM_WARN("smart shift update failed\n");
3944
4d3b9ae5 3945 return 0;
d38ceaf9
AD
3946}
3947
e3ecdffa
AD
3948/**
3949 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
3950 *
3951 * @adev: amdgpu_device pointer
3952 *
3953 * The list of all the hardware IPs that make up the asic is walked and
3954 * the check_soft_reset callbacks are run. check_soft_reset determines
3955 * if the asic is still hung or not.
3956 * Returns true if any of the IPs are still in a hung state, false if not.
3957 */
06ec9070 3958static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
3959{
3960 int i;
3961 bool asic_hang = false;
3962
f993d628
ML
3963 if (amdgpu_sriov_vf(adev))
3964 return true;
3965
8bc04c29
AD
3966 if (amdgpu_asic_need_full_reset(adev))
3967 return true;
3968
63fbf42f 3969 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3970 if (!adev->ip_blocks[i].status.valid)
63fbf42f 3971 continue;
a1255107
AD
3972 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
3973 adev->ip_blocks[i].status.hang =
3974 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
3975 if (adev->ip_blocks[i].status.hang) {
aac89168 3976 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
3977 asic_hang = true;
3978 }
3979 }
3980 return asic_hang;
3981}
3982
e3ecdffa
AD
3983/**
3984 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3985 *
3986 * @adev: amdgpu_device pointer
3987 *
3988 * The list of all the hardware IPs that make up the asic is walked and the
3989 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
3990 * handles any IP specific hardware or software state changes that are
3991 * necessary for a soft reset to succeed.
3992 * Returns 0 on success, negative error code on failure.
3993 */
06ec9070 3994static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
3995{
3996 int i, r = 0;
3997
3998 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3999 if (!adev->ip_blocks[i].status.valid)
d31a501e 4000 continue;
a1255107
AD
4001 if (adev->ip_blocks[i].status.hang &&
4002 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4003 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
4004 if (r)
4005 return r;
4006 }
4007 }
4008
4009 return 0;
4010}
4011
e3ecdffa
AD
4012/**
4013 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4014 *
4015 * @adev: amdgpu_device pointer
4016 *
4017 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
4018 * reset is necessary to recover.
4019 * Returns true if a full asic reset is required, false if not.
4020 */
06ec9070 4021static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 4022{
da146d3b
AD
4023 int i;
4024
8bc04c29
AD
4025 if (amdgpu_asic_need_full_reset(adev))
4026 return true;
4027
da146d3b 4028 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4029 if (!adev->ip_blocks[i].status.valid)
da146d3b 4030 continue;
a1255107
AD
4031 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4032 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4033 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
4034 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4035 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 4036 if (adev->ip_blocks[i].status.hang) {
aac89168 4037 dev_info(adev->dev, "Some block need full reset!\n");
da146d3b
AD
4038 return true;
4039 }
4040 }
35d782fe
CZ
4041 }
4042 return false;
4043}
4044
e3ecdffa
AD
4045/**
4046 * amdgpu_device_ip_soft_reset - do a soft reset
4047 *
4048 * @adev: amdgpu_device pointer
4049 *
4050 * The list of all the hardware IPs that make up the asic is walked and the
4051 * soft_reset callbacks are run if the block is hung. soft_reset handles any
4052 * IP specific hardware or software state changes that are necessary to soft
4053 * reset the IP.
4054 * Returns 0 on success, negative error code on failure.
4055 */
06ec9070 4056static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4057{
4058 int i, r = 0;
4059
4060 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4061 if (!adev->ip_blocks[i].status.valid)
35d782fe 4062 continue;
a1255107
AD
4063 if (adev->ip_blocks[i].status.hang &&
4064 adev->ip_blocks[i].version->funcs->soft_reset) {
4065 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
4066 if (r)
4067 return r;
4068 }
4069 }
4070
4071 return 0;
4072}
4073
e3ecdffa
AD
4074/**
4075 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4076 *
4077 * @adev: amdgpu_device pointer
4078 *
4079 * The list of all the hardware IPs that make up the asic is walked and the
4080 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
4081 * handles any IP specific hardware or software state changes that are
4082 * necessary after the IP has been soft reset.
4083 * Returns 0 on success, negative error code on failure.
4084 */
06ec9070 4085static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4086{
4087 int i, r = 0;
4088
4089 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4090 if (!adev->ip_blocks[i].status.valid)
35d782fe 4091 continue;
a1255107
AD
4092 if (adev->ip_blocks[i].status.hang &&
4093 adev->ip_blocks[i].version->funcs->post_soft_reset)
4094 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
4095 if (r)
4096 return r;
4097 }
4098
4099 return 0;
4100}
4101
e3ecdffa 4102/**
c33adbc7 4103 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
4104 *
4105 * @adev: amdgpu_device pointer
4106 *
4107 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
4108 * restore things like GPUVM page tables after a GPU reset where
4109 * the contents of VRAM might be lost.
403009bf
CK
4110 *
4111 * Returns:
4112 * 0 on success, negative error code on failure.
e3ecdffa 4113 */
c33adbc7 4114static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 4115{
c41d1cf6 4116 struct dma_fence *fence = NULL, *next = NULL;
403009bf
CK
4117 struct amdgpu_bo *shadow;
4118 long r = 1, tmo;
c41d1cf6
ML
4119
4120 if (amdgpu_sriov_runtime(adev))
b045d3af 4121 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
4122 else
4123 tmo = msecs_to_jiffies(100);
4124
aac89168 4125 dev_info(adev->dev, "recover vram bo from shadow start\n");
c41d1cf6 4126 mutex_lock(&adev->shadow_list_lock);
403009bf
CK
4127 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
4128
4129 /* No need to recover an evicted BO */
4130 if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
b575f10d 4131 shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
403009bf
CK
4132 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
4133 continue;
4134
4135 r = amdgpu_bo_restore_shadow(shadow, &next);
4136 if (r)
4137 break;
4138
c41d1cf6 4139 if (fence) {
1712fb1a 4140 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
4141 dma_fence_put(fence);
4142 fence = next;
1712fb1a 4143 if (tmo == 0) {
4144 r = -ETIMEDOUT;
c41d1cf6 4145 break;
1712fb1a 4146 } else if (tmo < 0) {
4147 r = tmo;
4148 break;
4149 }
403009bf
CK
4150 } else {
4151 fence = next;
c41d1cf6 4152 }
c41d1cf6
ML
4153 }
4154 mutex_unlock(&adev->shadow_list_lock);
4155
403009bf
CK
4156 if (fence)
4157 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
4158 dma_fence_put(fence);
4159
1712fb1a 4160 if (r < 0 || tmo <= 0) {
aac89168 4161 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
4162 return -EIO;
4163 }
c41d1cf6 4164
aac89168 4165 dev_info(adev->dev, "recover vram bo from shadow done\n");
403009bf 4166 return 0;
c41d1cf6
ML
4167}
4168
a90ad3c2 4169
e3ecdffa 4170/**
06ec9070 4171 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e 4172 *
982a820b 4173 * @adev: amdgpu_device pointer
87e3f136 4174 * @from_hypervisor: request from hypervisor
5740682e
ML
4175 *
4176 * Do a VF FLR and reinitialize the ASIC.
3f48c681 4177 * Returns 0 on success, otherwise an error.
e3ecdffa
AD
4178 */
4179static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4180 bool from_hypervisor)
5740682e
ML
4181{
4182 int r;
4183
4184 if (from_hypervisor)
4185 r = amdgpu_virt_request_full_gpu(adev, true);
4186 else
4187 r = amdgpu_virt_reset_gpu(adev);
4188 if (r)
4189 return r;
a90ad3c2 4190
b639c22c
JZ
4191 amdgpu_amdkfd_pre_reset(adev);
4192
a90ad3c2 4193 /* Resume IP prior to SMC */
06ec9070 4194 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
4195 if (r)
4196 goto error;
a90ad3c2 4197
c9ffa427 4198 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 4199 /* we need recover gart prior to run SMC/CP/SDMA resume */
6c28aed6 4200 amdgpu_gtt_mgr_recover(ttm_manager_type(&adev->mman.bdev, TTM_PL_TT));
a90ad3c2 4201
7a3e0bb2
RZ
4202 r = amdgpu_device_fw_loading(adev);
4203 if (r)
4204 return r;
4205
a90ad3c2 4206 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 4207 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
4208 if (r)
4209 goto error;
a90ad3c2
ML
4210
4211 amdgpu_irq_gpu_reset_resume_helper(adev);
5740682e 4212 r = amdgpu_ib_ring_tests(adev);
f81e8d53 4213 amdgpu_amdkfd_post_reset(adev);
a90ad3c2 4214
abc34253 4215error:
c41d1cf6 4216 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 4217 amdgpu_inc_vram_lost(adev);
c33adbc7 4218 r = amdgpu_device_recover_vram(adev);
a90ad3c2 4219 }
437f3e0b 4220 amdgpu_virt_release_full_gpu(adev, true);
a90ad3c2
ML
4221
4222 return r;
4223}
4224
9a1cddd6 4225/**
4226 * amdgpu_device_has_job_running - check if there is any job in the pending list
4227 *
982a820b 4228 * @adev: amdgpu_device pointer
9a1cddd6 4229 *
4230 * check if there is any job in the pending list
4231 */
4232bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4233{
4234 int i;
4235 struct drm_sched_job *job;
4236
4237 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4238 struct amdgpu_ring *ring = adev->rings[i];
4239
4240 if (!ring || !ring->sched.thread)
4241 continue;
4242
4243 spin_lock(&ring->sched.job_list_lock);
6efa4b46
LT
4244 job = list_first_entry_or_null(&ring->sched.pending_list,
4245 struct drm_sched_job, list);
9a1cddd6 4246 spin_unlock(&ring->sched.job_list_lock);
4247 if (job)
4248 return true;
4249 }
4250 return false;
4251}
4252
12938fad
CK
4253/**
4254 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4255 *
982a820b 4256 * @adev: amdgpu_device pointer
12938fad
CK
4257 *
4258 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4259 * a hung GPU.
4260 */
4261bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4262{
4263 if (!amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 4264 dev_info(adev->dev, "Timeout, but no hardware hang detected.\n");
12938fad
CK
4265 return false;
4266 }
4267
3ba7b418
AG
4268 if (amdgpu_gpu_recovery == 0)
4269 goto disabled;
4270
4271 if (amdgpu_sriov_vf(adev))
4272 return true;
4273
4274 if (amdgpu_gpu_recovery == -1) {
4275 switch (adev->asic_type) {
fc42d47c
AG
4276 case CHIP_BONAIRE:
4277 case CHIP_HAWAII:
3ba7b418
AG
4278 case CHIP_TOPAZ:
4279 case CHIP_TONGA:
4280 case CHIP_FIJI:
4281 case CHIP_POLARIS10:
4282 case CHIP_POLARIS11:
4283 case CHIP_POLARIS12:
4284 case CHIP_VEGAM:
4285 case CHIP_VEGA20:
4286 case CHIP_VEGA10:
4287 case CHIP_VEGA12:
c43b849f 4288 case CHIP_RAVEN:
e9d4cf91 4289 case CHIP_ARCTURUS:
2cb44fb0 4290 case CHIP_RENOIR:
658c6639
AD
4291 case CHIP_NAVI10:
4292 case CHIP_NAVI14:
4293 case CHIP_NAVI12:
131a3c74 4294 case CHIP_SIENNA_CICHLID:
665fe4dc 4295 case CHIP_NAVY_FLOUNDER:
27859ee3 4296 case CHIP_DIMGREY_CAVEFISH:
fe68ceef 4297 case CHIP_VANGOGH:
ea4e96a7 4298 case CHIP_ALDEBARAN:
3ba7b418
AG
4299 break;
4300 default:
4301 goto disabled;
4302 }
12938fad
CK
4303 }
4304
4305 return true;
3ba7b418
AG
4306
4307disabled:
aac89168 4308 dev_info(adev->dev, "GPU recovery disabled.\n");
3ba7b418 4309 return false;
12938fad
CK
4310}
4311
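/* Editor's note -- hedged sketch only: the job timeout handler (assumed to
 * live in amdgpu_job.c) is the expected caller, gating full recovery on
 * this check so that a timeout without a real hardware hang is merely
 * reported:
 *
 *	if (amdgpu_device_should_recover_gpu(ring->adev))
 *		amdgpu_device_gpu_recover(ring->adev, job);
 */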
5c03e584
FX
4312int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
4313{
4314 u32 i;
4315 int ret = 0;
4316
4317 amdgpu_atombios_scratch_regs_engine_hung(adev, true);
4318
4319 dev_info(adev->dev, "GPU mode1 reset\n");
4320
4321 /* disable BM */
4322 pci_clear_master(adev->pdev);
4323
4324 amdgpu_device_cache_pci_state(adev->pdev);
4325
4326 if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
4327 dev_info(adev->dev, "GPU smu mode1 reset\n");
4328 ret = amdgpu_dpm_mode1_reset(adev);
4329 } else {
4330 dev_info(adev->dev, "GPU psp mode1 reset\n");
4331 ret = psp_gpu_reset(adev);
4332 }
4333
4334 if (ret)
4335 dev_err(adev->dev, "GPU mode1 reset failed\n");
4336
4337 amdgpu_device_load_pci_state(adev->pdev);
4338
4339 /* wait for asic to come out of reset */
4340 for (i = 0; i < adev->usec_timeout; i++) {
4341 u32 memsize = adev->nbio.funcs->get_memsize(adev);
4342
4343 if (memsize != 0xffffffff)
4344 break;
4345 udelay(1);
4346 }
4347
4348 amdgpu_atombios_scratch_regs_engine_hung(adev, false);
4349 return ret;
4350}
5c6dd71e 4351
e3c1b071 4352int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
04442bf7 4353 struct amdgpu_reset_context *reset_context)
26bc5340
AG
4354{
4355 int i, r = 0;
04442bf7
LL
4356 struct amdgpu_job *job = NULL;
4357 bool need_full_reset =
4358 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4359
4360 if (reset_context->reset_req_dev == adev)
4361 job = reset_context->job;
71182665 4362
e3c1b071 4363 /* no need to dump if device is not in good state during probe period */
4364 if (!adev->gmc.xgmi.pending_reset)
4365 amdgpu_debugfs_wait_dump(adev);
728e7e0c 4366
b602ca5f
TZ
4367 if (amdgpu_sriov_vf(adev)) {
4368 /* stop the data exchange thread */
4369 amdgpu_virt_fini_data_exchange(adev);
4370 }
4371
71182665 4372 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
4373 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4374 struct amdgpu_ring *ring = adev->rings[i];
4375
51687759 4376 if (!ring || !ring->sched.thread)
0875dc9e 4377 continue;
5740682e 4378
2f9d4084
ML
4379 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
4380 amdgpu_fence_driver_force_completion(ring);
0875dc9e 4381 }
d38ceaf9 4382
222b5f04
AG
4383 if(job)
4384 drm_sched_increase_karma(&job->base);
4385
04442bf7 4386 r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
404b277b
LL
4387 /* If reset handler not implemented, continue; otherwise return */
4388 if (r == -ENOSYS)
4389 r = 0;
4390 else
04442bf7
LL
4391 return r;
4392
1d721ed6 4393 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
4394 if (!amdgpu_sriov_vf(adev)) {
4395
4396 if (!need_full_reset)
4397 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4398
4399 if (!need_full_reset) {
4400 amdgpu_device_ip_pre_soft_reset(adev);
4401 r = amdgpu_device_ip_soft_reset(adev);
4402 amdgpu_device_ip_post_soft_reset(adev);
4403 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 4404 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
26bc5340
AG
4405 need_full_reset = true;
4406 }
4407 }
4408
4409 if (need_full_reset)
4410 r = amdgpu_device_ip_suspend(adev);
04442bf7
LL
4411 if (need_full_reset)
4412 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4413 else
4414 clear_bit(AMDGPU_NEED_FULL_RESET,
4415 &reset_context->flags);
26bc5340
AG
4416 }
4417
4418 return r;
4419}
4420
04442bf7
LL
4421int amdgpu_do_asic_reset(struct list_head *device_list_handle,
4422 struct amdgpu_reset_context *reset_context)
26bc5340
AG
4423{
4424 struct amdgpu_device *tmp_adev = NULL;
04442bf7 4425 bool need_full_reset, skip_hw_reset, vram_lost = false;
26bc5340
AG
4426 int r = 0;
4427
04442bf7
LL
4428 /* Try reset handler method first */
4429 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
4430 reset_list);
4431 r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
404b277b
LL
4432 /* If reset handler not implemented, continue; otherwise return */
4433 if (r == -ENOSYS)
4434 r = 0;
4435 else
04442bf7
LL
4436 return r;
4437
4438 /* Reset handler not implemented, use the default method */
4439 need_full_reset =
4440 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4441 skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
4442
26bc5340 4443 /*
655ce9cb 4444 * ASIC reset has to be done on all XGMI hive nodes ASAP
26bc5340
AG
4445 * to allow proper links negotiation in FW (within 1 sec)
4446 */
7ac71382 4447 if (!skip_hw_reset && need_full_reset) {
655ce9cb 4448 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
041a62bc 4449 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 4450 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
e3c1b071 4451 tmp_adev->gmc.xgmi.pending_reset = false;
c96cf282 4452 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
4453 r = -EALREADY;
4454 } else
4455 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 4456
041a62bc 4457 if (r) {
aac89168 4458 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4a580877 4459 r, adev_to_drm(tmp_adev)->unique);
041a62bc 4460 break;
ce316fa5
LM
4461 }
4462 }
4463
041a62bc
AG
4464 /* For XGMI wait for all resets to complete before proceed */
4465 if (!r) {
655ce9cb 4466 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
ce316fa5
LM
4467 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4468 flush_work(&tmp_adev->xgmi_reset_work);
4469 r = tmp_adev->asic_reset_res;
4470 if (r)
4471 break;
ce316fa5
LM
4472 }
4473 }
4474 }
ce316fa5 4475 }
26bc5340 4476
43c4d576 4477 if (!r && amdgpu_ras_intr_triggered()) {
655ce9cb 4478 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
8bc7b360
HZ
4479 if (tmp_adev->mmhub.ras_funcs &&
4480 tmp_adev->mmhub.ras_funcs->reset_ras_error_count)
4481 tmp_adev->mmhub.ras_funcs->reset_ras_error_count(tmp_adev);
43c4d576
JC
4482 }
4483
00eaa571 4484 amdgpu_ras_intr_cleared();
43c4d576 4485 }
00eaa571 4486
655ce9cb 4487 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
26bc5340
AG
4488 if (need_full_reset) {
4489 /* post card */
e3c1b071 4490 r = amdgpu_device_asic_init(tmp_adev);
4491 if (r) {
aac89168 4492 dev_warn(tmp_adev->dev, "asic atom init failed!");
e3c1b071 4493 } else {
26bc5340
AG
4494 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
4495 r = amdgpu_device_ip_resume_phase1(tmp_adev);
4496 if (r)
4497 goto out;
4498
4499 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
4500 if (vram_lost) {
77e7f829 4501 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 4502 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
4503 }
4504
6c28aed6 4505 r = amdgpu_gtt_mgr_recover(ttm_manager_type(&tmp_adev->mman.bdev, TTM_PL_TT));
26bc5340
AG
4506 if (r)
4507 goto out;
4508
4509 r = amdgpu_device_fw_loading(tmp_adev);
4510 if (r)
4511 return r;
4512
4513 r = amdgpu_device_ip_resume_phase2(tmp_adev);
4514 if (r)
4515 goto out;
4516
4517 if (vram_lost)
4518 amdgpu_device_fill_reset_magic(tmp_adev);
4519
fdafb359
EQ
4520 /*
4521 * Add this ASIC as tracked as reset was already
4522 * complete successfully.
4523 */
4524 amdgpu_register_gpu_instance(tmp_adev);
4525
04442bf7
LL
4526 if (!reset_context->hive &&
4527 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
e3c1b071 4528 amdgpu_xgmi_add_device(tmp_adev);
4529
7c04ca50 4530 r = amdgpu_device_ip_late_init(tmp_adev);
4531 if (r)
4532 goto out;
4533
565d1941
EQ
4534 amdgpu_fbdev_set_suspend(tmp_adev, 0);
4535
e8fbaf03
GC
4536 /*
4537 * The GPU enters a bad state once the number of faulty pages
4538 * detected by ECC reaches the threshold, and RAS
4539 * recovery is scheduled next. So add one check
4540 * here to break recovery if it indeed exceeds the
4541 * bad page threshold, and remind the user to
4542 * retire this GPU or set a bigger
4543 * bad_page_threshold value to fix this when
4544 * probing the driver again.
4545 */
11003c68 4546 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
e8fbaf03
GC
4547 /* must succeed. */
4548 amdgpu_ras_resume(tmp_adev);
4549 } else {
4550 r = -EINVAL;
4551 goto out;
4552 }
e79a04d5 4553
26bc5340 4554 /* Update PSP FW topology after reset */
04442bf7
LL
4555 if (reset_context->hive &&
4556 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
4557 r = amdgpu_xgmi_update_topology(
4558 reset_context->hive, tmp_adev);
26bc5340
AG
4559 }
4560 }
4561
26bc5340
AG
4562out:
4563 if (!r) {
4564 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
4565 r = amdgpu_ib_ring_tests(tmp_adev);
4566 if (r) {
4567 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
26bc5340
AG
4568 need_full_reset = true;
4569 r = -EAGAIN;
4570 goto end;
4571 }
4572 }
4573
4574 if (!r)
4575 r = amdgpu_device_recover_vram(tmp_adev);
4576 else
4577 tmp_adev->asic_reset_res = r;
4578 }
4579
4580end:
04442bf7
LL
4581 if (need_full_reset)
4582 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4583 else
4584 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
26bc5340
AG
4585 return r;
4586}
4587
08ebb485
DL
4588static bool amdgpu_device_lock_adev(struct amdgpu_device *adev,
4589 struct amdgpu_hive_info *hive)
26bc5340 4590{
53b3f8f4
DL
4591 if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0)
4592 return false;
4593
08ebb485
DL
4594 if (hive) {
4595 down_write_nest_lock(&adev->reset_sem, &hive->hive_lock);
4596 } else {
4597 down_write(&adev->reset_sem);
4598 }
5740682e 4599
a3a09142
AD
4600 switch (amdgpu_asic_reset_method(adev)) {
4601 case AMD_RESET_METHOD_MODE1:
4602 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
4603 break;
4604 case AMD_RESET_METHOD_MODE2:
4605 adev->mp1_state = PP_MP1_STATE_RESET;
4606 break;
4607 default:
4608 adev->mp1_state = PP_MP1_STATE_NONE;
4609 break;
4610 }
1d721ed6
AG
4611
4612 return true;
26bc5340 4613}
d38ceaf9 4614
26bc5340
AG
4615static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
4616{
89041940 4617 amdgpu_vf_error_trans_all(adev);
a3a09142 4618 adev->mp1_state = PP_MP1_STATE_NONE;
53b3f8f4 4619 atomic_set(&adev->in_gpu_reset, 0);
6049db43 4620 up_write(&adev->reset_sem);
26bc5340
AG
4621}
4622
91fb309d
HC
4623/*
4624 * To lock a list of amdgpu devices in a hive safely; when not in a hive
4625 * with multiple nodes, it behaves the same as amdgpu_device_lock_adev.
4626 *
4627 * unlock won't require roll back.
4628 */
4629static int amdgpu_device_lock_hive_adev(struct amdgpu_device *adev, struct amdgpu_hive_info *hive)
4630{
4631 struct amdgpu_device *tmp_adev = NULL;
4632
4633 if (adev->gmc.xgmi.num_physical_nodes > 1) {
4634 if (!hive) {
4635 dev_err(adev->dev, "Hive is NULL while device has multiple xgmi nodes");
4636 return -ENODEV;
4637 }
4638 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
4639 if (!amdgpu_device_lock_adev(tmp_adev, hive))
4640 goto roll_back;
4641 }
4642 } else if (!amdgpu_device_lock_adev(adev, hive))
4643 return -EAGAIN;
4644
4645 return 0;
4646roll_back:
4647 if (!list_is_first(&tmp_adev->gmc.xgmi.head, &hive->device_list)) {
4648 /*
4649 * if the locking iteration breaks in the middle of a hive,
4650 * it may mean there is a race issue,
4651 * or that a hive device locked up independently.
4652 * we may or may not be in trouble, so try to roll back
4653 * the lock and give out a warning.
4654 */
4655 dev_warn(tmp_adev->dev, "Hive lock iteration broke in the middle. Rolling back to unlock");
4656 list_for_each_entry_continue_reverse(tmp_adev, &hive->device_list, gmc.xgmi.head) {
4657 amdgpu_device_unlock_adev(tmp_adev);
4658 }
4659 }
4660 return -EAGAIN;
4661}
4662
3f12acc8
EQ
4663static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
4664{
4665 struct pci_dev *p = NULL;
4666
4667 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4668 adev->pdev->bus->number, 1);
4669 if (p) {
4670 pm_runtime_enable(&(p->dev));
4671 pm_runtime_resume(&(p->dev));
4672 }
4673}
4674
4675static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
4676{
4677 enum amd_reset_method reset_method;
4678 struct pci_dev *p = NULL;
4679 u64 expires;
4680
4681 /*
4682 * For now, only BACO and mode1 reset are confirmed
4683 * to suffer from the audio issue if the codec is not properly suspended.
4684 */
4685 reset_method = amdgpu_asic_reset_method(adev);
4686 if ((reset_method != AMD_RESET_METHOD_BACO) &&
4687 (reset_method != AMD_RESET_METHOD_MODE1))
4688 return -EINVAL;
4689
4690 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4691 adev->pdev->bus->number, 1);
4692 if (!p)
4693 return -ENODEV;
4694
4695 expires = pm_runtime_autosuspend_expiration(&(p->dev));
4696 if (!expires)
4697 /*
4698 * If we cannot get the audio device autosuspend delay,
4699 * a fixed 4S interval will be used. Since 3S is
4700 * the audio controller's default autosuspend delay setting,
4701 * the 4S used here is guaranteed to cover it.
4702 */
54b7feb9 4703 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
4704
4705 while (!pm_runtime_status_suspended(&(p->dev))) {
4706 if (!pm_runtime_suspend(&(p->dev)))
4707 break;
4708
4709 if (expires < ktime_get_mono_fast_ns()) {
4710 dev_warn(adev->dev, "failed to suspend display audio\n");
4711 /* TODO: abort the succeeding gpu reset? */
4712 return -ETIMEDOUT;
4713 }
4714 }
4715
4716 pm_runtime_disable(&(p->dev));
4717
4718 return 0;
4719}
4720
9d8d96be 4721static void amdgpu_device_recheck_guilty_jobs(
04442bf7
LL
4722 struct amdgpu_device *adev, struct list_head *device_list_handle,
4723 struct amdgpu_reset_context *reset_context)
e6c6338f
JZ
4724{
4725 int i, r = 0;
4726
4727 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4728 struct amdgpu_ring *ring = adev->rings[i];
4729 int ret = 0;
4730 struct drm_sched_job *s_job;
4731
4732 if (!ring || !ring->sched.thread)
4733 continue;
4734
4735 s_job = list_first_entry_or_null(&ring->sched.pending_list,
4736 struct drm_sched_job, list);
4737 if (s_job == NULL)
4738 continue;
4739
4740 /* clear the job's guilty flag and rely on the following step to decide the real one */
4741 drm_sched_reset_karma(s_job);
4742 drm_sched_resubmit_jobs_ext(&ring->sched, 1);
4743
4744 ret = dma_fence_wait_timeout(s_job->s_fence->parent, false, ring->sched.timeout);
4745 if (ret == 0) { /* timeout */
4746 DRM_ERROR("Found the real bad job! ring:%s, job_id:%llx\n",
4747 ring->sched.name, s_job->id);
4748
4749 /* set guilty */
4750 drm_sched_increase_karma(s_job);
4751retry:
4752 /* do hw reset */
4753 if (amdgpu_sriov_vf(adev)) {
4754 amdgpu_virt_fini_data_exchange(adev);
4755 r = amdgpu_device_reset_sriov(adev, false);
4756 if (r)
4757 adev->asic_reset_res = r;
4758 } else {
04442bf7
LL
4759 clear_bit(AMDGPU_SKIP_HW_RESET,
4760 &reset_context->flags);
4761 r = amdgpu_do_asic_reset(device_list_handle,
4762 reset_context);
e6c6338f
JZ
4763 if (r && r == -EAGAIN)
4764 goto retry;
4765 }
4766
4767 /*
4768 * add reset counter so that the following
4769 * resubmitted job could flush vmid
4770 */
4771 atomic_inc(&adev->gpu_reset_counter);
4772 continue;
4773 }
4774
4775 /* got the hw fence, signal finished fence */
4776 atomic_dec(ring->sched.score);
4777 dma_fence_get(&s_job->s_fence->finished);
4778 dma_fence_signal(&s_job->s_fence->finished);
4779 dma_fence_put(&s_job->s_fence->finished);
4780
4781 /* remove node from list and free the job */
4782 spin_lock(&ring->sched.job_list_lock);
4783 list_del_init(&s_job->list);
4784 spin_unlock(&ring->sched.job_list_lock);
4785 ring->sched.ops->free_job(s_job);
4786 }
4787}
4788
26bc5340
AG
4789/**
4790 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
4791 *
982a820b 4792 * @adev: amdgpu_device pointer
26bc5340
AG
4793 * @job: which job trigger hang
4794 *
4795 * Attempt to reset the GPU if it has hung (all asics).
4796 * Attempt to do soft-reset or full-reset and reinitialize Asic
4797 * Returns 0 for success or an error on failure.
4798 */
4799
4800int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
4801 struct amdgpu_job *job)
4802{
1d721ed6 4803 struct list_head device_list, *device_list_handle = NULL;
7dd8c205 4804 bool job_signaled = false;
26bc5340 4805 struct amdgpu_hive_info *hive = NULL;
26bc5340 4806 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 4807 int i, r = 0;
bb5c7235 4808 bool need_emergency_restart = false;
3f12acc8 4809 bool audio_suspended = false;
e6c6338f 4810 int tmp_vram_lost_counter;
04442bf7
LL
4811 struct amdgpu_reset_context reset_context;
4812
4813 memset(&reset_context, 0, sizeof(reset_context));
26bc5340 4814
6e3cd2a9 4815 /*
bb5c7235
WS
4816 * Special case: RAS triggered and full reset isn't supported
4817 */
4818 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
4819
d5ea093e
AG
4820 /*
4821 * Flush RAM to disk so that after reboot
4822 * the user can read log and see why the system rebooted.
4823 */
bb5c7235 4824 if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
4825 DRM_WARN("Emergency reboot.");
4826
4827 ksys_sync_helper();
4828 emergency_restart();
4829 }
4830
b823821f 4831 dev_info(adev->dev, "GPU %s begin!\n",
bb5c7235 4832 need_emergency_restart ? "jobs stop":"reset");
26bc5340
AG
4833
4834 /*
1d721ed6
AG
4835 * Here we trylock to avoid a chain of resets executing from
4836 * either jobs triggering on different adevs in an XGMI hive or jobs on
4837 * different schedulers for the same device while this TO handler is running.
4838 * We always reset all schedulers for a device and all devices in an XGMI
4839 * hive, so that should take care of them too.
26bc5340 4840 */
d95e8e97 4841 hive = amdgpu_get_xgmi_hive(adev);
53b3f8f4
DL
4842 if (hive) {
4843 if (atomic_cmpxchg(&hive->in_reset, 0, 1) != 0) {
4844 DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
4845 job ? job->base.id : -1, hive->hive_id);
d95e8e97 4846 amdgpu_put_xgmi_hive(hive);
91fb309d
HC
4847 if (job)
4848 drm_sched_increase_karma(&job->base);
53b3f8f4
DL
4849 return 0;
4850 }
4851 mutex_lock(&hive->hive_lock);
1d721ed6 4852 }
26bc5340 4853
04442bf7
LL
4854 reset_context.method = AMD_RESET_METHOD_NONE;
4855 reset_context.reset_req_dev = adev;
4856 reset_context.job = job;
4857 reset_context.hive = hive;
4858 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
4859
91fb309d
HC
4860 /*
4861 * lock the device before we try to operate on the linked list;
4862 * if we didn't get the device lock, don't touch the linked list since
4863 * others may be iterating over it.
4864 */
4865 r = amdgpu_device_lock_hive_adev(adev, hive);
4866 if (r) {
4867 dev_info(adev->dev, "Bailing on TDR for s_job:%llx, as another already in progress",
4868 job ? job->base.id : -1);
4869
4870 /* even we skipped this reset, still need to set the job to guilty */
4871 if (job)
4872 drm_sched_increase_karma(&job->base);
4873 goto skip_recovery;
4874 }
4875
9e94d22c
EQ
4876 /*
4877 * Build list of devices to reset.
4878 * In case we are in XGMI hive mode, resort the device list
4879 * to put adev in the 1st position.
4880 */
4881 INIT_LIST_HEAD(&device_list);
4882 if (adev->gmc.xgmi.num_physical_nodes > 1) {
655ce9cb 4883 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
4884 list_add_tail(&tmp_adev->reset_list, &device_list);
4885 if (!list_is_first(&adev->reset_list, &device_list))
4886 list_rotate_to_front(&adev->reset_list, &device_list);
4887 device_list_handle = &device_list;
26bc5340 4888 } else {
655ce9cb 4889 list_add_tail(&adev->reset_list, &device_list);
26bc5340
AG
4890 device_list_handle = &device_list;
4891 }
4892
1d721ed6 4893 /* block all schedulers and reset given job's ring */
655ce9cb 4894 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
3f12acc8
EQ
4895 /*
4896 * Try to put the audio codec into the suspend state
4897 * before the gpu reset starts.
4898 *
4899 * The power domain of the graphics device is
4900 * shared with the AZ power domain. Without this,
4901 * we may change the audio hardware from behind
4902 * the audio driver's back. That will trigger
4903 * some audio codec errors.
4904 */
4905 if (!amdgpu_device_suspend_display_audio(tmp_adev))
4906 audio_suspended = true;
4907
9e94d22c
EQ
4908 amdgpu_ras_set_error_query_ready(tmp_adev, false);
4909
52fb44cf
EQ
4910 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
4911
9e94d22c
EQ
4912 if (!amdgpu_sriov_vf(tmp_adev))
4913 amdgpu_amdkfd_pre_reset(tmp_adev);
4914
12ffa55d
AG
4915 /*
4916 * Mark these ASICs to be reset as untracked first,
4917 * and add them back after the reset completes.
4918 */
4919 amdgpu_unregister_gpu_instance(tmp_adev);
4920
a2f63ee8 4921 amdgpu_fbdev_set_suspend(tmp_adev, 1);
565d1941 4922
f1c1314b 4923 /* disable ras on ALL IPs */
bb5c7235 4924 if (!need_emergency_restart &&
b823821f 4925 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 4926 amdgpu_ras_suspend(tmp_adev);
4927
1d721ed6
AG
4928 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4929 struct amdgpu_ring *ring = tmp_adev->rings[i];
4930
4931 if (!ring || !ring->sched.thread)
4932 continue;
4933
0b2d2c2e 4934 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 4935
bb5c7235 4936 if (need_emergency_restart)
7c6e68c7 4937 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6 4938 }
8f8c80f4 4939 atomic_inc(&tmp_adev->gpu_reset_counter);
1d721ed6
AG
4940 }
4941
bb5c7235 4942 if (need_emergency_restart)
7c6e68c7
AG
4943 goto skip_sched_resume;
4944
1d721ed6
AG
4945 /*
4946 * Must check guilty signal here since after this point all old
4947 * HW fences are force signaled.
4948 *
4949 * job->base holds a reference to parent fence
4950 */
4951 if (job && job->base.s_fence->parent &&
7dd8c205 4952 dma_fence_is_signaled(job->base.s_fence->parent)) {
1d721ed6 4953 job_signaled = true;
1d721ed6
AG
4954 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
4955 goto skip_hw_reset;
4956 }
4957
26bc5340 4958retry: /* Rest of adevs pre asic reset from XGMI hive. */
655ce9cb 4959 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
04442bf7 4960 r = amdgpu_device_pre_asic_reset(tmp_adev, &reset_context);
26bc5340
AG
4961 /*TODO Should we stop ?*/
4962 if (r) {
aac89168 4963 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
4a580877 4964 r, adev_to_drm(tmp_adev)->unique);
26bc5340
AG
4965 tmp_adev->asic_reset_res = r;
4966 }
4967 }
4968
e6c6338f 4969 tmp_vram_lost_counter = atomic_read(&((adev)->vram_lost_counter));
26bc5340
AG
4970 /* Actual ASIC resets if needed.*/
4971 /* TODO Implement XGMI hive reset logic for SRIOV */
4972 if (amdgpu_sriov_vf(adev)) {
4973 r = amdgpu_device_reset_sriov(adev, job ? false : true);
4974 if (r)
4975 adev->asic_reset_res = r;
4976 } else {
04442bf7 4977 r = amdgpu_do_asic_reset(device_list_handle, &reset_context);
26bc5340
AG
4978 if (r && r == -EAGAIN)
4979 goto retry;
4980 }
4981
1d721ed6
AG
4982skip_hw_reset:
4983
26bc5340 4984 /* Post ASIC reset for all devs .*/
655ce9cb 4985 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
7c6e68c7 4986
e6c6338f
JZ
4987 /*
4988 * Sometimes a later bad compute job can block a good gfx job because the gfx
4989 * and compute rings share internal GC hardware. We add an additional
4990 * guilty-job recheck step to find the real guilty job: it synchronously
4991 * submits and waits for the first job to be signaled. If that times out,
4992 * we identify it as the real guilty job.
4993 */
4994 if (amdgpu_gpu_recovery == 2 &&
4995 !(tmp_vram_lost_counter < atomic_read(&adev->vram_lost_counter)))
04442bf7
LL
4996 amdgpu_device_recheck_guilty_jobs(
4997 tmp_adev, device_list_handle, &reset_context);
e6c6338f 4998
1d721ed6
AG
4999 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5000 struct amdgpu_ring *ring = tmp_adev->rings[i];
5001
5002 if (!ring || !ring->sched.thread)
5003 continue;
5004
5005 /* No point to resubmit jobs if we didn't HW reset*/
5006 if (!tmp_adev->asic_reset_res && !job_signaled)
5007 drm_sched_resubmit_jobs(&ring->sched);
5008
5009 drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
5010 }
5011
5012 if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
4a580877 5013 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
1d721ed6
AG
5014 }
5015
5016 tmp_adev->asic_reset_res = 0;
26bc5340
AG
5017
5018 if (r) {
5019 /* bad news, how to tell it to userspace ? */
12ffa55d 5020 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
5021 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5022 } else {
12ffa55d 5023 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
3fa8f89d
S
5024 if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5025 DRM_WARN("smart shift update failed\n");
26bc5340 5026 }
7c6e68c7 5027 }
26bc5340 5028
7c6e68c7 5029skip_sched_resume:
655ce9cb 5030 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
8e2712e7 5031 /* unlock kfd: SRIOV would do it separately */
bb5c7235 5032 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
7c6e68c7 5033 amdgpu_amdkfd_post_reset(tmp_adev);
8e2712e7 5034
5035 /* kfd_post_reset will do nothing if the kfd device is not initialized,
5036 * so bring up kfd here if it was not initialized before
5037 */
5038 if (!tmp_adev->kfd.init_complete)
5039 amdgpu_amdkfd_device_init(tmp_adev);
5040
3f12acc8
EQ
5041 if (audio_suspended)
5042 amdgpu_device_resume_display_audio(tmp_adev);
26bc5340
AG
5043 amdgpu_device_unlock_adev(tmp_adev);
5044 }
5045
cbfd17f7 5046skip_recovery:
9e94d22c 5047 if (hive) {
53b3f8f4 5048 atomic_set(&hive->in_reset, 0);
9e94d22c 5049 mutex_unlock(&hive->hive_lock);
d95e8e97 5050 amdgpu_put_xgmi_hive(hive);
9e94d22c 5051 }
26bc5340 5052
91fb309d 5053 if (r && r != -EAGAIN)
26bc5340 5054 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
d38ceaf9
AD
5055 return r;
5056}
5057
e3ecdffa
AD
5058/**
5059 * amdgpu_device_get_pcie_info - fetch PCIE info about the PCIE slot
5060 *
5061 * @adev: amdgpu_device pointer
5062 *
5063 * Fetches and stores in the driver the PCIE capabilities (gen speed
5064 * and lanes) of the slot the device is in. Handles APUs and
5065 * virtualized environments where PCIE config space may not be available.
5066 */
5494d864 5067static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 5068{
5d9a6330 5069 struct pci_dev *pdev;
c5313457
HK
5070 enum pci_bus_speed speed_cap, platform_speed_cap;
5071 enum pcie_link_width platform_link_width;
d0dd7f0c 5072
cd474ba0
AD
5073 if (amdgpu_pcie_gen_cap)
5074 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 5075
cd474ba0
AD
5076 if (amdgpu_pcie_lane_cap)
5077 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 5078
cd474ba0
AD
5079 /* covers APUs as well */
5080 if (pci_is_root_bus(adev->pdev->bus)) {
5081 if (adev->pm.pcie_gen_mask == 0)
5082 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5083 if (adev->pm.pcie_mlw_mask == 0)
5084 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 5085 return;
cd474ba0 5086 }
d0dd7f0c 5087
c5313457
HK
5088 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5089 return;
5090
dbaa922b
AD
5091 pcie_bandwidth_available(adev->pdev, NULL,
5092 &platform_speed_cap, &platform_link_width);
c5313457 5093
cd474ba0 5094 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
5095 /* asic caps */
5096 pdev = adev->pdev;
5097 speed_cap = pcie_get_speed_cap(pdev);
5098 if (speed_cap == PCI_SPEED_UNKNOWN) {
5099 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
5100 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5101 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 5102 } else {
2b3a1f51
FX
5103 if (speed_cap == PCIE_SPEED_32_0GT)
5104 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5105 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5106 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5107 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5108 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5109 else if (speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5110 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5111 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5112 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5113 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5114 else if (speed_cap == PCIE_SPEED_8_0GT)
5115 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5116 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5117 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5118 else if (speed_cap == PCIE_SPEED_5_0GT)
5119 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5120 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5121 else
5122 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5123 }
5124 /* platform caps */
c5313457 5125 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
5126 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5127 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5128 } else {
2b3a1f51
FX
5129 if (platform_speed_cap == PCIE_SPEED_32_0GT)
5130 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5131 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5132 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5133 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5134 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5135 else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
5136 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5137 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5138 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5139 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 5140 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
5141 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5142 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5143 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 5144 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
5145 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5146 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5147 else
5148 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5149
cd474ba0
AD
5150 }
5151 }
5152 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 5153 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
5154 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5155 } else {
c5313457 5156 switch (platform_link_width) {
5d9a6330 5157 case PCIE_LNK_X32:
cd474ba0
AD
5158 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5159 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5160 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5161 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5162 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5163 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5164 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5165 break;
5d9a6330 5166 case PCIE_LNK_X16:
cd474ba0
AD
5167 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5168 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5169 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5170 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5171 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5172 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5173 break;
5d9a6330 5174 case PCIE_LNK_X12:
cd474ba0
AD
5175 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5176 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5177 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5178 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5179 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5180 break;
5d9a6330 5181 case PCIE_LNK_X8:
cd474ba0
AD
5182 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5183 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5184 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5185 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5186 break;
5d9a6330 5187 case PCIE_LNK_X4:
cd474ba0
AD
5188 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5189 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5190 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5191 break;
5d9a6330 5192 case PCIE_LNK_X2:
cd474ba0
AD
5193 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5194 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5195 break;
5d9a6330 5196 case PCIE_LNK_X1:
cd474ba0
AD
5197 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5198 break;
5199 default:
5200 break;
5201 }
d0dd7f0c
AD
5202 }
5203 }
5204}
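/*
 * Illustrative sketch (hypothetical helper, not part of the original file):
 * once the masks above are cached, a caller could query them like this.
 * Both CAIL_* flags come from amd_pcie.h, already included above.
 */
static inline bool amdgpu_example_has_pcie_gen3(struct amdgpu_device *adev)
{
	/* true only when both the ASIC and the platform advertise Gen3 */
	return (adev->pm.pcie_gen_mask & CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3) &&
	       (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
}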
d38ceaf9 5205
361dbd01
AD
5206int amdgpu_device_baco_enter(struct drm_device *dev)
5207{
1348969a 5208 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5209 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01 5210
4a580877 5211 if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
361dbd01
AD
5212 return -ENOTSUPP;
5213
8ab0d6f0 5214 if (ras && adev->ras_enabled &&
acdae216 5215 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5216 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
5217
9530273e 5218 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
5219}
5220
5221int amdgpu_device_baco_exit(struct drm_device *dev)
5222{
1348969a 5223 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5224 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 5225 int ret = 0;
361dbd01 5226
4a580877 5227 if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
361dbd01
AD
5228 return -ENOTSUPP;
5229
9530273e
EQ
5230 ret = amdgpu_dpm_baco_exit(adev);
5231 if (ret)
5232 return ret;
7a22677b 5233
8ab0d6f0 5234 if (ras && adev->ras_enabled &&
acdae216 5235 adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5236 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
5237
5238 return 0;
361dbd01 5239}
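/*
 * Illustrative sketch (hypothetical caller, not part of the original file):
 * BACO entry and exit are expected to be used as a pair, e.g. around a
 * runtime-suspend style power-down of the device.
 */
static int amdgpu_example_baco_cycle(struct drm_device *dev)
{
	int r;

	r = amdgpu_device_baco_enter(dev);
	if (r)
		return r;

	/* the ASIC sits in BACO here: core powered down, bus kept alive */

	return amdgpu_device_baco_exit(dev);
}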
c9a6b82f 5240
acd89fca
AG
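/* Cancel the scheduler's pending timeout (TDR) work on every ring and wait
 * for any handler that is already running to finish. */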
5241static void amdgpu_cancel_all_tdr(struct amdgpu_device *adev)
5242{
5243 int i;
5244
5245 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5246 struct amdgpu_ring *ring = adev->rings[i];
5247
5248 if (!ring || !ring->sched.thread)
5249 continue;
5250
5251 cancel_delayed_work_sync(&ring->sched.work_tdr);
5252 }
5253}
5254
c9a6b82f
AG
5255/**
5256 * amdgpu_pci_error_detected - Called when a PCI error is detected.
5257 * @pdev: PCI device struct
5258 * @state: PCI channel state
5259 *
5260 * Description: Called when a PCI error is detected.
5261 *
5262 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
5263 */
5264pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
5265{
5266 struct drm_device *dev = pci_get_drvdata(pdev);
5267 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5268 int i;
c9a6b82f
AG
5269
5270 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
5271
6894305c
AG
5272 if (adev->gmc.xgmi.num_physical_nodes > 1) {
5273 DRM_WARN("No support for XGMI hive yet...");
5274 return PCI_ERS_RESULT_DISCONNECT;
5275 }
5276
c9a6b82f
AG
5277 switch (state) {
5278 case pci_channel_io_normal:
5279 return PCI_ERS_RESULT_CAN_RECOVER;
acd89fca 5280 /* Fatal error, prepare for slot reset */
8a11d283
TZ
5281 case pci_channel_io_frozen:
5282 /*
acd89fca
AG
5283 * Cancel and wait for all TDRs in progress if we fail to
5284 * set adev->in_gpu_reset in amdgpu_device_lock_adev.
5285 *
5286 * Locking adev->reset_sem will prevent any external access
5287 * to the GPU during PCI error recovery.
5288 */
5289 while (!amdgpu_device_lock_adev(adev, NULL))
5290 amdgpu_cancel_all_tdr(adev);
5291
5292 /*
5293 * Block any work scheduling as we do for regular GPU reset
5294 * for the duration of the recovery
5295 */
5296 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5297 struct amdgpu_ring *ring = adev->rings[i];
5298
5299 if (!ring || !ring->sched.thread)
5300 continue;
5301
5302 drm_sched_stop(&ring->sched, NULL);
5303 }
8f8c80f4 5304 atomic_inc(&adev->gpu_reset_counter);
c9a6b82f
AG
5305 return PCI_ERS_RESULT_NEED_RESET;
5306 case pci_channel_io_perm_failure:
5307 /* Permanent error, prepare for device removal */
5308 return PCI_ERS_RESULT_DISCONNECT;
5309 }
5310
5311 return PCI_ERS_RESULT_NEED_RESET;
5312}
5313
5314/**
5315 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
5316 * @pdev: pointer to PCI device
5317 */
5318pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
5319{
5320
5321 DRM_INFO("PCI error: mmio enabled callback!!\n");
5322
5323 /* TODO - dump whatever for debugging purposes */
5324
5325 /* This is called only if amdgpu_pci_error_detected returns
5326 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
5327 * works, so there is no need to reset the slot.
5328 */
5329
5330 return PCI_ERS_RESULT_RECOVERED;
5331}
5332
5333/**
5334 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
5335 * @pdev: PCI device struct
5336 *
5337 * Description: This routine is called by the pci error recovery
5338 * code after the PCI slot has been reset, just before we
5339 * should resume normal operations.
5340 */
5341pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
5342{
5343 struct drm_device *dev = pci_get_drvdata(pdev);
5344 struct amdgpu_device *adev = drm_to_adev(dev);
362c7b91 5345 int r, i;
04442bf7 5346 struct amdgpu_reset_context reset_context;
362c7b91 5347 u32 memsize;
7ac71382 5348 struct list_head device_list;
c9a6b82f
AG
5349
5350 DRM_INFO("PCI error: slot reset callback!!\n");
5351
04442bf7
LL
5352 memset(&reset_context, 0, sizeof(reset_context));
5353
7ac71382 5354 INIT_LIST_HEAD(&device_list);
655ce9cb 5355 list_add_tail(&adev->reset_list, &device_list);
7ac71382 5356
362c7b91
AG
5357 /* wait for asic to come out of reset */
5358 msleep(500);
5359
7ac71382 5360 /* Restore PCI confspace */
c1dd4aa6 5361 amdgpu_device_load_pci_state(pdev);
c9a6b82f 5362
362c7b91
AG
5363 /* confirm ASIC came out of reset */
5364 for (i = 0; i < adev->usec_timeout; i++) {
5365 memsize = amdgpu_asic_get_config_memsize(adev);
5366
5367 if (memsize != 0xffffffff)
5368 break;
5369 udelay(1);
5370 }
5371 if (memsize == 0xffffffff) {
5372 r = -ETIME;
5373 goto out;
5374 }
5375
04442bf7
LL
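/* Set up a reset context that skips the ASIC reset itself: the PCI core has
 * already reset the slot, so only the post-reset re-initialization is needed
 * (note added for clarity). */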
5376 reset_context.method = AMD_RESET_METHOD_NONE;
5377 reset_context.reset_req_dev = adev;
5378 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
5379 set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
5380
7afefb81 5381 adev->no_hw_access = true;
04442bf7 5382 r = amdgpu_device_pre_asic_reset(adev, &reset_context);
7afefb81 5383 adev->no_hw_access = false;
c9a6b82f
AG
5384 if (r)
5385 goto out;
5386
04442bf7 5387 r = amdgpu_do_asic_reset(&device_list, &reset_context);
c9a6b82f
AG
5388
5389out:
c9a6b82f 5390 if (!r) {
c1dd4aa6
AG
5391 if (amdgpu_device_cache_pci_state(adev->pdev))
5392 pci_restore_state(adev->pdev);
5393
c9a6b82f
AG
5394 DRM_INFO("PCIe error recovery succeeded\n");
5395 } else {
5396 DRM_ERROR("PCIe error recovery failed, err:%d", r);
5397 amdgpu_device_unlock_adev(adev);
5398 }
5399
5400 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
5401}
5402
5403/**
5404 * amdgpu_pci_resume() - resume normal ops after PCI reset
5405 * @pdev: pointer to PCI device
5406 *
5407 * Called when the error recovery driver tells us that it's
505199a3 5408 * OK to resume normal operation.
c9a6b82f
AG
5409 */
5410void amdgpu_pci_resume(struct pci_dev *pdev)
5411{
5412 struct drm_device *dev = pci_get_drvdata(pdev);
5413 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5414 int i;
c9a6b82f 5415
c9a6b82f
AG
5416
5417 DRM_INFO("PCI error: resume callback!!\n");
acd89fca
AG
5418
5419 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5420 struct amdgpu_ring *ring = adev->rings[i];
5421
5422 if (!ring || !ring->sched.thread)
5423 continue;
5424
5425
5426 drm_sched_resubmit_jobs(&ring->sched);
5427 drm_sched_start(&ring->sched, true);
5428 }
5429
5430 amdgpu_device_unlock_adev(adev);
c9a6b82f 5431}
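/*
 * Minimal sketch of how the four callbacks above are typically wired into
 * the PCI core (in the real driver this wiring lives in amdgpu_drv.c; shown
 * here only for illustration, using the standard struct pci_error_handlers).
 */
static const struct pci_error_handlers amdgpu_example_pci_err_handler = {
	.error_detected	= amdgpu_pci_error_detected,
	.mmio_enabled	= amdgpu_pci_mmio_enabled,
	.slot_reset	= amdgpu_pci_slot_reset,
	.resume		= amdgpu_pci_resume,
};
/* ... and in the pci_driver: .err_handler = &amdgpu_example_pci_err_handler */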
c1dd4aa6
AG
5432
5433bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
5434{
5435 struct drm_device *dev = pci_get_drvdata(pdev);
5436 struct amdgpu_device *adev = drm_to_adev(dev);
5437 int r;
5438
5439 r = pci_save_state(pdev);
5440 if (!r) {
5441 kfree(adev->pci_state);
5442
5443 adev->pci_state = pci_store_saved_state(pdev);
5444
5445 if (!adev->pci_state) {
5446 DRM_ERROR("Failed to store PCI saved state");
5447 return false;
5448 }
5449 } else {
5450 DRM_WARN("Failed to save PCI state, err:%d\n", r);
5451 return false;
5452 }
5453
5454 return true;
5455}
5456
5457bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
5458{
5459 struct drm_device *dev = pci_get_drvdata(pdev);
5460 struct amdgpu_device *adev = drm_to_adev(dev);
5461 int r;
5462
5463 if (!adev->pci_state)
5464 return false;
5465
5466 r = pci_load_saved_state(pdev, adev->pci_state);
5467
5468 if (!r) {
5469 pci_restore_state(pdev);
5470 } else {
5471 DRM_WARN("Failed to load PCI state, err:%d\n", r);
5472 return false;
5473 }
5474
5475 return true;
5476}
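/*
 * Illustrative pairing (hypothetical helper, not part of the original file):
 * cache the config space while the device is healthy, then load/restore it
 * after a reset, as the slot_reset path above does.
 */
static void amdgpu_example_pci_state_roundtrip(struct amdgpu_device *adev)
{
	if (!amdgpu_device_cache_pci_state(adev->pdev))
		return;		/* nothing cached, nothing to restore */

	/* ... device reset would happen here ... */

	amdgpu_device_load_pci_state(adev->pdev);
}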
5477
5478