drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
b1ddf548 28#include <linux/power_supply.h>
0875dc9e 29#include <linux/kthread.h>
fdf2f6c5 30#include <linux/module.h>
31#include <linux/console.h>
32#include <linux/slab.h>
fdf2f6c5 33
4562236b 34#include <drm/drm_atomic_helper.h>
fcd70cd3 35#include <drm/drm_probe_helper.h>
36#include <drm/amdgpu_drm.h>
37#include <linux/vgaarb.h>
38#include <linux/vga_switcheroo.h>
39#include <linux/efi.h>
40#include "amdgpu.h"
f4b373f4 41#include "amdgpu_trace.h"
42#include "amdgpu_i2c.h"
43#include "atom.h"
44#include "amdgpu_atombios.h"
a5bde2f9 45#include "amdgpu_atomfirmware.h"
d0dd7f0c 46#include "amd_pcie.h"
47#ifdef CONFIG_DRM_AMDGPU_SI
48#include "si.h"
49#endif
50#ifdef CONFIG_DRM_AMDGPU_CIK
51#include "cik.h"
52#endif
aaa36a97 53#include "vi.h"
460826e6 54#include "soc15.h"
0a5b8c7b 55#include "nv.h"
d38ceaf9 56#include "bif/bif_4_1_d.h"
9accf2fd 57#include <linux/pci.h>
bec86378 58#include <linux/firmware.h>
89041940 59#include "amdgpu_vf_error.h"
d38ceaf9 60
ba997709 61#include "amdgpu_amdkfd.h"
d2f52ac8 62#include "amdgpu_pm.h"
d38ceaf9 63
5183411b 64#include "amdgpu_xgmi.h"
c030f2e4 65#include "amdgpu_ras.h"
9c7c85f7 66#include "amdgpu_pmu.h"
bd607166 67#include "amdgpu_fru_eeprom.h"
5183411b 68
d5ea093e 69#include <linux/suspend.h>
c6a6e2db 70#include <drm/task_barrier.h>
3f12acc8 71#include <linux/pm_runtime.h>
d5ea093e 72
e2a75f88 73MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
3f76dced 74MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
2d2e5e7e 75MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
ad5a67a7 76MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
54c4d17e 77MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
65e60f6e 78MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
b51a26a0 79MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
23c6268e 80MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
ed42cfe1 81MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
42b325e5 82MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
4e52a9f8 83MODULE_FIRMWARE("amdgpu/vangogh_gpu_info.bin");
e2a75f88 84
85#define AMDGPU_RESUME_MS 2000
86
050091ab 87const char *amdgpu_asic_name[] = {
88 "TAHITI",
89 "PITCAIRN",
90 "VERDE",
91 "OLAND",
92 "HAINAN",
93 "BONAIRE",
94 "KAVERI",
95 "KABINI",
96 "HAWAII",
97 "MULLINS",
98 "TOPAZ",
99 "TONGA",
48299f95 100 "FIJI",
d38ceaf9 101 "CARRIZO",
139f4917 102 "STONEY",
103 "POLARIS10",
104 "POLARIS11",
c4642a47 105 "POLARIS12",
48ff108d 106 "VEGAM",
d4196f01 107 "VEGA10",
8fab806a 108 "VEGA12",
956fcddc 109 "VEGA20",
2ca8a5d2 110 "RAVEN",
d6c3b24e 111 "ARCTURUS",
1eee4228 112 "RENOIR",
852a6626 113 "NAVI10",
87dbad02 114 "NAVI14",
9802f5d7 115 "NAVI12",
ccaf72d3 116 "SIENNA_CICHLID",
ddd8fbe7 117 "NAVY_FLOUNDER",
4f1e9a76 118 "VANGOGH",
a2468e04 119 "DIMGREY_CAVEFISH",
120 "LAST",
121};
122
123/**
124 * DOC: pcie_replay_count
125 *
126 * The amdgpu driver provides a sysfs API for reporting the total number
127 * of PCIe replays (NAKs)
128 * The file pcie_replay_count is used for this and returns the total
129 * number of replays as a sum of the NAKs generated and NAKs received
130 */
131
132static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
133 struct device_attribute *attr, char *buf)
134{
135 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 136 struct amdgpu_device *adev = drm_to_adev(ddev);
137 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
138
139 return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
140}
141
142static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
143 amdgpu_device_get_pcie_replay_count, NULL);
144
145static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
146
147/**
 148 * DOC: product_name
 149 *
 150 * The amdgpu driver provides a sysfs API for reporting the product name
 151 * for the device
 152 * The file product_name is used for this and returns the product name
 153 * as returned from the FRU.
 154 * NOTE: This is only available for certain server cards
 155 */
156
157static ssize_t amdgpu_device_get_product_name(struct device *dev,
158 struct device_attribute *attr, char *buf)
159{
160 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 161 struct amdgpu_device *adev = drm_to_adev(ddev);
162
163 return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_name);
164}
165
166static DEVICE_ATTR(product_name, S_IRUGO,
167 amdgpu_device_get_product_name, NULL);
168
169/**
 170 * DOC: product_number
 171 *
 172 * The amdgpu driver provides a sysfs API for reporting the part number
 173 * for the device
 174 * The file product_number is used for this and returns the part number
 175 * as returned from the FRU.
 176 * NOTE: This is only available for certain server cards
 177 */
178
179static ssize_t amdgpu_device_get_product_number(struct device *dev,
180 struct device_attribute *attr, char *buf)
181{
182 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 183 struct amdgpu_device *adev = drm_to_adev(ddev);
184
185 return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_number);
186}
187
188static DEVICE_ATTR(product_number, S_IRUGO,
189 amdgpu_device_get_product_number, NULL);
190
191/**
192 * DOC: serial_number
193 *
194 * The amdgpu driver provides a sysfs API for reporting the serial number
195 * for the device
196 * The file serial_number is used for this and returns the serial number
197 * as returned from the FRU.
198 * NOTE: This is only available for certain server cards
199 */
200
201static ssize_t amdgpu_device_get_serial_number(struct device *dev,
202 struct device_attribute *attr, char *buf)
203{
204 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 205 struct amdgpu_device *adev = drm_to_adev(ddev);
206
207 return snprintf(buf, PAGE_SIZE, "%s\n", adev->serial);
208}
209
210static DEVICE_ATTR(serial_number, S_IRUGO,
211 amdgpu_device_get_serial_number, NULL);
212
213/**
214 * amdgpu_device_supports_atpx - Is the device a dGPU with HG/PX power control
215 *
216 * @dev: drm_device pointer
217 *
218 * Returns true if the device is a dGPU with HG/PX power control,
219 * otherwise return false.
220 */
221bool amdgpu_device_supports_atpx(struct drm_device *dev)
222{
223 struct amdgpu_device *adev = drm_to_adev(dev);
224
225 if (adev->flags & AMD_IS_PX)
226 return true;
227 return false;
228}
229
230/**
 231 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
 232 *
 233 * @dev: drm_device pointer
 234 *
 235 * Returns true if the device is a dGPU with ACPI power resources (BOCO),
 236 * otherwise return false.
 237 */
31af062a 238bool amdgpu_device_supports_boco(struct drm_device *dev)
d38ceaf9 239{
1348969a 240 struct amdgpu_device *adev = drm_to_adev(dev);
d38ceaf9 241
0330b848 242 if (adev->has_pr3)
243 return true;
244 return false;
245}
246
247/**
 248 * amdgpu_device_supports_baco - Does the device support BACO
 249 *
 250 * @dev: drm_device pointer
 251 *
 252 * Returns true if the device supports BACO,
 253 * otherwise return false.
 254 */
255bool amdgpu_device_supports_baco(struct drm_device *dev)
256{
1348969a 257 struct amdgpu_device *adev = drm_to_adev(dev);
258
259 return amdgpu_asic_supports_baco(adev);
260}
261
262/*
263 * VRAM access helper functions
264 */
265
266/**
 267 * amdgpu_device_vram_access - read/write a buffer in vram
 268 *
 269 * @adev: amdgpu_device pointer
 270 * @pos: offset of the buffer in vram
 271 * @buf: virtual address of the buffer in system memory
 272 * @size: read/write size, sizeof(@buf) must be >= @size
 273 * @write: true - write to vram, otherwise - read from vram
 274 */
275void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
276 uint32_t *buf, size_t size, bool write)
277{
e35e2b11 278 unsigned long flags;
279 uint32_t hi = ~0;
280 uint64_t last;
281
282
283#ifdef CONFIG_64BIT
284 last = min(pos + size, adev->gmc.visible_vram_size);
285 if (last > pos) {
286 void __iomem *addr = adev->mman.aper_base_kaddr + pos;
287 size_t count = last - pos;
288
289 if (write) {
290 memcpy_toio(addr, buf, count);
291 mb();
292 amdgpu_asic_flush_hdp(adev, NULL);
293 } else {
294 amdgpu_asic_invalidate_hdp(adev, NULL);
295 mb();
296 memcpy_fromio(buf, addr, count);
297 }
298
299 if (count == size)
300 return;
301
302 pos += count;
303 buf += count / 4;
304 size -= count;
305 }
306#endif
307
308 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
309 for (last = pos + size; pos < last; pos += 4) {
310 uint32_t tmp = pos >> 31;
e35e2b11 311
e35e2b11 312 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
313 if (tmp != hi) {
314 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
315 hi = tmp;
316 }
317 if (write)
318 WREG32_NO_KIQ(mmMM_DATA, *buf++);
319 else
320 *buf++ = RREG32_NO_KIQ(mmMM_DATA);
e35e2b11 321 }
ce05ac56 322 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
323}
324
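/*
 * Illustrative sketch (not part of this file): a debugging path that wants
 * to peek at and then restore a few dwords of VRAM could use the helper
 * above roughly like this; "offset" is a placeholder byte offset into VRAM.
 *
 *	uint32_t data[4];
 *
 *	amdgpu_device_vram_access(adev, offset, data, sizeof(data), false);
 *	... inspect or modify data[0..3] ...
 *	amdgpu_device_vram_access(adev, offset, data, sizeof(data), true);
 */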
d38ceaf9 325/*
f7ee1874 326 * register access helper functions.
d38ceaf9 327 */
e3ecdffa 328/**
f7ee1874 329 * amdgpu_device_rreg - read a memory mapped IO or indirect register
330 *
331 * @adev: amdgpu_device pointer
332 * @reg: dword aligned register offset
333 * @acc_flags: access flags which require special behavior
334 *
335 * Returns the 32 bit value from the offset specified.
336 */
337uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
338 uint32_t reg, uint32_t acc_flags)
d38ceaf9 339{
340 uint32_t ret;
341
342 if (adev->in_pci_err_recovery)
343 return 0;
344
345 if ((reg * 4) < adev->rmmio_size) {
346 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
347 amdgpu_sriov_runtime(adev) &&
348 down_read_trylock(&adev->reset_sem)) {
349 ret = amdgpu_kiq_rreg(adev, reg);
350 up_read(&adev->reset_sem);
351 } else {
352 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
353 }
354 } else {
355 ret = adev->pcie_rreg(adev, reg * 4);
81202807 356 }
bc992ba5 357
f7ee1874 358 trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
e78b579d 359
f4b373f4 360 return ret;
361}
362
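/*
 * Illustrative sketch: most callers do not invoke amdgpu_device_rreg()/
 * amdgpu_device_wreg() directly but go through the RREG32()/WREG32()
 * family of macros, which supply adev and the access flags.  The _NO_KIQ
 * variants skip the KIQ routing used under SR-IOV, e.g.:
 *
 *	uint32_t tmp = RREG32_NO_KIQ(mmMM_INDEX);
 *	WREG32_NO_KIQ(mmMM_INDEX, tmp | 0x80000000);
 */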
363/*
364 * MMIO register read with bytes helper functions
365 * @offset:bytes offset from MMIO start
366 *
367*/
368
369/**
370 * amdgpu_mm_rreg8 - read a memory mapped IO register
371 *
372 * @adev: amdgpu_device pointer
373 * @offset: byte aligned register offset
374 *
375 * Returns the 8 bit value from the offset specified.
376 */
377uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
378{
379 if (adev->in_pci_err_recovery)
380 return 0;
381
382 if (offset < adev->rmmio_size)
383 return (readb(adev->rmmio + offset));
384 BUG();
385}
386
387/*
388 * MMIO register write with bytes helper functions
389 * @offset:bytes offset from MMIO start
390 * @value: the value want to be written to the register
391 *
392*/
393/**
 394 * amdgpu_mm_wreg8 - write a memory mapped IO register
 395 *
 396 * @adev: amdgpu_device pointer
 397 * @offset: byte aligned register offset
 398 * @value: 8 bit value to write
 399 *
 400 * Writes the value specified to the offset specified.
 401 */
402void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
403{
404 if (adev->in_pci_err_recovery)
405 return;
406
407 if (offset < adev->rmmio_size)
408 writeb(value, adev->rmmio + offset);
409 else
410 BUG();
411}
412
e3ecdffa 413/**
f7ee1874 414 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
e3ecdffa
AD
415 *
416 * @adev: amdgpu_device pointer
417 * @reg: dword aligned register offset
418 * @v: 32 bit value to write to the register
419 * @acc_flags: access flags which require special behavior
420 *
421 * Writes the value specified to the offset specified.
422 */
f7ee1874
HZ
423void amdgpu_device_wreg(struct amdgpu_device *adev,
424 uint32_t reg, uint32_t v,
425 uint32_t acc_flags)
d38ceaf9 426{
bf36b52e
AG
427 if (adev->in_pci_err_recovery)
428 return;
429
f7ee1874
HZ
430 if ((reg * 4) < adev->rmmio_size) {
431 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
432 amdgpu_sriov_runtime(adev) &&
433 down_read_trylock(&adev->reset_sem)) {
434 amdgpu_kiq_wreg(adev, reg, v);
435 up_read(&adev->reset_sem);
436 } else {
437 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
438 }
439 } else {
440 adev->pcie_wreg(adev, reg * 4, v);
81202807 441 }
bc992ba5 442
f7ee1874 443 trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
2e0cc4d4 444}
d38ceaf9 445
446/*
 447 * amdgpu_mm_wreg_mmio_rlc - write register either with mmio or with RLC path if in range
 448 *
 449 * This function is invoked only for debugfs register access.
 450 */
f7ee1874
HZ
451void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
452 uint32_t reg, uint32_t v)
2e0cc4d4 453{
bf36b52e
AG
454 if (adev->in_pci_err_recovery)
455 return;
456
2e0cc4d4 457 if (amdgpu_sriov_fullaccess(adev) &&
f7ee1874
HZ
458 adev->gfx.rlc.funcs &&
459 adev->gfx.rlc.funcs->is_rlcg_access_range) {
2e0cc4d4
ML
460 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
461 return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v);
f7ee1874
HZ
462 } else {
463 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
47ed4e1c 464 }
d38ceaf9
AD
465}
466
e3ecdffa
AD
467/**
468 * amdgpu_io_rreg - read an IO register
469 *
470 * @adev: amdgpu_device pointer
471 * @reg: dword aligned register offset
472 *
473 * Returns the 32 bit value from the offset specified.
474 */
d38ceaf9
AD
475u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
476{
bf36b52e
AG
477 if (adev->in_pci_err_recovery)
478 return 0;
479
d38ceaf9
AD
480 if ((reg * 4) < adev->rio_mem_size)
481 return ioread32(adev->rio_mem + (reg * 4));
482 else {
483 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
484 return ioread32(adev->rio_mem + (mmMM_DATA * 4));
485 }
486}
487
e3ecdffa
AD
488/**
489 * amdgpu_io_wreg - write to an IO register
490 *
491 * @adev: amdgpu_device pointer
492 * @reg: dword aligned register offset
493 * @v: 32 bit value to write to the register
494 *
495 * Writes the value specified to the offset specified.
496 */
d38ceaf9
AD
497void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
498{
bf36b52e
AG
499 if (adev->in_pci_err_recovery)
500 return;
501
d38ceaf9
AD
502 if ((reg * 4) < adev->rio_mem_size)
503 iowrite32(v, adev->rio_mem + (reg * 4));
504 else {
505 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
506 iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
507 }
508}
509
510/**
511 * amdgpu_mm_rdoorbell - read a doorbell dword
512 *
513 * @adev: amdgpu_device pointer
514 * @index: doorbell index
515 *
516 * Returns the value in the doorbell aperture at the
517 * requested doorbell index (CIK).
518 */
519u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
520{
bf36b52e
AG
521 if (adev->in_pci_err_recovery)
522 return 0;
523
d38ceaf9
AD
524 if (index < adev->doorbell.num_doorbells) {
525 return readl(adev->doorbell.ptr + index);
526 } else {
527 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
528 return 0;
529 }
530}
531
532/**
533 * amdgpu_mm_wdoorbell - write a doorbell dword
534 *
535 * @adev: amdgpu_device pointer
536 * @index: doorbell index
537 * @v: value to write
538 *
539 * Writes @v to the doorbell aperture at the
540 * requested doorbell index (CIK).
541 */
542void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
543{
bf36b52e
AG
544 if (adev->in_pci_err_recovery)
545 return;
546
d38ceaf9
AD
547 if (index < adev->doorbell.num_doorbells) {
548 writel(v, adev->doorbell.ptr + index);
549 } else {
550 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
551 }
552}
553
832be404
KW
554/**
555 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
556 *
557 * @adev: amdgpu_device pointer
558 * @index: doorbell index
559 *
560 * Returns the value in the doorbell aperture at the
561 * requested doorbell index (VEGA10+).
562 */
563u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
564{
bf36b52e
AG
565 if (adev->in_pci_err_recovery)
566 return 0;
567
832be404
KW
568 if (index < adev->doorbell.num_doorbells) {
569 return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
570 } else {
571 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
572 return 0;
573 }
574}
575
576/**
577 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
578 *
579 * @adev: amdgpu_device pointer
580 * @index: doorbell index
581 * @v: value to write
582 *
583 * Writes @v to the doorbell aperture at the
584 * requested doorbell index (VEGA10+).
585 */
586void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
587{
588 if (adev->in_pci_err_recovery)
589 return;
590
591 if (index < adev->doorbell.num_doorbells) {
592 atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
593 } else {
594 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
595 }
596}
597
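/*
 * Illustrative sketch: ring code normally bumps its write pointer through
 * the doorbell macros built on these helpers rather than calling them
 * directly, e.g. for a ring that uses a 64 bit doorbell:
 *
 *	if (ring->use_doorbell)
 *		WDOORBELL64(ring->doorbell_index, ring->wptr);
 */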
1bba3683
HZ
598/**
599 * amdgpu_device_indirect_rreg - read an indirect register
600 *
601 * @adev: amdgpu_device pointer
602 * @pcie_index: mmio register offset
603 * @pcie_data: mmio register offset
22f453fb 604 * @reg_addr: indirect register address to read from
1bba3683
HZ
605 *
606 * Returns the value of indirect register @reg_addr
607 */
608u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
609 u32 pcie_index, u32 pcie_data,
610 u32 reg_addr)
611{
612 unsigned long flags;
613 u32 r;
614 void __iomem *pcie_index_offset;
615 void __iomem *pcie_data_offset;
616
617 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
618 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
619 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
620
621 writel(reg_addr, pcie_index_offset);
622 readl(pcie_index_offset);
623 r = readl(pcie_data_offset);
624 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
625
626 return r;
627}
628
629/**
630 * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
631 *
632 * @adev: amdgpu_device pointer
633 * @pcie_index: mmio register offset
634 * @pcie_data: mmio register offset
22f453fb 635 * @reg_addr: indirect register address to read from
1bba3683
HZ
636 *
637 * Returns the value of indirect register @reg_addr
638 */
639u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
640 u32 pcie_index, u32 pcie_data,
641 u32 reg_addr)
642{
643 unsigned long flags;
644 u64 r;
645 void __iomem *pcie_index_offset;
646 void __iomem *pcie_data_offset;
647
648 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
649 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
650 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
651
652 /* read low 32 bits */
653 writel(reg_addr, pcie_index_offset);
654 readl(pcie_index_offset);
655 r = readl(pcie_data_offset);
656 /* read high 32 bits */
657 writel(reg_addr + 4, pcie_index_offset);
658 readl(pcie_index_offset);
659 r |= ((u64)readl(pcie_data_offset) << 32);
660 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
661
662 return r;
663}
664
665/**
666 * amdgpu_device_indirect_wreg - write an indirect register address
667 *
668 * @adev: amdgpu_device pointer
669 * @pcie_index: mmio register offset
670 * @pcie_data: mmio register offset
671 * @reg_addr: indirect register offset
672 * @reg_data: indirect register data
673 *
674 */
675void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
676 u32 pcie_index, u32 pcie_data,
677 u32 reg_addr, u32 reg_data)
678{
679 unsigned long flags;
680 void __iomem *pcie_index_offset;
681 void __iomem *pcie_data_offset;
682
683 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
684 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
685 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
686
687 writel(reg_addr, pcie_index_offset);
688 readl(pcie_index_offset);
689 writel(reg_data, pcie_data_offset);
690 readl(pcie_data_offset);
691 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
692}
693
694/**
695 * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
696 *
697 * @adev: amdgpu_device pointer
698 * @pcie_index: mmio register offset
699 * @pcie_data: mmio register offset
700 * @reg_addr: indirect register offset
701 * @reg_data: indirect register data
702 *
703 */
704void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
705 u32 pcie_index, u32 pcie_data,
706 u32 reg_addr, u64 reg_data)
707{
708 unsigned long flags;
709 void __iomem *pcie_index_offset;
710 void __iomem *pcie_data_offset;
711
712 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
713 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
714 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
715
716 /* write low 32 bits */
717 writel(reg_addr, pcie_index_offset);
718 readl(pcie_index_offset);
719 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
720 readl(pcie_data_offset);
721 /* write high 32 bits */
722 writel(reg_addr + 4, pcie_index_offset);
723 readl(pcie_index_offset);
724 writel((u32)(reg_data >> 32), pcie_data_offset);
725 readl(pcie_data_offset);
726 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
727}
728
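/*
 * Illustrative sketch (simplified, offsets are placeholders): an ASIC file
 * can implement its adev->pcie_rreg/pcie_wreg callbacks on top of these
 * helpers by passing the MMIO offsets of its NBIO PCIE_INDEX/PCIE_DATA
 * register pair, roughly:
 *
 *	static u32 example_pcie_rreg(struct amdgpu_device *adev, u32 reg)
 *	{
 *		u32 pcie_index = 0x38, pcie_data = 0x3c;	(hypothetical offsets)
 *
 *		return amdgpu_device_indirect_rreg(adev, pcie_index, pcie_data, reg);
 *	}
 */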
d38ceaf9
AD
729/**
730 * amdgpu_invalid_rreg - dummy reg read function
731 *
982a820b 732 * @adev: amdgpu_device pointer
d38ceaf9
AD
733 * @reg: offset of register
734 *
735 * Dummy register read function. Used for register blocks
736 * that certain asics don't have (all asics).
737 * Returns the value in the register.
738 */
739static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
740{
741 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
742 BUG();
743 return 0;
744}
745
746/**
 747 * amdgpu_invalid_wreg - dummy reg write function
 748 *
 749 * @adev: amdgpu_device pointer
 750 * @reg: offset of register
 751 * @v: value to write to the register
 752 *
 753 * Dummy register write function. Used for register blocks
 754 * that certain asics don't have (all asics).
 755 */
756static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
757{
758 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
759 reg, v);
760 BUG();
761}
762
4fa1c6a6
TZ
763/**
764 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
765 *
982a820b 766 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
767 * @reg: offset of register
768 *
769 * Dummy register read function. Used for register blocks
770 * that certain asics don't have (all asics).
771 * Returns the value in the register.
772 */
773static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
774{
775 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
776 BUG();
777 return 0;
778}
779
780/**
 781 * amdgpu_invalid_wreg64 - dummy 64 bit reg write function
 782 *
 783 * @adev: amdgpu_device pointer
 784 * @reg: offset of register
 785 * @v: value to write to the register
 786 *
 787 * Dummy register write function. Used for register blocks
 788 * that certain asics don't have (all asics).
 789 */
790static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
791{
792 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
793 reg, v);
794 BUG();
795}
796
d38ceaf9
AD
797/**
798 * amdgpu_block_invalid_rreg - dummy reg read function
799 *
982a820b 800 * @adev: amdgpu_device pointer
d38ceaf9
AD
801 * @block: offset of instance
802 * @reg: offset of register
803 *
804 * Dummy register read function. Used for register blocks
805 * that certain asics don't have (all asics).
806 * Returns the value in the register.
807 */
808static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
809 uint32_t block, uint32_t reg)
810{
811 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
812 reg, block);
813 BUG();
814 return 0;
815}
816
817/**
 818 * amdgpu_block_invalid_wreg - dummy reg write function
 819 *
 820 * @adev: amdgpu_device pointer
 821 * @block: offset of instance
 822 * @reg: offset of register
 823 * @v: value to write to the register
 824 *
 825 * Dummy register write function. Used for register blocks
 826 * that certain asics don't have (all asics).
 827 */
828static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
829 uint32_t block,
830 uint32_t reg, uint32_t v)
831{
832 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
833 reg, block, v);
834 BUG();
835}
836
4d2997ab
AD
837/**
838 * amdgpu_device_asic_init - Wrapper for atom asic_init
839 *
982a820b 840 * @adev: amdgpu_device pointer
4d2997ab
AD
841 *
842 * Does any asic specific work and then calls atom asic init.
843 */
844static int amdgpu_device_asic_init(struct amdgpu_device *adev)
845{
846 amdgpu_asic_pre_asic_init(adev);
847
848 return amdgpu_atom_asic_init(adev->mode_info.atom_context);
849}
850
e3ecdffa
AD
851/**
852 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
853 *
982a820b 854 * @adev: amdgpu_device pointer
e3ecdffa
AD
855 *
856 * Allocates a scratch page of VRAM for use by various things in the
857 * driver.
858 */
06ec9070 859static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
d38ceaf9 860{
a4a02777
CK
861 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
862 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
863 &adev->vram_scratch.robj,
864 &adev->vram_scratch.gpu_addr,
865 (void **)&adev->vram_scratch.ptr);
d38ceaf9
AD
866}
867
e3ecdffa
AD
868/**
869 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
870 *
982a820b 871 * @adev: amdgpu_device pointer
e3ecdffa
AD
872 *
873 * Frees the VRAM scratch page.
874 */
06ec9070 875static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 876{
078af1a3 877 amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
d38ceaf9
AD
878}
879
880/**
 881 * amdgpu_device_program_register_sequence - program an array of registers.
 882 *
 883 * @adev: amdgpu_device pointer
 884 * @registers: pointer to the register array
 885 * @array_size: size of the register array
 886 *
 887 * Programs an array of registers with AND and OR masks.
 888 * This is a helper for setting golden registers.
 889 */
9c3f2b54
AD
890void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
891 const u32 *registers,
892 const u32 array_size)
d38ceaf9
AD
893{
894 u32 tmp, reg, and_mask, or_mask;
895 int i;
896
897 if (array_size % 3)
898 return;
899
900 for (i = 0; i < array_size; i +=3) {
901 reg = registers[i + 0];
902 and_mask = registers[i + 1];
903 or_mask = registers[i + 2];
904
905 if (and_mask == 0xffffffff) {
906 tmp = or_mask;
907 } else {
908 tmp = RREG32(reg);
909 tmp &= ~and_mask;
910 if (adev->family >= AMDGPU_FAMILY_AI)
911 tmp |= (or_mask & and_mask);
912 else
913 tmp |= or_mask;
914 }
915 WREG32(reg, tmp);
916 }
917}
918
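/*
 * Illustrative sketch: golden register tables are flat arrays of
 * {register, and_mask, or_mask} triplets; an and_mask of 0xffffffff means
 * "write or_mask as-is", anything else is a read-modify-write.  The table
 * below is made up for this example; real tables live in the per-ASIC files.
 *
 *	static const u32 example_golden_settings[] = {
 *		mmMM_INDEX_HI, 0xffffffff, 0x00000000,
 *		mmMM_DATA,     0x0000ff00, 0x00001200,
 *	};
 *
 *	amdgpu_device_program_register_sequence(adev, example_golden_settings,
 *						ARRAY_SIZE(example_golden_settings));
 */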
e3ecdffa
AD
919/**
920 * amdgpu_device_pci_config_reset - reset the GPU
921 *
922 * @adev: amdgpu_device pointer
923 *
924 * Resets the GPU using the pci config reset sequence.
925 * Only applicable to asics prior to vega10.
926 */
8111c387 927void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
928{
929 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
930}
931
af484df8
AD
932/**
933 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
934 *
935 * @adev: amdgpu_device pointer
936 *
937 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
938 */
939int amdgpu_device_pci_reset(struct amdgpu_device *adev)
940{
941 return pci_reset_function(adev->pdev);
942}
943
d38ceaf9
AD
944/*
945 * GPU doorbell aperture helpers function.
946 */
947/**
06ec9070 948 * amdgpu_device_doorbell_init - Init doorbell driver information.
d38ceaf9
AD
949 *
950 * @adev: amdgpu_device pointer
951 *
952 * Init doorbell driver information (CIK)
953 * Returns 0 on success, error on failure.
954 */
06ec9070 955static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
d38ceaf9 956{
6585661d 957
705e519e
CK
958 /* No doorbell on SI hardware generation */
959 if (adev->asic_type < CHIP_BONAIRE) {
960 adev->doorbell.base = 0;
961 adev->doorbell.size = 0;
962 adev->doorbell.num_doorbells = 0;
963 adev->doorbell.ptr = NULL;
964 return 0;
965 }
966
d6895ad3
CK
967 if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
968 return -EINVAL;
969
22357775
AD
970 amdgpu_asic_init_doorbell_index(adev);
971
d38ceaf9
AD
972 /* doorbell bar mapping */
973 adev->doorbell.base = pci_resource_start(adev->pdev, 2);
974 adev->doorbell.size = pci_resource_len(adev->pdev, 2);
975
edf600da 976 adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
9564f192 977 adev->doorbell_index.max_assignment+1);
d38ceaf9
AD
978 if (adev->doorbell.num_doorbells == 0)
979 return -EINVAL;
980
981 /* For Vega, reserve and map two pages on the doorbell BAR since the SDMA
 982 * paging queue doorbell uses the second page. The
 983 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
 984 * doorbells are in the first page. So with the paging queue enabled,
 985 * the max num_doorbells should be increased by one page (0x400 in dwords).
 986 */
987 if (adev->asic_type >= CHIP_VEGA10)
88dc26e4 988 adev->doorbell.num_doorbells += 0x400;
ec3db8a6 989
8972e5d2
CK
990 adev->doorbell.ptr = ioremap(adev->doorbell.base,
991 adev->doorbell.num_doorbells *
992 sizeof(u32));
993 if (adev->doorbell.ptr == NULL)
d38ceaf9 994 return -ENOMEM;
d38ceaf9
AD
995
996 return 0;
997}
998
999/**
06ec9070 1000 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
d38ceaf9
AD
1001 *
1002 * @adev: amdgpu_device pointer
1003 *
1004 * Tear down doorbell driver information (CIK)
1005 */
06ec9070 1006static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1007{
1008 iounmap(adev->doorbell.ptr);
1009 adev->doorbell.ptr = NULL;
1010}
1011
22cb0164 1012
d38ceaf9
AD
1013
1014/*
 1015 * amdgpu_device_wb_*()
 1016 * Writeback is the method by which the GPU updates special pages in memory
 1017 * with the status of certain GPU events (fences, ring pointers, etc.).
 1018 */
1019
1020/**
06ec9070 1021 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
1022 *
1023 * @adev: amdgpu_device pointer
1024 *
1025 * Disables Writeback and frees the Writeback memory (all asics).
1026 * Used at driver shutdown.
1027 */
06ec9070 1028static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1029{
1030 if (adev->wb.wb_obj) {
a76ed485
AD
1031 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1032 &adev->wb.gpu_addr,
1033 (void **)&adev->wb.wb);
d38ceaf9
AD
1034 adev->wb.wb_obj = NULL;
1035 }
1036}
1037
1038/**
06ec9070 1039 * amdgpu_device_wb_init- Init Writeback driver info and allocate memory
d38ceaf9
AD
1040 *
1041 * @adev: amdgpu_device pointer
1042 *
455a7bc2 1043 * Initializes writeback and allocates writeback memory (all asics).
1044 * Used at driver startup.
 1045 * Returns 0 on success or a negative error code on failure.
1046 */
06ec9070 1047static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
1048{
1049 int r;
1050
1051 if (adev->wb.wb_obj == NULL) {
97407b63
AD
1052 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1053 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
1054 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1055 &adev->wb.wb_obj, &adev->wb.gpu_addr,
1056 (void **)&adev->wb.wb);
d38ceaf9
AD
1057 if (r) {
1058 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1059 return r;
1060 }
d38ceaf9
AD
1061
1062 adev->wb.num_wb = AMDGPU_MAX_WB;
1063 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1064
1065 /* clear wb memory */
73469585 1066 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
1067 }
1068
1069 return 0;
1070}
1071
1072/**
131b4b36 1073 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
1074 *
1075 * @adev: amdgpu_device pointer
1076 * @wb: wb index
1077 *
1078 * Allocate a wb slot for use by the driver (all asics).
1079 * Returns 0 on success or -EINVAL on failure.
1080 */
131b4b36 1081int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9
AD
1082{
1083 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 1084
97407b63 1085 if (offset < adev->wb.num_wb) {
7014285a 1086 __set_bit(offset, adev->wb.used);
63ae07ca 1087 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
1088 return 0;
1089 } else {
1090 return -EINVAL;
1091 }
1092}
1093
d38ceaf9 1094/**
131b4b36 1095 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
1096 *
1097 * @adev: amdgpu_device pointer
1098 * @wb: wb index
1099 *
1100 * Free a wb slot allocated for use by the driver (all asics)
1101 */
131b4b36 1102void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 1103{
73469585 1104 wb >>= 3;
d38ceaf9 1105 if (wb < adev->wb.num_wb)
73469585 1106 __clear_bit(wb, adev->wb.used);
1107}
1108
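/*
 * Illustrative sketch: a ring or IP block that needs a writeback slot pairs
 * amdgpu_device_wb_get() with amdgpu_device_wb_free().  The returned index
 * is a dword offset, so the CPU view of the slot is adev->wb.wb[index] and
 * the GPU address is adev->wb.gpu_addr + index * 4.
 *
 *	u32 wb;
 *
 *	if (!amdgpu_device_wb_get(adev, &wb)) {
 *		adev->wb.wb[wb] = 0;
 *		... hand adev->wb.gpu_addr + wb * 4 to the GPU ...
 *		amdgpu_device_wb_free(adev, wb);
 *	}
 */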
1109/**
 1110 * amdgpu_device_resize_fb_bar - try to resize FB BAR
 1111 *
 1112 * @adev: amdgpu_device pointer
 1113 *
 1114 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 1115 * to fail, but if any of the BARs is not accessible after the resize we abort
 1116 * driver loading by returning -ENODEV.
 1117 */
1118int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1119{
453f617a 1120 int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
31b8adab
CK
1121 struct pci_bus *root;
1122 struct resource *res;
1123 unsigned i;
d6895ad3
CK
1124 u16 cmd;
1125 int r;
1126
0c03b912 1127 /* Bypass for VF */
1128 if (amdgpu_sriov_vf(adev))
1129 return 0;
1130
b7221f2b
AD
1131 /* skip if the bios has already enabled large BAR */
1132 if (adev->gmc.real_vram_size &&
1133 (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1134 return 0;
1135
31b8adab
CK
1136 /* Check if the root BUS has 64bit memory resources */
1137 root = adev->pdev->bus;
1138 while (root->parent)
1139 root = root->parent;
1140
1141 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 1142 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
1143 res->start > 0x100000000ull)
1144 break;
1145 }
1146
1147 /* Trying to resize is pointless without a root hub window above 4GB */
1148 if (!res)
1149 return 0;
1150
453f617a
ND
1151 /* Limit the BAR size to what is available */
1152 rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1153 rbar_size);
1154
d6895ad3
CK
1155 /* Disable memory decoding while we change the BAR addresses and size */
1156 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1157 pci_write_config_word(adev->pdev, PCI_COMMAND,
1158 cmd & ~PCI_COMMAND_MEMORY);
1159
1160 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
06ec9070 1161 amdgpu_device_doorbell_fini(adev);
d6895ad3
CK
1162 if (adev->asic_type >= CHIP_BONAIRE)
1163 pci_release_resource(adev->pdev, 2);
1164
1165 pci_release_resource(adev->pdev, 0);
1166
1167 r = pci_resize_resource(adev->pdev, 0, rbar_size);
1168 if (r == -ENOSPC)
1169 DRM_INFO("Not enough PCI address space for a large BAR.");
1170 else if (r && r != -ENOTSUPP)
1171 DRM_ERROR("Problem resizing BAR0 (%d).", r);
1172
1173 pci_assign_unassigned_bus_resources(adev->pdev->bus);
1174
1175 /* When the doorbell or fb BAR isn't available we have no chance of
1176 * using the device.
1177 */
06ec9070 1178 r = amdgpu_device_doorbell_init(adev);
d6895ad3
CK
1179 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1180 return -ENODEV;
1181
1182 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1183
1184 return 0;
1185}
a05502e5 1186
d38ceaf9
AD
1187/*
1188 * GPU helpers function.
1189 */
1190/**
 1191 * amdgpu_device_need_post - check if the hw needs post or not
 1192 *
 1193 * @adev: amdgpu_device pointer
 1194 *
 1195 * Check if the asic has been initialized (all asics) at driver startup
 1196 * or whether post is needed because a hw reset was performed.
 1197 * Returns true if post is needed, false if not.
 1198 */
39c640c0 1199bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
1200{
1201 uint32_t reg;
1202
bec86378
ML
1203 if (amdgpu_sriov_vf(adev))
1204 return false;
1205
1206 if (amdgpu_passthrough(adev)) {
1207 /* for FIJI: In the whole-GPU pass-through virtualization case, after a VM
 1208 * reboot some old SMC firmware still needs the driver to do a vPost,
 1209 * otherwise the GPU hangs. SMC firmware versions above 22.15 don't have
 1210 * this flaw, so we force vPost to be executed for versions below 22.15.
 1211 */
1212 if (adev->asic_type == CHIP_FIJI) {
1213 int err;
1214 uint32_t fw_ver;
1215 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1216 /* force vPost if error occurred */
1217 if (err)
1218 return true;
1219
1220 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1da2c326
ML
1221 if (fw_ver < 0x00160e00)
1222 return true;
bec86378 1223 }
bec86378 1224 }
91fe77eb 1225
1226 if (adev->has_hw_reset) {
1227 adev->has_hw_reset = false;
1228 return true;
1229 }
1230
1231 /* bios scratch used on CIK+ */
1232 if (adev->asic_type >= CHIP_BONAIRE)
1233 return amdgpu_atombios_scratch_need_asic_init(adev);
1234
1235 /* check MEM_SIZE for older asics */
1236 reg = amdgpu_asic_get_config_memsize(adev);
1237
1238 if ((reg != 0) && (reg != 0xffffffff))
1239 return false;
1240
1241 return true;
bec86378
ML
1242}
1243
d38ceaf9
AD
1244/* if we get transitioned to only one device, take VGA back */
1245/**
06ec9070 1246 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9
AD
1247 *
1248 * @cookie: amdgpu_device pointer
1249 * @state: enable/disable vga decode
1250 *
1251 * Enable/disable vga decode (all asics).
1252 * Returns VGA resource flags.
1253 */
06ec9070 1254static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
d38ceaf9
AD
1255{
1256 struct amdgpu_device *adev = cookie;
1257 amdgpu_asic_set_vga_state(adev, state);
1258 if (state)
1259 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1260 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1261 else
1262 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1263}
1264
e3ecdffa
AD
1265/**
1266 * amdgpu_device_check_block_size - validate the vm block size
1267 *
1268 * @adev: amdgpu_device pointer
1269 *
1270 * Validates the vm block size specified via module parameter.
1271 * The vm block size defines number of bits in page table versus page directory,
1272 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1273 * page table and the remaining bits are in the page directory.
1274 */
06ec9070 1275static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
1276{
1277 /* defines number of bits in page table versus page directory,
1278 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1279 * page table and the remaining bits are in the page directory */
bab4fee7
JZ
1280 if (amdgpu_vm_block_size == -1)
1281 return;
a1adf8be 1282
bab4fee7 1283 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
1284 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1285 amdgpu_vm_block_size);
97489129 1286 amdgpu_vm_block_size = -1;
a1adf8be 1287 }
a1adf8be
CZ
1288}
1289
e3ecdffa
AD
1290/**
1291 * amdgpu_device_check_vm_size - validate the vm size
1292 *
1293 * @adev: amdgpu_device pointer
1294 *
1295 * Validates the vm size in GB specified via module parameter.
1296 * The VM size is the size of the GPU virtual memory space in GB.
1297 */
06ec9070 1298static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 1299{
64dab074
AD
1300 /* no need to check the default value */
1301 if (amdgpu_vm_size == -1)
1302 return;
1303
83ca145d
ZJ
1304 if (amdgpu_vm_size < 1) {
1305 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1306 amdgpu_vm_size);
f3368128 1307 amdgpu_vm_size = -1;
83ca145d 1308 }
83ca145d
ZJ
1309}
1310
7951e376
RZ
1311static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1312{
1313 struct sysinfo si;
a9d4fe2f 1314 bool is_os_64 = (sizeof(void *) == 8);
7951e376
RZ
1315 uint64_t total_memory;
1316 uint64_t dram_size_seven_GB = 0x1B8000000;
1317 uint64_t dram_size_three_GB = 0xB8000000;
1318
1319 if (amdgpu_smu_memory_pool_size == 0)
1320 return;
1321
1322 if (!is_os_64) {
1323 DRM_WARN("Not 64-bit OS, feature not supported\n");
1324 goto def_value;
1325 }
1326 si_meminfo(&si);
1327 total_memory = (uint64_t)si.totalram * si.mem_unit;
1328
1329 if ((amdgpu_smu_memory_pool_size == 1) ||
1330 (amdgpu_smu_memory_pool_size == 2)) {
1331 if (total_memory < dram_size_three_GB)
1332 goto def_value1;
1333 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1334 (amdgpu_smu_memory_pool_size == 8)) {
1335 if (total_memory < dram_size_seven_GB)
1336 goto def_value1;
1337 } else {
1338 DRM_WARN("Smu memory pool size not supported\n");
1339 goto def_value;
1340 }
1341 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1342
1343 return;
1344
1345def_value1:
1346 DRM_WARN("Not enough system memory\n");
1347def_value:
1348 adev->pm.smu_prv_buffer_size = 0;
1349}
1350
d38ceaf9 1351/**
06ec9070 1352 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
1353 *
1354 * @adev: amdgpu_device pointer
1355 *
1356 * Validates certain module parameters and updates
1357 * the associated values used by the driver (all asics).
1358 */
912dfc84 1359static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1360{
5b011235
CZ
1361 if (amdgpu_sched_jobs < 4) {
1362 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1363 amdgpu_sched_jobs);
1364 amdgpu_sched_jobs = 4;
76117507 1365 } else if (!is_power_of_2(amdgpu_sched_jobs)){
5b011235
CZ
1366 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1367 amdgpu_sched_jobs);
1368 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1369 }
d38ceaf9 1370
83e74db6 1371 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
1372 /* gart size must be greater or equal to 32M */
1373 dev_warn(adev->dev, "gart size (%d) too small\n",
1374 amdgpu_gart_size);
83e74db6 1375 amdgpu_gart_size = -1;
d38ceaf9
AD
1376 }
1377
36d38372 1378 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1379 /* gtt size must be greater or equal to 32M */
36d38372
CK
1380 dev_warn(adev->dev, "gtt size (%d) too small\n",
1381 amdgpu_gtt_size);
1382 amdgpu_gtt_size = -1;
d38ceaf9
AD
1383 }
1384
d07f14be
RH
1385 /* valid range is between 4 and 9 inclusive */
1386 if (amdgpu_vm_fragment_size != -1 &&
1387 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1388 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1389 amdgpu_vm_fragment_size = -1;
1390 }
1391
5d5bd5e3
KW
1392 if (amdgpu_sched_hw_submission < 2) {
1393 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1394 amdgpu_sched_hw_submission);
1395 amdgpu_sched_hw_submission = 2;
1396 } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1397 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1398 amdgpu_sched_hw_submission);
1399 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1400 }
1401
7951e376
RZ
1402 amdgpu_device_check_smu_prv_buffer_size(adev);
1403
06ec9070 1404 amdgpu_device_check_vm_size(adev);
d38ceaf9 1405
06ec9070 1406 amdgpu_device_check_block_size(adev);
6a7f76e7 1407
19aede77 1408 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84 1409
c6252390 1410 amdgpu_gmc_tmz_set(adev);
01a8dcec 1411
9b498efa
AD
1412 amdgpu_gmc_noretry_set(adev);
1413
e3c00faa 1414 return 0;
d38ceaf9
AD
1415}
1416
1417/**
 1418 * amdgpu_switcheroo_set_state - set switcheroo state
 1419 *
 1420 * @pdev: pci dev pointer
 1421 * @state: vga_switcheroo state
 1422 *
 1423 * Callback for the switcheroo driver. Suspends or resumes
 1424 * the asics before or after they are powered up using ACPI methods.
 1425 */
8aba21b7
LT
1426static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1427 enum vga_switcheroo_state state)
d38ceaf9
AD
1428{
1429 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 1430 int r;
d38ceaf9 1431
fd496ca8 1432 if (amdgpu_device_supports_atpx(dev) && state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
1433 return;
1434
1435 if (state == VGA_SWITCHEROO_ON) {
dd4fa6c1 1436 pr_info("switched on\n");
d38ceaf9
AD
1437 /* don't suspend or resume card normally */
1438 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1439
8f66090b
TZ
1440 pci_set_power_state(pdev, PCI_D0);
1441 amdgpu_device_load_pci_state(pdev);
1442 r = pci_enable_device(pdev);
de185019
AD
1443 if (r)
1444 DRM_WARN("pci_enable_device failed (%d)\n", r);
1445 amdgpu_device_resume(dev, true);
d38ceaf9 1446
d38ceaf9 1447 dev->switch_power_state = DRM_SWITCH_POWER_ON;
d38ceaf9 1448 } else {
dd4fa6c1 1449 pr_info("switched off\n");
d38ceaf9 1450 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
de185019 1451 amdgpu_device_suspend(dev, true);
8f66090b 1452 amdgpu_device_cache_pci_state(pdev);
de185019 1453 /* Shut down the device */
8f66090b
TZ
1454 pci_disable_device(pdev);
1455 pci_set_power_state(pdev, PCI_D3cold);
d38ceaf9
AD
1456 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1457 }
1458}
1459
1460/**
1461 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1462 *
1463 * @pdev: pci dev pointer
1464 *
1465 * Callback for the switcheroo driver. Check if the switcheroo
 1466 * state can be changed.
1467 * Returns true if the state can be changed, false if not.
1468 */
1469static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1470{
1471 struct drm_device *dev = pci_get_drvdata(pdev);
1472
1473 /*
1474 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1475 * locking inversion with the driver load path. And the access here is
1476 * completely racy anyway. So don't bother with locking for now.
1477 */
7e13ad89 1478 return atomic_read(&dev->open_count) == 0;
d38ceaf9
AD
1479}
1480
1481static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1482 .set_gpu_state = amdgpu_switcheroo_set_state,
1483 .reprobe = NULL,
1484 .can_switch = amdgpu_switcheroo_can_switch,
1485};
1486
e3ecdffa
AD
1487/**
1488 * amdgpu_device_ip_set_clockgating_state - set the CG state
1489 *
87e3f136 1490 * @dev: amdgpu_device pointer
e3ecdffa
AD
1491 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1492 * @state: clockgating state (gate or ungate)
1493 *
1494 * Sets the requested clockgating state for all instances of
1495 * the hardware IP specified.
1496 * Returns the error code from the last instance.
1497 */
43fa561f 1498int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1499 enum amd_ip_block_type block_type,
1500 enum amd_clockgating_state state)
d38ceaf9 1501{
43fa561f 1502 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1503 int i, r = 0;
1504
1505 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1506 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1507 continue;
c722865a
RZ
1508 if (adev->ip_blocks[i].version->type != block_type)
1509 continue;
1510 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1511 continue;
1512 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1513 (void *)adev, state);
1514 if (r)
1515 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1516 adev->ip_blocks[i].version->funcs->name, r);
1517 }
1518 return r;
1519}
1520
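/*
 * Illustrative sketch: a caller that wants to gate clocks on the GFX block
 * would do, e.g.:
 *
 *	amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
 *					       AMD_CG_STATE_GATE);
 *
 * and pass AMD_CG_STATE_UNGATE to undo it.  The powergating variant below
 * takes AMD_PG_STATE_GATE/AMD_PG_STATE_UNGATE instead.
 */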
e3ecdffa
AD
1521/**
1522 * amdgpu_device_ip_set_powergating_state - set the PG state
1523 *
87e3f136 1524 * @dev: amdgpu_device pointer
e3ecdffa
AD
1525 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1526 * @state: powergating state (gate or ungate)
1527 *
1528 * Sets the requested powergating state for all instances of
1529 * the hardware IP specified.
1530 * Returns the error code from the last instance.
1531 */
43fa561f 1532int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1533 enum amd_ip_block_type block_type,
1534 enum amd_powergating_state state)
d38ceaf9 1535{
43fa561f 1536 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1537 int i, r = 0;
1538
1539 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1540 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1541 continue;
c722865a
RZ
1542 if (adev->ip_blocks[i].version->type != block_type)
1543 continue;
1544 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1545 continue;
1546 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1547 (void *)adev, state);
1548 if (r)
1549 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1550 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1551 }
1552 return r;
1553}
1554
e3ecdffa
AD
1555/**
1556 * amdgpu_device_ip_get_clockgating_state - get the CG state
1557 *
1558 * @adev: amdgpu_device pointer
1559 * @flags: clockgating feature flags
1560 *
1561 * Walks the list of IPs on the device and updates the clockgating
1562 * flags for each IP.
1563 * Updates @flags with the feature flags for each hardware IP where
1564 * clockgating is enabled.
1565 */
2990a1fc
AD
1566void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1567 u32 *flags)
6cb2d4e4
HR
1568{
1569 int i;
1570
1571 for (i = 0; i < adev->num_ip_blocks; i++) {
1572 if (!adev->ip_blocks[i].status.valid)
1573 continue;
1574 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1575 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1576 }
1577}
1578
e3ecdffa
AD
1579/**
1580 * amdgpu_device_ip_wait_for_idle - wait for idle
1581 *
1582 * @adev: amdgpu_device pointer
1583 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1584 *
1585 * Waits for the requested hardware IP to be idle.
1586 * Returns 0 for success or a negative error code on failure.
1587 */
2990a1fc
AD
1588int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1589 enum amd_ip_block_type block_type)
5dbbb60b
AD
1590{
1591 int i, r;
1592
1593 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1594 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1595 continue;
a1255107
AD
1596 if (adev->ip_blocks[i].version->type == block_type) {
1597 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1598 if (r)
1599 return r;
1600 break;
1601 }
1602 }
1603 return 0;
1604
1605}
1606
e3ecdffa
AD
1607/**
1608 * amdgpu_device_ip_is_idle - is the hardware IP idle
1609 *
1610 * @adev: amdgpu_device pointer
1611 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1612 *
1613 * Check if the hardware IP is idle or not.
1614 * Returns true if the IP is idle, false if not.
1615 */
2990a1fc
AD
1616bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1617 enum amd_ip_block_type block_type)
5dbbb60b
AD
1618{
1619 int i;
1620
1621 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1622 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1623 continue;
a1255107
AD
1624 if (adev->ip_blocks[i].version->type == block_type)
1625 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1626 }
1627 return true;
1628
1629}
1630
e3ecdffa
AD
1631/**
1632 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1633 *
1634 * @adev: amdgpu_device pointer
87e3f136 1635 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1636 *
1637 * Returns a pointer to the hardware IP block structure
1638 * if it exists for the asic, otherwise NULL.
1639 */
2990a1fc
AD
1640struct amdgpu_ip_block *
1641amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1642 enum amd_ip_block_type type)
d38ceaf9
AD
1643{
1644 int i;
1645
1646 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1647 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1648 return &adev->ip_blocks[i];
1649
1650 return NULL;
1651}
1652
1653/**
2990a1fc 1654 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1655 *
1656 * @adev: amdgpu_device pointer
5fc3aeeb 1657 * @type: enum amd_ip_block_type
d38ceaf9
AD
1658 * @major: major version
1659 * @minor: minor version
1660 *
1661 * return 0 if equal or greater
1662 * return 1 if smaller or the ip_block doesn't exist
1663 */
2990a1fc
AD
1664int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1665 enum amd_ip_block_type type,
1666 u32 major, u32 minor)
d38ceaf9 1667{
2990a1fc 1668 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1669
a1255107
AD
1670 if (ip_block && ((ip_block->version->major > major) ||
1671 ((ip_block->version->major == major) &&
1672 (ip_block->version->minor >= minor))))
1673 return 0;
1674
1675 return 1;
1676}
1677
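/*
 * Illustrative sketch: code that depends on a minimum IP version can gate
 * itself on the comparison helper above, e.g. to require GMC 8.1 or newer:
 *
 *	if (amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GMC,
 *					       8, 1) == 0) {
 *		... newer behaviour ...
 *	}
 */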
a1255107 1678/**
2990a1fc 1679 * amdgpu_device_ip_block_add
a1255107
AD
1680 *
1681 * @adev: amdgpu_device pointer
1682 * @ip_block_version: pointer to the IP to add
1683 *
1684 * Adds the IP block driver information to the collection of IPs
1685 * on the asic.
1686 */
2990a1fc
AD
1687int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1688 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1689{
1690 if (!ip_block_version)
1691 return -EINVAL;
1692
e966a725 1693 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1694 ip_block_version->funcs->name);
1695
a1255107
AD
1696 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1697
1698 return 0;
1699}
1700
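/*
 * Illustrative sketch: per-ASIC setup code builds the IP block list by
 * calling amdgpu_device_ip_block_add() once per block (the block version
 * structs live in the respective IP files), roughly:
 *
 *	r = amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
 *	if (r)
 *		return r;
 *	r = amdgpu_device_ip_block_add(adev, &gmc_v8_0_ip_block);
 *	if (r)
 *		return r;
 */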
1701/**
 1702 * amdgpu_device_enable_virtual_display - enable virtual display feature
 1703 *
 1704 * @adev: amdgpu_device pointer
 1705 *
 1706 * Enables the virtual display feature if the user has enabled it via
 1707 * the module parameter virtual_display. This feature provides a virtual
 1708 * display hardware on headless boards or in virtualized environments.
 1709 * This function parses and validates the configuration string specified by
 1710 * the user and configures the virtual display configuration (number of
 1711 * virtual connectors, crtcs, etc.) specified.
 1712 */
483ef985 1713static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1714{
1715 adev->enable_virtual_display = false;
1716
1717 if (amdgpu_virtual_display) {
8f66090b 1718 const char *pci_address_name = pci_name(adev->pdev);
0f66356d 1719 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1720
1721 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1722 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1723 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1724 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1725 if (!strcmp("all", pciaddname)
1726 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1727 long num_crtc;
1728 int res = -1;
1729
9accf2fd 1730 adev->enable_virtual_display = true;
0f66356d
ED
1731
1732 if (pciaddname_tmp)
1733 res = kstrtol(pciaddname_tmp, 10,
1734 &num_crtc);
1735
1736 if (!res) {
1737 if (num_crtc < 1)
1738 num_crtc = 1;
1739 if (num_crtc > 6)
1740 num_crtc = 6;
1741 adev->mode_info.num_crtc = num_crtc;
1742 } else {
1743 adev->mode_info.num_crtc = 1;
1744 }
9accf2fd
ED
1745 break;
1746 }
1747 }
1748
0f66356d
ED
1749 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1750 amdgpu_virtual_display, pci_address_name,
1751 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
1752
1753 kfree(pciaddstr);
1754 }
1755}
1756
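/*
 * Example configuration strings accepted by the parser above (derived from
 * the parsing logic itself, not from separate documentation): entries are
 * separated by ';', each entry is a PCI address optionally followed by
 * ",<num_crtc>" (clamped to 1..6), and the literal "all" matches every
 * device. The PCI addresses below are made up for illustration.
 *
 *   amdgpu.virtual_display=0000:04:00.0,2                - two virtual crtcs
 *   amdgpu.virtual_display=0000:04:00.0;0000:05:00.0,4   - two devices
 *   amdgpu.virtual_display=all                           - every amdgpu device
 */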
e3ecdffa
AD
1757/**
1758 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1759 *
1760 * @adev: amdgpu_device pointer
1761 *
1762 * Parses the asic configuration parameters specified in the gpu info
 1763 * firmware and makes them available to the driver for use in configuring
1764 * the asic.
1765 * Returns 0 on success, -EINVAL on failure.
1766 */
e2a75f88
AD
1767static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1768{
e2a75f88 1769 const char *chip_name;
c0a43457 1770 char fw_name[40];
e2a75f88
AD
1771 int err;
1772 const struct gpu_info_firmware_header_v1_0 *hdr;
1773
ab4fe3e1
HR
1774 adev->firmware.gpu_info_fw = NULL;
1775
72de33f8 1776 if (adev->mman.discovery_bin) {
258620d0 1777 amdgpu_discovery_get_gfx_info(adev);
cc375d8c
TY
1778
1779 /*
1780 * FIXME: The bounding box is still needed by Navi12, so
 1781 * temporarily read it from gpu_info firmware. Should be dropped
1782 * when DAL no longer needs it.
1783 */
1784 if (adev->asic_type != CHIP_NAVI12)
1785 return 0;
258620d0
AD
1786 }
1787
e2a75f88 1788 switch (adev->asic_type) {
e2a75f88
AD
1789#ifdef CONFIG_DRM_AMDGPU_SI
1790 case CHIP_VERDE:
1791 case CHIP_TAHITI:
1792 case CHIP_PITCAIRN:
1793 case CHIP_OLAND:
1794 case CHIP_HAINAN:
1795#endif
1796#ifdef CONFIG_DRM_AMDGPU_CIK
1797 case CHIP_BONAIRE:
1798 case CHIP_HAWAII:
1799 case CHIP_KAVERI:
1800 case CHIP_KABINI:
1801 case CHIP_MULLINS:
1802#endif
da87c30b
AD
1803 case CHIP_TOPAZ:
1804 case CHIP_TONGA:
1805 case CHIP_FIJI:
1806 case CHIP_POLARIS10:
1807 case CHIP_POLARIS11:
1808 case CHIP_POLARIS12:
1809 case CHIP_VEGAM:
1810 case CHIP_CARRIZO:
1811 case CHIP_STONEY:
27c0bc71 1812 case CHIP_VEGA20:
84d244a3
JC
1813 case CHIP_SIENNA_CICHLID:
1814 case CHIP_NAVY_FLOUNDER:
eac88a5f 1815 case CHIP_DIMGREY_CAVEFISH:
e2a75f88
AD
1816 default:
1817 return 0;
1818 case CHIP_VEGA10:
1819 chip_name = "vega10";
1820 break;
3f76dced
AD
1821 case CHIP_VEGA12:
1822 chip_name = "vega12";
1823 break;
2d2e5e7e 1824 case CHIP_RAVEN:
54f78a76 1825 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
54c4d17e 1826 chip_name = "raven2";
54f78a76 1827 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
741deade 1828 chip_name = "picasso";
54c4d17e
FX
1829 else
1830 chip_name = "raven";
2d2e5e7e 1831 break;
65e60f6e
LM
1832 case CHIP_ARCTURUS:
1833 chip_name = "arcturus";
1834 break;
b51a26a0 1835 case CHIP_RENOIR:
2e62f0b5
PL
1836 if (adev->apu_flags & AMD_APU_IS_RENOIR)
1837 chip_name = "renoir";
1838 else
1839 chip_name = "green_sardine";
b51a26a0 1840 break;
23c6268e
HR
1841 case CHIP_NAVI10:
1842 chip_name = "navi10";
1843 break;
ed42cfe1
XY
1844 case CHIP_NAVI14:
1845 chip_name = "navi14";
1846 break;
42b325e5
XY
1847 case CHIP_NAVI12:
1848 chip_name = "navi12";
1849 break;
4e52a9f8
HR
1850 case CHIP_VANGOGH:
1851 chip_name = "vangogh";
1852 break;
e2a75f88
AD
1853 }
1854
1855 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
ab4fe3e1 1856 err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
e2a75f88
AD
1857 if (err) {
1858 dev_err(adev->dev,
1859 "Failed to load gpu_info firmware \"%s\"\n",
1860 fw_name);
1861 goto out;
1862 }
ab4fe3e1 1863 err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
e2a75f88
AD
1864 if (err) {
1865 dev_err(adev->dev,
1866 "Failed to validate gpu_info firmware \"%s\"\n",
1867 fw_name);
1868 goto out;
1869 }
1870
ab4fe3e1 1871 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
1872 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1873
1874 switch (hdr->version_major) {
1875 case 1:
1876 {
1877 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 1878 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
1879 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1880
cc375d8c
TY
1881 /*
 1882 * Should be dropped when DAL no longer needs it.
1883 */
1884 if (adev->asic_type == CHIP_NAVI12)
ec51d3fa
XY
1885 goto parse_soc_bounding_box;
1886
b5ab16bf
AD
1887 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1888 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1889 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1890 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 1891 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
1892 le32_to_cpu(gpu_info_fw->gc_num_tccs);
1893 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1894 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1895 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1896 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 1897 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
1898 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1899 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
1900 adev->gfx.cu_info.max_waves_per_simd =
1901 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1902 adev->gfx.cu_info.max_scratch_slots_per_cu =
1903 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1904 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 1905 if (hdr->version_minor >= 1) {
35c2e910
HZ
1906 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1907 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1908 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1909 adev->gfx.config.num_sc_per_sh =
1910 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1911 adev->gfx.config.num_packer_per_sc =
1912 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1913 }
ec51d3fa
XY
1914
1915parse_soc_bounding_box:
ec51d3fa
XY
1916 /*
 1917 * soc bounding box info is not integrated in the discovery table,
258620d0 1918 * so we always need to parse it from the gpu_info firmware when needed.
ec51d3fa 1919 */
48321c3d
HW
1920 if (hdr->version_minor == 2) {
1921 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1922 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1923 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1924 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1925 }
e2a75f88
AD
1926 break;
1927 }
1928 default:
1929 dev_err(adev->dev,
1930 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1931 err = -EINVAL;
1932 goto out;
1933 }
1934out:
e2a75f88
AD
1935 return err;
1936}
1937
e3ecdffa
AD
1938/**
1939 * amdgpu_device_ip_early_init - run early init for hardware IPs
1940 *
1941 * @adev: amdgpu_device pointer
1942 *
1943 * Early initialization pass for hardware IPs. The hardware IPs that make
 1944 * up each asic are discovered and each IP's early_init callback is run. This
1945 * is the first stage in initializing the asic.
1946 * Returns 0 on success, negative error code on failure.
1947 */
06ec9070 1948static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 1949{
aaa36a97 1950 int i, r;
d38ceaf9 1951
483ef985 1952 amdgpu_device_enable_virtual_display(adev);
a6be7570 1953
00a979f3 1954 if (amdgpu_sriov_vf(adev)) {
00a979f3 1955 r = amdgpu_virt_request_full_gpu(adev, true);
aaa36a97
AD
1956 if (r)
1957 return r;
00a979f3
WS
1958 }
1959
d38ceaf9 1960 switch (adev->asic_type) {
33f34802
KW
1961#ifdef CONFIG_DRM_AMDGPU_SI
1962 case CHIP_VERDE:
1963 case CHIP_TAHITI:
1964 case CHIP_PITCAIRN:
1965 case CHIP_OLAND:
1966 case CHIP_HAINAN:
295d0daf 1967 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
1968 r = si_set_ip_blocks(adev);
1969 if (r)
1970 return r;
1971 break;
1972#endif
a2e73f56
AD
1973#ifdef CONFIG_DRM_AMDGPU_CIK
1974 case CHIP_BONAIRE:
1975 case CHIP_HAWAII:
1976 case CHIP_KAVERI:
1977 case CHIP_KABINI:
1978 case CHIP_MULLINS:
e1ad2d53 1979 if (adev->flags & AMD_IS_APU)
a2e73f56 1980 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
1981 else
1982 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
1983
1984 r = cik_set_ip_blocks(adev);
1985 if (r)
1986 return r;
1987 break;
1988#endif
da87c30b
AD
1989 case CHIP_TOPAZ:
1990 case CHIP_TONGA:
1991 case CHIP_FIJI:
1992 case CHIP_POLARIS10:
1993 case CHIP_POLARIS11:
1994 case CHIP_POLARIS12:
1995 case CHIP_VEGAM:
1996 case CHIP_CARRIZO:
1997 case CHIP_STONEY:
1998 if (adev->flags & AMD_IS_APU)
1999 adev->family = AMDGPU_FAMILY_CZ;
2000 else
2001 adev->family = AMDGPU_FAMILY_VI;
2002
2003 r = vi_set_ip_blocks(adev);
2004 if (r)
2005 return r;
2006 break;
e48a3cd9
AD
2007 case CHIP_VEGA10:
2008 case CHIP_VEGA12:
e4bd8170 2009 case CHIP_VEGA20:
e48a3cd9 2010 case CHIP_RAVEN:
61cf44c1 2011 case CHIP_ARCTURUS:
b51a26a0 2012 case CHIP_RENOIR:
70534d1e 2013 if (adev->flags & AMD_IS_APU)
2ca8a5d2
CZ
2014 adev->family = AMDGPU_FAMILY_RV;
2015 else
2016 adev->family = AMDGPU_FAMILY_AI;
460826e6
KW
2017
2018 r = soc15_set_ip_blocks(adev);
2019 if (r)
2020 return r;
2021 break;
0a5b8c7b 2022 case CHIP_NAVI10:
7ecb5cd4 2023 case CHIP_NAVI14:
4808cf9c 2024 case CHIP_NAVI12:
11e8aef5 2025 case CHIP_SIENNA_CICHLID:
41f446bf 2026 case CHIP_NAVY_FLOUNDER:
144722fa 2027 case CHIP_DIMGREY_CAVEFISH:
4e52a9f8
HR
2028 case CHIP_VANGOGH:
2029 if (adev->asic_type == CHIP_VANGOGH)
2030 adev->family = AMDGPU_FAMILY_VGH;
2031 else
2032 adev->family = AMDGPU_FAMILY_NV;
0a5b8c7b
HR
2033
2034 r = nv_set_ip_blocks(adev);
2035 if (r)
2036 return r;
2037 break;
d38ceaf9
AD
2038 default:
2039 /* FIXME: not supported yet */
2040 return -EINVAL;
2041 }
2042
1884734a 2043 amdgpu_amdkfd_device_probe(adev);
2044
3b94fb10 2045 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 2046 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 2047 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
4215a119
HC
2048 if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2049 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
00f54b97 2050
d38ceaf9
AD
2051 for (i = 0; i < adev->num_ip_blocks; i++) {
2052 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
ed8cf00c
HR
2053 DRM_ERROR("disabled ip block: %d <%s>\n",
2054 i, adev->ip_blocks[i].version->funcs->name);
a1255107 2055 adev->ip_blocks[i].status.valid = false;
d38ceaf9 2056 } else {
a1255107
AD
2057 if (adev->ip_blocks[i].version->funcs->early_init) {
2058 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 2059 if (r == -ENOENT) {
a1255107 2060 adev->ip_blocks[i].status.valid = false;
2c1a2784 2061 } else if (r) {
a1255107
AD
2062 DRM_ERROR("early_init of IP block <%s> failed %d\n",
2063 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2064 return r;
2c1a2784 2065 } else {
a1255107 2066 adev->ip_blocks[i].status.valid = true;
2c1a2784 2067 }
974e6b64 2068 } else {
a1255107 2069 adev->ip_blocks[i].status.valid = true;
d38ceaf9 2070 }
d38ceaf9 2071 }
21a249ca
AD
2072 /* get the vbios after the asic_funcs are set up */
2073 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
2074 r = amdgpu_device_parse_gpu_info_fw(adev);
2075 if (r)
2076 return r;
2077
21a249ca
AD
2078 /* Read BIOS */
2079 if (!amdgpu_get_bios(adev))
2080 return -EINVAL;
2081
2082 r = amdgpu_atombios_init(adev);
2083 if (r) {
2084 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2085 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2086 return r;
2087 }
2088 }
d38ceaf9
AD
2089 }
2090
395d1fb9
NH
2091 adev->cg_flags &= amdgpu_cg_mask;
2092 adev->pg_flags &= amdgpu_pg_mask;
2093
d38ceaf9
AD
2094 return 0;
2095}
2096
0a4f2520
RZ
2097static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2098{
2099 int i, r;
2100
2101 for (i = 0; i < adev->num_ip_blocks; i++) {
2102 if (!adev->ip_blocks[i].status.sw)
2103 continue;
2104 if (adev->ip_blocks[i].status.hw)
2105 continue;
2106 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 2107 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
2108 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2109 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2110 if (r) {
2111 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2112 adev->ip_blocks[i].version->funcs->name, r);
2113 return r;
2114 }
2115 adev->ip_blocks[i].status.hw = true;
2116 }
2117 }
2118
2119 return 0;
2120}
2121
2122static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2123{
2124 int i, r;
2125
2126 for (i = 0; i < adev->num_ip_blocks; i++) {
2127 if (!adev->ip_blocks[i].status.sw)
2128 continue;
2129 if (adev->ip_blocks[i].status.hw)
2130 continue;
2131 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2132 if (r) {
2133 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2134 adev->ip_blocks[i].version->funcs->name, r);
2135 return r;
2136 }
2137 adev->ip_blocks[i].status.hw = true;
2138 }
2139
2140 return 0;
2141}
2142
7a3e0bb2
RZ
2143static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2144{
2145 int r = 0;
2146 int i;
80f41f84 2147 uint32_t smu_version;
7a3e0bb2
RZ
2148
2149 if (adev->asic_type >= CHIP_VEGA10) {
2150 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
2151 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2152 continue;
2153
 2154 		/* no need to do the fw loading again if already done */
2155 if (adev->ip_blocks[i].status.hw == true)
2156 break;
2157
53b3f8f4 2158 if (amdgpu_in_reset(adev) || adev->in_suspend) {
482f0e53
ML
2159 r = adev->ip_blocks[i].version->funcs->resume(adev);
2160 if (r) {
2161 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 2162 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
2163 return r;
2164 }
2165 } else {
2166 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2167 if (r) {
2168 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2169 adev->ip_blocks[i].version->funcs->name, r);
2170 return r;
7a3e0bb2 2171 }
7a3e0bb2 2172 }
482f0e53
ML
2173
2174 adev->ip_blocks[i].status.hw = true;
2175 break;
7a3e0bb2
RZ
2176 }
2177 }
482f0e53 2178
8973d9ec
ED
2179 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2180 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 2181
80f41f84 2182 return r;
7a3e0bb2
RZ
2183}
2184
e3ecdffa
AD
2185/**
2186 * amdgpu_device_ip_init - run init for hardware IPs
2187 *
2188 * @adev: amdgpu_device pointer
2189 *
2190 * Main initialization pass for hardware IPs. The list of all the hardware
2191 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2192 * are run. sw_init initializes the software state associated with each IP
2193 * and hw_init initializes the hardware associated with each IP.
2194 * Returns 0 on success, negative error code on failure.
2195 */
06ec9070 2196static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
2197{
2198 int i, r;
2199
c030f2e4 2200 r = amdgpu_ras_init(adev);
2201 if (r)
2202 return r;
2203
d38ceaf9 2204 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2205 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2206 continue;
a1255107 2207 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 2208 if (r) {
a1255107
AD
2209 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2210 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 2211 goto init_failed;
2c1a2784 2212 }
a1255107 2213 adev->ip_blocks[i].status.sw = true;
bfca0289 2214
d38ceaf9 2215 /* need to do gmc hw init early so we can allocate gpu mem */
a1255107 2216 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
06ec9070 2217 r = amdgpu_device_vram_scratch_init(adev);
2c1a2784
AD
2218 if (r) {
2219 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
72d3f592 2220 goto init_failed;
2c1a2784 2221 }
a1255107 2222 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
2223 if (r) {
2224 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 2225 goto init_failed;
2c1a2784 2226 }
06ec9070 2227 r = amdgpu_device_wb_init(adev);
2c1a2784 2228 if (r) {
06ec9070 2229 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 2230 goto init_failed;
2c1a2784 2231 }
a1255107 2232 adev->ip_blocks[i].status.hw = true;
2493664f
ML
2233
2234 /* right after GMC hw init, we create CSA */
f92d5c61 2235 if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
1e256e27
RZ
2236 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2237 AMDGPU_GEM_DOMAIN_VRAM,
2238 AMDGPU_CSA_SIZE);
2493664f
ML
2239 if (r) {
2240 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 2241 goto init_failed;
2493664f
ML
2242 }
2243 }
d38ceaf9
AD
2244 }
2245 }
2246
c9ffa427
YT
2247 if (amdgpu_sriov_vf(adev))
2248 amdgpu_virt_init_data_exchange(adev);
2249
533aed27
AG
2250 r = amdgpu_ib_pool_init(adev);
2251 if (r) {
2252 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2253 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2254 goto init_failed;
2255 }
2256
c8963ea4
RZ
2257 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2258 if (r)
72d3f592 2259 goto init_failed;
0a4f2520
RZ
2260
2261 r = amdgpu_device_ip_hw_init_phase1(adev);
2262 if (r)
72d3f592 2263 goto init_failed;
0a4f2520 2264
7a3e0bb2
RZ
2265 r = amdgpu_device_fw_loading(adev);
2266 if (r)
72d3f592 2267 goto init_failed;
7a3e0bb2 2268
0a4f2520
RZ
2269 r = amdgpu_device_ip_hw_init_phase2(adev);
2270 if (r)
72d3f592 2271 goto init_failed;
d38ceaf9 2272
121a2bc6
AG
2273 /*
 2274 * retired pages will be loaded from eeprom and reserved here;
 2275 * this should be called after amdgpu_device_ip_hw_init_phase2, since
 2276 * for some ASICs the RAS EEPROM code relies on the SMU being fully
 2277 * functional for I2C communication, which is only true at this point.
b82e65a9
GC
2278 *
 2279 * amdgpu_ras_recovery_init may fail, but the upper layers only care
 2280 * about failures caused by a bad gpu situation and stop the amdgpu
 2281 * init process accordingly. For other failure cases it will still release
 2282 * all the resources and print an error message, rather than returning a
 2283 * negative value to the upper level.
121a2bc6
AG
2284 *
 2285 * Note: theoretically, this should be called before all vram allocations
 2286 * to protect retired pages from being abused.
2287 */
b82e65a9
GC
2288 r = amdgpu_ras_recovery_init(adev);
2289 if (r)
2290 goto init_failed;
121a2bc6 2291
3e2e2ab5
HZ
2292 if (adev->gmc.xgmi.num_physical_nodes > 1)
2293 amdgpu_xgmi_add_device(adev);
1884734a 2294 amdgpu_amdkfd_device_init(adev);
c6332b97 2295
bd607166
KR
2296 amdgpu_fru_get_product_info(adev);
2297
72d3f592 2298init_failed:
c9ffa427 2299 if (amdgpu_sriov_vf(adev))
c6332b97 2300 amdgpu_virt_release_full_gpu(adev, true);
2301
72d3f592 2302 return r;
d38ceaf9
AD
2303}
2304
e3ecdffa
AD
2305/**
2306 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2307 *
2308 * @adev: amdgpu_device pointer
2309 *
2310 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2311 * this function before a GPU reset. If the value is retained after a
 2312 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2313 */
06ec9070 2314static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2315{
2316 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2317}
2318
e3ecdffa
AD
2319/**
2320 * amdgpu_device_check_vram_lost - check if vram is valid
2321 *
2322 * @adev: amdgpu_device pointer
2323 *
2324 * Checks the reset magic value written to the gart pointer in VRAM.
2325 * The driver calls this after a GPU reset to see if the contents of
 2326 * VRAM have been lost or not.
2327 * returns true if vram is lost, false if not.
2328 */
06ec9070 2329static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2330{
dadce777
EQ
2331 if (memcmp(adev->gart.ptr, adev->reset_magic,
2332 AMDGPU_RESET_MAGIC_NUM))
2333 return true;
2334
53b3f8f4 2335 if (!amdgpu_in_reset(adev))
dadce777
EQ
2336 return false;
2337
2338 /*
2339 * For all ASICs with baco/mode1 reset, the VRAM is
2340 * always assumed to be lost.
2341 */
2342 switch (amdgpu_asic_reset_method(adev)) {
2343 case AMD_RESET_METHOD_BACO:
2344 case AMD_RESET_METHOD_MODE1:
2345 return true;
2346 default:
2347 return false;
2348 }
0c49e0b8
CZ
2349}
2350
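/*
 * A simplified sketch of how the two helpers above are intended to be used
 * around an asic reset. The real recovery path in this file does
 * considerably more work; my_reset_and_check() is a hypothetical wrapper
 * shown only to make the fill/check pairing explicit.
 */
static int my_reset_and_check(struct amdgpu_device *adev)
{
        int r;

        amdgpu_device_fill_reset_magic(adev);   /* stamp VRAM before reset */

        r = amdgpu_asic_reset(adev);
        if (r)
                return r;

        if (amdgpu_device_check_vram_lost(adev))
                dev_warn(adev->dev,
                         "VRAM contents lost, buffers need to be restored\n");

        return 0;
}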
e3ecdffa 2351/**
1112a46b 2352 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2353 *
2354 * @adev: amdgpu_device pointer
b8b72130 2355 * @state: clockgating state (gate or ungate)
e3ecdffa 2356 *
e3ecdffa 2357 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2358 * set_clockgating_state callbacks are run.
2359 * Late initialization pass enabling clockgating for hardware IPs.
2360 * Fini or suspend, pass disabling clockgating for hardware IPs.
e3ecdffa
AD
2361 * Returns 0 on success, negative error code on failure.
2362 */
fdd34271 2363
1112a46b
RZ
2364static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2365 enum amd_clockgating_state state)
d38ceaf9 2366{
1112a46b 2367 int i, j, r;
d38ceaf9 2368
4a2ba394
SL
2369 if (amdgpu_emu_mode == 1)
2370 return 0;
2371
1112a46b
RZ
2372 for (j = 0; j < adev->num_ip_blocks; j++) {
2373 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2374 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2375 continue;
4a446d55 2376 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2377 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2378 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2379 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2380 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2381 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2382 /* enable clockgating to save power */
a1255107 2383 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2384 state);
4a446d55
AD
2385 if (r) {
2386 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2387 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2388 return r;
2389 }
b0b00ff1 2390 }
d38ceaf9 2391 }
06b18f61 2392
c9f96fd5
RZ
2393 return 0;
2394}
2395
1112a46b 2396static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
c9f96fd5 2397{
1112a46b 2398 int i, j, r;
06b18f61 2399
c9f96fd5
RZ
2400 if (amdgpu_emu_mode == 1)
2401 return 0;
2402
1112a46b
RZ
2403 for (j = 0; j < adev->num_ip_blocks; j++) {
2404 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2405 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5
RZ
2406 continue;
2407 /* skip CG for VCE/UVD, it's handled specially */
2408 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2409 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2410 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2411 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2412 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2413 /* enable powergating to save power */
2414 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2415 state);
c9f96fd5
RZ
2416 if (r) {
2417 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2418 adev->ip_blocks[i].version->funcs->name, r);
2419 return r;
2420 }
2421 }
2422 }
2dc80b00
S
2423 return 0;
2424}
2425
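/*
 * Sketch of the intended calling pattern for the two state helpers above.
 * This mirrors what amdgpu_device_ip_late_init() and amdgpu_device_ip_fini()
 * in this file already do; it is repeated here only to make the gate/ungate
 * ordering explicit (my_power_saving_example() is a hypothetical name).
 */
static void my_power_saving_example(struct amdgpu_device *adev)
{
        /* after late init: enable clock- and powergating to save power */
        amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
        amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);

        /* before fini or suspend: disable them again, powergating first */
        amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
        amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
}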
beff74bc
AD
2426static int amdgpu_device_enable_mgpu_fan_boost(void)
2427{
2428 struct amdgpu_gpu_instance *gpu_ins;
2429 struct amdgpu_device *adev;
2430 int i, ret = 0;
2431
2432 mutex_lock(&mgpu_info.mutex);
2433
2434 /*
2435 * MGPU fan boost feature should be enabled
2436 * only when there are two or more dGPUs in
2437 * the system
2438 */
2439 if (mgpu_info.num_dgpu < 2)
2440 goto out;
2441
2442 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2443 gpu_ins = &(mgpu_info.gpu_ins[i]);
2444 adev = gpu_ins->adev;
2445 if (!(adev->flags & AMD_IS_APU) &&
f10bb940 2446 !gpu_ins->mgpu_fan_enabled) {
beff74bc
AD
2447 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2448 if (ret)
2449 break;
2450
2451 gpu_ins->mgpu_fan_enabled = 1;
2452 }
2453 }
2454
2455out:
2456 mutex_unlock(&mgpu_info.mutex);
2457
2458 return ret;
2459}
2460
e3ecdffa
AD
2461/**
2462 * amdgpu_device_ip_late_init - run late init for hardware IPs
2463 *
2464 * @adev: amdgpu_device pointer
2465 *
2466 * Late initialization pass for hardware IPs. The list of all the hardware
2467 * IPs that make up the asic is walked and the late_init callbacks are run.
2468 * late_init covers any special initialization that an IP requires
 2469 * after all of them have been initialized or something that needs to happen
2470 * late in the init process.
2471 * Returns 0 on success, negative error code on failure.
2472 */
06ec9070 2473static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2474{
60599a03 2475 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2476 int i = 0, r;
2477
2478 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2479 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2480 continue;
2481 if (adev->ip_blocks[i].version->funcs->late_init) {
2482 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2483 if (r) {
2484 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2485 adev->ip_blocks[i].version->funcs->name, r);
2486 return r;
2487 }
2dc80b00 2488 }
73f847db 2489 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2490 }
2491
a891d239
DL
2492 amdgpu_ras_set_error_query_ready(adev, true);
2493
1112a46b
RZ
2494 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2495 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2496
06ec9070 2497 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2498
beff74bc
AD
2499 r = amdgpu_device_enable_mgpu_fan_boost();
2500 if (r)
2501 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2502
60599a03
EQ
2503
2504 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2505 mutex_lock(&mgpu_info.mutex);
2506
2507 /*
2508 * Reset device p-state to low as this was booted with high.
2509 *
2510 * This should be performed only after all devices from the same
2511 * hive get initialized.
2512 *
 2513 * However, the number of devices in a hive is not known in advance;
 2514 * it is counted one by one as the devices are initialized.
2515 *
 2516 * So, we wait for all XGMI interlinked devices to be initialized.
2517 * This may bring some delays as those devices may come from
2518 * different hives. But that should be OK.
2519 */
2520 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2521 for (i = 0; i < mgpu_info.num_gpu; i++) {
2522 gpu_instance = &(mgpu_info.gpu_ins[i]);
2523 if (gpu_instance->adev->flags & AMD_IS_APU)
2524 continue;
2525
d84a430d
JK
2526 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2527 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
2528 if (r) {
2529 DRM_ERROR("pstate setting failed (%d).\n", r);
2530 break;
2531 }
2532 }
2533 }
2534
2535 mutex_unlock(&mgpu_info.mutex);
2536 }
2537
d38ceaf9
AD
2538 return 0;
2539}
2540
e3ecdffa
AD
2541/**
2542 * amdgpu_device_ip_fini - run fini for hardware IPs
2543 *
2544 * @adev: amdgpu_device pointer
2545 *
2546 * Main teardown pass for hardware IPs. The list of all the hardware
2547 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2548 * are run. hw_fini tears down the hardware associated with each IP
2549 * and sw_fini tears down any software state associated with each IP.
2550 * Returns 0 on success, negative error code on failure.
2551 */
06ec9070 2552static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
d38ceaf9
AD
2553{
2554 int i, r;
2555
5278a159
SY
2556 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2557 amdgpu_virt_release_ras_err_handler_data(adev);
2558
c030f2e4 2559 amdgpu_ras_pre_fini(adev);
2560
a82400b5
AG
2561 if (adev->gmc.xgmi.num_physical_nodes > 1)
2562 amdgpu_xgmi_remove_device(adev);
2563
05df1f01 2564 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2565 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2566
26eb6b51
DL
2567 amdgpu_amdkfd_device_fini(adev);
2568
3e96dbfd
AD
2569 /* need to disable SMC first */
2570 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2571 if (!adev->ip_blocks[i].status.hw)
3e96dbfd 2572 continue;
fdd34271 2573 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
a1255107 2574 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3e96dbfd
AD
2575 /* XXX handle errors */
2576 if (r) {
2577 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
a1255107 2578 adev->ip_blocks[i].version->funcs->name, r);
3e96dbfd 2579 }
a1255107 2580 adev->ip_blocks[i].status.hw = false;
3e96dbfd
AD
2581 break;
2582 }
2583 }
2584
d38ceaf9 2585 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2586 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2587 continue;
8201a67a 2588
a1255107 2589 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2590 /* XXX handle errors */
2c1a2784 2591 if (r) {
a1255107
AD
2592 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2593 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2594 }
8201a67a 2595
a1255107 2596 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2597 }
2598
9950cda2 2599
d38ceaf9 2600 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2601 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2602 continue;
c12aba3a
ML
2603
2604 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2605 amdgpu_ucode_free_bo(adev);
1e256e27 2606 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a
ML
2607 amdgpu_device_wb_fini(adev);
2608 amdgpu_device_vram_scratch_fini(adev);
533aed27 2609 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2610 }
2611
a1255107 2612 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2613 /* XXX handle errors */
2c1a2784 2614 if (r) {
a1255107
AD
2615 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2616 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2617 }
a1255107
AD
2618 adev->ip_blocks[i].status.sw = false;
2619 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2620 }
2621
a6dcfd9c 2622 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2623 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2624 continue;
a1255107
AD
2625 if (adev->ip_blocks[i].version->funcs->late_fini)
2626 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2627 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2628 }
2629
c030f2e4 2630 amdgpu_ras_fini(adev);
2631
030308fc 2632 if (amdgpu_sriov_vf(adev))
24136135
ML
2633 if (amdgpu_virt_release_full_gpu(adev, false))
2634 DRM_ERROR("failed to release exclusive mode on fini\n");
2493664f 2635
d38ceaf9
AD
2636 return 0;
2637}
2638
e3ecdffa 2639/**
beff74bc 2640 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2641 *
1112a46b 2642 * @work: work_struct.
e3ecdffa 2643 */
beff74bc 2644static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2645{
2646 struct amdgpu_device *adev =
beff74bc 2647 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2648 int r;
2649
2650 r = amdgpu_ib_ring_tests(adev);
2651 if (r)
2652 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2653}
2654
1e317b99
RZ
2655static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2656{
2657 struct amdgpu_device *adev =
2658 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2659
2660 mutex_lock(&adev->gfx.gfx_off_mutex);
2661 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2662 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2663 adev->gfx.gfx_off_state = true;
2664 }
2665 mutex_unlock(&adev->gfx.gfx_off_mutex);
2666}
2667
e3ecdffa 2668/**
e7854a03 2669 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2670 *
2671 * @adev: amdgpu_device pointer
2672 *
2673 * Main suspend function for hardware IPs. The list of all the hardware
2674 * IPs that make up the asic is walked, clockgating is disabled and the
2675 * suspend callbacks are run. suspend puts the hardware and software state
2676 * in each IP into a state suitable for suspend.
2677 * Returns 0 on success, negative error code on failure.
2678 */
e7854a03
AD
2679static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2680{
2681 int i, r;
2682
b00978de
PL
2683 if (adev->in_poweroff_reboot_com ||
2684 !amdgpu_acpi_is_s0ix_supported(adev) || amdgpu_in_reset(adev)) {
628c36d7
PL
2685 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2686 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2687 }
05df1f01 2688
e7854a03
AD
2689 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2690 if (!adev->ip_blocks[i].status.valid)
2691 continue;
2b9f7848 2692
e7854a03 2693 /* displays are handled separately */
2b9f7848
ND
2694 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
2695 continue;
2696
2697 /* XXX handle errors */
2698 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2699 /* XXX handle errors */
2700 if (r) {
2701 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2702 adev->ip_blocks[i].version->funcs->name, r);
2703 return r;
e7854a03 2704 }
2b9f7848
ND
2705
2706 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
2707 }
2708
e7854a03
AD
2709 return 0;
2710}
2711
2712/**
2713 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2714 *
2715 * @adev: amdgpu_device pointer
2716 *
2717 * Main suspend function for hardware IPs. The list of all the hardware
2718 * IPs that make up the asic is walked, clockgating is disabled and the
2719 * suspend callbacks are run. suspend puts the hardware and software state
2720 * in each IP into a state suitable for suspend.
2721 * Returns 0 on success, negative error code on failure.
2722 */
2723static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2724{
2725 int i, r;
2726
2727 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2728 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2729 continue;
e7854a03
AD
2730 /* displays are handled in phase1 */
2731 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2732 continue;
bff77e86
LM
2733 /* PSP lost connection when err_event_athub occurs */
2734 if (amdgpu_ras_intr_triggered() &&
2735 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2736 adev->ip_blocks[i].status.hw = false;
2737 continue;
2738 }
d38ceaf9 2739 /* XXX handle errors */
a1255107 2740 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 2741 /* XXX handle errors */
2c1a2784 2742 if (r) {
a1255107
AD
2743 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2744 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2745 }
876923fb 2746 adev->ip_blocks[i].status.hw = false;
a3a09142 2747 /* handle putting the SMC in the appropriate state */
86b93fd6
JZ
 2748 		if (!amdgpu_sriov_vf(adev)) {
2749 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2750 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
2751 if (r) {
2752 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
2753 adev->mp1_state, r);
2754 return r;
2755 }
a3a09142
AD
2756 }
2757 }
b5507c7e 2758 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2759 }
2760
2761 return 0;
2762}
2763
e7854a03
AD
2764/**
2765 * amdgpu_device_ip_suspend - run suspend for hardware IPs
2766 *
2767 * @adev: amdgpu_device pointer
2768 *
2769 * Main suspend function for hardware IPs. The list of all the hardware
2770 * IPs that make up the asic is walked, clockgating is disabled and the
2771 * suspend callbacks are run. suspend puts the hardware and software state
2772 * in each IP into a state suitable for suspend.
2773 * Returns 0 on success, negative error code on failure.
2774 */
2775int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2776{
2777 int r;
2778
e7819644
YT
2779 if (amdgpu_sriov_vf(adev))
2780 amdgpu_virt_request_full_gpu(adev, false);
2781
e7854a03
AD
2782 r = amdgpu_device_ip_suspend_phase1(adev);
2783 if (r)
2784 return r;
2785 r = amdgpu_device_ip_suspend_phase2(adev);
2786
e7819644
YT
2787 if (amdgpu_sriov_vf(adev))
2788 amdgpu_virt_release_full_gpu(adev, false);
2789
e7854a03
AD
2790 return r;
2791}
2792
06ec9070 2793static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2794{
2795 int i, r;
2796
2cb681b6
ML
2797 static enum amd_ip_block_type ip_order[] = {
2798 AMD_IP_BLOCK_TYPE_GMC,
2799 AMD_IP_BLOCK_TYPE_COMMON,
39186aef 2800 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
2801 AMD_IP_BLOCK_TYPE_IH,
2802 };
a90ad3c2 2803
2cb681b6
ML
2804 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2805 int j;
2806 struct amdgpu_ip_block *block;
a90ad3c2 2807
4cd2a96d
J
2808 block = &adev->ip_blocks[i];
2809 block->status.hw = false;
2cb681b6 2810
4cd2a96d 2811 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
2cb681b6 2812
4cd2a96d 2813 if (block->version->type != ip_order[j] ||
2cb681b6
ML
2814 !block->status.valid)
2815 continue;
2816
2817 r = block->version->funcs->hw_init(adev);
0aaeefcc 2818 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2819 if (r)
2820 return r;
482f0e53 2821 block->status.hw = true;
a90ad3c2
ML
2822 }
2823 }
2824
2825 return 0;
2826}
2827
06ec9070 2828static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2829{
2830 int i, r;
2831
2cb681b6
ML
2832 static enum amd_ip_block_type ip_order[] = {
2833 AMD_IP_BLOCK_TYPE_SMC,
2834 AMD_IP_BLOCK_TYPE_DCE,
2835 AMD_IP_BLOCK_TYPE_GFX,
2836 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c 2837 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07
JJ
2838 AMD_IP_BLOCK_TYPE_VCE,
2839 AMD_IP_BLOCK_TYPE_VCN
2cb681b6 2840 };
a90ad3c2 2841
2cb681b6
ML
2842 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2843 int j;
2844 struct amdgpu_ip_block *block;
a90ad3c2 2845
2cb681b6
ML
2846 for (j = 0; j < adev->num_ip_blocks; j++) {
2847 block = &adev->ip_blocks[j];
2848
2849 if (block->version->type != ip_order[i] ||
482f0e53
ML
2850 !block->status.valid ||
2851 block->status.hw)
2cb681b6
ML
2852 continue;
2853
895bd048
JZ
2854 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
2855 r = block->version->funcs->resume(adev);
2856 else
2857 r = block->version->funcs->hw_init(adev);
2858
0aaeefcc 2859 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2860 if (r)
2861 return r;
482f0e53 2862 block->status.hw = true;
a90ad3c2
ML
2863 }
2864 }
2865
2866 return 0;
2867}
2868
e3ecdffa
AD
2869/**
2870 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2871 *
2872 * @adev: amdgpu_device pointer
2873 *
2874 * First resume function for hardware IPs. The list of all the hardware
2875 * IPs that make up the asic is walked and the resume callbacks are run for
2876 * COMMON, GMC, and IH. resume puts the hardware into a functional state
2877 * after a suspend and updates the software state as necessary. This
2878 * function is also used for restoring the GPU after a GPU reset.
2879 * Returns 0 on success, negative error code on failure.
2880 */
06ec9070 2881static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
2882{
2883 int i, r;
2884
a90ad3c2 2885 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2886 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 2887 continue;
a90ad3c2 2888 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa
AD
2889 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2890 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
482f0e53 2891
fcf0649f
CZ
2892 r = adev->ip_blocks[i].version->funcs->resume(adev);
2893 if (r) {
2894 DRM_ERROR("resume of IP block <%s> failed %d\n",
2895 adev->ip_blocks[i].version->funcs->name, r);
2896 return r;
2897 }
482f0e53 2898 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
2899 }
2900 }
2901
2902 return 0;
2903}
2904
e3ecdffa
AD
2905/**
2906 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2907 *
2908 * @adev: amdgpu_device pointer
2909 *
 2910 * Second resume function for hardware IPs. The list of all the hardware
2911 * IPs that make up the asic is walked and the resume callbacks are run for
2912 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
2913 * functional state after a suspend and updates the software state as
2914 * necessary. This function is also used for restoring the GPU after a GPU
2915 * reset.
2916 * Returns 0 on success, negative error code on failure.
2917 */
06ec9070 2918static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2919{
2920 int i, r;
2921
2922 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2923 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 2924 continue;
fcf0649f 2925 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 2926 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
2927 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2928 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 2929 continue;
a1255107 2930 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 2931 if (r) {
a1255107
AD
2932 DRM_ERROR("resume of IP block <%s> failed %d\n",
2933 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2934 return r;
2c1a2784 2935 }
482f0e53 2936 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
2937 }
2938
2939 return 0;
2940}
2941
e3ecdffa
AD
2942/**
2943 * amdgpu_device_ip_resume - run resume for hardware IPs
2944 *
2945 * @adev: amdgpu_device pointer
2946 *
2947 * Main resume function for hardware IPs. The hardware IPs
 2948 * are split into two resume functions because they are
 2949 * also used in recovering from a GPU reset and some additional
 2950 * steps need to be taken between them. In this case (S3/S4) they are
2951 * run sequentially.
2952 * Returns 0 on success, negative error code on failure.
2953 */
06ec9070 2954static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
2955{
2956 int r;
2957
06ec9070 2958 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
2959 if (r)
2960 return r;
7a3e0bb2
RZ
2961
2962 r = amdgpu_device_fw_loading(adev);
2963 if (r)
2964 return r;
2965
06ec9070 2966 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
2967
2968 return r;
2969}
2970
e3ecdffa
AD
2971/**
2972 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2973 *
2974 * @adev: amdgpu_device pointer
2975 *
2976 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2977 */
4e99a44e 2978static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 2979{
6867e1b5
ML
2980 if (amdgpu_sriov_vf(adev)) {
2981 if (adev->is_atom_fw) {
2982 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2983 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2984 } else {
2985 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2986 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2987 }
2988
2989 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2990 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 2991 }
048765ad
AR
2992}
2993
e3ecdffa
AD
2994/**
2995 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2996 *
2997 * @asic_type: AMD asic type
2998 *
 2999 * Check if there is DC (new modesetting infrastructure) support for an asic.
3000 * returns true if DC has support, false if not.
3001 */
4562236b
HW
3002bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3003{
3004 switch (asic_type) {
3005#if defined(CONFIG_DRM_AMD_DC)
64200c46
MR
3006#if defined(CONFIG_DRM_AMD_DC_SI)
3007 case CHIP_TAHITI:
3008 case CHIP_PITCAIRN:
3009 case CHIP_VERDE:
3010 case CHIP_OLAND:
3011#endif
4562236b 3012 case CHIP_BONAIRE:
0d6fbccb 3013 case CHIP_KAVERI:
367e6687
AD
3014 case CHIP_KABINI:
3015 case CHIP_MULLINS:
d9fda248
HW
3016 /*
3017 * We have systems in the wild with these ASICs that require
3018 * LVDS and VGA support which is not supported with DC.
3019 *
3020 * Fallback to the non-DC driver here by default so as not to
3021 * cause regressions.
3022 */
3023 return amdgpu_dc > 0;
3024 case CHIP_HAWAII:
4562236b
HW
3025 case CHIP_CARRIZO:
3026 case CHIP_STONEY:
4562236b 3027 case CHIP_POLARIS10:
675fd32b 3028 case CHIP_POLARIS11:
2c8ad2d5 3029 case CHIP_POLARIS12:
675fd32b 3030 case CHIP_VEGAM:
4562236b
HW
3031 case CHIP_TONGA:
3032 case CHIP_FIJI:
42f8ffa1 3033 case CHIP_VEGA10:
dca7b401 3034 case CHIP_VEGA12:
c6034aa2 3035 case CHIP_VEGA20:
b86a1aa3 3036#if defined(CONFIG_DRM_AMD_DC_DCN)
fd187853 3037 case CHIP_RAVEN:
b4f199c7 3038 case CHIP_NAVI10:
8fceceb6 3039 case CHIP_NAVI14:
078655d9 3040 case CHIP_NAVI12:
e1c14c43 3041 case CHIP_RENOIR:
81d9bfb8 3042 case CHIP_SIENNA_CICHLID:
a6c5308f 3043 case CHIP_NAVY_FLOUNDER:
7cc656e2 3044 case CHIP_DIMGREY_CAVEFISH:
84b934bc 3045 case CHIP_VANGOGH:
42f8ffa1 3046#endif
fd187853 3047 return amdgpu_dc != 0;
4562236b
HW
3048#endif
3049 default:
93b09a9a 3050 if (amdgpu_dc > 0)
044a48f4 3051 DRM_INFO_ONCE("Display Core has been requested via kernel parameter "
93b09a9a 3052 "but isn't supported by ASIC, ignoring\n");
4562236b
HW
3053 return false;
3054 }
3055}
3056
3057/**
3058 * amdgpu_device_has_dc_support - check if dc is supported
3059 *
982a820b 3060 * @adev: amdgpu_device pointer
4562236b
HW
3061 *
3062 * Returns true for supported, false for not supported
3063 */
3064bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3065{
c997e8e2 3066 if (amdgpu_sriov_vf(adev) || adev->enable_virtual_display)
2555039d
XY
3067 return false;
3068
4562236b
HW
3069 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3070}
3071
d4535e2c
AG
3072
3073static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3074{
3075 struct amdgpu_device *adev =
3076 container_of(__work, struct amdgpu_device, xgmi_reset_work);
d95e8e97 3077 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
d4535e2c 3078
c6a6e2db
AG
3079 /* It's a bug to not have a hive within this function */
3080 if (WARN_ON(!hive))
3081 return;
3082
3083 /*
3084 * Use task barrier to synchronize all xgmi reset works across the
3085 * hive. task_barrier_enter and task_barrier_exit will block
3086 * until all the threads running the xgmi reset works reach
3087 * those points. task_barrier_full will do both blocks.
3088 */
3089 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3090
3091 task_barrier_enter(&hive->tb);
4a580877 3092 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
c6a6e2db
AG
3093
3094 if (adev->asic_reset_res)
3095 goto fail;
3096
3097 task_barrier_exit(&hive->tb);
4a580877 3098 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
c6a6e2db
AG
3099
3100 if (adev->asic_reset_res)
3101 goto fail;
43c4d576
JC
3102
3103 if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count)
3104 adev->mmhub.funcs->reset_ras_error_count(adev);
c6a6e2db
AG
3105 } else {
3106
3107 task_barrier_full(&hive->tb);
3108 adev->asic_reset_res = amdgpu_asic_reset(adev);
3109 }
ce316fa5 3110
c6a6e2db 3111fail:
d4535e2c 3112 if (adev->asic_reset_res)
fed184e9 3113 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
4a580877 3114 adev->asic_reset_res, adev_to_drm(adev)->unique);
d95e8e97 3115 amdgpu_put_xgmi_hive(hive);
d4535e2c
AG
3116}
3117
71f98027
AD
3118static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3119{
3120 char *input = amdgpu_lockup_timeout;
3121 char *timeout_setting = NULL;
3122 int index = 0;
3123 long timeout;
3124 int ret = 0;
3125
3126 /*
 3127 * By default the timeout for non-compute jobs is 10000 ms.
 3128 * And there is no timeout enforced on compute jobs.
 3129 * In SR-IOV or passthrough mode, the timeout for compute
b7b2a316 3130 * jobs is 60000 ms by default.
71f98027
AD
3131 */
3132 adev->gfx_timeout = msecs_to_jiffies(10000);
3133 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
9882e278
ED
3134 if (amdgpu_sriov_vf(adev))
3135 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3136 msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
3137 else if (amdgpu_passthrough(adev))
b7b2a316 3138 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027
AD
3139 else
3140 adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
3141
f440ff44 3142 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 3143 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 3144 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
3145 ret = kstrtol(timeout_setting, 0, &timeout);
3146 if (ret)
3147 return ret;
3148
3149 if (timeout == 0) {
3150 index++;
3151 continue;
3152 } else if (timeout < 0) {
3153 timeout = MAX_SCHEDULE_TIMEOUT;
3154 } else {
3155 timeout = msecs_to_jiffies(timeout);
3156 }
3157
3158 switch (index++) {
3159 case 0:
3160 adev->gfx_timeout = timeout;
3161 break;
3162 case 1:
3163 adev->compute_timeout = timeout;
3164 break;
3165 case 2:
3166 adev->sdma_timeout = timeout;
3167 break;
3168 case 3:
3169 adev->video_timeout = timeout;
3170 break;
3171 default:
3172 break;
3173 }
3174 }
3175 /*
3176 * There is only one value specified and
3177 * it should apply to all non-compute jobs.
3178 */
bcccee89 3179 if (index == 1) {
71f98027 3180 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
3181 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3182 adev->compute_timeout = adev->gfx_timeout;
3183 }
71f98027
AD
3184 }
3185
3186 return ret;
3187}
d4535e2c 3188
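/*
 * Example values for the lockup_timeout parameter parsed above (the format
 * follows directly from the parsing loop): up to four comma separated values
 * in milliseconds, applied in the order gfx, compute, sdma, video. A value
 * of 0 keeps the default and a negative value selects MAX_SCHEDULE_TIMEOUT,
 * i.e. effectively no timeout. With a single value, it applies to all
 * non-compute jobs (and to compute jobs as well under SR-IOV/passthrough).
 *
 *   amdgpu.lockup_timeout=10000                   - 10 s for non-compute jobs
 *   amdgpu.lockup_timeout=10000,60000,10000,10000 - per engine-class values
 */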
77f3a5cd
ND
3189static const struct attribute *amdgpu_dev_attributes[] = {
3190 &dev_attr_product_name.attr,
3191 &dev_attr_product_number.attr,
3192 &dev_attr_serial_number.attr,
3193 &dev_attr_pcie_replay_count.attr,
3194 NULL
3195};
3196
c9a6b82f 3197
d38ceaf9
AD
3198/**
3199 * amdgpu_device_init - initialize the driver
3200 *
3201 * @adev: amdgpu_device pointer
d38ceaf9
AD
3202 * @flags: driver flags
3203 *
3204 * Initializes the driver info and hw (all asics).
3205 * Returns 0 for success or an error on failure.
3206 * Called at driver startup.
3207 */
3208int amdgpu_device_init(struct amdgpu_device *adev,
d38ceaf9
AD
3209 uint32_t flags)
3210{
8aba21b7
LT
3211 struct drm_device *ddev = adev_to_drm(adev);
3212 struct pci_dev *pdev = adev->pdev;
d38ceaf9 3213 int r, i;
fd496ca8 3214 bool atpx = false;
95844d20 3215 u32 max_MBps;
d38ceaf9
AD
3216
3217 adev->shutdown = false;
d38ceaf9 3218 adev->flags = flags;
4e66d7d2
YZ
3219
3220 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3221 adev->asic_type = amdgpu_force_asic_type;
3222 else
3223 adev->asic_type = flags & AMD_ASIC_MASK;
3224
d38ceaf9 3225 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 3226 if (amdgpu_emu_mode == 1)
8bdab6bb 3227 adev->usec_timeout *= 10;
770d13b1 3228 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
3229 adev->accel_working = false;
3230 adev->num_rings = 0;
3231 adev->mman.buffer_funcs = NULL;
3232 adev->mman.buffer_funcs_ring = NULL;
3233 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 3234 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 3235 adev->gmc.gmc_funcs = NULL;
f54d1867 3236 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 3237 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
3238
3239 adev->smc_rreg = &amdgpu_invalid_rreg;
3240 adev->smc_wreg = &amdgpu_invalid_wreg;
3241 adev->pcie_rreg = &amdgpu_invalid_rreg;
3242 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
3243 adev->pciep_rreg = &amdgpu_invalid_rreg;
3244 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
3245 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3246 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
3247 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3248 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3249 adev->didt_rreg = &amdgpu_invalid_rreg;
3250 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
3251 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3252 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
3253 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3254 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3255
3e39ab90
AD
3256 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3257 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3258 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
3259
 3260 	/* mutex initializations are all done here so we
 3261 	 * can recall functions without having locking issues */
d38ceaf9 3262 atomic_set(&adev->irq.ih.lock, 0);
0e5ca0d1 3263 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
3264 mutex_init(&adev->pm.mutex);
3265 mutex_init(&adev->gfx.gpu_clock_mutex);
3266 mutex_init(&adev->srbm_mutex);
b8866c26 3267 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3268 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 3269 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3270 mutex_init(&adev->mn_lock);
e23b74aa 3271 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3272 hash_init(adev->mn_hash);
53b3f8f4 3273 atomic_set(&adev->in_gpu_reset, 0);
6049db43 3274 init_rwsem(&adev->reset_sem);
32eaeae0 3275 mutex_init(&adev->psp.mutex);
bd052211 3276 mutex_init(&adev->notifier_lock);
d38ceaf9 3277
912dfc84
EQ
3278 r = amdgpu_device_check_arguments(adev);
3279 if (r)
3280 return r;
d38ceaf9 3281
d38ceaf9
AD
3282 spin_lock_init(&adev->mmio_idx_lock);
3283 spin_lock_init(&adev->smc_idx_lock);
3284 spin_lock_init(&adev->pcie_idx_lock);
3285 spin_lock_init(&adev->uvd_ctx_idx_lock);
3286 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3287 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3288 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3289 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3290 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3291
0c4e7fa5
CZ
3292 INIT_LIST_HEAD(&adev->shadow_list);
3293 mutex_init(&adev->shadow_list_lock);
3294
beff74bc
AD
3295 INIT_DELAYED_WORK(&adev->delayed_init_work,
3296 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3297 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3298 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3299
d4535e2c
AG
3300 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3301
d23ee13f 3302 adev->gfx.gfx_off_req_count = 1;
b6e79d9a 3303 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3304
b265bdbd
EQ
3305 atomic_set(&adev->throttling_logging_enabled, 1);
3306 /*
3307 * If throttling continues, logging will be performed every minute
3308 * to avoid log flooding. "-1" is subtracted since the thermal
3309 * throttling interrupt comes every second. Thus, the total logging
 3310 	 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
3311 * for throttling interrupt) = 60 seconds.
3312 */
3313 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3314 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3315
0fa49558
AX
3316 /* Registers mapping */
3317 /* TODO: block userspace mapping of io register */
da69c161
KW
3318 if (adev->asic_type >= CHIP_BONAIRE) {
3319 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3320 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3321 } else {
3322 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3323 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3324 }
d38ceaf9 3325
d38ceaf9
AD
3326 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3327 if (adev->rmmio == NULL) {
3328 return -ENOMEM;
3329 }
3330 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3331 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3332
d38ceaf9
AD
3333 /* io port mapping */
3334 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
3335 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
3336 adev->rio_mem_size = pci_resource_len(adev->pdev, i);
3337 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
3338 break;
3339 }
3340 }
3341 if (adev->rio_mem == NULL)
b64a18c5 3342 DRM_INFO("PCI I/O BAR is not found.\n");
d38ceaf9 3343
b2109d8e
JX
3344 /* enable PCIE atomic ops */
3345 r = pci_enable_atomic_ops_to_root(adev->pdev,
3346 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3347 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3348 if (r) {
3349 adev->have_atomics_support = false;
3350 DRM_INFO("PCIE atomic ops is not supported\n");
3351 } else {
3352 adev->have_atomics_support = true;
3353 }
3354
5494d864
AD
3355 amdgpu_device_get_pcie_info(adev);
3356
b239c017
JX
3357 if (amdgpu_mcbp)
3358 DRM_INFO("MCBP is enabled\n");
3359
5f84cc63
JX
3360 if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
3361 adev->enable_mes = true;
3362
3aa0115d
ML
3363 /* detect hw virtualization here */
3364 amdgpu_detect_virtualization(adev);
3365
dffa11b4
ML
3366 r = amdgpu_device_get_job_timeout_settings(adev);
3367 if (r) {
3368 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4192f7b5 3369 goto failed_unmap;
a190d1c7
XY
3370 }
3371
d38ceaf9 3372 /* early init functions */
06ec9070 3373 r = amdgpu_device_ip_early_init(adev);
d38ceaf9 3374 if (r)
4192f7b5 3375 goto failed_unmap;
d38ceaf9 3376
6585661d
OZ
3377 /* doorbell bar mapping and doorbell index init*/
3378 amdgpu_device_doorbell_init(adev);
3379
d38ceaf9
AD
3380 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
3381 /* this will fail for cards that aren't VGA class devices, just
3382 * ignore it */
38d6be81
AD
3383 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
3384 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
d38ceaf9 3385
fd496ca8
AD
3386 if (amdgpu_device_supports_atpx(ddev))
3387 atpx = true;
3840c5bc
AD
3388 if (amdgpu_has_atpx() &&
3389 (amdgpu_is_atpx_hybrid() ||
3390 amdgpu_has_atpx_dgpu_power_cntl()) &&
3391 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 3392 vga_switcheroo_register_client(adev->pdev,
fd496ca8
AD
3393 &amdgpu_switcheroo_ops, atpx);
3394 if (atpx)
d38ceaf9
AD
3395 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
3396
9475a943
SL
3397 if (amdgpu_emu_mode == 1) {
3398 /* post the asic on emulation mode */
3399 emu_soc_asic_init(adev);
bfca0289 3400 goto fence_driver_init;
9475a943 3401 }
bfca0289 3402
4e99a44e
ML
3403 /* detect if we are with an SRIOV vbios */
3404 amdgpu_device_detect_sriov_bios(adev);
048765ad 3405
95e8e59e
AD
3406 /* check if we need to reset the asic
3407 * E.g., driver was not cleanly unloaded previously, etc.
3408 */
f14899fd 3409 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
95e8e59e
AD
3410 r = amdgpu_asic_reset(adev);
3411 if (r) {
3412 dev_err(adev->dev, "asic reset on init failed\n");
3413 goto failed;
3414 }
3415 }
3416
8f66090b 3417 pci_enable_pcie_error_reporting(adev->pdev);
c9a6b82f 3418
d38ceaf9 3419 /* Post card if necessary */
39c640c0 3420 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3421 if (!adev->bios) {
bec86378 3422 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3423 r = -EINVAL;
3424 goto failed;
d38ceaf9 3425 }
bec86378 3426 DRM_INFO("GPU posting now...\n");
4d2997ab 3427 r = amdgpu_device_asic_init(adev);
4e99a44e
ML
3428 if (r) {
3429 dev_err(adev->dev, "gpu post error!\n");
3430 goto failed;
3431 }
d38ceaf9
AD
3432 }
3433
88b64e95
AD
3434 if (adev->is_atom_fw) {
3435 /* Initialize clocks */
3436 r = amdgpu_atomfirmware_get_clock_info(adev);
3437 if (r) {
3438 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 3439 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
3440 goto failed;
3441 }
3442 } else {
a5bde2f9
AD
3443 /* Initialize clocks */
3444 r = amdgpu_atombios_get_clock_info(adev);
3445 if (r) {
3446 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 3447 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 3448 goto failed;
a5bde2f9
AD
3449 }
3450 /* init i2c buses */
4562236b
HW
3451 if (!amdgpu_device_has_dc_support(adev))
3452 amdgpu_atombios_i2c_init(adev);
2c1a2784 3453 }
d38ceaf9 3454
bfca0289 3455fence_driver_init:
d38ceaf9
AD
3456 /* Fence driver */
3457 r = amdgpu_fence_driver_init(adev);
2c1a2784
AD
3458 if (r) {
3459 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
e23b74aa 3460 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3461 goto failed;
2c1a2784 3462 }
d38ceaf9
AD
3463
3464 /* init the mode config */
4a580877 3465 drm_mode_config_init(adev_to_drm(adev));
d38ceaf9 3466
06ec9070 3467 r = amdgpu_device_ip_init(adev);
d38ceaf9 3468 if (r) {
8840a387 3469 /* failed in exclusive mode due to timeout */
3470 if (amdgpu_sriov_vf(adev) &&
3471 !amdgpu_sriov_runtime(adev) &&
3472 amdgpu_virt_mmio_blocked(adev) &&
3473 !amdgpu_virt_wait_reset(adev)) {
3474 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
3475 /* Don't send request since VF is inactive. */
3476 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3477 adev->virt.ops = NULL;
8840a387 3478 r = -EAGAIN;
3479 goto failed;
3480 }
06ec9070 3481 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3482 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
83ba126a 3483 goto failed;
d38ceaf9
AD
3484 }
3485
d69b8971
YZ
3486 dev_info(adev->dev,
3487 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
3488 adev->gfx.config.max_shader_engines,
3489 adev->gfx.config.max_sh_per_se,
3490 adev->gfx.config.max_cu_per_sh,
3491 adev->gfx.cu_info.number);
3492
d38ceaf9
AD
3493 adev->accel_working = true;
3494
e59c0205
AX
3495 amdgpu_vm_check_compute_bug(adev);
3496
95844d20
MO
3497 /* Initialize the buffer migration limit. */
3498 if (amdgpu_moverate >= 0)
3499 max_MBps = amdgpu_moverate;
3500 else
3501 max_MBps = 8; /* Allow 8 MB/s. */
3502 /* Get a log2 for easy divisions. */
3503 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
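	/*
	 * Worked example (illustrative): with the default amdgpu_moverate of
	 * -1 the limit is 8 MB/s, so log2_max_MBps = ilog2(8) = 3.  The CS
	 * throttling code can then turn an elapsed time into a byte budget
	 * with a cheap shift, roughly bytes ~= us_elapsed << log2_max_MBps,
	 * since 1 MB/s is approximately 1 byte per microsecond.
	 */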
3504
9bc92b9c
ML
3505 amdgpu_fbdev_init(adev);
3506
d2f52ac8 3507 r = amdgpu_pm_sysfs_init(adev);
7c868b59
YT
3508 if (r) {
3509 adev->pm_sysfs_en = false;
d2f52ac8 3510 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
7c868b59
YT
3511 } else
3512 adev->pm_sysfs_en = true;
d2f52ac8 3513
5bb23532 3514 r = amdgpu_ucode_sysfs_init(adev);
7c868b59
YT
3515 if (r) {
3516 adev->ucode_sysfs_en = false;
5bb23532 3517 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
7c868b59
YT
3518 } else
3519 adev->ucode_sysfs_en = true;
5bb23532 3520
d38ceaf9
AD
3521 if ((amdgpu_testing & 1)) {
3522 if (adev->accel_working)
3523 amdgpu_test_moves(adev);
3524 else
3525 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
3526 }
d38ceaf9
AD
3527 if (amdgpu_benchmarking) {
3528 if (adev->accel_working)
3529 amdgpu_benchmark(adev, amdgpu_benchmarking);
3530 else
3531 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
3532 }
3533
b0adca4d
EQ
3534 /*
3535 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3536	 * Otherwise the mgpu fan boost feature will be skipped because the
3537	 * gpu instance count would come up short.
3538 */
3539 amdgpu_register_gpu_instance(adev);
3540
d38ceaf9
AD
3541 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3542 * explicit gating rather than handling it automatically.
3543 */
06ec9070 3544 r = amdgpu_device_ip_late_init(adev);
2c1a2784 3545 if (r) {
06ec9070 3546 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
e23b74aa 3547 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
83ba126a 3548 goto failed;
2c1a2784 3549 }
d38ceaf9 3550
108c6a63 3551 /* must succeed. */
511fdbc3 3552 amdgpu_ras_resume(adev);
108c6a63 3553
beff74bc
AD
3554 queue_delayed_work(system_wq, &adev->delayed_init_work,
3555 msecs_to_jiffies(AMDGPU_RESUME_MS));
3556
2c738637
ML
3557 if (amdgpu_sriov_vf(adev))
3558 flush_delayed_work(&adev->delayed_init_work);
3559
77f3a5cd 3560 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
5aea5327 3561 if (r)
77f3a5cd 3562 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166 3563
d155bef0
AB
3564 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3565 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
3566 if (r)
3567 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3568
c1dd4aa6
AG
3569	/* Keep the stored PCI config space at hand for restore in case of a sudden PCI error */
3570 if (amdgpu_device_cache_pci_state(adev->pdev))
3571 pci_restore_state(pdev);
3572
d38ceaf9 3573 return 0;
83ba126a
AD
3574
3575failed:
89041940 3576 amdgpu_vf_error_trans_all(adev);
fd496ca8 3577 if (atpx)
83ba126a 3578 vga_switcheroo_fini_domain_pm_ops(adev->dev);
8840a387 3579
4192f7b5
AD
3580failed_unmap:
3581 iounmap(adev->rmmio);
3582 adev->rmmio = NULL;
3583
83ba126a 3584 return r;
d38ceaf9
AD
3585}
3586
d38ceaf9
AD
3587/**
3588 * amdgpu_device_fini - tear down the driver
3589 *
3590 * @adev: amdgpu_device pointer
3591 *
3592 * Tear down the driver info (all asics).
3593 * Called at driver shutdown.
3594 */
3595void amdgpu_device_fini(struct amdgpu_device *adev)
3596{
aac89168 3597 dev_info(adev->dev, "amdgpu: finishing device.\n");
9f875167 3598 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 3599 adev->shutdown = true;
9f875167 3600
c1dd4aa6
AG
3601 kfree(adev->pci_state);
3602
752c683d
ML
3603	/* make sure the IB test has finished before entering exclusive mode
3604	 * to avoid preemption on the IB test
3605	 */
519b8b76 3606 if (amdgpu_sriov_vf(adev)) {
752c683d 3607 amdgpu_virt_request_full_gpu(adev, false);
519b8b76
BZ
3608 amdgpu_virt_fini_data_exchange(adev);
3609 }
752c683d 3610
e5b03032
ML
3611 /* disable all interrupts */
3612 amdgpu_irq_disable_all(adev);
ff97cba8
ML
3613 if (adev->mode_info.mode_config_initialized){
3614 if (!amdgpu_device_has_dc_support(adev))
4a580877 3615 drm_helper_force_disable_all(adev_to_drm(adev));
ff97cba8 3616 else
4a580877 3617 drm_atomic_helper_shutdown(adev_to_drm(adev));
ff97cba8 3618 }
d38ceaf9 3619 amdgpu_fence_driver_fini(adev);
7c868b59
YT
3620 if (adev->pm_sysfs_en)
3621 amdgpu_pm_sysfs_fini(adev);
d38ceaf9 3622 amdgpu_fbdev_fini(adev);
e230ac11 3623 amdgpu_device_ip_fini(adev);
75e1658e
ND
3624 release_firmware(adev->firmware.gpu_info_fw);
3625 adev->firmware.gpu_info_fw = NULL;
d38ceaf9
AD
3626 adev->accel_working = false;
3627 /* free i2c buses */
4562236b
HW
3628 if (!amdgpu_device_has_dc_support(adev))
3629 amdgpu_i2c_fini(adev);
bfca0289
SL
3630
3631 if (amdgpu_emu_mode != 1)
3632 amdgpu_atombios_fini(adev);
3633
d38ceaf9
AD
3634 kfree(adev->bios);
3635 adev->bios = NULL;
3840c5bc
AD
3636 if (amdgpu_has_atpx() &&
3637 (amdgpu_is_atpx_hybrid() ||
3638 amdgpu_has_atpx_dgpu_power_cntl()) &&
3639 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 3640 vga_switcheroo_unregister_client(adev->pdev);
fd496ca8 3641 if (amdgpu_device_supports_atpx(adev_to_drm(adev)))
83ba126a 3642 vga_switcheroo_fini_domain_pm_ops(adev->dev);
38d6be81
AD
3643 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
3644 vga_client_register(adev->pdev, NULL, NULL, NULL);
d38ceaf9
AD
3645 if (adev->rio_mem)
3646 pci_iounmap(adev->pdev, adev->rio_mem);
3647 adev->rio_mem = NULL;
3648 iounmap(adev->rmmio);
3649 adev->rmmio = NULL;
06ec9070 3650 amdgpu_device_doorbell_fini(adev);
e9bc1bf7 3651
7c868b59
YT
3652 if (adev->ucode_sysfs_en)
3653 amdgpu_ucode_sysfs_fini(adev);
77f3a5cd
ND
3654
3655 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
d155bef0
AB
3656 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3657 amdgpu_pmu_fini(adev);
72de33f8 3658 if (adev->mman.discovery_bin)
a190d1c7 3659 amdgpu_discovery_fini(adev);
d38ceaf9
AD
3660}
3661
3662
3663/*
3664 * Suspend & resume.
3665 */
3666/**
810ddc3a 3667 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 3668 *
87e3f136 3669 * @dev: drm dev pointer
87e3f136 3670 * @fbcon : notify the fbdev of suspend
d38ceaf9
AD
3671 *
3672 * Puts the hw in the suspend state (all asics).
3673 * Returns 0 for success or an error on failure.
3674 * Called at driver suspend.
3675 */
de185019 3676int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3677{
3678 struct amdgpu_device *adev;
3679 struct drm_crtc *crtc;
3680 struct drm_connector *connector;
f8d2d39e 3681 struct drm_connector_list_iter iter;
5ceb54c6 3682 int r;
d38ceaf9 3683
1348969a 3684 adev = drm_to_adev(dev);
d38ceaf9
AD
3685
3686 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3687 return 0;
3688
44779b43 3689 adev->in_suspend = true;
d38ceaf9
AD
3690 drm_kms_helper_poll_disable(dev);
3691
5f818173
S
3692 if (fbcon)
3693 amdgpu_fbdev_set_suspend(adev, 1);
3694
beff74bc 3695 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 3696
4562236b
HW
3697 if (!amdgpu_device_has_dc_support(adev)) {
3698 /* turn off display hw */
3699 drm_modeset_lock_all(dev);
f8d2d39e
LP
3700 drm_connector_list_iter_begin(dev, &iter);
3701 drm_for_each_connector_iter(connector, &iter)
3702 drm_helper_connector_dpms(connector,
3703 DRM_MODE_DPMS_OFF);
3704 drm_connector_list_iter_end(&iter);
4562236b 3705 drm_modeset_unlock_all(dev);
fe1053b7
AD
3706 /* unpin the front buffers and cursors */
3707 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3708 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3709 struct drm_framebuffer *fb = crtc->primary->fb;
3710 struct amdgpu_bo *robj;
3711
91334223 3712 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3713 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3714 r = amdgpu_bo_reserve(aobj, true);
3715 if (r == 0) {
3716 amdgpu_bo_unpin(aobj);
3717 amdgpu_bo_unreserve(aobj);
3718 }
756e6880 3719 }
756e6880 3720
fe1053b7
AD
3721 if (fb == NULL || fb->obj[0] == NULL) {
3722 continue;
3723 }
3724 robj = gem_to_amdgpu_bo(fb->obj[0]);
3725 /* don't unpin kernel fb objects */
3726 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
3727 r = amdgpu_bo_reserve(robj, true);
3728 if (r == 0) {
3729 amdgpu_bo_unpin(robj);
3730 amdgpu_bo_unreserve(robj);
3731 }
d38ceaf9
AD
3732 }
3733 }
3734 }
fe1053b7 3735
5e6932fe 3736 amdgpu_ras_suspend(adev);
3737
fe1053b7
AD
3738 r = amdgpu_device_ip_suspend_phase1(adev);
3739
ad887af9 3740 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
94fa5660 3741
d38ceaf9
AD
3742 /* evict vram memory */
3743 amdgpu_bo_evict_vram(adev);
3744
5ceb54c6 3745 amdgpu_fence_driver_suspend(adev);
d38ceaf9 3746
b00978de
PL
3747 if (adev->in_poweroff_reboot_com ||
3748 !amdgpu_acpi_is_s0ix_supported(adev) || amdgpu_in_reset(adev))
628c36d7
PL
3749 r = amdgpu_device_ip_suspend_phase2(adev);
3750 else
3751 amdgpu_gfx_state_change_set(adev, sGpuChangeState_D3Entry);
a0a71e49
AD
3752 /* evict remaining vram memory
3753 * This second call to evict vram is to evict the gart page table
3754 * using the CPU.
3755 */
d38ceaf9
AD
3756 amdgpu_bo_evict_vram(adev);
3757
d38ceaf9
AD
3758 return 0;
3759}
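/*
 * Illustrative usage (assumption, based on the amdgpu PM callbacks): system
 * sleep entry in amdgpu_drv.c is expected to funnel into this helper, e.g.
 *
 *	static int amdgpu_pmops_suspend(struct device *dev)
 *	{
 *		struct drm_device *drm_dev = dev_get_drvdata(dev);
 *
 *		return amdgpu_device_suspend(drm_dev, true);
 *	}
 *
 * with fbcon=true so the fbdev emulation is told to suspend as well.
 */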
3760
3761/**
810ddc3a 3762 * amdgpu_device_resume - initiate device resume
d38ceaf9 3763 *
87e3f136 3764 * @dev: drm dev pointer
87e3f136 3765 * @fbcon : notify the fbdev of resume
d38ceaf9
AD
3766 *
3767 * Bring the hw back to operating state (all asics).
3768 * Returns 0 for success or an error on failure.
3769 * Called at driver resume.
3770 */
de185019 3771int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3772{
3773 struct drm_connector *connector;
f8d2d39e 3774 struct drm_connector_list_iter iter;
1348969a 3775 struct amdgpu_device *adev = drm_to_adev(dev);
756e6880 3776 struct drm_crtc *crtc;
03161a6e 3777 int r = 0;
d38ceaf9
AD
3778
3779 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3780 return 0;
3781
9ca5b8a1 3782 if (amdgpu_acpi_is_s0ix_supported(adev))
628c36d7
PL
3783 amdgpu_gfx_state_change_set(adev, sGpuChangeState_D0Entry);
3784
d38ceaf9 3785 /* post card */
39c640c0 3786 if (amdgpu_device_need_post(adev)) {
4d2997ab 3787 r = amdgpu_device_asic_init(adev);
74b0b157 3788 if (r)
aac89168 3789 dev_err(adev->dev, "amdgpu asic init failed\n");
74b0b157 3790 }
d38ceaf9 3791
06ec9070 3792 r = amdgpu_device_ip_resume(adev);
e6707218 3793 if (r) {
aac89168 3794 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
4d3b9ae5 3795 return r;
e6707218 3796 }
5ceb54c6
AD
3797 amdgpu_fence_driver_resume(adev);
3798
d38ceaf9 3799
06ec9070 3800 r = amdgpu_device_ip_late_init(adev);
03161a6e 3801 if (r)
4d3b9ae5 3802 return r;
d38ceaf9 3803
beff74bc
AD
3804 queue_delayed_work(system_wq, &adev->delayed_init_work,
3805 msecs_to_jiffies(AMDGPU_RESUME_MS));
3806
fe1053b7
AD
3807 if (!amdgpu_device_has_dc_support(adev)) {
3808 /* pin cursors */
3809 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3810 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3811
91334223 3812 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3813 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3814 r = amdgpu_bo_reserve(aobj, true);
3815 if (r == 0) {
3816 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
3817 if (r != 0)
aac89168 3818 dev_err(adev->dev, "Failed to pin cursor BO (%d)\n", r);
fe1053b7
AD
3819 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
3820 amdgpu_bo_unreserve(aobj);
3821 }
756e6880
AD
3822 }
3823 }
3824 }
ad887af9 3825 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
ba997709
YZ
3826 if (r)
3827 return r;
756e6880 3828
96a5d8d4 3829 /* Make sure IB tests flushed */
beff74bc 3830 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 3831
d38ceaf9
AD
3832 /* blat the mode back in */
3833 if (fbcon) {
4562236b
HW
3834 if (!amdgpu_device_has_dc_support(adev)) {
3835 /* pre DCE11 */
3836 drm_helper_resume_force_mode(dev);
3837
3838 /* turn on display hw */
3839 drm_modeset_lock_all(dev);
f8d2d39e
LP
3840
3841 drm_connector_list_iter_begin(dev, &iter);
3842 drm_for_each_connector_iter(connector, &iter)
3843 drm_helper_connector_dpms(connector,
3844 DRM_MODE_DPMS_ON);
3845 drm_connector_list_iter_end(&iter);
3846
4562236b 3847 drm_modeset_unlock_all(dev);
d38ceaf9 3848 }
4d3b9ae5 3849 amdgpu_fbdev_set_suspend(adev, 0);
d38ceaf9
AD
3850 }
3851
3852 drm_kms_helper_poll_enable(dev);
23a1a9e5 3853
5e6932fe 3854 amdgpu_ras_resume(adev);
3855
23a1a9e5
L
3856 /*
3857 * Most of the connector probing functions try to acquire runtime pm
3858 * refs to ensure that the GPU is powered on when connector polling is
3859 * performed. Since we're calling this from a runtime PM callback,
3860 * trying to acquire rpm refs will cause us to deadlock.
3861 *
3862 * Since we're guaranteed to be holding the rpm lock, it's safe to
3863 * temporarily disable the rpm helpers so this doesn't deadlock us.
3864 */
3865#ifdef CONFIG_PM
3866 dev->dev->power.disable_depth++;
3867#endif
4562236b
HW
3868 if (!amdgpu_device_has_dc_support(adev))
3869 drm_helper_hpd_irq_event(dev);
3870 else
3871 drm_kms_helper_hotplug_event(dev);
23a1a9e5
L
3872#ifdef CONFIG_PM
3873 dev->dev->power.disable_depth--;
3874#endif
44779b43
RZ
3875 adev->in_suspend = false;
3876
4d3b9ae5 3877 return 0;
d38ceaf9
AD
3878}
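/*
 * Illustrative usage (assumption): the resume counterpart is wired up the
 * same way, e.g. amdgpu_pmops_resume() calling
 * amdgpu_device_resume(drm_dev, true), while the runtime-PM paths are
 * expected to pass fbcon=false since the console was never suspended there.
 */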
3879
e3ecdffa
AD
3880/**
3881 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
3882 *
3883 * @adev: amdgpu_device pointer
3884 *
3885 * The list of all the hardware IPs that make up the asic is walked and
3886 * the check_soft_reset callbacks are run. check_soft_reset determines
3887 * if the asic is still hung or not.
3888 * Returns true if any of the IPs are still in a hung state, false if not.
3889 */
06ec9070 3890static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
3891{
3892 int i;
3893 bool asic_hang = false;
3894
f993d628
ML
3895 if (amdgpu_sriov_vf(adev))
3896 return true;
3897
8bc04c29
AD
3898 if (amdgpu_asic_need_full_reset(adev))
3899 return true;
3900
63fbf42f 3901 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3902 if (!adev->ip_blocks[i].status.valid)
63fbf42f 3903 continue;
a1255107
AD
3904 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
3905 adev->ip_blocks[i].status.hang =
3906 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
3907 if (adev->ip_blocks[i].status.hang) {
aac89168 3908 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
3909 asic_hang = true;
3910 }
3911 }
3912 return asic_hang;
3913}
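/*
 * Illustrative note (assumption): an IP block opts into this mechanism by
 * providing the optional callback in its amd_ip_funcs table, e.g.
 *
 *	static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
 *		...
 *		.check_soft_reset = gfx_v8_0_check_soft_reset,
 *		...
 *	};
 *
 * Blocks without the callback simply keep status.hang == false here.
 */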
3914
e3ecdffa
AD
3915/**
3916 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3917 *
3918 * @adev: amdgpu_device pointer
3919 *
3920 * The list of all the hardware IPs that make up the asic is walked and the
3921 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
3922 * handles any IP specific hardware or software state changes that are
3923 * necessary for a soft reset to succeed.
3924 * Returns 0 on success, negative error code on failure.
3925 */
06ec9070 3926static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
3927{
3928 int i, r = 0;
3929
3930 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3931 if (!adev->ip_blocks[i].status.valid)
d31a501e 3932 continue;
a1255107
AD
3933 if (adev->ip_blocks[i].status.hang &&
3934 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3935 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
3936 if (r)
3937 return r;
3938 }
3939 }
3940
3941 return 0;
3942}
3943
e3ecdffa
AD
3944/**
3945 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3946 *
3947 * @adev: amdgpu_device pointer
3948 *
3949 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
3950 * reset is necessary to recover.
3951 * Returns true if a full asic reset is required, false if not.
3952 */
06ec9070 3953static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 3954{
da146d3b
AD
3955 int i;
3956
8bc04c29
AD
3957 if (amdgpu_asic_need_full_reset(adev))
3958 return true;
3959
da146d3b 3960 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3961 if (!adev->ip_blocks[i].status.valid)
da146d3b 3962 continue;
a1255107
AD
3963 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3964 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3965 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
3966 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3967 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 3968 if (adev->ip_blocks[i].status.hang) {
aac89168 3969 dev_info(adev->dev, "Some block need full reset!\n");
da146d3b
AD
3970 return true;
3971 }
3972 }
35d782fe
CZ
3973 }
3974 return false;
3975}
3976
e3ecdffa
AD
3977/**
3978 * amdgpu_device_ip_soft_reset - do a soft reset
3979 *
3980 * @adev: amdgpu_device pointer
3981 *
3982 * The list of all the hardware IPs that make up the asic is walked and the
3983 * soft_reset callbacks are run if the block is hung. soft_reset handles any
3984 * IP specific hardware or software state changes that are necessary to soft
3985 * reset the IP.
3986 * Returns 0 on success, negative error code on failure.
3987 */
06ec9070 3988static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3989{
3990 int i, r = 0;
3991
3992 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3993 if (!adev->ip_blocks[i].status.valid)
35d782fe 3994 continue;
a1255107
AD
3995 if (adev->ip_blocks[i].status.hang &&
3996 adev->ip_blocks[i].version->funcs->soft_reset) {
3997 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
3998 if (r)
3999 return r;
4000 }
4001 }
4002
4003 return 0;
4004}
4005
e3ecdffa
AD
4006/**
4007 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4008 *
4009 * @adev: amdgpu_device pointer
4010 *
4011 * The list of all the hardware IPs that make up the asic is walked and the
4012 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
4013 * handles any IP specific hardware or software state changes that are
4014 * necessary after the IP has been soft reset.
4015 * Returns 0 on success, negative error code on failure.
4016 */
06ec9070 4017static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4018{
4019 int i, r = 0;
4020
4021 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4022 if (!adev->ip_blocks[i].status.valid)
35d782fe 4023 continue;
a1255107
AD
4024 if (adev->ip_blocks[i].status.hang &&
4025 adev->ip_blocks[i].version->funcs->post_soft_reset)
4026 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
4027 if (r)
4028 return r;
4029 }
4030
4031 return 0;
4032}
4033
e3ecdffa 4034/**
c33adbc7 4035 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
4036 *
4037 * @adev: amdgpu_device pointer
4038 *
4039 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
4040 * restore things like GPUVM page tables after a GPU reset where
4041 * the contents of VRAM might be lost.
403009bf
CK
4042 *
4043 * Returns:
4044 * 0 on success, negative error code on failure.
e3ecdffa 4045 */
c33adbc7 4046static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 4047{
c41d1cf6 4048 struct dma_fence *fence = NULL, *next = NULL;
403009bf
CK
4049 struct amdgpu_bo *shadow;
4050 long r = 1, tmo;
c41d1cf6
ML
4051
4052 if (amdgpu_sriov_runtime(adev))
b045d3af 4053 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
4054 else
4055 tmo = msecs_to_jiffies(100);
4056
aac89168 4057 dev_info(adev->dev, "recover vram bo from shadow start\n");
c41d1cf6 4058 mutex_lock(&adev->shadow_list_lock);
403009bf
CK
4059 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
4060
4061 /* No need to recover an evicted BO */
4062 if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
b575f10d 4063 shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
403009bf
CK
4064 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
4065 continue;
4066
4067 r = amdgpu_bo_restore_shadow(shadow, &next);
4068 if (r)
4069 break;
4070
c41d1cf6 4071 if (fence) {
1712fb1a 4072 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
4073 dma_fence_put(fence);
4074 fence = next;
1712fb1a 4075 if (tmo == 0) {
4076 r = -ETIMEDOUT;
c41d1cf6 4077 break;
1712fb1a 4078 } else if (tmo < 0) {
4079 r = tmo;
4080 break;
4081 }
403009bf
CK
4082 } else {
4083 fence = next;
c41d1cf6 4084 }
c41d1cf6
ML
4085 }
4086 mutex_unlock(&adev->shadow_list_lock);
4087
403009bf
CK
4088 if (fence)
4089 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
4090 dma_fence_put(fence);
4091
1712fb1a 4092 if (r < 0 || tmo <= 0) {
aac89168 4093 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
4094 return -EIO;
4095 }
c41d1cf6 4096
aac89168 4097 dev_info(adev->dev, "recover vram bo from shadow done\n");
403009bf 4098 return 0;
c41d1cf6
ML
4099}
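/*
 * Illustrative note: the shadows walked above come from BOs created with
 * AMDGPU_GEM_CREATE_SHADOW (typically VM page table BOs); each such VRAM BO
 * gets a GTT copy that survives a reset, which is what allows the
 * amdgpu_bo_restore_shadow() pass to rebuild VRAM contents here.
 */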
4100
a90ad3c2 4101
e3ecdffa 4102/**
06ec9070 4103 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e 4104 *
982a820b 4105 * @adev: amdgpu_device pointer
87e3f136 4106 * @from_hypervisor: request from hypervisor
5740682e
ML
4107 *
4108 * Do a VF FLR and reinitialize the ASIC.
4109 * Returns 0 on success, nonzero otherwise.
e3ecdffa
AD
4110 */
4111static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4112 bool from_hypervisor)
5740682e
ML
4113{
4114 int r;
4115
4116 if (from_hypervisor)
4117 r = amdgpu_virt_request_full_gpu(adev, true);
4118 else
4119 r = amdgpu_virt_reset_gpu(adev);
4120 if (r)
4121 return r;
a90ad3c2 4122
b639c22c
JZ
4123 amdgpu_amdkfd_pre_reset(adev);
4124
a90ad3c2 4125 /* Resume IP prior to SMC */
06ec9070 4126 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
4127 if (r)
4128 goto error;
a90ad3c2 4129
c9ffa427 4130 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 4131 /* we need recover gart prior to run SMC/CP/SDMA resume */
6c28aed6 4132 amdgpu_gtt_mgr_recover(ttm_manager_type(&adev->mman.bdev, TTM_PL_TT));
a90ad3c2 4133
7a3e0bb2
RZ
4134 r = amdgpu_device_fw_loading(adev);
4135 if (r)
4136 return r;
4137
a90ad3c2 4138 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 4139 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
4140 if (r)
4141 goto error;
a90ad3c2
ML
4142
4143 amdgpu_irq_gpu_reset_resume_helper(adev);
5740682e 4144 r = amdgpu_ib_ring_tests(adev);
f81e8d53 4145 amdgpu_amdkfd_post_reset(adev);
a90ad3c2 4146
abc34253
ED
4147error:
4148 amdgpu_virt_release_full_gpu(adev, true);
c41d1cf6 4149 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 4150 amdgpu_inc_vram_lost(adev);
c33adbc7 4151 r = amdgpu_device_recover_vram(adev);
a90ad3c2
ML
4152 }
4153
4154 return r;
4155}
4156
9a1cddd6 4157/**
4158 * amdgpu_device_has_job_running - check if there is any job in the pending list
4159 *
982a820b 4160 * @adev: amdgpu_device pointer
9a1cddd6 4161 *
4162 * check if there is any job in the pending list
4163 */
4164bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4165{
4166 int i;
4167 struct drm_sched_job *job;
4168
4169 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4170 struct amdgpu_ring *ring = adev->rings[i];
4171
4172 if (!ring || !ring->sched.thread)
4173 continue;
4174
4175 spin_lock(&ring->sched.job_list_lock);
6efa4b46
LT
4176 job = list_first_entry_or_null(&ring->sched.pending_list,
4177 struct drm_sched_job, list);
9a1cddd6 4178 spin_unlock(&ring->sched.job_list_lock);
4179 if (job)
4180 return true;
4181 }
4182 return false;
4183}
4184
12938fad
CK
4185/**
4186 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4187 *
982a820b 4188 * @adev: amdgpu_device pointer
12938fad
CK
4189 *
4190 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4191 * a hung GPU.
4192 */
4193bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4194{
4195 if (!amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 4196 dev_info(adev->dev, "Timeout, but no hardware hang detected.\n");
12938fad
CK
4197 return false;
4198 }
4199
3ba7b418
AG
4200 if (amdgpu_gpu_recovery == 0)
4201 goto disabled;
4202
4203 if (amdgpu_sriov_vf(adev))
4204 return true;
4205
4206 if (amdgpu_gpu_recovery == -1) {
4207 switch (adev->asic_type) {
fc42d47c
AG
4208 case CHIP_BONAIRE:
4209 case CHIP_HAWAII:
3ba7b418
AG
4210 case CHIP_TOPAZ:
4211 case CHIP_TONGA:
4212 case CHIP_FIJI:
4213 case CHIP_POLARIS10:
4214 case CHIP_POLARIS11:
4215 case CHIP_POLARIS12:
4216 case CHIP_VEGAM:
4217 case CHIP_VEGA20:
4218 case CHIP_VEGA10:
4219 case CHIP_VEGA12:
c43b849f 4220 case CHIP_RAVEN:
e9d4cf91 4221 case CHIP_ARCTURUS:
2cb44fb0 4222 case CHIP_RENOIR:
658c6639
AD
4223 case CHIP_NAVI10:
4224 case CHIP_NAVI14:
4225 case CHIP_NAVI12:
131a3c74 4226 case CHIP_SIENNA_CICHLID:
665fe4dc 4227 case CHIP_NAVY_FLOUNDER:
27859ee3 4228 case CHIP_DIMGREY_CAVEFISH:
3ba7b418
AG
4229 break;
4230 default:
4231 goto disabled;
4232 }
12938fad
CK
4233 }
4234
4235 return true;
3ba7b418
AG
4236
4237disabled:
aac89168 4238 dev_info(adev->dev, "GPU recovery disabled.\n");
3ba7b418 4239 return false;
12938fad
CK
4240}
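/*
 * Illustrative note: the policy above is driven by the amdgpu.gpu_recovery
 * module parameter -- 0 disables recovery, 1 forces it on, and the default
 * of -1 enables it only for the ASICs listed in the switch, e.g.
 *
 *	modprobe amdgpu gpu_recovery=1
 *
 * can be used to opt in on an ASIC that is not on the default list.
 */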
4241
5c6dd71e 4242
26bc5340
AG
4243static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
4244 struct amdgpu_job *job,
4245 bool *need_full_reset_arg)
4246{
4247 int i, r = 0;
4248 bool need_full_reset = *need_full_reset_arg;
71182665 4249
728e7e0c
JZ
4250 amdgpu_debugfs_wait_dump(adev);
4251
b602ca5f
TZ
4252 if (amdgpu_sriov_vf(adev)) {
4253 /* stop the data exchange thread */
4254 amdgpu_virt_fini_data_exchange(adev);
4255 }
4256
71182665 4257 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
4258 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4259 struct amdgpu_ring *ring = adev->rings[i];
4260
51687759 4261 if (!ring || !ring->sched.thread)
0875dc9e 4262 continue;
5740682e 4263
2f9d4084
ML
4264 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
4265 amdgpu_fence_driver_force_completion(ring);
0875dc9e 4266 }
d38ceaf9 4267
222b5f04
AG
4268 if(job)
4269 drm_sched_increase_karma(&job->base);
4270
1d721ed6 4271 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
4272 if (!amdgpu_sriov_vf(adev)) {
4273
4274 if (!need_full_reset)
4275 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4276
4277 if (!need_full_reset) {
4278 amdgpu_device_ip_pre_soft_reset(adev);
4279 r = amdgpu_device_ip_soft_reset(adev);
4280 amdgpu_device_ip_post_soft_reset(adev);
4281 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 4282 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
26bc5340
AG
4283 need_full_reset = true;
4284 }
4285 }
4286
4287 if (need_full_reset)
4288 r = amdgpu_device_ip_suspend(adev);
4289
4290 *need_full_reset_arg = need_full_reset;
4291 }
4292
4293 return r;
4294}
4295
041a62bc 4296static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
26bc5340 4297 struct list_head *device_list_handle,
7ac71382
AG
4298 bool *need_full_reset_arg,
4299 bool skip_hw_reset)
26bc5340
AG
4300{
4301 struct amdgpu_device *tmp_adev = NULL;
4302 bool need_full_reset = *need_full_reset_arg, vram_lost = false;
4303 int r = 0;
4304
4305 /*
4306	 * ASIC reset has to be done on all XGMI hive nodes ASAP
4307 * to allow proper links negotiation in FW (within 1 sec)
4308 */
7ac71382 4309 if (!skip_hw_reset && need_full_reset) {
26bc5340 4310 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
041a62bc 4311 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 4312 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
c96cf282 4313 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
4314 r = -EALREADY;
4315 } else
4316 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 4317
041a62bc 4318 if (r) {
aac89168 4319 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4a580877 4320 r, adev_to_drm(tmp_adev)->unique);
041a62bc 4321 break;
ce316fa5
LM
4322 }
4323 }
4324
041a62bc
AG
4325 /* For XGMI wait for all resets to complete before proceed */
4326 if (!r) {
ce316fa5
LM
4327 list_for_each_entry(tmp_adev, device_list_handle,
4328 gmc.xgmi.head) {
4329 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4330 flush_work(&tmp_adev->xgmi_reset_work);
4331 r = tmp_adev->asic_reset_res;
4332 if (r)
4333 break;
ce316fa5
LM
4334 }
4335 }
4336 }
ce316fa5 4337 }
26bc5340 4338
43c4d576
JC
4339 if (!r && amdgpu_ras_intr_triggered()) {
4340 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4341 if (tmp_adev->mmhub.funcs &&
4342 tmp_adev->mmhub.funcs->reset_ras_error_count)
4343 tmp_adev->mmhub.funcs->reset_ras_error_count(tmp_adev);
4344 }
4345
00eaa571 4346 amdgpu_ras_intr_cleared();
43c4d576 4347 }
00eaa571 4348
26bc5340
AG
4349 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4350 if (need_full_reset) {
4351 /* post card */
4d2997ab 4352 if (amdgpu_device_asic_init(tmp_adev))
aac89168 4353 dev_warn(tmp_adev->dev, "asic atom init failed!");
26bc5340
AG
4354
4355 if (!r) {
4356 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
4357 r = amdgpu_device_ip_resume_phase1(tmp_adev);
4358 if (r)
4359 goto out;
4360
4361 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
4362 if (vram_lost) {
77e7f829 4363 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 4364 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
4365 }
4366
6c28aed6 4367 r = amdgpu_gtt_mgr_recover(ttm_manager_type(&tmp_adev->mman.bdev, TTM_PL_TT));
26bc5340
AG
4368 if (r)
4369 goto out;
4370
4371 r = amdgpu_device_fw_loading(tmp_adev);
4372 if (r)
4373 return r;
4374
4375 r = amdgpu_device_ip_resume_phase2(tmp_adev);
4376 if (r)
4377 goto out;
4378
4379 if (vram_lost)
4380 amdgpu_device_fill_reset_magic(tmp_adev);
4381
fdafb359
EQ
4382 /*
4383				 * Add this ASIC back as tracked since the reset
4384				 * already completed successfully.
4385 */
4386 amdgpu_register_gpu_instance(tmp_adev);
4387
7c04ca50 4388 r = amdgpu_device_ip_late_init(tmp_adev);
4389 if (r)
4390 goto out;
4391
565d1941
EQ
4392 amdgpu_fbdev_set_suspend(tmp_adev, 0);
4393
e8fbaf03
GC
4394 /*
4395				 * The GPU enters a bad state once the number of
4396				 * faulty pages retired by ECC has reached the
4397				 * threshold, and RAS recovery is scheduled next.
4398				 * So add one check here to break recovery if it
4399				 * indeed exceeds the bad page threshold, and remind
4400				 * the user to retire this GPU or set a bigger
4401				 * bad_page_threshold value to fix this once the
4402				 * driver is probed again.
4403 */
11003c68 4404 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
e8fbaf03
GC
4405 /* must succeed. */
4406 amdgpu_ras_resume(tmp_adev);
4407 } else {
4408 r = -EINVAL;
4409 goto out;
4410 }
e79a04d5 4411
26bc5340
AG
4412 /* Update PSP FW topology after reset */
4413 if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
4414 r = amdgpu_xgmi_update_topology(hive, tmp_adev);
4415 }
4416 }
4417
26bc5340
AG
4418out:
4419 if (!r) {
4420 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
4421 r = amdgpu_ib_ring_tests(tmp_adev);
4422 if (r) {
4423 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
4424 r = amdgpu_device_ip_suspend(tmp_adev);
4425 need_full_reset = true;
4426 r = -EAGAIN;
4427 goto end;
4428 }
4429 }
4430
4431 if (!r)
4432 r = amdgpu_device_recover_vram(tmp_adev);
4433 else
4434 tmp_adev->asic_reset_res = r;
4435 }
4436
4437end:
4438 *need_full_reset_arg = need_full_reset;
4439 return r;
4440}
4441
08ebb485
DL
4442static bool amdgpu_device_lock_adev(struct amdgpu_device *adev,
4443 struct amdgpu_hive_info *hive)
26bc5340 4444{
53b3f8f4
DL
4445 if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0)
4446 return false;
4447
08ebb485
DL
4448 if (hive) {
4449 down_write_nest_lock(&adev->reset_sem, &hive->hive_lock);
4450 } else {
4451 down_write(&adev->reset_sem);
4452 }
5740682e 4453
a3a09142
AD
4454 switch (amdgpu_asic_reset_method(adev)) {
4455 case AMD_RESET_METHOD_MODE1:
4456 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
4457 break;
4458 case AMD_RESET_METHOD_MODE2:
4459 adev->mp1_state = PP_MP1_STATE_RESET;
4460 break;
4461 default:
4462 adev->mp1_state = PP_MP1_STATE_NONE;
4463 break;
4464 }
1d721ed6
AG
4465
4466 return true;
26bc5340 4467}
d38ceaf9 4468
26bc5340
AG
4469static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
4470{
89041940 4471 amdgpu_vf_error_trans_all(adev);
a3a09142 4472 adev->mp1_state = PP_MP1_STATE_NONE;
53b3f8f4 4473 atomic_set(&adev->in_gpu_reset, 0);
6049db43 4474 up_write(&adev->reset_sem);
26bc5340
AG
4475}
4476
91fb309d
HC
4477/*
4478 * Lock a list of amdgpu devices in a hive safely. If it is not a hive
4479 * with multiple nodes, this behaves like amdgpu_device_lock_adev.
4480 *
4481 * Unlock won't require a roll back.
4482 */
4483static int amdgpu_device_lock_hive_adev(struct amdgpu_device *adev, struct amdgpu_hive_info *hive)
4484{
4485 struct amdgpu_device *tmp_adev = NULL;
4486
4487 if (adev->gmc.xgmi.num_physical_nodes > 1) {
4488 if (!hive) {
4489 dev_err(adev->dev, "Hive is NULL while device has multiple xgmi nodes");
4490 return -ENODEV;
4491 }
4492 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
4493 if (!amdgpu_device_lock_adev(tmp_adev, hive))
4494 goto roll_back;
4495 }
4496 } else if (!amdgpu_device_lock_adev(adev, hive))
4497 return -EAGAIN;
4498
4499 return 0;
4500roll_back:
4501 if (!list_is_first(&tmp_adev->gmc.xgmi.head, &hive->device_list)) {
4502 /*
4503		 * If the lock iteration breaks in the middle of a hive,
4504		 * it may mean there is a race issue,
4505		 * or that a hive device locked up independently.
4506		 * We may or may not be in trouble, so try to roll back
4507		 * the locks and emit a warning.
4508 */
4509 dev_warn(tmp_adev->dev, "Hive lock iteration broke in the middle. Rolling back to unlock");
4510 list_for_each_entry_continue_reverse(tmp_adev, &hive->device_list, gmc.xgmi.head) {
4511 amdgpu_device_unlock_adev(tmp_adev);
4512 }
4513 }
4514 return -EAGAIN;
4515}
4516
3f12acc8
EQ
4517static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
4518{
4519 struct pci_dev *p = NULL;
4520
4521 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4522 adev->pdev->bus->number, 1);
4523 if (p) {
4524 pm_runtime_enable(&(p->dev));
4525 pm_runtime_resume(&(p->dev));
4526 }
4527}
4528
4529static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
4530{
4531 enum amd_reset_method reset_method;
4532 struct pci_dev *p = NULL;
4533 u64 expires;
4534
4535 /*
4536 * For now, only BACO and mode1 reset are confirmed
4537 * to suffer the audio issue without proper suspended.
4538 */
4539 reset_method = amdgpu_asic_reset_method(adev);
4540 if ((reset_method != AMD_RESET_METHOD_BACO) &&
4541 (reset_method != AMD_RESET_METHOD_MODE1))
4542 return -EINVAL;
4543
4544 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4545 adev->pdev->bus->number, 1);
4546 if (!p)
4547 return -ENODEV;
4548
4549 expires = pm_runtime_autosuspend_expiration(&(p->dev));
4550 if (!expires)
4551 /*
4552 * If we cannot get the audio device autosuspend delay,
4553		 * a fixed 4s interval will be used. Since 3s is
4554		 * the audio controller's default autosuspend delay setting,
4555		 * the 4s used here is guaranteed to cover it.
4556 */
54b7feb9 4557 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
4558
4559 while (!pm_runtime_status_suspended(&(p->dev))) {
4560 if (!pm_runtime_suspend(&(p->dev)))
4561 break;
4562
4563 if (expires < ktime_get_mono_fast_ns()) {
4564 dev_warn(adev->dev, "failed to suspend display audio\n");
4565 /* TODO: abort the succeeding gpu reset? */
4566 return -ETIMEDOUT;
4567 }
4568 }
4569
4570 pm_runtime_disable(&(p->dev));
4571
4572 return 0;
4573}
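/*
 * Illustrative note: this helper is paired with
 * amdgpu_device_resume_display_audio() above -- the GPU reset path calls the
 * suspend variant before the reset and, if it succeeded, the resume variant
 * once recovery is done, so the HDA controller never observes the reset.
 */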
4574
26bc5340
AG
4575/**
4576 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
4577 *
982a820b 4578 * @adev: amdgpu_device pointer
26bc5340
AG
4579 * @job: which job trigger hang
4580 *
4581 * Attempt to reset the GPU if it has hung (all asics).
4583 * Attempt to do a soft reset or a full reset and reinitialize the ASIC.
4583 * Returns 0 for success or an error on failure.
4584 */
4585
4586int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
4587 struct amdgpu_job *job)
4588{
1d721ed6 4589 struct list_head device_list, *device_list_handle = NULL;
7dd8c205
EQ
4590 bool need_full_reset = false;
4591 bool job_signaled = false;
26bc5340 4592 struct amdgpu_hive_info *hive = NULL;
26bc5340 4593 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 4594 int i, r = 0;
bb5c7235 4595 bool need_emergency_restart = false;
3f12acc8 4596 bool audio_suspended = false;
26bc5340 4597
6e3cd2a9 4598 /*
bb5c7235
WS
4599 * Special case: RAS triggered and full reset isn't supported
4600 */
4601 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
4602
d5ea093e
AG
4603 /*
4604 * Flush RAM to disk so that after reboot
4605	 * the user can read the log and see why the system rebooted.
4606 */
bb5c7235 4607 if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
4608 DRM_WARN("Emergency reboot.");
4609
4610 ksys_sync_helper();
4611 emergency_restart();
4612 }
4613
b823821f 4614 dev_info(adev->dev, "GPU %s begin!\n",
bb5c7235 4615 need_emergency_restart ? "jobs stop":"reset");
26bc5340
AG
4616
4617 /*
1d721ed6
AG
4618	 * Here we trylock to avoid a chain of resets executing from
4619	 * either a trigger by jobs on different adevs in an XGMI hive or jobs on
4620	 * different schedulers for the same device while this TO handler is running.
4621	 * We always reset all schedulers for a device and all devices in an XGMI
4622	 * hive, so that should take care of them too.
26bc5340 4623 */
d95e8e97 4624 hive = amdgpu_get_xgmi_hive(adev);
53b3f8f4
DL
4625 if (hive) {
4626 if (atomic_cmpxchg(&hive->in_reset, 0, 1) != 0) {
4627 DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
4628 job ? job->base.id : -1, hive->hive_id);
d95e8e97 4629 amdgpu_put_xgmi_hive(hive);
91fb309d
HC
4630 if (job)
4631 drm_sched_increase_karma(&job->base);
53b3f8f4
DL
4632 return 0;
4633 }
4634 mutex_lock(&hive->hive_lock);
1d721ed6 4635 }
26bc5340 4636
91fb309d
HC
4637 /*
4638	 * Lock the device before we try to operate on the linked list;
4639	 * if we didn't get the device lock, don't touch the linked list since
4640	 * others may be iterating over it.
4641 */
4642 r = amdgpu_device_lock_hive_adev(adev, hive);
4643 if (r) {
4644 dev_info(adev->dev, "Bailing on TDR for s_job:%llx, as another already in progress",
4645 job ? job->base.id : -1);
4646
4647		/* even though we skipped this reset, the job is still considered guilty */
4648 if (job)
4649 drm_sched_increase_karma(&job->base);
4650 goto skip_recovery;
4651 }
4652
9e94d22c
EQ
4653 /*
4654 * Build list of devices to reset.
4655 * In case we are in XGMI hive mode, resort the device list
4656 * to put adev in the 1st position.
4657 */
4658 INIT_LIST_HEAD(&device_list);
4659 if (adev->gmc.xgmi.num_physical_nodes > 1) {
9e94d22c
EQ
4660 if (!list_is_first(&adev->gmc.xgmi.head, &hive->device_list))
4661 list_rotate_to_front(&adev->gmc.xgmi.head, &hive->device_list);
26bc5340
AG
4662 device_list_handle = &hive->device_list;
4663 } else {
4664 list_add_tail(&adev->gmc.xgmi.head, &device_list);
4665 device_list_handle = &device_list;
4666 }
4667
1d721ed6
AG
4668 /* block all schedulers and reset given job's ring */
4669 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3f12acc8
EQ
4670 /*
4671		 * Try to put the audio codec into suspend state
4672		 * before the gpu reset starts.
4673		 *
4674		 * The power domain of the graphics device is
4675		 * shared with the AZ power domain. Without this,
4676		 * we may change the audio hardware from behind
4677		 * the audio driver's back. That will trigger
4678		 * some audio codec errors.
4679 */
4680 if (!amdgpu_device_suspend_display_audio(tmp_adev))
4681 audio_suspended = true;
4682
9e94d22c
EQ
4683 amdgpu_ras_set_error_query_ready(tmp_adev, false);
4684
52fb44cf
EQ
4685 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
4686
9e94d22c
EQ
4687 if (!amdgpu_sriov_vf(tmp_adev))
4688 amdgpu_amdkfd_pre_reset(tmp_adev);
4689
12ffa55d
AG
4690 /*
4691		 * Mark these ASICs to be reset as untracked first,
4692		 * and add them back after the reset completes.
4693 */
4694 amdgpu_unregister_gpu_instance(tmp_adev);
4695
a2f63ee8 4696 amdgpu_fbdev_set_suspend(tmp_adev, 1);
565d1941 4697
f1c1314b 4698 /* disable ras on ALL IPs */
bb5c7235 4699 if (!need_emergency_restart &&
b823821f 4700 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 4701 amdgpu_ras_suspend(tmp_adev);
4702
1d721ed6
AG
4703 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4704 struct amdgpu_ring *ring = tmp_adev->rings[i];
4705
4706 if (!ring || !ring->sched.thread)
4707 continue;
4708
0b2d2c2e 4709 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 4710
bb5c7235 4711 if (need_emergency_restart)
7c6e68c7 4712 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6 4713 }
8f8c80f4 4714 atomic_inc(&tmp_adev->gpu_reset_counter);
1d721ed6
AG
4715 }
4716
bb5c7235 4717 if (need_emergency_restart)
7c6e68c7
AG
4718 goto skip_sched_resume;
4719
1d721ed6
AG
4720 /*
4721 * Must check guilty signal here since after this point all old
4722 * HW fences are force signaled.
4723 *
4724 * job->base holds a reference to parent fence
4725 */
4726 if (job && job->base.s_fence->parent &&
7dd8c205 4727 dma_fence_is_signaled(job->base.s_fence->parent)) {
1d721ed6 4728 job_signaled = true;
1d721ed6
AG
4729 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
4730 goto skip_hw_reset;
4731 }
4732
26bc5340
AG
4733retry: /* Rest of adevs pre asic reset from XGMI hive. */
4734 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
26bc5340 4735 r = amdgpu_device_pre_asic_reset(tmp_adev,
ded08454 4736 (tmp_adev == adev) ? job : NULL,
26bc5340
AG
4737 &need_full_reset);
4738 /*TODO Should we stop ?*/
4739 if (r) {
aac89168 4740 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
4a580877 4741 r, adev_to_drm(tmp_adev)->unique);
26bc5340
AG
4742 tmp_adev->asic_reset_res = r;
4743 }
4744 }
4745
4746 /* Actual ASIC resets if needed.*/
4747 /* TODO Implement XGMI hive reset logic for SRIOV */
4748 if (amdgpu_sriov_vf(adev)) {
4749 r = amdgpu_device_reset_sriov(adev, job ? false : true);
4750 if (r)
4751 adev->asic_reset_res = r;
4752 } else {
7ac71382 4753 r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset, false);
26bc5340
AG
4754 if (r && r == -EAGAIN)
4755 goto retry;
4756 }
4757
1d721ed6
AG
4758skip_hw_reset:
4759
26bc5340
AG
4760 /* Post ASIC reset for all devs .*/
4761 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
7c6e68c7 4762
1d721ed6
AG
4763 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4764 struct amdgpu_ring *ring = tmp_adev->rings[i];
4765
4766 if (!ring || !ring->sched.thread)
4767 continue;
4768
4769			/* No point in resubmitting jobs if we didn't HW reset */
4770 if (!tmp_adev->asic_reset_res && !job_signaled)
4771 drm_sched_resubmit_jobs(&ring->sched);
4772
4773 drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
4774 }
4775
4776 if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
4a580877 4777 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
1d721ed6
AG
4778 }
4779
4780 tmp_adev->asic_reset_res = 0;
26bc5340
AG
4781
4782 if (r) {
4783 /* bad news, how to tell it to userspace ? */
12ffa55d 4784 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
4785 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
4786 } else {
12ffa55d 4787 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340 4788 }
7c6e68c7 4789 }
26bc5340 4790
7c6e68c7
AG
4791skip_sched_resume:
4792 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4793 /*unlock kfd: SRIOV would do it separately */
bb5c7235 4794 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
7c6e68c7 4795 amdgpu_amdkfd_post_reset(tmp_adev);
3f12acc8
EQ
4796 if (audio_suspended)
4797 amdgpu_device_resume_display_audio(tmp_adev);
26bc5340
AG
4798 amdgpu_device_unlock_adev(tmp_adev);
4799 }
4800
cbfd17f7 4801skip_recovery:
9e94d22c 4802 if (hive) {
53b3f8f4 4803 atomic_set(&hive->in_reset, 0);
9e94d22c 4804 mutex_unlock(&hive->hive_lock);
d95e8e97 4805 amdgpu_put_xgmi_hive(hive);
9e94d22c 4806 }
26bc5340 4807
91fb309d 4808 if (r && r != -EAGAIN)
26bc5340 4809 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
d38ceaf9
AD
4810 return r;
4811}
4812
e3ecdffa
AD
4813/**
4814 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
4815 *
4816 * @adev: amdgpu_device pointer
4817 *
4818 * Fetches and stores in the driver the PCIE capabilities (gen speed
4819 * and lanes) of the slot the device is in. Handles APUs and
4820 * virtualized environments where PCIE config space may not be available.
4821 */
5494d864 4822static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 4823{
5d9a6330 4824 struct pci_dev *pdev;
c5313457
HK
4825 enum pci_bus_speed speed_cap, platform_speed_cap;
4826 enum pcie_link_width platform_link_width;
d0dd7f0c 4827
cd474ba0
AD
4828 if (amdgpu_pcie_gen_cap)
4829 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 4830
cd474ba0
AD
4831 if (amdgpu_pcie_lane_cap)
4832 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 4833
cd474ba0
AD
4834 /* covers APUs as well */
4835 if (pci_is_root_bus(adev->pdev->bus)) {
4836 if (adev->pm.pcie_gen_mask == 0)
4837 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
4838 if (adev->pm.pcie_mlw_mask == 0)
4839 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 4840 return;
cd474ba0 4841 }
d0dd7f0c 4842
c5313457
HK
4843 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
4844 return;
4845
dbaa922b
AD
4846 pcie_bandwidth_available(adev->pdev, NULL,
4847 &platform_speed_cap, &platform_link_width);
c5313457 4848
cd474ba0 4849 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
4850 /* asic caps */
4851 pdev = adev->pdev;
4852 speed_cap = pcie_get_speed_cap(pdev);
4853 if (speed_cap == PCI_SPEED_UNKNOWN) {
4854 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
4855 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4856 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 4857 } else {
2b3a1f51
FX
4858 if (speed_cap == PCIE_SPEED_32_0GT)
4859 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4860 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4861 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4862 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
4863 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
4864 else if (speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
4865 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4866 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4867 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4868 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
4869 else if (speed_cap == PCIE_SPEED_8_0GT)
4870 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4871 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4872 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
4873 else if (speed_cap == PCIE_SPEED_5_0GT)
4874 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4875 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
4876 else
4877 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
4878 }
4879 /* platform caps */
c5313457 4880 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
4881 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4882 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4883 } else {
2b3a1f51
FX
4884 if (platform_speed_cap == PCIE_SPEED_32_0GT)
4885 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4886 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4887 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4888 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
4889 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
4890 else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
4891 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4892 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4893 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4894 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 4895 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
4896 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4897 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4898 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 4899 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
4900 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4901 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4902 else
4903 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
4904
cd474ba0
AD
4905 }
4906 }
4907 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 4908 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
4909 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
4910 } else {
c5313457 4911 switch (platform_link_width) {
5d9a6330 4912 case PCIE_LNK_X32:
cd474ba0
AD
4913 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
4914 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4915 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4916 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4917 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4918 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4919 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4920 break;
5d9a6330 4921 case PCIE_LNK_X16:
cd474ba0
AD
4922 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4923 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4924 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4925 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4926 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4927 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4928 break;
5d9a6330 4929 case PCIE_LNK_X12:
cd474ba0
AD
4930 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4931 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4932 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4933 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4934 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4935 break;
5d9a6330 4936 case PCIE_LNK_X8:
cd474ba0
AD
4937 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4938 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4939 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4940 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4941 break;
5d9a6330 4942 case PCIE_LNK_X4:
cd474ba0
AD
4943 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4944 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4945 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4946 break;
5d9a6330 4947 case PCIE_LNK_X2:
cd474ba0
AD
4948 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4949 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4950 break;
5d9a6330 4951 case PCIE_LNK_X1:
cd474ba0
AD
4952 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
4953 break;
4954 default:
4955 break;
4956 }
d0dd7f0c
AD
4957 }
4958 }
4959}
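/*
 * Worked example (illustrative): for a Gen3-capable ASIC in a Gen3 x16 slot,
 * pcie_get_speed_cap() reports PCIE_SPEED_8_0GT, so pcie_gen_mask ends up
 * with the GEN1|GEN2|GEN3 bits set for both the ASIC and platform halves,
 * and pcie_mlw_mask collects X16|X12|X8|X4|X2|X1 from the PCIE_LNK_X16 case.
 */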
d38ceaf9 4960
361dbd01
AD
4961int amdgpu_device_baco_enter(struct drm_device *dev)
4962{
1348969a 4963 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 4964 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01 4965
4a580877 4966 if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
361dbd01
AD
4967 return -ENOTSUPP;
4968
6fb33209 4969 if (ras && ras->supported && adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
4970 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
4971
9530273e 4972 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
4973}
4974
4975int amdgpu_device_baco_exit(struct drm_device *dev)
4976{
1348969a 4977 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 4978 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 4979 int ret = 0;
361dbd01 4980
4a580877 4981 if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
361dbd01
AD
4982 return -ENOTSUPP;
4983
9530273e
EQ
4984 ret = amdgpu_dpm_baco_exit(adev);
4985 if (ret)
4986 return ret;
7a22677b 4987
6fb33209 4988 if (ras && ras->supported && adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
4989 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
4990
4991 return 0;
361dbd01 4992}
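/*
 * Illustrative note (assumption): BACO ("Bus Active, Chip Off") keeps the
 * PCIe link alive while powering the chip down.  The runtime-PM path is
 * expected to use these helpers, e.g. amdgpu_pmops_runtime_suspend() calling
 * amdgpu_device_baco_enter() on boards where amdgpu_device_supports_baco()
 * is true, with amdgpu_device_baco_exit() on the resume side.
 */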
c9a6b82f 4993
acd89fca
AG
4994static void amdgpu_cancel_all_tdr(struct amdgpu_device *adev)
4995{
4996 int i;
4997
4998 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4999 struct amdgpu_ring *ring = adev->rings[i];
5000
5001 if (!ring || !ring->sched.thread)
5002 continue;
5003
5004 cancel_delayed_work_sync(&ring->sched.work_tdr);
5005 }
5006}
5007
c9a6b82f
AG
5008/**
5009 * amdgpu_pci_error_detected - Called when a PCI error is detected.
5010 * @pdev: PCI device struct
5011 * @state: PCI channel state
5012 *
5013 * Description: Called when a PCI error is detected.
5014 *
5015 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
5016 */
5017pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
5018{
5019 struct drm_device *dev = pci_get_drvdata(pdev);
5020 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5021 int i;
c9a6b82f
AG
5022
5023 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
5024
6894305c
AG
5025 if (adev->gmc.xgmi.num_physical_nodes > 1) {
5026 DRM_WARN("No support for XGMI hive yet...");
5027 return PCI_ERS_RESULT_DISCONNECT;
5028 }
5029
c9a6b82f
AG
5030 switch (state) {
5031 case pci_channel_io_normal:
5032 return PCI_ERS_RESULT_CAN_RECOVER;
acd89fca 5033 /* Fatal error, prepare for slot reset */
8a11d283
TZ
5034 case pci_channel_io_frozen:
5035 /*
acd89fca
AG
 5036 * Cancel and wait for all TDRs in progress if we fail to
 5037 * set adev->in_gpu_reset in amdgpu_device_lock_adev.
 5038 *
 5039 * Locking adev->reset_sem will prevent any external access
 5040 * to the GPU during PCI error recovery.
5041 */
5042 while (!amdgpu_device_lock_adev(adev, NULL))
5043 amdgpu_cancel_all_tdr(adev);
5044
5045 /*
5046 * Block any work scheduling as we do for regular GPU reset
5047 * for the duration of the recovery
5048 */
5049 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5050 struct amdgpu_ring *ring = adev->rings[i];
5051
5052 if (!ring || !ring->sched.thread)
5053 continue;
5054
5055 drm_sched_stop(&ring->sched, NULL);
5056 }
8f8c80f4 5057 atomic_inc(&adev->gpu_reset_counter);
c9a6b82f
AG
5058 return PCI_ERS_RESULT_NEED_RESET;
5059 case pci_channel_io_perm_failure:
5060 /* Permanent error, prepare for device removal */
5061 return PCI_ERS_RESULT_DISCONNECT;
5062 }
5063
5064 return PCI_ERS_RESULT_NEED_RESET;
5065}
5066
5067/**
5068 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
5069 * @pdev: pointer to PCI device
5070 */
5071pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
5072{
5073
5074 DRM_INFO("PCI error: mmio enabled callback!!\n");
5075
5076 /* TODO - dump whatever for debugging purposes */
5077
 5078 * This is called only if amdgpu_pci_error_detected returns
 5079 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
 5080 * works, so there is no need to reset the slot.
5081 */
5082
5083 return PCI_ERS_RESULT_RECOVERED;
5084}
5085
5086/**
5087 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
5088 * @pdev: PCI device struct
5089 *
 5090 * Description: This routine is called by the PCI error recovery
5091 * code after the PCI slot has been reset, just before we
5092 * should resume normal operations.
5093 */
5094pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
5095{
5096 struct drm_device *dev = pci_get_drvdata(pdev);
5097 struct amdgpu_device *adev = drm_to_adev(dev);
362c7b91 5098 int r, i;
7ac71382 5099 bool need_full_reset = true;
362c7b91 5100 u32 memsize;
7ac71382 5101 struct list_head device_list;
c9a6b82f
AG
5102
5103 DRM_INFO("PCI error: slot reset callback!!\n");
5104
7ac71382
AG
5105 INIT_LIST_HEAD(&device_list);
5106 list_add_tail(&adev->gmc.xgmi.head, &device_list);
5107
362c7b91
AG
 5108 /* wait for ASIC to come out of reset */
5109 msleep(500);
5110
7ac71382 5111 /* Restore PCI confspace */
c1dd4aa6 5112 amdgpu_device_load_pci_state(pdev);
c9a6b82f 5113
362c7b91
AG
5114 /* confirm ASIC came out of reset */
5115 for (i = 0; i < adev->usec_timeout; i++) {
5116 memsize = amdgpu_asic_get_config_memsize(adev);
5117
5118 if (memsize != 0xffffffff)
5119 break;
5120 udelay(1);
5121 }
5122 if (memsize == 0xffffffff) {
5123 r = -ETIME;
5124 goto out;
5125 }
5126
8a11d283 5127 adev->in_pci_err_recovery = true;
7ac71382 5128 r = amdgpu_device_pre_asic_reset(adev, NULL, &need_full_reset);
bf36b52e 5129 adev->in_pci_err_recovery = false;
c9a6b82f
AG
5130 if (r)
5131 goto out;
5132
7ac71382 5133 r = amdgpu_do_asic_reset(NULL, &device_list, &need_full_reset, true);
c9a6b82f
AG
5134
5135out:
c9a6b82f 5136 if (!r) {
c1dd4aa6
AG
5137 if (amdgpu_device_cache_pci_state(adev->pdev))
5138 pci_restore_state(adev->pdev);
5139
c9a6b82f
AG
5140 DRM_INFO("PCIe error recovery succeeded\n");
5141 } else {
 5142 DRM_ERROR("PCIe error recovery failed, err:%d\n", r);
5143 amdgpu_device_unlock_adev(adev);
5144 }
5145
5146 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
5147}
5148
5149/**
5150 * amdgpu_pci_resume() - resume normal ops after PCI reset
5151 * @pdev: pointer to PCI device
5152 *
 5153 * Called when the error recovery driver tells us that it's
505199a3 5154 * OK to resume normal operation.
c9a6b82f
AG
5155 */
5156void amdgpu_pci_resume(struct pci_dev *pdev)
5157{
5158 struct drm_device *dev = pci_get_drvdata(pdev);
5159 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5160 int i;
c9a6b82f 5161
c9a6b82f
AG
5162
5163 DRM_INFO("PCI error: resume callback!!\n");
acd89fca
AG
5164
5165 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5166 struct amdgpu_ring *ring = adev->rings[i];
5167
5168 if (!ring || !ring->sched.thread)
5169 continue;
 5170
5172 drm_sched_resubmit_jobs(&ring->sched);
5173 drm_sched_start(&ring->sched, true);
5174 }
5175
5176 amdgpu_device_unlock_adev(adev);
c9a6b82f 5177}
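
/*
 * The four callbacks above are hooked into the PCI core's error recovery
 * path. A minimal sketch of the wiring, assuming the handler table lives in
 * the driver registration code (e.g. amdgpu_drv.c; the table name below is
 * illustrative, the struct pci_error_handlers fields are the standard ones):
 *
 *	static const struct pci_error_handlers amdgpu_pci_err_handler = {
 *		.error_detected	= amdgpu_pci_error_detected,
 *		.mmio_enabled	= amdgpu_pci_mmio_enabled,
 *		.slot_reset	= amdgpu_pci_slot_reset,
 *		.resume		= amdgpu_pci_resume,
 *	};
 *
 * which would then be referenced from the driver's struct pci_driver via
 * its .err_handler field.
 */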
c1dd4aa6
AG
5178
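/**
 * amdgpu_device_cache_pci_state - save and cache the PCI config space
 * @pdev: PCI device struct
 *
 * Saves the device's PCI config space and keeps a kernel copy in
 * adev->pci_state so it can be restored after a reset or error recovery.
 *
 * Return: true on success, false if the state could not be saved or stored.
 */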
5179bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
5180{
5181 struct drm_device *dev = pci_get_drvdata(pdev);
5182 struct amdgpu_device *adev = drm_to_adev(dev);
5183 int r;
5184
5185 r = pci_save_state(pdev);
5186 if (!r) {
5187 kfree(adev->pci_state);
5188
5189 adev->pci_state = pci_store_saved_state(pdev);
5190
5191 if (!adev->pci_state) {
 5192 DRM_ERROR("Failed to store PCI saved state\n");
5193 return false;
5194 }
5195 } else {
5196 DRM_WARN("Failed to save PCI state, err:%d\n", r);
5197 return false;
5198 }
5199
5200 return true;
5201}
5202
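/**
 * amdgpu_device_load_pci_state - restore the cached PCI config space
 * @pdev: PCI device struct
 *
 * Loads the copy cached by amdgpu_device_cache_pci_state() back into the
 * PCI core and restores it to the device.
 *
 * Return: true on success, false if no cached state exists or loading fails.
 */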
5203bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
5204{
5205 struct drm_device *dev = pci_get_drvdata(pdev);
5206 struct amdgpu_device *adev = drm_to_adev(dev);
5207 int r;
5208
5209 if (!adev->pci_state)
5210 return false;
5211
5212 r = pci_load_saved_state(pdev, adev->pci_state);
5213
5214 if (!r) {
5215 pci_restore_state(pdev);
5216 } else {
5217 DRM_WARN("Failed to load PCI state, err:%d\n", r);
5218 return false;
5219 }
5220
5221 return true;
5222}
5223
5224