/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/console.h>
#include <linux/slab.h>

#include <drm/drm_atomic_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "nv.h"
#include "bif/bif_4_1_d.h"
#include <linux/pci.h>
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
#include "amdgpu_pmu.h"
#include "amdgpu_fru_eeprom.h"

#include <linux/suspend.h>
#include <drm/task_barrier.h>
#include <linux/pm_runtime.h>

MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vangogh_gpu_info.bin");

#define AMDGPU_RESUME_MS		2000

const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"ARCTURUS",
	"RENOIR",
	"NAVI10",
	"NAVI14",
	"NAVI12",
	"SIENNA_CICHLID",
	"NAVY_FLOUNDER",
	"VANGOGH",
	"DIMGREY_CAVEFISH",
	"LAST",
};

/**
 * DOC: pcie_replay_count
 *
 * The amdgpu driver provides a sysfs API for reporting the total number
 * of PCIe replays (NAKs).
 * The file pcie_replay_count is used for this and returns the total
 * number of replays as a sum of the NAKs generated and NAKs received.
 */

static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);
	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);

	return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
}

static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
		amdgpu_device_get_pcie_replay_count, NULL);

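/*
 * Illustrative only (not part of the driver): the attribute is created on the
 * PCI device's sysfs directory, so from userspace it can typically be read as
 *
 *   cat /sys/bus/pci/devices/<domain:bus:dev.fn>/pcie_replay_count
 *
 * The exact PCI address depends on the system.
 */
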
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);

/**
 * DOC: product_name
 *
 * The amdgpu driver provides a sysfs API for reporting the product name
 * for the device.
 * The file product_name is used for this and returns the product name
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_product_name(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);

	return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_name);
}

static DEVICE_ATTR(product_name, S_IRUGO,
		amdgpu_device_get_product_name, NULL);

/**
 * DOC: product_number
 *
 * The amdgpu driver provides a sysfs API for reporting the part number
 * for the device.
 * The file product_number is used for this and returns the part number
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_product_number(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);

	return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_number);
}

static DEVICE_ATTR(product_number, S_IRUGO,
		amdgpu_device_get_product_number, NULL);

/**
 * DOC: serial_number
 *
 * The amdgpu driver provides a sysfs API for reporting the serial number
 * for the device.
 * The file serial_number is used for this and returns the serial number
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_serial_number(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);

	return snprintf(buf, PAGE_SIZE, "%s\n", adev->serial);
}

static DEVICE_ATTR(serial_number, S_IRUGO,
		amdgpu_device_get_serial_number, NULL);

/**
 * amdgpu_device_supports_atpx - Is the device a dGPU with HG/PX power control
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with HG/PX power control,
 * otherwise return false.
 */
bool amdgpu_device_supports_atpx(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);

	if (adev->flags & AMD_IS_PX)
		return true;
	return false;
}

/**
 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with ACPI power resources (BOCO),
 * otherwise return false.
 */
bool amdgpu_device_supports_boco(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);

	if (adev->has_pr3)
		return true;
	return false;
}

/**
 * amdgpu_device_supports_baco - Does the device support BACO
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device supports BACO,
 * otherwise return false.
 */
bool amdgpu_device_supports_baco(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);

	return amdgpu_asic_supports_baco(adev);
}

/*
 * VRAM access helper functions
 */

/**
 * amdgpu_device_vram_access - read/write a buffer in vram
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size in bytes, the size of @buf must be at least @size
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
			       uint32_t *buf, size_t size, bool write)
{
	unsigned long flags;
	uint32_t hi = ~0;
	uint64_t last;


#ifdef CONFIG_64BIT
	last = min(pos + size, adev->gmc.visible_vram_size);
	if (last > pos) {
		void __iomem *addr = adev->mman.aper_base_kaddr + pos;
		size_t count = last - pos;

		if (write) {
			memcpy_toio(addr, buf, count);
			mb();
			amdgpu_asic_flush_hdp(adev, NULL);
		} else {
			amdgpu_asic_invalidate_hdp(adev, NULL);
			mb();
			memcpy_fromio(buf, addr, count);
		}

		if (count == size)
			return;

		pos += count;
		buf += count / 4;
		size -= count;
	}
#endif

	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
	for (last = pos + size; pos < last; pos += 4) {
		uint32_t tmp = pos >> 31;

		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
		if (tmp != hi) {
			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
			hi = tmp;
		}
		if (write)
			WREG32_NO_KIQ(mmMM_DATA, *buf++);
		else
			*buf++ = RREG32_NO_KIQ(mmMM_DATA);
	}
	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
}

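/*
 * Usage sketch (illustrative, not taken from this file): copy a few dwords
 * out of VRAM into a local buffer. @pos is a byte offset into VRAM and the
 * destination buffer must be at least @size bytes long.
 *
 *	uint32_t data[4];
 *
 *	amdgpu_device_vram_access(adev, pos, data, sizeof(data), false);
 */
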
/*
 * register access helper functions.
 */
/**
 * amdgpu_device_rreg - read a memory mapped IO or indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
			    uint32_t reg, uint32_t acc_flags)
{
	uint32_t ret;

	if (adev->in_pci_err_recovery)
		return 0;

	if ((reg * 4) < adev->rmmio_size) {
		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
		    amdgpu_sriov_runtime(adev) &&
		    down_read_trylock(&adev->reset_sem)) {
			ret = amdgpu_kiq_rreg(adev, reg);
			up_read(&adev->reset_sem);
		} else {
			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
		}
	} else {
		ret = adev->pcie_rreg(adev, reg * 4);
	}

	trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);

	return ret;
}

/*
 * MMIO register read with bytes helper functions
 * @offset: bytes offset from MMIO start
 */

/**
 * amdgpu_mm_rreg8 - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 *
 * Returns the 8 bit value from the offset specified.
 */
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
{
	if (adev->in_pci_err_recovery)
		return 0;

	if (offset < adev->rmmio_size)
		return (readb(adev->rmmio + offset));
	BUG();
}

/*
 * MMIO register write with bytes helper functions
 * @offset: bytes offset from MMIO start
 * @value: the value to be written to the register
 */

/**
 * amdgpu_mm_wreg8 - write a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
{
	if (adev->in_pci_err_recovery)
		return;

	if (offset < adev->rmmio_size)
		writeb(value, adev->rmmio + offset);
	else
		BUG();
}

/**
 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_device_wreg(struct amdgpu_device *adev,
			uint32_t reg, uint32_t v,
			uint32_t acc_flags)
{
	if (adev->in_pci_err_recovery)
		return;

	if ((reg * 4) < adev->rmmio_size) {
		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
		    amdgpu_sriov_runtime(adev) &&
		    down_read_trylock(&adev->reset_sem)) {
			amdgpu_kiq_wreg(adev, reg, v);
			up_read(&adev->reset_sem);
		} else {
			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
		}
	} else {
		adev->pcie_wreg(adev, reg * 4, v);
	}

	trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
}

/*
 * amdgpu_mm_wreg_mmio_rlc - write register either with mmio or with RLC path if in range
 *
 * This function is invoked only for the debugfs register access.
 */
void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
			     uint32_t reg, uint32_t v)
{
	if (adev->in_pci_err_recovery)
		return;

	if (amdgpu_sriov_fullaccess(adev) &&
	    adev->gfx.rlc.funcs &&
	    adev->gfx.rlc.funcs->is_rlcg_access_range) {
		if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
			return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v);
	} else {
		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
	}
}

/**
 * amdgpu_io_rreg - read an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 *
 * Returns the 32 bit value from the offset specified.
 */
u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
{
	if (adev->in_pci_err_recovery)
		return 0;

	if ((reg * 4) < adev->rio_mem_size)
		return ioread32(adev->rio_mem + (reg * 4));
	else {
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		return ioread32(adev->rio_mem + (mmMM_DATA * 4));
	}
}

/**
 * amdgpu_io_wreg - write to an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
	if (adev->in_pci_err_recovery)
		return;

	if ((reg * 4) < adev->rio_mem_size)
		iowrite32(v, adev->rio_mem + (reg * 4));
	else {
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
	}
}

/**
 * amdgpu_mm_rdoorbell - read a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
{
	if (adev->in_pci_err_recovery)
		return 0;

	if (index < adev->doorbell.num_doorbells) {
		return readl(adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell - write a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
{
	if (adev->in_pci_err_recovery)
		return;

	if (index < adev->doorbell.num_doorbells) {
		writel(v, adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

/**
 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
{
	if (adev->in_pci_err_recovery)
		return 0;

	if (index < adev->doorbell.num_doorbells) {
		return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
{
	if (adev->in_pci_err_recovery)
		return;

	if (index < adev->doorbell.num_doorbells) {
		atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

/**
 * amdgpu_device_indirect_rreg - read an indirect register
 *
 * @adev: amdgpu_device pointer
 * @pcie_index: mmio register offset
 * @pcie_data: mmio register offset
 * @reg_addr: indirect register address to read from
 *
 * Returns the value of indirect register @reg_addr
 */
u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
				u32 pcie_index, u32 pcie_data,
				u32 reg_addr)
{
	unsigned long flags;
	u32 r;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	r = readl(pcie_data_offset);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return r;
}

/**
 * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
 *
 * @adev: amdgpu_device pointer
 * @pcie_index: mmio register offset
 * @pcie_data: mmio register offset
 * @reg_addr: indirect register address to read from
 *
 * Returns the value of indirect register @reg_addr
 */
u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
				  u32 pcie_index, u32 pcie_data,
				  u32 reg_addr)
{
	unsigned long flags;
	u64 r;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	/* read low 32 bits */
	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	r = readl(pcie_data_offset);
	/* read high 32 bits */
	writel(reg_addr + 4, pcie_index_offset);
	readl(pcie_index_offset);
	r |= ((u64)readl(pcie_data_offset) << 32);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return r;
}

/**
 * amdgpu_device_indirect_wreg - write an indirect register address
 *
 * @adev: amdgpu_device pointer
 * @pcie_index: mmio register offset
 * @pcie_data: mmio register offset
 * @reg_addr: indirect register offset
 * @reg_data: indirect register data
 *
 */
void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
				 u32 pcie_index, u32 pcie_data,
				 u32 reg_addr, u32 reg_data)
{
	unsigned long flags;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	writel(reg_data, pcie_data_offset);
	readl(pcie_data_offset);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

/**
 * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
 *
 * @adev: amdgpu_device pointer
 * @pcie_index: mmio register offset
 * @pcie_data: mmio register offset
 * @reg_addr: indirect register offset
 * @reg_data: indirect register data
 *
 */
void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
				   u32 pcie_index, u32 pcie_data,
				   u32 reg_addr, u64 reg_data)
{
	unsigned long flags;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	/* write low 32 bits */
	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
	readl(pcie_data_offset);
	/* write high 32 bits */
	writel(reg_addr + 4, pcie_index_offset);
	readl(pcie_index_offset);
	writel((u32)(reg_data >> 32), pcie_data_offset);
	readl(pcie_data_offset);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

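/*
 * Sketch of how an asic specific callback can be layered on top of these
 * helpers (illustrative only; the index/data offsets are whatever the asic's
 * NBIO block reports, not fixed values):
 *
 *	static u32 example_pcie_rreg(struct amdgpu_device *adev, u32 reg)
 *	{
 *		u32 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
 *		u32 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
 *
 *		return amdgpu_device_indirect_rreg(adev, pcie_index, pcie_data, reg);
 *	}
 */
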
/**
 * amdgpu_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg64 - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
{
	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_block_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu_device pointer
 * @block: offset of instance
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
					  uint32_t block, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
		  reg, block);
	BUG();
	return 0;
}

/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
				      uint32_t block,
				      uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
		  reg, block, v);
	BUG();
}

/**
 * amdgpu_device_asic_init - Wrapper for atom asic_init
 *
 * @adev: amdgpu_device pointer
 *
 * Does any asic specific work and then calls atom asic init.
 */
static int amdgpu_device_asic_init(struct amdgpu_device *adev)
{
	amdgpu_asic_pre_asic_init(adev);

	return amdgpu_atom_asic_init(adev->mode_info.atom_context);
}

/**
 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
 *
 * @adev: amdgpu_device pointer
 *
 * Allocates a scratch page of VRAM for use by various things in the
 * driver.
 */
static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
{
	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
				       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
				       &adev->vram_scratch.robj,
				       &adev->vram_scratch.gpu_addr,
				       (void **)&adev->vram_scratch.ptr);
}

/**
 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
 *
 * @adev: amdgpu_device pointer
 *
 * Frees the VRAM scratch page.
 */
static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
}

/**
 * amdgpu_device_program_register_sequence - program an array of registers.
 *
 * @adev: amdgpu_device pointer
 * @registers: pointer to the register array
 * @array_size: size of the register array
 *
 * Programs an array of registers with AND and OR masks.
 * This is a helper for setting golden registers.
 */
void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
					     const u32 *registers,
					     const u32 array_size)
{
	u32 tmp, reg, and_mask, or_mask;
	int i;

	if (array_size % 3)
		return;

	for (i = 0; i < array_size; i += 3) {
		reg = registers[i + 0];
		and_mask = registers[i + 1];
		or_mask = registers[i + 2];

		if (and_mask == 0xffffffff) {
			tmp = or_mask;
		} else {
			tmp = RREG32(reg);
			tmp &= ~and_mask;
			if (adev->family >= AMDGPU_FAMILY_AI)
				tmp |= (or_mask & and_mask);
			else
				tmp |= or_mask;
		}
		WREG32(reg, tmp);
	}
}

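/*
 * Usage sketch: golden register tables are flat arrays of
 * {offset, and_mask, or_mask} triples. With an and_mask of 0xffffffff the
 * register is simply overwritten with or_mask. The register name below is a
 * placeholder, not a real define.
 *
 *	static const u32 example_golden_settings[] = {
 *		mmEXAMPLE_REG, 0xffffffff, 0x00000001,
 *	};
 *
 *	amdgpu_device_program_register_sequence(adev, example_golden_settings,
 *						ARRAY_SIZE(example_golden_settings));
 */
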
/**
 * amdgpu_device_pci_config_reset - reset the GPU
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using the pci config reset sequence.
 * Only applicable to asics prior to vega10.
 */
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
{
	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
}

/**
 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
 */
int amdgpu_device_pci_reset(struct amdgpu_device *adev)
{
	return pci_reset_function(adev->pdev);
}

/*
 * GPU doorbell aperture helpers function.
 */
/**
 * amdgpu_device_doorbell_init - Init doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Init doorbell driver information (CIK)
 * Returns 0 on success, error on failure.
 */
static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
{

	/* No doorbell on SI hardware generation */
	if (adev->asic_type < CHIP_BONAIRE) {
		adev->doorbell.base = 0;
		adev->doorbell.size = 0;
		adev->doorbell.num_doorbells = 0;
		adev->doorbell.ptr = NULL;
		return 0;
	}

	if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
		return -EINVAL;

	amdgpu_asic_init_doorbell_index(adev);

	/* doorbell bar mapping */
	adev->doorbell.base = pci_resource_start(adev->pdev, 2);
	adev->doorbell.size = pci_resource_len(adev->pdev, 2);

	adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
					     adev->doorbell_index.max_assignment+1);
	if (adev->doorbell.num_doorbells == 0)
		return -EINVAL;

	/* For Vega, reserve and map two pages on the doorbell BAR since the SDMA
	 * paging queue doorbell uses the second page. The
	 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
	 * doorbells are in the first page. So with the paging queue enabled,
	 * the max num_doorbells should be extended by 1 page (0x400 in dwords).
	 */
	if (adev->asic_type >= CHIP_VEGA10)
		adev->doorbell.num_doorbells += 0x400;

	adev->doorbell.ptr = ioremap(adev->doorbell.base,
				     adev->doorbell.num_doorbells *
				     sizeof(u32));
	if (adev->doorbell.ptr == NULL)
		return -ENOMEM;

	return 0;
}

/**
 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Tear down doorbell driver information (CIK)
 */
static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
{
	iounmap(adev->doorbell.ptr);
	adev->doorbell.ptr = NULL;
}


/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */

/**
 * amdgpu_device_wb_fini - Disable Writeback and free memory
 *
 * @adev: amdgpu_device pointer
 *
 * Disables Writeback and frees the Writeback memory (all asics).
 * Used at driver shutdown.
 */
static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
{
	if (adev->wb.wb_obj) {
		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
				      &adev->wb.gpu_addr,
				      (void **)&adev->wb.wb);
		adev->wb.wb_obj = NULL;
	}
}

/**
 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
 *
 * @adev: amdgpu_device pointer
 *
 * Initializes writeback and allocates writeback memory (all asics).
 * Used at driver startup.
 * Returns 0 on success or an -error on failure.
 */
static int amdgpu_device_wb_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->wb.wb_obj == NULL) {
		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
					    (void **)&adev->wb.wb);
		if (r) {
			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
			return r;
		}

		adev->wb.num_wb = AMDGPU_MAX_WB;
		memset(&adev->wb.used, 0, sizeof(adev->wb.used));

		/* clear wb memory */
		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
	}

	return 0;
}

/**
 * amdgpu_device_wb_get - Allocate a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Allocate a wb slot for use by the driver (all asics).
 * Returns 0 on success or -EINVAL on failure.
 */
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
{
	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);

	if (offset < adev->wb.num_wb) {
		__set_bit(offset, adev->wb.used);
		*wb = offset << 3; /* convert to dw offset */
		return 0;
	} else {
		return -EINVAL;
	}
}

/**
 * amdgpu_device_wb_free - Free a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Free a wb slot allocated for use by the driver (all asics)
 */
void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
{
	wb >>= 3;
	if (wb < adev->wb.num_wb)
		__clear_bit(wb, adev->wb.used);
}

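/*
 * Typical pattern (sketch only): the returned index is already a dword
 * offset, so it can be used directly against adev->wb.wb for the CPU view
 * and against adev->wb.gpu_addr (scaled by 4) for the GPU view.
 *
 *	u32 wb;
 *
 *	if (!amdgpu_device_wb_get(adev, &wb)) {
 *		volatile uint32_t *cpu_addr = &adev->wb.wb[wb];
 *		uint64_t gpu_addr = adev->wb.gpu_addr + wb * 4;
 *
 *		... let the GPU write status to gpu_addr, poll *cpu_addr ...
 *
 *		amdgpu_device_wb_free(adev, wb);
 *	}
 */
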
/**
 * amdgpu_device_resize_fb_bar - try to resize FB BAR
 *
 * @adev: amdgpu_device pointer
 *
 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 * to fail, but if any of the BARs is not accessible after the size we abort
 * driver loading by returning -ENODEV.
 */
int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
{
	int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
	struct pci_bus *root;
	struct resource *res;
	unsigned i;
	u16 cmd;
	int r;

	/* Bypass for VF */
	if (amdgpu_sriov_vf(adev))
		return 0;

	/* skip if the bios has already enabled large BAR */
	if (adev->gmc.real_vram_size &&
	    (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
		return 0;

	/* Check if the root BUS has 64bit memory resources */
	root = adev->pdev->bus;
	while (root->parent)
		root = root->parent;

	pci_bus_for_each_resource(root, res, i) {
		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
		    res->start > 0x100000000ull)
			break;
	}

	/* Trying to resize is pointless without a root hub window above 4GB */
	if (!res)
		return 0;

	/* Limit the BAR size to what is available */
	rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
			rbar_size);

	/* Disable memory decoding while we change the BAR addresses and size */
	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
	pci_write_config_word(adev->pdev, PCI_COMMAND,
			      cmd & ~PCI_COMMAND_MEMORY);

	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
	amdgpu_device_doorbell_fini(adev);
	if (adev->asic_type >= CHIP_BONAIRE)
		pci_release_resource(adev->pdev, 2);

	pci_release_resource(adev->pdev, 0);

	r = pci_resize_resource(adev->pdev, 0, rbar_size);
	if (r == -ENOSPC)
		DRM_INFO("Not enough PCI address space for a large BAR.");
	else if (r && r != -ENOTSUPP)
		DRM_ERROR("Problem resizing BAR0 (%d).", r);

	pci_assign_unassigned_bus_resources(adev->pdev->bus);

	/* When the doorbell or fb BAR isn't available we have no chance of
	 * using the device.
	 */
	r = amdgpu_device_doorbell_init(adev);
	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
		return -ENODEV;

	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);

	return 0;
}

/*
 * GPU helpers function.
 */
/**
 * amdgpu_device_need_post - check if the hw needs post or not
 *
 * @adev: amdgpu_device pointer
 *
 * Check if the asic has been initialized (all asics) at driver startup
 * or post is needed if hw reset is performed.
 * Returns true if post is needed or false if not.
 */
bool amdgpu_device_need_post(struct amdgpu_device *adev)
{
	uint32_t reg;

	if (amdgpu_sriov_vf(adev))
		return false;

	if (amdgpu_passthrough(adev)) {
		/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
		 * some old smc fw still needs the driver to do vPost, otherwise the gpu hangs,
		 * while smc fw versions above 22.15 don't have this flaw, so we force
		 * vpost executed for smc version below 22.15
		 */
		if (adev->asic_type == CHIP_FIJI) {
			int err;
			uint32_t fw_ver;
			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
			/* force vPost if error occurred */
			if (err)
				return true;

			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
			if (fw_ver < 0x00160e00)
				return true;
		}
	}

	if (adev->has_hw_reset) {
		adev->has_hw_reset = false;
		return true;
	}

	/* bios scratch used on CIK+ */
	if (adev->asic_type >= CHIP_BONAIRE)
		return amdgpu_atombios_scratch_need_asic_init(adev);

	/* check MEM_SIZE for older asics */
	reg = amdgpu_asic_get_config_memsize(adev);

	if ((reg != 0) && (reg != 0xffffffff))
		return false;

	return true;
}

/* if we get transitioned to only one device, take VGA back */
/**
 * amdgpu_device_vga_set_decode - enable/disable vga decode
 *
 * @cookie: amdgpu_device pointer
 * @state: enable/disable vga decode
 *
 * Enable/disable vga decode (all asics).
 * Returns VGA resource flags.
 */
static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
{
	struct amdgpu_device *adev = cookie;
	amdgpu_asic_set_vga_state(adev, state);
	if (state)
		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
	else
		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
}

/**
 * amdgpu_device_check_block_size - validate the vm block size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm block size specified via module parameter.
 * The vm block size defines number of bits in page table versus page directory,
 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
 * page table and the remaining bits are in the page directory.
 */
static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
{
	/* defines number of bits in page table versus page directory,
	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
	 * page table and the remaining bits are in the page directory */
	if (amdgpu_vm_block_size == -1)
		return;

	if (amdgpu_vm_block_size < 9) {
		dev_warn(adev->dev, "VM page table size (%d) too small\n",
			 amdgpu_vm_block_size);
		amdgpu_vm_block_size = -1;
	}
}

/**
 * amdgpu_device_check_vm_size - validate the vm size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm size in GB specified via module parameter.
 * The VM size is the size of the GPU virtual memory space in GB.
 */
static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
{
	/* no need to check the default value */
	if (amdgpu_vm_size == -1)
		return;

	if (amdgpu_vm_size < 1) {
		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
			 amdgpu_vm_size);
		amdgpu_vm_size = -1;
	}
}

static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
{
	struct sysinfo si;
	bool is_os_64 = (sizeof(void *) == 8);
	uint64_t total_memory;
	uint64_t dram_size_seven_GB = 0x1B8000000;
	uint64_t dram_size_three_GB = 0xB8000000;

	if (amdgpu_smu_memory_pool_size == 0)
		return;

	if (!is_os_64) {
		DRM_WARN("Not 64-bit OS, feature not supported\n");
		goto def_value;
	}
	si_meminfo(&si);
	total_memory = (uint64_t)si.totalram * si.mem_unit;

	if ((amdgpu_smu_memory_pool_size == 1) ||
	    (amdgpu_smu_memory_pool_size == 2)) {
		if (total_memory < dram_size_three_GB)
			goto def_value1;
	} else if ((amdgpu_smu_memory_pool_size == 4) ||
		   (amdgpu_smu_memory_pool_size == 8)) {
		if (total_memory < dram_size_seven_GB)
			goto def_value1;
	} else {
		DRM_WARN("Smu memory pool size not supported\n");
		goto def_value;
	}
	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;

	return;

def_value1:
	DRM_WARN("Not enough system memory\n");
def_value:
	adev->pm.smu_prv_buffer_size = 0;
}

/**
 * amdgpu_device_check_arguments - validate module params
 *
 * @adev: amdgpu_device pointer
 *
 * Validates certain module parameters and updates
 * the associated values used by the driver (all asics).
 */
static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
{
	if (amdgpu_sched_jobs < 4) {
		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = 4;
	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
	}

	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
		/* gart size must be greater or equal to 32M */
		dev_warn(adev->dev, "gart size (%d) too small\n",
			 amdgpu_gart_size);
		amdgpu_gart_size = -1;
	}

	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
		/* gtt size must be greater or equal to 32M */
		dev_warn(adev->dev, "gtt size (%d) too small\n",
			 amdgpu_gtt_size);
		amdgpu_gtt_size = -1;
	}

	/* valid range is between 4 and 9 inclusive */
	if (amdgpu_vm_fragment_size != -1 &&
	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
		dev_warn(adev->dev, "valid range is between 4 and 9\n");
		amdgpu_vm_fragment_size = -1;
	}

	if (amdgpu_sched_hw_submission < 2) {
		dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
			 amdgpu_sched_hw_submission);
		amdgpu_sched_hw_submission = 2;
	} else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
		dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
			 amdgpu_sched_hw_submission);
		amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
	}

	amdgpu_device_check_smu_prv_buffer_size(adev);

	amdgpu_device_check_vm_size(adev);

	amdgpu_device_check_block_size(adev);

	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);

	amdgpu_gmc_tmz_set(adev);

	amdgpu_gmc_noretry_set(adev);

	return 0;
}

/**
 * amdgpu_switcheroo_set_state - set switcheroo state
 *
 * @pdev: pci dev pointer
 * @state: vga_switcheroo state
 *
 * Callback for the switcheroo driver. Suspends or resumes
 * the asic before or after it is powered up using ACPI methods.
 */
static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
					enum vga_switcheroo_state state)
{
	struct drm_device *dev = pci_get_drvdata(pdev);
	int r;

	if (amdgpu_device_supports_atpx(dev) && state == VGA_SWITCHEROO_OFF)
		return;

	if (state == VGA_SWITCHEROO_ON) {
		pr_info("switched on\n");
		/* don't suspend or resume card normally */
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;

		pci_set_power_state(pdev, PCI_D0);
		amdgpu_device_load_pci_state(pdev);
		r = pci_enable_device(pdev);
		if (r)
			DRM_WARN("pci_enable_device failed (%d)\n", r);
		amdgpu_device_resume(dev, true);

		dev->switch_power_state = DRM_SWITCH_POWER_ON;
	} else {
		pr_info("switched off\n");
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
		amdgpu_device_suspend(dev, true);
		amdgpu_device_cache_pci_state(pdev);
		/* Shut down the device */
		pci_disable_device(pdev);
		pci_set_power_state(pdev, PCI_D3cold);
		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
	}
}

/**
 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
 *
 * @pdev: pci dev pointer
 *
 * Callback for the switcheroo driver. Check if the switcheroo
 * state can be changed.
 * Returns true if the state can be changed, false if not.
 */
static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);

	/*
	 * FIXME: open_count is protected by drm_global_mutex but that would lead to
	 * locking inversion with the driver load path. And the access here is
	 * completely racy anyway. So don't bother with locking for now.
	 */
	return atomic_read(&dev->open_count) == 0;
}

static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
	.set_gpu_state = amdgpu_switcheroo_set_state,
	.reprobe = NULL,
	.can_switch = amdgpu_switcheroo_can_switch,
};

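/*
 * These ops are handed to the vga_switcheroo layer during device init,
 * roughly as in the following sketch (the runtime power-control flag depends
 * on the platform's PX/HG capabilities):
 *
 *	vga_switcheroo_register_client(adev->pdev, &amdgpu_switcheroo_ops, px);
 */
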
/**
 * amdgpu_device_ip_set_clockgating_state - set the CG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: clockgating state (gate or ungate)
 *
 * Sets the requested clockgating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_clockgating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

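/*
 * Example (sketch): request clock gating on all GFX IP instances.
 * AMD_IP_BLOCK_TYPE_GFX and AMD_CG_STATE_GATE come from the shared
 * amd_shared.h definitions.
 *
 *	amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
 *					       AMD_CG_STATE_GATE);
 */
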
/**
 * amdgpu_device_ip_set_powergating_state - set the PG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: powergating state (gate or ungate)
 *
 * Sets the requested powergating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_powergating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_powergating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

/**
 * amdgpu_device_ip_get_clockgating_state - get the CG state
 *
 * @adev: amdgpu_device pointer
 * @flags: clockgating feature flags
 *
 * Walks the list of IPs on the device and updates the clockgating
 * flags for each IP.
 * Updates @flags with the feature flags for each hardware IP where
 * clockgating is enabled.
 */
void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
					    u32 *flags)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
	}
}

/**
 * amdgpu_device_ip_wait_for_idle - wait for idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Waits for the requested hardware IP to be idle.
 * Returns 0 for success or a negative error code on failure.
 */
int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
				   enum amd_ip_block_type block_type)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type) {
			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
			if (r)
				return r;
			break;
		}
	}
	return 0;

}

/**
 * amdgpu_device_ip_is_idle - is the hardware IP idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Check if the hardware IP is idle or not.
 * Returns true if the IP is idle, false if not.
 */
bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
			      enum amd_ip_block_type block_type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type)
			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
	}
	return true;

}

/**
 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
 *
 * @adev: amdgpu_device pointer
 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Returns a pointer to the hardware IP block structure
 * if it exists for the asic, otherwise NULL.
 */
struct amdgpu_ip_block *
amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
			      enum amd_ip_block_type type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++)
		if (adev->ip_blocks[i].version->type == type)
			return &adev->ip_blocks[i];

	return NULL;
}

/**
 * amdgpu_device_ip_block_version_cmp
 *
 * @adev: amdgpu_device pointer
 * @type: enum amd_ip_block_type
 * @major: major version
 * @minor: minor version
 *
 * return 0 if equal or greater
 * return 1 if smaller or the ip_block doesn't exist
 */
int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
				       enum amd_ip_block_type type,
				       u32 major, u32 minor)
{
	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);

	if (ip_block && ((ip_block->version->major > major) ||
			((ip_block->version->major == major) &&
			(ip_block->version->minor >= minor))))
		return 0;

	return 1;
}

/**
 * amdgpu_device_ip_block_add
 *
 * @adev: amdgpu_device pointer
 * @ip_block_version: pointer to the IP to add
 *
 * Adds the IP block driver information to the collection of IPs
 * on the asic.
 */
int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
			       const struct amdgpu_ip_block_version *ip_block_version)
{
	if (!ip_block_version)
		return -EINVAL;

	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
		 ip_block_version->funcs->name);

	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;

	return 0;
}

e3ecdffa
AD
1701/**
1702 * amdgpu_device_enable_virtual_display - enable virtual display feature
1703 *
1704 * @adev: amdgpu_device pointer
1705 *
1706 * Enabled the virtual display feature if the user has enabled it via
1707 * the module parameter virtual_display. This feature provides a virtual
1708 * display hardware on headless boards or in virtualized environments.
1709 * This function parses and validates the configuration string specified by
1710 * the user and configues the virtual display configuration (number of
1711 * virtual connectors, crtcs, etc.) specified.
1712 */
483ef985 1713static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1714{
1715 adev->enable_virtual_display = false;
1716
1717 if (amdgpu_virtual_display) {
8f66090b 1718 const char *pci_address_name = pci_name(adev->pdev);
0f66356d 1719 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1720
1721 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1722 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1723 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1724 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1725 if (!strcmp("all", pciaddname)
1726 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1727 long num_crtc;
1728 int res = -1;
1729
9accf2fd 1730 adev->enable_virtual_display = true;
0f66356d
ED
1731
1732 if (pciaddname_tmp)
1733 res = kstrtol(pciaddname_tmp, 10,
1734 &num_crtc);
1735
1736 if (!res) {
1737 if (num_crtc < 1)
1738 num_crtc = 1;
1739 if (num_crtc > 6)
1740 num_crtc = 6;
1741 adev->mode_info.num_crtc = num_crtc;
1742 } else {
1743 adev->mode_info.num_crtc = 1;
1744 }
9accf2fd
ED
1745 break;
1746 }
1747 }
1748
0f66356d
ED
1749 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1750 amdgpu_virtual_display, pci_address_name,
1751 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
1752
1753 kfree(pciaddstr);
1754 }
1755}
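/*
 * Example (illustrative only) of the amdgpu.virtual_display module
 * parameter parsed above: entries are separated by ';', each entry is
 * "<pci address>[,<num_crtc>]", and "all" matches every device, e.g.
 *
 *	amdgpu.virtual_display=0000:03:00.0,2
 *	amdgpu.virtual_display=all,1
 *
 * The PCI address above is a placeholder; num_crtc is clamped to 1..6 and
 * defaults to 1 when omitted or invalid.
 */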
1756
e3ecdffa
AD
1757/**
1758 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1759 *
1760 * @adev: amdgpu_device pointer
1761 *
1762 * Parses the asic configuration parameters specified in the gpu info
1763 * firmware and makes them available to the driver for use in configuring
1764 * the asic.
1765 * Returns 0 on success, -EINVAL on failure.
1766 */
e2a75f88
AD
1767static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1768{
e2a75f88 1769 const char *chip_name;
c0a43457 1770 char fw_name[40];
e2a75f88
AD
1771 int err;
1772 const struct gpu_info_firmware_header_v1_0 *hdr;
1773
ab4fe3e1
HR
1774 adev->firmware.gpu_info_fw = NULL;
1775
72de33f8 1776 if (adev->mman.discovery_bin) {
258620d0 1777 amdgpu_discovery_get_gfx_info(adev);
cc375d8c
TY
1778
1779 /*
1780 * FIXME: The bounding box is still needed by Navi12, so
1781 * temporarily read it from gpu_info firmware. Should be dropped
1782 * when DAL no longer needs it.
1783 */
1784 if (adev->asic_type != CHIP_NAVI12)
1785 return 0;
258620d0
AD
1786 }
1787
e2a75f88 1788 switch (adev->asic_type) {
e2a75f88
AD
1789#ifdef CONFIG_DRM_AMDGPU_SI
1790 case CHIP_VERDE:
1791 case CHIP_TAHITI:
1792 case CHIP_PITCAIRN:
1793 case CHIP_OLAND:
1794 case CHIP_HAINAN:
1795#endif
1796#ifdef CONFIG_DRM_AMDGPU_CIK
1797 case CHIP_BONAIRE:
1798 case CHIP_HAWAII:
1799 case CHIP_KAVERI:
1800 case CHIP_KABINI:
1801 case CHIP_MULLINS:
1802#endif
da87c30b
AD
1803 case CHIP_TOPAZ:
1804 case CHIP_TONGA:
1805 case CHIP_FIJI:
1806 case CHIP_POLARIS10:
1807 case CHIP_POLARIS11:
1808 case CHIP_POLARIS12:
1809 case CHIP_VEGAM:
1810 case CHIP_CARRIZO:
1811 case CHIP_STONEY:
27c0bc71 1812 case CHIP_VEGA20:
84d244a3
JC
1813 case CHIP_SIENNA_CICHLID:
1814 case CHIP_NAVY_FLOUNDER:
eac88a5f 1815 case CHIP_DIMGREY_CAVEFISH:
e2a75f88
AD
1816 default:
1817 return 0;
1818 case CHIP_VEGA10:
1819 chip_name = "vega10";
1820 break;
3f76dced
AD
1821 case CHIP_VEGA12:
1822 chip_name = "vega12";
1823 break;
2d2e5e7e 1824 case CHIP_RAVEN:
54f78a76 1825 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
54c4d17e 1826 chip_name = "raven2";
54f78a76 1827 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
741deade 1828 chip_name = "picasso";
54c4d17e
FX
1829 else
1830 chip_name = "raven";
2d2e5e7e 1831 break;
65e60f6e
LM
1832 case CHIP_ARCTURUS:
1833 chip_name = "arcturus";
1834 break;
b51a26a0 1835 case CHIP_RENOIR:
2e62f0b5
PL
1836 if (adev->apu_flags & AMD_APU_IS_RENOIR)
1837 chip_name = "renoir";
1838 else
1839 chip_name = "green_sardine";
b51a26a0 1840 break;
23c6268e
HR
1841 case CHIP_NAVI10:
1842 chip_name = "navi10";
1843 break;
ed42cfe1
XY
1844 case CHIP_NAVI14:
1845 chip_name = "navi14";
1846 break;
42b325e5
XY
1847 case CHIP_NAVI12:
1848 chip_name = "navi12";
1849 break;
4e52a9f8
HR
1850 case CHIP_VANGOGH:
1851 chip_name = "vangogh";
1852 break;
e2a75f88
AD
1853 }
1854
1855 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
ab4fe3e1 1856 err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
e2a75f88
AD
1857 if (err) {
1858 dev_err(adev->dev,
1859 "Failed to load gpu_info firmware \"%s\"\n",
1860 fw_name);
1861 goto out;
1862 }
ab4fe3e1 1863 err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
e2a75f88
AD
1864 if (err) {
1865 dev_err(adev->dev,
1866 "Failed to validate gpu_info firmware \"%s\"\n",
1867 fw_name);
1868 goto out;
1869 }
1870
ab4fe3e1 1871 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
1872 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1873
1874 switch (hdr->version_major) {
1875 case 1:
1876 {
1877 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 1878 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
1879 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1880
cc375d8c
TY
1881 /*
1882 * Should be dropped when DAL no longer needs it.
1883 */
1884 if (adev->asic_type == CHIP_NAVI12)
ec51d3fa
XY
1885 goto parse_soc_bounding_box;
1886
b5ab16bf
AD
1887 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1888 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1889 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1890 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 1891 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
1892 le32_to_cpu(gpu_info_fw->gc_num_tccs);
1893 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1894 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1895 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1896 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 1897 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
1898 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1899 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
1900 adev->gfx.cu_info.max_waves_per_simd =
1901 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1902 adev->gfx.cu_info.max_scratch_slots_per_cu =
1903 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1904 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 1905 if (hdr->version_minor >= 1) {
35c2e910
HZ
1906 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1907 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1908 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1909 adev->gfx.config.num_sc_per_sh =
1910 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1911 adev->gfx.config.num_packer_per_sc =
1912 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1913 }
ec51d3fa
XY
1914
1915parse_soc_bounding_box:
ec51d3fa
XY
1916 /*
1917 * soc bounding box info is not integrated in the discovery table,
258620d0 1918 * so we still need to parse it from the gpu info firmware when needed.
ec51d3fa 1919 */
48321c3d
HW
1920 if (hdr->version_minor == 2) {
1921 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1922 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1923 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1924 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1925 }
e2a75f88
AD
1926 break;
1927 }
1928 default:
1929 dev_err(adev->dev,
1930 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1931 err = -EINVAL;
1932 goto out;
1933 }
1934out:
e2a75f88
AD
1935 return err;
1936}
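/*
 * Illustrative note: the firmware file requested above is derived from the
 * chip name via "amdgpu/%s_gpu_info.bin", e.g. for CHIP_VEGA10 this becomes
 *
 *	request_firmware(&adev->firmware.gpu_info_fw,
 *			 "amdgpu/vega10_gpu_info.bin", adev->dev);
 *
 * The file is resolved through the kernel's normal firmware search path
 * (typically /lib/firmware/amdgpu/ on most distributions).
 */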
1937
e3ecdffa
AD
1938/**
1939 * amdgpu_device_ip_early_init - run early init for hardware IPs
1940 *
1941 * @adev: amdgpu_device pointer
1942 *
1943 * Early initialization pass for hardware IPs. The hardware IPs that make
1944 * up each asic are discovered and each IP's early_init callback is run. This
1945 * is the first stage in initializing the asic.
1946 * Returns 0 on success, negative error code on failure.
1947 */
06ec9070 1948static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 1949{
aaa36a97 1950 int i, r;
d38ceaf9 1951
483ef985 1952 amdgpu_device_enable_virtual_display(adev);
a6be7570 1953
00a979f3 1954 if (amdgpu_sriov_vf(adev)) {
00a979f3 1955 r = amdgpu_virt_request_full_gpu(adev, true);
aaa36a97
AD
1956 if (r)
1957 return r;
00a979f3
WS
1958 }
1959
d38ceaf9 1960 switch (adev->asic_type) {
33f34802
KW
1961#ifdef CONFIG_DRM_AMDGPU_SI
1962 case CHIP_VERDE:
1963 case CHIP_TAHITI:
1964 case CHIP_PITCAIRN:
1965 case CHIP_OLAND:
1966 case CHIP_HAINAN:
295d0daf 1967 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
1968 r = si_set_ip_blocks(adev);
1969 if (r)
1970 return r;
1971 break;
1972#endif
a2e73f56
AD
1973#ifdef CONFIG_DRM_AMDGPU_CIK
1974 case CHIP_BONAIRE:
1975 case CHIP_HAWAII:
1976 case CHIP_KAVERI:
1977 case CHIP_KABINI:
1978 case CHIP_MULLINS:
e1ad2d53 1979 if (adev->flags & AMD_IS_APU)
a2e73f56 1980 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
1981 else
1982 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
1983
1984 r = cik_set_ip_blocks(adev);
1985 if (r)
1986 return r;
1987 break;
1988#endif
da87c30b
AD
1989 case CHIP_TOPAZ:
1990 case CHIP_TONGA:
1991 case CHIP_FIJI:
1992 case CHIP_POLARIS10:
1993 case CHIP_POLARIS11:
1994 case CHIP_POLARIS12:
1995 case CHIP_VEGAM:
1996 case CHIP_CARRIZO:
1997 case CHIP_STONEY:
1998 if (adev->flags & AMD_IS_APU)
1999 adev->family = AMDGPU_FAMILY_CZ;
2000 else
2001 adev->family = AMDGPU_FAMILY_VI;
2002
2003 r = vi_set_ip_blocks(adev);
2004 if (r)
2005 return r;
2006 break;
e48a3cd9
AD
2007 case CHIP_VEGA10:
2008 case CHIP_VEGA12:
e4bd8170 2009 case CHIP_VEGA20:
e48a3cd9 2010 case CHIP_RAVEN:
61cf44c1 2011 case CHIP_ARCTURUS:
b51a26a0 2012 case CHIP_RENOIR:
70534d1e 2013 if (adev->flags & AMD_IS_APU)
2ca8a5d2
CZ
2014 adev->family = AMDGPU_FAMILY_RV;
2015 else
2016 adev->family = AMDGPU_FAMILY_AI;
460826e6
KW
2017
2018 r = soc15_set_ip_blocks(adev);
2019 if (r)
2020 return r;
2021 break;
0a5b8c7b 2022 case CHIP_NAVI10:
7ecb5cd4 2023 case CHIP_NAVI14:
4808cf9c 2024 case CHIP_NAVI12:
11e8aef5 2025 case CHIP_SIENNA_CICHLID:
41f446bf 2026 case CHIP_NAVY_FLOUNDER:
144722fa 2027 case CHIP_DIMGREY_CAVEFISH:
4e52a9f8
HR
2028 case CHIP_VANGOGH:
2029 if (adev->asic_type == CHIP_VANGOGH)
2030 adev->family = AMDGPU_FAMILY_VGH;
2031 else
2032 adev->family = AMDGPU_FAMILY_NV;
0a5b8c7b
HR
2033
2034 r = nv_set_ip_blocks(adev);
2035 if (r)
2036 return r;
2037 break;
d38ceaf9
AD
2038 default:
2039 /* FIXME: not supported yet */
2040 return -EINVAL;
2041 }
2042
1884734a 2043 amdgpu_amdkfd_device_probe(adev);
2044
3b94fb10 2045 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 2046 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 2047 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
00f54b97 2048
d38ceaf9
AD
2049 for (i = 0; i < adev->num_ip_blocks; i++) {
2050 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
ed8cf00c
HR
2051 DRM_ERROR("disabled ip block: %d <%s>\n",
2052 i, adev->ip_blocks[i].version->funcs->name);
a1255107 2053 adev->ip_blocks[i].status.valid = false;
d38ceaf9 2054 } else {
a1255107
AD
2055 if (adev->ip_blocks[i].version->funcs->early_init) {
2056 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 2057 if (r == -ENOENT) {
a1255107 2058 adev->ip_blocks[i].status.valid = false;
2c1a2784 2059 } else if (r) {
a1255107
AD
2060 DRM_ERROR("early_init of IP block <%s> failed %d\n",
2061 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2062 return r;
2c1a2784 2063 } else {
a1255107 2064 adev->ip_blocks[i].status.valid = true;
2c1a2784 2065 }
974e6b64 2066 } else {
a1255107 2067 adev->ip_blocks[i].status.valid = true;
d38ceaf9 2068 }
d38ceaf9 2069 }
21a249ca
AD
2070 /* get the vbios after the asic_funcs are set up */
2071 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
2072 r = amdgpu_device_parse_gpu_info_fw(adev);
2073 if (r)
2074 return r;
2075
21a249ca
AD
2076 /* Read BIOS */
2077 if (!amdgpu_get_bios(adev))
2078 return -EINVAL;
2079
2080 r = amdgpu_atombios_init(adev);
2081 if (r) {
2082 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2083 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2084 return r;
2085 }
2086 }
d38ceaf9
AD
2087 }
2088
395d1fb9
NH
2089 adev->cg_flags &= amdgpu_cg_mask;
2090 adev->pg_flags &= amdgpu_pg_mask;
2091
d38ceaf9
AD
2092 return 0;
2093}
2094
0a4f2520
RZ
2095static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2096{
2097 int i, r;
2098
2099 for (i = 0; i < adev->num_ip_blocks; i++) {
2100 if (!adev->ip_blocks[i].status.sw)
2101 continue;
2102 if (adev->ip_blocks[i].status.hw)
2103 continue;
2104 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 2105 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
2106 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2107 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2108 if (r) {
2109 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2110 adev->ip_blocks[i].version->funcs->name, r);
2111 return r;
2112 }
2113 adev->ip_blocks[i].status.hw = true;
2114 }
2115 }
2116
2117 return 0;
2118}
2119
2120static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2121{
2122 int i, r;
2123
2124 for (i = 0; i < adev->num_ip_blocks; i++) {
2125 if (!adev->ip_blocks[i].status.sw)
2126 continue;
2127 if (adev->ip_blocks[i].status.hw)
2128 continue;
2129 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2130 if (r) {
2131 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2132 adev->ip_blocks[i].version->funcs->name, r);
2133 return r;
2134 }
2135 adev->ip_blocks[i].status.hw = true;
2136 }
2137
2138 return 0;
2139}
2140
7a3e0bb2
RZ
2141static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2142{
2143 int r = 0;
2144 int i;
80f41f84 2145 uint32_t smu_version;
7a3e0bb2
RZ
2146
2147 if (adev->asic_type >= CHIP_VEGA10) {
2148 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
2149 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2150 continue;
2151
2152 /* no need to do the fw loading again if already done*/
2153 if (adev->ip_blocks[i].status.hw == true)
2154 break;
2155
53b3f8f4 2156 if (amdgpu_in_reset(adev) || adev->in_suspend) {
482f0e53
ML
2157 r = adev->ip_blocks[i].version->funcs->resume(adev);
2158 if (r) {
2159 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 2160 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
2161 return r;
2162 }
2163 } else {
2164 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2165 if (r) {
2166 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2167 adev->ip_blocks[i].version->funcs->name, r);
2168 return r;
7a3e0bb2 2169 }
7a3e0bb2 2170 }
482f0e53
ML
2171
2172 adev->ip_blocks[i].status.hw = true;
2173 break;
7a3e0bb2
RZ
2174 }
2175 }
482f0e53 2176
8973d9ec
ED
2177 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2178 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 2179
80f41f84 2180 return r;
7a3e0bb2
RZ
2181}
2182
e3ecdffa
AD
2183/**
2184 * amdgpu_device_ip_init - run init for hardware IPs
2185 *
2186 * @adev: amdgpu_device pointer
2187 *
2188 * Main initialization pass for hardware IPs. The list of all the hardware
2189 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2190 * are run. sw_init initializes the software state associated with each IP
2191 * and hw_init initializes the hardware associated with each IP.
2192 * Returns 0 on success, negative error code on failure.
2193 */
06ec9070 2194static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
2195{
2196 int i, r;
2197
c030f2e4 2198 r = amdgpu_ras_init(adev);
2199 if (r)
2200 return r;
2201
d38ceaf9 2202 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2203 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2204 continue;
a1255107 2205 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 2206 if (r) {
a1255107
AD
2207 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2208 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 2209 goto init_failed;
2c1a2784 2210 }
a1255107 2211 adev->ip_blocks[i].status.sw = true;
bfca0289 2212
d38ceaf9 2213 /* need to do gmc hw init early so we can allocate gpu mem */
a1255107 2214 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
06ec9070 2215 r = amdgpu_device_vram_scratch_init(adev);
2c1a2784
AD
2216 if (r) {
2217 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
72d3f592 2218 goto init_failed;
2c1a2784 2219 }
a1255107 2220 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
2221 if (r) {
2222 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 2223 goto init_failed;
2c1a2784 2224 }
06ec9070 2225 r = amdgpu_device_wb_init(adev);
2c1a2784 2226 if (r) {
06ec9070 2227 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 2228 goto init_failed;
2c1a2784 2229 }
a1255107 2230 adev->ip_blocks[i].status.hw = true;
2493664f
ML
2231
2232 /* right after GMC hw init, we create CSA */
f92d5c61 2233 if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
1e256e27
RZ
2234 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2235 AMDGPU_GEM_DOMAIN_VRAM,
2236 AMDGPU_CSA_SIZE);
2493664f
ML
2237 if (r) {
2238 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 2239 goto init_failed;
2493664f
ML
2240 }
2241 }
d38ceaf9
AD
2242 }
2243 }
2244
c9ffa427
YT
2245 if (amdgpu_sriov_vf(adev))
2246 amdgpu_virt_init_data_exchange(adev);
2247
533aed27
AG
2248 r = amdgpu_ib_pool_init(adev);
2249 if (r) {
2250 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2251 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2252 goto init_failed;
2253 }
2254
c8963ea4
RZ
2255 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2256 if (r)
72d3f592 2257 goto init_failed;
0a4f2520
RZ
2258
2259 r = amdgpu_device_ip_hw_init_phase1(adev);
2260 if (r)
72d3f592 2261 goto init_failed;
0a4f2520 2262
7a3e0bb2
RZ
2263 r = amdgpu_device_fw_loading(adev);
2264 if (r)
72d3f592 2265 goto init_failed;
7a3e0bb2 2266
0a4f2520
RZ
2267 r = amdgpu_device_ip_hw_init_phase2(adev);
2268 if (r)
72d3f592 2269 goto init_failed;
d38ceaf9 2270
121a2bc6
AG
2271 /*
2272 * retired pages will be loaded from eeprom and reserved here;
2273 * this should be called after amdgpu_device_ip_hw_init_phase2 since
2274 * for some ASICs the RAS EEPROM code relies on the SMU being fully functional
2275 * for I2C communication, which is only true at this point.
b82e65a9
GC
2276 *
2277 * amdgpu_ras_recovery_init may fail, but the upper layers only care about
2278 * failures caused by a bad gpu, and stop the amdgpu init process
2279 * accordingly. For other failures, it still releases all
2280 * the resources and prints an error message, rather than returning a
2281 * negative value to the upper level.
121a2bc6
AG
2282 *
2283 * Note: theoretically, this should be called before all vram allocations
2284 * to protect retired pages from being used again
2285 */
b82e65a9
GC
2286 r = amdgpu_ras_recovery_init(adev);
2287 if (r)
2288 goto init_failed;
121a2bc6 2289
3e2e2ab5
HZ
2290 if (adev->gmc.xgmi.num_physical_nodes > 1)
2291 amdgpu_xgmi_add_device(adev);
1884734a 2292 amdgpu_amdkfd_device_init(adev);
c6332b97 2293
bd607166
KR
2294 amdgpu_fru_get_product_info(adev);
2295
72d3f592 2296init_failed:
c9ffa427 2297 if (amdgpu_sriov_vf(adev))
c6332b97 2298 amdgpu_virt_release_full_gpu(adev, true);
2299
72d3f592 2300 return r;
d38ceaf9
AD
2301}
2302
e3ecdffa
AD
2303/**
2304 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2305 *
2306 * @adev: amdgpu_device pointer
2307 *
2308 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2309 * this function before a GPU reset. If the value is retained after a
2310 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2311 */
06ec9070 2312static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2313{
2314 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2315}
2316
e3ecdffa
AD
2317/**
2318 * amdgpu_device_check_vram_lost - check if vram is valid
2319 *
2320 * @adev: amdgpu_device pointer
2321 *
2322 * Checks the reset magic value written to the gart pointer in VRAM.
2323 * The driver calls this after a GPU reset to see if the contents of
2324 * VRAM have been lost or not.
2325 * returns true if vram is lost, false if not.
2326 */
06ec9070 2327static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2328{
dadce777
EQ
2329 if (memcmp(adev->gart.ptr, adev->reset_magic,
2330 AMDGPU_RESET_MAGIC_NUM))
2331 return true;
2332
53b3f8f4 2333 if (!amdgpu_in_reset(adev))
dadce777
EQ
2334 return false;
2335
2336 /*
2337 * For all ASICs with baco/mode1 reset, the VRAM is
2338 * always assumed to be lost.
2339 */
2340 switch (amdgpu_asic_reset_method(adev)) {
2341 case AMD_RESET_METHOD_BACO:
2342 case AMD_RESET_METHOD_MODE1:
2343 return true;
2344 default:
2345 return false;
2346 }
0c49e0b8
CZ
2347}
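/*
 * Illustrative sketch (not part of the driver) of how the two helpers above
 * cooperate around a reset; the reset step shown is a placeholder:
 *
 *	amdgpu_device_fill_reset_magic(adev);	   snapshot the GART magic bytes
 *	... ASIC reset happens here ...
 *	vram_lost = amdgpu_device_check_vram_lost(adev);
 *
 * If the magic bytes no longer match, or a BACO/mode1 reset was used,
 * VRAM contents are treated as lost and buffers must be restored.
 */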
2348
e3ecdffa 2349/**
1112a46b 2350 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2351 *
2352 * @adev: amdgpu_device pointer
b8b72130 2353 * @state: clockgating state (gate or ungate)
e3ecdffa 2354 *
e3ecdffa 2355 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2356 * set_clockgating_state callbacks are run.
2357 * On the late initialization pass, clockgating is enabled for hardware IPs.
2358 * On the fini or suspend pass, clockgating is disabled for hardware IPs.
e3ecdffa
AD
2359 * Returns 0 on success, negative error code on failure.
2360 */
fdd34271 2361
1112a46b
RZ
2362static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2363 enum amd_clockgating_state state)
d38ceaf9 2364{
1112a46b 2365 int i, j, r;
d38ceaf9 2366
4a2ba394
SL
2367 if (amdgpu_emu_mode == 1)
2368 return 0;
2369
1112a46b
RZ
2370 for (j = 0; j < adev->num_ip_blocks; j++) {
2371 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2372 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2373 continue;
4a446d55 2374 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2375 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2376 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2377 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2378 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2379 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2380 /* enable clockgating to save power */
a1255107 2381 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2382 state);
4a446d55
AD
2383 if (r) {
2384 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2385 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2386 return r;
2387 }
b0b00ff1 2388 }
d38ceaf9 2389 }
06b18f61 2390
c9f96fd5
RZ
2391 return 0;
2392}
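/*
 * Illustrative note on the traversal above: when gating
 * (state == AMD_CG_STATE_GATE) the IP list is walked front to back, and
 * when ungating it is walked back to front. With, say, three blocks
 * [COMMON, GMC, GFX] the gate order is COMMON -> GMC -> GFX and the
 * ungate order is GFX -> GMC -> COMMON, mirroring init/fini ordering.
 */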
2393
1112a46b 2394static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
c9f96fd5 2395{
1112a46b 2396 int i, j, r;
06b18f61 2397
c9f96fd5
RZ
2398 if (amdgpu_emu_mode == 1)
2399 return 0;
2400
1112a46b
RZ
2401 for (j = 0; j < adev->num_ip_blocks; j++) {
2402 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2403 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5
RZ
2404 continue;
2405 /* skip CG for VCE/UVD, it's handled specially */
2406 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2407 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2408 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2409 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2410 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2411 /* enable powergating to save power */
2412 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2413 state);
c9f96fd5
RZ
2414 if (r) {
2415 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2416 adev->ip_blocks[i].version->funcs->name, r);
2417 return r;
2418 }
2419 }
2420 }
2dc80b00
S
2421 return 0;
2422}
2423
beff74bc
AD
2424static int amdgpu_device_enable_mgpu_fan_boost(void)
2425{
2426 struct amdgpu_gpu_instance *gpu_ins;
2427 struct amdgpu_device *adev;
2428 int i, ret = 0;
2429
2430 mutex_lock(&mgpu_info.mutex);
2431
2432 /*
2433 * MGPU fan boost feature should be enabled
2434 * only when there are two or more dGPUs in
2435 * the system
2436 */
2437 if (mgpu_info.num_dgpu < 2)
2438 goto out;
2439
2440 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2441 gpu_ins = &(mgpu_info.gpu_ins[i]);
2442 adev = gpu_ins->adev;
2443 if (!(adev->flags & AMD_IS_APU) &&
f10bb940 2444 !gpu_ins->mgpu_fan_enabled) {
beff74bc
AD
2445 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2446 if (ret)
2447 break;
2448
2449 gpu_ins->mgpu_fan_enabled = 1;
2450 }
2451 }
2452
2453out:
2454 mutex_unlock(&mgpu_info.mutex);
2455
2456 return ret;
2457}
2458
e3ecdffa
AD
2459/**
2460 * amdgpu_device_ip_late_init - run late init for hardware IPs
2461 *
2462 * @adev: amdgpu_device pointer
2463 *
2464 * Late initialization pass for hardware IPs. The list of all the hardware
2465 * IPs that make up the asic is walked and the late_init callbacks are run.
2466 * late_init covers any special initialization that an IP requires
2467 * after all of the IPs have been initialized or something that needs to happen
2468 * late in the init process.
2469 * Returns 0 on success, negative error code on failure.
2470 */
06ec9070 2471static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2472{
60599a03 2473 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2474 int i = 0, r;
2475
2476 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2477 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2478 continue;
2479 if (adev->ip_blocks[i].version->funcs->late_init) {
2480 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2481 if (r) {
2482 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2483 adev->ip_blocks[i].version->funcs->name, r);
2484 return r;
2485 }
2dc80b00 2486 }
73f847db 2487 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2488 }
2489
a891d239
DL
2490 amdgpu_ras_set_error_query_ready(adev, true);
2491
1112a46b
RZ
2492 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2493 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2494
06ec9070 2495 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2496
beff74bc
AD
2497 r = amdgpu_device_enable_mgpu_fan_boost();
2498 if (r)
2499 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2500
60599a03
EQ
2501
2502 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2503 mutex_lock(&mgpu_info.mutex);
2504
2505 /*
2506 * Reset device p-state to low as this was booted with high.
2507 *
2508 * This should be performed only after all devices from the same
2509 * hive get initialized.
2510 *
2511 * However, the number of devices in the hive is not known in advance,
2512 * as it is counted one by one during device initialization.
2513 *
2514 * So, we wait for all XGMI interlinked devices initialized.
2515 * This may bring some delays as those devices may come from
2516 * different hives. But that should be OK.
2517 */
2518 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2519 for (i = 0; i < mgpu_info.num_gpu; i++) {
2520 gpu_instance = &(mgpu_info.gpu_ins[i]);
2521 if (gpu_instance->adev->flags & AMD_IS_APU)
2522 continue;
2523
d84a430d
JK
2524 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2525 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
2526 if (r) {
2527 DRM_ERROR("pstate setting failed (%d).\n", r);
2528 break;
2529 }
2530 }
2531 }
2532
2533 mutex_unlock(&mgpu_info.mutex);
2534 }
2535
d38ceaf9
AD
2536 return 0;
2537}
2538
e3ecdffa
AD
2539/**
2540 * amdgpu_device_ip_fini - run fini for hardware IPs
2541 *
2542 * @adev: amdgpu_device pointer
2543 *
2544 * Main teardown pass for hardware IPs. The list of all the hardware
2545 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2546 * are run. hw_fini tears down the hardware associated with each IP
2547 * and sw_fini tears down any software state associated with each IP.
2548 * Returns 0 on success, negative error code on failure.
2549 */
06ec9070 2550static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
d38ceaf9
AD
2551{
2552 int i, r;
2553
5278a159
SY
2554 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2555 amdgpu_virt_release_ras_err_handler_data(adev);
2556
c030f2e4 2557 amdgpu_ras_pre_fini(adev);
2558
a82400b5
AG
2559 if (adev->gmc.xgmi.num_physical_nodes > 1)
2560 amdgpu_xgmi_remove_device(adev);
2561
05df1f01 2562 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2563 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2564
26eb6b51
DL
2565 amdgpu_amdkfd_device_fini(adev);
2566
3e96dbfd
AD
2567 /* need to disable SMC first */
2568 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2569 if (!adev->ip_blocks[i].status.hw)
3e96dbfd 2570 continue;
fdd34271 2571 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
a1255107 2572 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3e96dbfd
AD
2573 /* XXX handle errors */
2574 if (r) {
2575 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
a1255107 2576 adev->ip_blocks[i].version->funcs->name, r);
3e96dbfd 2577 }
a1255107 2578 adev->ip_blocks[i].status.hw = false;
3e96dbfd
AD
2579 break;
2580 }
2581 }
2582
d38ceaf9 2583 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2584 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2585 continue;
8201a67a 2586
a1255107 2587 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2588 /* XXX handle errors */
2c1a2784 2589 if (r) {
a1255107
AD
2590 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2591 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2592 }
8201a67a 2593
a1255107 2594 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2595 }
2596
9950cda2 2597
d38ceaf9 2598 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2599 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2600 continue;
c12aba3a
ML
2601
2602 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2603 amdgpu_ucode_free_bo(adev);
1e256e27 2604 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a
ML
2605 amdgpu_device_wb_fini(adev);
2606 amdgpu_device_vram_scratch_fini(adev);
533aed27 2607 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2608 }
2609
a1255107 2610 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2611 /* XXX handle errors */
2c1a2784 2612 if (r) {
a1255107
AD
2613 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2614 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2615 }
a1255107
AD
2616 adev->ip_blocks[i].status.sw = false;
2617 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2618 }
2619
a6dcfd9c 2620 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2621 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2622 continue;
a1255107
AD
2623 if (adev->ip_blocks[i].version->funcs->late_fini)
2624 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2625 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2626 }
2627
c030f2e4 2628 amdgpu_ras_fini(adev);
2629
030308fc 2630 if (amdgpu_sriov_vf(adev))
24136135
ML
2631 if (amdgpu_virt_release_full_gpu(adev, false))
2632 DRM_ERROR("failed to release exclusive mode on fini\n");
2493664f 2633
d38ceaf9
AD
2634 return 0;
2635}
2636
e3ecdffa 2637/**
beff74bc 2638 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2639 *
1112a46b 2640 * @work: work_struct.
e3ecdffa 2641 */
beff74bc 2642static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2643{
2644 struct amdgpu_device *adev =
beff74bc 2645 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2646 int r;
2647
2648 r = amdgpu_ib_ring_tests(adev);
2649 if (r)
2650 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2651}
2652
1e317b99
RZ
2653static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2654{
2655 struct amdgpu_device *adev =
2656 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2657
2658 mutex_lock(&adev->gfx.gfx_off_mutex);
2659 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2660 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2661 adev->gfx.gfx_off_state = true;
2662 }
2663 mutex_unlock(&adev->gfx.gfx_off_mutex);
2664}
2665
e3ecdffa 2666/**
e7854a03 2667 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2668 *
2669 * @adev: amdgpu_device pointer
2670 *
2671 * Main suspend function for hardware IPs. The list of all the hardware
2672 * IPs that make up the asic is walked, clockgating is disabled and the
2673 * suspend callbacks are run. suspend puts the hardware and software state
2674 * in each IP into a state suitable for suspend.
2675 * Returns 0 on success, negative error code on failure.
2676 */
e7854a03
AD
2677static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2678{
2679 int i, r;
2680
b00978de
PL
2681 if (adev->in_poweroff_reboot_com ||
2682 !amdgpu_acpi_is_s0ix_supported(adev) || amdgpu_in_reset(adev)) {
628c36d7
PL
2683 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2684 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2685 }
05df1f01 2686
e7854a03
AD
2687 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2688 if (!adev->ip_blocks[i].status.valid)
2689 continue;
2b9f7848 2690
e7854a03 2691 /* displays are handled separately */
2b9f7848
ND
2692 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
2693 continue;
2694
2695 /* XXX handle errors */
2696 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2697 /* XXX handle errors */
2698 if (r) {
2699 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2700 adev->ip_blocks[i].version->funcs->name, r);
2701 return r;
e7854a03 2702 }
2b9f7848
ND
2703
2704 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
2705 }
2706
e7854a03
AD
2707 return 0;
2708}
2709
2710/**
2711 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2712 *
2713 * @adev: amdgpu_device pointer
2714 *
2715 * Main suspend function for hardware IPs. The list of all the hardware
2716 * IPs that make up the asic is walked, clockgating is disabled and the
2717 * suspend callbacks are run. suspend puts the hardware and software state
2718 * in each IP into a state suitable for suspend.
2719 * Returns 0 on success, negative error code on failure.
2720 */
2721static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2722{
2723 int i, r;
2724
2725 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2726 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2727 continue;
e7854a03
AD
2728 /* displays are handled in phase1 */
2729 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2730 continue;
bff77e86
LM
2731 /* PSP lost connection when err_event_athub occurs */
2732 if (amdgpu_ras_intr_triggered() &&
2733 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2734 adev->ip_blocks[i].status.hw = false;
2735 continue;
2736 }
d38ceaf9 2737 /* XXX handle errors */
a1255107 2738 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 2739 /* XXX handle errors */
2c1a2784 2740 if (r) {
a1255107
AD
2741 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2742 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2743 }
876923fb 2744 adev->ip_blocks[i].status.hw = false;
a3a09142 2745 /* handle putting the SMC in the appropriate state */
86b93fd6
JZ
2746 if (!amdgpu_sriov_vf(adev)) {
2747 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2748 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
2749 if (r) {
2750 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
2751 adev->mp1_state, r);
2752 return r;
2753 }
a3a09142
AD
2754 }
2755 }
b5507c7e 2756 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2757 }
2758
2759 return 0;
2760}
2761
e7854a03
AD
2762/**
2763 * amdgpu_device_ip_suspend - run suspend for hardware IPs
2764 *
2765 * @adev: amdgpu_device pointer
2766 *
2767 * Main suspend function for hardware IPs. The list of all the hardware
2768 * IPs that make up the asic is walked, clockgating is disabled and the
2769 * suspend callbacks are run. suspend puts the hardware and software state
2770 * in each IP into a state suitable for suspend.
2771 * Returns 0 on success, negative error code on failure.
2772 */
2773int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2774{
2775 int r;
2776
e7819644
YT
2777 if (amdgpu_sriov_vf(adev))
2778 amdgpu_virt_request_full_gpu(adev, false);
2779
e7854a03
AD
2780 r = amdgpu_device_ip_suspend_phase1(adev);
2781 if (r)
2782 return r;
2783 r = amdgpu_device_ip_suspend_phase2(adev);
2784
e7819644
YT
2785 if (amdgpu_sriov_vf(adev))
2786 amdgpu_virt_release_full_gpu(adev, false);
2787
e7854a03
AD
2788 return r;
2789}
2790
06ec9070 2791static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2792{
2793 int i, r;
2794
2cb681b6
ML
2795 static enum amd_ip_block_type ip_order[] = {
2796 AMD_IP_BLOCK_TYPE_GMC,
2797 AMD_IP_BLOCK_TYPE_COMMON,
39186aef 2798 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
2799 AMD_IP_BLOCK_TYPE_IH,
2800 };
a90ad3c2 2801
2cb681b6
ML
2802 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2803 int j;
2804 struct amdgpu_ip_block *block;
a90ad3c2 2805
4cd2a96d
J
2806 block = &adev->ip_blocks[i];
2807 block->status.hw = false;
2cb681b6 2808
4cd2a96d 2809 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
2cb681b6 2810
4cd2a96d 2811 if (block->version->type != ip_order[j] ||
2cb681b6
ML
2812 !block->status.valid)
2813 continue;
2814
2815 r = block->version->funcs->hw_init(adev);
0aaeefcc 2816 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2817 if (r)
2818 return r;
482f0e53 2819 block->status.hw = true;
a90ad3c2
ML
2820 }
2821 }
2822
2823 return 0;
2824}
2825
06ec9070 2826static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2827{
2828 int i, r;
2829
2cb681b6
ML
2830 static enum amd_ip_block_type ip_order[] = {
2831 AMD_IP_BLOCK_TYPE_SMC,
2832 AMD_IP_BLOCK_TYPE_DCE,
2833 AMD_IP_BLOCK_TYPE_GFX,
2834 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c 2835 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07
JJ
2836 AMD_IP_BLOCK_TYPE_VCE,
2837 AMD_IP_BLOCK_TYPE_VCN
2cb681b6 2838 };
a90ad3c2 2839
2cb681b6
ML
2840 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2841 int j;
2842 struct amdgpu_ip_block *block;
a90ad3c2 2843
2cb681b6
ML
2844 for (j = 0; j < adev->num_ip_blocks; j++) {
2845 block = &adev->ip_blocks[j];
2846
2847 if (block->version->type != ip_order[i] ||
482f0e53
ML
2848 !block->status.valid ||
2849 block->status.hw)
2cb681b6
ML
2850 continue;
2851
895bd048
JZ
2852 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
2853 r = block->version->funcs->resume(adev);
2854 else
2855 r = block->version->funcs->hw_init(adev);
2856
0aaeefcc 2857 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2858 if (r)
2859 return r;
482f0e53 2860 block->status.hw = true;
a90ad3c2
ML
2861 }
2862 }
2863
2864 return 0;
2865}
2866
e3ecdffa
AD
2867/**
2868 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2869 *
2870 * @adev: amdgpu_device pointer
2871 *
2872 * First resume function for hardware IPs. The list of all the hardware
2873 * IPs that make up the asic is walked and the resume callbacks are run for
2874 * COMMON, GMC, and IH. resume puts the hardware into a functional state
2875 * after a suspend and updates the software state as necessary. This
2876 * function is also used for restoring the GPU after a GPU reset.
2877 * Returns 0 on success, negative error code on failure.
2878 */
06ec9070 2879static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
2880{
2881 int i, r;
2882
a90ad3c2 2883 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2884 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 2885 continue;
a90ad3c2 2886 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa
AD
2887 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2888 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
482f0e53 2889
fcf0649f
CZ
2890 r = adev->ip_blocks[i].version->funcs->resume(adev);
2891 if (r) {
2892 DRM_ERROR("resume of IP block <%s> failed %d\n",
2893 adev->ip_blocks[i].version->funcs->name, r);
2894 return r;
2895 }
482f0e53 2896 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
2897 }
2898 }
2899
2900 return 0;
2901}
2902
e3ecdffa
AD
2903/**
2904 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2905 *
2906 * @adev: amdgpu_device pointer
2907 *
2908 * Second resume function for hardware IPs. The list of all the hardware
2909 * IPs that make up the asic is walked and the resume callbacks are run for
2910 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
2911 * functional state after a suspend and updates the software state as
2912 * necessary. This function is also used for restoring the GPU after a GPU
2913 * reset.
2914 * Returns 0 on success, negative error code on failure.
2915 */
06ec9070 2916static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2917{
2918 int i, r;
2919
2920 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2921 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 2922 continue;
fcf0649f 2923 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 2924 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
2925 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2926 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 2927 continue;
a1255107 2928 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 2929 if (r) {
a1255107
AD
2930 DRM_ERROR("resume of IP block <%s> failed %d\n",
2931 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2932 return r;
2c1a2784 2933 }
482f0e53 2934 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
2935 }
2936
2937 return 0;
2938}
2939
e3ecdffa
AD
2940/**
2941 * amdgpu_device_ip_resume - run resume for hardware IPs
2942 *
2943 * @adev: amdgpu_device pointer
2944 *
2945 * Main resume function for hardware IPs. The hardware IPs
2946 * are split into two resume functions because they are
2947 * also used in recovering from a GPU reset and some additional
2948 * steps need to be taken between them. In this case (S3/S4) they are
2949 * run sequentially.
2950 * Returns 0 on success, negative error code on failure.
2951 */
06ec9070 2952static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
2953{
2954 int r;
2955
06ec9070 2956 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
2957 if (r)
2958 return r;
7a3e0bb2
RZ
2959
2960 r = amdgpu_device_fw_loading(adev);
2961 if (r)
2962 return r;
2963
06ec9070 2964 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
2965
2966 return r;
2967}
2968
e3ecdffa
AD
2969/**
2970 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2971 *
2972 * @adev: amdgpu_device pointer
2973 *
2974 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2975 */
4e99a44e 2976static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 2977{
6867e1b5
ML
2978 if (amdgpu_sriov_vf(adev)) {
2979 if (adev->is_atom_fw) {
2980 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2981 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2982 } else {
2983 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2984 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2985 }
2986
2987 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2988 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 2989 }
048765ad
AR
2990}
2991
e3ecdffa
AD
2992/**
2993 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2994 *
2995 * @asic_type: AMD asic type
2996 *
2997 * Check if there is DC (new modesetting infrastructure) support for an asic.
2998 * returns true if DC has support, false if not.
2999 */
4562236b
HW
3000bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3001{
3002 switch (asic_type) {
3003#if defined(CONFIG_DRM_AMD_DC)
64200c46
MR
3004#if defined(CONFIG_DRM_AMD_DC_SI)
3005 case CHIP_TAHITI:
3006 case CHIP_PITCAIRN:
3007 case CHIP_VERDE:
3008 case CHIP_OLAND:
3009#endif
4562236b 3010 case CHIP_BONAIRE:
0d6fbccb 3011 case CHIP_KAVERI:
367e6687
AD
3012 case CHIP_KABINI:
3013 case CHIP_MULLINS:
d9fda248
HW
3014 /*
3015 * We have systems in the wild with these ASICs that require
3016 * LVDS and VGA support which is not supported with DC.
3017 *
3018 * Fallback to the non-DC driver here by default so as not to
3019 * cause regressions.
3020 */
3021 return amdgpu_dc > 0;
3022 case CHIP_HAWAII:
4562236b
HW
3023 case CHIP_CARRIZO:
3024 case CHIP_STONEY:
4562236b 3025 case CHIP_POLARIS10:
675fd32b 3026 case CHIP_POLARIS11:
2c8ad2d5 3027 case CHIP_POLARIS12:
675fd32b 3028 case CHIP_VEGAM:
4562236b
HW
3029 case CHIP_TONGA:
3030 case CHIP_FIJI:
42f8ffa1 3031 case CHIP_VEGA10:
dca7b401 3032 case CHIP_VEGA12:
c6034aa2 3033 case CHIP_VEGA20:
b86a1aa3 3034#if defined(CONFIG_DRM_AMD_DC_DCN)
fd187853 3035 case CHIP_RAVEN:
b4f199c7 3036 case CHIP_NAVI10:
8fceceb6 3037 case CHIP_NAVI14:
078655d9 3038 case CHIP_NAVI12:
e1c14c43 3039 case CHIP_RENOIR:
81d9bfb8 3040 case CHIP_SIENNA_CICHLID:
a6c5308f 3041 case CHIP_NAVY_FLOUNDER:
7cc656e2 3042 case CHIP_DIMGREY_CAVEFISH:
84b934bc 3043 case CHIP_VANGOGH:
42f8ffa1 3044#endif
fd187853 3045 return amdgpu_dc != 0;
4562236b
HW
3046#endif
3047 default:
93b09a9a 3048 if (amdgpu_dc > 0)
044a48f4 3049 DRM_INFO_ONCE("Display Core has been requested via kernel parameter "
93b09a9a 3050 "but isn't supported by ASIC, ignoring\n");
4562236b
HW
3051 return false;
3052 }
3053}
3054
3055/**
3056 * amdgpu_device_has_dc_support - check if dc is supported
3057 *
982a820b 3058 * @adev: amdgpu_device pointer
4562236b
HW
3059 *
3060 * Returns true for supported, false for not supported
3061 */
3062bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3063{
c997e8e2 3064 if (amdgpu_sriov_vf(adev) || adev->enable_virtual_display)
2555039d
XY
3065 return false;
3066
4562236b
HW
3067 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3068}
3069
d4535e2c
AG
3070
3071static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3072{
3073 struct amdgpu_device *adev =
3074 container_of(__work, struct amdgpu_device, xgmi_reset_work);
d95e8e97 3075 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
d4535e2c 3076
c6a6e2db
AG
3077 /* It's a bug to not have a hive within this function */
3078 if (WARN_ON(!hive))
3079 return;
3080
3081 /*
3082 * Use task barrier to synchronize all xgmi reset works across the
3083 * hive. task_barrier_enter and task_barrier_exit will block
3084 * until all the threads running the xgmi reset works reach
3085 * those points. task_barrier_full will do both blocks.
3086 */
3087 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3088
3089 task_barrier_enter(&hive->tb);
4a580877 3090 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
c6a6e2db
AG
3091
3092 if (adev->asic_reset_res)
3093 goto fail;
3094
3095 task_barrier_exit(&hive->tb);
4a580877 3096 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
c6a6e2db
AG
3097
3098 if (adev->asic_reset_res)
3099 goto fail;
43c4d576
JC
3100
3101 if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count)
3102 adev->mmhub.funcs->reset_ras_error_count(adev);
c6a6e2db
AG
3103 } else {
3104
3105 task_barrier_full(&hive->tb);
3106 adev->asic_reset_res = amdgpu_asic_reset(adev);
3107 }
ce316fa5 3108
c6a6e2db 3109fail:
d4535e2c 3110 if (adev->asic_reset_res)
fed184e9 3111 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
4a580877 3112 adev->asic_reset_res, adev_to_drm(adev)->unique);
d95e8e97 3113 amdgpu_put_xgmi_hive(hive);
d4535e2c
AG
3114}
3115
71f98027
AD
3116static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3117{
3118 char *input = amdgpu_lockup_timeout;
3119 char *timeout_setting = NULL;
3120 int index = 0;
3121 long timeout;
3122 int ret = 0;
3123
3124 /*
3125 * By default, the timeout for non-compute jobs is 10000 ms.
3126 * And there is no timeout enforced on compute jobs.
3127 * In SR-IOV or passthrough mode, timeout for compute
b7b2a316 3128 * jobs is 60000 ms by default.
71f98027
AD
3129 */
3130 adev->gfx_timeout = msecs_to_jiffies(10000);
3131 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
9882e278
ED
3132 if (amdgpu_sriov_vf(adev))
3133 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3134 msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
3135 else if (amdgpu_passthrough(adev))
b7b2a316 3136 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027
AD
3137 else
3138 adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
3139
f440ff44 3140 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 3141 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 3142 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
3143 ret = kstrtol(timeout_setting, 0, &timeout);
3144 if (ret)
3145 return ret;
3146
3147 if (timeout == 0) {
3148 index++;
3149 continue;
3150 } else if (timeout < 0) {
3151 timeout = MAX_SCHEDULE_TIMEOUT;
3152 } else {
3153 timeout = msecs_to_jiffies(timeout);
3154 }
3155
3156 switch (index++) {
3157 case 0:
3158 adev->gfx_timeout = timeout;
3159 break;
3160 case 1:
3161 adev->compute_timeout = timeout;
3162 break;
3163 case 2:
3164 adev->sdma_timeout = timeout;
3165 break;
3166 case 3:
3167 adev->video_timeout = timeout;
3168 break;
3169 default:
3170 break;
3171 }
3172 }
3173 /*
3174 * There is only one value specified and
3175 * it should apply to all non-compute jobs.
3176 */
bcccee89 3177 if (index == 1) {
71f98027 3178 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
3179 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3180 adev->compute_timeout = adev->gfx_timeout;
3181 }
71f98027
AD
3182 }
3183
3184 return ret;
3185}
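/*
 * Example (illustrative only) of the amdgpu.lockup_timeout module parameter
 * consumed above. Values are in ms, in the order gfx,compute,sdma,video;
 * 0 keeps the default and a negative value means no timeout
 * (MAX_SCHEDULE_TIMEOUT), e.g.
 *
 *	amdgpu.lockup_timeout=10000,60000,10000,10000
 *	amdgpu.lockup_timeout=10000
 *
 * A single value applies to gfx, sdma and video; compute inherits it only
 * under SR-IOV or passthrough.
 */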
d4535e2c 3186
77f3a5cd
ND
3187static const struct attribute *amdgpu_dev_attributes[] = {
3188 &dev_attr_product_name.attr,
3189 &dev_attr_product_number.attr,
3190 &dev_attr_serial_number.attr,
3191 &dev_attr_pcie_replay_count.attr,
3192 NULL
3193};
3194
c9a6b82f 3195
d38ceaf9
AD
3196/**
3197 * amdgpu_device_init - initialize the driver
3198 *
3199 * @adev: amdgpu_device pointer
d38ceaf9
AD
3200 * @flags: driver flags
3201 *
3202 * Initializes the driver info and hw (all asics).
3203 * Returns 0 for success or an error on failure.
3204 * Called at driver startup.
3205 */
3206int amdgpu_device_init(struct amdgpu_device *adev,
d38ceaf9
AD
3207 uint32_t flags)
3208{
8aba21b7
LT
3209 struct drm_device *ddev = adev_to_drm(adev);
3210 struct pci_dev *pdev = adev->pdev;
d38ceaf9 3211 int r, i;
fd496ca8 3212 bool atpx = false;
95844d20 3213 u32 max_MBps;
d38ceaf9
AD
3214
3215 adev->shutdown = false;
d38ceaf9 3216 adev->flags = flags;
4e66d7d2
YZ
3217
3218 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3219 adev->asic_type = amdgpu_force_asic_type;
3220 else
3221 adev->asic_type = flags & AMD_ASIC_MASK;
3222
d38ceaf9 3223 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 3224 if (amdgpu_emu_mode == 1)
8bdab6bb 3225 adev->usec_timeout *= 10;
770d13b1 3226 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
3227 adev->accel_working = false;
3228 adev->num_rings = 0;
3229 adev->mman.buffer_funcs = NULL;
3230 adev->mman.buffer_funcs_ring = NULL;
3231 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 3232 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 3233 adev->gmc.gmc_funcs = NULL;
f54d1867 3234 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 3235 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
3236
3237 adev->smc_rreg = &amdgpu_invalid_rreg;
3238 adev->smc_wreg = &amdgpu_invalid_wreg;
3239 adev->pcie_rreg = &amdgpu_invalid_rreg;
3240 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
3241 adev->pciep_rreg = &amdgpu_invalid_rreg;
3242 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
3243 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3244 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
3245 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3246 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3247 adev->didt_rreg = &amdgpu_invalid_rreg;
3248 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
3249 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3250 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
3251 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3252 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3253
3e39ab90
AD
3254 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3255 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3256 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
3257
3258 /* mutex initialization is all done here so we
3259 * can recall functions without having locking issues */
d38ceaf9 3260 atomic_set(&adev->irq.ih.lock, 0);
0e5ca0d1 3261 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
3262 mutex_init(&adev->pm.mutex);
3263 mutex_init(&adev->gfx.gpu_clock_mutex);
3264 mutex_init(&adev->srbm_mutex);
b8866c26 3265 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3266 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 3267 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3268 mutex_init(&adev->mn_lock);
e23b74aa 3269 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3270 hash_init(adev->mn_hash);
53b3f8f4 3271 atomic_set(&adev->in_gpu_reset, 0);
6049db43 3272 init_rwsem(&adev->reset_sem);
32eaeae0 3273 mutex_init(&adev->psp.mutex);
bd052211 3274 mutex_init(&adev->notifier_lock);
d38ceaf9 3275
912dfc84
EQ
3276 r = amdgpu_device_check_arguments(adev);
3277 if (r)
3278 return r;
d38ceaf9 3279
d38ceaf9
AD
3280 spin_lock_init(&adev->mmio_idx_lock);
3281 spin_lock_init(&adev->smc_idx_lock);
3282 spin_lock_init(&adev->pcie_idx_lock);
3283 spin_lock_init(&adev->uvd_ctx_idx_lock);
3284 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3285 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3286 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3287 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3288 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3289
0c4e7fa5
CZ
3290 INIT_LIST_HEAD(&adev->shadow_list);
3291 mutex_init(&adev->shadow_list_lock);
3292
beff74bc
AD
3293 INIT_DELAYED_WORK(&adev->delayed_init_work,
3294 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3295 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3296 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3297
d4535e2c
AG
3298 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3299
d23ee13f 3300 adev->gfx.gfx_off_req_count = 1;
b6e79d9a 3301 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3302
b265bdbd
EQ
3303 atomic_set(&adev->throttling_logging_enabled, 1);
3304 /*
3305 * If throttling continues, logging will be performed every minute
3306 * to avoid log flooding. "-1" is subtracted since the thermal
3307 * throttling interrupt comes every second. Thus, the total logging
3308 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
3309 * for throttling interrupt) = 60 seconds.
3310 */
3311 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3312 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3313
0fa49558
AX
3314 /* Registers mapping */
3315 /* TODO: block userspace mapping of io register */
da69c161
KW
3316 if (adev->asic_type >= CHIP_BONAIRE) {
3317 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3318 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3319 } else {
3320 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3321 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3322 }
d38ceaf9 3323
d38ceaf9
AD
3324 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3325 if (adev->rmmio == NULL) {
3326 return -ENOMEM;
3327 }
3328 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3329 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3330
d38ceaf9
AD
3331 /* io port mapping */
3332 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
3333 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
3334 adev->rio_mem_size = pci_resource_len(adev->pdev, i);
3335 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
3336 break;
3337 }
3338 }
3339 if (adev->rio_mem == NULL)
b64a18c5 3340 DRM_INFO("PCI I/O BAR is not found.\n");
d38ceaf9 3341
b2109d8e
JX
3342 /* enable PCIE atomic ops */
3343 r = pci_enable_atomic_ops_to_root(adev->pdev,
3344 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3345 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3346 if (r) {
3347 adev->have_atomics_support = false;
3348 DRM_INFO("PCIE atomic ops is not supported\n");
3349 } else {
3350 adev->have_atomics_support = true;
3351 }
3352
5494d864
AD
3353 amdgpu_device_get_pcie_info(adev);
3354
b239c017
JX
3355 if (amdgpu_mcbp)
3356 DRM_INFO("MCBP is enabled\n");
3357
5f84cc63
JX
3358 if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
3359 adev->enable_mes = true;
3360
3aa0115d
ML
3361 /* detect hw virtualization here */
3362 amdgpu_detect_virtualization(adev);
3363
dffa11b4
ML
3364 r = amdgpu_device_get_job_timeout_settings(adev);
3365 if (r) {
3366 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4192f7b5 3367 goto failed_unmap;
a190d1c7
XY
3368 }
3369
d38ceaf9 3370 /* early init functions */
06ec9070 3371 r = amdgpu_device_ip_early_init(adev);
d38ceaf9 3372 if (r)
4192f7b5 3373 goto failed_unmap;
d38ceaf9 3374
6585661d
OZ
3375 /* doorbell bar mapping and doorbell index init*/
3376 amdgpu_device_doorbell_init(adev);
3377
d38ceaf9
AD
3378 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
3379 /* this will fail for cards that aren't VGA class devices, just
3380 * ignore it */
38d6be81
AD
3381 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
3382 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
d38ceaf9 3383
fd496ca8
AD
3384 if (amdgpu_device_supports_atpx(ddev))
3385 atpx = true;
3840c5bc
AD
3386 if (amdgpu_has_atpx() &&
3387 (amdgpu_is_atpx_hybrid() ||
3388 amdgpu_has_atpx_dgpu_power_cntl()) &&
3389 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 3390 vga_switcheroo_register_client(adev->pdev,
fd496ca8
AD
3391 &amdgpu_switcheroo_ops, atpx);
3392 if (atpx)
d38ceaf9
AD
3393 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
3394
9475a943
SL
3395 if (amdgpu_emu_mode == 1) {
3396 /* post the asic on emulation mode */
3397 emu_soc_asic_init(adev);
bfca0289 3398 goto fence_driver_init;
9475a943 3399 }
bfca0289 3400
4e99a44e
ML
3401 /* detect if we have an SRIOV vbios */
3402 amdgpu_device_detect_sriov_bios(adev);
048765ad 3403
95e8e59e
AD
3404 /* check if we need to reset the asic
3405 * E.g., driver was not cleanly unloaded previously, etc.
3406 */
f14899fd 3407 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
95e8e59e
AD
3408 r = amdgpu_asic_reset(adev);
3409 if (r) {
3410 dev_err(adev->dev, "asic reset on init failed\n");
3411 goto failed;
3412 }
3413 }
3414
8f66090b 3415 pci_enable_pcie_error_reporting(adev->pdev);
c9a6b82f 3416
d38ceaf9 3417 /* Post card if necessary */
39c640c0 3418 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3419 if (!adev->bios) {
bec86378 3420 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3421 r = -EINVAL;
3422 goto failed;
d38ceaf9 3423 }
bec86378 3424 DRM_INFO("GPU posting now...\n");
4d2997ab 3425 r = amdgpu_device_asic_init(adev);
4e99a44e
ML
3426 if (r) {
3427 dev_err(adev->dev, "gpu post error!\n");
3428 goto failed;
3429 }
d38ceaf9
AD
3430 }
3431
88b64e95
AD
3432 if (adev->is_atom_fw) {
3433 /* Initialize clocks */
3434 r = amdgpu_atomfirmware_get_clock_info(adev);
3435 if (r) {
3436 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 3437 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
3438 goto failed;
3439 }
3440 } else {
a5bde2f9
AD
3441 /* Initialize clocks */
3442 r = amdgpu_atombios_get_clock_info(adev);
3443 if (r) {
3444 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 3445 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 3446 goto failed;
a5bde2f9
AD
3447 }
3448 /* init i2c buses */
4562236b
HW
3449 if (!amdgpu_device_has_dc_support(adev))
3450 amdgpu_atombios_i2c_init(adev);
2c1a2784 3451 }
d38ceaf9 3452
bfca0289 3453fence_driver_init:
d38ceaf9
AD
3454 /* Fence driver */
3455 r = amdgpu_fence_driver_init(adev);
2c1a2784
AD
3456 if (r) {
3457 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
e23b74aa 3458 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3459 goto failed;
2c1a2784 3460 }
d38ceaf9
AD
3461
3462 /* init the mode config */
4a580877 3463 drm_mode_config_init(adev_to_drm(adev));
d38ceaf9 3464
06ec9070 3465 r = amdgpu_device_ip_init(adev);
d38ceaf9 3466 if (r) {
8840a387 3467 /* failed in exclusive mode due to timeout */
3468 if (amdgpu_sriov_vf(adev) &&
3469 !amdgpu_sriov_runtime(adev) &&
3470 amdgpu_virt_mmio_blocked(adev) &&
3471 !amdgpu_virt_wait_reset(adev)) {
3472 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
3473 /* Don't send request since VF is inactive. */
3474 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3475 adev->virt.ops = NULL;
8840a387 3476 r = -EAGAIN;
3477 goto failed;
3478 }
06ec9070 3479 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3480 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
83ba126a 3481 goto failed;
d38ceaf9
AD
3482 }
3483
d69b8971
YZ
3484 dev_info(adev->dev,
3485 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
3486 adev->gfx.config.max_shader_engines,
3487 adev->gfx.config.max_sh_per_se,
3488 adev->gfx.config.max_cu_per_sh,
3489 adev->gfx.cu_info.number);
3490
d38ceaf9
AD
3491 adev->accel_working = true;
3492
e59c0205
AX
3493 amdgpu_vm_check_compute_bug(adev);
3494
95844d20
MO
3495 /* Initialize the buffer migration limit. */
3496 if (amdgpu_moverate >= 0)
3497 max_MBps = amdgpu_moverate;
3498 else
3499 max_MBps = 8; /* Allow 8 MB/s. */
3500 /* Get a log2 for easy divisions. */
3501 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
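/*
 * Worked example for the ilog2() above: with the default max_MBps = 8,
 * ilog2(8) == 3, so later accounting can divide a byte count by the rate
 * with a shift, e.g. (bytes >> adev->mm_stats.log2_max_MBps), instead of a
 * full division. A non-power-of-two amdgpu_moverate is effectively rounded
 * down to the nearest power of two by ilog2().
 */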
3502
9bc92b9c
ML
3503 amdgpu_fbdev_init(adev);
3504
d2f52ac8 3505 r = amdgpu_pm_sysfs_init(adev);
7c868b59
YT
3506 if (r) {
3507 adev->pm_sysfs_en = false;
d2f52ac8 3508 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
7c868b59
YT
3509 } else
3510 adev->pm_sysfs_en = true;
d2f52ac8 3511
5bb23532 3512 r = amdgpu_ucode_sysfs_init(adev);
7c868b59
YT
3513 if (r) {
3514 adev->ucode_sysfs_en = false;
5bb23532 3515 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
7c868b59
YT
3516 } else
3517 adev->ucode_sysfs_en = true;
5bb23532 3518
d38ceaf9
AD
3519 if ((amdgpu_testing & 1)) {
3520 if (adev->accel_working)
3521 amdgpu_test_moves(adev);
3522 else
3523 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
3524 }
d38ceaf9
AD
3525 if (amdgpu_benchmarking) {
3526 if (adev->accel_working)
3527 amdgpu_benchmark(adev, amdgpu_benchmarking);
3528 else
3529 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
3530 }
3531
b0adca4d
EQ
3532 /*
3533 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3534 * Otherwise the mgpu fan boost feature will be skipped because the
3535 * gpu instance count would be too low.
3536 */
3537 amdgpu_register_gpu_instance(adev);
3538
d38ceaf9
AD
3539 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3540 * explicit gating rather than handling it automatically.
3541 */
06ec9070 3542 r = amdgpu_device_ip_late_init(adev);
2c1a2784 3543 if (r) {
06ec9070 3544 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
e23b74aa 3545 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
83ba126a 3546 goto failed;
2c1a2784 3547 }
d38ceaf9 3548
108c6a63 3549 /* must succeed. */
511fdbc3 3550 amdgpu_ras_resume(adev);
108c6a63 3551
beff74bc
AD
3552 queue_delayed_work(system_wq, &adev->delayed_init_work,
3553 msecs_to_jiffies(AMDGPU_RESUME_MS));
3554
2c738637
ML
3555 if (amdgpu_sriov_vf(adev))
3556 flush_delayed_work(&adev->delayed_init_work);
3557
77f3a5cd 3558 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
5aea5327 3559 if (r)
77f3a5cd 3560 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166 3561
d155bef0
AB
3562 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3563 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
3564 if (r)
3565 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3566
c1dd4aa6
AG
3567 /* Keep the stored PCI config space at hand for restore after a sudden PCI error */
3568 if (amdgpu_device_cache_pci_state(adev->pdev))
3569 pci_restore_state(pdev);
3570
d38ceaf9 3571 return 0;
83ba126a
AD
3572
3573failed:
89041940 3574 amdgpu_vf_error_trans_all(adev);
fd496ca8 3575 if (atpx)
83ba126a 3576 vga_switcheroo_fini_domain_pm_ops(adev->dev);
8840a387 3577
4192f7b5
AD
3578failed_unmap:
3579 iounmap(adev->rmmio);
3580 adev->rmmio = NULL;
3581
83ba126a 3582 return r;
d38ceaf9
AD
3583}
3584
d38ceaf9
AD
3585/**
3586 * amdgpu_device_fini - tear down the driver
3587 *
3588 * @adev: amdgpu_device pointer
3589 *
3590 * Tear down the driver info (all asics).
3591 * Called at driver shutdown.
3592 */
3593void amdgpu_device_fini(struct amdgpu_device *adev)
3594{
aac89168 3595 dev_info(adev->dev, "amdgpu: finishing device.\n");
9f875167 3596 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 3597 adev->shutdown = true;
9f875167 3598
c1dd4aa6
AG
3599 kfree(adev->pci_state);
3600
752c683d
ML
3601 /* make sure the IB tests have finished before entering exclusive mode
3602 * to avoid preemption during the IB tests
3603 */
519b8b76 3604 if (amdgpu_sriov_vf(adev)) {
752c683d 3605 amdgpu_virt_request_full_gpu(adev, false);
519b8b76
BZ
3606 amdgpu_virt_fini_data_exchange(adev);
3607 }
752c683d 3608
e5b03032
ML
3609 /* disable all interrupts */
3610 amdgpu_irq_disable_all(adev);
ff97cba8
ML
3611 if (adev->mode_info.mode_config_initialized){
3612 if (!amdgpu_device_has_dc_support(adev))
4a580877 3613 drm_helper_force_disable_all(adev_to_drm(adev));
ff97cba8 3614 else
4a580877 3615 drm_atomic_helper_shutdown(adev_to_drm(adev));
ff97cba8 3616 }
d38ceaf9 3617 amdgpu_fence_driver_fini(adev);
7c868b59
YT
3618 if (adev->pm_sysfs_en)
3619 amdgpu_pm_sysfs_fini(adev);
d38ceaf9 3620 amdgpu_fbdev_fini(adev);
e230ac11 3621 amdgpu_device_ip_fini(adev);
75e1658e
ND
3622 release_firmware(adev->firmware.gpu_info_fw);
3623 adev->firmware.gpu_info_fw = NULL;
d38ceaf9
AD
3624 adev->accel_working = false;
3625 /* free i2c buses */
4562236b
HW
3626 if (!amdgpu_device_has_dc_support(adev))
3627 amdgpu_i2c_fini(adev);
bfca0289
SL
3628
3629 if (amdgpu_emu_mode != 1)
3630 amdgpu_atombios_fini(adev);
3631
d38ceaf9
AD
3632 kfree(adev->bios);
3633 adev->bios = NULL;
3840c5bc
AD
3634 if (amdgpu_has_atpx() &&
3635 (amdgpu_is_atpx_hybrid() ||
3636 amdgpu_has_atpx_dgpu_power_cntl()) &&
3637 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 3638 vga_switcheroo_unregister_client(adev->pdev);
fd496ca8 3639 if (amdgpu_device_supports_atpx(adev_to_drm(adev)))
83ba126a 3640 vga_switcheroo_fini_domain_pm_ops(adev->dev);
38d6be81
AD
3641 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
3642 vga_client_register(adev->pdev, NULL, NULL, NULL);
d38ceaf9
AD
3643 if (adev->rio_mem)
3644 pci_iounmap(adev->pdev, adev->rio_mem);
3645 adev->rio_mem = NULL;
3646 iounmap(adev->rmmio);
3647 adev->rmmio = NULL;
06ec9070 3648 amdgpu_device_doorbell_fini(adev);
e9bc1bf7 3649
7c868b59
YT
3650 if (adev->ucode_sysfs_en)
3651 amdgpu_ucode_sysfs_fini(adev);
77f3a5cd
ND
3652
3653 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
d155bef0
AB
3654 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3655 amdgpu_pmu_fini(adev);
72de33f8 3656 if (adev->mman.discovery_bin)
a190d1c7 3657 amdgpu_discovery_fini(adev);
d38ceaf9
AD
3658}
3659
3660
3661/*
3662 * Suspend & resume.
3663 */
3664/**
810ddc3a 3665 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 3666 *
87e3f136 3667 * @dev: drm dev pointer
87e3f136 3668 * @fbcon : notify the fbdev of suspend
d38ceaf9
AD
3669 *
3670 * Puts the hw in the suspend state (all asics).
3671 * Returns 0 for success or an error on failure.
3672 * Called at driver suspend.
3673 */
de185019 3674int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3675{
3676 struct amdgpu_device *adev;
3677 struct drm_crtc *crtc;
3678 struct drm_connector *connector;
f8d2d39e 3679 struct drm_connector_list_iter iter;
5ceb54c6 3680 int r;
d38ceaf9 3681
1348969a 3682 adev = drm_to_adev(dev);
d38ceaf9
AD
3683
3684 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3685 return 0;
3686
44779b43 3687 adev->in_suspend = true;
d38ceaf9
AD
3688 drm_kms_helper_poll_disable(dev);
3689
5f818173
S
3690 if (fbcon)
3691 amdgpu_fbdev_set_suspend(adev, 1);
3692
beff74bc 3693 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 3694
4562236b
HW
3695 if (!amdgpu_device_has_dc_support(adev)) {
3696 /* turn off display hw */
3697 drm_modeset_lock_all(dev);
f8d2d39e
LP
3698 drm_connector_list_iter_begin(dev, &iter);
3699 drm_for_each_connector_iter(connector, &iter)
3700 drm_helper_connector_dpms(connector,
3701 DRM_MODE_DPMS_OFF);
3702 drm_connector_list_iter_end(&iter);
4562236b 3703 drm_modeset_unlock_all(dev);
fe1053b7
AD
3704 /* unpin the front buffers and cursors */
3705 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3706 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3707 struct drm_framebuffer *fb = crtc->primary->fb;
3708 struct amdgpu_bo *robj;
3709
91334223 3710 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3711 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3712 r = amdgpu_bo_reserve(aobj, true);
3713 if (r == 0) {
3714 amdgpu_bo_unpin(aobj);
3715 amdgpu_bo_unreserve(aobj);
3716 }
756e6880 3717 }
756e6880 3718
fe1053b7
AD
3719 if (fb == NULL || fb->obj[0] == NULL) {
3720 continue;
3721 }
3722 robj = gem_to_amdgpu_bo(fb->obj[0]);
3723 /* don't unpin kernel fb objects */
3724 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
3725 r = amdgpu_bo_reserve(robj, true);
3726 if (r == 0) {
3727 amdgpu_bo_unpin(robj);
3728 amdgpu_bo_unreserve(robj);
3729 }
d38ceaf9
AD
3730 }
3731 }
3732 }
fe1053b7 3733
5e6932fe 3734 amdgpu_ras_suspend(adev);
3735
fe1053b7
AD
3736 r = amdgpu_device_ip_suspend_phase1(adev);
3737
ad887af9 3738 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
94fa5660 3739
d38ceaf9
AD
3740 /* evict vram memory */
3741 amdgpu_bo_evict_vram(adev);
3742
5ceb54c6 3743 amdgpu_fence_driver_suspend(adev);
d38ceaf9 3744
b00978de
PL
3745 if (adev->in_poweroff_reboot_com ||
3746 !amdgpu_acpi_is_s0ix_supported(adev) || amdgpu_in_reset(adev))
628c36d7
PL
3747 r = amdgpu_device_ip_suspend_phase2(adev);
3748 else
3749 amdgpu_gfx_state_change_set(adev, sGpuChangeState_D3Entry);
a0a71e49
AD
3750 /* evict remaining vram memory
3751 * This second call to evict vram is to evict the gart page table
3752 * using the CPU.
3753 */
d38ceaf9
AD
3754 amdgpu_bo_evict_vram(adev);
3755
d38ceaf9
AD
3756 return 0;
3757}
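/*
 * Illustrative sketch (not part of this file) of how the helper above is
 * typically reached from the driver's dev_pm_ops; the real callbacks live in
 * amdgpu_drv.c and their exact names/signatures may differ:
 *
 *   static int amdgpu_pmops_suspend(struct device *dev)
 *   {
 *       struct drm_device *drm_dev = dev_get_drvdata(dev);
 *
 *       return amdgpu_device_suspend(drm_dev, true);
 *   }
 */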
3758
3759/**
810ddc3a 3760 * amdgpu_device_resume - initiate device resume
d38ceaf9 3761 *
87e3f136 3762 * @dev: drm dev pointer
87e3f136 3763 * @fbcon : notify the fbdev of resume
d38ceaf9
AD
3764 *
3765 * Bring the hw back to operating state (all asics).
3766 * Returns 0 for success or an error on failure.
3767 * Called at driver resume.
3768 */
de185019 3769int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3770{
3771 struct drm_connector *connector;
f8d2d39e 3772 struct drm_connector_list_iter iter;
1348969a 3773 struct amdgpu_device *adev = drm_to_adev(dev);
756e6880 3774 struct drm_crtc *crtc;
03161a6e 3775 int r = 0;
d38ceaf9
AD
3776
3777 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3778 return 0;
3779
9ca5b8a1 3780 if (amdgpu_acpi_is_s0ix_supported(adev))
628c36d7
PL
3781 amdgpu_gfx_state_change_set(adev, sGpuChangeState_D0Entry);
3782
d38ceaf9 3783 /* post card */
39c640c0 3784 if (amdgpu_device_need_post(adev)) {
4d2997ab 3785 r = amdgpu_device_asic_init(adev);
74b0b157 3786 if (r)
aac89168 3787 dev_err(adev->dev, "amdgpu asic init failed\n");
74b0b157 3788 }
d38ceaf9 3789
06ec9070 3790 r = amdgpu_device_ip_resume(adev);
e6707218 3791 if (r) {
aac89168 3792 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
4d3b9ae5 3793 return r;
e6707218 3794 }
5ceb54c6
AD
3795 amdgpu_fence_driver_resume(adev);
3796
d38ceaf9 3797
06ec9070 3798 r = amdgpu_device_ip_late_init(adev);
03161a6e 3799 if (r)
4d3b9ae5 3800 return r;
d38ceaf9 3801
beff74bc
AD
3802 queue_delayed_work(system_wq, &adev->delayed_init_work,
3803 msecs_to_jiffies(AMDGPU_RESUME_MS));
3804
fe1053b7
AD
3805 if (!amdgpu_device_has_dc_support(adev)) {
3806 /* pin cursors */
3807 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3808 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3809
91334223 3810 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3811 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3812 r = amdgpu_bo_reserve(aobj, true);
3813 if (r == 0) {
3814 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
3815 if (r != 0)
aac89168 3816 dev_err(adev->dev, "Failed to pin cursor BO (%d)\n", r);
fe1053b7
AD
3817 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
3818 amdgpu_bo_unreserve(aobj);
3819 }
756e6880
AD
3820 }
3821 }
3822 }
ad887af9 3823 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
ba997709
YZ
3824 if (r)
3825 return r;
756e6880 3826
96a5d8d4 3827 /* Make sure IB tests flushed */
beff74bc 3828 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 3829
d38ceaf9
AD
3830 /* blat the mode back in */
3831 if (fbcon) {
4562236b
HW
3832 if (!amdgpu_device_has_dc_support(adev)) {
3833 /* pre DCE11 */
3834 drm_helper_resume_force_mode(dev);
3835
3836 /* turn on display hw */
3837 drm_modeset_lock_all(dev);
f8d2d39e
LP
3838
3839 drm_connector_list_iter_begin(dev, &iter);
3840 drm_for_each_connector_iter(connector, &iter)
3841 drm_helper_connector_dpms(connector,
3842 DRM_MODE_DPMS_ON);
3843 drm_connector_list_iter_end(&iter);
3844
4562236b 3845 drm_modeset_unlock_all(dev);
d38ceaf9 3846 }
4d3b9ae5 3847 amdgpu_fbdev_set_suspend(adev, 0);
d38ceaf9
AD
3848 }
3849
3850 drm_kms_helper_poll_enable(dev);
23a1a9e5 3851
5e6932fe 3852 amdgpu_ras_resume(adev);
3853
23a1a9e5
L
3854 /*
3855 * Most of the connector probing functions try to acquire runtime pm
3856 * refs to ensure that the GPU is powered on when connector polling is
3857 * performed. Since we're calling this from a runtime PM callback,
3858 * trying to acquire rpm refs will cause us to deadlock.
3859 *
3860 * Since we're guaranteed to be holding the rpm lock, it's safe to
3861 * temporarily disable the rpm helpers so this doesn't deadlock us.
3862 */
3863#ifdef CONFIG_PM
3864 dev->dev->power.disable_depth++;
3865#endif
4562236b
HW
3866 if (!amdgpu_device_has_dc_support(adev))
3867 drm_helper_hpd_irq_event(dev);
3868 else
3869 drm_kms_helper_hotplug_event(dev);
23a1a9e5
L
3870#ifdef CONFIG_PM
3871 dev->dev->power.disable_depth--;
3872#endif
44779b43
RZ
3873 adev->in_suspend = false;
3874
4d3b9ae5 3875 return 0;
d38ceaf9
AD
3876}
3877
e3ecdffa
AD
3878/**
3879 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
3880 *
3881 * @adev: amdgpu_device pointer
3882 *
3883 * The list of all the hardware IPs that make up the asic is walked and
3884 * the check_soft_reset callbacks are run. check_soft_reset determines
3885 * if the asic is still hung or not.
3886 * Returns true if any of the IPs are still in a hung state, false if not.
3887 */
06ec9070 3888static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
3889{
3890 int i;
3891 bool asic_hang = false;
3892
f993d628
ML
3893 if (amdgpu_sriov_vf(adev))
3894 return true;
3895
8bc04c29
AD
3896 if (amdgpu_asic_need_full_reset(adev))
3897 return true;
3898
63fbf42f 3899 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3900 if (!adev->ip_blocks[i].status.valid)
63fbf42f 3901 continue;
a1255107
AD
3902 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
3903 adev->ip_blocks[i].status.hang =
3904 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
3905 if (adev->ip_blocks[i].status.hang) {
aac89168 3906 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
3907 asic_hang = true;
3908 }
3909 }
3910 return asic_hang;
3911}
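/*
 * Hedged sketch of what an IP block's check_soft_reset callback looks like;
 * the exact prototype is declared in amd_shared.h and the function name below
 * is hypothetical:
 *
 *   static bool gfx_vN_0_check_soft_reset(void *handle)
 *   {
 *       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 *
 *       // Read the block's status registers and report whether it is hung.
 *       return false;
 *   }
 */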
3912
e3ecdffa
AD
3913/**
3914 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3915 *
3916 * @adev: amdgpu_device pointer
3917 *
3918 * The list of all the hardware IPs that make up the asic is walked and the
3919 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
3920 * handles any IP specific hardware or software state changes that are
3921 * necessary for a soft reset to succeed.
3922 * Returns 0 on success, negative error code on failure.
3923 */
06ec9070 3924static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
3925{
3926 int i, r = 0;
3927
3928 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3929 if (!adev->ip_blocks[i].status.valid)
d31a501e 3930 continue;
a1255107
AD
3931 if (adev->ip_blocks[i].status.hang &&
3932 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3933 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
3934 if (r)
3935 return r;
3936 }
3937 }
3938
3939 return 0;
3940}
3941
e3ecdffa
AD
3942/**
3943 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3944 *
3945 * @adev: amdgpu_device pointer
3946 *
3947 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
3948 * reset is necessary to recover.
3949 * Returns true if a full asic reset is required, false if not.
3950 */
06ec9070 3951static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 3952{
da146d3b
AD
3953 int i;
3954
8bc04c29
AD
3955 if (amdgpu_asic_need_full_reset(adev))
3956 return true;
3957
da146d3b 3958 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3959 if (!adev->ip_blocks[i].status.valid)
da146d3b 3960 continue;
a1255107
AD
3961 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3962 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3963 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
3964 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3965 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 3966 if (adev->ip_blocks[i].status.hang) {
aac89168 3967 dev_info(adev->dev, "Some block need full reset!\n");
da146d3b
AD
3968 return true;
3969 }
3970 }
35d782fe
CZ
3971 }
3972 return false;
3973}
3974
e3ecdffa
AD
3975/**
3976 * amdgpu_device_ip_soft_reset - do a soft reset
3977 *
3978 * @adev: amdgpu_device pointer
3979 *
3980 * The list of all the hardware IPs that make up the asic is walked and the
3981 * soft_reset callbacks are run if the block is hung. soft_reset handles any
3982 * IP specific hardware or software state changes that are necessary to soft
3983 * reset the IP.
3984 * Returns 0 on success, negative error code on failure.
3985 */
06ec9070 3986static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3987{
3988 int i, r = 0;
3989
3990 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3991 if (!adev->ip_blocks[i].status.valid)
35d782fe 3992 continue;
a1255107
AD
3993 if (adev->ip_blocks[i].status.hang &&
3994 adev->ip_blocks[i].version->funcs->soft_reset) {
3995 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
3996 if (r)
3997 return r;
3998 }
3999 }
4000
4001 return 0;
4002}
4003
e3ecdffa
AD
4004/**
4005 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4006 *
4007 * @adev: amdgpu_device pointer
4008 *
4009 * The list of all the hardware IPs that make up the asic is walked and the
4010 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
4011 * handles any IP specific hardware or software state changes that are
4012 * necessary after the IP has been soft reset.
4013 * Returns 0 on success, negative error code on failure.
4014 */
06ec9070 4015static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4016{
4017 int i, r = 0;
4018
4019 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4020 if (!adev->ip_blocks[i].status.valid)
35d782fe 4021 continue;
a1255107
AD
4022 if (adev->ip_blocks[i].status.hang &&
4023 adev->ip_blocks[i].version->funcs->post_soft_reset)
4024 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
4025 if (r)
4026 return r;
4027 }
4028
4029 return 0;
4030}
4031
e3ecdffa 4032/**
c33adbc7 4033 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
4034 *
4035 * @adev: amdgpu_device pointer
4036 *
4037 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
4038 * restore things like GPUVM page tables after a GPU reset where
4039 * the contents of VRAM might be lost.
403009bf
CK
4040 *
4041 * Returns:
4042 * 0 on success, negative error code on failure.
e3ecdffa 4043 */
c33adbc7 4044static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 4045{
c41d1cf6 4046 struct dma_fence *fence = NULL, *next = NULL;
403009bf
CK
4047 struct amdgpu_bo *shadow;
4048 long r = 1, tmo;
c41d1cf6
ML
4049
4050 if (amdgpu_sriov_runtime(adev))
b045d3af 4051 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
4052 else
4053 tmo = msecs_to_jiffies(100);
4054
aac89168 4055 dev_info(adev->dev, "recover vram bo from shadow start\n");
c41d1cf6 4056 mutex_lock(&adev->shadow_list_lock);
403009bf
CK
4057 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
4058
4059 /* No need to recover an evicted BO */
4060 if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
b575f10d 4061 shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
403009bf
CK
4062 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
4063 continue;
4064
4065 r = amdgpu_bo_restore_shadow(shadow, &next);
4066 if (r)
4067 break;
4068
c41d1cf6 4069 if (fence) {
1712fb1a 4070 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
4071 dma_fence_put(fence);
4072 fence = next;
1712fb1a 4073 if (tmo == 0) {
4074 r = -ETIMEDOUT;
c41d1cf6 4075 break;
1712fb1a 4076 } else if (tmo < 0) {
4077 r = tmo;
4078 break;
4079 }
403009bf
CK
4080 } else {
4081 fence = next;
c41d1cf6 4082 }
c41d1cf6
ML
4083 }
4084 mutex_unlock(&adev->shadow_list_lock);
4085
403009bf
CK
4086 if (fence)
4087 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
4088 dma_fence_put(fence);
4089
1712fb1a 4090 if (r < 0 || tmo <= 0) {
aac89168 4091 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
4092 return -EIO;
4093 }
c41d1cf6 4094
aac89168 4095 dev_info(adev->dev, "recover vram bo from shadow done\n");
403009bf 4096 return 0;
c41d1cf6
ML
4097}
4098
a90ad3c2 4099
e3ecdffa 4100/**
06ec9070 4101 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e 4102 *
982a820b 4103 * @adev: amdgpu_device pointer
87e3f136 4104 * @from_hypervisor: request from hypervisor
5740682e
ML
4105 *
4106 * Do a VF FLR and reinitialize the ASIC.
3f48c681 4107 * Returns 0 on success, an error code otherwise.
e3ecdffa
AD
4108 */
4109static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4110 bool from_hypervisor)
5740682e
ML
4111{
4112 int r;
4113
4114 if (from_hypervisor)
4115 r = amdgpu_virt_request_full_gpu(adev, true);
4116 else
4117 r = amdgpu_virt_reset_gpu(adev);
4118 if (r)
4119 return r;
a90ad3c2 4120
b639c22c
JZ
4121 amdgpu_amdkfd_pre_reset(adev);
4122
a90ad3c2 4123 /* Resume IP prior to SMC */
06ec9070 4124 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
4125 if (r)
4126 goto error;
a90ad3c2 4127
c9ffa427 4128 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 4129 /* we need to recover the gart prior to running SMC/CP/SDMA resume */
6c28aed6 4130 amdgpu_gtt_mgr_recover(ttm_manager_type(&adev->mman.bdev, TTM_PL_TT));
a90ad3c2 4131
7a3e0bb2
RZ
4132 r = amdgpu_device_fw_loading(adev);
4133 if (r)
4134 return r;
4135
a90ad3c2 4136 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 4137 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
4138 if (r)
4139 goto error;
a90ad3c2
ML
4140
4141 amdgpu_irq_gpu_reset_resume_helper(adev);
5740682e 4142 r = amdgpu_ib_ring_tests(adev);
f81e8d53 4143 amdgpu_amdkfd_post_reset(adev);
a90ad3c2 4144
abc34253
ED
4145error:
4146 amdgpu_virt_release_full_gpu(adev, true);
c41d1cf6 4147 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 4148 amdgpu_inc_vram_lost(adev);
c33adbc7 4149 r = amdgpu_device_recover_vram(adev);
a90ad3c2
ML
4150 }
4151
4152 return r;
4153}
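/*
 * Summary of the VF recovery order implemented above: request full GPU access
 * (or a VF FLR via the hypervisor), reinit the early IP blocks, re-establish
 * the VF data exchange, recover the GTT, reload firmware, reinit the late IP
 * blocks, then re-enable interrupts and run IB tests before releasing full
 * GPU access back to the host. VRAM is only restored from shadows when the
 * GIM reports it was lost across the FLR.
 */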
4154
9a1cddd6 4155/**
4156 * amdgpu_device_has_job_running - check if there is any job in the pending list
4157 *
982a820b 4158 * @adev: amdgpu_device pointer
9a1cddd6 4159 *
4160 * check if there is any job in the pending list
4161 */
4162bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4163{
4164 int i;
4165 struct drm_sched_job *job;
4166
4167 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4168 struct amdgpu_ring *ring = adev->rings[i];
4169
4170 if (!ring || !ring->sched.thread)
4171 continue;
4172
4173 spin_lock(&ring->sched.job_list_lock);
6efa4b46
LT
4174 job = list_first_entry_or_null(&ring->sched.pending_list,
4175 struct drm_sched_job, list);
9a1cddd6 4176 spin_unlock(&ring->sched.job_list_lock);
4177 if (job)
4178 return true;
4179 }
4180 return false;
4181}
4182
12938fad
CK
4183/**
4184 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4185 *
982a820b 4186 * @adev: amdgpu_device pointer
12938fad
CK
4187 *
4188 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4189 * a hung GPU.
4190 */
4191bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4192{
4193 if (!amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 4194 dev_info(adev->dev, "Timeout, but no hardware hang detected.\n");
12938fad
CK
4195 return false;
4196 }
4197
3ba7b418
AG
4198 if (amdgpu_gpu_recovery == 0)
4199 goto disabled;
4200
4201 if (amdgpu_sriov_vf(adev))
4202 return true;
4203
4204 if (amdgpu_gpu_recovery == -1) {
4205 switch (adev->asic_type) {
fc42d47c
AG
4206 case CHIP_BONAIRE:
4207 case CHIP_HAWAII:
3ba7b418
AG
4208 case CHIP_TOPAZ:
4209 case CHIP_TONGA:
4210 case CHIP_FIJI:
4211 case CHIP_POLARIS10:
4212 case CHIP_POLARIS11:
4213 case CHIP_POLARIS12:
4214 case CHIP_VEGAM:
4215 case CHIP_VEGA20:
4216 case CHIP_VEGA10:
4217 case CHIP_VEGA12:
c43b849f 4218 case CHIP_RAVEN:
e9d4cf91 4219 case CHIP_ARCTURUS:
2cb44fb0 4220 case CHIP_RENOIR:
658c6639
AD
4221 case CHIP_NAVI10:
4222 case CHIP_NAVI14:
4223 case CHIP_NAVI12:
131a3c74 4224 case CHIP_SIENNA_CICHLID:
665fe4dc 4225 case CHIP_NAVY_FLOUNDER:
27859ee3 4226 case CHIP_DIMGREY_CAVEFISH:
3ba7b418
AG
4227 break;
4228 default:
4229 goto disabled;
4230 }
12938fad
CK
4231 }
4232
4233 return true;
3ba7b418
AG
4234
4235disabled:
aac89168 4236 dev_info(adev->dev, "GPU recovery disabled.\n");
3ba7b418 4237 return false;
12938fad
CK
4238}
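/*
 * The policy above is driven by the amdgpu_gpu_recovery module parameter:
 * 0 disables recovery, -1 restricts it to the ASICs allow-listed in the
 * switch statement (SR-IOV VFs always recover), and any other value enables
 * it unconditionally. As an assumed usage example, booting with
 * "amdgpu.gpu_recovery=1" on the kernel command line would force recovery on
 * an ASIC outside the list.
 */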
4239
5c6dd71e 4240
26bc5340
AG
4241static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
4242 struct amdgpu_job *job,
4243 bool *need_full_reset_arg)
4244{
4245 int i, r = 0;
4246 bool need_full_reset = *need_full_reset_arg;
71182665 4247
728e7e0c
JZ
4248 amdgpu_debugfs_wait_dump(adev);
4249
b602ca5f
TZ
4250 if (amdgpu_sriov_vf(adev)) {
4251 /* stop the data exchange thread */
4252 amdgpu_virt_fini_data_exchange(adev);
4253 }
4254
71182665 4255 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
4256 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4257 struct amdgpu_ring *ring = adev->rings[i];
4258
51687759 4259 if (!ring || !ring->sched.thread)
0875dc9e 4260 continue;
5740682e 4261
2f9d4084
ML
4262 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
4263 amdgpu_fence_driver_force_completion(ring);
0875dc9e 4264 }
d38ceaf9 4265
222b5f04
AG
4266 if(job)
4267 drm_sched_increase_karma(&job->base);
4268
1d721ed6 4269 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
4270 if (!amdgpu_sriov_vf(adev)) {
4271
4272 if (!need_full_reset)
4273 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4274
4275 if (!need_full_reset) {
4276 amdgpu_device_ip_pre_soft_reset(adev);
4277 r = amdgpu_device_ip_soft_reset(adev);
4278 amdgpu_device_ip_post_soft_reset(adev);
4279 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 4280 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
26bc5340
AG
4281 need_full_reset = true;
4282 }
4283 }
4284
4285 if (need_full_reset)
4286 r = amdgpu_device_ip_suspend(adev);
4287
4288 *need_full_reset_arg = need_full_reset;
4289 }
4290
4291 return r;
4292}
4293
041a62bc 4294static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
26bc5340 4295 struct list_head *device_list_handle,
7ac71382
AG
4296 bool *need_full_reset_arg,
4297 bool skip_hw_reset)
26bc5340
AG
4298{
4299 struct amdgpu_device *tmp_adev = NULL;
4300 bool need_full_reset = *need_full_reset_arg, vram_lost = false;
4301 int r = 0;
4302
4303 /*
4304 * ASIC reset has to be done on all XGMI hive nodes ASAP
4305 * to allow proper link negotiation in FW (within 1 sec)
4306 */
7ac71382 4307 if (!skip_hw_reset && need_full_reset) {
26bc5340 4308 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
041a62bc 4309 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 4310 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
c96cf282 4311 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
4312 r = -EALREADY;
4313 } else
4314 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 4315
041a62bc 4316 if (r) {
aac89168 4317 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4a580877 4318 r, adev_to_drm(tmp_adev)->unique);
041a62bc 4319 break;
ce316fa5
LM
4320 }
4321 }
4322
041a62bc
AG
4323 /* For XGMI wait for all resets to complete before proceed */
4324 if (!r) {
ce316fa5
LM
4325 list_for_each_entry(tmp_adev, device_list_handle,
4326 gmc.xgmi.head) {
4327 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4328 flush_work(&tmp_adev->xgmi_reset_work);
4329 r = tmp_adev->asic_reset_res;
4330 if (r)
4331 break;
ce316fa5
LM
4332 }
4333 }
4334 }
ce316fa5 4335 }
26bc5340 4336
43c4d576
JC
4337 if (!r && amdgpu_ras_intr_triggered()) {
4338 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4339 if (tmp_adev->mmhub.funcs &&
4340 tmp_adev->mmhub.funcs->reset_ras_error_count)
4341 tmp_adev->mmhub.funcs->reset_ras_error_count(tmp_adev);
4342 }
4343
00eaa571 4344 amdgpu_ras_intr_cleared();
43c4d576 4345 }
00eaa571 4346
26bc5340
AG
4347 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4348 if (need_full_reset) {
4349 /* post card */
4d2997ab 4350 if (amdgpu_device_asic_init(tmp_adev))
aac89168 4351 dev_warn(tmp_adev->dev, "asic atom init failed!");
26bc5340
AG
4352
4353 if (!r) {
4354 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
4355 r = amdgpu_device_ip_resume_phase1(tmp_adev);
4356 if (r)
4357 goto out;
4358
4359 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
4360 if (vram_lost) {
77e7f829 4361 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 4362 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
4363 }
4364
6c28aed6 4365 r = amdgpu_gtt_mgr_recover(ttm_manager_type(&tmp_adev->mman.bdev, TTM_PL_TT));
26bc5340
AG
4366 if (r)
4367 goto out;
4368
4369 r = amdgpu_device_fw_loading(tmp_adev);
4370 if (r)
4371 return r;
4372
4373 r = amdgpu_device_ip_resume_phase2(tmp_adev);
4374 if (r)
4375 goto out;
4376
4377 if (vram_lost)
4378 amdgpu_device_fill_reset_magic(tmp_adev);
4379
fdafb359
EQ
4380 /*
4381 * Add this ASIC as tracked as reset was already
4382 * complete successfully.
4383 */
4384 amdgpu_register_gpu_instance(tmp_adev);
4385
7c04ca50 4386 r = amdgpu_device_ip_late_init(tmp_adev);
4387 if (r)
4388 goto out;
4389
565d1941
EQ
4390 amdgpu_fbdev_set_suspend(tmp_adev, 0);
4391
e8fbaf03
GC
4392 /*
4393 * The GPU enters a bad state once the number of
4394 * faulty pages reported by ECC reaches the threshold,
4395 * and RAS recovery is scheduled next. So add one
4396 * check here to break recovery if the bad page
4397 * threshold is indeed exceeded, and remind the user
4398 * to either retire this GPU or set a bigger
4399 * bad_page_threshold value to fix this when the
4400 * driver is probed again.
4401 */
4402 if (!amdgpu_ras_check_err_threshold(tmp_adev)) {
4403 /* must succeed. */
4404 amdgpu_ras_resume(tmp_adev);
4405 } else {
4406 r = -EINVAL;
4407 goto out;
4408 }
e79a04d5 4409
26bc5340
AG
4410 /* Update PSP FW topology after reset */
4411 if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
4412 r = amdgpu_xgmi_update_topology(hive, tmp_adev);
4413 }
4414 }
4415
26bc5340
AG
4416out:
4417 if (!r) {
4418 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
4419 r = amdgpu_ib_ring_tests(tmp_adev);
4420 if (r) {
4421 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
4422 r = amdgpu_device_ip_suspend(tmp_adev);
4423 need_full_reset = true;
4424 r = -EAGAIN;
4425 goto end;
4426 }
4427 }
4428
4429 if (!r)
4430 r = amdgpu_device_recover_vram(tmp_adev);
4431 else
4432 tmp_adev->asic_reset_res = r;
4433 }
4434
4435end:
4436 *need_full_reset_arg = need_full_reset;
4437 return r;
4438}
4439
08ebb485
DL
4440static bool amdgpu_device_lock_adev(struct amdgpu_device *adev,
4441 struct amdgpu_hive_info *hive)
26bc5340 4442{
53b3f8f4
DL
4443 if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0)
4444 return false;
4445
08ebb485
DL
4446 if (hive) {
4447 down_write_nest_lock(&adev->reset_sem, &hive->hive_lock);
4448 } else {
4449 down_write(&adev->reset_sem);
4450 }
5740682e 4451
26bc5340 4452 atomic_inc(&adev->gpu_reset_counter);
a3a09142
AD
4453 switch (amdgpu_asic_reset_method(adev)) {
4454 case AMD_RESET_METHOD_MODE1:
4455 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
4456 break;
4457 case AMD_RESET_METHOD_MODE2:
4458 adev->mp1_state = PP_MP1_STATE_RESET;
4459 break;
4460 default:
4461 adev->mp1_state = PP_MP1_STATE_NONE;
4462 break;
4463 }
1d721ed6
AG
4464
4465 return true;
26bc5340 4466}
d38ceaf9 4467
26bc5340
AG
4468static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
4469{
89041940 4470 amdgpu_vf_error_trans_all(adev);
a3a09142 4471 adev->mp1_state = PP_MP1_STATE_NONE;
53b3f8f4 4472 atomic_set(&adev->in_gpu_reset, 0);
6049db43 4473 up_write(&adev->reset_sem);
26bc5340
AG
4474}
4475
91fb309d
HC
4476/*
4477 * to lock a list of amdgpu devices in a hive safely; if this is not a hive
4478 * with multiple nodes, it behaves the same as amdgpu_device_lock_adev.
4479 *
4480 * unlock won't require a roll back.
4481 */
4482static int amdgpu_device_lock_hive_adev(struct amdgpu_device *adev, struct amdgpu_hive_info *hive)
4483{
4484 struct amdgpu_device *tmp_adev = NULL;
4485
4486 if (adev->gmc.xgmi.num_physical_nodes > 1) {
4487 if (!hive) {
4488 dev_err(adev->dev, "Hive is NULL while device has multiple xgmi nodes");
4489 return -ENODEV;
4490 }
4491 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
4492 if (!amdgpu_device_lock_adev(tmp_adev, hive))
4493 goto roll_back;
4494 }
4495 } else if (!amdgpu_device_lock_adev(adev, hive))
4496 return -EAGAIN;
4497
4498 return 0;
4499roll_back:
4500 if (!list_is_first(&tmp_adev->gmc.xgmi.head, &hive->device_list)) {
4501 /*
4502 * if the locking iteration breaks in the middle of a hive,
4503 * it may mean there is a race issue,
4504 * or that a hive device locked up independently.
4505 * we may or may not be in trouble, so try to roll back
4506 * the locks and give out a warning.
4507 */
4508 dev_warn(tmp_adev->dev, "Hive lock iteration broke in the middle. Rolling back to unlock");
4509 list_for_each_entry_continue_reverse(tmp_adev, &hive->device_list, gmc.xgmi.head) {
4510 amdgpu_device_unlock_adev(tmp_adev);
4511 }
4512 }
4513 return -EAGAIN;
4514}
4515
3f12acc8
EQ
4516static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
4517{
4518 struct pci_dev *p = NULL;
4519
4520 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4521 adev->pdev->bus->number, 1);
4522 if (p) {
4523 pm_runtime_enable(&(p->dev));
4524 pm_runtime_resume(&(p->dev));
4525 }
4526}
4527
4528static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
4529{
4530 enum amd_reset_method reset_method;
4531 struct pci_dev *p = NULL;
4532 u64 expires;
4533
4534 /*
4535 * For now, only BACO and mode1 reset are confirmed
4536 * to suffer from the audio issue if not properly suspended.
4537 */
4538 reset_method = amdgpu_asic_reset_method(adev);
4539 if ((reset_method != AMD_RESET_METHOD_BACO) &&
4540 (reset_method != AMD_RESET_METHOD_MODE1))
4541 return -EINVAL;
4542
4543 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4544 adev->pdev->bus->number, 1);
4545 if (!p)
4546 return -ENODEV;
4547
4548 expires = pm_runtime_autosuspend_expiration(&(p->dev));
4549 if (!expires)
4550 /*
4551 * If we cannot get the audio device autosuspend delay,
4552 * a fixed 4S interval will be used. Since 3S is the
4553 * audio controller's default autosuspend delay setting,
4554 * the 4S used here is guaranteed to cover it.
4555 */
54b7feb9 4556 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
4557
4558 while (!pm_runtime_status_suspended(&(p->dev))) {
4559 if (!pm_runtime_suspend(&(p->dev)))
4560 break;
4561
4562 if (expires < ktime_get_mono_fast_ns()) {
4563 dev_warn(adev->dev, "failed to suspend display audio\n");
4564 /* TODO: abort the succeeding gpu reset? */
4565 return -ETIMEDOUT;
4566 }
4567 }
4568
4569 pm_runtime_disable(&(p->dev));
4570
4571 return 0;
4572}
4573
26bc5340
AG
4574/**
4575 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
4576 *
982a820b 4577 * @adev: amdgpu_device pointer
26bc5340
AG
4578 * @job: which job triggered the hang
4579 *
4580 * Attempt to reset the GPU if it has hung (all asics).
4581 * Attempt to do soft-reset or full-reset and reinitialize Asic
4582 * Returns 0 for success or an error on failure.
4583 */
4584
4585int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
4586 struct amdgpu_job *job)
4587{
1d721ed6 4588 struct list_head device_list, *device_list_handle = NULL;
7dd8c205
EQ
4589 bool need_full_reset = false;
4590 bool job_signaled = false;
26bc5340 4591 struct amdgpu_hive_info *hive = NULL;
26bc5340 4592 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 4593 int i, r = 0;
bb5c7235 4594 bool need_emergency_restart = false;
3f12acc8 4595 bool audio_suspended = false;
26bc5340 4596
6e3cd2a9 4597 /*
bb5c7235
WS
4598 * Special case: RAS triggered and full reset isn't supported
4599 */
4600 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
4601
d5ea093e
AG
4602 /*
4603 * Flush RAM to disk so that after reboot
4604 * the user can read the log and see why the system rebooted.
4605 */
bb5c7235 4606 if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
4607 DRM_WARN("Emergency reboot.");
4608
4609 ksys_sync_helper();
4610 emergency_restart();
4611 }
4612
b823821f 4613 dev_info(adev->dev, "GPU %s begin!\n",
bb5c7235 4614 need_emergency_restart ? "jobs stop":"reset");
26bc5340
AG
4615
4616 /*
1d721ed6
AG
4617 * Here we trylock to avoid a chain of resets executing from
4618 * either jobs triggered on different adevs in an XGMI hive or jobs on
4619 * different schedulers for the same device while this TO handler is running.
4620 * We always reset all schedulers for a device and all devices in an XGMI
4621 * hive, so that should take care of them too.
26bc5340 4622 */
d95e8e97 4623 hive = amdgpu_get_xgmi_hive(adev);
53b3f8f4
DL
4624 if (hive) {
4625 if (atomic_cmpxchg(&hive->in_reset, 0, 1) != 0) {
4626 DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
4627 job ? job->base.id : -1, hive->hive_id);
d95e8e97 4628 amdgpu_put_xgmi_hive(hive);
91fb309d
HC
4629 if (job)
4630 drm_sched_increase_karma(&job->base);
53b3f8f4
DL
4631 return 0;
4632 }
4633 mutex_lock(&hive->hive_lock);
1d721ed6 4634 }
26bc5340 4635
91fb309d
HC
4636 /*
4637 * lock the device before we try to operate on the linked list;
4638 * if we didn't get the device lock, don't touch the linked list since
4639 * others may be iterating over it.
4640 */
4641 r = amdgpu_device_lock_hive_adev(adev, hive);
4642 if (r) {
4643 dev_info(adev->dev, "Bailing on TDR for s_job:%llx, as another already in progress",
4644 job ? job->base.id : -1);
4645
4646 /* even we skipped this reset, still need to set the job to guilty */
4647 if (job)
4648 drm_sched_increase_karma(&job->base);
4649 goto skip_recovery;
4650 }
4651
9e94d22c
EQ
4652 /*
4653 * Build list of devices to reset.
4654 * In case we are in XGMI hive mode, resort the device list
4655 * to put adev in the 1st position.
4656 */
4657 INIT_LIST_HEAD(&device_list);
4658 if (adev->gmc.xgmi.num_physical_nodes > 1) {
9e94d22c
EQ
4659 if (!list_is_first(&adev->gmc.xgmi.head, &hive->device_list))
4660 list_rotate_to_front(&adev->gmc.xgmi.head, &hive->device_list);
26bc5340
AG
4661 device_list_handle = &hive->device_list;
4662 } else {
4663 list_add_tail(&adev->gmc.xgmi.head, &device_list);
4664 device_list_handle = &device_list;
4665 }
4666
1d721ed6
AG
4667 /* block all schedulers and reset given job's ring */
4668 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3f12acc8
EQ
4669 /*
4670 * Try to put the audio codec into the suspend state
4671 * before the gpu reset starts.
4672 *
4673 * This is needed because the power domain of the
4674 * graphics device is shared with the AZ power domain.
4675 * Without this, we may change the audio hardware from
4676 * behind the audio driver's back, which will trigger
4677 * some audio codec errors.
4678 */
4679 if (!amdgpu_device_suspend_display_audio(tmp_adev))
4680 audio_suspended = true;
4681
9e94d22c
EQ
4682 amdgpu_ras_set_error_query_ready(tmp_adev, false);
4683
52fb44cf
EQ
4684 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
4685
9e94d22c
EQ
4686 if (!amdgpu_sriov_vf(tmp_adev))
4687 amdgpu_amdkfd_pre_reset(tmp_adev);
4688
12ffa55d
AG
4689 /*
4690 * Mark these ASICs to be reset as untracked first
4691 * and add them back after the reset completes
4692 */
4693 amdgpu_unregister_gpu_instance(tmp_adev);
4694
a2f63ee8 4695 amdgpu_fbdev_set_suspend(tmp_adev, 1);
565d1941 4696
f1c1314b 4697 /* disable ras on ALL IPs */
bb5c7235 4698 if (!need_emergency_restart &&
b823821f 4699 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 4700 amdgpu_ras_suspend(tmp_adev);
4701
1d721ed6
AG
4702 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4703 struct amdgpu_ring *ring = tmp_adev->rings[i];
4704
4705 if (!ring || !ring->sched.thread)
4706 continue;
4707
0b2d2c2e 4708 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 4709
bb5c7235 4710 if (need_emergency_restart)
7c6e68c7 4711 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6
AG
4712 }
4713 }
4714
bb5c7235 4715 if (need_emergency_restart)
7c6e68c7
AG
4716 goto skip_sched_resume;
4717
1d721ed6
AG
4718 /*
4719 * Must check guilty signal here since after this point all old
4720 * HW fences are force signaled.
4721 *
4722 * job->base holds a reference to parent fence
4723 */
4724 if (job && job->base.s_fence->parent &&
7dd8c205 4725 dma_fence_is_signaled(job->base.s_fence->parent)) {
1d721ed6 4726 job_signaled = true;
1d721ed6
AG
4727 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
4728 goto skip_hw_reset;
4729 }
4730
26bc5340
AG
4731retry: /* Rest of adevs pre asic reset from XGMI hive. */
4732 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
26bc5340 4733 r = amdgpu_device_pre_asic_reset(tmp_adev,
ded08454 4734 (tmp_adev == adev) ? job : NULL,
26bc5340
AG
4735 &need_full_reset);
4736 /*TODO Should we stop ?*/
4737 if (r) {
aac89168 4738 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
4a580877 4739 r, adev_to_drm(tmp_adev)->unique);
26bc5340
AG
4740 tmp_adev->asic_reset_res = r;
4741 }
4742 }
4743
4744 /* Actual ASIC resets if needed.*/
4745 /* TODO Implement XGMI hive reset logic for SRIOV */
4746 if (amdgpu_sriov_vf(adev)) {
4747 r = amdgpu_device_reset_sriov(adev, job ? false : true);
4748 if (r)
4749 adev->asic_reset_res = r;
4750 } else {
7ac71382 4751 r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset, false);
26bc5340
AG
4752 if (r && r == -EAGAIN)
4753 goto retry;
4754 }
4755
1d721ed6
AG
4756skip_hw_reset:
4757
26bc5340
AG
4758 /* Post ASIC reset for all devs .*/
4759 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
7c6e68c7 4760
1d721ed6
AG
4761 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4762 struct amdgpu_ring *ring = tmp_adev->rings[i];
4763
4764 if (!ring || !ring->sched.thread)
4765 continue;
4766
4767 /* No point to resubmit jobs if we didn't HW reset*/
4768 if (!tmp_adev->asic_reset_res && !job_signaled)
4769 drm_sched_resubmit_jobs(&ring->sched);
4770
4771 drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
4772 }
4773
4774 if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
4a580877 4775 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
1d721ed6
AG
4776 }
4777
4778 tmp_adev->asic_reset_res = 0;
26bc5340
AG
4779
4780 if (r) {
4781 /* bad news, how to tell it to userspace ? */
12ffa55d 4782 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
4783 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
4784 } else {
12ffa55d 4785 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340 4786 }
7c6e68c7 4787 }
26bc5340 4788
7c6e68c7
AG
4789skip_sched_resume:
4790 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4791 /*unlock kfd: SRIOV would do it separately */
bb5c7235 4792 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
7c6e68c7 4793 amdgpu_amdkfd_post_reset(tmp_adev);
3f12acc8
EQ
4794 if (audio_suspended)
4795 amdgpu_device_resume_display_audio(tmp_adev);
26bc5340
AG
4796 amdgpu_device_unlock_adev(tmp_adev);
4797 }
4798
cbfd17f7 4799skip_recovery:
9e94d22c 4800 if (hive) {
53b3f8f4 4801 atomic_set(&hive->in_reset, 0);
9e94d22c 4802 mutex_unlock(&hive->hive_lock);
d95e8e97 4803 amdgpu_put_xgmi_hive(hive);
9e94d22c 4804 }
26bc5340 4805
91fb309d 4806 if (r && r != -EAGAIN)
26bc5340 4807 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
d38ceaf9
AD
4808 return r;
4809}
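/*
 * Illustrative sketch (not part of this file) of the usual entry point into
 * the recovery function above: the drm_sched job-timeout handler in
 * amdgpu_job.c is expected to call roughly
 *
 *   amdgpu_device_gpu_recover(ring->adev, job);
 *
 * while RAS fatal-error paths call it with a NULL job. Exact call sites may
 * differ from this sketch.
 */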
4810
e3ecdffa
AD
4811/**
4812 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
4813 *
4814 * @adev: amdgpu_device pointer
4815 *
4816 * Fetches and stores in the driver the PCIE capabilities (gen speed
4817 * and lanes) of the slot the device is in. Handles APUs and
4818 * virtualized environments where PCIE config space may not be available.
4819 */
5494d864 4820static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 4821{
5d9a6330 4822 struct pci_dev *pdev;
c5313457
HK
4823 enum pci_bus_speed speed_cap, platform_speed_cap;
4824 enum pcie_link_width platform_link_width;
d0dd7f0c 4825
cd474ba0
AD
4826 if (amdgpu_pcie_gen_cap)
4827 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 4828
cd474ba0
AD
4829 if (amdgpu_pcie_lane_cap)
4830 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 4831
cd474ba0
AD
4832 /* covers APUs as well */
4833 if (pci_is_root_bus(adev->pdev->bus)) {
4834 if (adev->pm.pcie_gen_mask == 0)
4835 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
4836 if (adev->pm.pcie_mlw_mask == 0)
4837 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 4838 return;
cd474ba0 4839 }
d0dd7f0c 4840
c5313457
HK
4841 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
4842 return;
4843
dbaa922b
AD
4844 pcie_bandwidth_available(adev->pdev, NULL,
4845 &platform_speed_cap, &platform_link_width);
c5313457 4846
cd474ba0 4847 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
4848 /* asic caps */
4849 pdev = adev->pdev;
4850 speed_cap = pcie_get_speed_cap(pdev);
4851 if (speed_cap == PCI_SPEED_UNKNOWN) {
4852 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
4853 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4854 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 4855 } else {
2b3a1f51
FX
4856 if (speed_cap == PCIE_SPEED_32_0GT)
4857 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4858 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4859 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4860 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
4861 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
4862 else if (speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
4863 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4864 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4865 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4866 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
4867 else if (speed_cap == PCIE_SPEED_8_0GT)
4868 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4869 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4870 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
4871 else if (speed_cap == PCIE_SPEED_5_0GT)
4872 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4873 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
4874 else
4875 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
4876 }
4877 /* platform caps */
c5313457 4878 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
4879 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4880 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4881 } else {
2b3a1f51
FX
4882 if (platform_speed_cap == PCIE_SPEED_32_0GT)
4883 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4884 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4885 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4886 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
4887 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
4888 else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
4889 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4890 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4891 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4892 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 4893 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
4894 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4895 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4896 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 4897 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
4898 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4899 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4900 else
4901 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
4902
cd474ba0
AD
4903 }
4904 }
4905 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 4906 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
4907 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
4908 } else {
c5313457 4909 switch (platform_link_width) {
5d9a6330 4910 case PCIE_LNK_X32:
cd474ba0
AD
4911 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
4912 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4913 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4914 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4915 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4916 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4917 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4918 break;
5d9a6330 4919 case PCIE_LNK_X16:
cd474ba0
AD
4920 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4921 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4922 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4923 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4924 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4925 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4926 break;
5d9a6330 4927 case PCIE_LNK_X12:
cd474ba0
AD
4928 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4929 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4930 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4931 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4932 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4933 break;
5d9a6330 4934 case PCIE_LNK_X8:
cd474ba0
AD
4935 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4936 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4937 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4938 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4939 break;
5d9a6330 4940 case PCIE_LNK_X4:
cd474ba0
AD
4941 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4942 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4943 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4944 break;
5d9a6330 4945 case PCIE_LNK_X2:
cd474ba0
AD
4946 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4947 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4948 break;
5d9a6330 4949 case PCIE_LNK_X1:
cd474ba0
AD
4950 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
4951 break;
4952 default:
4953 break;
4954 }
d0dd7f0c
AD
4955 }
4956 }
4957}
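/*
 * As the code above shows, detection can be bypassed with the
 * amdgpu_pcie_gen_cap and amdgpu_pcie_lane_cap module parameters, which are
 * CAIL_* bitmasks applied verbatim. For example (assumed usage), passing
 * "amdgpu.pcie_gen_cap=<mask>" on the kernel command line, with a mask built
 * from the CAIL_PCIE_LINK_SPEED_SUPPORT_* defines, skips the
 * pcie_get_speed_cap() probing entirely; the lane mask works the same way.
 */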
d38ceaf9 4958
361dbd01
AD
4959int amdgpu_device_baco_enter(struct drm_device *dev)
4960{
1348969a 4961 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 4962 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01 4963
4a580877 4964 if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
361dbd01
AD
4965 return -ENOTSUPP;
4966
6fb33209 4967 if (ras && ras->supported && adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
4968 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
4969
9530273e 4970 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
4971}
4972
4973int amdgpu_device_baco_exit(struct drm_device *dev)
4974{
1348969a 4975 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 4976 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 4977 int ret = 0;
361dbd01 4978
4a580877 4979 if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
361dbd01
AD
4980 return -ENOTSUPP;
4981
9530273e
EQ
4982 ret = amdgpu_dpm_baco_exit(adev);
4983 if (ret)
4984 return ret;
7a22677b 4985
6fb33209 4986 if (ras && ras->supported && adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
4987 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
4988
4989 return 0;
361dbd01 4990}
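/*
 * BACO ("Bus Active, Chip Off") keeps the PCIe link alive while the GPU core
 * is powered down. These two helpers are presumably wrapped by the runtime-PM
 * suspend/resume callbacks in amdgpu_drv.c; a hedged sketch of a caller:
 *
 *   if (amdgpu_device_supports_baco(dev))
 *       ret = amdgpu_device_baco_enter(dev);
 */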
c9a6b82f 4991
acd89fca
AG
4992static void amdgpu_cancel_all_tdr(struct amdgpu_device *adev)
4993{
4994 int i;
4995
4996 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4997 struct amdgpu_ring *ring = adev->rings[i];
4998
4999 if (!ring || !ring->sched.thread)
5000 continue;
5001
5002 cancel_delayed_work_sync(&ring->sched.work_tdr);
5003 }
5004}

/**
 * amdgpu_pci_error_detected - Called when a PCI error is detected.
 * @pdev: PCI device struct
 * @state: PCI channel state
 *
 * Description: Called when a PCI error is detected.
 *
 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
 */
pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
{
	struct drm_device *dev = pci_get_drvdata(pdev);
	struct amdgpu_device *adev = drm_to_adev(dev);
	int i;

	DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);

	if (adev->gmc.xgmi.num_physical_nodes > 1) {
		DRM_WARN("No support for XGMI hive yet...");
		return PCI_ERS_RESULT_DISCONNECT;
	}

	switch (state) {
	case pci_channel_io_normal:
		return PCI_ERS_RESULT_CAN_RECOVER;
	/* Fatal error, prepare for slot reset */
	case pci_channel_io_frozen:
		/*
		 * Cancel and wait for all TDRs in progress if failing to
		 * set adev->in_gpu_reset in amdgpu_device_lock_adev
		 *
		 * Locking adev->reset_sem will prevent any external access
		 * to GPU during PCI error recovery
		 */
		while (!amdgpu_device_lock_adev(adev, NULL))
			amdgpu_cancel_all_tdr(adev);

		/*
		 * Block any work scheduling as we do for regular GPU reset
		 * for the duration of the recovery
		 */
		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
			struct amdgpu_ring *ring = adev->rings[i];

			if (!ring || !ring->sched.thread)
				continue;

			drm_sched_stop(&ring->sched, NULL);
		}
		return PCI_ERS_RESULT_NEED_RESET;
	case pci_channel_io_perm_failure:
		/* Permanent error, prepare for device removal */
		return PCI_ERS_RESULT_DISCONNECT;
	}

	return PCI_ERS_RESULT_NEED_RESET;
}
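
/*
 * Note on the frozen case above: once the channel is frozen the device no
 * longer responds to MMIO (reads typically come back as all 1s), so this
 * callback only quiesces software state, taking the adev lock and parking
 * every GPU scheduler, and leaves the actual hardware recovery to
 * amdgpu_pci_slot_reset() below.
 */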

/**
 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
 * @pdev: pointer to PCI device
 */
pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
{
	DRM_INFO("PCI error: mmio enabled callback!!\n");

	/* TODO - dump whatever for debugging purposes */

	/* This is called only if amdgpu_pci_error_detected returns
	 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
	 * works, no need to reset slot.
	 */

	return PCI_ERS_RESULT_RECOVERED;
}
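
/*
 * A minimal sketch of what the TODO above could dump, using only generic
 * PCI config accessors (the register choice here is illustrative, not
 * taken from the original source):
 *
 *	u32 id;
 *
 *	pci_read_config_dword(pdev, PCI_VENDOR_ID, &id);
 *	DRM_INFO("PCI error: vendor/device readback 0x%08x\n", id);
 */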

/**
 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
 * @pdev: PCI device struct
 *
 * Description: This routine is called by the pci error recovery
 * code after the PCI slot has been reset, just before we
 * should resume normal operations.
 */
pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);
	struct amdgpu_device *adev = drm_to_adev(dev);
	int r, i;
	bool need_full_reset = true;
	u32 memsize;
	struct list_head device_list;

	DRM_INFO("PCI error: slot reset callback!!\n");

	INIT_LIST_HEAD(&device_list);
	list_add_tail(&adev->gmc.xgmi.head, &device_list);

	/* wait for asic to come out of reset */
	msleep(500);

	/* Restore PCI config space */
	amdgpu_device_load_pci_state(pdev);

	/* confirm ASIC came out of reset */
	for (i = 0; i < adev->usec_timeout; i++) {
		memsize = amdgpu_asic_get_config_memsize(adev);

		if (memsize != 0xffffffff)
			break;
		udelay(1);
	}
	if (memsize == 0xffffffff) {
		r = -ETIME;
		goto out;
	}

	adev->in_pci_err_recovery = true;
	r = amdgpu_device_pre_asic_reset(adev, NULL, &need_full_reset);
	adev->in_pci_err_recovery = false;
	if (r)
		goto out;

	r = amdgpu_do_asic_reset(NULL, &device_list, &need_full_reset, true);

out:
	if (!r) {
		if (amdgpu_device_cache_pci_state(adev->pdev))
			pci_restore_state(adev->pdev);

		DRM_INFO("PCIe error recovery succeeded\n");
	} else {
		DRM_ERROR("PCIe error recovery failed, err:%d", r);
		amdgpu_device_unlock_adev(adev);
	}

	return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
}
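
/*
 * The polling loop above uses amdgpu_asic_get_config_memsize() as a cheap
 * "is the ASIC responding yet" probe: while the device is still in reset
 * the register read returns 0xffffffff, so a sane memory size doubles as
 * the ready signal before any heavier reset work is attempted.
 */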

/**
 * amdgpu_pci_resume() - resume normal ops after PCI reset
 * @pdev: pointer to PCI device
 *
 * Called when the error recovery driver tells us that it's
 * OK to resume normal operation.
 */
void amdgpu_pci_resume(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);
	struct amdgpu_device *adev = drm_to_adev(dev);
	int i;

	DRM_INFO("PCI error: resume callback!!\n");

	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		struct amdgpu_ring *ring = adev->rings[i];

		if (!ring || !ring->sched.thread)
			continue;

		drm_sched_resubmit_jobs(&ring->sched);
		drm_sched_start(&ring->sched, true);
	}

	amdgpu_device_unlock_adev(adev);
}
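
/*
 * This resume path mirrors the shutdown done in amdgpu_pci_error_detected():
 * jobs that were taken off the hardware by drm_sched_stop() are resubmitted,
 * each ring's scheduler is started again, and only then is the adev lock
 * released so normal command submission can reach the GPU once more.
 */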

bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);
	struct amdgpu_device *adev = drm_to_adev(dev);
	int r;

	r = pci_save_state(pdev);
	if (!r) {
		kfree(adev->pci_state);

		adev->pci_state = pci_store_saved_state(pdev);

		if (!adev->pci_state) {
			DRM_ERROR("Failed to store PCI saved state");
			return false;
		}
	} else {
		DRM_WARN("Failed to save PCI state, err:%d\n", r);
		return false;
	}

	return true;
}

bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);
	struct amdgpu_device *adev = drm_to_adev(dev);
	int r;

	if (!adev->pci_state)
		return false;

	r = pci_load_saved_state(pdev, adev->pci_state);

	if (!r) {
		pci_restore_state(pdev);
	} else {
		DRM_WARN("Failed to load PCI state, err:%d\n", r);
		return false;
	}

	return true;
}
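
/*
 * A rough sketch of how the two helpers above pair up around a reset
 * (simplified; amdgpu_pci_slot_reset() above shows a real use):
 *
 *	amdgpu_device_cache_pci_state(adev->pdev);	- snapshot config space
 *						  	  into adev->pci_state
 *	... a reset or PCI error may clobber the device's config space ...
 *	amdgpu_device_load_pci_state(adev->pdev);	- re-arm the snapshot and
 *							  write it back to the device
 */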