Merge tag 'for-linus-2021-01-24' of git://git.kernel.org/pub/scm/linux/kernel/git...
[linux-2.6-block.git] / drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
d38ceaf9
AD
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
b1ddf548 28#include <linux/power_supply.h>
0875dc9e 29#include <linux/kthread.h>
fdf2f6c5 30#include <linux/module.h>
d38ceaf9
AD
31#include <linux/console.h>
32#include <linux/slab.h>
fdf2f6c5 33
4562236b 34#include <drm/drm_atomic_helper.h>
fcd70cd3 35#include <drm/drm_probe_helper.h>
d38ceaf9
AD
36#include <drm/amdgpu_drm.h>
37#include <linux/vgaarb.h>
38#include <linux/vga_switcheroo.h>
39#include <linux/efi.h>
40#include "amdgpu.h"
f4b373f4 41#include "amdgpu_trace.h"
d38ceaf9
AD
42#include "amdgpu_i2c.h"
43#include "atom.h"
44#include "amdgpu_atombios.h"
a5bde2f9 45#include "amdgpu_atomfirmware.h"
d0dd7f0c 46#include "amd_pcie.h"
33f34802
KW
47#ifdef CONFIG_DRM_AMDGPU_SI
48#include "si.h"
49#endif
a2e73f56
AD
50#ifdef CONFIG_DRM_AMDGPU_CIK
51#include "cik.h"
52#endif
aaa36a97 53#include "vi.h"
460826e6 54#include "soc15.h"
0a5b8c7b 55#include "nv.h"
d38ceaf9 56#include "bif/bif_4_1_d.h"
9accf2fd 57#include <linux/pci.h>
bec86378 58#include <linux/firmware.h>
89041940 59#include "amdgpu_vf_error.h"
d38ceaf9 60
ba997709 61#include "amdgpu_amdkfd.h"
d2f52ac8 62#include "amdgpu_pm.h"
d38ceaf9 63
5183411b 64#include "amdgpu_xgmi.h"
c030f2e4 65#include "amdgpu_ras.h"
9c7c85f7 66#include "amdgpu_pmu.h"
bd607166 67#include "amdgpu_fru_eeprom.h"
5183411b 68
d5ea093e 69#include <linux/suspend.h>
c6a6e2db 70#include <drm/task_barrier.h>
3f12acc8 71#include <linux/pm_runtime.h>
d5ea093e 72
e2a75f88 73MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
3f76dced 74MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
2d2e5e7e 75MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
ad5a67a7 76MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
54c4d17e 77MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
65e60f6e 78MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
b51a26a0 79MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
23c6268e 80MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
ed42cfe1 81MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
42b325e5 82MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
4e52a9f8 83MODULE_FIRMWARE("amdgpu/vangogh_gpu_info.bin");
e2a75f88 84
2dc80b00
S
85#define AMDGPU_RESUME_MS 2000
86
050091ab 87const char *amdgpu_asic_name[] = {
da69c161
KW
88 "TAHITI",
89 "PITCAIRN",
90 "VERDE",
91 "OLAND",
92 "HAINAN",
d38ceaf9
AD
93 "BONAIRE",
94 "KAVERI",
95 "KABINI",
96 "HAWAII",
97 "MULLINS",
98 "TOPAZ",
99 "TONGA",
48299f95 100 "FIJI",
d38ceaf9 101 "CARRIZO",
139f4917 102 "STONEY",
2cc0c0b5
FC
103 "POLARIS10",
104 "POLARIS11",
c4642a47 105 "POLARIS12",
48ff108d 106 "VEGAM",
d4196f01 107 "VEGA10",
8fab806a 108 "VEGA12",
956fcddc 109 "VEGA20",
2ca8a5d2 110 "RAVEN",
d6c3b24e 111 "ARCTURUS",
1eee4228 112 "RENOIR",
852a6626 113 "NAVI10",
87dbad02 114 "NAVI14",
9802f5d7 115 "NAVI12",
ccaf72d3 116 "SIENNA_CICHLID",
ddd8fbe7 117 "NAVY_FLOUNDER",
4f1e9a76 118 "VANGOGH",
a2468e04 119 "DIMGREY_CAVEFISH",
d38ceaf9
AD
120 "LAST",
121};
122
dcea6e65
KR
123/**
124 * DOC: pcie_replay_count
125 *
126 * The amdgpu driver provides a sysfs API for reporting the total number
127 * of PCIe replays (NAKs).
128 * The file pcie_replay_count is used for this and returns the total
129 * number of replays as a sum of the NAKs generated and NAKs received.
130 */
131
132static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
133 struct device_attribute *attr, char *buf)
134{
135 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 136 struct amdgpu_device *adev = drm_to_adev(ddev);
dcea6e65
KR
137 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
138
139 return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
140}
141
142static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
143 amdgpu_device_get_pcie_replay_count, NULL);
144
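/*
 * Illustrative usage (not part of the driver): once registered, the attribute
 * above appears in the device's sysfs directory, so userspace can read it
 * with something like:
 *
 *   cat /sys/bus/pci/devices/0000:03:00.0/pcie_replay_count
 *
 * The PCI address is a made-up example; the actual path depends on the system.
 */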
5494d864
AD
145static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
146
bd607166
KR
147/**
148 * DOC: product_name
149 *
150 * The amdgpu driver provides a sysfs API for reporting the product name
151 * for the device.
152 * The file product_name is used for this and returns the product name
153 * as returned from the FRU.
154 * NOTE: This is only available for certain server cards
155 */
156
157static ssize_t amdgpu_device_get_product_name(struct device *dev,
158 struct device_attribute *attr, char *buf)
159{
160 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 161 struct amdgpu_device *adev = drm_to_adev(ddev);
bd607166
KR
162
163 return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_name);
164}
165
166static DEVICE_ATTR(product_name, S_IRUGO,
167 amdgpu_device_get_product_name, NULL);
168
169/**
170 * DOC: product_number
171 *
172 * The amdgpu driver provides a sysfs API for reporting the part number
173 * for the device.
174 * The file product_number is used for this and returns the part number
175 * as returned from the FRU.
176 * NOTE: This is only available for certain server cards
177 */
178
179static ssize_t amdgpu_device_get_product_number(struct device *dev,
180 struct device_attribute *attr, char *buf)
181{
182 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 183 struct amdgpu_device *adev = drm_to_adev(ddev);
bd607166
KR
184
185 return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_number);
186}
187
188static DEVICE_ATTR(product_number, S_IRUGO,
189 amdgpu_device_get_product_number, NULL);
190
191/**
192 * DOC: serial_number
193 *
194 * The amdgpu driver provides a sysfs API for reporting the serial number
195 * for the device
196 * The file serial_number is used for this and returns the serial number
197 * as returned from the FRU.
198 * NOTE: This is only available for certain server cards
199 */
200
201static ssize_t amdgpu_device_get_serial_number(struct device *dev,
202 struct device_attribute *attr, char *buf)
203{
204 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 205 struct amdgpu_device *adev = drm_to_adev(ddev);
bd607166
KR
206
207 return snprintf(buf, PAGE_SIZE, "%s\n", adev->serial);
208}
209
210static DEVICE_ATTR(serial_number, S_IRUGO,
211 amdgpu_device_get_serial_number, NULL);
212
fd496ca8
AD
213/**
214 * amdgpu_device_supports_atpx - Is the device a dGPU with HG/PX power control
215 *
216 * @dev: drm_device pointer
217 *
218 * Returns true if the device is a dGPU with HG/PX power control,
219 * otherwise return false.
220 */
221bool amdgpu_device_supports_atpx(struct drm_device *dev)
222{
223 struct amdgpu_device *adev = drm_to_adev(dev);
224
225 if (adev->flags & AMD_IS_PX)
226 return true;
227 return false;
228}
229
e3ecdffa 230/**
0330b848 231 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
e3ecdffa
AD
232 *
233 * @dev: drm_device pointer
234 *
235 * Returns true if the device is a dGPU with ACPI power resources (BOCO),
236 * otherwise return false.
237 */
31af062a 238bool amdgpu_device_supports_boco(struct drm_device *dev)
d38ceaf9 239{
1348969a 240 struct amdgpu_device *adev = drm_to_adev(dev);
d38ceaf9 241
0330b848 242 if (adev->has_pr3)
d38ceaf9
AD
243 return true;
244 return false;
245}
246
a69cba42
AD
247/**
248 * amdgpu_device_supports_baco - Does the device support BACO
249 *
250 * @dev: drm_device pointer
251 *
252 * Returns true if the device supports BACO,
253 * otherwise return false.
254 */
255bool amdgpu_device_supports_baco(struct drm_device *dev)
256{
1348969a 257 struct amdgpu_device *adev = drm_to_adev(dev);
a69cba42
AD
258
259 return amdgpu_asic_supports_baco(adev);
260}
261
6e3cd2a9
MCC
262/*
263 * VRAM access helper functions
264 */
265
e35e2b11 266/**
e35e2b11
TY
267 * amdgpu_device_vram_access - read/write a buffer in vram
268 *
269 * @adev: amdgpu_device pointer
270 * @pos: offset of the buffer in vram
271 * @buf: virtual address of the buffer in system memory
272 * @size: read/write size; the buffer pointed to by @buf must be at least @size bytes
273 * @write: true - write to vram, otherwise - read from vram
274 */
275void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
276 uint32_t *buf, size_t size, bool write)
277{
e35e2b11 278 unsigned long flags;
ce05ac56
CK
279 uint32_t hi = ~0;
280 uint64_t last;
281
9d11eb0d
CK
282
283#ifdef CONFIG_64BIT
284 last = min(pos + size, adev->gmc.visible_vram_size);
285 if (last > pos) {
286 void __iomem *addr = adev->mman.aper_base_kaddr + pos;
287 size_t count = last - pos;
288
289 if (write) {
290 memcpy_toio(addr, buf, count);
291 mb();
292 amdgpu_asic_flush_hdp(adev, NULL);
293 } else {
294 amdgpu_asic_invalidate_hdp(adev, NULL);
295 mb();
296 memcpy_fromio(buf, addr, count);
297 }
298
299 if (count == size)
300 return;
301
302 pos += count;
303 buf += count / 4;
304 size -= count;
305 }
306#endif
307
ce05ac56
CK
308 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
309 for (last = pos + size; pos < last; pos += 4) {
310 uint32_t tmp = pos >> 31;
e35e2b11 311
e35e2b11 312 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
ce05ac56
CK
313 if (tmp != hi) {
314 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
315 hi = tmp;
316 }
e35e2b11
TY
317 if (write)
318 WREG32_NO_KIQ(mmMM_DATA, *buf++);
319 else
320 *buf++ = RREG32_NO_KIQ(mmMM_DATA);
e35e2b11 321 }
ce05ac56 322 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
e35e2b11
TY
323}
324
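/*
 * Usage sketch (illustrative, not part of the driver): reading the first
 * 256 bytes of VRAM into a local buffer could look roughly like this,
 * assuming "adev" is a valid amdgpu_device pointer:
 *
 *   uint32_t data[64];
 *   amdgpu_device_vram_access(adev, 0, data, sizeof(data), false);
 *
 * The helper takes the fast path over the visible-VRAM BAR mapping when it
 * can and falls back to the MM_INDEX/MM_DATA window otherwise.
 */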
d38ceaf9 325/*
f7ee1874 326 * register access helper functions.
d38ceaf9 327 */
e3ecdffa 328/**
f7ee1874 329 * amdgpu_device_rreg - read a memory mapped IO or indirect register
e3ecdffa
AD
330 *
331 * @adev: amdgpu_device pointer
332 * @reg: dword aligned register offset
333 * @acc_flags: access flags which require special behavior
334 *
335 * Returns the 32 bit value from the offset specified.
336 */
f7ee1874
HZ
337uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
338 uint32_t reg, uint32_t acc_flags)
d38ceaf9 339{
f4b373f4
TSD
340 uint32_t ret;
341
bf36b52e
AG
342 if (adev->in_pci_err_recovery)
343 return 0;
344
f7ee1874
HZ
345 if ((reg * 4) < adev->rmmio_size) {
346 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
347 amdgpu_sriov_runtime(adev) &&
348 down_read_trylock(&adev->reset_sem)) {
349 ret = amdgpu_kiq_rreg(adev, reg);
350 up_read(&adev->reset_sem);
351 } else {
352 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
353 }
354 } else {
355 ret = adev->pcie_rreg(adev, reg * 4);
81202807 356 }
bc992ba5 357
f7ee1874 358 trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
e78b579d 359
f4b373f4 360 return ret;
d38ceaf9
AD
361}
362
421a2a30
ML
363/*
364 * MMIO register read with byte offset helper function
365 * @offset: byte offset from the start of the MMIO region
366 *
367*/
368
e3ecdffa
AD
369/**
370 * amdgpu_mm_rreg8 - read a memory mapped IO register
371 *
372 * @adev: amdgpu_device pointer
373 * @offset: byte aligned register offset
374 *
375 * Returns the 8 bit value from the offset specified.
376 */
7cbbc745
AG
377uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
378{
bf36b52e
AG
379 if (adev->in_pci_err_recovery)
380 return 0;
381
421a2a30
ML
382 if (offset < adev->rmmio_size)
383 return (readb(adev->rmmio + offset));
384 BUG();
385}
386
387/*
388 * MMIO register write with byte offset helper function
389 * @offset: byte offset from the start of the MMIO region
390 * @value: the value to be written to the register
391 *
392*/
e3ecdffa
AD
393/**
394 * amdgpu_mm_wreg8 - write a memory mapped IO register
395 *
396 * @adev: amdgpu_device pointer
397 * @offset: byte aligned register offset
398 * @value: 8 bit value to write
399 *
400 * Writes the value specified to the offset specified.
401 */
7cbbc745
AG
402void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
403{
bf36b52e
AG
404 if (adev->in_pci_err_recovery)
405 return;
406
421a2a30
ML
407 if (offset < adev->rmmio_size)
408 writeb(value, adev->rmmio + offset);
409 else
410 BUG();
411}
412
e3ecdffa 413/**
f7ee1874 414 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
e3ecdffa
AD
415 *
416 * @adev: amdgpu_device pointer
417 * @reg: dword aligned register offset
418 * @v: 32 bit value to write to the register
419 * @acc_flags: access flags which require special behavior
420 *
421 * Writes the value specified to the offset specified.
422 */
f7ee1874
HZ
423void amdgpu_device_wreg(struct amdgpu_device *adev,
424 uint32_t reg, uint32_t v,
425 uint32_t acc_flags)
d38ceaf9 426{
bf36b52e
AG
427 if (adev->in_pci_err_recovery)
428 return;
429
f7ee1874
HZ
430 if ((reg * 4) < adev->rmmio_size) {
431 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
432 amdgpu_sriov_runtime(adev) &&
433 down_read_trylock(&adev->reset_sem)) {
434 amdgpu_kiq_wreg(adev, reg, v);
435 up_read(&adev->reset_sem);
436 } else {
437 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
438 }
439 } else {
440 adev->pcie_wreg(adev, reg * 4, v);
81202807 441 }
bc992ba5 442
f7ee1874 443 trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
2e0cc4d4 444}
d38ceaf9 445
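/*
 * Illustrative note (not part of the driver): most callers do not use these
 * helpers directly but go through the register macros layered on top of them,
 * for example (SOME_ENABLE_BIT is a made-up placeholder):
 *
 *   tmp = RREG32(reg);
 *   tmp |= SOME_ENABLE_BIT;
 *   WREG32(reg, tmp);
 *
 * which ends up in amdgpu_device_rreg()/amdgpu_device_wreg() above.
 */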
2e0cc4d4
ML
446/*
447 * amdgpu_mm_wreg_mmio_rlc - write register either with mmio or with RLC path if in range
448 *
449 * this function is invoked only for the debugfs register access path
450 */
f7ee1874
HZ
451void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
452 uint32_t reg, uint32_t v)
2e0cc4d4 453{
bf36b52e
AG
454 if (adev->in_pci_err_recovery)
455 return;
456
2e0cc4d4 457 if (amdgpu_sriov_fullaccess(adev) &&
f7ee1874
HZ
458 adev->gfx.rlc.funcs &&
459 adev->gfx.rlc.funcs->is_rlcg_access_range) {
2e0cc4d4
ML
460 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
461 return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v);
f7ee1874
HZ
462 } else {
463 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
47ed4e1c 464 }
d38ceaf9
AD
465}
466
e3ecdffa
AD
467/**
468 * amdgpu_io_rreg - read an IO register
469 *
470 * @adev: amdgpu_device pointer
471 * @reg: dword aligned register offset
472 *
473 * Returns the 32 bit value from the offset specified.
474 */
d38ceaf9
AD
475u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
476{
bf36b52e
AG
477 if (adev->in_pci_err_recovery)
478 return 0;
479
d38ceaf9
AD
480 if ((reg * 4) < adev->rio_mem_size)
481 return ioread32(adev->rio_mem + (reg * 4));
482 else {
483 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
484 return ioread32(adev->rio_mem + (mmMM_DATA * 4));
485 }
486}
487
e3ecdffa
AD
488/**
489 * amdgpu_io_wreg - write to an IO register
490 *
491 * @adev: amdgpu_device pointer
492 * @reg: dword aligned register offset
493 * @v: 32 bit value to write to the register
494 *
495 * Writes the value specified to the offset specified.
496 */
d38ceaf9
AD
497void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
498{
bf36b52e
AG
499 if (adev->in_pci_err_recovery)
500 return;
501
d38ceaf9
AD
502 if ((reg * 4) < adev->rio_mem_size)
503 iowrite32(v, adev->rio_mem + (reg * 4));
504 else {
505 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
506 iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
507 }
508}
509
510/**
511 * amdgpu_mm_rdoorbell - read a doorbell dword
512 *
513 * @adev: amdgpu_device pointer
514 * @index: doorbell index
515 *
516 * Returns the value in the doorbell aperture at the
517 * requested doorbell index (CIK).
518 */
519u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
520{
bf36b52e
AG
521 if (adev->in_pci_err_recovery)
522 return 0;
523
d38ceaf9
AD
524 if (index < adev->doorbell.num_doorbells) {
525 return readl(adev->doorbell.ptr + index);
526 } else {
527 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
528 return 0;
529 }
530}
531
532/**
533 * amdgpu_mm_wdoorbell - write a doorbell dword
534 *
535 * @adev: amdgpu_device pointer
536 * @index: doorbell index
537 * @v: value to write
538 *
539 * Writes @v to the doorbell aperture at the
540 * requested doorbell index (CIK).
541 */
542void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
543{
bf36b52e
AG
544 if (adev->in_pci_err_recovery)
545 return;
546
d38ceaf9
AD
547 if (index < adev->doorbell.num_doorbells) {
548 writel(v, adev->doorbell.ptr + index);
549 } else {
550 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
551 }
552}
553
832be404
KW
554/**
555 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
556 *
557 * @adev: amdgpu_device pointer
558 * @index: doorbell index
559 *
560 * Returns the value in the doorbell aperture at the
561 * requested doorbell index (VEGA10+).
562 */
563u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
564{
bf36b52e
AG
565 if (adev->in_pci_err_recovery)
566 return 0;
567
832be404
KW
568 if (index < adev->doorbell.num_doorbells) {
569 return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
570 } else {
571 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
572 return 0;
573 }
574}
575
576/**
577 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
578 *
579 * @adev: amdgpu_device pointer
580 * @index: doorbell index
581 * @v: value to write
582 *
583 * Writes @v to the doorbell aperture at the
584 * requested doorbell index (VEGA10+).
585 */
586void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
587{
bf36b52e
AG
588 if (adev->in_pci_err_recovery)
589 return;
590
832be404
KW
591 if (index < adev->doorbell.num_doorbells) {
592 atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
593 } else {
594 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
595 }
596}
597
1bba3683
HZ
598/**
599 * amdgpu_device_indirect_rreg - read an indirect register
600 *
601 * @adev: amdgpu_device pointer
602 * @pcie_index: mmio register offset
603 * @pcie_data: mmio register offset
22f453fb 604 * @reg_addr: indirect register address to read from
1bba3683
HZ
605 *
606 * Returns the value of indirect register @reg_addr
607 */
608u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
609 u32 pcie_index, u32 pcie_data,
610 u32 reg_addr)
611{
612 unsigned long flags;
613 u32 r;
614 void __iomem *pcie_index_offset;
615 void __iomem *pcie_data_offset;
616
617 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
618 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
619 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
620
621 writel(reg_addr, pcie_index_offset);
622 readl(pcie_index_offset);
623 r = readl(pcie_data_offset);
624 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
625
626 return r;
627}
628
629/**
630 * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
631 *
632 * @adev: amdgpu_device pointer
633 * @pcie_index: mmio register offset
634 * @pcie_data: mmio register offset
22f453fb 635 * @reg_addr: indirect register address to read from
1bba3683
HZ
636 *
637 * Returns the value of indirect register @reg_addr
638 */
639u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
640 u32 pcie_index, u32 pcie_data,
641 u32 reg_addr)
642{
643 unsigned long flags;
644 u64 r;
645 void __iomem *pcie_index_offset;
646 void __iomem *pcie_data_offset;
647
648 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
649 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
650 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
651
652 /* read low 32 bits */
653 writel(reg_addr, pcie_index_offset);
654 readl(pcie_index_offset);
655 r = readl(pcie_data_offset);
656 /* read high 32 bits */
657 writel(reg_addr + 4, pcie_index_offset);
658 readl(pcie_index_offset);
659 r |= ((u64)readl(pcie_data_offset) << 32);
660 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
661
662 return r;
663}
664
665/**
666 * amdgpu_device_indirect_wreg - write an indirect register address
667 *
668 * @adev: amdgpu_device pointer
669 * @pcie_index: mmio register offset
670 * @pcie_data: mmio register offset
671 * @reg_addr: indirect register offset
672 * @reg_data: indirect register data
673 *
674 */
675void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
676 u32 pcie_index, u32 pcie_data,
677 u32 reg_addr, u32 reg_data)
678{
679 unsigned long flags;
680 void __iomem *pcie_index_offset;
681 void __iomem *pcie_data_offset;
682
683 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
684 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
685 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
686
687 writel(reg_addr, pcie_index_offset);
688 readl(pcie_index_offset);
689 writel(reg_data, pcie_data_offset);
690 readl(pcie_data_offset);
691 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
692}
693
694/**
695 * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
696 *
697 * @adev: amdgpu_device pointer
698 * @pcie_index: mmio register offset
699 * @pcie_data: mmio register offset
700 * @reg_addr: indirect register offset
701 * @reg_data: indirect register data
702 *
703 */
704void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
705 u32 pcie_index, u32 pcie_data,
706 u32 reg_addr, u64 reg_data)
707{
708 unsigned long flags;
709 void __iomem *pcie_index_offset;
710 void __iomem *pcie_data_offset;
711
712 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
713 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
714 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
715
716 /* write low 32 bits */
717 writel(reg_addr, pcie_index_offset);
718 readl(pcie_index_offset);
719 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
720 readl(pcie_data_offset);
721 /* write high 32 bits */
722 writel(reg_addr + 4, pcie_index_offset);
723 readl(pcie_index_offset);
724 writel((u32)(reg_data >> 32), pcie_data_offset);
725 readl(pcie_data_offset);
726 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
727}
728
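/*
 * Illustrative sketch (not part of the driver): an asic-specific pcie_rreg
 * callback built on the indirect helpers above might look roughly like this,
 * with the index/data offsets supplied by the NBIO callbacks:
 *
 *   static u32 example_pcie_rreg(struct amdgpu_device *adev, u32 reg)
 *   {
 *           u32 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
 *           u32 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
 *
 *           return amdgpu_device_indirect_rreg(adev, pcie_index, pcie_data, reg);
 *   }
 *
 * "example_pcie_rreg" is a placeholder name; the real asic files wire this
 * up in their own rreg/wreg callbacks.
 */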
d38ceaf9
AD
729/**
730 * amdgpu_invalid_rreg - dummy reg read function
731 *
982a820b 732 * @adev: amdgpu_device pointer
d38ceaf9
AD
733 * @reg: offset of register
734 *
735 * Dummy register read function. Used for register blocks
736 * that certain asics don't have (all asics).
737 * Returns the value in the register.
738 */
739static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
740{
741 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
742 BUG();
743 return 0;
744}
745
746/**
747 * amdgpu_invalid_wreg - dummy reg write function
748 *
982a820b 749 * @adev: amdgpu_device pointer
d38ceaf9
AD
750 * @reg: offset of register
751 * @v: value to write to the register
752 *
753 * Dummy register write function. Used for register blocks
754 * that certain asics don't have (all asics).
755 */
756static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
757{
758 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
759 reg, v);
760 BUG();
761}
762
4fa1c6a6
TZ
763/**
764 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
765 *
982a820b 766 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
767 * @reg: offset of register
768 *
769 * Dummy register read function. Used for register blocks
770 * that certain asics don't have (all asics).
771 * Returns the value in the register.
772 */
773static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
774{
775 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
776 BUG();
777 return 0;
778}
779
780/**
781 * amdgpu_invalid_wreg64 - dummy reg write function
782 *
982a820b 783 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
784 * @reg: offset of register
785 * @v: value to write to the register
786 *
787 * Dummy register write function. Used for register blocks
788 * that certain asics don't have (all asics).
789 */
790static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
791{
792 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
793 reg, v);
794 BUG();
795}
796
d38ceaf9
AD
797/**
798 * amdgpu_block_invalid_rreg - dummy reg read function
799 *
982a820b 800 * @adev: amdgpu_device pointer
d38ceaf9
AD
801 * @block: offset of instance
802 * @reg: offset of register
803 *
804 * Dummy register read function. Used for register blocks
805 * that certain asics don't have (all asics).
806 * Returns the value in the register.
807 */
808static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
809 uint32_t block, uint32_t reg)
810{
811 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
812 reg, block);
813 BUG();
814 return 0;
815}
816
817/**
818 * amdgpu_block_invalid_wreg - dummy reg write function
819 *
982a820b 820 * @adev: amdgpu_device pointer
d38ceaf9
AD
821 * @block: offset of instance
822 * @reg: offset of register
823 * @v: value to write to the register
824 *
825 * Dummy register write function. Used for register blocks
826 * that certain asics don't have (all asics).
827 */
828static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
829 uint32_t block,
830 uint32_t reg, uint32_t v)
831{
832 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
833 reg, block, v);
834 BUG();
835}
836
4d2997ab
AD
837/**
838 * amdgpu_device_asic_init - Wrapper for atom asic_init
839 *
982a820b 840 * @adev: amdgpu_device pointer
4d2997ab
AD
841 *
842 * Does any asic specific work and then calls atom asic init.
843 */
844static int amdgpu_device_asic_init(struct amdgpu_device *adev)
845{
846 amdgpu_asic_pre_asic_init(adev);
847
848 return amdgpu_atom_asic_init(adev->mode_info.atom_context);
849}
850
e3ecdffa
AD
851/**
852 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
853 *
982a820b 854 * @adev: amdgpu_device pointer
e3ecdffa
AD
855 *
856 * Allocates a scratch page of VRAM for use by various things in the
857 * driver.
858 */
06ec9070 859static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
d38ceaf9 860{
a4a02777
CK
861 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
862 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
863 &adev->vram_scratch.robj,
864 &adev->vram_scratch.gpu_addr,
865 (void **)&adev->vram_scratch.ptr);
d38ceaf9
AD
866}
867
e3ecdffa
AD
868/**
869 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
870 *
982a820b 871 * @adev: amdgpu_device pointer
e3ecdffa
AD
872 *
873 * Frees the VRAM scratch page.
874 */
06ec9070 875static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 876{
078af1a3 877 amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
d38ceaf9
AD
878}
879
880/**
9c3f2b54 881 * amdgpu_device_program_register_sequence - program an array of registers.
d38ceaf9
AD
882 *
883 * @adev: amdgpu_device pointer
884 * @registers: pointer to the register array
885 * @array_size: size of the register array
886 *
887 * Programs an array of registers with AND and OR masks.
888 * This is a helper for setting golden registers.
889 */
9c3f2b54
AD
890void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
891 const u32 *registers,
892 const u32 array_size)
d38ceaf9
AD
893{
894 u32 tmp, reg, and_mask, or_mask;
895 int i;
896
897 if (array_size % 3)
898 return;
899
900 for (i = 0; i < array_size; i +=3) {
901 reg = registers[i + 0];
902 and_mask = registers[i + 1];
903 or_mask = registers[i + 2];
904
905 if (and_mask == 0xffffffff) {
906 tmp = or_mask;
907 } else {
908 tmp = RREG32(reg);
909 tmp &= ~and_mask;
e0d07657
HZ
910 if (adev->family >= AMDGPU_FAMILY_AI)
911 tmp |= (or_mask & and_mask);
912 else
913 tmp |= or_mask;
d38ceaf9
AD
914 }
915 WREG32(reg, tmp);
916 }
917}
918
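/*
 * Illustrative only: a "golden register" table consumed by the helper above
 * is a flat array of (offset, and_mask, or_mask) triplets. The register
 * names and values below are made-up placeholders:
 *
 *   static const u32 golden_settings_example[] = {
 *           mmEXAMPLE_REG_A, 0xffffffff, 0x00000100,
 *           mmEXAMPLE_REG_B, 0x0000ff00, 0x00003200,
 *   };
 *
 *   amdgpu_device_program_register_sequence(adev, golden_settings_example,
 *                                           ARRAY_SIZE(golden_settings_example));
 */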
e3ecdffa
AD
919/**
920 * amdgpu_device_pci_config_reset - reset the GPU
921 *
922 * @adev: amdgpu_device pointer
923 *
924 * Resets the GPU using the pci config reset sequence.
925 * Only applicable to asics prior to vega10.
926 */
8111c387 927void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
928{
929 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
930}
931
932/*
933 * GPU doorbell aperture helpers function.
934 */
935/**
06ec9070 936 * amdgpu_device_doorbell_init - Init doorbell driver information.
d38ceaf9
AD
937 *
938 * @adev: amdgpu_device pointer
939 *
940 * Init doorbell driver information (CIK)
941 * Returns 0 on success, error on failure.
942 */
06ec9070 943static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
d38ceaf9 944{
6585661d 945
705e519e
CK
946 /* No doorbell on SI hardware generation */
947 if (adev->asic_type < CHIP_BONAIRE) {
948 adev->doorbell.base = 0;
949 adev->doorbell.size = 0;
950 adev->doorbell.num_doorbells = 0;
951 adev->doorbell.ptr = NULL;
952 return 0;
953 }
954
d6895ad3
CK
955 if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
956 return -EINVAL;
957
22357775
AD
958 amdgpu_asic_init_doorbell_index(adev);
959
d38ceaf9
AD
960 /* doorbell bar mapping */
961 adev->doorbell.base = pci_resource_start(adev->pdev, 2);
962 adev->doorbell.size = pci_resource_len(adev->pdev, 2);
963
edf600da 964 adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
9564f192 965 adev->doorbell_index.max_assignment+1);
d38ceaf9
AD
966 if (adev->doorbell.num_doorbells == 0)
967 return -EINVAL;
968
ec3db8a6 969 /* For Vega, reserve and map two pages on doorbell BAR since SDMA
88dc26e4
OZ
970 * paging queue doorbells use the second page. The
971 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
972 * doorbells are in the first page. So with the paging queue enabled,
973 * the max num_doorbells should be extended by one page (0x400 in dwords)
ec3db8a6
PY
974 */
975 if (adev->asic_type >= CHIP_VEGA10)
88dc26e4 976 adev->doorbell.num_doorbells += 0x400;
ec3db8a6 977
8972e5d2
CK
978 adev->doorbell.ptr = ioremap(adev->doorbell.base,
979 adev->doorbell.num_doorbells *
980 sizeof(u32));
981 if (adev->doorbell.ptr == NULL)
d38ceaf9 982 return -ENOMEM;
d38ceaf9
AD
983
984 return 0;
985}
986
987/**
06ec9070 988 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
d38ceaf9
AD
989 *
990 * @adev: amdgpu_device pointer
991 *
992 * Tear down doorbell driver information (CIK)
993 */
06ec9070 994static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
d38ceaf9
AD
995{
996 iounmap(adev->doorbell.ptr);
997 adev->doorbell.ptr = NULL;
998}
999
22cb0164 1000
d38ceaf9
AD
1001
1002/*
06ec9070 1003 * amdgpu_device_wb_*()
455a7bc2 1004 * Writeback is the method by which the GPU updates special pages in memory
ea81a173 1005 * with the status of certain GPU events (fences, ring pointers, etc.).
d38ceaf9
AD
1006 */
1007
1008/**
06ec9070 1009 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
1010 *
1011 * @adev: amdgpu_device pointer
1012 *
1013 * Disables Writeback and frees the Writeback memory (all asics).
1014 * Used at driver shutdown.
1015 */
06ec9070 1016static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1017{
1018 if (adev->wb.wb_obj) {
a76ed485
AD
1019 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1020 &adev->wb.gpu_addr,
1021 (void **)&adev->wb.wb);
d38ceaf9
AD
1022 adev->wb.wb_obj = NULL;
1023 }
1024}
1025
1026/**
06ec9070 1027 * amdgpu_device_wb_init- Init Writeback driver info and allocate memory
d38ceaf9
AD
1028 *
1029 * @adev: amdgpu_device pointer
1030 *
455a7bc2 1031 * Initializes writeback and allocates writeback memory (all asics).
d38ceaf9
AD
1032 * Used at driver startup.
1033 * Returns 0 on success or a negative error code on failure.
1034 */
06ec9070 1035static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
1036{
1037 int r;
1038
1039 if (adev->wb.wb_obj == NULL) {
97407b63
AD
1040 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1041 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
1042 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1043 &adev->wb.wb_obj, &adev->wb.gpu_addr,
1044 (void **)&adev->wb.wb);
d38ceaf9
AD
1045 if (r) {
1046 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1047 return r;
1048 }
d38ceaf9
AD
1049
1050 adev->wb.num_wb = AMDGPU_MAX_WB;
1051 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1052
1053 /* clear wb memory */
73469585 1054 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
1055 }
1056
1057 return 0;
1058}
1059
1060/**
131b4b36 1061 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
1062 *
1063 * @adev: amdgpu_device pointer
1064 * @wb: wb index
1065 *
1066 * Allocate a wb slot for use by the driver (all asics).
1067 * Returns 0 on success or -EINVAL on failure.
1068 */
131b4b36 1069int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9
AD
1070{
1071 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 1072
97407b63 1073 if (offset < adev->wb.num_wb) {
7014285a 1074 __set_bit(offset, adev->wb.used);
63ae07ca 1075 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
1076 return 0;
1077 } else {
1078 return -EINVAL;
1079 }
1080}
1081
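/*
 * Usage sketch (illustrative): callers typically grab a writeback slot,
 * convert it to a GPU/CPU address, and free it again on teardown. "wb" is a
 * dword offset into the writeback page (the allocator hands out 256-bit
 * slots, hence the << 3 conversion above):
 *
 *   u32 wb;
 *
 *   if (!amdgpu_device_wb_get(adev, &wb)) {
 *           u64 gpu_addr = adev->wb.gpu_addr + (wb * 4);
 *           ...
 *           amdgpu_device_wb_free(adev, wb);
 *   }
 */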
d38ceaf9 1082/**
131b4b36 1083 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
1084 *
1085 * @adev: amdgpu_device pointer
1086 * @wb: wb index
1087 *
1088 * Free a wb slot allocated for use by the driver (all asics)
1089 */
131b4b36 1090void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 1091{
73469585 1092 wb >>= 3;
d38ceaf9 1093 if (wb < adev->wb.num_wb)
73469585 1094 __clear_bit(wb, adev->wb.used);
d38ceaf9
AD
1095}
1096
d6895ad3
CK
1097/**
1098 * amdgpu_device_resize_fb_bar - try to resize FB BAR
1099 *
1100 * @adev: amdgpu_device pointer
1101 *
1102 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1103 * to fail, but if any of the BARs is not accessible after the size we abort
1104 * driver loading by returning -ENODEV.
1105 */
1106int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1107{
770d13b1 1108 u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
d6895ad3 1109 u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
31b8adab
CK
1110 struct pci_bus *root;
1111 struct resource *res;
1112 unsigned i;
d6895ad3
CK
1113 u16 cmd;
1114 int r;
1115
0c03b912 1116 /* Bypass for VF */
1117 if (amdgpu_sriov_vf(adev))
1118 return 0;
1119
b7221f2b
AD
1120 /* skip if the bios has already enabled large BAR */
1121 if (adev->gmc.real_vram_size &&
1122 (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1123 return 0;
1124
31b8adab
CK
1125 /* Check if the root BUS has 64bit memory resources */
1126 root = adev->pdev->bus;
1127 while (root->parent)
1128 root = root->parent;
1129
1130 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 1131 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
1132 res->start > 0x100000000ull)
1133 break;
1134 }
1135
1136 /* Trying to resize is pointless without a root hub window above 4GB */
1137 if (!res)
1138 return 0;
1139
d6895ad3
CK
1140 /* Disable memory decoding while we change the BAR addresses and size */
1141 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1142 pci_write_config_word(adev->pdev, PCI_COMMAND,
1143 cmd & ~PCI_COMMAND_MEMORY);
1144
1145 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
06ec9070 1146 amdgpu_device_doorbell_fini(adev);
d6895ad3
CK
1147 if (adev->asic_type >= CHIP_BONAIRE)
1148 pci_release_resource(adev->pdev, 2);
1149
1150 pci_release_resource(adev->pdev, 0);
1151
1152 r = pci_resize_resource(adev->pdev, 0, rbar_size);
1153 if (r == -ENOSPC)
1154 DRM_INFO("Not enough PCI address space for a large BAR.");
1155 else if (r && r != -ENOTSUPP)
1156 DRM_ERROR("Problem resizing BAR0 (%d).", r);
1157
1158 pci_assign_unassigned_bus_resources(adev->pdev->bus);
1159
1160 /* When the doorbell or fb BAR isn't available we have no chance of
1161 * using the device.
1162 */
06ec9070 1163 r = amdgpu_device_doorbell_init(adev);
d6895ad3
CK
1164 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1165 return -ENODEV;
1166
1167 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1168
1169 return 0;
1170}
a05502e5 1171
d38ceaf9
AD
1172/*
1173 * GPU helpers function.
1174 */
1175/**
39c640c0 1176 * amdgpu_device_need_post - check if the hw need post or not
d38ceaf9
AD
1177 *
1178 * @adev: amdgpu_device pointer
1179 *
c836fec5
JQ
1180 * Check if the asic has been initialized (all asics) at driver startup
1181 * or post is needed if hw reset is performed.
1182 * Returns true if need or false if not.
d38ceaf9 1183 */
39c640c0 1184bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
1185{
1186 uint32_t reg;
1187
bec86378
ML
1188 if (amdgpu_sriov_vf(adev))
1189 return false;
1190
1191 if (amdgpu_passthrough(adev)) {
1da2c326
ML
1192 /* for FIJI: In the whole-GPU pass-through virtualization case, after a VM reboot
1193 * some old SMC firmware still needs the driver to do a vPost, otherwise the GPU hangs.
1194 * SMC firmware versions above 22.15 don't have this flaw, so we force
1195 * vPost to be executed for SMC versions below 22.15
bec86378
ML
1196 */
1197 if (adev->asic_type == CHIP_FIJI) {
1198 int err;
1199 uint32_t fw_ver;
1200 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1201 /* force vPost if an error occurred */
1202 if (err)
1203 return true;
1204
1205 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1da2c326
ML
1206 if (fw_ver < 0x00160e00)
1207 return true;
bec86378 1208 }
bec86378 1209 }
91fe77eb 1210
1211 if (adev->has_hw_reset) {
1212 adev->has_hw_reset = false;
1213 return true;
1214 }
1215
1216 /* bios scratch used on CIK+ */
1217 if (adev->asic_type >= CHIP_BONAIRE)
1218 return amdgpu_atombios_scratch_need_asic_init(adev);
1219
1220 /* check MEM_SIZE for older asics */
1221 reg = amdgpu_asic_get_config_memsize(adev);
1222
1223 if ((reg != 0) && (reg != 0xffffffff))
1224 return false;
1225
1226 return true;
bec86378
ML
1227}
1228
d38ceaf9
AD
1229/* if we get transitioned to only one device, take VGA back */
1230/**
06ec9070 1231 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9
AD
1232 *
1233 * @cookie: amdgpu_device pointer
1234 * @state: enable/disable vga decode
1235 *
1236 * Enable/disable vga decode (all asics).
1237 * Returns VGA resource flags.
1238 */
06ec9070 1239static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
d38ceaf9
AD
1240{
1241 struct amdgpu_device *adev = cookie;
1242 amdgpu_asic_set_vga_state(adev, state);
1243 if (state)
1244 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1245 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1246 else
1247 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1248}
1249
e3ecdffa
AD
1250/**
1251 * amdgpu_device_check_block_size - validate the vm block size
1252 *
1253 * @adev: amdgpu_device pointer
1254 *
1255 * Validates the vm block size specified via module parameter.
1256 * The vm block size defines the number of bits in the page table versus the page directory,
1257 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1258 * page table and the remaining bits are in the page directory.
1259 */
06ec9070 1260static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
1261{
1262 /* defines number of bits in page table versus page directory,
1263 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1264 * page table and the remaining bits are in the page directory */
bab4fee7
JZ
1265 if (amdgpu_vm_block_size == -1)
1266 return;
a1adf8be 1267
bab4fee7 1268 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
1269 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1270 amdgpu_vm_block_size);
97489129 1271 amdgpu_vm_block_size = -1;
a1adf8be 1272 }
a1adf8be
CZ
1273}
1274
e3ecdffa
AD
1275/**
1276 * amdgpu_device_check_vm_size - validate the vm size
1277 *
1278 * @adev: amdgpu_device pointer
1279 *
1280 * Validates the vm size in GB specified via module parameter.
1281 * The VM size is the size of the GPU virtual memory space in GB.
1282 */
06ec9070 1283static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 1284{
64dab074
AD
1285 /* no need to check the default value */
1286 if (amdgpu_vm_size == -1)
1287 return;
1288
83ca145d
ZJ
1289 if (amdgpu_vm_size < 1) {
1290 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1291 amdgpu_vm_size);
f3368128 1292 amdgpu_vm_size = -1;
83ca145d 1293 }
83ca145d
ZJ
1294}
1295
7951e376
RZ
1296static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1297{
1298 struct sysinfo si;
a9d4fe2f 1299 bool is_os_64 = (sizeof(void *) == 8);
7951e376
RZ
1300 uint64_t total_memory;
1301 uint64_t dram_size_seven_GB = 0x1B8000000;
1302 uint64_t dram_size_three_GB = 0xB8000000;
1303
1304 if (amdgpu_smu_memory_pool_size == 0)
1305 return;
1306
1307 if (!is_os_64) {
1308 DRM_WARN("Not 64-bit OS, feature not supported\n");
1309 goto def_value;
1310 }
1311 si_meminfo(&si);
1312 total_memory = (uint64_t)si.totalram * si.mem_unit;
1313
1314 if ((amdgpu_smu_memory_pool_size == 1) ||
1315 (amdgpu_smu_memory_pool_size == 2)) {
1316 if (total_memory < dram_size_three_GB)
1317 goto def_value1;
1318 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1319 (amdgpu_smu_memory_pool_size == 8)) {
1320 if (total_memory < dram_size_seven_GB)
1321 goto def_value1;
1322 } else {
1323 DRM_WARN("Smu memory pool size not supported\n");
1324 goto def_value;
1325 }
1326 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1327
1328 return;
1329
1330def_value1:
1331 DRM_WARN("No enough system memory\n");
1332def_value:
1333 adev->pm.smu_prv_buffer_size = 0;
1334}
1335
d38ceaf9 1336/**
06ec9070 1337 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
1338 *
1339 * @adev: amdgpu_device pointer
1340 *
1341 * Validates certain module parameters and updates
1342 * the associated values used by the driver (all asics).
1343 */
912dfc84 1344static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1345{
5b011235
CZ
1346 if (amdgpu_sched_jobs < 4) {
1347 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1348 amdgpu_sched_jobs);
1349 amdgpu_sched_jobs = 4;
76117507 1350 } else if (!is_power_of_2(amdgpu_sched_jobs)){
5b011235
CZ
1351 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1352 amdgpu_sched_jobs);
1353 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1354 }
d38ceaf9 1355
83e74db6 1356 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
1357 /* gart size must be greater or equal to 32M */
1358 dev_warn(adev->dev, "gart size (%d) too small\n",
1359 amdgpu_gart_size);
83e74db6 1360 amdgpu_gart_size = -1;
d38ceaf9
AD
1361 }
1362
36d38372 1363 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1364 /* gtt size must be greater or equal to 32M */
36d38372
CK
1365 dev_warn(adev->dev, "gtt size (%d) too small\n",
1366 amdgpu_gtt_size);
1367 amdgpu_gtt_size = -1;
d38ceaf9
AD
1368 }
1369
d07f14be
RH
1370 /* valid range is between 4 and 9 inclusive */
1371 if (amdgpu_vm_fragment_size != -1 &&
1372 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1373 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1374 amdgpu_vm_fragment_size = -1;
1375 }
1376
5d5bd5e3
KW
1377 if (amdgpu_sched_hw_submission < 2) {
1378 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1379 amdgpu_sched_hw_submission);
1380 amdgpu_sched_hw_submission = 2;
1381 } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1382 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1383 amdgpu_sched_hw_submission);
1384 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1385 }
1386
7951e376
RZ
1387 amdgpu_device_check_smu_prv_buffer_size(adev);
1388
06ec9070 1389 amdgpu_device_check_vm_size(adev);
d38ceaf9 1390
06ec9070 1391 amdgpu_device_check_block_size(adev);
6a7f76e7 1392
19aede77 1393 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84 1394
c6252390 1395 amdgpu_gmc_tmz_set(adev);
01a8dcec 1396
9b498efa
AD
1397 amdgpu_gmc_noretry_set(adev);
1398
e3c00faa 1399 return 0;
d38ceaf9
AD
1400}
1401
1402/**
1403 * amdgpu_switcheroo_set_state - set switcheroo state
1404 *
1405 * @pdev: pci dev pointer
1694467b 1406 * @state: vga_switcheroo state
d38ceaf9
AD
1407 *
1408 * Callback for the switcheroo driver. Suspends or resumes the
1409 * asics before or after they are powered up using ACPI methods.
1410 */
8aba21b7
LT
1411static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1412 enum vga_switcheroo_state state)
d38ceaf9
AD
1413{
1414 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 1415 int r;
d38ceaf9 1416
fd496ca8 1417 if (amdgpu_device_supports_atpx(dev) && state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
1418 return;
1419
1420 if (state == VGA_SWITCHEROO_ON) {
dd4fa6c1 1421 pr_info("switched on\n");
d38ceaf9
AD
1422 /* don't suspend or resume card normally */
1423 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1424
de185019 1425 pci_set_power_state(dev->pdev, PCI_D0);
c1dd4aa6 1426 amdgpu_device_load_pci_state(dev->pdev);
de185019
AD
1427 r = pci_enable_device(dev->pdev);
1428 if (r)
1429 DRM_WARN("pci_enable_device failed (%d)\n", r);
1430 amdgpu_device_resume(dev, true);
d38ceaf9 1431
d38ceaf9
AD
1432 dev->switch_power_state = DRM_SWITCH_POWER_ON;
1433 drm_kms_helper_poll_enable(dev);
1434 } else {
dd4fa6c1 1435 pr_info("switched off\n");
d38ceaf9
AD
1436 drm_kms_helper_poll_disable(dev);
1437 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
de185019 1438 amdgpu_device_suspend(dev, true);
c1dd4aa6 1439 amdgpu_device_cache_pci_state(dev->pdev);
de185019
AD
1440 /* Shut down the device */
1441 pci_disable_device(dev->pdev);
1442 pci_set_power_state(dev->pdev, PCI_D3cold);
d38ceaf9
AD
1443 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1444 }
1445}
1446
1447/**
1448 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1449 *
1450 * @pdev: pci dev pointer
1451 *
1452 * Callback for the switcheroo driver. Check if the switcheroo
1453 * state can be changed.
1454 * Returns true if the state can be changed, false if not.
1455 */
1456static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1457{
1458 struct drm_device *dev = pci_get_drvdata(pdev);
1459
1460 /*
1461 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1462 * locking inversion with the driver load path. And the access here is
1463 * completely racy anyway. So don't bother with locking for now.
1464 */
7e13ad89 1465 return atomic_read(&dev->open_count) == 0;
d38ceaf9
AD
1466}
1467
1468static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1469 .set_gpu_state = amdgpu_switcheroo_set_state,
1470 .reprobe = NULL,
1471 .can_switch = amdgpu_switcheroo_can_switch,
1472};
1473
e3ecdffa
AD
1474/**
1475 * amdgpu_device_ip_set_clockgating_state - set the CG state
1476 *
87e3f136 1477 * @dev: amdgpu_device pointer
e3ecdffa
AD
1478 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1479 * @state: clockgating state (gate or ungate)
1480 *
1481 * Sets the requested clockgating state for all instances of
1482 * the hardware IP specified.
1483 * Returns the error code from the last instance.
1484 */
43fa561f 1485int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1486 enum amd_ip_block_type block_type,
1487 enum amd_clockgating_state state)
d38ceaf9 1488{
43fa561f 1489 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1490 int i, r = 0;
1491
1492 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1493 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1494 continue;
c722865a
RZ
1495 if (adev->ip_blocks[i].version->type != block_type)
1496 continue;
1497 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1498 continue;
1499 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1500 (void *)adev, state);
1501 if (r)
1502 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1503 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1504 }
1505 return r;
1506}
1507
e3ecdffa
AD
1508/**
1509 * amdgpu_device_ip_set_powergating_state - set the PG state
1510 *
87e3f136 1511 * @dev: amdgpu_device pointer
e3ecdffa
AD
1512 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1513 * @state: powergating state (gate or ungate)
1514 *
1515 * Sets the requested powergating state for all instances of
1516 * the hardware IP specified.
1517 * Returns the error code from the last instance.
1518 */
43fa561f 1519int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1520 enum amd_ip_block_type block_type,
1521 enum amd_powergating_state state)
d38ceaf9 1522{
43fa561f 1523 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1524 int i, r = 0;
1525
1526 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1527 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1528 continue;
c722865a
RZ
1529 if (adev->ip_blocks[i].version->type != block_type)
1530 continue;
1531 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1532 continue;
1533 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1534 (void *)adev, state);
1535 if (r)
1536 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1537 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1538 }
1539 return r;
1540}
1541
e3ecdffa
AD
1542/**
1543 * amdgpu_device_ip_get_clockgating_state - get the CG state
1544 *
1545 * @adev: amdgpu_device pointer
1546 * @flags: clockgating feature flags
1547 *
1548 * Walks the list of IPs on the device and updates the clockgating
1549 * flags for each IP.
1550 * Updates @flags with the feature flags for each hardware IP where
1551 * clockgating is enabled.
1552 */
2990a1fc
AD
1553void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1554 u32 *flags)
6cb2d4e4
HR
1555{
1556 int i;
1557
1558 for (i = 0; i < adev->num_ip_blocks; i++) {
1559 if (!adev->ip_blocks[i].status.valid)
1560 continue;
1561 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1562 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1563 }
1564}
1565
e3ecdffa
AD
1566/**
1567 * amdgpu_device_ip_wait_for_idle - wait for idle
1568 *
1569 * @adev: amdgpu_device pointer
1570 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1571 *
1572 * Waits for the request hardware IP to be idle.
1573 * Returns 0 for success or a negative error code on failure.
1574 */
2990a1fc
AD
1575int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1576 enum amd_ip_block_type block_type)
5dbbb60b
AD
1577{
1578 int i, r;
1579
1580 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1581 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1582 continue;
a1255107
AD
1583 if (adev->ip_blocks[i].version->type == block_type) {
1584 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1585 if (r)
1586 return r;
1587 break;
1588 }
1589 }
1590 return 0;
1591
1592}
1593
e3ecdffa
AD
1594/**
1595 * amdgpu_device_ip_is_idle - is the hardware IP idle
1596 *
1597 * @adev: amdgpu_device pointer
1598 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1599 *
1600 * Check if the hardware IP is idle or not.
1601 * Returns true if the IP is idle, false if not.
1602 */
2990a1fc
AD
1603bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1604 enum amd_ip_block_type block_type)
5dbbb60b
AD
1605{
1606 int i;
1607
1608 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1609 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1610 continue;
a1255107
AD
1611 if (adev->ip_blocks[i].version->type == block_type)
1612 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1613 }
1614 return true;
1615
1616}
1617
e3ecdffa
AD
1618/**
1619 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1620 *
1621 * @adev: amdgpu_device pointer
87e3f136 1622 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1623 *
1624 * Returns a pointer to the hardware IP block structure
1625 * if it exists for the asic, otherwise NULL.
1626 */
2990a1fc
AD
1627struct amdgpu_ip_block *
1628amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1629 enum amd_ip_block_type type)
d38ceaf9
AD
1630{
1631 int i;
1632
1633 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1634 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1635 return &adev->ip_blocks[i];
1636
1637 return NULL;
1638}
1639
1640/**
2990a1fc 1641 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1642 *
1643 * @adev: amdgpu_device pointer
5fc3aeeb 1644 * @type: enum amd_ip_block_type
d38ceaf9
AD
1645 * @major: major version
1646 * @minor: minor version
1647 *
1648 * return 0 if equal or greater
1649 * return 1 if smaller or the ip_block doesn't exist
1650 */
2990a1fc
AD
1651int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1652 enum amd_ip_block_type type,
1653 u32 major, u32 minor)
d38ceaf9 1654{
2990a1fc 1655 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1656
a1255107
AD
1657 if (ip_block && ((ip_block->version->major > major) ||
1658 ((ip_block->version->major == major) &&
1659 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1660 return 0;
1661
1662 return 1;
1663}
1664
a1255107 1665/**
2990a1fc 1666 * amdgpu_device_ip_block_add
a1255107
AD
1667 *
1668 * @adev: amdgpu_device pointer
1669 * @ip_block_version: pointer to the IP to add
1670 *
1671 * Adds the IP block driver information to the collection of IPs
1672 * on the asic.
1673 */
2990a1fc
AD
1674int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1675 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1676{
1677 if (!ip_block_version)
1678 return -EINVAL;
1679
e966a725 1680 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1681 ip_block_version->funcs->name);
1682
a1255107
AD
1683 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1684
1685 return 0;
1686}
1687
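/*
 * Usage sketch (illustrative): the asic setup code registers its IP blocks
 * in order during early init, e.g. (the block names below are examples):
 *
 *   amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
 *   amdgpu_device_ip_block_add(adev, &gmc_v8_0_ip_block);
 *   amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block);
 */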
e3ecdffa
AD
1688/**
1689 * amdgpu_device_enable_virtual_display - enable virtual display feature
1690 *
1691 * @adev: amdgpu_device pointer
1692 *
1693 * Enables the virtual display feature if the user has enabled it via
1694 * the module parameter virtual_display. This feature provides a virtual
1695 * display hardware on headless boards or in virtualized environments.
1696 * This function parses and validates the configuration string specified by
1697 * the user and configures the virtual display configuration (number of
1698 * virtual connectors, crtcs, etc.) specified.
1699 */
483ef985 1700static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1701{
1702 adev->enable_virtual_display = false;
1703
1704 if (amdgpu_virtual_display) {
4a580877 1705 struct drm_device *ddev = adev_to_drm(adev);
9accf2fd 1706 const char *pci_address_name = pci_name(ddev->pdev);
0f66356d 1707 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1708
1709 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1710 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1711 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1712 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1713 if (!strcmp("all", pciaddname)
1714 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1715 long num_crtc;
1716 int res = -1;
1717
9accf2fd 1718 adev->enable_virtual_display = true;
0f66356d
ED
1719
1720 if (pciaddname_tmp)
1721 res = kstrtol(pciaddname_tmp, 10,
1722 &num_crtc);
1723
1724 if (!res) {
1725 if (num_crtc < 1)
1726 num_crtc = 1;
1727 if (num_crtc > 6)
1728 num_crtc = 6;
1729 adev->mode_info.num_crtc = num_crtc;
1730 } else {
1731 adev->mode_info.num_crtc = 1;
1732 }
9accf2fd
ED
1733 break;
1734 }
1735 }
1736
0f66356d
ED
1737 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1738 amdgpu_virtual_display, pci_address_name,
1739 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
1740
1741 kfree(pciaddstr);
1742 }
1743}
1744
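/*
 * Illustrative only: the virtual_display module parameter takes a
 * semicolon-separated list of "<pci address>,<num crtcs>" entries, for
 * example (the PCI address below is a placeholder):
 *
 *   modprobe amdgpu virtual_display=0000:03:00.0,2
 *
 * which would enable two virtual crtcs on that device.
 */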
e3ecdffa
AD
1745/**
1746 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1747 *
1748 * @adev: amdgpu_device pointer
1749 *
1750 * Parses the asic configuration parameters specified in the gpu info
1751 * firmware and makes them available to the driver for use in configuring
1752 * the asic.
1753 * Returns 0 on success, -EINVAL on failure.
1754 */
e2a75f88
AD
1755static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1756{
e2a75f88 1757 const char *chip_name;
c0a43457 1758 char fw_name[40];
e2a75f88
AD
1759 int err;
1760 const struct gpu_info_firmware_header_v1_0 *hdr;
1761
ab4fe3e1
HR
1762 adev->firmware.gpu_info_fw = NULL;
1763
72de33f8 1764 if (adev->mman.discovery_bin) {
258620d0 1765 amdgpu_discovery_get_gfx_info(adev);
cc375d8c
TY
1766
1767 /*
1768 * FIXME: The bounding box is still needed by Navi12, so
1769 * temporarily read it from gpu_info firmware. Should be dropped
1770 * when DAL no longer needs it.
1771 */
1772 if (adev->asic_type != CHIP_NAVI12)
1773 return 0;
258620d0
AD
1774 }
1775
e2a75f88 1776 switch (adev->asic_type) {
e2a75f88
AD
1777#ifdef CONFIG_DRM_AMDGPU_SI
1778 case CHIP_VERDE:
1779 case CHIP_TAHITI:
1780 case CHIP_PITCAIRN:
1781 case CHIP_OLAND:
1782 case CHIP_HAINAN:
1783#endif
1784#ifdef CONFIG_DRM_AMDGPU_CIK
1785 case CHIP_BONAIRE:
1786 case CHIP_HAWAII:
1787 case CHIP_KAVERI:
1788 case CHIP_KABINI:
1789 case CHIP_MULLINS:
1790#endif
da87c30b
AD
1791 case CHIP_TOPAZ:
1792 case CHIP_TONGA:
1793 case CHIP_FIJI:
1794 case CHIP_POLARIS10:
1795 case CHIP_POLARIS11:
1796 case CHIP_POLARIS12:
1797 case CHIP_VEGAM:
1798 case CHIP_CARRIZO:
1799 case CHIP_STONEY:
27c0bc71 1800 case CHIP_VEGA20:
84d244a3
JC
1801 case CHIP_SIENNA_CICHLID:
1802 case CHIP_NAVY_FLOUNDER:
eac88a5f 1803 case CHIP_DIMGREY_CAVEFISH:
e2a75f88
AD
1804 default:
1805 return 0;
1806 case CHIP_VEGA10:
1807 chip_name = "vega10";
1808 break;
3f76dced
AD
1809 case CHIP_VEGA12:
1810 chip_name = "vega12";
1811 break;
2d2e5e7e 1812 case CHIP_RAVEN:
54f78a76 1813 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
54c4d17e 1814 chip_name = "raven2";
54f78a76 1815 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
741deade 1816 chip_name = "picasso";
54c4d17e
FX
1817 else
1818 chip_name = "raven";
2d2e5e7e 1819 break;
65e60f6e
LM
1820 case CHIP_ARCTURUS:
1821 chip_name = "arcturus";
1822 break;
b51a26a0 1823 case CHIP_RENOIR:
2e62f0b5
PL
1824 if (adev->apu_flags & AMD_APU_IS_RENOIR)
1825 chip_name = "renoir";
1826 else
1827 chip_name = "green_sardine";
b51a26a0 1828 break;
23c6268e
HR
1829 case CHIP_NAVI10:
1830 chip_name = "navi10";
1831 break;
ed42cfe1
XY
1832 case CHIP_NAVI14:
1833 chip_name = "navi14";
1834 break;
42b325e5
XY
1835 case CHIP_NAVI12:
1836 chip_name = "navi12";
1837 break;
4e52a9f8
HR
1838 case CHIP_VANGOGH:
1839 chip_name = "vangogh";
1840 break;
e2a75f88
AD
1841 }
1842
1843 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
ab4fe3e1 1844 err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
e2a75f88
AD
1845 if (err) {
1846 dev_err(adev->dev,
1847 "Failed to load gpu_info firmware \"%s\"\n",
1848 fw_name);
1849 goto out;
1850 }
ab4fe3e1 1851 err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
e2a75f88
AD
1852 if (err) {
1853 dev_err(adev->dev,
1854 "Failed to validate gpu_info firmware \"%s\"\n",
1855 fw_name);
1856 goto out;
1857 }
1858
ab4fe3e1 1859 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
1860 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1861
1862 switch (hdr->version_major) {
1863 case 1:
1864 {
1865 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 1866 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
1867 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1868
cc375d8c
TY
1869 /*
1870 * Should be dropped when DAL no longer needs it.
1871 */
1872 if (adev->asic_type == CHIP_NAVI12)
ec51d3fa
XY
1873 goto parse_soc_bounding_box;
1874
b5ab16bf
AD
1875 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1876 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1877 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1878 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 1879 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
1880 le32_to_cpu(gpu_info_fw->gc_num_tccs);
1881 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1882 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1883 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1884 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 1885 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
1886 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1887 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
1888 adev->gfx.cu_info.max_waves_per_simd =
1889 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1890 adev->gfx.cu_info.max_scratch_slots_per_cu =
1891 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1892 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 1893 if (hdr->version_minor >= 1) {
35c2e910
HZ
1894 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1895 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1896 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1897 adev->gfx.config.num_sc_per_sh =
1898 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1899 adev->gfx.config.num_packer_per_sc =
1900 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1901 }
ec51d3fa
XY
1902
1903parse_soc_bounding_box:
ec51d3fa
XY
1904 /*
1905 * soc bounding box info is not integrated in the discovery table,
258620d0 1906 * so when it is needed we always have to parse it from the gpu info firmware.
ec51d3fa 1907 */
48321c3d
HW
1908 if (hdr->version_minor == 2) {
1909 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1910 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1911 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1912 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1913 }
e2a75f88
AD
1914 break;
1915 }
1916 default:
1917 dev_err(adev->dev,
1918 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1919 err = -EINVAL;
1920 goto out;
1921 }
1922out:
e2a75f88
AD
1923 return err;
1924}
1925
e3ecdffa
AD
1926/**
1927 * amdgpu_device_ip_early_init - run early init for hardware IPs
1928 *
1929 * @adev: amdgpu_device pointer
1930 *
1931 * Early initialization pass for hardware IPs. The hardware IPs that make
1932 * up each asic are discovered and each IP's early_init callback is run. This
1933 * is the first stage in initializing the asic.
1934 * Returns 0 on success, negative error code on failure.
1935 */
06ec9070 1936static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 1937{
aaa36a97 1938 int i, r;
d38ceaf9 1939
483ef985 1940 amdgpu_device_enable_virtual_display(adev);
a6be7570 1941
00a979f3 1942 if (amdgpu_sriov_vf(adev)) {
00a979f3 1943 r = amdgpu_virt_request_full_gpu(adev, true);
aaa36a97
AD
1944 if (r)
1945 return r;
00a979f3
WS
1946 }
1947
d38ceaf9 1948 switch (adev->asic_type) {
33f34802
KW
1949#ifdef CONFIG_DRM_AMDGPU_SI
1950 case CHIP_VERDE:
1951 case CHIP_TAHITI:
1952 case CHIP_PITCAIRN:
1953 case CHIP_OLAND:
1954 case CHIP_HAINAN:
295d0daf 1955 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
1956 r = si_set_ip_blocks(adev);
1957 if (r)
1958 return r;
1959 break;
1960#endif
a2e73f56
AD
1961#ifdef CONFIG_DRM_AMDGPU_CIK
1962 case CHIP_BONAIRE:
1963 case CHIP_HAWAII:
1964 case CHIP_KAVERI:
1965 case CHIP_KABINI:
1966 case CHIP_MULLINS:
e1ad2d53 1967 if (adev->flags & AMD_IS_APU)
a2e73f56 1968 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
1969 else
1970 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
1971
1972 r = cik_set_ip_blocks(adev);
1973 if (r)
1974 return r;
1975 break;
1976#endif
da87c30b
AD
1977 case CHIP_TOPAZ:
1978 case CHIP_TONGA:
1979 case CHIP_FIJI:
1980 case CHIP_POLARIS10:
1981 case CHIP_POLARIS11:
1982 case CHIP_POLARIS12:
1983 case CHIP_VEGAM:
1984 case CHIP_CARRIZO:
1985 case CHIP_STONEY:
1986 if (adev->flags & AMD_IS_APU)
1987 adev->family = AMDGPU_FAMILY_CZ;
1988 else
1989 adev->family = AMDGPU_FAMILY_VI;
1990
1991 r = vi_set_ip_blocks(adev);
1992 if (r)
1993 return r;
1994 break;
e48a3cd9
AD
1995 case CHIP_VEGA10:
1996 case CHIP_VEGA12:
e4bd8170 1997 case CHIP_VEGA20:
e48a3cd9 1998 case CHIP_RAVEN:
61cf44c1 1999 case CHIP_ARCTURUS:
b51a26a0 2000 case CHIP_RENOIR:
70534d1e 2001 if (adev->flags & AMD_IS_APU)
2ca8a5d2
CZ
2002 adev->family = AMDGPU_FAMILY_RV;
2003 else
2004 adev->family = AMDGPU_FAMILY_AI;
460826e6
KW
2005
2006 r = soc15_set_ip_blocks(adev);
2007 if (r)
2008 return r;
2009 break;
0a5b8c7b 2010 case CHIP_NAVI10:
7ecb5cd4 2011 case CHIP_NAVI14:
4808cf9c 2012 case CHIP_NAVI12:
11e8aef5 2013 case CHIP_SIENNA_CICHLID:
41f446bf 2014 case CHIP_NAVY_FLOUNDER:
144722fa 2015 case CHIP_DIMGREY_CAVEFISH:
4e52a9f8
HR
2016 case CHIP_VANGOGH:
2017 if (adev->asic_type == CHIP_VANGOGH)
2018 adev->family = AMDGPU_FAMILY_VGH;
2019 else
2020 adev->family = AMDGPU_FAMILY_NV;
0a5b8c7b
HR
2021
2022 r = nv_set_ip_blocks(adev);
2023 if (r)
2024 return r;
2025 break;
d38ceaf9
AD
2026 default:
2027 /* FIXME: not supported yet */
2028 return -EINVAL;
2029 }
2030
1884734a 2031 amdgpu_amdkfd_device_probe(adev);
2032
3b94fb10 2033 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 2034 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 2035 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
00f54b97 2036
d38ceaf9
AD
2037 for (i = 0; i < adev->num_ip_blocks; i++) {
2038 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
ed8cf00c
HR
2039 DRM_ERROR("disabled ip block: %d <%s>\n",
2040 i, adev->ip_blocks[i].version->funcs->name);
a1255107 2041 adev->ip_blocks[i].status.valid = false;
d38ceaf9 2042 } else {
a1255107
AD
2043 if (adev->ip_blocks[i].version->funcs->early_init) {
2044 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 2045 if (r == -ENOENT) {
a1255107 2046 adev->ip_blocks[i].status.valid = false;
2c1a2784 2047 } else if (r) {
a1255107
AD
2048 DRM_ERROR("early_init of IP block <%s> failed %d\n",
2049 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2050 return r;
2c1a2784 2051 } else {
a1255107 2052 adev->ip_blocks[i].status.valid = true;
2c1a2784 2053 }
974e6b64 2054 } else {
a1255107 2055 adev->ip_blocks[i].status.valid = true;
d38ceaf9 2056 }
d38ceaf9 2057 }
21a249ca
AD
2058 /* get the vbios after the asic_funcs are set up */
2059 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
2060 r = amdgpu_device_parse_gpu_info_fw(adev);
2061 if (r)
2062 return r;
2063
21a249ca
AD
2064 /* Read BIOS */
2065 if (!amdgpu_get_bios(adev))
2066 return -EINVAL;
2067
2068 r = amdgpu_atombios_init(adev);
2069 if (r) {
2070 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2071 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2072 return r;
2073 }
2074 }
d38ceaf9
AD
2075 }
2076
395d1fb9
NH
2077 adev->cg_flags &= amdgpu_cg_mask;
2078 adev->pg_flags &= amdgpu_pg_mask;
2079
d38ceaf9
AD
2080 return 0;
2081}
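/*
 * Illustrative note (a sketch, value is an example only): the loop above
 * honours the ip_block_mask module parameter, where bit i corresponds to
 * adev->ip_blocks[i]. For example:
 *
 *	modprobe amdgpu ip_block_mask=0xfffffffd
 *
 * clears bit 1, marking the second discovered IP block invalid so its
 * later sw_init/hw_init callbacks are skipped. Similarly, cg_mask and
 * pg_mask are ANDed into adev->cg_flags and adev->pg_flags at the end of
 * this function.
 */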
2082
0a4f2520
RZ
2083static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2084{
2085 int i, r;
2086
2087 for (i = 0; i < adev->num_ip_blocks; i++) {
2088 if (!adev->ip_blocks[i].status.sw)
2089 continue;
2090 if (adev->ip_blocks[i].status.hw)
2091 continue;
2092 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 2093 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
2094 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2095 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2096 if (r) {
2097 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2098 adev->ip_blocks[i].version->funcs->name, r);
2099 return r;
2100 }
2101 adev->ip_blocks[i].status.hw = true;
2102 }
2103 }
2104
2105 return 0;
2106}
2107
2108static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2109{
2110 int i, r;
2111
2112 for (i = 0; i < adev->num_ip_blocks; i++) {
2113 if (!adev->ip_blocks[i].status.sw)
2114 continue;
2115 if (adev->ip_blocks[i].status.hw)
2116 continue;
2117 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2118 if (r) {
2119 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2120 adev->ip_blocks[i].version->funcs->name, r);
2121 return r;
2122 }
2123 adev->ip_blocks[i].status.hw = true;
2124 }
2125
2126 return 0;
2127}
2128
7a3e0bb2
RZ
2129static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2130{
2131 int r = 0;
2132 int i;
80f41f84 2133 uint32_t smu_version;
7a3e0bb2
RZ
2134
2135 if (adev->asic_type >= CHIP_VEGA10) {
2136 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
2137 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2138 continue;
2139
2140 /* no need to do the fw loading again if already done */
2141 if (adev->ip_blocks[i].status.hw == true)
2142 break;
2143
53b3f8f4 2144 if (amdgpu_in_reset(adev) || adev->in_suspend) {
482f0e53
ML
2145 r = adev->ip_blocks[i].version->funcs->resume(adev);
2146 if (r) {
2147 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 2148 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
2149 return r;
2150 }
2151 } else {
2152 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2153 if (r) {
2154 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2155 adev->ip_blocks[i].version->funcs->name, r);
2156 return r;
7a3e0bb2 2157 }
7a3e0bb2 2158 }
482f0e53
ML
2159
2160 adev->ip_blocks[i].status.hw = true;
2161 break;
7a3e0bb2
RZ
2162 }
2163 }
482f0e53 2164
8973d9ec
ED
2165 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2166 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 2167
80f41f84 2168 return r;
7a3e0bb2
RZ
2169}
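/*
 * Illustrative ordering (a sketch of the sequence used by
 * amdgpu_device_ip_init() below):
 *
 *	amdgpu_device_ip_hw_init_phase1(adev);	bring up COMMON and IH
 *						(and PSP when running SR-IOV)
 *	amdgpu_device_fw_loading(adev);		load PSP/SMU firmware
 *	amdgpu_device_ip_hw_init_phase2(adev);	hw_init the remaining blocks
 *
 * amdgpu_device_ip_resume() follows the same pattern with the resume
 * phase 1/2 helpers.
 */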
2170
e3ecdffa
AD
2171/**
2172 * amdgpu_device_ip_init - run init for hardware IPs
2173 *
2174 * @adev: amdgpu_device pointer
2175 *
2176 * Main initialization pass for hardware IPs. The list of all the hardware
2177 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2178 * are run. sw_init initializes the software state associated with each IP
2179 * and hw_init initializes the hardware associated with each IP.
2180 * Returns 0 on success, negative error code on failure.
2181 */
06ec9070 2182static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
2183{
2184 int i, r;
2185
c030f2e4 2186 r = amdgpu_ras_init(adev);
2187 if (r)
2188 return r;
2189
d38ceaf9 2190 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2191 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2192 continue;
a1255107 2193 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 2194 if (r) {
a1255107
AD
2195 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2196 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 2197 goto init_failed;
2c1a2784 2198 }
a1255107 2199 adev->ip_blocks[i].status.sw = true;
bfca0289 2200
d38ceaf9 2201 /* need to do gmc hw init early so we can allocate gpu mem */
a1255107 2202 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
06ec9070 2203 r = amdgpu_device_vram_scratch_init(adev);
2c1a2784
AD
2204 if (r) {
2205 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
72d3f592 2206 goto init_failed;
2c1a2784 2207 }
a1255107 2208 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
2209 if (r) {
2210 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 2211 goto init_failed;
2c1a2784 2212 }
06ec9070 2213 r = amdgpu_device_wb_init(adev);
2c1a2784 2214 if (r) {
06ec9070 2215 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 2216 goto init_failed;
2c1a2784 2217 }
a1255107 2218 adev->ip_blocks[i].status.hw = true;
2493664f
ML
2219
2220 /* right after GMC hw init, we create CSA */
f92d5c61 2221 if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
1e256e27
RZ
2222 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2223 AMDGPU_GEM_DOMAIN_VRAM,
2224 AMDGPU_CSA_SIZE);
2493664f
ML
2225 if (r) {
2226 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 2227 goto init_failed;
2493664f
ML
2228 }
2229 }
d38ceaf9
AD
2230 }
2231 }
2232
c9ffa427
YT
2233 if (amdgpu_sriov_vf(adev))
2234 amdgpu_virt_init_data_exchange(adev);
2235
533aed27
AG
2236 r = amdgpu_ib_pool_init(adev);
2237 if (r) {
2238 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2239 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2240 goto init_failed;
2241 }
2242
c8963ea4
RZ
2243 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2244 if (r)
72d3f592 2245 goto init_failed;
0a4f2520
RZ
2246
2247 r = amdgpu_device_ip_hw_init_phase1(adev);
2248 if (r)
72d3f592 2249 goto init_failed;
0a4f2520 2250
7a3e0bb2
RZ
2251 r = amdgpu_device_fw_loading(adev);
2252 if (r)
72d3f592 2253 goto init_failed;
7a3e0bb2 2254
0a4f2520
RZ
2255 r = amdgpu_device_ip_hw_init_phase2(adev);
2256 if (r)
72d3f592 2257 goto init_failed;
d38ceaf9 2258
121a2bc6
AG
2259 /*
2260 * retired pages will be loaded from eeprom and reserved here,
2261 * it should be called after amdgpu_device_ip_hw_init_phase2 since
2262 * for some ASICs the RAS EEPROM code relies on the SMU being fully
2263 * functional for I2C communication, which is only true at this point.
b82e65a9
GC
2264 *
2265 * amdgpu_ras_recovery_init may fail, but the upper layers only care
2266 * about failures caused by a bad gpu state and stop the amdgpu init
2267 * process accordingly. For other failure cases, it will still release
2268 * all the resources and print an error message, rather than returning
2269 * a negative value to the upper level.
121a2bc6
AG
2270 *
2271 * Note: theoretically, this should be called before all vram allocations
2272 * to protect retired pages from being reused.
2273 */
b82e65a9
GC
2274 r = amdgpu_ras_recovery_init(adev);
2275 if (r)
2276 goto init_failed;
121a2bc6 2277
3e2e2ab5
HZ
2278 if (adev->gmc.xgmi.num_physical_nodes > 1)
2279 amdgpu_xgmi_add_device(adev);
1884734a 2280 amdgpu_amdkfd_device_init(adev);
c6332b97 2281
bd607166
KR
2282 amdgpu_fru_get_product_info(adev);
2283
72d3f592 2284init_failed:
c9ffa427 2285 if (amdgpu_sriov_vf(adev))
c6332b97 2286 amdgpu_virt_release_full_gpu(adev, true);
2287
72d3f592 2288 return r;
d38ceaf9
AD
2289}
2290
e3ecdffa
AD
2291/**
2292 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2293 *
2294 * @adev: amdgpu_device pointer
2295 *
2296 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2297 * this function before a GPU reset. If the value is retained after a
2298 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2299 */
06ec9070 2300static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2301{
2302 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2303}
2304
e3ecdffa
AD
2305/**
2306 * amdgpu_device_check_vram_lost - check if vram is valid
2307 *
2308 * @adev: amdgpu_device pointer
2309 *
2310 * Checks the reset magic value written to the gart pointer in VRAM.
2311 * The driver calls this after a GPU reset to see if the contents of
2312 * VRAM have been lost or not.
2313 * Returns true if vram is lost, false if not.
2314 */
06ec9070 2315static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2316{
dadce777
EQ
2317 if (memcmp(adev->gart.ptr, adev->reset_magic,
2318 AMDGPU_RESET_MAGIC_NUM))
2319 return true;
2320
53b3f8f4 2321 if (!amdgpu_in_reset(adev))
dadce777
EQ
2322 return false;
2323
2324 /*
2325 * For all ASICs with baco/mode1 reset, the VRAM is
2326 * always assumed to be lost.
2327 */
2328 switch (amdgpu_asic_reset_method(adev)) {
2329 case AMD_RESET_METHOD_BACO:
2330 case AMD_RESET_METHOD_MODE1:
2331 return true;
2332 default:
2333 return false;
2334 }
0c49e0b8
CZ
2335}
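/*
 * Illustrative pairing (a sketch): the magic is written once the IP
 * blocks are up (see amdgpu_device_ip_late_init() below) and checked
 * again after a reset, roughly:
 *
 *	amdgpu_device_fill_reset_magic(adev);
 *	... GPU reset happens ...
 *	vram_lost = amdgpu_device_check_vram_lost(adev);
 *	if (vram_lost)
 *		recover_vram_contents(adev);
 *
 * recover_vram_contents() is a hypothetical placeholder for restoring or
 * re-validating buffers backed by VRAM.
 */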
2336
e3ecdffa 2337/**
1112a46b 2338 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2339 *
2340 * @adev: amdgpu_device pointer
b8b72130 2341 * @state: clockgating state (gate or ungate)
e3ecdffa 2342 *
e3ecdffa 2343 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2344 * set_clockgating_state callbacks are run.
2345 * During late init this pass enables clockgating for the hardware IPs;
2346 * during fini or suspend it disables clockgating for them.
e3ecdffa
AD
2347 * Returns 0 on success, negative error code on failure.
2348 */
fdd34271 2349
1112a46b
RZ
2350static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2351 enum amd_clockgating_state state)
d38ceaf9 2352{
1112a46b 2353 int i, j, r;
d38ceaf9 2354
4a2ba394
SL
2355 if (amdgpu_emu_mode == 1)
2356 return 0;
2357
1112a46b
RZ
2358 for (j = 0; j < adev->num_ip_blocks; j++) {
2359 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2360 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2361 continue;
4a446d55 2362 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2363 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2364 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2365 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2366 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2367 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2368 /* enable clockgating to save power */
a1255107 2369 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2370 state);
4a446d55
AD
2371 if (r) {
2372 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2373 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2374 return r;
2375 }
b0b00ff1 2376 }
d38ceaf9 2377 }
06b18f61 2378
c9f96fd5
RZ
2379 return 0;
2380}
2381
1112a46b 2382static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
c9f96fd5 2383{
1112a46b 2384 int i, j, r;
06b18f61 2385
c9f96fd5
RZ
2386 if (amdgpu_emu_mode == 1)
2387 return 0;
2388
1112a46b
RZ
2389 for (j = 0; j < adev->num_ip_blocks; j++) {
2390 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2391 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5
RZ
2392 continue;
2393 /* skip PG for VCE/UVD, it's handled specially */
2394 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2395 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2396 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2397 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2398 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2399 /* enable powergating to save power */
2400 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2401 state);
c9f96fd5
RZ
2402 if (r) {
2403 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2404 adev->ip_blocks[i].version->funcs->name, r);
2405 return r;
2406 }
2407 }
2408 }
2dc80b00
S
2409 return 0;
2410}
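/*
 * Illustrative usage (a sketch): the two helpers above are typically
 * called as a pair, gating on the way up and ungating on the way down:
 *
 *	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);	(late init)
 *	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
 *
 *	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);	(fini/suspend)
 *	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
 */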
2411
beff74bc
AD
2412static int amdgpu_device_enable_mgpu_fan_boost(void)
2413{
2414 struct amdgpu_gpu_instance *gpu_ins;
2415 struct amdgpu_device *adev;
2416 int i, ret = 0;
2417
2418 mutex_lock(&mgpu_info.mutex);
2419
2420 /*
2421 * MGPU fan boost feature should be enabled
2422 * only when there are two or more dGPUs in
2423 * the system
2424 */
2425 if (mgpu_info.num_dgpu < 2)
2426 goto out;
2427
2428 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2429 gpu_ins = &(mgpu_info.gpu_ins[i]);
2430 adev = gpu_ins->adev;
2431 if (!(adev->flags & AMD_IS_APU) &&
f10bb940 2432 !gpu_ins->mgpu_fan_enabled) {
beff74bc
AD
2433 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2434 if (ret)
2435 break;
2436
2437 gpu_ins->mgpu_fan_enabled = 1;
2438 }
2439 }
2440
2441out:
2442 mutex_unlock(&mgpu_info.mutex);
2443
2444 return ret;
2445}
2446
e3ecdffa
AD
2447/**
2448 * amdgpu_device_ip_late_init - run late init for hardware IPs
2449 *
2450 * @adev: amdgpu_device pointer
2451 *
2452 * Late initialization pass for hardware IPs. The list of all the hardware
2453 * IPs that make up the asic is walked and the late_init callbacks are run.
2454 * late_init covers any special initialization that an IP requires
2455 * after all of them have been initialized or something that needs to happen
2456 * late in the init process.
2457 * Returns 0 on success, negative error code on failure.
2458 */
06ec9070 2459static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2460{
60599a03 2461 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2462 int i = 0, r;
2463
2464 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2465 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2466 continue;
2467 if (adev->ip_blocks[i].version->funcs->late_init) {
2468 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2469 if (r) {
2470 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2471 adev->ip_blocks[i].version->funcs->name, r);
2472 return r;
2473 }
2dc80b00 2474 }
73f847db 2475 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2476 }
2477
a891d239
DL
2478 amdgpu_ras_set_error_query_ready(adev, true);
2479
1112a46b
RZ
2480 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2481 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2482
06ec9070 2483 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2484
beff74bc
AD
2485 r = amdgpu_device_enable_mgpu_fan_boost();
2486 if (r)
2487 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2488
60599a03
EQ
2489
2490 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2491 mutex_lock(&mgpu_info.mutex);
2492
2493 /*
2494 * Reset device p-state to low as this was booted with high.
2495 *
2496 * This should be performed only after all devices from the same
2497 * hive get initialized.
2498 *
2499 * However, the number of devices in a hive is unknown in advance,
2500 * since it is counted one by one as the devices are initialized.
2501 *
2502 * So we wait until all XGMI interlinked devices are initialized.
2503 * This may bring some delay as those devices may come from
2504 * different hives. But that should be OK.
2505 */
2506 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2507 for (i = 0; i < mgpu_info.num_gpu; i++) {
2508 gpu_instance = &(mgpu_info.gpu_ins[i]);
2509 if (gpu_instance->adev->flags & AMD_IS_APU)
2510 continue;
2511
d84a430d
JK
2512 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2513 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
2514 if (r) {
2515 DRM_ERROR("pstate setting failed (%d).\n", r);
2516 break;
2517 }
2518 }
2519 }
2520
2521 mutex_unlock(&mgpu_info.mutex);
2522 }
2523
d38ceaf9
AD
2524 return 0;
2525}
2526
e3ecdffa
AD
2527/**
2528 * amdgpu_device_ip_fini - run fini for hardware IPs
2529 *
2530 * @adev: amdgpu_device pointer
2531 *
2532 * Main teardown pass for hardware IPs. The list of all the hardware
2533 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2534 * are run. hw_fini tears down the hardware associated with each IP
2535 * and sw_fini tears down any software state associated with each IP.
2536 * Returns 0 on success, negative error code on failure.
2537 */
06ec9070 2538static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
d38ceaf9
AD
2539{
2540 int i, r;
2541
5278a159
SY
2542 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2543 amdgpu_virt_release_ras_err_handler_data(adev);
2544
c030f2e4 2545 amdgpu_ras_pre_fini(adev);
2546
a82400b5
AG
2547 if (adev->gmc.xgmi.num_physical_nodes > 1)
2548 amdgpu_xgmi_remove_device(adev);
2549
05df1f01 2550 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2551 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2552
88e21af1
DL
2553 amdgpu_amdkfd_device_fini(adev);
2554
3e96dbfd
AD
2555 /* need to disable SMC first */
2556 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2557 if (!adev->ip_blocks[i].status.hw)
3e96dbfd 2558 continue;
fdd34271 2559 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
a1255107 2560 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3e96dbfd
AD
2561 /* XXX handle errors */
2562 if (r) {
2563 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
a1255107 2564 adev->ip_blocks[i].version->funcs->name, r);
3e96dbfd 2565 }
a1255107 2566 adev->ip_blocks[i].status.hw = false;
3e96dbfd
AD
2567 break;
2568 }
2569 }
2570
d38ceaf9 2571 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2572 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2573 continue;
8201a67a 2574
a1255107 2575 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2576 /* XXX handle errors */
2c1a2784 2577 if (r) {
a1255107
AD
2578 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2579 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2580 }
8201a67a 2581
a1255107 2582 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2583 }
2584
9950cda2 2585
d38ceaf9 2586 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2587 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2588 continue;
c12aba3a
ML
2589
2590 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2591 amdgpu_ucode_free_bo(adev);
1e256e27 2592 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a
ML
2593 amdgpu_device_wb_fini(adev);
2594 amdgpu_device_vram_scratch_fini(adev);
533aed27 2595 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2596 }
2597
a1255107 2598 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2599 /* XXX handle errors */
2c1a2784 2600 if (r) {
a1255107
AD
2601 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2602 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2603 }
a1255107
AD
2604 adev->ip_blocks[i].status.sw = false;
2605 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2606 }
2607
a6dcfd9c 2608 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2609 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2610 continue;
a1255107
AD
2611 if (adev->ip_blocks[i].version->funcs->late_fini)
2612 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2613 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2614 }
2615
c030f2e4 2616 amdgpu_ras_fini(adev);
2617
030308fc 2618 if (amdgpu_sriov_vf(adev))
24136135
ML
2619 if (amdgpu_virt_release_full_gpu(adev, false))
2620 DRM_ERROR("failed to release exclusive mode on fini\n");
2493664f 2621
d38ceaf9
AD
2622 return 0;
2623}
2624
e3ecdffa 2625/**
beff74bc 2626 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2627 *
1112a46b 2628 * @work: work_struct.
e3ecdffa 2629 */
beff74bc 2630static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2631{
2632 struct amdgpu_device *adev =
beff74bc 2633 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2634 int r;
2635
2636 r = amdgpu_ib_ring_tests(adev);
2637 if (r)
2638 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2639}
2640
1e317b99
RZ
2641static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2642{
2643 struct amdgpu_device *adev =
2644 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2645
2646 mutex_lock(&adev->gfx.gfx_off_mutex);
2647 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2648 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2649 adev->gfx.gfx_off_state = true;
2650 }
2651 mutex_unlock(&adev->gfx.gfx_off_mutex);
2652}
2653
e3ecdffa 2654/**
e7854a03 2655 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2656 *
2657 * @adev: amdgpu_device pointer
2658 *
2659 * Main suspend function for hardware IPs. The list of all the hardware
2660 * IPs that make up the asic is walked, clockgating is disabled and the
2661 * suspend callbacks are run. suspend puts the hardware and software state
2662 * in each IP into a state suitable for suspend.
2663 * Returns 0 on success, negative error code on failure.
2664 */
e7854a03
AD
2665static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2666{
2667 int i, r;
2668
9ca5b8a1 2669 if (!amdgpu_acpi_is_s0ix_supported(adev) || amdgpu_in_reset(adev)) {
628c36d7
PL
2670 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2671 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2672 }
05df1f01 2673
e7854a03
AD
2674 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2675 if (!adev->ip_blocks[i].status.valid)
2676 continue;
2b9f7848 2677
e7854a03 2678 /* displays are handled separately */
2b9f7848
ND
2679 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
2680 continue;
2681
2682 /* XXX handle errors */
2683 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2684 /* XXX handle errors */
2685 if (r) {
2686 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2687 adev->ip_blocks[i].version->funcs->name, r);
2688 return r;
e7854a03 2689 }
2b9f7848
ND
2690
2691 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
2692 }
2693
e7854a03
AD
2694 return 0;
2695}
2696
2697/**
2698 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2699 *
2700 * @adev: amdgpu_device pointer
2701 *
2702 * Main suspend function for hardware IPs. The list of all the hardware
2703 * IPs that make up the asic is walked, clockgating is disabled and the
2704 * suspend callbacks are run. suspend puts the hardware and software state
2705 * in each IP into a state suitable for suspend.
2706 * Returns 0 on success, negative error code on failure.
2707 */
2708static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2709{
2710 int i, r;
2711
2712 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2713 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2714 continue;
e7854a03
AD
2715 /* displays are handled in phase1 */
2716 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2717 continue;
bff77e86
LM
2718 /* PSP lost connection when err_event_athub occurs */
2719 if (amdgpu_ras_intr_triggered() &&
2720 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2721 adev->ip_blocks[i].status.hw = false;
2722 continue;
2723 }
d38ceaf9 2724 /* XXX handle errors */
a1255107 2725 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 2726 /* XXX handle errors */
2c1a2784 2727 if (r) {
a1255107
AD
2728 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2729 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2730 }
876923fb 2731 adev->ip_blocks[i].status.hw = false;
a3a09142 2732 /* handle putting the SMC in the appropriate state */
86b93fd6
JZ
2733 if (!amdgpu_sriov_vf(adev)) {
2734 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2735 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
2736 if (r) {
2737 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
2738 adev->mp1_state, r);
2739 return r;
2740 }
a3a09142
AD
2741 }
2742 }
b5507c7e 2743 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2744 }
2745
2746 return 0;
2747}
2748
e7854a03
AD
2749/**
2750 * amdgpu_device_ip_suspend - run suspend for hardware IPs
2751 *
2752 * @adev: amdgpu_device pointer
2753 *
2754 * Main suspend function for hardware IPs. The list of all the hardware
2755 * IPs that make up the asic is walked, clockgating is disabled and the
2756 * suspend callbacks are run. suspend puts the hardware and software state
2757 * in each IP into a state suitable for suspend.
2758 * Returns 0 on success, negative error code on failure.
2759 */
2760int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2761{
2762 int r;
2763
e7819644
YT
2764 if (amdgpu_sriov_vf(adev))
2765 amdgpu_virt_request_full_gpu(adev, false);
2766
e7854a03
AD
2767 r = amdgpu_device_ip_suspend_phase1(adev);
2768 if (r)
2769 return r;
2770 r = amdgpu_device_ip_suspend_phase2(adev);
2771
e7819644
YT
2772 if (amdgpu_sriov_vf(adev))
2773 amdgpu_virt_release_full_gpu(adev, false);
2774
e7854a03
AD
2775 return r;
2776}
2777
06ec9070 2778static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2779{
2780 int i, r;
2781
2cb681b6
ML
2782 static enum amd_ip_block_type ip_order[] = {
2783 AMD_IP_BLOCK_TYPE_GMC,
2784 AMD_IP_BLOCK_TYPE_COMMON,
39186aef 2785 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
2786 AMD_IP_BLOCK_TYPE_IH,
2787 };
a90ad3c2 2788
2cb681b6
ML
2789 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2790 int j;
2791 struct amdgpu_ip_block *block;
a90ad3c2 2792
4cd2a96d
J
2793 block = &adev->ip_blocks[i];
2794 block->status.hw = false;
2cb681b6 2795
4cd2a96d 2796 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
2cb681b6 2797
4cd2a96d 2798 if (block->version->type != ip_order[j] ||
2cb681b6
ML
2799 !block->status.valid)
2800 continue;
2801
2802 r = block->version->funcs->hw_init(adev);
0aaeefcc 2803 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2804 if (r)
2805 return r;
482f0e53 2806 block->status.hw = true;
a90ad3c2
ML
2807 }
2808 }
2809
2810 return 0;
2811}
2812
06ec9070 2813static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2814{
2815 int i, r;
2816
2cb681b6
ML
2817 static enum amd_ip_block_type ip_order[] = {
2818 AMD_IP_BLOCK_TYPE_SMC,
2819 AMD_IP_BLOCK_TYPE_DCE,
2820 AMD_IP_BLOCK_TYPE_GFX,
2821 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c 2822 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07
JJ
2823 AMD_IP_BLOCK_TYPE_VCE,
2824 AMD_IP_BLOCK_TYPE_VCN
2cb681b6 2825 };
a90ad3c2 2826
2cb681b6
ML
2827 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2828 int j;
2829 struct amdgpu_ip_block *block;
a90ad3c2 2830
2cb681b6
ML
2831 for (j = 0; j < adev->num_ip_blocks; j++) {
2832 block = &adev->ip_blocks[j];
2833
2834 if (block->version->type != ip_order[i] ||
482f0e53
ML
2835 !block->status.valid ||
2836 block->status.hw)
2cb681b6
ML
2837 continue;
2838
895bd048
JZ
2839 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
2840 r = block->version->funcs->resume(adev);
2841 else
2842 r = block->version->funcs->hw_init(adev);
2843
0aaeefcc 2844 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2845 if (r)
2846 return r;
482f0e53 2847 block->status.hw = true;
a90ad3c2
ML
2848 }
2849 }
2850
2851 return 0;
2852}
2853
e3ecdffa
AD
2854/**
2855 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2856 *
2857 * @adev: amdgpu_device pointer
2858 *
2859 * First resume function for hardware IPs. The list of all the hardware
2860 * IPs that make up the asic is walked and the resume callbacks are run for
2861 * COMMON, GMC, and IH. resume puts the hardware into a functional state
2862 * after a suspend and updates the software state as necessary. This
2863 * function is also used for restoring the GPU after a GPU reset.
2864 * Returns 0 on success, negative error code on failure.
2865 */
06ec9070 2866static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
2867{
2868 int i, r;
2869
a90ad3c2 2870 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2871 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 2872 continue;
a90ad3c2 2873 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa
AD
2874 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2875 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
482f0e53 2876
fcf0649f
CZ
2877 r = adev->ip_blocks[i].version->funcs->resume(adev);
2878 if (r) {
2879 DRM_ERROR("resume of IP block <%s> failed %d\n",
2880 adev->ip_blocks[i].version->funcs->name, r);
2881 return r;
2882 }
482f0e53 2883 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
2884 }
2885 }
2886
2887 return 0;
2888}
2889
e3ecdffa
AD
2890/**
2891 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2892 *
2893 * @adev: amdgpu_device pointer
2894 *
2895 * Second resume function for hardware IPs. The list of all the hardware
2896 * IPs that make up the asic is walked and the resume callbacks are run for
2897 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
2898 * functional state after a suspend and updates the software state as
2899 * necessary. This function is also used for restoring the GPU after a GPU
2900 * reset.
2901 * Returns 0 on success, negative error code on failure.
2902 */
06ec9070 2903static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2904{
2905 int i, r;
2906
2907 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2908 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 2909 continue;
fcf0649f 2910 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 2911 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
2912 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2913 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 2914 continue;
a1255107 2915 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 2916 if (r) {
a1255107
AD
2917 DRM_ERROR("resume of IP block <%s> failed %d\n",
2918 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2919 return r;
2c1a2784 2920 }
482f0e53 2921 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
2922 }
2923
2924 return 0;
2925}
2926
e3ecdffa
AD
2927/**
2928 * amdgpu_device_ip_resume - run resume for hardware IPs
2929 *
2930 * @adev: amdgpu_device pointer
2931 *
2932 * Main resume function for hardware IPs. The hardware IPs
2933 * are split into two resume functions because they are
2934 * also used in recovering from a GPU reset and some additional
2935 * steps need to be taken between them. In this case (S3/S4) they are
2936 * run sequentially.
2937 * Returns 0 on success, negative error code on failure.
2938 */
06ec9070 2939static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
2940{
2941 int r;
2942
06ec9070 2943 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
2944 if (r)
2945 return r;
7a3e0bb2
RZ
2946
2947 r = amdgpu_device_fw_loading(adev);
2948 if (r)
2949 return r;
2950
06ec9070 2951 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
2952
2953 return r;
2954}
2955
e3ecdffa
AD
2956/**
2957 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2958 *
2959 * @adev: amdgpu_device pointer
2960 *
2961 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2962 */
4e99a44e 2963static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 2964{
6867e1b5
ML
2965 if (amdgpu_sriov_vf(adev)) {
2966 if (adev->is_atom_fw) {
2967 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2968 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2969 } else {
2970 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2971 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2972 }
2973
2974 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2975 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 2976 }
048765ad
AR
2977}
2978
e3ecdffa
AD
2979/**
2980 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2981 *
2982 * @asic_type: AMD asic type
2983 *
2984 * Check if there is DC (new modesetting infrastructure) support for an asic.
2985 * returns true if DC has support, false if not.
2986 */
4562236b
HW
2987bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2988{
2989 switch (asic_type) {
2990#if defined(CONFIG_DRM_AMD_DC)
64200c46
MR
2991#if defined(CONFIG_DRM_AMD_DC_SI)
2992 case CHIP_TAHITI:
2993 case CHIP_PITCAIRN:
2994 case CHIP_VERDE:
2995 case CHIP_OLAND:
2996#endif
4562236b 2997 case CHIP_BONAIRE:
0d6fbccb 2998 case CHIP_KAVERI:
367e6687
AD
2999 case CHIP_KABINI:
3000 case CHIP_MULLINS:
d9fda248
HW
3001 /*
3002 * We have systems in the wild with these ASICs that require
3003 * LVDS and VGA support which is not supported with DC.
3004 *
3005 * Fallback to the non-DC driver here by default so as not to
3006 * cause regressions.
3007 */
3008 return amdgpu_dc > 0;
3009 case CHIP_HAWAII:
4562236b
HW
3010 case CHIP_CARRIZO:
3011 case CHIP_STONEY:
4562236b 3012 case CHIP_POLARIS10:
675fd32b 3013 case CHIP_POLARIS11:
2c8ad2d5 3014 case CHIP_POLARIS12:
675fd32b 3015 case CHIP_VEGAM:
4562236b
HW
3016 case CHIP_TONGA:
3017 case CHIP_FIJI:
42f8ffa1 3018 case CHIP_VEGA10:
dca7b401 3019 case CHIP_VEGA12:
c6034aa2 3020 case CHIP_VEGA20:
b86a1aa3 3021#if defined(CONFIG_DRM_AMD_DC_DCN)
fd187853 3022 case CHIP_RAVEN:
b4f199c7 3023 case CHIP_NAVI10:
8fceceb6 3024 case CHIP_NAVI14:
078655d9 3025 case CHIP_NAVI12:
e1c14c43 3026 case CHIP_RENOIR:
81d9bfb8 3027 case CHIP_SIENNA_CICHLID:
a6c5308f 3028 case CHIP_NAVY_FLOUNDER:
7cc656e2 3029 case CHIP_DIMGREY_CAVEFISH:
84b934bc 3030 case CHIP_VANGOGH:
42f8ffa1 3031#endif
fd187853 3032 return amdgpu_dc != 0;
4562236b
HW
3033#endif
3034 default:
93b09a9a 3035 if (amdgpu_dc > 0)
ff9346db 3036 DRM_INFO_ONCE("Display Core has been requested via kernel parameter "
93b09a9a 3037 "but isn't supported by ASIC, ignoring\n");
4562236b
HW
3038 return false;
3039 }
3040}
3041
3042/**
3043 * amdgpu_device_has_dc_support - check if dc is supported
3044 *
982a820b 3045 * @adev: amdgpu_device pointer
4562236b
HW
3046 *
3047 * Returns true for supported, false for not supported
3048 */
3049bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3050{
c997e8e2 3051 if (amdgpu_sriov_vf(adev) || adev->enable_virtual_display)
2555039d
XY
3052 return false;
3053
4562236b
HW
3054 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3055}
3056
d4535e2c
AG
3057
3058static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3059{
3060 struct amdgpu_device *adev =
3061 container_of(__work, struct amdgpu_device, xgmi_reset_work);
d95e8e97 3062 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
d4535e2c 3063
c6a6e2db
AG
3064 /* It's a bug to not have a hive within this function */
3065 if (WARN_ON(!hive))
3066 return;
3067
3068 /*
3069 * Use task barrier to synchronize all xgmi reset works across the
3070 * hive. task_barrier_enter and task_barrier_exit will block
3071 * until all the threads running the xgmi reset works reach
3072 * those points. task_barrier_full will do both blocks.
3073 */
3074 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3075
3076 task_barrier_enter(&hive->tb);
4a580877 3077 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
c6a6e2db
AG
3078
3079 if (adev->asic_reset_res)
3080 goto fail;
3081
3082 task_barrier_exit(&hive->tb);
4a580877 3083 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
c6a6e2db
AG
3084
3085 if (adev->asic_reset_res)
3086 goto fail;
43c4d576
JC
3087
3088 if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count)
3089 adev->mmhub.funcs->reset_ras_error_count(adev);
c6a6e2db
AG
3090 } else {
3091
3092 task_barrier_full(&hive->tb);
3093 adev->asic_reset_res = amdgpu_asic_reset(adev);
3094 }
ce316fa5 3095
c6a6e2db 3096fail:
d4535e2c 3097 if (adev->asic_reset_res)
fed184e9 3098 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
4a580877 3099 adev->asic_reset_res, adev_to_drm(adev)->unique);
d95e8e97 3100 amdgpu_put_xgmi_hive(hive);
d4535e2c
AG
3101}
3102
71f98027
AD
3103static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3104{
3105 char *input = amdgpu_lockup_timeout;
3106 char *timeout_setting = NULL;
3107 int index = 0;
3108 long timeout;
3109 int ret = 0;
3110
3111 /*
3112 * By default the timeout for non-compute jobs is 10000 ms
3113 * and there is no timeout enforced on compute jobs.
3114 * In SR-IOV or passthrough mode, the default timeout for
3115 * compute jobs is 60000 ms.
71f98027
AD
3116 */
3117 adev->gfx_timeout = msecs_to_jiffies(10000);
3118 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3119 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
b7b2a316 3120 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027
AD
3121 else
3122 adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
3123
f440ff44 3124 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 3125 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 3126 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
3127 ret = kstrtol(timeout_setting, 0, &timeout);
3128 if (ret)
3129 return ret;
3130
3131 if (timeout == 0) {
3132 index++;
3133 continue;
3134 } else if (timeout < 0) {
3135 timeout = MAX_SCHEDULE_TIMEOUT;
3136 } else {
3137 timeout = msecs_to_jiffies(timeout);
3138 }
3139
3140 switch (index++) {
3141 case 0:
3142 adev->gfx_timeout = timeout;
3143 break;
3144 case 1:
3145 adev->compute_timeout = timeout;
3146 break;
3147 case 2:
3148 adev->sdma_timeout = timeout;
3149 break;
3150 case 3:
3151 adev->video_timeout = timeout;
3152 break;
3153 default:
3154 break;
3155 }
3156 }
3157 /*
3158 * If only one value is specified, it applies
3159 * to all non-compute jobs.
3160 */
bcccee89 3161 if (index == 1) {
71f98027 3162 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
3163 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3164 adev->compute_timeout = adev->gfx_timeout;
3165 }
71f98027
AD
3166 }
3167
3168 return ret;
3169}
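/*
 * Illustrative lockup_timeout usage (values are examples only). The
 * parameter takes up to four comma separated values, in milliseconds,
 * in the order gfx, compute, sdma, video:
 *
 *	modprobe amdgpu lockup_timeout=10000
 *	modprobe amdgpu lockup_timeout=10000,60000,0,-1
 *
 * A single value applies to all non-compute queues. A value of 0 keeps
 * the default for that queue, and a negative value selects
 * MAX_SCHEDULE_TIMEOUT, i.e. no enforced timeout.
 */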
d4535e2c 3170
77f3a5cd
ND
3171static const struct attribute *amdgpu_dev_attributes[] = {
3172 &dev_attr_product_name.attr,
3173 &dev_attr_product_number.attr,
3174 &dev_attr_serial_number.attr,
3175 &dev_attr_pcie_replay_count.attr,
3176 NULL
3177};
3178
c9a6b82f 3179
d38ceaf9
AD
3180/**
3181 * amdgpu_device_init - initialize the driver
3182 *
3183 * @adev: amdgpu_device pointer
d38ceaf9
AD
3184 * @flags: driver flags
3185 *
3186 * Initializes the driver info and hw (all asics).
3187 * Returns 0 for success or an error on failure.
3188 * Called at driver startup.
3189 */
3190int amdgpu_device_init(struct amdgpu_device *adev,
d38ceaf9
AD
3191 uint32_t flags)
3192{
8aba21b7
LT
3193 struct drm_device *ddev = adev_to_drm(adev);
3194 struct pci_dev *pdev = adev->pdev;
d38ceaf9 3195 int r, i;
fd496ca8 3196 bool atpx = false;
95844d20 3197 u32 max_MBps;
d38ceaf9
AD
3198
3199 adev->shutdown = false;
d38ceaf9 3200 adev->flags = flags;
4e66d7d2
YZ
3201
3202 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3203 adev->asic_type = amdgpu_force_asic_type;
3204 else
3205 adev->asic_type = flags & AMD_ASIC_MASK;
3206
d38ceaf9 3207 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 3208 if (amdgpu_emu_mode == 1)
8bdab6bb 3209 adev->usec_timeout *= 10;
770d13b1 3210 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
3211 adev->accel_working = false;
3212 adev->num_rings = 0;
3213 adev->mman.buffer_funcs = NULL;
3214 adev->mman.buffer_funcs_ring = NULL;
3215 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 3216 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 3217 adev->gmc.gmc_funcs = NULL;
f54d1867 3218 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 3219 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
3220
3221 adev->smc_rreg = &amdgpu_invalid_rreg;
3222 adev->smc_wreg = &amdgpu_invalid_wreg;
3223 adev->pcie_rreg = &amdgpu_invalid_rreg;
3224 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
3225 adev->pciep_rreg = &amdgpu_invalid_rreg;
3226 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
3227 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3228 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
3229 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3230 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3231 adev->didt_rreg = &amdgpu_invalid_rreg;
3232 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
3233 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3234 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
3235 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3236 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3237
3e39ab90
AD
3238 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3239 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3240 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
3241
3242 /* mutex initializations are all done here so we
3243 * can call functions without locking issues */
d38ceaf9 3244 atomic_set(&adev->irq.ih.lock, 0);
0e5ca0d1 3245 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
3246 mutex_init(&adev->pm.mutex);
3247 mutex_init(&adev->gfx.gpu_clock_mutex);
3248 mutex_init(&adev->srbm_mutex);
b8866c26 3249 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3250 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 3251 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3252 mutex_init(&adev->mn_lock);
e23b74aa 3253 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3254 hash_init(adev->mn_hash);
53b3f8f4 3255 atomic_set(&adev->in_gpu_reset, 0);
6049db43 3256 init_rwsem(&adev->reset_sem);
32eaeae0 3257 mutex_init(&adev->psp.mutex);
bd052211 3258 mutex_init(&adev->notifier_lock);
d38ceaf9 3259
912dfc84
EQ
3260 r = amdgpu_device_check_arguments(adev);
3261 if (r)
3262 return r;
d38ceaf9 3263
d38ceaf9
AD
3264 spin_lock_init(&adev->mmio_idx_lock);
3265 spin_lock_init(&adev->smc_idx_lock);
3266 spin_lock_init(&adev->pcie_idx_lock);
3267 spin_lock_init(&adev->uvd_ctx_idx_lock);
3268 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3269 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3270 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3271 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3272 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3273
0c4e7fa5
CZ
3274 INIT_LIST_HEAD(&adev->shadow_list);
3275 mutex_init(&adev->shadow_list_lock);
3276
beff74bc
AD
3277 INIT_DELAYED_WORK(&adev->delayed_init_work,
3278 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3279 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3280 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3281
d4535e2c
AG
3282 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3283
d23ee13f 3284 adev->gfx.gfx_off_req_count = 1;
b6e79d9a 3285 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3286
b265bdbd
EQ
3287 atomic_set(&adev->throttling_logging_enabled, 1);
3288 /*
3289 * If throttling continues, logging will be performed every minute
3290 * to avoid log flooding. "-1" is subtracted since the thermal
3291 * throttling interrupt comes every second. Thus, the total logging
3292 * interval is 59 seconds(retelimited printk interval) + 1(waiting
3293 * for throttling interrupt) = 60 seconds.
3294 */
3295 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3296 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3297
0fa49558
AX
3298 /* Registers mapping */
3299 /* TODO: block userspace mapping of io register */
da69c161
KW
3300 if (adev->asic_type >= CHIP_BONAIRE) {
3301 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3302 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3303 } else {
3304 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3305 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3306 }
d38ceaf9 3307
d38ceaf9
AD
3308 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3309 if (adev->rmmio == NULL) {
3310 return -ENOMEM;
3311 }
3312 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3313 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3314
d38ceaf9
AD
3315 /* io port mapping */
3316 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
3317 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
3318 adev->rio_mem_size = pci_resource_len(adev->pdev, i);
3319 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
3320 break;
3321 }
3322 }
3323 if (adev->rio_mem == NULL)
b64a18c5 3324 DRM_INFO("PCI I/O BAR is not found.\n");
d38ceaf9 3325
b2109d8e
JX
3326 /* enable PCIE atomic ops */
3327 r = pci_enable_atomic_ops_to_root(adev->pdev,
3328 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3329 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3330 if (r) {
3331 adev->have_atomics_support = false;
3332 DRM_INFO("PCIE atomic ops is not supported\n");
3333 } else {
3334 adev->have_atomics_support = true;
3335 }
3336
5494d864
AD
3337 amdgpu_device_get_pcie_info(adev);
3338
b239c017
JX
3339 if (amdgpu_mcbp)
3340 DRM_INFO("MCBP is enabled\n");
3341
5f84cc63
JX
3342 if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
3343 adev->enable_mes = true;
3344
3aa0115d
ML
3345 /* detect hw virtualization here */
3346 amdgpu_detect_virtualization(adev);
3347
dffa11b4
ML
3348 r = amdgpu_device_get_job_timeout_settings(adev);
3349 if (r) {
3350 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4192f7b5 3351 goto failed_unmap;
a190d1c7
XY
3352 }
3353
d38ceaf9 3354 /* early init functions */
06ec9070 3355 r = amdgpu_device_ip_early_init(adev);
d38ceaf9 3356 if (r)
4192f7b5 3357 goto failed_unmap;
d38ceaf9 3358
6585661d
OZ
3359 /* doorbell bar mapping and doorbell index init*/
3360 amdgpu_device_doorbell_init(adev);
3361
d38ceaf9
AD
3362 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
3363 /* this will fail for cards that aren't VGA class devices, just
3364 * ignore it */
38d6be81
AD
3365 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
3366 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
d38ceaf9 3367
fd496ca8
AD
3368 if (amdgpu_device_supports_atpx(ddev))
3369 atpx = true;
3840c5bc
AD
3370 if (amdgpu_has_atpx() &&
3371 (amdgpu_is_atpx_hybrid() ||
3372 amdgpu_has_atpx_dgpu_power_cntl()) &&
3373 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 3374 vga_switcheroo_register_client(adev->pdev,
fd496ca8
AD
3375 &amdgpu_switcheroo_ops, atpx);
3376 if (atpx)
d38ceaf9
AD
3377 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
3378
9475a943
SL
3379 if (amdgpu_emu_mode == 1) {
3380 /* post the asic on emulation mode */
3381 emu_soc_asic_init(adev);
bfca0289 3382 goto fence_driver_init;
9475a943 3383 }
bfca0289 3384
4e99a44e
ML
3385 /* detect if we are with an SRIOV vbios */
3386 amdgpu_device_detect_sriov_bios(adev);
048765ad 3387
95e8e59e
AD
3388 /* check if we need to reset the asic
3389 * E.g., driver was not cleanly unloaded previously, etc.
3390 */
f14899fd 3391 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
95e8e59e
AD
3392 r = amdgpu_asic_reset(adev);
3393 if (r) {
3394 dev_err(adev->dev, "asic reset on init failed\n");
3395 goto failed;
3396 }
3397 }
3398
c9a6b82f
AG
3399 pci_enable_pcie_error_reporting(adev->ddev.pdev);
3400
d38ceaf9 3401 /* Post card if necessary */
39c640c0 3402 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3403 if (!adev->bios) {
bec86378 3404 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3405 r = -EINVAL;
3406 goto failed;
d38ceaf9 3407 }
bec86378 3408 DRM_INFO("GPU posting now...\n");
4d2997ab 3409 r = amdgpu_device_asic_init(adev);
4e99a44e
ML
3410 if (r) {
3411 dev_err(adev->dev, "gpu post error!\n");
3412 goto failed;
3413 }
d38ceaf9
AD
3414 }
3415
88b64e95
AD
3416 if (adev->is_atom_fw) {
3417 /* Initialize clocks */
3418 r = amdgpu_atomfirmware_get_clock_info(adev);
3419 if (r) {
3420 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 3421 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
3422 goto failed;
3423 }
3424 } else {
a5bde2f9
AD
3425 /* Initialize clocks */
3426 r = amdgpu_atombios_get_clock_info(adev);
3427 if (r) {
3428 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 3429 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 3430 goto failed;
a5bde2f9
AD
3431 }
3432 /* init i2c buses */
4562236b
HW
3433 if (!amdgpu_device_has_dc_support(adev))
3434 amdgpu_atombios_i2c_init(adev);
2c1a2784 3435 }
d38ceaf9 3436
bfca0289 3437fence_driver_init:
d38ceaf9
AD
3438 /* Fence driver */
3439 r = amdgpu_fence_driver_init(adev);
2c1a2784
AD
3440 if (r) {
3441 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
e23b74aa 3442 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3443 goto failed;
2c1a2784 3444 }
d38ceaf9
AD
3445
3446 /* init the mode config */
4a580877 3447 drm_mode_config_init(adev_to_drm(adev));
d38ceaf9 3448
06ec9070 3449 r = amdgpu_device_ip_init(adev);
d38ceaf9 3450 if (r) {
8840a387 3451 /* failed in exclusive mode due to timeout */
3452 if (amdgpu_sriov_vf(adev) &&
3453 !amdgpu_sriov_runtime(adev) &&
3454 amdgpu_virt_mmio_blocked(adev) &&
3455 !amdgpu_virt_wait_reset(adev)) {
3456 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
3457 /* Don't send request since VF is inactive. */
3458 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3459 adev->virt.ops = NULL;
8840a387 3460 r = -EAGAIN;
3461 goto failed;
3462 }
06ec9070 3463 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3464 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
83ba126a 3465 goto failed;
d38ceaf9
AD
3466 }
3467
d69b8971
YZ
3468 dev_info(adev->dev,
3469 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
3470 adev->gfx.config.max_shader_engines,
3471 adev->gfx.config.max_sh_per_se,
3472 adev->gfx.config.max_cu_per_sh,
3473 adev->gfx.cu_info.number);
3474
d38ceaf9
AD
3475 adev->accel_working = true;
3476
e59c0205
AX
3477 amdgpu_vm_check_compute_bug(adev);
3478
95844d20
MO
3479 /* Initialize the buffer migration limit. */
3480 if (amdgpu_moverate >= 0)
3481 max_MBps = amdgpu_moverate;
3482 else
3483 max_MBps = 8; /* Allow 8 MB/s. */
3484 /* Get a log2 for easy divisions. */
3485 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
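/* Illustrative arithmetic (a sketch, not part of the original source):
 * storing log2(max_MBps) lets later accounting replace a division by the
 * MB/s budget with a cheap shift. For example, with max_MBps = 8 the stored
 * value is 3, and
 *
 *	approx_time_us = bytes_moved >> adev->mm_stats.log2_max_MBps;
 *
 * approximates the microseconds such a transfer would take at 8 MB/s,
 * since 2^20 bytes per MB is close to 10^6 microseconds per second.
 */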
3486
9bc92b9c
ML
3487 amdgpu_fbdev_init(adev);
3488
d2f52ac8 3489 r = amdgpu_pm_sysfs_init(adev);
7c868b59
YT
3490 if (r) {
3491 adev->pm_sysfs_en = false;
d2f52ac8 3492 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
7c868b59
YT
3493 } else
3494 adev->pm_sysfs_en = true;
d2f52ac8 3495
5bb23532 3496 r = amdgpu_ucode_sysfs_init(adev);
7c868b59
YT
3497 if (r) {
3498 adev->ucode_sysfs_en = false;
5bb23532 3499 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
7c868b59
YT
3500 } else
3501 adev->ucode_sysfs_en = true;
5bb23532 3502
d38ceaf9
AD
3503 if ((amdgpu_testing & 1)) {
3504 if (adev->accel_working)
3505 amdgpu_test_moves(adev);
3506 else
3507 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
3508 }
d38ceaf9
AD
3509 if (amdgpu_benchmarking) {
3510 if (adev->accel_working)
3511 amdgpu_benchmark(adev, amdgpu_benchmarking);
3512 else
3513 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
3514 }
3515
b0adca4d
EQ
3516 /*
3517 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3518 * Otherwise the mgpu fan boost feature will be skipped because the
3519 * gpu instance count would be too low.
3520 */
3521 amdgpu_register_gpu_instance(adev);
3522
d38ceaf9
AD
3523 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3524 * explicit gating rather than handling it automatically.
3525 */
06ec9070 3526 r = amdgpu_device_ip_late_init(adev);
2c1a2784 3527 if (r) {
06ec9070 3528 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
e23b74aa 3529 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
83ba126a 3530 goto failed;
2c1a2784 3531 }
d38ceaf9 3532
108c6a63 3533 /* must succeed. */
511fdbc3 3534 amdgpu_ras_resume(adev);
108c6a63 3535
beff74bc
AD
3536 queue_delayed_work(system_wq, &adev->delayed_init_work,
3537 msecs_to_jiffies(AMDGPU_RESUME_MS));
3538
2c738637
ML
3539 if (amdgpu_sriov_vf(adev))
3540 flush_delayed_work(&adev->delayed_init_work);
3541
77f3a5cd 3542 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
5aea5327 3543 if (r)
77f3a5cd 3544 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166 3545
d155bef0
AB
3546 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3547 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
3548 if (r)
3549 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3550
c1dd4aa6
AG
3551 /* Keep the stored PCI config space at hand so it can be restored after a sudden PCI error */
3552 if (amdgpu_device_cache_pci_state(adev->pdev))
3553 pci_restore_state(pdev);
3554
d38ceaf9 3555 return 0;
83ba126a
AD
3556
3557failed:
89041940 3558 amdgpu_vf_error_trans_all(adev);
fd496ca8 3559 if (atpx)
83ba126a 3560 vga_switcheroo_fini_domain_pm_ops(adev->dev);
8840a387 3561
4192f7b5
AD
3562failed_unmap:
3563 iounmap(adev->rmmio);
3564 adev->rmmio = NULL;
3565
83ba126a 3566 return r;
d38ceaf9
AD
3567}
3568
d38ceaf9
AD
3569/**
3570 * amdgpu_device_fini - tear down the driver
3571 *
3572 * @adev: amdgpu_device pointer
3573 *
3574 * Tear down the driver info (all asics).
3575 * Called at driver shutdown.
3576 */
3577void amdgpu_device_fini(struct amdgpu_device *adev)
3578{
aac89168 3579 dev_info(adev->dev, "amdgpu: finishing device.\n");
9f875167 3580 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 3581 adev->shutdown = true;
9f875167 3582
c1dd4aa6
AG
3583 kfree(adev->pci_state);
3584
752c683d
ML
3585 /* make sure IB tests have finished before entering exclusive mode
3586 * to avoid preempting the IB tests
3587 */
519b8b76 3588 if (amdgpu_sriov_vf(adev)) {
752c683d 3589 amdgpu_virt_request_full_gpu(adev, false);
519b8b76
BZ
3590 amdgpu_virt_fini_data_exchange(adev);
3591 }
752c683d 3592
e5b03032
ML
3593 /* disable all interrupts */
3594 amdgpu_irq_disable_all(adev);
ff97cba8
ML
3595 if (adev->mode_info.mode_config_initialized){
3596 if (!amdgpu_device_has_dc_support(adev))
4a580877 3597 drm_helper_force_disable_all(adev_to_drm(adev));
ff97cba8 3598 else
4a580877 3599 drm_atomic_helper_shutdown(adev_to_drm(adev));
ff97cba8 3600 }
d38ceaf9 3601 amdgpu_fence_driver_fini(adev);
7c868b59
YT
3602 if (adev->pm_sysfs_en)
3603 amdgpu_pm_sysfs_fini(adev);
d38ceaf9 3604 amdgpu_fbdev_fini(adev);
e230ac11 3605 amdgpu_device_ip_fini(adev);
75e1658e
ND
3606 release_firmware(adev->firmware.gpu_info_fw);
3607 adev->firmware.gpu_info_fw = NULL;
d38ceaf9
AD
3608 adev->accel_working = false;
3609 /* free i2c buses */
4562236b
HW
3610 if (!amdgpu_device_has_dc_support(adev))
3611 amdgpu_i2c_fini(adev);
bfca0289
SL
3612
3613 if (amdgpu_emu_mode != 1)
3614 amdgpu_atombios_fini(adev);
3615
d38ceaf9
AD
3616 kfree(adev->bios);
3617 adev->bios = NULL;
3840c5bc
AD
3618 if (amdgpu_has_atpx() &&
3619 (amdgpu_is_atpx_hybrid() ||
3620 amdgpu_has_atpx_dgpu_power_cntl()) &&
3621 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 3622 vga_switcheroo_unregister_client(adev->pdev);
fd496ca8 3623 if (amdgpu_device_supports_atpx(adev_to_drm(adev)))
83ba126a 3624 vga_switcheroo_fini_domain_pm_ops(adev->dev);
38d6be81
AD
3625 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
3626 vga_client_register(adev->pdev, NULL, NULL, NULL);
d38ceaf9
AD
3627 if (adev->rio_mem)
3628 pci_iounmap(adev->pdev, adev->rio_mem);
3629 adev->rio_mem = NULL;
3630 iounmap(adev->rmmio);
3631 adev->rmmio = NULL;
06ec9070 3632 amdgpu_device_doorbell_fini(adev);
e9bc1bf7 3633
7c868b59
YT
3634 if (adev->ucode_sysfs_en)
3635 amdgpu_ucode_sysfs_fini(adev);
77f3a5cd
ND
3636
3637 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
d155bef0
AB
3638 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3639 amdgpu_pmu_fini(adev);
72de33f8 3640 if (adev->mman.discovery_bin)
a190d1c7 3641 amdgpu_discovery_fini(adev);
d38ceaf9
AD
3642}
3643
3644
3645/*
3646 * Suspend & resume.
3647 */
3648/**
810ddc3a 3649 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 3650 *
87e3f136 3651 * @dev: drm dev pointer
87e3f136 3652 * @fbcon : notify the fbdev of suspend
d38ceaf9
AD
3653 *
3654 * Puts the hw in the suspend state (all asics).
3655 * Returns 0 for success or an error on failure.
3656 * Called at driver suspend.
3657 */
de185019 3658int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3659{
3660 struct amdgpu_device *adev;
3661 struct drm_crtc *crtc;
3662 struct drm_connector *connector;
f8d2d39e 3663 struct drm_connector_list_iter iter;
5ceb54c6 3664 int r;
d38ceaf9 3665
1348969a 3666 adev = drm_to_adev(dev);
d38ceaf9
AD
3667
3668 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3669 return 0;
3670
44779b43 3671 adev->in_suspend = true;
d38ceaf9
AD
3672 drm_kms_helper_poll_disable(dev);
3673
5f818173
S
3674 if (fbcon)
3675 amdgpu_fbdev_set_suspend(adev, 1);
3676
beff74bc 3677 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 3678
4562236b
HW
3679 if (!amdgpu_device_has_dc_support(adev)) {
3680 /* turn off display hw */
3681 drm_modeset_lock_all(dev);
f8d2d39e
LP
3682 drm_connector_list_iter_begin(dev, &iter);
3683 drm_for_each_connector_iter(connector, &iter)
3684 drm_helper_connector_dpms(connector,
3685 DRM_MODE_DPMS_OFF);
3686 drm_connector_list_iter_end(&iter);
4562236b 3687 drm_modeset_unlock_all(dev);
fe1053b7
AD
3688 /* unpin the front buffers and cursors */
3689 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3690 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3691 struct drm_framebuffer *fb = crtc->primary->fb;
3692 struct amdgpu_bo *robj;
3693
91334223 3694 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3695 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3696 r = amdgpu_bo_reserve(aobj, true);
3697 if (r == 0) {
3698 amdgpu_bo_unpin(aobj);
3699 amdgpu_bo_unreserve(aobj);
3700 }
756e6880 3701 }
756e6880 3702
fe1053b7
AD
3703 if (fb == NULL || fb->obj[0] == NULL) {
3704 continue;
3705 }
3706 robj = gem_to_amdgpu_bo(fb->obj[0]);
3707 /* don't unpin kernel fb objects */
3708 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
3709 r = amdgpu_bo_reserve(robj, true);
3710 if (r == 0) {
3711 amdgpu_bo_unpin(robj);
3712 amdgpu_bo_unreserve(robj);
3713 }
d38ceaf9
AD
3714 }
3715 }
3716 }
fe1053b7 3717
5e6932fe 3718 amdgpu_ras_suspend(adev);
3719
fe1053b7
AD
3720 r = amdgpu_device_ip_suspend_phase1(adev);
3721
94fa5660
EQ
3722 amdgpu_amdkfd_suspend(adev, !fbcon);
3723
d38ceaf9
AD
3724 /* evict vram memory */
3725 amdgpu_bo_evict_vram(adev);
3726
5ceb54c6 3727 amdgpu_fence_driver_suspend(adev);
d38ceaf9 3728
9ca5b8a1 3729 if (!amdgpu_acpi_is_s0ix_supported(adev) || amdgpu_in_reset(adev))
628c36d7
PL
3730 r = amdgpu_device_ip_suspend_phase2(adev);
3731 else
3732 amdgpu_gfx_state_change_set(adev, sGpuChangeState_D3Entry);
a0a71e49
AD
3733 /* evict remaining vram memory
3734 * This second call to evict vram is to evict the gart page table
3735 * using the CPU.
3736 */
d38ceaf9
AD
3737 amdgpu_bo_evict_vram(adev);
3738
d38ceaf9
AD
3739 return 0;
3740}
3741
3742/**
810ddc3a 3743 * amdgpu_device_resume - initiate device resume
d38ceaf9 3744 *
87e3f136 3745 * @dev: drm dev pointer
87e3f136 3746 * @fbcon : notify the fbdev of resume
d38ceaf9
AD
3747 *
3748 * Bring the hw back to operating state (all asics).
3749 * Returns 0 for success or an error on failure.
3750 * Called at driver resume.
3751 */
de185019 3752int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3753{
3754 struct drm_connector *connector;
f8d2d39e 3755 struct drm_connector_list_iter iter;
1348969a 3756 struct amdgpu_device *adev = drm_to_adev(dev);
756e6880 3757 struct drm_crtc *crtc;
03161a6e 3758 int r = 0;
d38ceaf9
AD
3759
3760 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3761 return 0;
3762
9ca5b8a1 3763 if (amdgpu_acpi_is_s0ix_supported(adev))
628c36d7
PL
3764 amdgpu_gfx_state_change_set(adev, sGpuChangeState_D0Entry);
3765
d38ceaf9 3766 /* post card */
39c640c0 3767 if (amdgpu_device_need_post(adev)) {
4d2997ab 3768 r = amdgpu_device_asic_init(adev);
74b0b157 3769 if (r)
aac89168 3770 dev_err(adev->dev, "amdgpu asic init failed\n");
74b0b157 3771 }
d38ceaf9 3772
06ec9070 3773 r = amdgpu_device_ip_resume(adev);
e6707218 3774 if (r) {
aac89168 3775 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
4d3b9ae5 3776 return r;
e6707218 3777 }
5ceb54c6
AD
3778 amdgpu_fence_driver_resume(adev);
3779
d38ceaf9 3780
06ec9070 3781 r = amdgpu_device_ip_late_init(adev);
03161a6e 3782 if (r)
4d3b9ae5 3783 return r;
d38ceaf9 3784
beff74bc
AD
3785 queue_delayed_work(system_wq, &adev->delayed_init_work,
3786 msecs_to_jiffies(AMDGPU_RESUME_MS));
3787
fe1053b7
AD
3788 if (!amdgpu_device_has_dc_support(adev)) {
3789 /* pin cursors */
3790 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3791 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3792
91334223 3793 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3794 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3795 r = amdgpu_bo_reserve(aobj, true);
3796 if (r == 0) {
3797 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
3798 if (r != 0)
aac89168 3799 dev_err(adev->dev, "Failed to pin cursor BO (%d)\n", r);
fe1053b7
AD
3800 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
3801 amdgpu_bo_unreserve(aobj);
3802 }
756e6880
AD
3803 }
3804 }
3805 }
9593f4d6 3806 r = amdgpu_amdkfd_resume(adev, !fbcon);
ba997709
YZ
3807 if (r)
3808 return r;
756e6880 3809
96a5d8d4 3810 /* Make sure IB tests flushed */
beff74bc 3811 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 3812
d38ceaf9
AD
3813 /* blat the mode back in */
3814 if (fbcon) {
4562236b
HW
3815 if (!amdgpu_device_has_dc_support(adev)) {
3816 /* pre DCE11 */
3817 drm_helper_resume_force_mode(dev);
3818
3819 /* turn on display hw */
3820 drm_modeset_lock_all(dev);
f8d2d39e
LP
3821
3822 drm_connector_list_iter_begin(dev, &iter);
3823 drm_for_each_connector_iter(connector, &iter)
3824 drm_helper_connector_dpms(connector,
3825 DRM_MODE_DPMS_ON);
3826 drm_connector_list_iter_end(&iter);
3827
4562236b 3828 drm_modeset_unlock_all(dev);
d38ceaf9 3829 }
4d3b9ae5 3830 amdgpu_fbdev_set_suspend(adev, 0);
d38ceaf9
AD
3831 }
3832
3833 drm_kms_helper_poll_enable(dev);
23a1a9e5 3834
5e6932fe 3835 amdgpu_ras_resume(adev);
3836
23a1a9e5
L
3837 /*
3838 * Most of the connector probing functions try to acquire runtime pm
3839 * refs to ensure that the GPU is powered on when connector polling is
3840 * performed. Since we're calling this from a runtime PM callback,
3841 * trying to acquire rpm refs will cause us to deadlock.
3842 *
3843 * Since we're guaranteed to be holding the rpm lock, it's safe to
3844 * temporarily disable the rpm helpers so this doesn't deadlock us.
3845 */
3846#ifdef CONFIG_PM
3847 dev->dev->power.disable_depth++;
3848#endif
4562236b
HW
3849 if (!amdgpu_device_has_dc_support(adev))
3850 drm_helper_hpd_irq_event(dev);
3851 else
3852 drm_kms_helper_hotplug_event(dev);
23a1a9e5
L
3853#ifdef CONFIG_PM
3854 dev->dev->power.disable_depth--;
3855#endif
44779b43
RZ
3856 adev->in_suspend = false;
3857
4d3b9ae5 3858 return 0;
d38ceaf9
AD
3859}
3860
e3ecdffa
AD
3861/**
3862 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
3863 *
3864 * @adev: amdgpu_device pointer
3865 *
3866 * The list of all the hardware IPs that make up the asic is walked and
3867 * the check_soft_reset callbacks are run. check_soft_reset determines
3868 * if the asic is still hung or not.
3869 * Returns true if any of the IPs are still in a hung state, false if not.
3870 */
06ec9070 3871static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
3872{
3873 int i;
3874 bool asic_hang = false;
3875
f993d628
ML
3876 if (amdgpu_sriov_vf(adev))
3877 return true;
3878
8bc04c29
AD
3879 if (amdgpu_asic_need_full_reset(adev))
3880 return true;
3881
63fbf42f 3882 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3883 if (!adev->ip_blocks[i].status.valid)
63fbf42f 3884 continue;
a1255107
AD
3885 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
3886 adev->ip_blocks[i].status.hang =
3887 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
3888 if (adev->ip_blocks[i].status.hang) {
aac89168 3889 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
3890 asic_hang = true;
3891 }
3892 }
3893 return asic_hang;
3894}
3895
e3ecdffa
AD
3896/**
3897 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3898 *
3899 * @adev: amdgpu_device pointer
3900 *
3901 * The list of all the hardware IPs that make up the asic is walked and the
3902 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
3903 * handles any IP specific hardware or software state changes that are
3904 * necessary for a soft reset to succeed.
3905 * Returns 0 on success, negative error code on failure.
3906 */
06ec9070 3907static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
3908{
3909 int i, r = 0;
3910
3911 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3912 if (!adev->ip_blocks[i].status.valid)
d31a501e 3913 continue;
a1255107
AD
3914 if (adev->ip_blocks[i].status.hang &&
3915 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3916 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
3917 if (r)
3918 return r;
3919 }
3920 }
3921
3922 return 0;
3923}
3924
e3ecdffa
AD
3925/**
3926 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3927 *
3928 * @adev: amdgpu_device pointer
3929 *
3930 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
3931 * reset is necessary to recover.
3932 * Returns true if a full asic reset is required, false if not.
3933 */
06ec9070 3934static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 3935{
da146d3b
AD
3936 int i;
3937
8bc04c29
AD
3938 if (amdgpu_asic_need_full_reset(adev))
3939 return true;
3940
da146d3b 3941 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3942 if (!adev->ip_blocks[i].status.valid)
da146d3b 3943 continue;
a1255107
AD
3944 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3945 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3946 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
3947 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3948 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 3949 if (adev->ip_blocks[i].status.hang) {
aac89168 3950 dev_info(adev->dev, "Some blocks need a full reset!\n");
da146d3b
AD
3951 return true;
3952 }
3953 }
35d782fe
CZ
3954 }
3955 return false;
3956}
3957
e3ecdffa
AD
3958/**
3959 * amdgpu_device_ip_soft_reset - do a soft reset
3960 *
3961 * @adev: amdgpu_device pointer
3962 *
3963 * The list of all the hardware IPs that make up the asic is walked and the
3964 * soft_reset callbacks are run if the block is hung. soft_reset handles any
3965 * IP specific hardware or software state changes that are necessary to soft
3966 * reset the IP.
3967 * Returns 0 on success, negative error code on failure.
3968 */
06ec9070 3969static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3970{
3971 int i, r = 0;
3972
3973 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3974 if (!adev->ip_blocks[i].status.valid)
35d782fe 3975 continue;
a1255107
AD
3976 if (adev->ip_blocks[i].status.hang &&
3977 adev->ip_blocks[i].version->funcs->soft_reset) {
3978 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
3979 if (r)
3980 return r;
3981 }
3982 }
3983
3984 return 0;
3985}
3986
e3ecdffa
AD
3987/**
3988 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
3989 *
3990 * @adev: amdgpu_device pointer
3991 *
3992 * The list of all the hardware IPs that make up the asic is walked and the
3993 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
3994 * handles any IP specific hardware or software state changes that are
3995 * necessary after the IP has been soft reset.
3996 * Returns 0 on success, negative error code on failure.
3997 */
06ec9070 3998static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3999{
4000 int i, r = 0;
4001
4002 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4003 if (!adev->ip_blocks[i].status.valid)
35d782fe 4004 continue;
a1255107
AD
4005 if (adev->ip_blocks[i].status.hang &&
4006 adev->ip_blocks[i].version->funcs->post_soft_reset)
4007 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
4008 if (r)
4009 return r;
4010 }
4011
4012 return 0;
4013}
4014
e3ecdffa 4015/**
c33adbc7 4016 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
4017 *
4018 * @adev: amdgpu_device pointer
4019 *
4020 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
4021 * restore things like GPUVM page tables after a GPU reset where
4022 * the contents of VRAM might be lost.
403009bf
CK
4023 *
4024 * Returns:
4025 * 0 on success, negative error code on failure.
e3ecdffa 4026 */
c33adbc7 4027static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 4028{
c41d1cf6 4029 struct dma_fence *fence = NULL, *next = NULL;
403009bf
CK
4030 struct amdgpu_bo *shadow;
4031 long r = 1, tmo;
c41d1cf6
ML
4032
4033 if (amdgpu_sriov_runtime(adev))
b045d3af 4034 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
4035 else
4036 tmo = msecs_to_jiffies(100);
4037
aac89168 4038 dev_info(adev->dev, "recover vram bo from shadow start\n");
c41d1cf6 4039 mutex_lock(&adev->shadow_list_lock);
403009bf
CK
4040 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
4041
4042 /* No need to recover an evicted BO */
4043 if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
b575f10d 4044 shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
403009bf
CK
4045 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
4046 continue;
4047
4048 r = amdgpu_bo_restore_shadow(shadow, &next);
4049 if (r)
4050 break;
4051
c41d1cf6 4052 if (fence) {
1712fb1a 4053 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
4054 dma_fence_put(fence);
4055 fence = next;
1712fb1a 4056 if (tmo == 0) {
4057 r = -ETIMEDOUT;
c41d1cf6 4058 break;
1712fb1a 4059 } else if (tmo < 0) {
4060 r = tmo;
4061 break;
4062 }
403009bf
CK
4063 } else {
4064 fence = next;
c41d1cf6 4065 }
c41d1cf6
ML
4066 }
4067 mutex_unlock(&adev->shadow_list_lock);
4068
403009bf
CK
4069 if (fence)
4070 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
4071 dma_fence_put(fence);
4072
1712fb1a 4073 if (r < 0 || tmo <= 0) {
aac89168 4074 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
4075 return -EIO;
4076 }
c41d1cf6 4077
aac89168 4078 dev_info(adev->dev, "recover vram bo from shadow done\n");
403009bf 4079 return 0;
c41d1cf6
ML
4080}
4081
a90ad3c2 4082
e3ecdffa 4083/**
06ec9070 4084 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e 4085 *
982a820b 4086 * @adev: amdgpu_device pointer
87e3f136 4087 * @from_hypervisor: request from hypervisor
5740682e
ML
4088 *
4089 * do a VF FLR and reinitialize the ASIC
3f48c681 4090 * Returns 0 on success, negative error code on failure
e3ecdffa
AD
4091 */
4092static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4093 bool from_hypervisor)
5740682e
ML
4094{
4095 int r;
4096
4097 if (from_hypervisor)
4098 r = amdgpu_virt_request_full_gpu(adev, true);
4099 else
4100 r = amdgpu_virt_reset_gpu(adev);
4101 if (r)
4102 return r;
a90ad3c2 4103
b639c22c
JZ
4104 amdgpu_amdkfd_pre_reset(adev);
4105
a90ad3c2 4106 /* Resume IP prior to SMC */
06ec9070 4107 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
4108 if (r)
4109 goto error;
a90ad3c2 4110
c9ffa427 4111 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 4112 /* we need to recover the gart before running SMC/CP/SDMA resume */
6c28aed6 4113 amdgpu_gtt_mgr_recover(ttm_manager_type(&adev->mman.bdev, TTM_PL_TT));
a90ad3c2 4114
7a3e0bb2
RZ
4115 r = amdgpu_device_fw_loading(adev);
4116 if (r)
4117 return r;
4118
a90ad3c2 4119 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 4120 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
4121 if (r)
4122 goto error;
a90ad3c2
ML
4123
4124 amdgpu_irq_gpu_reset_resume_helper(adev);
5740682e 4125 r = amdgpu_ib_ring_tests(adev);
f81e8d53 4126 amdgpu_amdkfd_post_reset(adev);
a90ad3c2 4127
abc34253
ED
4128error:
4129 amdgpu_virt_release_full_gpu(adev, true);
c41d1cf6 4130 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 4131 amdgpu_inc_vram_lost(adev);
c33adbc7 4132 r = amdgpu_device_recover_vram(adev);
a90ad3c2
ML
4133 }
4134
4135 return r;
4136}
4137
9a1cddd6 4138/**
4139 * amdgpu_device_has_job_running - check if there is any job in mirror list
4140 *
982a820b 4141 * @adev: amdgpu_device pointer
9a1cddd6 4142 *
4143 * check if there is any job in mirror list
4144 */
4145bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4146{
4147 int i;
4148 struct drm_sched_job *job;
4149
4150 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4151 struct amdgpu_ring *ring = adev->rings[i];
4152
4153 if (!ring || !ring->sched.thread)
4154 continue;
4155
4156 spin_lock(&ring->sched.job_list_lock);
4157 job = list_first_entry_or_null(&ring->sched.ring_mirror_list,
4158 struct drm_sched_job, node);
4159 spin_unlock(&ring->sched.job_list_lock);
4160 if (job)
4161 return true;
4162 }
4163 return false;
4164}
4165
12938fad
CK
4166/**
4167 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4168 *
982a820b 4169 * @adev: amdgpu_device pointer
12938fad
CK
4170 *
4171 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4172 * a hung GPU.
4173 */
4174bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4175{
4176 if (!amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 4177 dev_info(adev->dev, "Timeout, but no hardware hang detected.\n");
12938fad
CK
4178 return false;
4179 }
4180
3ba7b418
AG
4181 if (amdgpu_gpu_recovery == 0)
4182 goto disabled;
4183
4184 if (amdgpu_sriov_vf(adev))
4185 return true;
4186
4187 if (amdgpu_gpu_recovery == -1) {
4188 switch (adev->asic_type) {
fc42d47c
AG
4189 case CHIP_BONAIRE:
4190 case CHIP_HAWAII:
3ba7b418
AG
4191 case CHIP_TOPAZ:
4192 case CHIP_TONGA:
4193 case CHIP_FIJI:
4194 case CHIP_POLARIS10:
4195 case CHIP_POLARIS11:
4196 case CHIP_POLARIS12:
4197 case CHIP_VEGAM:
4198 case CHIP_VEGA20:
4199 case CHIP_VEGA10:
4200 case CHIP_VEGA12:
c43b849f 4201 case CHIP_RAVEN:
e9d4cf91 4202 case CHIP_ARCTURUS:
2cb44fb0 4203 case CHIP_RENOIR:
658c6639
AD
4204 case CHIP_NAVI10:
4205 case CHIP_NAVI14:
4206 case CHIP_NAVI12:
131a3c74 4207 case CHIP_SIENNA_CICHLID:
3ba7b418
AG
4208 break;
4209 default:
4210 goto disabled;
4211 }
12938fad
CK
4212 }
4213
4214 return true;
3ba7b418
AG
4215
4216disabled:
aac89168 4217 dev_info(adev->dev, "GPU recovery disabled.\n");
3ba7b418 4218 return false;
12938fad
CK
4219}
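/* Usage sketch (assumption, not from the original file): a typical caller is
 * the job timeout handler, which roughly does
 *
 *	if (amdgpu_device_should_recover_gpu(ring->adev))
 *		amdgpu_device_gpu_recover(ring->adev, job);
 *	else
 *		drm_sched_suspend_timeout(&ring->sched);
 *
 * so that recovery is only attempted when a real hang was detected and GPU
 * recovery has not been disabled via the amdgpu_gpu_recovery parameter.
 */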
4220
5c6dd71e 4221
26bc5340
AG
4222static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
4223 struct amdgpu_job *job,
4224 bool *need_full_reset_arg)
4225{
4226 int i, r = 0;
4227 bool need_full_reset = *need_full_reset_arg;
71182665 4228
728e7e0c
JZ
4229 amdgpu_debugfs_wait_dump(adev);
4230
b602ca5f
TZ
4231 if (amdgpu_sriov_vf(adev)) {
4232 /* stop the data exchange thread */
4233 amdgpu_virt_fini_data_exchange(adev);
4234 }
4235
71182665 4236 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
4237 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4238 struct amdgpu_ring *ring = adev->rings[i];
4239
51687759 4240 if (!ring || !ring->sched.thread)
0875dc9e 4241 continue;
5740682e 4242
2f9d4084
ML
4243 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
4244 amdgpu_fence_driver_force_completion(ring);
0875dc9e 4245 }
d38ceaf9 4246
222b5f04
AG
4247 if(job)
4248 drm_sched_increase_karma(&job->base);
4249
1d721ed6 4250 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
4251 if (!amdgpu_sriov_vf(adev)) {
4252
4253 if (!need_full_reset)
4254 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4255
4256 if (!need_full_reset) {
4257 amdgpu_device_ip_pre_soft_reset(adev);
4258 r = amdgpu_device_ip_soft_reset(adev);
4259 amdgpu_device_ip_post_soft_reset(adev);
4260 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 4261 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
26bc5340
AG
4262 need_full_reset = true;
4263 }
4264 }
4265
4266 if (need_full_reset)
4267 r = amdgpu_device_ip_suspend(adev);
4268
4269 *need_full_reset_arg = need_full_reset;
4270 }
4271
4272 return r;
4273}
4274
041a62bc 4275static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
26bc5340 4276 struct list_head *device_list_handle,
7ac71382
AG
4277 bool *need_full_reset_arg,
4278 bool skip_hw_reset)
26bc5340
AG
4279{
4280 struct amdgpu_device *tmp_adev = NULL;
4281 bool need_full_reset = *need_full_reset_arg, vram_lost = false;
4282 int r = 0;
4283
4284 /*
4285 * ASIC reset has to be done on all HGMI hive nodes ASAP
4286 * to allow proper links negotiation in FW (within 1 sec)
4287 */
7ac71382 4288 if (!skip_hw_reset && need_full_reset) {
26bc5340 4289 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
041a62bc 4290 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 4291 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
c96cf282 4292 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
4293 r = -EALREADY;
4294 } else
4295 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 4296
041a62bc 4297 if (r) {
aac89168 4298 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4a580877 4299 r, adev_to_drm(tmp_adev)->unique);
041a62bc 4300 break;
ce316fa5
LM
4301 }
4302 }
4303
041a62bc
AG
4304 /* For XGMI wait for all resets to complete before proceeding */
4305 if (!r) {
ce316fa5
LM
4306 list_for_each_entry(tmp_adev, device_list_handle,
4307 gmc.xgmi.head) {
4308 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4309 flush_work(&tmp_adev->xgmi_reset_work);
4310 r = tmp_adev->asic_reset_res;
4311 if (r)
4312 break;
ce316fa5
LM
4313 }
4314 }
4315 }
ce316fa5 4316 }
26bc5340 4317
43c4d576
JC
4318 if (!r && amdgpu_ras_intr_triggered()) {
4319 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4320 if (tmp_adev->mmhub.funcs &&
4321 tmp_adev->mmhub.funcs->reset_ras_error_count)
4322 tmp_adev->mmhub.funcs->reset_ras_error_count(tmp_adev);
4323 }
4324
00eaa571 4325 amdgpu_ras_intr_cleared();
43c4d576 4326 }
00eaa571 4327
26bc5340
AG
4328 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4329 if (need_full_reset) {
4330 /* post card */
4d2997ab 4331 if (amdgpu_device_asic_init(tmp_adev))
aac89168 4332 dev_warn(tmp_adev->dev, "asic atom init failed!");
26bc5340
AG
4333
4334 if (!r) {
4335 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
4336 r = amdgpu_device_ip_resume_phase1(tmp_adev);
4337 if (r)
4338 goto out;
4339
4340 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
4341 if (vram_lost) {
77e7f829 4342 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 4343 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
4344 }
4345
6c28aed6 4346 r = amdgpu_gtt_mgr_recover(ttm_manager_type(&tmp_adev->mman.bdev, TTM_PL_TT));
26bc5340
AG
4347 if (r)
4348 goto out;
4349
4350 r = amdgpu_device_fw_loading(tmp_adev);
4351 if (r)
4352 return r;
4353
4354 r = amdgpu_device_ip_resume_phase2(tmp_adev);
4355 if (r)
4356 goto out;
4357
4358 if (vram_lost)
4359 amdgpu_device_fill_reset_magic(tmp_adev);
4360
fdafb359
EQ
4361 /*
4362 * Add this ASIC as tracked as reset was already
4363 * complete successfully.
4364 */
4365 amdgpu_register_gpu_instance(tmp_adev);
4366
7c04ca50 4367 r = amdgpu_device_ip_late_init(tmp_adev);
4368 if (r)
4369 goto out;
4370
565d1941
EQ
4371 amdgpu_fbdev_set_suspend(tmp_adev, 0);
4372
e8fbaf03
GC
4373 /*
4374 * The GPU enters a bad state once the number of faulty
4375 * pages retired by ECC reaches the threshold, and RAS
4376 * recovery is scheduled next. So add a check here to
4377 * break out of recovery if the bad page threshold has
4378 * indeed been exceeded, and remind the user either to
4379 * retire this GPU or to set a bigger
4380 * bad_page_threshold value to work around this the
4381 * next time the driver is probed.
4382 */
4383 if (!amdgpu_ras_check_err_threshold(tmp_adev)) {
4384 /* must succeed. */
4385 amdgpu_ras_resume(tmp_adev);
4386 } else {
4387 r = -EINVAL;
4388 goto out;
4389 }
e79a04d5 4390
26bc5340
AG
4391 /* Update PSP FW topology after reset */
4392 if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
4393 r = amdgpu_xgmi_update_topology(hive, tmp_adev);
4394 }
4395 }
4396
26bc5340
AG
4397out:
4398 if (!r) {
4399 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
4400 r = amdgpu_ib_ring_tests(tmp_adev);
4401 if (r) {
4402 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
4403 r = amdgpu_device_ip_suspend(tmp_adev);
4404 need_full_reset = true;
4405 r = -EAGAIN;
4406 goto end;
4407 }
4408 }
4409
4410 if (!r)
4411 r = amdgpu_device_recover_vram(tmp_adev);
4412 else
4413 tmp_adev->asic_reset_res = r;
4414 }
4415
4416end:
4417 *need_full_reset_arg = need_full_reset;
4418 return r;
4419}
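/* Flow note (a summary of the code above, not an addition to it): when the
 * post-reset IB ring tests fail, the device is suspended again,
 * need_full_reset is forced on and -EAGAIN is returned; the caller
 * (amdgpu_device_gpu_recover below) treats -EAGAIN as "retry the whole
 * pre-reset plus reset sequence".
 */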
4420
08ebb485
DL
4421static bool amdgpu_device_lock_adev(struct amdgpu_device *adev,
4422 struct amdgpu_hive_info *hive)
26bc5340 4423{
53b3f8f4
DL
4424 if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0)
4425 return false;
4426
08ebb485
DL
4427 if (hive) {
4428 down_write_nest_lock(&adev->reset_sem, &hive->hive_lock);
4429 } else {
4430 down_write(&adev->reset_sem);
4431 }
5740682e 4432
26bc5340 4433 atomic_inc(&adev->gpu_reset_counter);
a3a09142
AD
4434 switch (amdgpu_asic_reset_method(adev)) {
4435 case AMD_RESET_METHOD_MODE1:
4436 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
4437 break;
4438 case AMD_RESET_METHOD_MODE2:
4439 adev->mp1_state = PP_MP1_STATE_RESET;
4440 break;
4441 default:
4442 adev->mp1_state = PP_MP1_STATE_NONE;
4443 break;
4444 }
1d721ed6
AG
4445
4446 return true;
26bc5340 4447}
d38ceaf9 4448
26bc5340
AG
4449static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
4450{
89041940 4451 amdgpu_vf_error_trans_all(adev);
a3a09142 4452 adev->mp1_state = PP_MP1_STATE_NONE;
53b3f8f4 4453 atomic_set(&adev->in_gpu_reset, 0);
6049db43 4454 up_write(&adev->reset_sem);
26bc5340
AG
4455}
4456
3f12acc8
EQ
4457static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
4458{
4459 struct pci_dev *p = NULL;
4460
4461 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4462 adev->pdev->bus->number, 1);
4463 if (p) {
4464 pm_runtime_enable(&(p->dev));
4465 pm_runtime_resume(&(p->dev));
4466 }
4467}
4468
4469static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
4470{
4471 enum amd_reset_method reset_method;
4472 struct pci_dev *p = NULL;
4473 u64 expires;
4474
4475 /*
4476 * For now, only BACO and mode1 reset are confirmed
4477 * to suffer the audio issue if the audio device is not properly suspended.
4478 */
4479 reset_method = amdgpu_asic_reset_method(adev);
4480 if ((reset_method != AMD_RESET_METHOD_BACO) &&
4481 (reset_method != AMD_RESET_METHOD_MODE1))
4482 return -EINVAL;
4483
4484 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4485 adev->pdev->bus->number, 1);
4486 if (!p)
4487 return -ENODEV;
4488
4489 expires = pm_runtime_autosuspend_expiration(&(p->dev));
4490 if (!expires)
4491 /*
4492 * If we cannot get the audio device autosuspend delay,
4493 * a fixed 4s interval will be used. Since 3s is the
4494 * audio controller's default autosuspend delay setting,
4495 * the 4s used here is guaranteed to cover it.
4496 */
54b7feb9 4497 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
4498
4499 while (!pm_runtime_status_suspended(&(p->dev))) {
4500 if (!pm_runtime_suspend(&(p->dev)))
4501 break;
4502
4503 if (expires < ktime_get_mono_fast_ns()) {
4504 dev_warn(adev->dev, "failed to suspend display audio\n");
4505 /* TODO: abort the succeeding gpu reset? */
4506 return -ETIMEDOUT;
4507 }
4508 }
4509
4510 pm_runtime_disable(&(p->dev));
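/* Note (inference from the surrounding code, not original text): runtime PM
 * is disabled here so the audio function cannot autoresume while the GPU is
 * being reset; amdgpu_device_resume_display_audio() re-enables and resumes
 * it once recovery is finished.
 */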
4511
4512 return 0;
4513}
4514
26bc5340
AG
4515/**
4516 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
4517 *
982a820b 4518 * @adev: amdgpu_device pointer
26bc5340
AG
4519 * @job: which job trigger hang
4520 *
4521 * Attempt to reset the GPU if it has hung (all asics).
4522 * Attempt to do soft-reset or full-reset and reinitialize Asic
4523 * Returns 0 for success or an error on failure.
4524 */
4525
4526int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
4527 struct amdgpu_job *job)
4528{
1d721ed6 4529 struct list_head device_list, *device_list_handle = NULL;
7dd8c205
EQ
4530 bool need_full_reset = false;
4531 bool job_signaled = false;
26bc5340 4532 struct amdgpu_hive_info *hive = NULL;
26bc5340 4533 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 4534 int i, r = 0;
bb5c7235 4535 bool need_emergency_restart = false;
3f12acc8 4536 bool audio_suspended = false;
26bc5340 4537
6e3cd2a9 4538 /*
bb5c7235
WS
4539 * Special case: RAS triggered and full reset isn't supported
4540 */
4541 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
4542
d5ea093e
AG
4543 /*
4544 * Flush RAM to disk so that after reboot
4545 * the user can read the log and see why the system rebooted.
4546 */
bb5c7235 4547 if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
4548 DRM_WARN("Emergency reboot.");
4549
4550 ksys_sync_helper();
4551 emergency_restart();
4552 }
4553
b823821f 4554 dev_info(adev->dev, "GPU %s begin!\n",
bb5c7235 4555 need_emergency_restart ? "jobs stop":"reset");
26bc5340
AG
4556
4557 /*
1d721ed6
AG
4558 * Here we trylock to avoid a chain of resets executing, whether
4559 * triggered by jobs on different adevs in an XGMI hive or by jobs on
4560 * different schedulers for the same device, while this TO handler is running.
4561 * We always reset all schedulers for a device and all devices in an XGMI
4562 * hive, so that should take care of them too.
26bc5340 4563 */
d95e8e97 4564 hive = amdgpu_get_xgmi_hive(adev);
53b3f8f4
DL
4565 if (hive) {
4566 if (atomic_cmpxchg(&hive->in_reset, 0, 1) != 0) {
4567 DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
4568 job ? job->base.id : -1, hive->hive_id);
d95e8e97 4569 amdgpu_put_xgmi_hive(hive);
53b3f8f4
DL
4570 return 0;
4571 }
4572 mutex_lock(&hive->hive_lock);
1d721ed6 4573 }
26bc5340 4574
9e94d22c
EQ
4575 /*
4576 * Build list of devices to reset.
4577 * In case we are in XGMI hive mode, resort the device list
4578 * to put adev in the 1st position.
4579 */
4580 INIT_LIST_HEAD(&device_list);
4581 if (adev->gmc.xgmi.num_physical_nodes > 1) {
4582 if (!hive)
26bc5340 4583 return -ENODEV;
9e94d22c
EQ
4584 if (!list_is_first(&adev->gmc.xgmi.head, &hive->device_list))
4585 list_rotate_to_front(&adev->gmc.xgmi.head, &hive->device_list);
26bc5340
AG
4586 device_list_handle = &hive->device_list;
4587 } else {
4588 list_add_tail(&adev->gmc.xgmi.head, &device_list);
4589 device_list_handle = &device_list;
4590 }
4591
1d721ed6
AG
4592 /* block all schedulers and reset given job's ring */
4593 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
08ebb485 4594 if (!amdgpu_device_lock_adev(tmp_adev, hive)) {
aac89168 4595 dev_info(tmp_adev->dev, "Bailing on TDR for s_job:%llx, as another already in progress",
9e94d22c 4596 job ? job->base.id : -1);
cbfd17f7
DL
4597 r = 0;
4598 goto skip_recovery;
7c6e68c7
AG
4599 }
4600
3f12acc8
EQ
4601 /*
4602 * Try to put the audio codec into suspend state
4603 * before the gpu reset starts.
4604 *
4605 * The power domain of the graphics device is
4606 * shared with the AZ power domain. Without this,
4607 * we may change the audio hardware from behind
4608 * the audio driver's back and trigger
4609 * audio codec errors.
4610 */
4611 if (!amdgpu_device_suspend_display_audio(tmp_adev))
4612 audio_suspended = true;
4613
9e94d22c
EQ
4614 amdgpu_ras_set_error_query_ready(tmp_adev, false);
4615
52fb44cf
EQ
4616 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
4617
9e94d22c
EQ
4618 if (!amdgpu_sriov_vf(tmp_adev))
4619 amdgpu_amdkfd_pre_reset(tmp_adev);
4620
12ffa55d
AG
4621 /*
4622 * Mark these ASICs to be reset as untracked first,
4623 * and add them back after the reset completes.
4624 */
4625 amdgpu_unregister_gpu_instance(tmp_adev);
4626
a2f63ee8 4627 amdgpu_fbdev_set_suspend(tmp_adev, 1);
565d1941 4628
f1c1314b 4629 /* disable ras on ALL IPs */
bb5c7235 4630 if (!need_emergency_restart &&
b823821f 4631 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 4632 amdgpu_ras_suspend(tmp_adev);
4633
1d721ed6
AG
4634 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4635 struct amdgpu_ring *ring = tmp_adev->rings[i];
4636
4637 if (!ring || !ring->sched.thread)
4638 continue;
4639
0b2d2c2e 4640 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 4641
bb5c7235 4642 if (need_emergency_restart)
7c6e68c7 4643 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6
AG
4644 }
4645 }
4646
bb5c7235 4647 if (need_emergency_restart)
7c6e68c7
AG
4648 goto skip_sched_resume;
4649
1d721ed6
AG
4650 /*
4651 * Must check guilty signal here since after this point all old
4652 * HW fences are force signaled.
4653 *
4654 * job->base holds a reference to parent fence
4655 */
4656 if (job && job->base.s_fence->parent &&
7dd8c205 4657 dma_fence_is_signaled(job->base.s_fence->parent)) {
1d721ed6 4658 job_signaled = true;
1d721ed6
AG
4659 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
4660 goto skip_hw_reset;
4661 }
4662
26bc5340
AG
4663retry: /* Rest of adevs pre asic reset from XGMI hive. */
4664 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
26bc5340 4665 r = amdgpu_device_pre_asic_reset(tmp_adev,
ded08454 4666 (tmp_adev == adev) ? job : NULL,
26bc5340
AG
4667 &need_full_reset);
4668 /*TODO Should we stop ?*/
4669 if (r) {
aac89168 4670 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
4a580877 4671 r, adev_to_drm(tmp_adev)->unique);
26bc5340
AG
4672 tmp_adev->asic_reset_res = r;
4673 }
4674 }
4675
4676 /* Actual ASIC resets if needed.*/
4677 /* TODO Implement XGMI hive reset logic for SRIOV */
4678 if (amdgpu_sriov_vf(adev)) {
4679 r = amdgpu_device_reset_sriov(adev, job ? false : true);
4680 if (r)
4681 adev->asic_reset_res = r;
4682 } else {
7ac71382 4683 r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset, false);
26bc5340
AG
4684 if (r && r == -EAGAIN)
4685 goto retry;
4686 }
4687
1d721ed6
AG
4688skip_hw_reset:
4689
26bc5340
AG
4690 /* Post ASIC reset for all devs .*/
4691 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
7c6e68c7 4692
1d721ed6
AG
4693 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4694 struct amdgpu_ring *ring = tmp_adev->rings[i];
4695
4696 if (!ring || !ring->sched.thread)
4697 continue;
4698
4699 /* No point to resubmit jobs if we didn't HW reset*/
4700 if (!tmp_adev->asic_reset_res && !job_signaled)
4701 drm_sched_resubmit_jobs(&ring->sched);
4702
4703 drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
4704 }
4705
4706 if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
4a580877 4707 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
1d721ed6
AG
4708 }
4709
4710 tmp_adev->asic_reset_res = 0;
26bc5340
AG
4711
4712 if (r) {
4713 /* bad news, how to tell it to userspace ? */
12ffa55d 4714 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
4715 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
4716 } else {
12ffa55d 4717 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340 4718 }
7c6e68c7 4719 }
26bc5340 4720
7c6e68c7
AG
4721skip_sched_resume:
4722 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4723 /*unlock kfd: SRIOV would do it separately */
bb5c7235 4724 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
7c6e68c7 4725 amdgpu_amdkfd_post_reset(tmp_adev);
3f12acc8
EQ
4726 if (audio_suspended)
4727 amdgpu_device_resume_display_audio(tmp_adev);
26bc5340
AG
4728 amdgpu_device_unlock_adev(tmp_adev);
4729 }
4730
cbfd17f7 4731skip_recovery:
9e94d22c 4732 if (hive) {
53b3f8f4 4733 atomic_set(&hive->in_reset, 0);
9e94d22c 4734 mutex_unlock(&hive->hive_lock);
d95e8e97 4735 amdgpu_put_xgmi_hive(hive);
9e94d22c 4736 }
26bc5340
AG
4737
4738 if (r)
4739 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
d38ceaf9
AD
4740 return r;
4741}
4742
e3ecdffa
AD
4743/**
4744 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
4745 *
4746 * @adev: amdgpu_device pointer
4747 *
4748 * Fetches and stores in the driver the PCIE capabilities (gen speed
4749 * and lanes) of the slot the device is in. Handles APUs and
4750 * virtualized environments where PCIE config space may not be available.
4751 */
5494d864 4752static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 4753{
5d9a6330 4754 struct pci_dev *pdev;
c5313457
HK
4755 enum pci_bus_speed speed_cap, platform_speed_cap;
4756 enum pcie_link_width platform_link_width;
d0dd7f0c 4757
cd474ba0
AD
4758 if (amdgpu_pcie_gen_cap)
4759 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 4760
cd474ba0
AD
4761 if (amdgpu_pcie_lane_cap)
4762 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 4763
cd474ba0
AD
4764 /* covers APUs as well */
4765 if (pci_is_root_bus(adev->pdev->bus)) {
4766 if (adev->pm.pcie_gen_mask == 0)
4767 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
4768 if (adev->pm.pcie_mlw_mask == 0)
4769 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 4770 return;
cd474ba0 4771 }
d0dd7f0c 4772
c5313457
HK
4773 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
4774 return;
4775
dbaa922b
AD
4776 pcie_bandwidth_available(adev->pdev, NULL,
4777 &platform_speed_cap, &platform_link_width);
c5313457 4778
cd474ba0 4779 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
4780 /* asic caps */
4781 pdev = adev->pdev;
4782 speed_cap = pcie_get_speed_cap(pdev);
4783 if (speed_cap == PCI_SPEED_UNKNOWN) {
4784 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
4785 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4786 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 4787 } else {
5d9a6330
AD
4788 if (speed_cap == PCIE_SPEED_16_0GT)
4789 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4790 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4791 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4792 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
4793 else if (speed_cap == PCIE_SPEED_8_0GT)
4794 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4795 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4796 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
4797 else if (speed_cap == PCIE_SPEED_5_0GT)
4798 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4799 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
4800 else
4801 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
4802 }
4803 /* platform caps */
c5313457 4804 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
4805 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4806 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4807 } else {
c5313457 4808 if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
4809 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4810 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4811 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4812 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 4813 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
4814 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4815 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4816 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 4817 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
4818 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4819 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4820 else
4821 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
4822
cd474ba0
AD
4823 }
4824 }
4825 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 4826 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
4827 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
4828 } else {
c5313457 4829 switch (platform_link_width) {
5d9a6330 4830 case PCIE_LNK_X32:
cd474ba0
AD
4831 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
4832 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4833 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4834 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4835 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4836 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4837 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4838 break;
5d9a6330 4839 case PCIE_LNK_X16:
cd474ba0
AD
4840 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4841 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4842 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4843 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4844 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4845 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4846 break;
5d9a6330 4847 case PCIE_LNK_X12:
cd474ba0
AD
4848 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4849 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4850 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4851 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4852 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4853 break;
5d9a6330 4854 case PCIE_LNK_X8:
cd474ba0
AD
4855 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4856 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4857 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4858 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4859 break;
5d9a6330 4860 case PCIE_LNK_X4:
cd474ba0
AD
4861 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4862 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4863 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4864 break;
5d9a6330 4865 case PCIE_LNK_X2:
cd474ba0
AD
4866 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4867 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4868 break;
5d9a6330 4869 case PCIE_LNK_X1:
cd474ba0
AD
4870 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
4871 break;
4872 default:
4873 break;
4874 }
d0dd7f0c
AD
4875 }
4876 }
4877}
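/* Worked example (illustrative, not part of the original source): for a
 * GEN3-capable ASIC sitting in a GEN2 x8 slot, pcie_get_speed_cap() reports
 * PCIE_SPEED_8_0GT and pcie_bandwidth_available() reports PCIE_SPEED_5_0GT /
 * PCIE_LNK_X8, so the code above ends up with
 *
 *	adev->pm.pcie_gen_mask = CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
 *				 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
 *				 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
 *				 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
 *				 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2;
 *	adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
 *				 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
 *				 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
 *				 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
 */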
d38ceaf9 4878
361dbd01
AD
4879int amdgpu_device_baco_enter(struct drm_device *dev)
4880{
1348969a 4881 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 4882 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01 4883
4a580877 4884 if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
361dbd01
AD
4885 return -ENOTSUPP;
4886
6fb33209 4887 if (ras && ras->supported && adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
4888 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
4889
9530273e 4890 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
4891}
4892
4893int amdgpu_device_baco_exit(struct drm_device *dev)
4894{
1348969a 4895 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 4896 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 4897 int ret = 0;
361dbd01 4898
4a580877 4899 if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
361dbd01
AD
4900 return -ENOTSUPP;
4901
9530273e
EQ
4902 ret = amdgpu_dpm_baco_exit(adev);
4903 if (ret)
4904 return ret;
7a22677b 4905
6fb33209 4906 if (ras && ras->supported && adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
4907 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
4908
4909 return 0;
361dbd01 4910}
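/* Background note (assumption, not in the original file): BACO ("Bus Active,
 * Chip Off") powers down the GPU core while keeping its PCIe link alive.
 * These helpers wrap the dpm BACO entry/exit calls and, when RAS is active,
 * gate the nbio doorbell interrupt around the transition so no
 * doorbell-driven RAS interrupt fires while the chip is off.
 */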
c9a6b82f 4911
acd89fca
AG
4912static void amdgpu_cancel_all_tdr(struct amdgpu_device *adev)
4913{
4914 int i;
4915
4916 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4917 struct amdgpu_ring *ring = adev->rings[i];
4918
4919 if (!ring || !ring->sched.thread)
4920 continue;
4921
4922 cancel_delayed_work_sync(&ring->sched.work_tdr);
4923 }
4924}
4925
c9a6b82f
AG
4926/**
4927 * amdgpu_pci_error_detected - Called when a PCI error is detected.
4928 * @pdev: PCI device struct
4929 * @state: PCI channel state
4930 *
4931 * Description: Called when a PCI error is detected.
4932 *
4933 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
4934 */
4935pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
4936{
4937 struct drm_device *dev = pci_get_drvdata(pdev);
4938 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 4939 int i;
c9a6b82f
AG
4940
4941 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
4942
6894305c
AG
4943 if (adev->gmc.xgmi.num_physical_nodes > 1) {
4944 DRM_WARN("No support for XGMI hive yet...");
4945 return PCI_ERS_RESULT_DISCONNECT;
4946 }
4947
c9a6b82f
AG
4948 switch (state) {
4949 case pci_channel_io_normal:
4950 return PCI_ERS_RESULT_CAN_RECOVER;
acd89fca
AG
4951 /* Fatal error, prepare for slot reset */
4952 case pci_channel_io_frozen:
4953 /*
4954 * Cancel and wait for all TDRs in progress if failing to
4955 * set adev->in_gpu_reset in amdgpu_device_lock_adev
4956 *
4957 * Locking adev->reset_sem will prevent any external access
4958 * to GPU during PCI error recovery
4959 */
4960 while (!amdgpu_device_lock_adev(adev, NULL))
4961 amdgpu_cancel_all_tdr(adev);
4962
4963 /*
4964 * Block any work scheduling as we do for regular GPU reset
4965 * for the duration of the recovery
4966 */
4967 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4968 struct amdgpu_ring *ring = adev->rings[i];
4969
4970 if (!ring || !ring->sched.thread)
4971 continue;
4972
4973 drm_sched_stop(&ring->sched, NULL);
4974 }
c9a6b82f
AG
4975 return PCI_ERS_RESULT_NEED_RESET;
4976 case pci_channel_io_perm_failure:
4977 /* Permanent error, prepare for device removal */
4978 return PCI_ERS_RESULT_DISCONNECT;
4979 }
4980
4981 return PCI_ERS_RESULT_NEED_RESET;
4982}
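/* Flow note (summary, not part of the original source): when
 * PCI_ERS_RESULT_NEED_RESET is returned here (pci_channel_io_frozen), the PCI
 * core will next call amdgpu_pci_slot_reset() and then amdgpu_pci_resume();
 * when PCI_ERS_RESULT_CAN_RECOVER is returned (pci_channel_io_normal), it
 * calls amdgpu_pci_mmio_enabled() instead.
 */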
4983
4984/**
4985 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
4986 * @pdev: pointer to PCI device
4987 */
4988pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
4989{
4990
4991 DRM_INFO("PCI error: mmio enabled callback!!\n");
4992
4993 /* TODO - dump whatever for debugging purposes */
4994
4995 /* This is called only if amdgpu_pci_error_detected returns
4996 * PCI_ERS_RESULT_CAN_RECOVER. Reads/writes to the device still
4997 * work, so there is no need to reset the slot.
4998 */
4999
5000 return PCI_ERS_RESULT_RECOVERED;
5001}
5002
5003/**
5004 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
5005 * @pdev: PCI device struct
5006 *
5007 * Description: This routine is called by the pci error recovery
5008 * code after the PCI slot has been reset, just before we
5009 * should resume normal operations.
5010 */
5011pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
5012{
5013 struct drm_device *dev = pci_get_drvdata(pdev);
5014 struct amdgpu_device *adev = drm_to_adev(dev);
362c7b91 5015 int r, i;
7ac71382 5016 bool need_full_reset = true;
362c7b91 5017 u32 memsize;
7ac71382 5018 struct list_head device_list;
c9a6b82f
AG
5019
5020 DRM_INFO("PCI error: slot reset callback!!\n");
5021
7ac71382
AG
5022 INIT_LIST_HEAD(&device_list);
5023 list_add_tail(&adev->gmc.xgmi.head, &device_list);
5024
362c7b91
AG
5025 /* wait for asic to come out of reset */
5026 msleep(500);
5027
7ac71382 5028 /* Restore PCI confspace */
c1dd4aa6 5029 amdgpu_device_load_pci_state(pdev);
c9a6b82f 5030
362c7b91
AG
5031 /* confirm ASIC came out of reset */
5032 for (i = 0; i < adev->usec_timeout; i++) {
5033 memsize = amdgpu_asic_get_config_memsize(adev);
5034
5035 if (memsize != 0xffffffff)
5036 break;
5037 udelay(1);
5038 }
5039 if (memsize == 0xffffffff) {
5040 r = -ETIME;
5041 goto out;
5042 }
5043
362c7b91 5044 adev->in_pci_err_recovery = true;
7ac71382 5045 r = amdgpu_device_pre_asic_reset(adev, NULL, &need_full_reset);
bf36b52e 5046 adev->in_pci_err_recovery = false;
c9a6b82f
AG
5047 if (r)
5048 goto out;
5049
7ac71382 5050 r = amdgpu_do_asic_reset(NULL, &device_list, &need_full_reset, true);
c9a6b82f
AG
5051
5052out:
c9a6b82f 5053 if (!r) {
c1dd4aa6
AG
5054 if (amdgpu_device_cache_pci_state(adev->pdev))
5055 pci_restore_state(adev->pdev);
5056
c9a6b82f
AG
5057 DRM_INFO("PCIe error recovery succeeded\n");
5058 } else {
5059 DRM_ERROR("PCIe error recovery failed, err:%d", r);
5060 amdgpu_device_unlock_adev(adev);
5061 }
5062
5063 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
5064}
5065
5066/**
5067 * amdgpu_pci_resume() - resume normal ops after PCI reset
5068 * @pdev: pointer to PCI device
5069 *
5070 * Called when the error recovery driver tells us that it's
505199a3 5071 * OK to resume normal operation.
c9a6b82f
AG
5072 */
5073void amdgpu_pci_resume(struct pci_dev *pdev)
5074{
5075 struct drm_device *dev = pci_get_drvdata(pdev);
5076 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5077 int i;
c9a6b82f 5078
c9a6b82f
AG
5079
5080 DRM_INFO("PCI error: resume callback!!\n");
acd89fca
AG
5081
5082 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5083 struct amdgpu_ring *ring = adev->rings[i];
5084
5085 if (!ring || !ring->sched.thread)
5086 continue;
5087
5088
5089 drm_sched_resubmit_jobs(&ring->sched);
5090 drm_sched_start(&ring->sched, true);
5091 }
5092
5093 amdgpu_device_unlock_adev(adev);
c9a6b82f 5094}
c1dd4aa6
AG
5095
5096bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
5097{
5098 struct drm_device *dev = pci_get_drvdata(pdev);
5099 struct amdgpu_device *adev = drm_to_adev(dev);
5100 int r;
5101
5102 r = pci_save_state(pdev);
5103 if (!r) {
5104 kfree(adev->pci_state);
5105
5106 adev->pci_state = pci_store_saved_state(pdev);
5107
5108 if (!adev->pci_state) {
5109 DRM_ERROR("Failed to store PCI saved state");
5110 return false;
5111 }
5112 } else {
5113 DRM_WARN("Failed to save PCI state, err:%d\n", r);
5114 return false;
5115 }
5116
5117 return true;
5118}
5119
5120bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
5121{
5122 struct drm_device *dev = pci_get_drvdata(pdev);
5123 struct amdgpu_device *adev = drm_to_adev(dev);
5124 int r;
5125
5126 if (!adev->pci_state)
5127 return false;
5128
5129 r = pci_load_saved_state(pdev, adev->pci_state);
5130
5131 if (!r) {
5132 pci_restore_state(pdev);
5133 } else {
5134 DRM_WARN("Failed to load PCI state, err:%d\n", r);
5135 return false;
5136 }
5137
5138 return true;
5139}
5140
5141