/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/console.h>
#include <linux/slab.h>

#include <drm/drm_atomic_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "nv.h"
#include "bif/bif_4_1_d.h"
#include <linux/pci.h>
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
#include "amdgpu_pmu.h"
#include "amdgpu_fru_eeprom.h"

#include <linux/suspend.h>
#include <drm/task_barrier.h>
#include <linux/pm_runtime.h>

MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vangogh_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_gpu_info.bin");

#define AMDGPU_RESUME_MS		2000

const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"ARCTURUS",
	"RENOIR",
	"NAVI10",
	"NAVI14",
	"NAVI12",
	"SIENNA_CICHLID",
	"NAVY_FLOUNDER",
	"VANGOGH",
	"LAST",
};

/**
 * DOC: pcie_replay_count
 *
 * The amdgpu driver provides a sysfs API for reporting the total number
 * of PCIe replays (NAKs)
 * The file pcie_replay_count is used for this and returns the total
 * number of replays as a sum of the NAKs generated and NAKs received
 */

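/*
 * Reading the count from user space is a plain sysfs read; illustrative
 * example only, the exact path depends on the PCI address of the card:
 *
 *	cat /sys/bus/pci/devices/<domain:bus:dev.fn>/pcie_replay_count
 */
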
static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);
	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);

	return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
}

static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
		amdgpu_device_get_pcie_replay_count, NULL);

static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);

/**
 * DOC: product_name
 *
 * The amdgpu driver provides a sysfs API for reporting the product name
 * for the device.
 * The file product_name is used for this and returns the product name
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_product_name(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);

	return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_name);
}

static DEVICE_ATTR(product_name, S_IRUGO,
		amdgpu_device_get_product_name, NULL);

/**
 * DOC: product_number
 *
 * The amdgpu driver provides a sysfs API for reporting the part number
 * for the device.
 * The file product_number is used for this and returns the part number
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_product_number(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);

	return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_number);
}

static DEVICE_ATTR(product_number, S_IRUGO,
		amdgpu_device_get_product_number, NULL);

/**
 * DOC: serial_number
 *
 * The amdgpu driver provides a sysfs API for reporting the serial number
 * for the device.
 * The file serial_number is used for this and returns the serial number
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_serial_number(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);

	return snprintf(buf, PAGE_SIZE, "%s\n", adev->serial);
}

static DEVICE_ATTR(serial_number, S_IRUGO,
		amdgpu_device_get_serial_number, NULL);

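/*
 * Example reads of the FRU-backed attributes (illustrative only; the data
 * is only meaningful on certain server cards, as noted above):
 *
 *	cat /sys/bus/pci/devices/<domain:bus:dev.fn>/product_name
 *	cat /sys/bus/pci/devices/<domain:bus:dev.fn>/product_number
 *	cat /sys/bus/pci/devices/<domain:bus:dev.fn>/serial_number
 */
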
/**
 * amdgpu_device_supports_boco - Is the device a dGPU with HG/PX power control
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with HG/PX power control,
 * otherwise return false.
 */
bool amdgpu_device_supports_boco(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);

	if (adev->flags & AMD_IS_PX)
		return true;
	return false;
}

/**
 * amdgpu_device_supports_baco - Does the device support BACO
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device supports BACO,
 * otherwise return false.
 */
bool amdgpu_device_supports_baco(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);

	return amdgpu_asic_supports_baco(adev);
}

/**
 * VRAM access helper functions.
 *
 * amdgpu_device_vram_access - read/write a buffer in vram
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size in bytes, @buf must be at least @size bytes large
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
			       uint32_t *buf, size_t size, bool write)
{
	unsigned long flags;
	uint32_t hi = ~0;
	uint64_t last;

#ifdef CONFIG_64BIT
	last = min(pos + size, adev->gmc.visible_vram_size);
	if (last > pos) {
		void __iomem *addr = adev->mman.aper_base_kaddr + pos;
		size_t count = last - pos;

		if (write) {
			memcpy_toio(addr, buf, count);
			mb();
			amdgpu_asic_flush_hdp(adev, NULL);
		} else {
			amdgpu_asic_invalidate_hdp(adev, NULL);
			mb();
			memcpy_fromio(buf, addr, count);
		}

		if (count == size)
			return;

		pos += count;
		buf += count / 4;
		size -= count;
	}
#endif

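	/*
	 * Whatever is left (or everything, on 32-bit kernels) goes through
	 * the indexed MMIO window: MM_INDEX/MM_INDEX_HI select the VRAM
	 * address (bit 31 of MM_INDEX selects the memory aperture) and
	 * MM_DATA transfers one dword per access, under mmio_idx_lock.
	 */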
	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
	for (last = pos + size; pos < last; pos += 4) {
		uint32_t tmp = pos >> 31;

		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
		if (tmp != hi) {
			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
			hi = tmp;
		}
		if (write)
			WREG32_NO_KIQ(mmMM_DATA, *buf++);
		else
			*buf++ = RREG32_NO_KIQ(mmMM_DATA);
	}
	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
}

/*
 * register access helper functions.
 */
/**
 * amdgpu_device_rreg - read a memory mapped IO or indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
			    uint32_t reg, uint32_t acc_flags)
{
	uint32_t ret;

	if (adev->in_pci_err_recovery)
		return 0;

	if ((reg * 4) < adev->rmmio_size) {
		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
		    amdgpu_sriov_runtime(adev) &&
		    down_read_trylock(&adev->reset_sem)) {
			ret = amdgpu_kiq_rreg(adev, reg);
			up_read(&adev->reset_sem);
		} else {
			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
		}
	} else {
		ret = adev->pcie_rreg(adev, reg * 4);
	}

	trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);

	return ret;
}

/*
 * MMIO register read with bytes helper functions
 * @offset: byte offset from MMIO start
 */

/**
 * amdgpu_mm_rreg8 - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 *
 * Returns the 8 bit value from the offset specified.
 */
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
{
	if (adev->in_pci_err_recovery)
		return 0;

	if (offset < adev->rmmio_size)
		return (readb(adev->rmmio + offset));
	BUG();
}

/*
 * MMIO register write with bytes helper functions
 * @offset: byte offset from MMIO start
 * @value: the value to be written to the register
 */
/**
 * amdgpu_mm_wreg8 - write a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
{
	if (adev->in_pci_err_recovery)
		return;

	if (offset < adev->rmmio_size)
		writeb(value, adev->rmmio + offset);
	else
		BUG();
}

/**
 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_device_wreg(struct amdgpu_device *adev,
			uint32_t reg, uint32_t v,
			uint32_t acc_flags)
{
	if (adev->in_pci_err_recovery)
		return;

	if ((reg * 4) < adev->rmmio_size) {
		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
		    amdgpu_sriov_runtime(adev) &&
		    down_read_trylock(&adev->reset_sem)) {
			amdgpu_kiq_wreg(adev, reg, v);
			up_read(&adev->reset_sem);
		} else {
			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
		}
	} else {
		adev->pcie_wreg(adev, reg * 4, v);
	}

	trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
}

/*
 * amdgpu_mm_wreg_mmio_rlc - write register either with mmio or with RLC path if in range
 *
 * This function is invoked only for debugfs register access.
 */
void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
			     uint32_t reg, uint32_t v)
{
	if (adev->in_pci_err_recovery)
		return;

	if (amdgpu_sriov_fullaccess(adev) &&
	    adev->gfx.rlc.funcs &&
	    adev->gfx.rlc.funcs->is_rlcg_access_range) {
		if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
			return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v);
	} else {
		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
	}
}

/**
 * amdgpu_io_rreg - read an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 *
 * Returns the 32 bit value from the offset specified.
 */
u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
{
	if (adev->in_pci_err_recovery)
		return 0;

	if ((reg * 4) < adev->rio_mem_size)
		return ioread32(adev->rio_mem + (reg * 4));
	else {
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		return ioread32(adev->rio_mem + (mmMM_DATA * 4));
	}
}

/**
 * amdgpu_io_wreg - write to an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
	if (adev->in_pci_err_recovery)
		return;

	if ((reg * 4) < adev->rio_mem_size)
		iowrite32(v, adev->rio_mem + (reg * 4));
	else {
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
	}
}

/**
 * amdgpu_mm_rdoorbell - read a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
{
	if (adev->in_pci_err_recovery)
		return 0;

	if (index < adev->doorbell.num_doorbells) {
		return readl(adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell - write a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
{
	if (adev->in_pci_err_recovery)
		return;

	if (index < adev->doorbell.num_doorbells) {
		writel(v, adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

/**
 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
{
	if (adev->in_pci_err_recovery)
		return 0;

	if (index < adev->doorbell.num_doorbells) {
		return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
{
	if (adev->in_pci_err_recovery)
		return;

	if (index < adev->doorbell.num_doorbells) {
		atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

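/*
 * Indirect register access: the caller passes the MMIO offsets of an
 * index/data register pair; the target register address is written to the
 * index register and the value is then read from or written to the data
 * register, serialized by pcie_idx_lock.
 */
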
/**
 * amdgpu_device_indirect_rreg - read an indirect register
 *
 * @adev: amdgpu_device pointer
 * @pcie_index: mmio register offset
 * @pcie_data: mmio register offset
 * @reg_addr: indirect register address to read from
 *
 * Returns the value of indirect register @reg_addr
 */
u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
				u32 pcie_index, u32 pcie_data,
				u32 reg_addr)
{
	unsigned long flags;
	u32 r;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	r = readl(pcie_data_offset);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return r;
}

/**
 * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
 *
 * @adev: amdgpu_device pointer
 * @pcie_index: mmio register offset
 * @pcie_data: mmio register offset
 * @reg_addr: indirect register address to read from
 *
 * Returns the value of indirect register @reg_addr
 */
u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
				  u32 pcie_index, u32 pcie_data,
				  u32 reg_addr)
{
	unsigned long flags;
	u64 r;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	/* read low 32 bits */
	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	r = readl(pcie_data_offset);
	/* read high 32 bits */
	writel(reg_addr + 4, pcie_index_offset);
	readl(pcie_index_offset);
	r |= ((u64)readl(pcie_data_offset) << 32);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return r;
}

/**
 * amdgpu_device_indirect_wreg - write an indirect register address
 *
 * @adev: amdgpu_device pointer
 * @pcie_index: mmio register offset
 * @pcie_data: mmio register offset
 * @reg_addr: indirect register offset
 * @reg_data: indirect register data
 *
 */
void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
				 u32 pcie_index, u32 pcie_data,
				 u32 reg_addr, u32 reg_data)
{
	unsigned long flags;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	writel(reg_data, pcie_data_offset);
	readl(pcie_data_offset);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

/**
 * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
 *
 * @adev: amdgpu_device pointer
 * @pcie_index: mmio register offset
 * @pcie_data: mmio register offset
 * @reg_addr: indirect register offset
 * @reg_data: indirect register data
 *
 */
void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
				   u32 pcie_index, u32 pcie_data,
				   u32 reg_addr, u64 reg_data)
{
	unsigned long flags;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	/* write low 32 bits */
	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
	readl(pcie_data_offset);
	/* write high 32 bits */
	writel(reg_addr + 4, pcie_index_offset);
	readl(pcie_index_offset);
	writel((u32)(reg_data >> 32), pcie_data_offset);
	readl(pcie_data_offset);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

/**
 * amdgpu_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 *
 * Dummy register read function.  Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function.  Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 *
 * Dummy register read function.  Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg64 - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function.  Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
{
	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_block_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 *
 * Dummy register read function.  Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
					  uint32_t block, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
		  reg, block);
	BUG();
	return 0;
}

/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function.  Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
				      uint32_t block,
				      uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
		  reg, block, v);
	BUG();
}

/**
 * amdgpu_device_asic_init - Wrapper for atom asic_init
 *
 * @adev: amdgpu_device pointer
 *
 * Does any asic specific work and then calls atom asic init.
 */
static int amdgpu_device_asic_init(struct amdgpu_device *adev)
{
	amdgpu_asic_pre_asic_init(adev);

	return amdgpu_atom_asic_init(adev->mode_info.atom_context);
}

/**
 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
 *
 * @adev: amdgpu device pointer
 *
 * Allocates a scratch page of VRAM for use by various things in the
 * driver.
 */
static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
{
	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
				       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
				       &adev->vram_scratch.robj,
				       &adev->vram_scratch.gpu_addr,
				       (void **)&adev->vram_scratch.ptr);
}

/**
 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
 *
 * @adev: amdgpu device pointer
 *
 * Frees the VRAM scratch page.
 */
static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
}

/**
 * amdgpu_device_program_register_sequence - program an array of registers.
 *
 * @adev: amdgpu_device pointer
 * @registers: pointer to the register array
 * @array_size: size of the register array
 *
 * Programs an array of registers with the corresponding AND and OR masks.
 * This is a helper for setting golden registers.
 */
void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
					     const u32 *registers,
					     const u32 array_size)
{
	u32 tmp, reg, and_mask, or_mask;
	int i;

	if (array_size % 3)
		return;

	for (i = 0; i < array_size; i += 3) {
		reg = registers[i + 0];
		and_mask = registers[i + 1];
		or_mask = registers[i + 2];

		if (and_mask == 0xffffffff) {
			tmp = or_mask;
		} else {
			tmp = RREG32(reg);
			tmp &= ~and_mask;
			if (adev->family >= AMDGPU_FAMILY_AI)
				tmp |= (or_mask & and_mask);
			else
				tmp |= or_mask;
		}
		WREG32(reg, tmp);
	}
}

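/*
 * Example usage (illustrative only; mmFOO and the mask values are made up):
 *
 *	static const u32 golden_settings_foo[] = {
 *		mmFOO, 0xffffff0f, 0x00000050,
 *	};
 *
 *	amdgpu_device_program_register_sequence(adev, golden_settings_foo,
 *						ARRAY_SIZE(golden_settings_foo));
 */
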
/**
 * amdgpu_device_pci_config_reset - reset the GPU
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using the pci config reset sequence.
 * Only applicable to asics prior to vega10.
 */
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
{
	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
}

/*
 * GPU doorbell aperture helpers function.
 */
/**
 * amdgpu_device_doorbell_init - Init doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Init doorbell driver information (CIK)
 * Returns 0 on success, error on failure.
 */
static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
{

	/* No doorbell on SI hardware generation */
	if (adev->asic_type < CHIP_BONAIRE) {
		adev->doorbell.base = 0;
		adev->doorbell.size = 0;
		adev->doorbell.num_doorbells = 0;
		adev->doorbell.ptr = NULL;
		return 0;
	}

	if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
		return -EINVAL;

	amdgpu_asic_init_doorbell_index(adev);

	/* doorbell bar mapping */
	adev->doorbell.base = pci_resource_start(adev->pdev, 2);
	adev->doorbell.size = pci_resource_len(adev->pdev, 2);

	adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
					     adev->doorbell_index.max_assignment+1);
	if (adev->doorbell.num_doorbells == 0)
		return -EINVAL;

	/* For Vega, reserve and map two pages on doorbell BAR since SDMA
	 * paging queue doorbell uses the second page. The
	 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
	 * doorbells are in the first page. So with paging queue enabled,
	 * the max num_doorbells should be incremented by one page (0x400 in dword)
	 */
	if (adev->asic_type >= CHIP_VEGA10)
		adev->doorbell.num_doorbells += 0x400;

	adev->doorbell.ptr = ioremap(adev->doorbell.base,
				     adev->doorbell.num_doorbells *
				     sizeof(u32));
	if (adev->doorbell.ptr == NULL)
		return -ENOMEM;

	return 0;
}

/**
 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Tear down doorbell driver information (CIK)
 */
static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
{
	iounmap(adev->doorbell.ptr);
	adev->doorbell.ptr = NULL;
}

/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */

/**
 * amdgpu_device_wb_fini - Disable Writeback and free memory
 *
 * @adev: amdgpu_device pointer
 *
 * Disables Writeback and frees the Writeback memory (all asics).
 * Used at driver shutdown.
 */
static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
{
	if (adev->wb.wb_obj) {
		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
				      &adev->wb.gpu_addr,
				      (void **)&adev->wb.wb);
		adev->wb.wb_obj = NULL;
	}
}

/**
 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
 *
 * @adev: amdgpu_device pointer
 *
 * Initializes writeback and allocates writeback memory (all asics).
 * Used at driver startup.
 * Returns 0 on success or a negative error code on failure.
 */
static int amdgpu_device_wb_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->wb.wb_obj == NULL) {
		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
					    (void **)&adev->wb.wb);
		if (r) {
			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
			return r;
		}

		adev->wb.num_wb = AMDGPU_MAX_WB;
		memset(&adev->wb.used, 0, sizeof(adev->wb.used));

		/* clear wb memory */
		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
	}

	return 0;
}

/**
 * amdgpu_device_wb_get - Allocate a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Allocate a wb slot for use by the driver (all asics).
 * Returns 0 on success or -EINVAL on failure.
 */
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
{
	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);

	if (offset < adev->wb.num_wb) {
		__set_bit(offset, adev->wb.used);
		*wb = offset << 3; /* convert to dw offset */
		return 0;
	} else {
		return -EINVAL;
	}
}

/**
 * amdgpu_device_wb_free - Free a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Free a wb slot allocated for use by the driver (all asics)
 */
void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
{
	wb >>= 3;
	if (wb < adev->wb.num_wb)
		__clear_bit(wb, adev->wb.used);
}

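/*
 * Minimal usage sketch for the writeback helpers (illustrative only,
 * error handling trimmed):
 *
 *	u32 wb;
 *
 *	if (!amdgpu_device_wb_get(adev, &wb)) {
 *		u64 gpu_addr = adev->wb.gpu_addr + (wb * 4);
 *		volatile u32 *cpu_addr = &adev->wb.wb[wb];
 *
 *		// let the GPU write status to gpu_addr, poll *cpu_addr,
 *		// then release the slot:
 *		amdgpu_device_wb_free(adev, wb);
 *	}
 */
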
/**
 * amdgpu_device_resize_fb_bar - try to resize FB BAR
 *
 * @adev: amdgpu_device pointer
 *
 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 * to fail, but if any of the BARs is not accessible after the size we abort
 * driver loading by returning -ENODEV.
 */
int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
{
	u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
	u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
	struct pci_bus *root;
	struct resource *res;
	unsigned i;
	u16 cmd;
	int r;

	/* Bypass for VF */
	if (amdgpu_sriov_vf(adev))
		return 0;

	/* skip if the bios has already enabled large BAR */
	if (adev->gmc.real_vram_size &&
	    (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
		return 0;

	/* Check if the root BUS has 64bit memory resources */
	root = adev->pdev->bus;
	while (root->parent)
		root = root->parent;

	pci_bus_for_each_resource(root, res, i) {
		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
		    res->start > 0x100000000ull)
			break;
	}

	/* Trying to resize is pointless without a root hub window above 4GB */
	if (!res)
		return 0;

	/* Disable memory decoding while we change the BAR addresses and size */
	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
	pci_write_config_word(adev->pdev, PCI_COMMAND,
			      cmd & ~PCI_COMMAND_MEMORY);

	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
	amdgpu_device_doorbell_fini(adev);
	if (adev->asic_type >= CHIP_BONAIRE)
		pci_release_resource(adev->pdev, 2);

	pci_release_resource(adev->pdev, 0);

	r = pci_resize_resource(adev->pdev, 0, rbar_size);
	if (r == -ENOSPC)
		DRM_INFO("Not enough PCI address space for a large BAR.");
	else if (r && r != -ENOTSUPP)
		DRM_ERROR("Problem resizing BAR0 (%d).", r);

	pci_assign_unassigned_bus_resources(adev->pdev->bus);

	/* When the doorbell or fb BAR isn't available we have no chance of
	 * using the device.
	 */
	r = amdgpu_device_doorbell_init(adev);
	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
		return -ENODEV;

	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);

	return 0;
}

/*
 * GPU helpers function.
 */
/**
 * amdgpu_device_need_post - check if the hw need post or not
 *
 * @adev: amdgpu_device pointer
 *
 * Check if the asic has been initialized (all asics) at driver startup
 * or post is needed if hw reset is performed.
 * Returns true if need or false if not.
 */
bool amdgpu_device_need_post(struct amdgpu_device *adev)
{
	uint32_t reg;

	if (amdgpu_sriov_vf(adev))
		return false;

	if (amdgpu_passthrough(adev)) {
		/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
		 * some old smc fw still needs the driver to do a vPost, otherwise the gpu hangs,
		 * while smc fw versions above 22.15 don't have this flaw, so we force
		 * vPost to be executed for smc versions below 22.15
		 */
		if (adev->asic_type == CHIP_FIJI) {
			int err;
			uint32_t fw_ver;

			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
			/* force vPost if error occurred */
			if (err)
				return true;

			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
			if (fw_ver < 0x00160e00)
				return true;
		}
	}

	if (adev->has_hw_reset) {
		adev->has_hw_reset = false;
		return true;
	}

	/* bios scratch used on CIK+ */
	if (adev->asic_type >= CHIP_BONAIRE)
		return amdgpu_atombios_scratch_need_asic_init(adev);

	/* check MEM_SIZE for older asics */
	reg = amdgpu_asic_get_config_memsize(adev);

	if ((reg != 0) && (reg != 0xffffffff))
		return false;

	return true;
}

/* if we get transitioned to only one device, take VGA back */
/**
 * amdgpu_device_vga_set_decode - enable/disable vga decode
 *
 * @cookie: amdgpu_device pointer
 * @state: enable/disable vga decode
 *
 * Enable/disable vga decode (all asics).
 * Returns VGA resource flags.
 */
static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
{
	struct amdgpu_device *adev = cookie;

	amdgpu_asic_set_vga_state(adev, state);
	if (state)
		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
	else
		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
}

/**
 * amdgpu_device_check_block_size - validate the vm block size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm block size specified via module parameter.
 * The vm block size defines number of bits in page table versus page directory,
 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
 * page table and the remaining bits are in the page directory.
 */
static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
{
	/* defines number of bits in page table versus page directory,
	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
	 * page table and the remaining bits are in the page directory */
	if (amdgpu_vm_block_size == -1)
		return;

	if (amdgpu_vm_block_size < 9) {
		dev_warn(adev->dev, "VM page table size (%d) too small\n",
			 amdgpu_vm_block_size);
		amdgpu_vm_block_size = -1;
	}
}

/**
 * amdgpu_device_check_vm_size - validate the vm size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm size in GB specified via module parameter.
 * The VM size is the size of the GPU virtual memory space in GB.
 */
static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
{
	/* no need to check the default value */
	if (amdgpu_vm_size == -1)
		return;

	if (amdgpu_vm_size < 1) {
		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
			 amdgpu_vm_size);
		amdgpu_vm_size = -1;
	}
}

static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
{
	struct sysinfo si;
	bool is_os_64 = (sizeof(void *) == 8);
	uint64_t total_memory;
	uint64_t dram_size_seven_GB = 0x1B8000000;
	uint64_t dram_size_three_GB = 0xB8000000;

	if (amdgpu_smu_memory_pool_size == 0)
		return;

	if (!is_os_64) {
		DRM_WARN("Not 64-bit OS, feature not supported\n");
		goto def_value;
	}
	si_meminfo(&si);
	total_memory = (uint64_t)si.totalram * si.mem_unit;

	if ((amdgpu_smu_memory_pool_size == 1) ||
	    (amdgpu_smu_memory_pool_size == 2)) {
		if (total_memory < dram_size_three_GB)
			goto def_value1;
	} else if ((amdgpu_smu_memory_pool_size == 4) ||
		   (amdgpu_smu_memory_pool_size == 8)) {
		if (total_memory < dram_size_seven_GB)
			goto def_value1;
	} else {
		DRM_WARN("Smu memory pool size not supported\n");
		goto def_value;
	}
	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;

	return;

def_value1:
	DRM_WARN("Not enough system memory\n");
def_value:
	adev->pm.smu_prv_buffer_size = 0;
}

/**
 * amdgpu_device_check_arguments - validate module params
 *
 * @adev: amdgpu_device pointer
 *
 * Validates certain module parameters and updates
 * the associated values used by the driver (all asics).
 */
static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
{
	if (amdgpu_sched_jobs < 4) {
		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = 4;
	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
	}

	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
		/* gart size must be greater or equal to 32M */
		dev_warn(adev->dev, "gart size (%d) too small\n",
			 amdgpu_gart_size);
		amdgpu_gart_size = -1;
	}

	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
		/* gtt size must be greater or equal to 32M */
		dev_warn(adev->dev, "gtt size (%d) too small\n",
			 amdgpu_gtt_size);
		amdgpu_gtt_size = -1;
	}

	/* valid range is between 4 and 9 inclusive */
	if (amdgpu_vm_fragment_size != -1 &&
	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
		dev_warn(adev->dev, "valid range is between 4 and 9\n");
		amdgpu_vm_fragment_size = -1;
	}

	if (amdgpu_sched_hw_submission < 2) {
		dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
			 amdgpu_sched_hw_submission);
		amdgpu_sched_hw_submission = 2;
	} else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
		dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
			 amdgpu_sched_hw_submission);
		amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
	}

	amdgpu_device_check_smu_prv_buffer_size(adev);

	amdgpu_device_check_vm_size(adev);

	amdgpu_device_check_block_size(adev);

	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);

	amdgpu_gmc_tmz_set(adev);

	if (amdgpu_num_kcq == -1) {
		amdgpu_num_kcq = 8;
	} else if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) {
		amdgpu_num_kcq = 8;
		dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid parameter provided by user\n");
	}

	amdgpu_gmc_noretry_set(adev);

	return 0;
}

/**
 * amdgpu_switcheroo_set_state - set switcheroo state
 *
 * @pdev: pci dev pointer
 * @state: vga_switcheroo state
 *
 * Callback for the switcheroo driver. Suspends or resumes the
 * asics before or after it is powered up using ACPI methods.
 */
static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
					enum vga_switcheroo_state state)
{
	struct drm_device *dev = pci_get_drvdata(pdev);
	int r;

	if (amdgpu_device_supports_boco(dev) && state == VGA_SWITCHEROO_OFF)
		return;

	if (state == VGA_SWITCHEROO_ON) {
		pr_info("switched on\n");
		/* don't suspend or resume card normally */
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;

		pci_set_power_state(dev->pdev, PCI_D0);
		amdgpu_device_load_pci_state(dev->pdev);
		r = pci_enable_device(dev->pdev);
		if (r)
			DRM_WARN("pci_enable_device failed (%d)\n", r);
		amdgpu_device_resume(dev, true);

		dev->switch_power_state = DRM_SWITCH_POWER_ON;
		drm_kms_helper_poll_enable(dev);
	} else {
		pr_info("switched off\n");
		drm_kms_helper_poll_disable(dev);
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
		amdgpu_device_suspend(dev, true);
		amdgpu_device_cache_pci_state(dev->pdev);
		/* Shut down the device */
		pci_disable_device(dev->pdev);
		pci_set_power_state(dev->pdev, PCI_D3cold);
		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
	}
}

/**
 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
 *
 * @pdev: pci dev pointer
 *
 * Callback for the switcheroo driver. Checks if the switcheroo
 * state can be changed.
 * Returns true if the state can be changed, false if not.
 */
static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);

	/*
	 * FIXME: open_count is protected by drm_global_mutex but that would lead to
	 * locking inversion with the driver load path. And the access here is
	 * completely racy anyway. So don't bother with locking for now.
	 */
	return atomic_read(&dev->open_count) == 0;
}

static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
	.set_gpu_state = amdgpu_switcheroo_set_state,
	.reprobe = NULL,
	.can_switch = amdgpu_switcheroo_can_switch,
};

/**
 * amdgpu_device_ip_set_clockgating_state - set the CG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: clockgating state (gate or ungate)
 *
 * Sets the requested clockgating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_clockgating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

/**
 * amdgpu_device_ip_set_powergating_state - set the PG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: powergating state (gate or ungate)
 *
 * Sets the requested powergating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_powergating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_powergating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

/**
 * amdgpu_device_ip_get_clockgating_state - get the CG state
 *
 * @adev: amdgpu_device pointer
 * @flags: clockgating feature flags
 *
 * Walks the list of IPs on the device and updates the clockgating
 * flags for each IP.
 * Updates @flags with the feature flags for each hardware IP where
 * clockgating is enabled.
 */
void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
					    u32 *flags)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
	}
}

/**
 * amdgpu_device_ip_wait_for_idle - wait for idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Waits for the requested hardware IP to be idle.
 * Returns 0 for success or a negative error code on failure.
 */
int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
				   enum amd_ip_block_type block_type)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type) {
			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
			if (r)
				return r;
			break;
		}
	}
	return 0;

}

/**
 * amdgpu_device_ip_is_idle - is the hardware IP idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Check if the hardware IP is idle or not.
 * Returns true if the IP is idle, false if not.
 */
bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
			      enum amd_ip_block_type block_type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type)
			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
	}
	return true;

}

/**
 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
 *
 * @adev: amdgpu_device pointer
 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Returns a pointer to the hardware IP block structure
 * if it exists for the asic, otherwise NULL.
 */
struct amdgpu_ip_block *
amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
			      enum amd_ip_block_type type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++)
		if (adev->ip_blocks[i].version->type == type)
			return &adev->ip_blocks[i];

	return NULL;
}

/**
 * amdgpu_device_ip_block_version_cmp
 *
 * @adev: amdgpu_device pointer
 * @type: enum amd_ip_block_type
 * @major: major version
 * @minor: minor version
 *
 * return 0 if equal or greater
 * return 1 if smaller or the ip_block doesn't exist
 */
int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
				       enum amd_ip_block_type type,
				       u32 major, u32 minor)
{
	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);

	if (ip_block && ((ip_block->version->major > major) ||
			((ip_block->version->major == major) &&
			(ip_block->version->minor >= minor))))
		return 0;

	return 1;
}

/**
 * amdgpu_device_ip_block_add
 *
 * @adev: amdgpu_device pointer
 * @ip_block_version: pointer to the IP to add
 *
 * Adds the IP block driver information to the collection of IPs
 * on the asic.
 */
int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
			       const struct amdgpu_ip_block_version *ip_block_version)
{
	if (!ip_block_version)
		return -EINVAL;

	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
		 ip_block_version->funcs->name);

	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;

	return 0;
}

e3ecdffa
AD
1674/**
1675 * amdgpu_device_enable_virtual_display - enable virtual display feature
1676 *
1677 * @adev: amdgpu_device pointer
1678 *
1679 * Enabled the virtual display feature if the user has enabled it via
1680 * the module parameter virtual_display. This feature provides a virtual
1681 * display hardware on headless boards or in virtualized environments.
1682 * This function parses and validates the configuration string specified by
1683 * the user and configues the virtual display configuration (number of
1684 * virtual connectors, crtcs, etc.) specified.
1685 */
483ef985 1686static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1687{
1688 adev->enable_virtual_display = false;
1689
1690 if (amdgpu_virtual_display) {
4a580877 1691 struct drm_device *ddev = adev_to_drm(adev);
9accf2fd 1692 const char *pci_address_name = pci_name(ddev->pdev);
0f66356d 1693 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1694
1695 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1696 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1697 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1698 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1699 if (!strcmp("all", pciaddname)
1700 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1701 long num_crtc;
1702 int res = -1;
1703
9accf2fd 1704 adev->enable_virtual_display = true;
0f66356d
ED
1705
1706 if (pciaddname_tmp)
1707 res = kstrtol(pciaddname_tmp, 10,
1708 &num_crtc);
1709
1710 if (!res) {
1711 if (num_crtc < 1)
1712 num_crtc = 1;
1713 if (num_crtc > 6)
1714 num_crtc = 6;
1715 adev->mode_info.num_crtc = num_crtc;
1716 } else {
1717 adev->mode_info.num_crtc = 1;
1718 }
9accf2fd
ED
1719 break;
1720 }
1721 }
1722
0f66356d
ED
1723 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1724 amdgpu_virtual_display, pci_address_name,
1725 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
1726
1727 kfree(pciaddstr);
1728 }
1729}
1730
e3ecdffa
AD
1731/**
1732 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1733 *
1734 * @adev: amdgpu_device pointer
1735 *
1736 * Parses the asic configuration parameters specified in the gpu info
1737 * firmware and makes them availale to the driver for use in configuring
1738 * the asic.
1739 * Returns 0 on success, -EINVAL on failure.
1740 */
e2a75f88
AD
1741static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1742{
e2a75f88 1743 const char *chip_name;
c0a43457 1744 char fw_name[40];
e2a75f88
AD
1745 int err;
1746 const struct gpu_info_firmware_header_v1_0 *hdr;
1747
ab4fe3e1
HR
1748 adev->firmware.gpu_info_fw = NULL;
1749
72de33f8 1750 if (adev->mman.discovery_bin) {
258620d0 1751 amdgpu_discovery_get_gfx_info(adev);
cc375d8c
TY
1752
1753 /*
1754 * FIXME: The bounding box is still needed by Navi12, so
 1755 * temporarily read it from gpu_info firmware. Should be dropped
1756 * when DAL no longer needs it.
1757 */
1758 if (adev->asic_type != CHIP_NAVI12)
1759 return 0;
258620d0
AD
1760 }
1761
e2a75f88 1762 switch (adev->asic_type) {
e2a75f88
AD
1763#ifdef CONFIG_DRM_AMDGPU_SI
1764 case CHIP_VERDE:
1765 case CHIP_TAHITI:
1766 case CHIP_PITCAIRN:
1767 case CHIP_OLAND:
1768 case CHIP_HAINAN:
1769#endif
1770#ifdef CONFIG_DRM_AMDGPU_CIK
1771 case CHIP_BONAIRE:
1772 case CHIP_HAWAII:
1773 case CHIP_KAVERI:
1774 case CHIP_KABINI:
1775 case CHIP_MULLINS:
1776#endif
da87c30b
AD
1777 case CHIP_TOPAZ:
1778 case CHIP_TONGA:
1779 case CHIP_FIJI:
1780 case CHIP_POLARIS10:
1781 case CHIP_POLARIS11:
1782 case CHIP_POLARIS12:
1783 case CHIP_VEGAM:
1784 case CHIP_CARRIZO:
1785 case CHIP_STONEY:
27c0bc71 1786 case CHIP_VEGA20:
84d244a3
JC
1787 case CHIP_SIENNA_CICHLID:
1788 case CHIP_NAVY_FLOUNDER:
e2a75f88
AD
1789 default:
1790 return 0;
1791 case CHIP_VEGA10:
1792 chip_name = "vega10";
1793 break;
3f76dced
AD
1794 case CHIP_VEGA12:
1795 chip_name = "vega12";
1796 break;
2d2e5e7e 1797 case CHIP_RAVEN:
54f78a76 1798 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
54c4d17e 1799 chip_name = "raven2";
54f78a76 1800 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
741deade 1801 chip_name = "picasso";
54c4d17e
FX
1802 else
1803 chip_name = "raven";
2d2e5e7e 1804 break;
65e60f6e
LM
1805 case CHIP_ARCTURUS:
1806 chip_name = "arcturus";
1807 break;
b51a26a0 1808 case CHIP_RENOIR:
2e62f0b5
PL
1809 if (adev->apu_flags & AMD_APU_IS_RENOIR)
1810 chip_name = "renoir";
1811 else
1812 chip_name = "green_sardine";
b51a26a0 1813 break;
23c6268e
HR
1814 case CHIP_NAVI10:
1815 chip_name = "navi10";
1816 break;
ed42cfe1
XY
1817 case CHIP_NAVI14:
1818 chip_name = "navi14";
1819 break;
42b325e5
XY
1820 case CHIP_NAVI12:
1821 chip_name = "navi12";
1822 break;
4e52a9f8
HR
1823 case CHIP_VANGOGH:
1824 chip_name = "vangogh";
1825 break;
e2a75f88
AD
1826 }
1827
1828 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
ab4fe3e1 1829 err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
e2a75f88
AD
1830 if (err) {
1831 dev_err(adev->dev,
1832 "Failed to load gpu_info firmware \"%s\"\n",
1833 fw_name);
1834 goto out;
1835 }
ab4fe3e1 1836 err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
e2a75f88
AD
1837 if (err) {
1838 dev_err(adev->dev,
1839 "Failed to validate gpu_info firmware \"%s\"\n",
1840 fw_name);
1841 goto out;
1842 }
1843
ab4fe3e1 1844 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
1845 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1846
1847 switch (hdr->version_major) {
1848 case 1:
1849 {
1850 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 1851 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
1852 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1853
cc375d8c
TY
1854 /*
 1855 * Should be dropped when DAL no longer needs it.
1856 */
1857 if (adev->asic_type == CHIP_NAVI12)
ec51d3fa
XY
1858 goto parse_soc_bounding_box;
1859
b5ab16bf
AD
1860 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1861 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1862 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1863 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 1864 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
1865 le32_to_cpu(gpu_info_fw->gc_num_tccs);
1866 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1867 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1868 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1869 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 1870 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
1871 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1872 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
1873 adev->gfx.cu_info.max_waves_per_simd =
1874 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1875 adev->gfx.cu_info.max_scratch_slots_per_cu =
1876 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1877 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 1878 if (hdr->version_minor >= 1) {
35c2e910
HZ
1879 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1880 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1881 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1882 adev->gfx.config.num_sc_per_sh =
1883 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1884 adev->gfx.config.num_packer_per_sc =
1885 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1886 }
ec51d3fa
XY
1887
1888parse_soc_bounding_box:
ec51d3fa
XY
1889 /*
 1890 * soc bounding box info is not integrated in the discovery table,
258620d0 1891 * so it still needs to be parsed from the gpu_info firmware when needed.
ec51d3fa 1892 */
48321c3d
HW
1893 if (hdr->version_minor == 2) {
1894 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1895 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1896 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1897 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1898 }
e2a75f88
AD
1899 break;
1900 }
1901 default:
1902 dev_err(adev->dev,
1903 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1904 err = -EINVAL;
1905 goto out;
1906 }
1907out:
e2a75f88
AD
1908 return err;
1909}
1910
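/*
 * Example of the firmware path built above: with chip_name set to "raven2"
 * the request resolves to "amdgpu/raven2_gpu_info.bin", looked up via the
 * normal firmware loader search path (typically under /lib/firmware,
 * depending on the distribution).
 */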
e3ecdffa
AD
1911/**
1912 * amdgpu_device_ip_early_init - run early init for hardware IPs
1913 *
1914 * @adev: amdgpu_device pointer
1915 *
1916 * Early initialization pass for hardware IPs. The hardware IPs that make
 1917 * up each asic are discovered and each IP's early_init callback is run. This
1918 * is the first stage in initializing the asic.
1919 * Returns 0 on success, negative error code on failure.
1920 */
06ec9070 1921static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 1922{
aaa36a97 1923 int i, r;
d38ceaf9 1924
483ef985 1925 amdgpu_device_enable_virtual_display(adev);
a6be7570 1926
00a979f3 1927 if (amdgpu_sriov_vf(adev)) {
00a979f3 1928 r = amdgpu_virt_request_full_gpu(adev, true);
aaa36a97
AD
1929 if (r)
1930 return r;
00a979f3
WS
1931 }
1932
d38ceaf9 1933 switch (adev->asic_type) {
33f34802
KW
1934#ifdef CONFIG_DRM_AMDGPU_SI
1935 case CHIP_VERDE:
1936 case CHIP_TAHITI:
1937 case CHIP_PITCAIRN:
1938 case CHIP_OLAND:
1939 case CHIP_HAINAN:
295d0daf 1940 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
1941 r = si_set_ip_blocks(adev);
1942 if (r)
1943 return r;
1944 break;
1945#endif
a2e73f56
AD
1946#ifdef CONFIG_DRM_AMDGPU_CIK
1947 case CHIP_BONAIRE:
1948 case CHIP_HAWAII:
1949 case CHIP_KAVERI:
1950 case CHIP_KABINI:
1951 case CHIP_MULLINS:
e1ad2d53 1952 if (adev->flags & AMD_IS_APU)
a2e73f56 1953 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
1954 else
1955 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
1956
1957 r = cik_set_ip_blocks(adev);
1958 if (r)
1959 return r;
1960 break;
1961#endif
da87c30b
AD
1962 case CHIP_TOPAZ:
1963 case CHIP_TONGA:
1964 case CHIP_FIJI:
1965 case CHIP_POLARIS10:
1966 case CHIP_POLARIS11:
1967 case CHIP_POLARIS12:
1968 case CHIP_VEGAM:
1969 case CHIP_CARRIZO:
1970 case CHIP_STONEY:
1971 if (adev->flags & AMD_IS_APU)
1972 adev->family = AMDGPU_FAMILY_CZ;
1973 else
1974 adev->family = AMDGPU_FAMILY_VI;
1975
1976 r = vi_set_ip_blocks(adev);
1977 if (r)
1978 return r;
1979 break;
e48a3cd9
AD
1980 case CHIP_VEGA10:
1981 case CHIP_VEGA12:
e4bd8170 1982 case CHIP_VEGA20:
e48a3cd9 1983 case CHIP_RAVEN:
61cf44c1 1984 case CHIP_ARCTURUS:
b51a26a0 1985 case CHIP_RENOIR:
70534d1e 1986 if (adev->flags & AMD_IS_APU)
2ca8a5d2
CZ
1987 adev->family = AMDGPU_FAMILY_RV;
1988 else
1989 adev->family = AMDGPU_FAMILY_AI;
460826e6
KW
1990
1991 r = soc15_set_ip_blocks(adev);
1992 if (r)
1993 return r;
1994 break;
0a5b8c7b 1995 case CHIP_NAVI10:
7ecb5cd4 1996 case CHIP_NAVI14:
4808cf9c 1997 case CHIP_NAVI12:
11e8aef5 1998 case CHIP_SIENNA_CICHLID:
41f446bf 1999 case CHIP_NAVY_FLOUNDER:
4e52a9f8
HR
2000 case CHIP_VANGOGH:
2001 if (adev->asic_type == CHIP_VANGOGH)
2002 adev->family = AMDGPU_FAMILY_VGH;
2003 else
2004 adev->family = AMDGPU_FAMILY_NV;
0a5b8c7b
HR
2005
2006 r = nv_set_ip_blocks(adev);
2007 if (r)
2008 return r;
2009 break;
d38ceaf9
AD
2010 default:
2011 /* FIXME: not supported yet */
2012 return -EINVAL;
2013 }
2014
1884734a 2015 amdgpu_amdkfd_device_probe(adev);
2016
3b94fb10 2017 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 2018 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 2019 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
00f54b97 2020
d38ceaf9
AD
2021 for (i = 0; i < adev->num_ip_blocks; i++) {
2022 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
ed8cf00c
HR
2023 DRM_ERROR("disabled ip block: %d <%s>\n",
2024 i, adev->ip_blocks[i].version->funcs->name);
a1255107 2025 adev->ip_blocks[i].status.valid = false;
d38ceaf9 2026 } else {
a1255107
AD
2027 if (adev->ip_blocks[i].version->funcs->early_init) {
2028 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 2029 if (r == -ENOENT) {
a1255107 2030 adev->ip_blocks[i].status.valid = false;
2c1a2784 2031 } else if (r) {
a1255107
AD
2032 DRM_ERROR("early_init of IP block <%s> failed %d\n",
2033 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2034 return r;
2c1a2784 2035 } else {
a1255107 2036 adev->ip_blocks[i].status.valid = true;
2c1a2784 2037 }
974e6b64 2038 } else {
a1255107 2039 adev->ip_blocks[i].status.valid = true;
d38ceaf9 2040 }
d38ceaf9 2041 }
21a249ca
AD
2042 /* get the vbios after the asic_funcs are set up */
2043 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
2044 r = amdgpu_device_parse_gpu_info_fw(adev);
2045 if (r)
2046 return r;
2047
21a249ca
AD
2048 /* Read BIOS */
2049 if (!amdgpu_get_bios(adev))
2050 return -EINVAL;
2051
2052 r = amdgpu_atombios_init(adev);
2053 if (r) {
2054 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2055 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2056 return r;
2057 }
2058 }
d38ceaf9
AD
2059 }
2060
395d1fb9
NH
2061 adev->cg_flags &= amdgpu_cg_mask;
2062 adev->pg_flags &= amdgpu_pg_mask;
2063
d38ceaf9
AD
2064 return 0;
2065}
2066
0a4f2520
RZ
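/*
 * Phase 1 of hw_init: only bring up the COMMON and IH blocks (plus PSP when
 * running as an SR-IOV VF) so that firmware loading can run before the
 * remaining blocks are initialized in phase 2.
 */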
2067static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2068{
2069 int i, r;
2070
2071 for (i = 0; i < adev->num_ip_blocks; i++) {
2072 if (!adev->ip_blocks[i].status.sw)
2073 continue;
2074 if (adev->ip_blocks[i].status.hw)
2075 continue;
2076 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 2077 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
2078 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2079 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2080 if (r) {
2081 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2082 adev->ip_blocks[i].version->funcs->name, r);
2083 return r;
2084 }
2085 adev->ip_blocks[i].status.hw = true;
2086 }
2087 }
2088
2089 return 0;
2090}
2091
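/*
 * Phase 2 of hw_init: bring up every block that phase 1 skipped, after the
 * required firmware has been loaded.
 */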
2092static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2093{
2094 int i, r;
2095
2096 for (i = 0; i < adev->num_ip_blocks; i++) {
2097 if (!adev->ip_blocks[i].status.sw)
2098 continue;
2099 if (adev->ip_blocks[i].status.hw)
2100 continue;
2101 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2102 if (r) {
2103 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2104 adev->ip_blocks[i].version->funcs->name, r);
2105 return r;
2106 }
2107 adev->ip_blocks[i].status.hw = true;
2108 }
2109
2110 return 0;
2111}
2112
7a3e0bb2
RZ
2113static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2114{
2115 int r = 0;
2116 int i;
80f41f84 2117 uint32_t smu_version;
7a3e0bb2
RZ
2118
2119 if (adev->asic_type >= CHIP_VEGA10) {
2120 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
2121 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2122 continue;
2123
 2124 /* no need to do the fw loading again if already done */
2125 if (adev->ip_blocks[i].status.hw == true)
2126 break;
2127
53b3f8f4 2128 if (amdgpu_in_reset(adev) || adev->in_suspend) {
482f0e53
ML
2129 r = adev->ip_blocks[i].version->funcs->resume(adev);
2130 if (r) {
2131 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 2132 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
2133 return r;
2134 }
2135 } else {
2136 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2137 if (r) {
2138 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2139 adev->ip_blocks[i].version->funcs->name, r);
2140 return r;
7a3e0bb2 2141 }
7a3e0bb2 2142 }
482f0e53
ML
2143
2144 adev->ip_blocks[i].status.hw = true;
2145 break;
7a3e0bb2
RZ
2146 }
2147 }
482f0e53 2148
8973d9ec
ED
2149 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2150 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 2151
80f41f84 2152 return r;
7a3e0bb2
RZ
2153}
2154
e3ecdffa
AD
2155/**
2156 * amdgpu_device_ip_init - run init for hardware IPs
2157 *
2158 * @adev: amdgpu_device pointer
2159 *
2160 * Main initialization pass for hardware IPs. The list of all the hardware
2161 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2162 * are run. sw_init initializes the software state associated with each IP
2163 * and hw_init initializes the hardware associated with each IP.
2164 * Returns 0 on success, negative error code on failure.
2165 */
06ec9070 2166static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
2167{
2168 int i, r;
2169
c030f2e4 2170 r = amdgpu_ras_init(adev);
2171 if (r)
2172 return r;
2173
d38ceaf9 2174 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2175 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2176 continue;
a1255107 2177 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 2178 if (r) {
a1255107
AD
2179 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2180 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 2181 goto init_failed;
2c1a2784 2182 }
a1255107 2183 adev->ip_blocks[i].status.sw = true;
bfca0289 2184
d38ceaf9 2185 /* need to do gmc hw init early so we can allocate gpu mem */
a1255107 2186 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
06ec9070 2187 r = amdgpu_device_vram_scratch_init(adev);
2c1a2784
AD
2188 if (r) {
2189 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
72d3f592 2190 goto init_failed;
2c1a2784 2191 }
a1255107 2192 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
2193 if (r) {
2194 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 2195 goto init_failed;
2c1a2784 2196 }
06ec9070 2197 r = amdgpu_device_wb_init(adev);
2c1a2784 2198 if (r) {
06ec9070 2199 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 2200 goto init_failed;
2c1a2784 2201 }
a1255107 2202 adev->ip_blocks[i].status.hw = true;
2493664f
ML
2203
2204 /* right after GMC hw init, we create CSA */
f92d5c61 2205 if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
1e256e27
RZ
2206 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2207 AMDGPU_GEM_DOMAIN_VRAM,
2208 AMDGPU_CSA_SIZE);
2493664f
ML
2209 if (r) {
2210 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 2211 goto init_failed;
2493664f
ML
2212 }
2213 }
d38ceaf9
AD
2214 }
2215 }
2216
c9ffa427
YT
2217 if (amdgpu_sriov_vf(adev))
2218 amdgpu_virt_init_data_exchange(adev);
2219
533aed27
AG
2220 r = amdgpu_ib_pool_init(adev);
2221 if (r) {
2222 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2223 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2224 goto init_failed;
2225 }
2226
c8963ea4
RZ
2227 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2228 if (r)
72d3f592 2229 goto init_failed;
0a4f2520
RZ
2230
2231 r = amdgpu_device_ip_hw_init_phase1(adev);
2232 if (r)
72d3f592 2233 goto init_failed;
0a4f2520 2234
7a3e0bb2
RZ
2235 r = amdgpu_device_fw_loading(adev);
2236 if (r)
72d3f592 2237 goto init_failed;
7a3e0bb2 2238
0a4f2520
RZ
2239 r = amdgpu_device_ip_hw_init_phase2(adev);
2240 if (r)
72d3f592 2241 goto init_failed;
d38ceaf9 2242
121a2bc6
AG
2243 /*
 2244 * retired pages will be loaded from eeprom and reserved here;
 2245 * this should be called after amdgpu_device_ip_hw_init_phase2 since
 2246 * for some ASICs the RAS EEPROM code relies on the SMU being fully
 2247 * functional for I2C communication, which is only true at this point.
b82e65a9
GC
2248 *
 2249 * amdgpu_ras_recovery_init may fail, but the caller only cares about
 2250 * failures caused by a bad gpu state and stops the amdgpu init process
 2251 * accordingly. For other failures, it still releases all the resources
 2252 * and prints an error message, rather than returning a
 2253 * negative value to the upper level.
121a2bc6
AG
2254 *
 2255 * Note: theoretically, this should be called before all vram allocations
 2256 * to protect retired pages from being abused.
2257 */
b82e65a9
GC
2258 r = amdgpu_ras_recovery_init(adev);
2259 if (r)
2260 goto init_failed;
121a2bc6 2261
3e2e2ab5
HZ
2262 if (adev->gmc.xgmi.num_physical_nodes > 1)
2263 amdgpu_xgmi_add_device(adev);
1884734a 2264 amdgpu_amdkfd_device_init(adev);
c6332b97 2265
bd607166
KR
2266 amdgpu_fru_get_product_info(adev);
2267
72d3f592 2268init_failed:
c9ffa427 2269 if (amdgpu_sriov_vf(adev))
c6332b97 2270 amdgpu_virt_release_full_gpu(adev, true);
2271
72d3f592 2272 return r;
d38ceaf9
AD
2273}
2274
e3ecdffa
AD
2275/**
2276 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2277 *
2278 * @adev: amdgpu_device pointer
2279 *
2280 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2281 * this function before a GPU reset. If the value is retained after a
 2282 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2283 */
06ec9070 2284static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2285{
2286 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2287}
2288
e3ecdffa
AD
2289/**
2290 * amdgpu_device_check_vram_lost - check if vram is valid
2291 *
2292 * @adev: amdgpu_device pointer
2293 *
2294 * Checks the reset magic value written to the gart pointer in VRAM.
 2295 * The driver calls this after a GPU reset to see if the contents of
 2296 * VRAM are lost or not.
 2297 * Returns true if vram is lost, false if not.
2298 */
06ec9070 2299static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2300{
dadce777
EQ
2301 if (memcmp(adev->gart.ptr, adev->reset_magic,
2302 AMDGPU_RESET_MAGIC_NUM))
2303 return true;
2304
53b3f8f4 2305 if (!amdgpu_in_reset(adev))
dadce777
EQ
2306 return false;
2307
2308 /*
2309 * For all ASICs with baco/mode1 reset, the VRAM is
2310 * always assumed to be lost.
2311 */
2312 switch (amdgpu_asic_reset_method(adev)) {
2313 case AMD_RESET_METHOD_BACO:
2314 case AMD_RESET_METHOD_MODE1:
2315 return true;
2316 default:
2317 return false;
2318 }
0c49e0b8
CZ
2319}
2320
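/*
 * Simplified sketch of how the two helpers above pair up around a reset
 * (the actual recovery path in this driver has more steps):
 *
 *	amdgpu_device_fill_reset_magic(adev);
 *	... GPU reset ...
 *	vram_lost = amdgpu_device_check_vram_lost(adev);
 *	if (vram_lost)
 *		... restore VRAM contents from their shadow copies ...
 */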
e3ecdffa 2321/**
1112a46b 2322 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2323 *
2324 * @adev: amdgpu_device pointer
b8b72130 2325 * @state: clockgating state (gate or ungate)
e3ecdffa 2326 *
e3ecdffa 2327 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2328 * set_clockgating_state callbacks are run.
 2329 * During the late init pass, clockgating is enabled for the hardware IPs;
 2330 * during the fini or suspend pass, clockgating is disabled for them.
e3ecdffa
AD
2331 * Returns 0 on success, negative error code on failure.
2332 */
fdd34271 2333
1112a46b
RZ
2334static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2335 enum amd_clockgating_state state)
d38ceaf9 2336{
1112a46b 2337 int i, j, r;
d38ceaf9 2338
4a2ba394
SL
2339 if (amdgpu_emu_mode == 1)
2340 return 0;
2341
1112a46b
RZ
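	/*
	 * Gate in normal IP order and ungate in reverse order, mirroring the
	 * init/fini ordering of the IP blocks.
	 */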
2342 for (j = 0; j < adev->num_ip_blocks; j++) {
2343 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2344 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2345 continue;
4a446d55 2346 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2347 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2348 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2349 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2350 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2351 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2352 /* enable clockgating to save power */
a1255107 2353 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2354 state);
4a446d55
AD
2355 if (r) {
2356 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2357 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2358 return r;
2359 }
b0b00ff1 2360 }
d38ceaf9 2361 }
06b18f61 2362
c9f96fd5
RZ
2363 return 0;
2364}
2365
1112a46b 2366static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
c9f96fd5 2367{
1112a46b 2368 int i, j, r;
06b18f61 2369
c9f96fd5
RZ
2370 if (amdgpu_emu_mode == 1)
2371 return 0;
2372
1112a46b
RZ
2373 for (j = 0; j < adev->num_ip_blocks; j++) {
2374 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2375 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5
RZ
2376 continue;
 2377 /* skip PG for VCE/UVD, it's handled specially */
2378 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2379 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2380 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2381 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2382 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2383 /* enable powergating to save power */
2384 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2385 state);
c9f96fd5
RZ
2386 if (r) {
2387 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2388 adev->ip_blocks[i].version->funcs->name, r);
2389 return r;
2390 }
2391 }
2392 }
2dc80b00
S
2393 return 0;
2394}
2395
beff74bc
AD
2396static int amdgpu_device_enable_mgpu_fan_boost(void)
2397{
2398 struct amdgpu_gpu_instance *gpu_ins;
2399 struct amdgpu_device *adev;
2400 int i, ret = 0;
2401
2402 mutex_lock(&mgpu_info.mutex);
2403
2404 /*
2405 * MGPU fan boost feature should be enabled
2406 * only when there are two or more dGPUs in
2407 * the system
2408 */
2409 if (mgpu_info.num_dgpu < 2)
2410 goto out;
2411
2412 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2413 gpu_ins = &(mgpu_info.gpu_ins[i]);
2414 adev = gpu_ins->adev;
2415 if (!(adev->flags & AMD_IS_APU) &&
f10bb940 2416 !gpu_ins->mgpu_fan_enabled) {
beff74bc
AD
2417 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2418 if (ret)
2419 break;
2420
2421 gpu_ins->mgpu_fan_enabled = 1;
2422 }
2423 }
2424
2425out:
2426 mutex_unlock(&mgpu_info.mutex);
2427
2428 return ret;
2429}
2430
e3ecdffa
AD
2431/**
2432 * amdgpu_device_ip_late_init - run late init for hardware IPs
2433 *
2434 * @adev: amdgpu_device pointer
2435 *
2436 * Late initialization pass for hardware IPs. The list of all the hardware
2437 * IPs that make up the asic is walked and the late_init callbacks are run.
2438 * late_init covers any special initialization that an IP requires
 2439 * after all of them have been initialized or something that needs to happen
2440 * late in the init process.
2441 * Returns 0 on success, negative error code on failure.
2442 */
06ec9070 2443static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2444{
60599a03 2445 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2446 int i = 0, r;
2447
2448 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2449 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2450 continue;
2451 if (adev->ip_blocks[i].version->funcs->late_init) {
2452 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2453 if (r) {
2454 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2455 adev->ip_blocks[i].version->funcs->name, r);
2456 return r;
2457 }
2dc80b00 2458 }
73f847db 2459 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2460 }
2461
a891d239
DL
2462 amdgpu_ras_set_error_query_ready(adev, true);
2463
1112a46b
RZ
2464 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2465 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2466
06ec9070 2467 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2468
beff74bc
AD
2469 r = amdgpu_device_enable_mgpu_fan_boost();
2470 if (r)
2471 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2472
60599a03
EQ
2473
2474 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2475 mutex_lock(&mgpu_info.mutex);
2476
2477 /*
2478 * Reset device p-state to low as this was booted with high.
2479 *
2480 * This should be performed only after all devices from the same
2481 * hive get initialized.
2482 *
2483 * However, it's unknown how many device in the hive in advance.
2484 * As this is counted one by one during devices initializations.
2485 *
2486 * So, we wait for all XGMI interlinked devices initialized.
2487 * This may bring some delays as those devices may come from
2488 * different hives. But that should be OK.
2489 */
2490 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2491 for (i = 0; i < mgpu_info.num_gpu; i++) {
2492 gpu_instance = &(mgpu_info.gpu_ins[i]);
2493 if (gpu_instance->adev->flags & AMD_IS_APU)
2494 continue;
2495
d84a430d
JK
2496 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2497 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
2498 if (r) {
2499 DRM_ERROR("pstate setting failed (%d).\n", r);
2500 break;
2501 }
2502 }
2503 }
2504
2505 mutex_unlock(&mgpu_info.mutex);
2506 }
2507
d38ceaf9
AD
2508 return 0;
2509}
2510
e3ecdffa
AD
2511/**
2512 * amdgpu_device_ip_fini - run fini for hardware IPs
2513 *
2514 * @adev: amdgpu_device pointer
2515 *
2516 * Main teardown pass for hardware IPs. The list of all the hardware
2517 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2518 * are run. hw_fini tears down the hardware associated with each IP
2519 * and sw_fini tears down any software state associated with each IP.
2520 * Returns 0 on success, negative error code on failure.
2521 */
06ec9070 2522static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
d38ceaf9
AD
2523{
2524 int i, r;
2525
5278a159
SY
2526 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2527 amdgpu_virt_release_ras_err_handler_data(adev);
2528
c030f2e4 2529 amdgpu_ras_pre_fini(adev);
2530
a82400b5
AG
2531 if (adev->gmc.xgmi.num_physical_nodes > 1)
2532 amdgpu_xgmi_remove_device(adev);
2533
1884734a 2534 amdgpu_amdkfd_device_fini(adev);
05df1f01
RZ
2535
2536 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2537 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2538
3e96dbfd
AD
2539 /* need to disable SMC first */
2540 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2541 if (!adev->ip_blocks[i].status.hw)
3e96dbfd 2542 continue;
fdd34271 2543 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
a1255107 2544 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3e96dbfd
AD
2545 /* XXX handle errors */
2546 if (r) {
2547 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
a1255107 2548 adev->ip_blocks[i].version->funcs->name, r);
3e96dbfd 2549 }
a1255107 2550 adev->ip_blocks[i].status.hw = false;
3e96dbfd
AD
2551 break;
2552 }
2553 }
2554
d38ceaf9 2555 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2556 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2557 continue;
8201a67a 2558
a1255107 2559 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2560 /* XXX handle errors */
2c1a2784 2561 if (r) {
a1255107
AD
2562 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2563 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2564 }
8201a67a 2565
a1255107 2566 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2567 }
2568
9950cda2 2569
d38ceaf9 2570 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2571 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2572 continue;
c12aba3a
ML
2573
2574 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2575 amdgpu_ucode_free_bo(adev);
1e256e27 2576 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a
ML
2577 amdgpu_device_wb_fini(adev);
2578 amdgpu_device_vram_scratch_fini(adev);
533aed27 2579 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2580 }
2581
a1255107 2582 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2583 /* XXX handle errors */
2c1a2784 2584 if (r) {
a1255107
AD
2585 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2586 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2587 }
a1255107
AD
2588 adev->ip_blocks[i].status.sw = false;
2589 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2590 }
2591
a6dcfd9c 2592 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2593 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2594 continue;
a1255107
AD
2595 if (adev->ip_blocks[i].version->funcs->late_fini)
2596 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2597 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2598 }
2599
c030f2e4 2600 amdgpu_ras_fini(adev);
2601
030308fc 2602 if (amdgpu_sriov_vf(adev))
24136135
ML
2603 if (amdgpu_virt_release_full_gpu(adev, false))
2604 DRM_ERROR("failed to release exclusive mode on fini\n");
2493664f 2605
d38ceaf9
AD
2606 return 0;
2607}
2608
e3ecdffa 2609/**
beff74bc 2610 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2611 *
1112a46b 2612 * @work: work_struct.
e3ecdffa 2613 */
beff74bc 2614static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2615{
2616 struct amdgpu_device *adev =
beff74bc 2617 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2618 int r;
2619
2620 r = amdgpu_ib_ring_tests(adev);
2621 if (r)
2622 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2623}
2624
1e317b99
RZ
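/*
 * Delayed work handler for gfx.gfx_off_delay_work: once no caller holds a
 * GFXOFF-disable request any more (gfx_off_req_count == 0), ask the SMU to
 * let the GFX block power down.
 */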
2625static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2626{
2627 struct amdgpu_device *adev =
2628 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2629
2630 mutex_lock(&adev->gfx.gfx_off_mutex);
2631 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2632 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2633 adev->gfx.gfx_off_state = true;
2634 }
2635 mutex_unlock(&adev->gfx.gfx_off_mutex);
2636}
2637
e3ecdffa 2638/**
e7854a03 2639 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2640 *
2641 * @adev: amdgpu_device pointer
2642 *
2643 * Main suspend function for hardware IPs. The list of all the hardware
2644 * IPs that make up the asic is walked, clockgating is disabled and the
2645 * suspend callbacks are run. suspend puts the hardware and software state
2646 * in each IP into a state suitable for suspend.
2647 * Returns 0 on success, negative error code on failure.
2648 */
e7854a03
AD
2649static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2650{
2651 int i, r;
2652
ced1ba97
PL
2653 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2654 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2655
e7854a03
AD
2656 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2657 if (!adev->ip_blocks[i].status.valid)
2658 continue;
2b9f7848 2659
e7854a03 2660 /* displays are handled separately */
2b9f7848
ND
2661 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
2662 continue;
2663
2664 /* XXX handle errors */
2665 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2666 /* XXX handle errors */
2667 if (r) {
2668 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2669 adev->ip_blocks[i].version->funcs->name, r);
2670 return r;
e7854a03 2671 }
2b9f7848
ND
2672
2673 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
2674 }
2675
e7854a03
AD
2676 return 0;
2677}
2678
2679/**
2680 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2681 *
2682 * @adev: amdgpu_device pointer
2683 *
2684 * Main suspend function for hardware IPs. The list of all the hardware
2685 * IPs that make up the asic is walked, clockgating is disabled and the
2686 * suspend callbacks are run. suspend puts the hardware and software state
2687 * in each IP into a state suitable for suspend.
2688 * Returns 0 on success, negative error code on failure.
2689 */
2690static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2691{
2692 int i, r;
2693
2694 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2695 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2696 continue;
e7854a03
AD
2697 /* displays are handled in phase1 */
2698 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2699 continue;
bff77e86
LM
2700 /* PSP lost connection when err_event_athub occurs */
2701 if (amdgpu_ras_intr_triggered() &&
2702 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2703 adev->ip_blocks[i].status.hw = false;
2704 continue;
2705 }
d38ceaf9 2706 /* XXX handle errors */
a1255107 2707 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 2708 /* XXX handle errors */
2c1a2784 2709 if (r) {
a1255107
AD
2710 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2711 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2712 }
876923fb 2713 adev->ip_blocks[i].status.hw = false;
a3a09142 2714 /* handle putting the SMC in the appropriate state */
86b93fd6
JZ
 2715 if (!amdgpu_sriov_vf(adev)) {
2716 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2717 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
2718 if (r) {
2719 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
2720 adev->mp1_state, r);
2721 return r;
2722 }
a3a09142
AD
2723 }
2724 }
b5507c7e 2725 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2726 }
2727
2728 return 0;
2729}
2730
e7854a03
AD
2731/**
2732 * amdgpu_device_ip_suspend - run suspend for hardware IPs
2733 *
2734 * @adev: amdgpu_device pointer
2735 *
2736 * Main suspend function for hardware IPs. The list of all the hardware
2737 * IPs that make up the asic is walked, clockgating is disabled and the
2738 * suspend callbacks are run. suspend puts the hardware and software state
2739 * in each IP into a state suitable for suspend.
2740 * Returns 0 on success, negative error code on failure.
2741 */
2742int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2743{
2744 int r;
2745
e7819644
YT
2746 if (amdgpu_sriov_vf(adev))
2747 amdgpu_virt_request_full_gpu(adev, false);
2748
e7854a03
AD
2749 r = amdgpu_device_ip_suspend_phase1(adev);
2750 if (r)
2751 return r;
2752 r = amdgpu_device_ip_suspend_phase2(adev);
2753
e7819644
YT
2754 if (amdgpu_sriov_vf(adev))
2755 amdgpu_virt_release_full_gpu(adev, false);
2756
e7854a03
AD
2757 return r;
2758}
2759
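/*
 * SR-IOV re-init, early pass: after a function-level reset bring the base
 * blocks back up first, in the fixed GMC -> COMMON -> PSP -> IH order below.
 */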
06ec9070 2760static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2761{
2762 int i, r;
2763
2cb681b6
ML
2764 static enum amd_ip_block_type ip_order[] = {
2765 AMD_IP_BLOCK_TYPE_GMC,
2766 AMD_IP_BLOCK_TYPE_COMMON,
39186aef 2767 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
2768 AMD_IP_BLOCK_TYPE_IH,
2769 };
a90ad3c2 2770
2cb681b6
ML
2771 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2772 int j;
2773 struct amdgpu_ip_block *block;
a90ad3c2 2774
4cd2a96d
J
2775 block = &adev->ip_blocks[i];
2776 block->status.hw = false;
2cb681b6 2777
4cd2a96d 2778 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
2cb681b6 2779
4cd2a96d 2780 if (block->version->type != ip_order[j] ||
2cb681b6
ML
2781 !block->status.valid)
2782 continue;
2783
2784 r = block->version->funcs->hw_init(adev);
0aaeefcc 2785 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2786 if (r)
2787 return r;
482f0e53 2788 block->status.hw = true;
a90ad3c2
ML
2789 }
2790 }
2791
2792 return 0;
2793}
2794
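/*
 * SR-IOV re-init, late pass: restore the remaining blocks (SMC, DCE, GFX,
 * SDMA, UVD, VCE, VCN) once the base blocks from the early pass are up.
 * Note that the SMC block is resumed rather than re-initialized.
 */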
06ec9070 2795static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2796{
2797 int i, r;
2798
2cb681b6
ML
2799 static enum amd_ip_block_type ip_order[] = {
2800 AMD_IP_BLOCK_TYPE_SMC,
2801 AMD_IP_BLOCK_TYPE_DCE,
2802 AMD_IP_BLOCK_TYPE_GFX,
2803 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c 2804 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07
JJ
2805 AMD_IP_BLOCK_TYPE_VCE,
2806 AMD_IP_BLOCK_TYPE_VCN
2cb681b6 2807 };
a90ad3c2 2808
2cb681b6
ML
2809 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2810 int j;
2811 struct amdgpu_ip_block *block;
a90ad3c2 2812
2cb681b6
ML
2813 for (j = 0; j < adev->num_ip_blocks; j++) {
2814 block = &adev->ip_blocks[j];
2815
2816 if (block->version->type != ip_order[i] ||
482f0e53
ML
2817 !block->status.valid ||
2818 block->status.hw)
2cb681b6
ML
2819 continue;
2820
895bd048
JZ
2821 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
2822 r = block->version->funcs->resume(adev);
2823 else
2824 r = block->version->funcs->hw_init(adev);
2825
0aaeefcc 2826 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2827 if (r)
2828 return r;
482f0e53 2829 block->status.hw = true;
a90ad3c2
ML
2830 }
2831 }
2832
2833 return 0;
2834}
2835
e3ecdffa
AD
2836/**
2837 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2838 *
2839 * @adev: amdgpu_device pointer
2840 *
2841 * First resume function for hardware IPs. The list of all the hardware
2842 * IPs that make up the asic is walked and the resume callbacks are run for
2843 * COMMON, GMC, and IH. resume puts the hardware into a functional state
2844 * after a suspend and updates the software state as necessary. This
2845 * function is also used for restoring the GPU after a GPU reset.
2846 * Returns 0 on success, negative error code on failure.
2847 */
06ec9070 2848static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
2849{
2850 int i, r;
2851
a90ad3c2 2852 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2853 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 2854 continue;
a90ad3c2 2855 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa
AD
2856 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2857 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
482f0e53 2858
fcf0649f
CZ
2859 r = adev->ip_blocks[i].version->funcs->resume(adev);
2860 if (r) {
2861 DRM_ERROR("resume of IP block <%s> failed %d\n",
2862 adev->ip_blocks[i].version->funcs->name, r);
2863 return r;
2864 }
482f0e53 2865 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
2866 }
2867 }
2868
2869 return 0;
2870}
2871
e3ecdffa
AD
2872/**
2873 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2874 *
2875 * @adev: amdgpu_device pointer
2876 *
 2877 * Second resume function for hardware IPs. The list of all the hardware
2878 * IPs that make up the asic is walked and the resume callbacks are run for
2879 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
2880 * functional state after a suspend and updates the software state as
2881 * necessary. This function is also used for restoring the GPU after a GPU
2882 * reset.
2883 * Returns 0 on success, negative error code on failure.
2884 */
06ec9070 2885static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2886{
2887 int i, r;
2888
2889 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2890 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 2891 continue;
fcf0649f 2892 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 2893 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
2894 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2895 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 2896 continue;
a1255107 2897 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 2898 if (r) {
a1255107
AD
2899 DRM_ERROR("resume of IP block <%s> failed %d\n",
2900 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2901 return r;
2c1a2784 2902 }
482f0e53 2903 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
2904 }
2905
2906 return 0;
2907}
2908
e3ecdffa
AD
2909/**
2910 * amdgpu_device_ip_resume - run resume for hardware IPs
2911 *
2912 * @adev: amdgpu_device pointer
2913 *
2914 * Main resume function for hardware IPs. The hardware IPs
 2916 * are split into two resume functions because they are
 2917 * also used in recovering from a GPU reset and some additional
 2918 * steps need to be taken between them. In this case (S3/S4) they are
2918 * run sequentially.
2919 * Returns 0 on success, negative error code on failure.
2920 */
06ec9070 2921static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
2922{
2923 int r;
2924
06ec9070 2925 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
2926 if (r)
2927 return r;
7a3e0bb2
RZ
2928
2929 r = amdgpu_device_fw_loading(adev);
2930 if (r)
2931 return r;
2932
06ec9070 2933 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
2934
2935 return r;
2936}
2937
e3ecdffa
AD
2938/**
2939 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2940 *
2941 * @adev: amdgpu_device pointer
2942 *
2943 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2944 */
4e99a44e 2945static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 2946{
6867e1b5
ML
2947 if (amdgpu_sriov_vf(adev)) {
2948 if (adev->is_atom_fw) {
2949 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2950 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2951 } else {
2952 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2953 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2954 }
2955
2956 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2957 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 2958 }
048765ad
AR
2959}
2960
e3ecdffa
AD
2961/**
2962 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2963 *
2964 * @asic_type: AMD asic type
2965 *
 2966 * Check if there is DC (new modesetting infrastructure) support for an asic.
 2967 * Returns true if DC has support, false if not.
2968 */
4562236b
HW
2969bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2970{
2971 switch (asic_type) {
2972#if defined(CONFIG_DRM_AMD_DC)
64200c46
MR
2973#if defined(CONFIG_DRM_AMD_DC_SI)
2974 case CHIP_TAHITI:
2975 case CHIP_PITCAIRN:
2976 case CHIP_VERDE:
2977 case CHIP_OLAND:
2978#endif
4562236b 2979 case CHIP_BONAIRE:
0d6fbccb 2980 case CHIP_KAVERI:
367e6687
AD
2981 case CHIP_KABINI:
2982 case CHIP_MULLINS:
d9fda248
HW
2983 /*
2984 * We have systems in the wild with these ASICs that require
2985 * LVDS and VGA support which is not supported with DC.
2986 *
2987 * Fallback to the non-DC driver here by default so as not to
2988 * cause regressions.
2989 */
2990 return amdgpu_dc > 0;
2991 case CHIP_HAWAII:
4562236b
HW
2992 case CHIP_CARRIZO:
2993 case CHIP_STONEY:
4562236b 2994 case CHIP_POLARIS10:
675fd32b 2995 case CHIP_POLARIS11:
2c8ad2d5 2996 case CHIP_POLARIS12:
675fd32b 2997 case CHIP_VEGAM:
4562236b
HW
2998 case CHIP_TONGA:
2999 case CHIP_FIJI:
42f8ffa1 3000 case CHIP_VEGA10:
dca7b401 3001 case CHIP_VEGA12:
c6034aa2 3002 case CHIP_VEGA20:
b86a1aa3 3003#if defined(CONFIG_DRM_AMD_DC_DCN)
fd187853 3004 case CHIP_RAVEN:
b4f199c7 3005 case CHIP_NAVI10:
8fceceb6 3006 case CHIP_NAVI14:
078655d9 3007 case CHIP_NAVI12:
e1c14c43 3008 case CHIP_RENOIR:
81d9bfb8
JFZ
3009#endif
3010#if defined(CONFIG_DRM_AMD_DC_DCN3_0)
3011 case CHIP_SIENNA_CICHLID:
a6c5308f 3012 case CHIP_NAVY_FLOUNDER:
42f8ffa1 3013#endif
fd187853 3014 return amdgpu_dc != 0;
4562236b
HW
3015#endif
3016 default:
93b09a9a
SS
3017 if (amdgpu_dc > 0)
3018 DRM_INFO("Display Core has been requested via kernel parameter "
3019 "but isn't supported by ASIC, ignoring\n");
4562236b
HW
3020 return false;
3021 }
3022}
3023
3024/**
3025 * amdgpu_device_has_dc_support - check if dc is supported
3026 *
 3027 * @adev: amdgpu_device pointer
3028 *
3029 * Returns true for supported, false for not supported
3030 */
3031bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3032{
c997e8e2 3033 if (amdgpu_sriov_vf(adev) || adev->enable_virtual_display)
2555039d
XY
3034 return false;
3035
4562236b
HW
3036 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3037}
3038
d4535e2c
AG
3039
3040static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3041{
3042 struct amdgpu_device *adev =
3043 container_of(__work, struct amdgpu_device, xgmi_reset_work);
d95e8e97 3044 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
d4535e2c 3045
c6a6e2db
AG
3046 /* It's a bug to not have a hive within this function */
3047 if (WARN_ON(!hive))
3048 return;
3049
3050 /*
3051 * Use task barrier to synchronize all xgmi reset works across the
3052 * hive. task_barrier_enter and task_barrier_exit will block
3053 * until all the threads running the xgmi reset works reach
3054 * those points. task_barrier_full will do both blocks.
3055 */
3056 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3057
3058 task_barrier_enter(&hive->tb);
4a580877 3059 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
c6a6e2db
AG
3060
3061 if (adev->asic_reset_res)
3062 goto fail;
3063
3064 task_barrier_exit(&hive->tb);
4a580877 3065 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
c6a6e2db
AG
3066
3067 if (adev->asic_reset_res)
3068 goto fail;
43c4d576
JC
3069
3070 if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count)
3071 adev->mmhub.funcs->reset_ras_error_count(adev);
c6a6e2db
AG
3072 } else {
3073
3074 task_barrier_full(&hive->tb);
3075 adev->asic_reset_res = amdgpu_asic_reset(adev);
3076 }
ce316fa5 3077
c6a6e2db 3078fail:
d4535e2c 3079 if (adev->asic_reset_res)
fed184e9 3080 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
4a580877 3081 adev->asic_reset_res, adev_to_drm(adev)->unique);
d95e8e97 3082 amdgpu_put_xgmi_hive(hive);
d4535e2c
AG
3083}
3084
71f98027
AD
3085static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3086{
3087 char *input = amdgpu_lockup_timeout;
3088 char *timeout_setting = NULL;
3089 int index = 0;
3090 long timeout;
3091 int ret = 0;
3092
3093 /*
 3094 * By default, the timeout for non-compute jobs is 10000 ms,
 3095 * and there is no timeout enforced on compute jobs.
 3096 * In SR-IOV or passthrough mode, the timeout for compute
b7b2a316 3097 * jobs is 60000 ms by default.
71f98027
AD
3098 */
3099 adev->gfx_timeout = msecs_to_jiffies(10000);
3100 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3101 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
b7b2a316 3102 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027
AD
3103 else
3104 adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
3105
f440ff44 3106 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 3107 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 3108 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
3109 ret = kstrtol(timeout_setting, 0, &timeout);
3110 if (ret)
3111 return ret;
3112
3113 if (timeout == 0) {
3114 index++;
3115 continue;
3116 } else if (timeout < 0) {
3117 timeout = MAX_SCHEDULE_TIMEOUT;
3118 } else {
3119 timeout = msecs_to_jiffies(timeout);
3120 }
3121
3122 switch (index++) {
3123 case 0:
3124 adev->gfx_timeout = timeout;
3125 break;
3126 case 1:
3127 adev->compute_timeout = timeout;
3128 break;
3129 case 2:
3130 adev->sdma_timeout = timeout;
3131 break;
3132 case 3:
3133 adev->video_timeout = timeout;
3134 break;
3135 default:
3136 break;
3137 }
3138 }
3139 /*
3140 * There is only one value specified and
3141 * it should apply to all non-compute jobs.
3142 */
bcccee89 3143 if (index == 1) {
71f98027 3144 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
3145 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3146 adev->compute_timeout = adev->gfx_timeout;
3147 }
71f98027
AD
3148 }
3149
3150 return ret;
3151}
d4535e2c 3152
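/*
 * For reference, the lockup_timeout string parsed above takes up to four
 * comma-separated values in milliseconds, applied in order to gfx, compute,
 * sdma and video jobs; 0 keeps the default for that slot and a negative
 * value disables the timeout. A single value applies to all non-compute
 * jobs (and also to compute jobs under SR-IOV/passthrough). The numbers
 * below are illustrative only:
 *
 *	modprobe amdgpu lockup_timeout=10000,60000,10000,10000
 */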
77f3a5cd
ND
3153static const struct attribute *amdgpu_dev_attributes[] = {
3154 &dev_attr_product_name.attr,
3155 &dev_attr_product_number.attr,
3156 &dev_attr_serial_number.attr,
3157 &dev_attr_pcie_replay_count.attr,
3158 NULL
3159};
3160
c9a6b82f 3161
d38ceaf9
AD
3162/**
3163 * amdgpu_device_init - initialize the driver
3164 *
3165 * @adev: amdgpu_device pointer
d38ceaf9
AD
3166 * @flags: driver flags
3167 *
3168 * Initializes the driver info and hw (all asics).
3169 * Returns 0 for success or an error on failure.
3170 * Called at driver startup.
3171 */
3172int amdgpu_device_init(struct amdgpu_device *adev,
d38ceaf9
AD
3173 uint32_t flags)
3174{
8aba21b7
LT
3175 struct drm_device *ddev = adev_to_drm(adev);
3176 struct pci_dev *pdev = adev->pdev;
d38ceaf9 3177 int r, i;
3840c5bc 3178 bool boco = false;
95844d20 3179 u32 max_MBps;
d38ceaf9
AD
3180
3181 adev->shutdown = false;
d38ceaf9 3182 adev->flags = flags;
4e66d7d2
YZ
3183
3184 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3185 adev->asic_type = amdgpu_force_asic_type;
3186 else
3187 adev->asic_type = flags & AMD_ASIC_MASK;
3188
d38ceaf9 3189 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 3190 if (amdgpu_emu_mode == 1)
8bdab6bb 3191 adev->usec_timeout *= 10;
770d13b1 3192 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
3193 adev->accel_working = false;
3194 adev->num_rings = 0;
3195 adev->mman.buffer_funcs = NULL;
3196 adev->mman.buffer_funcs_ring = NULL;
3197 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 3198 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 3199 adev->gmc.gmc_funcs = NULL;
f54d1867 3200 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 3201 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
3202
3203 adev->smc_rreg = &amdgpu_invalid_rreg;
3204 adev->smc_wreg = &amdgpu_invalid_wreg;
3205 adev->pcie_rreg = &amdgpu_invalid_rreg;
3206 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
3207 adev->pciep_rreg = &amdgpu_invalid_rreg;
3208 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
3209 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3210 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
3211 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3212 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3213 adev->didt_rreg = &amdgpu_invalid_rreg;
3214 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
3215 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3216 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
3217 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3218 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3219
3e39ab90
AD
3220 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3221 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3222 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
3223
 3224 /* mutex initializations are all done here so we
 3225 * can call these functions without running into locking issues */
d38ceaf9 3226 atomic_set(&adev->irq.ih.lock, 0);
0e5ca0d1 3227 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
3228 mutex_init(&adev->pm.mutex);
3229 mutex_init(&adev->gfx.gpu_clock_mutex);
3230 mutex_init(&adev->srbm_mutex);
b8866c26 3231 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3232 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 3233 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3234 mutex_init(&adev->mn_lock);
e23b74aa 3235 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3236 hash_init(adev->mn_hash);
53b3f8f4 3237 atomic_set(&adev->in_gpu_reset, 0);
6049db43 3238 init_rwsem(&adev->reset_sem);
32eaeae0 3239 mutex_init(&adev->psp.mutex);
bd052211 3240 mutex_init(&adev->notifier_lock);
d38ceaf9 3241
912dfc84
EQ
3242 r = amdgpu_device_check_arguments(adev);
3243 if (r)
3244 return r;
d38ceaf9 3245
d38ceaf9
AD
3246 spin_lock_init(&adev->mmio_idx_lock);
3247 spin_lock_init(&adev->smc_idx_lock);
3248 spin_lock_init(&adev->pcie_idx_lock);
3249 spin_lock_init(&adev->uvd_ctx_idx_lock);
3250 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3251 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3252 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3253 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3254 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3255
0c4e7fa5
CZ
3256 INIT_LIST_HEAD(&adev->shadow_list);
3257 mutex_init(&adev->shadow_list_lock);
3258
beff74bc
AD
3259 INIT_DELAYED_WORK(&adev->delayed_init_work,
3260 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3261 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3262 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3263
d4535e2c
AG
3264 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3265
d23ee13f 3266 adev->gfx.gfx_off_req_count = 1;
b6e79d9a 3267 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3268
b265bdbd
EQ
3269 atomic_set(&adev->throttling_logging_enabled, 1);
3270 /*
3271 * If throttling continues, logging will be performed every minute
3272 * to avoid log flooding. "-1" is subtracted since the thermal
3273 * throttling interrupt comes every second. Thus, the total logging
 3274 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
3275 * for throttling interrupt) = 60 seconds.
3276 */
3277 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3278 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3279
0fa49558
AX
3280 /* Registers mapping */
3281 /* TODO: block userspace mapping of io register */
da69c161
KW
3282 if (adev->asic_type >= CHIP_BONAIRE) {
3283 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3284 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3285 } else {
3286 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3287 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3288 }
d38ceaf9 3289
d38ceaf9
AD
3290 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3291 if (adev->rmmio == NULL) {
3292 return -ENOMEM;
3293 }
3294 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3295 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3296
d38ceaf9
AD
3297 /* io port mapping */
3298 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
3299 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
3300 adev->rio_mem_size = pci_resource_len(adev->pdev, i);
3301 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
3302 break;
3303 }
3304 }
3305 if (adev->rio_mem == NULL)
b64a18c5 3306 DRM_INFO("PCI I/O BAR is not found.\n");
d38ceaf9 3307
b2109d8e
JX
3308 /* enable PCIE atomic ops */
3309 r = pci_enable_atomic_ops_to_root(adev->pdev,
3310 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3311 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3312 if (r) {
3313 adev->have_atomics_support = false;
3314 DRM_INFO("PCIE atomic ops is not supported\n");
3315 } else {
3316 adev->have_atomics_support = true;
3317 }
3318
5494d864
AD
3319 amdgpu_device_get_pcie_info(adev);
3320
b239c017
JX
3321 if (amdgpu_mcbp)
3322 DRM_INFO("MCBP is enabled\n");
3323
5f84cc63
JX
3324 if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
3325 adev->enable_mes = true;
3326
3aa0115d
ML
3327 /* detect hw virtualization here */
3328 amdgpu_detect_virtualization(adev);
3329
dffa11b4
ML
3330 r = amdgpu_device_get_job_timeout_settings(adev);
3331 if (r) {
3332 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4192f7b5 3333 goto failed_unmap;
a190d1c7
XY
3334 }
3335
d38ceaf9 3336 /* early init functions */
06ec9070 3337 r = amdgpu_device_ip_early_init(adev);
d38ceaf9 3338 if (r)
4192f7b5 3339 goto failed_unmap;
d38ceaf9 3340
6585661d
OZ
3341 /* doorbell bar mapping and doorbell index init*/
3342 amdgpu_device_doorbell_init(adev);
3343
d38ceaf9
AD
 3344 /* if we have more than one VGA card, then disable the amdgpu VGA resources */
3345 /* this will fail for cards that aren't VGA class devices, just
3346 * ignore it */
06ec9070 3347 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
d38ceaf9 3348
31af062a 3349 if (amdgpu_device_supports_boco(ddev))
3840c5bc
AD
3350 boco = true;
3351 if (amdgpu_has_atpx() &&
3352 (amdgpu_is_atpx_hybrid() ||
3353 amdgpu_has_atpx_dgpu_power_cntl()) &&
3354 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 3355 vga_switcheroo_register_client(adev->pdev,
3840c5bc
AD
3356 &amdgpu_switcheroo_ops, boco);
3357 if (boco)
d38ceaf9
AD
3358 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
3359
9475a943
SL
3360 if (amdgpu_emu_mode == 1) {
3361 /* post the asic on emulation mode */
3362 emu_soc_asic_init(adev);
bfca0289 3363 goto fence_driver_init;
9475a943 3364 }
bfca0289 3365
4e99a44e
ML
3366 /* detect if we are with an SRIOV vbios */
3367 amdgpu_device_detect_sriov_bios(adev);
048765ad 3368
95e8e59e
AD
3369 /* check if we need to reset the asic
3370 * E.g., driver was not cleanly unloaded previously, etc.
3371 */
f14899fd 3372 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
95e8e59e
AD
3373 r = amdgpu_asic_reset(adev);
3374 if (r) {
3375 dev_err(adev->dev, "asic reset on init failed\n");
3376 goto failed;
3377 }
3378 }
3379
c9a6b82f
AG
3380 pci_enable_pcie_error_reporting(adev->ddev.pdev);
3381
d38ceaf9 3382 /* Post card if necessary */
39c640c0 3383 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3384 if (!adev->bios) {
bec86378 3385 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3386 r = -EINVAL;
3387 goto failed;
d38ceaf9 3388 }
bec86378 3389 DRM_INFO("GPU posting now...\n");
4d2997ab 3390 r = amdgpu_device_asic_init(adev);
4e99a44e
ML
3391 if (r) {
3392 dev_err(adev->dev, "gpu post error!\n");
3393 goto failed;
3394 }
d38ceaf9
AD
3395 }
3396
88b64e95
AD
3397 if (adev->is_atom_fw) {
3398 /* Initialize clocks */
3399 r = amdgpu_atomfirmware_get_clock_info(adev);
3400 if (r) {
3401 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 3402 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
3403 goto failed;
3404 }
3405 } else {
a5bde2f9
AD
3406 /* Initialize clocks */
3407 r = amdgpu_atombios_get_clock_info(adev);
3408 if (r) {
3409 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 3410 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 3411 goto failed;
a5bde2f9
AD
3412 }
3413 /* init i2c buses */
4562236b
HW
3414 if (!amdgpu_device_has_dc_support(adev))
3415 amdgpu_atombios_i2c_init(adev);
2c1a2784 3416 }
d38ceaf9 3417
bfca0289 3418fence_driver_init:
d38ceaf9
AD
3419 /* Fence driver */
3420 r = amdgpu_fence_driver_init(adev);
2c1a2784
AD
3421 if (r) {
3422 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
e23b74aa 3423 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3424 goto failed;
2c1a2784 3425 }
d38ceaf9
AD
3426
3427 /* init the mode config */
4a580877 3428 drm_mode_config_init(adev_to_drm(adev));
d38ceaf9 3429
06ec9070 3430 r = amdgpu_device_ip_init(adev);
d38ceaf9 3431 if (r) {
8840a387 3432 /* failed in exclusive mode due to timeout */
3433 if (amdgpu_sriov_vf(adev) &&
3434 !amdgpu_sriov_runtime(adev) &&
3435 amdgpu_virt_mmio_blocked(adev) &&
3436 !amdgpu_virt_wait_reset(adev)) {
3437 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
3438 /* Don't send request since VF is inactive. */
3439 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3440 adev->virt.ops = NULL;
8840a387 3441 r = -EAGAIN;
3442 goto failed;
3443 }
06ec9070 3444 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3445 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
83ba126a 3446 goto failed;
d38ceaf9
AD
3447 }
3448
d69b8971
YZ
3449 dev_info(adev->dev,
3450 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
3451 adev->gfx.config.max_shader_engines,
3452 adev->gfx.config.max_sh_per_se,
3453 adev->gfx.config.max_cu_per_sh,
3454 adev->gfx.cu_info.number);
3455
d38ceaf9
AD
3456 adev->accel_working = true;
3457
e59c0205
AX
3458 amdgpu_vm_check_compute_bug(adev);
3459
95844d20
MO
3460 /* Initialize the buffer migration limit. */
3461 if (amdgpu_moverate >= 0)
3462 max_MBps = amdgpu_moverate;
3463 else
3464 max_MBps = 8; /* Allow 8 MB/s. */
3465 /* Get a log2 for easy divisions. */
3466 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3467
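/*
 * Keeping the migration cap as a power of two lets throttling code turn a
 * byte count into an approximate time without a division: 1 MB/s is close
 * to 1 byte/us, so a shift by log2_max_MBps does the job. An illustrative
 * (not in-tree) helper under that assumption:
 *
 *	static inline u64 example_bytes_to_us(struct amdgpu_device *adev,
 *					      u64 bytes)
 *	{
 *		// bytes / 2^log2_max_MBps ~= microseconds at the max rate
 *		return bytes >> adev->mm_stats.log2_max_MBps;
 *	}
 */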
9bc92b9c
ML
3468 amdgpu_fbdev_init(adev);
3469
d2f52ac8 3470 r = amdgpu_pm_sysfs_init(adev);
7c868b59
YT
3471 if (r) {
3472 adev->pm_sysfs_en = false;
d2f52ac8 3473 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
7c868b59
YT
3474 } else
3475 adev->pm_sysfs_en = true;
d2f52ac8 3476
5bb23532 3477 r = amdgpu_ucode_sysfs_init(adev);
7c868b59
YT
3478 if (r) {
3479 adev->ucode_sysfs_en = false;
5bb23532 3480 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
7c868b59
YT
3481 } else
3482 adev->ucode_sysfs_en = true;
5bb23532 3483
d38ceaf9
AD
3484 if ((amdgpu_testing & 1)) {
3485 if (adev->accel_working)
3486 amdgpu_test_moves(adev);
3487 else
3488 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
3489 }
d38ceaf9
AD
3490 if (amdgpu_benchmarking) {
3491 if (adev->accel_working)
3492 amdgpu_benchmark(adev, amdgpu_benchmarking);
3493 else
3494 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
3495 }
3496
b0adca4d
EQ
3497 /*
3498 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
 3499 * Otherwise the mgpu fan boost feature will be skipped because the
 3500 * gpu instance count would be too low.
3501 */
3502 amdgpu_register_gpu_instance(adev);
3503
d38ceaf9
AD
3504 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3505 * explicit gating rather than handling it automatically.
3506 */
06ec9070 3507 r = amdgpu_device_ip_late_init(adev);
2c1a2784 3508 if (r) {
06ec9070 3509 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
e23b74aa 3510 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
83ba126a 3511 goto failed;
2c1a2784 3512 }
d38ceaf9 3513
108c6a63 3514 /* must succeed. */
511fdbc3 3515 amdgpu_ras_resume(adev);
108c6a63 3516
beff74bc
AD
3517 queue_delayed_work(system_wq, &adev->delayed_init_work,
3518 msecs_to_jiffies(AMDGPU_RESUME_MS));
3519
2c738637
ML
3520 if (amdgpu_sriov_vf(adev))
3521 flush_delayed_work(&adev->delayed_init_work);
3522
77f3a5cd 3523 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
5aea5327 3524 if (r)
77f3a5cd 3525 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166 3526
d155bef0
AB
3527 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3528 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
3529 if (r)
3530 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3531
c1dd4aa6
AG
3532 /* Have stored pci confspace at hand for restore in sudden PCI error */
3533 if (amdgpu_device_cache_pci_state(adev->pdev))
3534 pci_restore_state(pdev);
3535
d38ceaf9 3536 return 0;
83ba126a
AD
3537
3538failed:
89041940 3539 amdgpu_vf_error_trans_all(adev);
3840c5bc 3540 if (boco)
83ba126a 3541 vga_switcheroo_fini_domain_pm_ops(adev->dev);
8840a387 3542
4192f7b5
AD
3543failed_unmap:
3544 iounmap(adev->rmmio);
3545 adev->rmmio = NULL;
3546
83ba126a 3547 return r;
d38ceaf9
AD
3548}
3549
d38ceaf9
AD
3550/**
3551 * amdgpu_device_fini - tear down the driver
3552 *
3553 * @adev: amdgpu_device pointer
3554 *
3555 * Tear down the driver info (all asics).
3556 * Called at driver shutdown.
3557 */
3558void amdgpu_device_fini(struct amdgpu_device *adev)
3559{
aac89168 3560 dev_info(adev->dev, "amdgpu: finishing device.\n");
9f875167 3561 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 3562 adev->shutdown = true;
9f875167 3563
c1dd4aa6
AG
3564 kfree(adev->pci_state);
3565
752c683d
ML
 3566 /* make sure the IB test has finished before entering exclusive mode
 3567 * to avoid preemption during the IB test
 3568 */
519b8b76 3569 if (amdgpu_sriov_vf(adev)) {
752c683d 3570 amdgpu_virt_request_full_gpu(adev, false);
519b8b76
BZ
3571 amdgpu_virt_fini_data_exchange(adev);
3572 }
752c683d 3573
e5b03032
ML
3574 /* disable all interrupts */
3575 amdgpu_irq_disable_all(adev);
ff97cba8
ML
3576 if (adev->mode_info.mode_config_initialized){
3577 if (!amdgpu_device_has_dc_support(adev))
4a580877 3578 drm_helper_force_disable_all(adev_to_drm(adev));
ff97cba8 3579 else
4a580877 3580 drm_atomic_helper_shutdown(adev_to_drm(adev));
ff97cba8 3581 }
d38ceaf9 3582 amdgpu_fence_driver_fini(adev);
7c868b59
YT
3583 if (adev->pm_sysfs_en)
3584 amdgpu_pm_sysfs_fini(adev);
d38ceaf9 3585 amdgpu_fbdev_fini(adev);
e230ac11 3586 amdgpu_device_ip_fini(adev);
75e1658e
ND
3587 release_firmware(adev->firmware.gpu_info_fw);
3588 adev->firmware.gpu_info_fw = NULL;
d38ceaf9
AD
3589 adev->accel_working = false;
3590 /* free i2c buses */
4562236b
HW
3591 if (!amdgpu_device_has_dc_support(adev))
3592 amdgpu_i2c_fini(adev);
bfca0289
SL
3593
3594 if (amdgpu_emu_mode != 1)
3595 amdgpu_atombios_fini(adev);
3596
d38ceaf9
AD
3597 kfree(adev->bios);
3598 adev->bios = NULL;
3840c5bc
AD
3599 if (amdgpu_has_atpx() &&
3600 (amdgpu_is_atpx_hybrid() ||
3601 amdgpu_has_atpx_dgpu_power_cntl()) &&
3602 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 3603 vga_switcheroo_unregister_client(adev->pdev);
4a580877 3604 if (amdgpu_device_supports_boco(adev_to_drm(adev)))
83ba126a 3605 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d38ceaf9
AD
3606 vga_client_register(adev->pdev, NULL, NULL, NULL);
3607 if (adev->rio_mem)
3608 pci_iounmap(adev->pdev, adev->rio_mem);
3609 adev->rio_mem = NULL;
3610 iounmap(adev->rmmio);
3611 adev->rmmio = NULL;
06ec9070 3612 amdgpu_device_doorbell_fini(adev);
e9bc1bf7 3613
7c868b59
YT
3614 if (adev->ucode_sysfs_en)
3615 amdgpu_ucode_sysfs_fini(adev);
77f3a5cd
ND
3616
3617 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
d155bef0
AB
3618 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3619 amdgpu_pmu_fini(adev);
72de33f8 3620 if (adev->mman.discovery_bin)
a190d1c7 3621 amdgpu_discovery_fini(adev);
d38ceaf9
AD
3622}
3623
3624
3625/*
3626 * Suspend & resume.
3627 */
3628/**
810ddc3a 3629 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 3630 *
87e3f136 3631 * @dev: drm dev pointer
87e3f136 3632 * @fbcon: notify the fbdev of suspend
d38ceaf9
AD
3633 *
3634 * Puts the hw in the suspend state (all asics).
3635 * Returns 0 for success or an error on failure.
3636 * Called at driver suspend.
3637 */
de185019 3638int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3639{
3640 struct amdgpu_device *adev;
3641 struct drm_crtc *crtc;
3642 struct drm_connector *connector;
f8d2d39e 3643 struct drm_connector_list_iter iter;
5ceb54c6 3644 int r;
d38ceaf9 3645
1348969a 3646 adev = drm_to_adev(dev);
d38ceaf9
AD
3647
3648 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3649 return 0;
3650
44779b43 3651 adev->in_suspend = true;
d38ceaf9
AD
3652 drm_kms_helper_poll_disable(dev);
3653
5f818173
S
3654 if (fbcon)
3655 amdgpu_fbdev_set_suspend(adev, 1);
3656
beff74bc 3657 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 3658
4562236b
HW
3659 if (!amdgpu_device_has_dc_support(adev)) {
3660 /* turn off display hw */
3661 drm_modeset_lock_all(dev);
f8d2d39e
LP
3662 drm_connector_list_iter_begin(dev, &iter);
3663 drm_for_each_connector_iter(connector, &iter)
3664 drm_helper_connector_dpms(connector,
3665 DRM_MODE_DPMS_OFF);
3666 drm_connector_list_iter_end(&iter);
4562236b 3667 drm_modeset_unlock_all(dev);
fe1053b7
AD
3668 /* unpin the front buffers and cursors */
3669 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3670 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3671 struct drm_framebuffer *fb = crtc->primary->fb;
3672 struct amdgpu_bo *robj;
3673
91334223 3674 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3675 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3676 r = amdgpu_bo_reserve(aobj, true);
3677 if (r == 0) {
3678 amdgpu_bo_unpin(aobj);
3679 amdgpu_bo_unreserve(aobj);
3680 }
756e6880 3681 }
756e6880 3682
fe1053b7
AD
3683 if (fb == NULL || fb->obj[0] == NULL) {
3684 continue;
3685 }
3686 robj = gem_to_amdgpu_bo(fb->obj[0]);
3687 /* don't unpin kernel fb objects */
3688 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
3689 r = amdgpu_bo_reserve(robj, true);
3690 if (r == 0) {
3691 amdgpu_bo_unpin(robj);
3692 amdgpu_bo_unreserve(robj);
3693 }
d38ceaf9
AD
3694 }
3695 }
3696 }
fe1053b7 3697
5e6932fe 3698 amdgpu_ras_suspend(adev);
3699
fe1053b7
AD
3700 r = amdgpu_device_ip_suspend_phase1(adev);
3701
94fa5660
EQ
3702 amdgpu_amdkfd_suspend(adev, !fbcon);
3703
d38ceaf9
AD
3704 /* evict vram memory */
3705 amdgpu_bo_evict_vram(adev);
3706
5ceb54c6 3707 amdgpu_fence_driver_suspend(adev);
d38ceaf9 3708
fe1053b7 3709 r = amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 3710
a0a71e49
AD
3711 /* evict remaining vram memory
3712 * This second call to evict vram is to evict the gart page table
3713 * using the CPU.
3714 */
d38ceaf9
AD
3715 amdgpu_bo_evict_vram(adev);
3716
d38ceaf9
AD
3717 return 0;
3718}
3719
3720/**
810ddc3a 3721 * amdgpu_device_resume - initiate device resume
d38ceaf9 3722 *
87e3f136 3723 * @dev: drm dev pointer
87e3f136 3724 * @fbcon: notify the fbdev of resume
d38ceaf9
AD
3725 *
3726 * Bring the hw back to operating state (all asics).
3727 * Returns 0 for success or an error on failure.
3728 * Called at driver resume.
3729 */
de185019 3730int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3731{
3732 struct drm_connector *connector;
f8d2d39e 3733 struct drm_connector_list_iter iter;
1348969a 3734 struct amdgpu_device *adev = drm_to_adev(dev);
756e6880 3735 struct drm_crtc *crtc;
03161a6e 3736 int r = 0;
d38ceaf9
AD
3737
3738 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3739 return 0;
3740
d38ceaf9 3741 /* post card */
39c640c0 3742 if (amdgpu_device_need_post(adev)) {
4d2997ab 3743 r = amdgpu_device_asic_init(adev);
74b0b157 3744 if (r)
aac89168 3745 dev_err(adev->dev, "amdgpu asic init failed\n");
74b0b157 3746 }
d38ceaf9 3747
06ec9070 3748 r = amdgpu_device_ip_resume(adev);
e6707218 3749 if (r) {
aac89168 3750 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
4d3b9ae5 3751 return r;
e6707218 3752 }
5ceb54c6
AD
3753 amdgpu_fence_driver_resume(adev);
3754
d38ceaf9 3755
06ec9070 3756 r = amdgpu_device_ip_late_init(adev);
03161a6e 3757 if (r)
4d3b9ae5 3758 return r;
d38ceaf9 3759
beff74bc
AD
3760 queue_delayed_work(system_wq, &adev->delayed_init_work,
3761 msecs_to_jiffies(AMDGPU_RESUME_MS));
3762
fe1053b7
AD
3763 if (!amdgpu_device_has_dc_support(adev)) {
3764 /* pin cursors */
3765 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3766 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3767
91334223 3768 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3769 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3770 r = amdgpu_bo_reserve(aobj, true);
3771 if (r == 0) {
3772 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
3773 if (r != 0)
aac89168 3774 dev_err(adev->dev, "Failed to pin cursor BO (%d)\n", r);
fe1053b7
AD
3775 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
3776 amdgpu_bo_unreserve(aobj);
3777 }
756e6880
AD
3778 }
3779 }
3780 }
9593f4d6 3781 r = amdgpu_amdkfd_resume(adev, !fbcon);
ba997709
YZ
3782 if (r)
3783 return r;
756e6880 3784
96a5d8d4 3785 /* Make sure IB tests flushed */
beff74bc 3786 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 3787
d38ceaf9
AD
3788 /* blat the mode back in */
3789 if (fbcon) {
4562236b
HW
3790 if (!amdgpu_device_has_dc_support(adev)) {
3791 /* pre DCE11 */
3792 drm_helper_resume_force_mode(dev);
3793
3794 /* turn on display hw */
3795 drm_modeset_lock_all(dev);
f8d2d39e
LP
3796
3797 drm_connector_list_iter_begin(dev, &iter);
3798 drm_for_each_connector_iter(connector, &iter)
3799 drm_helper_connector_dpms(connector,
3800 DRM_MODE_DPMS_ON);
3801 drm_connector_list_iter_end(&iter);
3802
4562236b 3803 drm_modeset_unlock_all(dev);
d38ceaf9 3804 }
4d3b9ae5 3805 amdgpu_fbdev_set_suspend(adev, 0);
d38ceaf9
AD
3806 }
3807
3808 drm_kms_helper_poll_enable(dev);
23a1a9e5 3809
5e6932fe 3810 amdgpu_ras_resume(adev);
3811
23a1a9e5
L
3812 /*
3813 * Most of the connector probing functions try to acquire runtime pm
3814 * refs to ensure that the GPU is powered on when connector polling is
3815 * performed. Since we're calling this from a runtime PM callback,
3816 * trying to acquire rpm refs will cause us to deadlock.
3817 *
3818 * Since we're guaranteed to be holding the rpm lock, it's safe to
3819 * temporarily disable the rpm helpers so this doesn't deadlock us.
3820 */
3821#ifdef CONFIG_PM
3822 dev->dev->power.disable_depth++;
3823#endif
4562236b
HW
3824 if (!amdgpu_device_has_dc_support(adev))
3825 drm_helper_hpd_irq_event(dev);
3826 else
3827 drm_kms_helper_hotplug_event(dev);
23a1a9e5
L
3828#ifdef CONFIG_PM
3829 dev->dev->power.disable_depth--;
3830#endif
44779b43
RZ
3831 adev->in_suspend = false;
3832
4d3b9ae5 3833 return 0;
d38ceaf9
AD
3834}
3835
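/*
 * amdgpu_device_suspend()/amdgpu_device_resume() are the system-sleep entry
 * points; the dev_pm_ops glue in amdgpu_drv.c is expected to forward to them
 * roughly as in the sketch below (callback names here are illustrative):
 *
 *	static int example_pmops_suspend(struct device *dev)
 *	{
 *		struct drm_device *drm_dev = dev_get_drvdata(dev);
 *
 *		return amdgpu_device_suspend(drm_dev, true);
 *	}
 *
 *	static int example_pmops_resume(struct device *dev)
 *	{
 *		struct drm_device *drm_dev = dev_get_drvdata(dev);
 *
 *		return amdgpu_device_resume(drm_dev, true);
 *	}
 */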
e3ecdffa
AD
3836/**
3837 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
3838 *
3839 * @adev: amdgpu_device pointer
3840 *
3841 * The list of all the hardware IPs that make up the asic is walked and
3842 * the check_soft_reset callbacks are run. check_soft_reset determines
3843 * if the asic is still hung or not.
3844 * Returns true if any of the IPs are still in a hung state, false if not.
3845 */
06ec9070 3846static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
3847{
3848 int i;
3849 bool asic_hang = false;
3850
f993d628
ML
3851 if (amdgpu_sriov_vf(adev))
3852 return true;
3853
8bc04c29
AD
3854 if (amdgpu_asic_need_full_reset(adev))
3855 return true;
3856
63fbf42f 3857 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3858 if (!adev->ip_blocks[i].status.valid)
63fbf42f 3859 continue;
a1255107
AD
3860 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
3861 adev->ip_blocks[i].status.hang =
3862 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
3863 if (adev->ip_blocks[i].status.hang) {
aac89168 3864 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
3865 asic_hang = true;
3866 }
3867 }
3868 return asic_hang;
3869}
3870
e3ecdffa
AD
3871/**
3872 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3873 *
3874 * @adev: amdgpu_device pointer
3875 *
3876 * The list of all the hardware IPs that make up the asic is walked and the
3877 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
3878 * handles any IP specific hardware or software state changes that are
3879 * necessary for a soft reset to succeed.
3880 * Returns 0 on success, negative error code on failure.
3881 */
06ec9070 3882static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
3883{
3884 int i, r = 0;
3885
3886 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3887 if (!adev->ip_blocks[i].status.valid)
d31a501e 3888 continue;
a1255107
AD
3889 if (adev->ip_blocks[i].status.hang &&
3890 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3891 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
3892 if (r)
3893 return r;
3894 }
3895 }
3896
3897 return 0;
3898}
3899
e3ecdffa
AD
3900/**
3901 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3902 *
3903 * @adev: amdgpu_device pointer
3904 *
3905 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
3906 * reset is necessary to recover.
3907 * Returns true if a full asic reset is required, false if not.
3908 */
06ec9070 3909static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 3910{
da146d3b
AD
3911 int i;
3912
8bc04c29
AD
3913 if (amdgpu_asic_need_full_reset(adev))
3914 return true;
3915
da146d3b 3916 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3917 if (!adev->ip_blocks[i].status.valid)
da146d3b 3918 continue;
a1255107
AD
3919 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3920 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3921 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
3922 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3923 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 3924 if (adev->ip_blocks[i].status.hang) {
aac89168 3925 dev_info(adev->dev, "Some blocks need a full reset!\n");
da146d3b
AD
3926 return true;
3927 }
3928 }
35d782fe
CZ
3929 }
3930 return false;
3931}
3932
e3ecdffa
AD
3933/**
3934 * amdgpu_device_ip_soft_reset - do a soft reset
3935 *
3936 * @adev: amdgpu_device pointer
3937 *
3938 * The list of all the hardware IPs that make up the asic is walked and the
3939 * soft_reset callbacks are run if the block is hung. soft_reset handles any
3940 * IP specific hardware or software state changes that are necessary to soft
3941 * reset the IP.
3942 * Returns 0 on success, negative error code on failure.
3943 */
06ec9070 3944static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3945{
3946 int i, r = 0;
3947
3948 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3949 if (!adev->ip_blocks[i].status.valid)
35d782fe 3950 continue;
a1255107
AD
3951 if (adev->ip_blocks[i].status.hang &&
3952 adev->ip_blocks[i].version->funcs->soft_reset) {
3953 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
3954 if (r)
3955 return r;
3956 }
3957 }
3958
3959 return 0;
3960}
3961
e3ecdffa
AD
3962/**
3963 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
3964 *
3965 * @adev: amdgpu_device pointer
3966 *
3967 * The list of all the hardware IPs that make up the asic is walked and the
3968 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
3969 * handles any IP specific hardware or software state changes that are
3970 * necessary after the IP has been soft reset.
3971 * Returns 0 on success, negative error code on failure.
3972 */
06ec9070 3973static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3974{
3975 int i, r = 0;
3976
3977 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3978 if (!adev->ip_blocks[i].status.valid)
35d782fe 3979 continue;
a1255107
AD
3980 if (adev->ip_blocks[i].status.hang &&
3981 adev->ip_blocks[i].version->funcs->post_soft_reset)
3982 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
3983 if (r)
3984 return r;
3985 }
3986
3987 return 0;
3988}
3989
e3ecdffa 3990/**
c33adbc7 3991 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
3992 *
3993 * @adev: amdgpu_device pointer
3994 *
3995 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
3996 * restore things like GPUVM page tables after a GPU reset where
3997 * the contents of VRAM might be lost.
403009bf
CK
3998 *
3999 * Returns:
4000 * 0 on success, negative error code on failure.
e3ecdffa 4001 */
c33adbc7 4002static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 4003{
c41d1cf6 4004 struct dma_fence *fence = NULL, *next = NULL;
403009bf
CK
4005 struct amdgpu_bo *shadow;
4006 long r = 1, tmo;
c41d1cf6
ML
4007
4008 if (amdgpu_sriov_runtime(adev))
b045d3af 4009 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
4010 else
4011 tmo = msecs_to_jiffies(100);
4012
aac89168 4013 dev_info(adev->dev, "recover vram bo from shadow start\n");
c41d1cf6 4014 mutex_lock(&adev->shadow_list_lock);
403009bf
CK
4015 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
4016
4017 /* No need to recover an evicted BO */
4018 if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
b575f10d 4019 shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
403009bf
CK
4020 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
4021 continue;
4022
4023 r = amdgpu_bo_restore_shadow(shadow, &next);
4024 if (r)
4025 break;
4026
c41d1cf6 4027 if (fence) {
1712fb1a 4028 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
4029 dma_fence_put(fence);
4030 fence = next;
1712fb1a 4031 if (tmo == 0) {
4032 r = -ETIMEDOUT;
c41d1cf6 4033 break;
1712fb1a 4034 } else if (tmo < 0) {
4035 r = tmo;
4036 break;
4037 }
403009bf
CK
4038 } else {
4039 fence = next;
c41d1cf6 4040 }
c41d1cf6
ML
4041 }
4042 mutex_unlock(&adev->shadow_list_lock);
4043
403009bf
CK
4044 if (fence)
4045 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
4046 dma_fence_put(fence);
4047
1712fb1a 4048 if (r < 0 || tmo <= 0) {
aac89168 4049 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
4050 return -EIO;
4051 }
c41d1cf6 4052
aac89168 4053 dev_info(adev->dev, "recover vram bo from shadow done\n");
403009bf 4054 return 0;
c41d1cf6
ML
4055}
4056
a90ad3c2 4057
e3ecdffa 4058/**
06ec9070 4059 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e
ML
4060 *
4061 * @adev: amdgpu device pointer
87e3f136 4062 * @from_hypervisor: request from hypervisor
5740682e
ML
4063 *
 4064 * do VF FLR and reinitialize the ASIC
3f48c681 4065 * Returns 0 on success, negative error code on failure.
e3ecdffa
AD
4066 */
4067static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4068 bool from_hypervisor)
5740682e
ML
4069{
4070 int r;
4071
4072 if (from_hypervisor)
4073 r = amdgpu_virt_request_full_gpu(adev, true);
4074 else
4075 r = amdgpu_virt_reset_gpu(adev);
4076 if (r)
4077 return r;
a90ad3c2 4078
b639c22c
JZ
4079 amdgpu_amdkfd_pre_reset(adev);
4080
a90ad3c2 4081 /* Resume IP prior to SMC */
06ec9070 4082 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
4083 if (r)
4084 goto error;
a90ad3c2 4085
c9ffa427 4086 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 4087 /* we need recover gart prior to run SMC/CP/SDMA resume */
6c28aed6 4088 amdgpu_gtt_mgr_recover(ttm_manager_type(&adev->mman.bdev, TTM_PL_TT));
a90ad3c2 4089
7a3e0bb2
RZ
4090 r = amdgpu_device_fw_loading(adev);
4091 if (r)
4092 return r;
4093
a90ad3c2 4094 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 4095 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
4096 if (r)
4097 goto error;
a90ad3c2
ML
4098
4099 amdgpu_irq_gpu_reset_resume_helper(adev);
5740682e 4100 r = amdgpu_ib_ring_tests(adev);
f81e8d53 4101 amdgpu_amdkfd_post_reset(adev);
a90ad3c2 4102
abc34253
ED
4103error:
4104 amdgpu_virt_release_full_gpu(adev, true);
c41d1cf6 4105 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 4106 amdgpu_inc_vram_lost(adev);
c33adbc7 4107 r = amdgpu_device_recover_vram(adev);
a90ad3c2
ML
4108 }
4109
4110 return r;
4111}
4112
9a1cddd6 4113/**
4114 * amdgpu_device_has_job_running - check if there is any job in mirror list
4115 *
4116 * @adev: amdgpu device pointer
4117 *
 4118 * Check if there is any job in the mirror list.
4119 */
4120bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4121{
4122 int i;
4123 struct drm_sched_job *job;
4124
4125 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4126 struct amdgpu_ring *ring = adev->rings[i];
4127
4128 if (!ring || !ring->sched.thread)
4129 continue;
4130
4131 spin_lock(&ring->sched.job_list_lock);
4132 job = list_first_entry_or_null(&ring->sched.ring_mirror_list,
4133 struct drm_sched_job, node);
4134 spin_unlock(&ring->sched.job_list_lock);
4135 if (job)
4136 return true;
4137 }
4138 return false;
4139}
4140
12938fad
CK
4141/**
4142 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4143 *
4144 * @adev: amdgpu device pointer
4145 *
4146 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4147 * a hung GPU.
4148 */
4149bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4150{
4151 if (!amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 4152 dev_info(adev->dev, "Timeout, but no hardware hang detected.\n");
12938fad
CK
4153 return false;
4154 }
4155
3ba7b418
AG
4156 if (amdgpu_gpu_recovery == 0)
4157 goto disabled;
4158
4159 if (amdgpu_sriov_vf(adev))
4160 return true;
4161
4162 if (amdgpu_gpu_recovery == -1) {
4163 switch (adev->asic_type) {
fc42d47c
AG
4164 case CHIP_BONAIRE:
4165 case CHIP_HAWAII:
3ba7b418
AG
4166 case CHIP_TOPAZ:
4167 case CHIP_TONGA:
4168 case CHIP_FIJI:
4169 case CHIP_POLARIS10:
4170 case CHIP_POLARIS11:
4171 case CHIP_POLARIS12:
4172 case CHIP_VEGAM:
4173 case CHIP_VEGA20:
4174 case CHIP_VEGA10:
4175 case CHIP_VEGA12:
c43b849f 4176 case CHIP_RAVEN:
e9d4cf91 4177 case CHIP_ARCTURUS:
2cb44fb0 4178 case CHIP_RENOIR:
658c6639
AD
4179 case CHIP_NAVI10:
4180 case CHIP_NAVI14:
4181 case CHIP_NAVI12:
131a3c74 4182 case CHIP_SIENNA_CICHLID:
3ba7b418
AG
4183 break;
4184 default:
4185 goto disabled;
4186 }
12938fad
CK
4187 }
4188
4189 return true;
3ba7b418
AG
4190
4191disabled:
aac89168 4192 dev_info(adev->dev, "GPU recovery disabled.\n");
3ba7b418 4193 return false;
12938fad
CK
4194}
4195
5c6dd71e 4196
26bc5340
AG
4197static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
4198 struct amdgpu_job *job,
4199 bool *need_full_reset_arg)
4200{
4201 int i, r = 0;
4202 bool need_full_reset = *need_full_reset_arg;
71182665 4203
728e7e0c
JZ
4204 amdgpu_debugfs_wait_dump(adev);
4205
b602ca5f
TZ
4206 if (amdgpu_sriov_vf(adev)) {
4207 /* stop the data exchange thread */
4208 amdgpu_virt_fini_data_exchange(adev);
4209 }
4210
71182665 4211 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
4212 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4213 struct amdgpu_ring *ring = adev->rings[i];
4214
51687759 4215 if (!ring || !ring->sched.thread)
0875dc9e 4216 continue;
5740682e 4217
2f9d4084
ML
4218 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
4219 amdgpu_fence_driver_force_completion(ring);
0875dc9e 4220 }
d38ceaf9 4221
222b5f04
AG
 4222 if (job)
4223 drm_sched_increase_karma(&job->base);
4224
1d721ed6 4225 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
4226 if (!amdgpu_sriov_vf(adev)) {
4227
4228 if (!need_full_reset)
4229 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4230
4231 if (!need_full_reset) {
4232 amdgpu_device_ip_pre_soft_reset(adev);
4233 r = amdgpu_device_ip_soft_reset(adev);
4234 amdgpu_device_ip_post_soft_reset(adev);
4235 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 4236 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
26bc5340
AG
4237 need_full_reset = true;
4238 }
4239 }
4240
4241 if (need_full_reset)
4242 r = amdgpu_device_ip_suspend(adev);
4243
4244 *need_full_reset_arg = need_full_reset;
4245 }
4246
4247 return r;
4248}
4249
041a62bc 4250static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
26bc5340 4251 struct list_head *device_list_handle,
7ac71382
AG
4252 bool *need_full_reset_arg,
4253 bool skip_hw_reset)
26bc5340
AG
4254{
4255 struct amdgpu_device *tmp_adev = NULL;
4256 bool need_full_reset = *need_full_reset_arg, vram_lost = false;
4257 int r = 0;
4258
4259 /*
 4260 * ASIC reset has to be done on all XGMI hive nodes ASAP
 4261 * to allow proper link negotiation in FW (within 1 sec)
4262 */
7ac71382 4263 if (!skip_hw_reset && need_full_reset) {
26bc5340 4264 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
041a62bc 4265 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 4266 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
c96cf282 4267 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
4268 r = -EALREADY;
4269 } else
4270 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 4271
041a62bc 4272 if (r) {
aac89168 4273 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4a580877 4274 r, adev_to_drm(tmp_adev)->unique);
041a62bc 4275 break;
ce316fa5
LM
4276 }
4277 }
4278
041a62bc
AG
4279 /* For XGMI wait for all resets to complete before proceed */
4280 if (!r) {
ce316fa5
LM
4281 list_for_each_entry(tmp_adev, device_list_handle,
4282 gmc.xgmi.head) {
4283 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4284 flush_work(&tmp_adev->xgmi_reset_work);
4285 r = tmp_adev->asic_reset_res;
4286 if (r)
4287 break;
ce316fa5
LM
4288 }
4289 }
4290 }
ce316fa5 4291 }
26bc5340 4292
43c4d576
JC
4293 if (!r && amdgpu_ras_intr_triggered()) {
4294 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4295 if (tmp_adev->mmhub.funcs &&
4296 tmp_adev->mmhub.funcs->reset_ras_error_count)
4297 tmp_adev->mmhub.funcs->reset_ras_error_count(tmp_adev);
4298 }
4299
00eaa571 4300 amdgpu_ras_intr_cleared();
43c4d576 4301 }
00eaa571 4302
26bc5340
AG
4303 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4304 if (need_full_reset) {
4305 /* post card */
4d2997ab 4306 if (amdgpu_device_asic_init(tmp_adev))
aac89168 4307 dev_warn(tmp_adev->dev, "asic atom init failed!");
26bc5340
AG
4308
4309 if (!r) {
4310 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
4311 r = amdgpu_device_ip_resume_phase1(tmp_adev);
4312 if (r)
4313 goto out;
4314
4315 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
4316 if (vram_lost) {
77e7f829 4317 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 4318 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
4319 }
4320
6c28aed6 4321 r = amdgpu_gtt_mgr_recover(ttm_manager_type(&tmp_adev->mman.bdev, TTM_PL_TT));
26bc5340
AG
4322 if (r)
4323 goto out;
4324
4325 r = amdgpu_device_fw_loading(tmp_adev);
4326 if (r)
4327 return r;
4328
4329 r = amdgpu_device_ip_resume_phase2(tmp_adev);
4330 if (r)
4331 goto out;
4332
4333 if (vram_lost)
4334 amdgpu_device_fill_reset_magic(tmp_adev);
4335
fdafb359
EQ
4336 /*
 4337 * Add this ASIC back as tracked since the reset has already
 4338 * completed successfully.
4339 */
4340 amdgpu_register_gpu_instance(tmp_adev);
4341
7c04ca50 4342 r = amdgpu_device_ip_late_init(tmp_adev);
4343 if (r)
4344 goto out;
4345
565d1941
EQ
4346 amdgpu_fbdev_set_suspend(tmp_adev, 0);
4347
e8fbaf03
GC
4348 /*
 4349 * The GPU enters a bad state once the number of
 4350 * faulty pages detected by ECC has reached the
 4351 * threshold, and RAS recovery is scheduled next.
 4352 * So add a check here to abort recovery if the bad
 4353 * page threshold has indeed been exceeded, and remind
 4354 * the user to retire this GPU or to set a bigger
 4355 * bad_page_threshold value to fix this the next time
 4356 * the driver is probed.
4357 */
4358 if (!amdgpu_ras_check_err_threshold(tmp_adev)) {
4359 /* must succeed. */
4360 amdgpu_ras_resume(tmp_adev);
4361 } else {
4362 r = -EINVAL;
4363 goto out;
4364 }
e79a04d5 4365
26bc5340
AG
4366 /* Update PSP FW topology after reset */
4367 if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
4368 r = amdgpu_xgmi_update_topology(hive, tmp_adev);
4369 }
4370 }
4371
26bc5340
AG
4372out:
4373 if (!r) {
4374 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
4375 r = amdgpu_ib_ring_tests(tmp_adev);
4376 if (r) {
4377 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
4378 r = amdgpu_device_ip_suspend(tmp_adev);
4379 need_full_reset = true;
4380 r = -EAGAIN;
4381 goto end;
4382 }
4383 }
4384
4385 if (!r)
4386 r = amdgpu_device_recover_vram(tmp_adev);
4387 else
4388 tmp_adev->asic_reset_res = r;
4389 }
4390
4391end:
4392 *need_full_reset_arg = need_full_reset;
4393 return r;
4394}
4395
08ebb485
DL
4396static bool amdgpu_device_lock_adev(struct amdgpu_device *adev,
4397 struct amdgpu_hive_info *hive)
26bc5340 4398{
53b3f8f4
DL
4399 if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0)
4400 return false;
4401
08ebb485
DL
4402 if (hive) {
4403 down_write_nest_lock(&adev->reset_sem, &hive->hive_lock);
4404 } else {
4405 down_write(&adev->reset_sem);
4406 }
5740682e 4407
26bc5340 4408 atomic_inc(&adev->gpu_reset_counter);
a3a09142
AD
4409 switch (amdgpu_asic_reset_method(adev)) {
4410 case AMD_RESET_METHOD_MODE1:
4411 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
4412 break;
4413 case AMD_RESET_METHOD_MODE2:
4414 adev->mp1_state = PP_MP1_STATE_RESET;
4415 break;
4416 default:
4417 adev->mp1_state = PP_MP1_STATE_NONE;
4418 break;
4419 }
1d721ed6
AG
4420
4421 return true;
26bc5340 4422}
d38ceaf9 4423
26bc5340
AG
4424static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
4425{
89041940 4426 amdgpu_vf_error_trans_all(adev);
a3a09142 4427 adev->mp1_state = PP_MP1_STATE_NONE;
53b3f8f4 4428 atomic_set(&adev->in_gpu_reset, 0);
6049db43 4429 up_write(&adev->reset_sem);
26bc5340
AG
4430}
4431
3f12acc8
EQ
4432static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
4433{
4434 struct pci_dev *p = NULL;
4435
4436 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4437 adev->pdev->bus->number, 1);
4438 if (p) {
4439 pm_runtime_enable(&(p->dev));
4440 pm_runtime_resume(&(p->dev));
4441 }
4442}
4443
4444static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
4445{
4446 enum amd_reset_method reset_method;
4447 struct pci_dev *p = NULL;
4448 u64 expires;
4449
4450 /*
4451 * For now, only BACO and mode1 reset are confirmed
 4452 * to suffer the audio issue if not properly suspended.
4453 */
4454 reset_method = amdgpu_asic_reset_method(adev);
4455 if ((reset_method != AMD_RESET_METHOD_BACO) &&
4456 (reset_method != AMD_RESET_METHOD_MODE1))
4457 return -EINVAL;
4458
4459 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4460 adev->pdev->bus->number, 1);
4461 if (!p)
4462 return -ENODEV;
4463
4464 expires = pm_runtime_autosuspend_expiration(&(p->dev));
4465 if (!expires)
4466 /*
4467 * If we cannot get the audio device autosuspend delay,
4468 * a fixed 4S interval will be used. Considering 3S is
4469 * the audio controller default autosuspend delay setting.
4470 * 4S used here is guaranteed to cover that.
4471 */
54b7feb9 4472 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
4473
4474 while (!pm_runtime_status_suspended(&(p->dev))) {
4475 if (!pm_runtime_suspend(&(p->dev)))
4476 break;
4477
4478 if (expires < ktime_get_mono_fast_ns()) {
4479 dev_warn(adev->dev, "failed to suspend display audio\n");
4480 /* TODO: abort the succeeding gpu reset? */
4481 return -ETIMEDOUT;
4482 }
4483 }
4484
4485 pm_runtime_disable(&(p->dev));
4486
4487 return 0;
4488}
4489
26bc5340
AG
4490/**
4491 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
4492 *
4493 * @adev: amdgpu device pointer
4494 * @job: which job trigger hang
4495 *
4496 * Attempt to reset the GPU if it has hung (all asics).
 4497 * Attempt to do a soft reset or a full reset and reinitialize the ASIC.
4498 * Returns 0 for success or an error on failure.
4499 */
4500
4501int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
4502 struct amdgpu_job *job)
4503{
1d721ed6 4504 struct list_head device_list, *device_list_handle = NULL;
7dd8c205
EQ
4505 bool need_full_reset = false;
4506 bool job_signaled = false;
26bc5340 4507 struct amdgpu_hive_info *hive = NULL;
26bc5340 4508 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 4509 int i, r = 0;
bb5c7235 4510 bool need_emergency_restart = false;
3f12acc8 4511 bool audio_suspended = false;
26bc5340 4512
bb5c7235
WS
4513 /**
4514 * Special case: RAS triggered and full reset isn't supported
4515 */
4516 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
4517
d5ea093e
AG
4518 /*
4519 * Flush RAM to disk so that after reboot
 4520 * the user can read the log and see why the system rebooted.
4521 */
bb5c7235 4522 if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
4523 DRM_WARN("Emergency reboot.");
4524
4525 ksys_sync_helper();
4526 emergency_restart();
4527 }
4528
b823821f 4529 dev_info(adev->dev, "GPU %s begin!\n",
bb5c7235 4530 need_emergency_restart ? "jobs stop":"reset");
26bc5340
AG
4531
4532 /*
1d721ed6
AG
 4533 * Here we trylock to avoid a chain of resets executing from
 4534 * either a trigger by jobs on different adevs in an XGMI hive or jobs on
 4535 * different schedulers for the same device while this TO handler is running.
 4536 * We always reset all schedulers for a device and all devices in an XGMI
 4537 * hive, so that should take care of them too.
26bc5340 4538 */
d95e8e97 4539 hive = amdgpu_get_xgmi_hive(adev);
53b3f8f4
DL
4540 if (hive) {
4541 if (atomic_cmpxchg(&hive->in_reset, 0, 1) != 0) {
4542 DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
4543 job ? job->base.id : -1, hive->hive_id);
d95e8e97 4544 amdgpu_put_xgmi_hive(hive);
53b3f8f4
DL
4545 return 0;
4546 }
4547 mutex_lock(&hive->hive_lock);
1d721ed6 4548 }
26bc5340 4549
9e94d22c
EQ
4550 /*
4551 * Build list of devices to reset.
4552 * In case we are in XGMI hive mode, resort the device list
4553 * to put adev in the 1st position.
4554 */
4555 INIT_LIST_HEAD(&device_list);
4556 if (adev->gmc.xgmi.num_physical_nodes > 1) {
4557 if (!hive)
26bc5340 4558 return -ENODEV;
9e94d22c
EQ
4559 if (!list_is_first(&adev->gmc.xgmi.head, &hive->device_list))
4560 list_rotate_to_front(&adev->gmc.xgmi.head, &hive->device_list);
26bc5340
AG
4561 device_list_handle = &hive->device_list;
4562 } else {
4563 list_add_tail(&adev->gmc.xgmi.head, &device_list);
4564 device_list_handle = &device_list;
4565 }
4566
1d721ed6
AG
4567 /* block all schedulers and reset given job's ring */
4568 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
08ebb485 4569 if (!amdgpu_device_lock_adev(tmp_adev, hive)) {
aac89168 4570 dev_info(tmp_adev->dev, "Bailing on TDR for s_job:%llx, as another already in progress",
9e94d22c 4571 job ? job->base.id : -1);
cbfd17f7
DL
4572 r = 0;
4573 goto skip_recovery;
7c6e68c7
AG
4574 }
4575
3f12acc8
EQ
4576 /*
4577 * Try to put the audio codec into suspend state
4578 * before gpu reset started.
4579 *
4580 * Due to the power domain of the graphics device
4581 * is shared with AZ power domain. Without this,
4582 * we may change the audio hardware from behind
4583 * the audio driver's back. That will trigger
4584 * some audio codec errors.
4585 */
4586 if (!amdgpu_device_suspend_display_audio(tmp_adev))
4587 audio_suspended = true;
4588
9e94d22c
EQ
4589 amdgpu_ras_set_error_query_ready(tmp_adev, false);
4590
52fb44cf
EQ
4591 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
4592
9e94d22c
EQ
4593 if (!amdgpu_sriov_vf(tmp_adev))
4594 amdgpu_amdkfd_pre_reset(tmp_adev);
4595
12ffa55d
AG
4596 /*
 4597 * Mark these ASICs to be reset as untracked first,
 4598 * and add them back after the reset has completed.
4599 */
4600 amdgpu_unregister_gpu_instance(tmp_adev);
4601
a2f63ee8 4602 amdgpu_fbdev_set_suspend(tmp_adev, 1);
565d1941 4603
f1c1314b 4604 /* disable ras on ALL IPs */
bb5c7235 4605 if (!need_emergency_restart &&
b823821f 4606 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 4607 amdgpu_ras_suspend(tmp_adev);
4608
1d721ed6
AG
4609 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4610 struct amdgpu_ring *ring = tmp_adev->rings[i];
4611
4612 if (!ring || !ring->sched.thread)
4613 continue;
4614
0b2d2c2e 4615 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 4616
bb5c7235 4617 if (need_emergency_restart)
7c6e68c7 4618 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6
AG
4619 }
4620 }
4621
bb5c7235 4622 if (need_emergency_restart)
7c6e68c7
AG
4623 goto skip_sched_resume;
4624
1d721ed6
AG
4625 /*
4626 * Must check guilty signal here since after this point all old
4627 * HW fences are force signaled.
4628 *
4629 * job->base holds a reference to parent fence
4630 */
4631 if (job && job->base.s_fence->parent &&
7dd8c205 4632 dma_fence_is_signaled(job->base.s_fence->parent)) {
1d721ed6 4633 job_signaled = true;
1d721ed6
AG
4634 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
4635 goto skip_hw_reset;
4636 }
4637
26bc5340
AG
4638retry: /* Rest of adevs pre asic reset from XGMI hive. */
4639 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
26bc5340
AG
4640 r = amdgpu_device_pre_asic_reset(tmp_adev,
4641 NULL,
4642 &need_full_reset);
4643 /*TODO Should we stop ?*/
4644 if (r) {
aac89168 4645 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
4a580877 4646 r, adev_to_drm(tmp_adev)->unique);
26bc5340
AG
4647 tmp_adev->asic_reset_res = r;
4648 }
4649 }
4650
4651 /* Actual ASIC resets if needed.*/
4652 /* TODO Implement XGMI hive reset logic for SRIOV */
4653 if (amdgpu_sriov_vf(adev)) {
4654 r = amdgpu_device_reset_sriov(adev, job ? false : true);
4655 if (r)
4656 adev->asic_reset_res = r;
4657 } else {
7ac71382 4658 r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset, false);
26bc5340
AG
4659 if (r && r == -EAGAIN)
4660 goto retry;
4661 }
4662
1d721ed6
AG
4663skip_hw_reset:
4664
26bc5340
AG
4665 /* Post ASIC reset for all devs .*/
4666 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
7c6e68c7 4667
1d721ed6
AG
4668 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4669 struct amdgpu_ring *ring = tmp_adev->rings[i];
4670
4671 if (!ring || !ring->sched.thread)
4672 continue;
4673
 4674 /* No point in resubmitting jobs if we didn't do a HW reset */
4675 if (!tmp_adev->asic_reset_res && !job_signaled)
4676 drm_sched_resubmit_jobs(&ring->sched);
4677
4678 drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
4679 }
4680
4681 if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
4a580877 4682 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
1d721ed6
AG
4683 }
4684
4685 tmp_adev->asic_reset_res = 0;
26bc5340
AG
4686
4687 if (r) {
 4688 /* bad news, how to tell it to userspace? */
12ffa55d 4689 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
4690 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
4691 } else {
12ffa55d 4692 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340 4693 }
7c6e68c7 4694 }
26bc5340 4695
7c6e68c7
AG
4696skip_sched_resume:
4697 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4698 /*unlock kfd: SRIOV would do it separately */
bb5c7235 4699 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
7c6e68c7 4700 amdgpu_amdkfd_post_reset(tmp_adev);
3f12acc8
EQ
4701 if (audio_suspended)
4702 amdgpu_device_resume_display_audio(tmp_adev);
26bc5340
AG
4703 amdgpu_device_unlock_adev(tmp_adev);
4704 }
4705
cbfd17f7 4706skip_recovery:
9e94d22c 4707 if (hive) {
53b3f8f4 4708 atomic_set(&hive->in_reset, 0);
9e94d22c 4709 mutex_unlock(&hive->hive_lock);
d95e8e97 4710 amdgpu_put_xgmi_hive(hive);
9e94d22c 4711 }
26bc5340
AG
4712
4713 if (r)
4714 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
d38ceaf9
AD
4715 return r;
4716}
4717
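/*
 * The usual caller of amdgpu_device_gpu_recover() is the GPU scheduler's
 * job-timeout handler. A condensed, illustrative sketch of that flow (the
 * real handler in amdgpu_job.c does additional bookkeeping and logging):
 *
 *	static void example_job_timedout(struct drm_sched_job *s_job)
 *	{
 *		struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
 *		struct amdgpu_job *job = to_amdgpu_job(s_job);
 *
 *		if (amdgpu_device_should_recover_gpu(ring->adev))
 *			amdgpu_device_gpu_recover(ring->adev, job);
 *	}
 */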
e3ecdffa
AD
4718/**
 4719 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
4720 *
4721 * @adev: amdgpu_device pointer
4722 *
 4723 * Fetches and stores in the driver the PCIE capabilities (gen speed
4724 * and lanes) of the slot the device is in. Handles APUs and
4725 * virtualized environments where PCIE config space may not be available.
4726 */
5494d864 4727static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 4728{
5d9a6330 4729 struct pci_dev *pdev;
c5313457
HK
4730 enum pci_bus_speed speed_cap, platform_speed_cap;
4731 enum pcie_link_width platform_link_width;
d0dd7f0c 4732
cd474ba0
AD
4733 if (amdgpu_pcie_gen_cap)
4734 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 4735
cd474ba0
AD
4736 if (amdgpu_pcie_lane_cap)
4737 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 4738
cd474ba0
AD
4739 /* covers APUs as well */
4740 if (pci_is_root_bus(adev->pdev->bus)) {
4741 if (adev->pm.pcie_gen_mask == 0)
4742 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
4743 if (adev->pm.pcie_mlw_mask == 0)
4744 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 4745 return;
cd474ba0 4746 }
d0dd7f0c 4747
c5313457
HK
4748 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
4749 return;
4750
dbaa922b
AD
4751 pcie_bandwidth_available(adev->pdev, NULL,
4752 &platform_speed_cap, &platform_link_width);
c5313457 4753
cd474ba0 4754 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
4755 /* asic caps */
4756 pdev = adev->pdev;
4757 speed_cap = pcie_get_speed_cap(pdev);
4758 if (speed_cap == PCI_SPEED_UNKNOWN) {
4759 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
4760 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4761 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 4762 } else {
5d9a6330
AD
4763 if (speed_cap == PCIE_SPEED_16_0GT)
4764 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4765 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4766 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4767 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
4768 else if (speed_cap == PCIE_SPEED_8_0GT)
4769 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4770 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4771 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
4772 else if (speed_cap == PCIE_SPEED_5_0GT)
4773 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4774 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
4775 else
4776 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
4777 }
4778 /* platform caps */
c5313457 4779 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
4780 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4781 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4782 } else {
c5313457 4783 if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
4784 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4785 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4786 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4787 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 4788 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
4789 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4790 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4791 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 4792 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
4793 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4794 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4795 else
4796 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
4797
cd474ba0
AD
4798 }
4799 }
4800 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 4801 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
4802 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
4803 } else {
c5313457 4804 switch (platform_link_width) {
5d9a6330 4805 case PCIE_LNK_X32:
cd474ba0
AD
4806 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
4807 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4808 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4809 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4810 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4811 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4812 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4813 break;
5d9a6330 4814 case PCIE_LNK_X16:
cd474ba0
AD
4815 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4816 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4817 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4818 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4819 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4820 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4821 break;
5d9a6330 4822 case PCIE_LNK_X12:
cd474ba0
AD
4823 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4824 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4825 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4826 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4827 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4828 break;
5d9a6330 4829 case PCIE_LNK_X8:
cd474ba0
AD
4830 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4831 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4832 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4833 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4834 break;
5d9a6330 4835 case PCIE_LNK_X4:
cd474ba0
AD
4836 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4837 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4838 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4839 break;
5d9a6330 4840 case PCIE_LNK_X2:
cd474ba0
AD
4841 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4842 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4843 break;
5d9a6330 4844 case PCIE_LNK_X1:
cd474ba0
AD
4845 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
4846 break;
4847 default:
4848 break;
4849 }
d0dd7f0c
AD
4850 }
4851 }
4852}
d38ceaf9 4853
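/*
 * Consumers of the masks filled in above test individual CAIL_* bits rather
 * than decoding a single speed value. An illustrative check (the helper name
 * is hypothetical):
 *
 *	static bool example_supports_pcie_gen3(struct amdgpu_device *adev)
 *	{
 *		return !!(adev->pm.pcie_gen_mask &
 *			  CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
 *	}
 */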
361dbd01
AD
4854int amdgpu_device_baco_enter(struct drm_device *dev)
4855{
1348969a 4856 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 4857 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01 4858
4a580877 4859 if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
361dbd01
AD
4860 return -ENOTSUPP;
4861
7a22677b
LM
4862 if (ras && ras->supported)
4863 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
4864
9530273e 4865 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
4866}
4867
4868int amdgpu_device_baco_exit(struct drm_device *dev)
4869{
1348969a 4870 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 4871 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 4872 int ret = 0;
361dbd01 4873
4a580877 4874 if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
361dbd01
AD
4875 return -ENOTSUPP;
4876
9530273e
EQ
4877 ret = amdgpu_dpm_baco_exit(adev);
4878 if (ret)
4879 return ret;
7a22677b
LM
4880
4881 if (ras && ras->supported)
4882 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
4883
4884 return 0;
361dbd01 4885}
c9a6b82f 4886
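/*
 * amdgpu_device_baco_enter()/_exit() are meant for the runtime-PM path on
 * ASICs whose runtime power-off method is BACO. A hedged sketch of how a
 * runtime-suspend callback might use them (the in-tree callers live in
 * amdgpu_drv.c and do considerably more):
 *
 *	if (amdgpu_device_supports_baco(drm_dev)) {
 *		ret = amdgpu_device_baco_enter(drm_dev);
 *		if (ret)
 *			return ret;
 *	}
 */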
acd89fca
AG
4887static void amdgpu_cancel_all_tdr(struct amdgpu_device *adev)
4888{
4889 int i;
4890
4891 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4892 struct amdgpu_ring *ring = adev->rings[i];
4893
4894 if (!ring || !ring->sched.thread)
4895 continue;
4896
4897 cancel_delayed_work_sync(&ring->sched.work_tdr);
4898 }
4899}
4900
c9a6b82f
AG
4901/**
4902 * amdgpu_pci_error_detected - Called when a PCI error is detected.
4903 * @pdev: PCI device struct
4904 * @state: PCI channel state
4905 *
4906 * Description: Called when a PCI error is detected.
4907 *
4908 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
4909 */
4910pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
4911{
4912 struct drm_device *dev = pci_get_drvdata(pdev);
4913 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 4914 int i;
c9a6b82f
AG
4915
4916 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
4917
6894305c
AG
4918 if (adev->gmc.xgmi.num_physical_nodes > 1) {
4919 DRM_WARN("No support for XGMI hive yet...");
4920 return PCI_ERS_RESULT_DISCONNECT;
4921 }
4922
c9a6b82f
AG
4923 switch (state) {
4924 case pci_channel_io_normal:
4925 return PCI_ERS_RESULT_CAN_RECOVER;
acd89fca
AG
4926 /* Fatal error, prepare for slot reset */
4927 case pci_channel_io_frozen:
4928 /*
 4929 * Cancel and wait for all TDRs in progress if we fail to
 4930 * set adev->in_gpu_reset in amdgpu_device_lock_adev
4931 *
4932 * Locking adev->reset_sem will prevent any external access
4933 * to GPU during PCI error recovery
4934 */
4935 while (!amdgpu_device_lock_adev(adev, NULL))
4936 amdgpu_cancel_all_tdr(adev);
4937
4938 /*
4939 * Block any work scheduling as we do for regular GPU reset
4940 * for the duration of the recovery
4941 */
4942 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4943 struct amdgpu_ring *ring = adev->rings[i];
4944
4945 if (!ring || !ring->sched.thread)
4946 continue;
4947
4948 drm_sched_stop(&ring->sched, NULL);
4949 }
c9a6b82f
AG
4950 return PCI_ERS_RESULT_NEED_RESET;
4951 case pci_channel_io_perm_failure:
4952 /* Permanent error, prepare for device removal */
4953 return PCI_ERS_RESULT_DISCONNECT;
4954 }
4955
4956 return PCI_ERS_RESULT_NEED_RESET;
4957}
4958
4959/**
4960 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
4961 * @pdev: pointer to PCI device
4962 */
4963pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
4964{
4965
4966 DRM_INFO("PCI error: mmio enabled callback!!\n");
4967
4968 /* TODO - dump whatever for debugging purposes */
4969
 4970 /* This is called only if amdgpu_pci_error_detected returns
4971 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
4972 * works, no need to reset slot.
4973 */
4974
4975 return PCI_ERS_RESULT_RECOVERED;
4976}
4977
4978/**
4979 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
4980 * @pdev: PCI device struct
4981 *
4982 * Description: This routine is called by the pci error recovery
4983 * code after the PCI slot has been reset, just before we
4984 * should resume normal operations.
4985 */
4986pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
4987{
4988 struct drm_device *dev = pci_get_drvdata(pdev);
4989 struct amdgpu_device *adev = drm_to_adev(dev);
362c7b91 4990 int r, i;
7ac71382 4991 bool need_full_reset = true;
362c7b91 4992 u32 memsize;
7ac71382 4993 struct list_head device_list;
c9a6b82f
AG
4994
4995 DRM_INFO("PCI error: slot reset callback!!\n");
4996
7ac71382
AG
4997 INIT_LIST_HEAD(&device_list);
4998 list_add_tail(&adev->gmc.xgmi.head, &device_list);
4999
362c7b91
AG
5000 /* wait for asic to come out of reset */
5001 msleep(500);
5002
7ac71382 5003 /* Restore PCI confspace */
c1dd4aa6 5004 amdgpu_device_load_pci_state(pdev);
c9a6b82f 5005
362c7b91
AG
5006 /* confirm ASIC came out of reset */
5007 for (i = 0; i < adev->usec_timeout; i++) {
5008 memsize = amdgpu_asic_get_config_memsize(adev);
5009
5010 if (memsize != 0xffffffff)
5011 break;
5012 udelay(1);
5013 }
5014 if (memsize == 0xffffffff) {
5015 r = -ETIME;
5016 goto out;
5017 }
5018
362c7b91 5019 adev->in_pci_err_recovery = true;
7ac71382 5020 r = amdgpu_device_pre_asic_reset(adev, NULL, &need_full_reset);
bf36b52e 5021 adev->in_pci_err_recovery = false;
c9a6b82f
AG
5022 if (r)
5023 goto out;
5024
7ac71382 5025 r = amdgpu_do_asic_reset(NULL, &device_list, &need_full_reset, true);
c9a6b82f
AG
5026
5027out:
c9a6b82f 5028 if (!r) {
c1dd4aa6
AG
5029 if (amdgpu_device_cache_pci_state(adev->pdev))
5030 pci_restore_state(adev->pdev);
5031
c9a6b82f
AG
5032 DRM_INFO("PCIe error recovery succeeded\n");
5033 } else {
5034 DRM_ERROR("PCIe error recovery failed, err:%d", r);
5035 amdgpu_device_unlock_adev(adev);
5036 }
5037
5038 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
5039}
5040
5041/**
5042 * amdgpu_pci_resume() - resume normal ops after PCI reset
5043 * @pdev: pointer to PCI device
5044 *
 5045 * Called when the error recovery driver tells us that it is
 5046 * OK to resume normal operation. Use completion to allow
 5047 * halted operations to resume.
5048 */
5049void amdgpu_pci_resume(struct pci_dev *pdev)
5050{
5051 struct drm_device *dev = pci_get_drvdata(pdev);
5052 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5053 int i;
c9a6b82f 5054
c9a6b82f
AG
5055
5056 DRM_INFO("PCI error: resume callback!!\n");
acd89fca
AG
5057
5058 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5059 struct amdgpu_ring *ring = adev->rings[i];
5060
5061 if (!ring || !ring->sched.thread)
5062 continue;
5063
5064
5065 drm_sched_resubmit_jobs(&ring->sched);
5066 drm_sched_start(&ring->sched, true);
5067 }
5068
5069 amdgpu_device_unlock_adev(adev);
c9a6b82f 5070}
c1dd4aa6
AG
5071
5072bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
5073{
5074 struct drm_device *dev = pci_get_drvdata(pdev);
5075 struct amdgpu_device *adev = drm_to_adev(dev);
5076 int r;
5077
5078 r = pci_save_state(pdev);
5079 if (!r) {
5080 kfree(adev->pci_state);
5081
5082 adev->pci_state = pci_store_saved_state(pdev);
5083
5084 if (!adev->pci_state) {
5085 DRM_ERROR("Failed to store PCI saved state");
5086 return false;
5087 }
5088 } else {
5089 DRM_WARN("Failed to save PCI state, err:%d\n", r);
5090 return false;
5091 }
5092
5093 return true;
5094}
5095
5096bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
5097{
5098 struct drm_device *dev = pci_get_drvdata(pdev);
5099 struct amdgpu_device *adev = drm_to_adev(dev);
5100 int r;
5101
5102 if (!adev->pci_state)
5103 return false;
5104
5105 r = pci_load_saved_state(pdev, adev->pci_state);
5106
5107 if (!r) {
5108 pci_restore_state(pdev);
5109 } else {
5110 DRM_WARN("Failed to load PCI state, err:%d\n", r);
5111 return false;
5112 }
5113
5114 return true;
5115}
5116
5117