Revert "drm/amdgpu: add psp RAP L0 check support"
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/console.h>
#include <linux/slab.h>

#include <drm/drm_atomic_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "nv.h"
#include "bif/bif_4_1_d.h"
#include <linux/pci.h>
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
#include "amdgpu_pmu.h"
#include "amdgpu_fru_eeprom.h"

#include <linux/suspend.h>
#include <drm/task_barrier.h>
#include <linux/pm_runtime.h>

MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vangogh_gpu_info.bin");

#define AMDGPU_RESUME_MS		2000

const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"ARCTURUS",
	"RENOIR",
	"ALDEBARAN",
	"NAVI10",
	"NAVI14",
	"NAVI12",
	"SIENNA_CICHLID",
	"NAVY_FLOUNDER",
	"VANGOGH",
	"DIMGREY_CAVEFISH",
	"LAST",
};

/**
 * DOC: pcie_replay_count
 *
 * The amdgpu driver provides a sysfs API for reporting the total number
 * of PCIe replays (NAKs).
 * The file pcie_replay_count is used for this and returns the total
 * number of replays as a sum of the NAKs generated and NAKs received.
 */

static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);
	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);

	return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
}

static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
		amdgpu_device_get_pcie_replay_count, NULL);

static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);

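/*
 * Illustration only (not part of the original file): a read-only attribute
 * like the one above is normally tied to the device during init. The group
 * name and the registration helper below are assumptions for the sketch,
 * not the driver's actual wiring.
 */
static struct attribute *amdgpu_example_attrs[] = {
	&dev_attr_pcie_replay_count.attr,
	NULL
};

static const struct attribute_group amdgpu_example_attr_group = {
	.attrs = amdgpu_example_attrs
};

/* Hypothetically called once while the device is being initialized. */
static int amdgpu_example_register_sysfs(struct amdgpu_device *adev)
{
	return sysfs_create_group(&adev->dev->kobj, &amdgpu_example_attr_group);
}
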
/**
 * DOC: product_name
 *
 * The amdgpu driver provides a sysfs API for reporting the product name
 * for the device.
 * The file product_name is used for this and returns the product name
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_product_name(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);

	return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_name);
}

static DEVICE_ATTR(product_name, S_IRUGO,
		amdgpu_device_get_product_name, NULL);

/**
 * DOC: product_number
 *
 * The amdgpu driver provides a sysfs API for reporting the part number
 * for the device.
 * The file product_number is used for this and returns the part number
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_product_number(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);

	return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_number);
}

static DEVICE_ATTR(product_number, S_IRUGO,
		amdgpu_device_get_product_number, NULL);

/**
 * DOC: serial_number
 *
 * The amdgpu driver provides a sysfs API for reporting the serial number
 * for the device.
 * The file serial_number is used for this and returns the serial number
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_serial_number(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);

	return snprintf(buf, PAGE_SIZE, "%s\n", adev->serial);
}

static DEVICE_ATTR(serial_number, S_IRUGO,
		amdgpu_device_get_serial_number, NULL);

/**
 * amdgpu_device_supports_atpx - Is the device a dGPU with HG/PX power control
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with HG/PX power control,
 * otherwise return false.
 */
bool amdgpu_device_supports_atpx(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);

	if (adev->flags & AMD_IS_PX)
		return true;
	return false;
}

/**
 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with ACPI power resources (PR3),
 * otherwise return false.
 */
bool amdgpu_device_supports_boco(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);

	if (adev->has_pr3)
		return true;
	return false;
}

/**
 * amdgpu_device_supports_baco - Does the device support BACO
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device supports BACO,
 * otherwise return false.
 */
bool amdgpu_device_supports_baco(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);

	return amdgpu_asic_supports_baco(adev);
}

/*
 * VRAM access helper functions
 */

/**
 * amdgpu_device_vram_access - read/write a buffer in vram
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size in bytes, @buf must be at least @size bytes
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
			       uint32_t *buf, size_t size, bool write)
{
	unsigned long flags;
	uint32_t hi = ~0;
	uint64_t last;


#ifdef CONFIG_64BIT
	last = min(pos + size, adev->gmc.visible_vram_size);
	if (last > pos) {
		void __iomem *addr = adev->mman.aper_base_kaddr + pos;
		size_t count = last - pos;

		if (write) {
			memcpy_toio(addr, buf, count);
			mb();
			amdgpu_asic_flush_hdp(adev, NULL);
		} else {
			amdgpu_asic_invalidate_hdp(adev, NULL);
			mb();
			memcpy_fromio(buf, addr, count);
		}

		if (count == size)
			return;

		pos += count;
		buf += count / 4;
		size -= count;
	}
#endif

	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
	for (last = pos + size; pos < last; pos += 4) {
		uint32_t tmp = pos >> 31;

		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
		if (tmp != hi) {
			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
			hi = tmp;
		}
		if (write)
			WREG32_NO_KIQ(mmMM_DATA, *buf++);
		else
			*buf++ = RREG32_NO_KIQ(mmMM_DATA);
	}
	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
}

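/*
 * Illustration only (not part of the original file): a minimal sketch of a
 * caller copying a handful of dwords out of VRAM through the helper above.
 * The offset is a made-up example value.
 */
static void example_dump_vram(struct amdgpu_device *adev)
{
	uint32_t data[16];
	loff_t pos = 0x1000;	/* arbitrary example offset */

	amdgpu_device_vram_access(adev, pos, data, sizeof(data), false);
	DRM_INFO("VRAM[0x%llx] = 0x%08x\n", (unsigned long long)pos, data[0]);
}
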
/*
 * register access helper functions.
 */
/**
 * amdgpu_device_rreg - read a memory mapped IO or indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
			    uint32_t reg, uint32_t acc_flags)
{
	uint32_t ret;

	if (adev->in_pci_err_recovery)
		return 0;

	if ((reg * 4) < adev->rmmio_size) {
		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
		    amdgpu_sriov_runtime(adev) &&
		    down_read_trylock(&adev->reset_sem)) {
			ret = amdgpu_kiq_rreg(adev, reg);
			up_read(&adev->reset_sem);
		} else {
			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
		}
	} else {
		ret = adev->pcie_rreg(adev, reg * 4);
	}

	trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);

	return ret;
}

/*
 * MMIO register read with bytes helper functions
 * @offset: byte offset from MMIO start
 */

/**
 * amdgpu_mm_rreg8 - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 *
 * Returns the 8 bit value from the offset specified.
 */
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
{
	if (adev->in_pci_err_recovery)
		return 0;

	if (offset < adev->rmmio_size)
		return (readb(adev->rmmio + offset));
	BUG();
}

/*
 * MMIO register write with bytes helper functions
 * @offset: byte offset from MMIO start
 * @value: the value to be written to the register
 */
/**
 * amdgpu_mm_wreg8 - write a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
{
	if (adev->in_pci_err_recovery)
		return;

	if (offset < adev->rmmio_size)
		writeb(value, adev->rmmio + offset);
	else
		BUG();
}

/**
 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_device_wreg(struct amdgpu_device *adev,
			uint32_t reg, uint32_t v,
			uint32_t acc_flags)
{
	if (adev->in_pci_err_recovery)
		return;

	if ((reg * 4) < adev->rmmio_size) {
		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
		    amdgpu_sriov_runtime(adev) &&
		    down_read_trylock(&adev->reset_sem)) {
			amdgpu_kiq_wreg(adev, reg, v);
			up_read(&adev->reset_sem);
		} else {
			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
		}
	} else {
		adev->pcie_wreg(adev, reg * 4, v);
	}

	trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
}

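/*
 * Illustration only (not part of the original file): the typical
 * read-modify-write pattern built on the two helpers above. The register
 * offset and bit below are made-up placeholders, not real amdgpu registers.
 */
#define EXAMPLE_REG_OFFSET	0x1234
#define EXAMPLE_ENABLE_BIT	(1 << 0)

static void example_set_enable_bit(struct amdgpu_device *adev)
{
	uint32_t val;

	val = amdgpu_device_rreg(adev, EXAMPLE_REG_OFFSET, 0);
	val |= EXAMPLE_ENABLE_BIT;
	amdgpu_device_wreg(adev, EXAMPLE_REG_OFFSET, val, 0);
}
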
/*
 * amdgpu_mm_wreg_mmio_rlc - write register either with mmio or with RLC path if in range
 *
 * This function is invoked only for debugfs register access.
 */
void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
			     uint32_t reg, uint32_t v)
{
	if (adev->in_pci_err_recovery)
		return;

	if (amdgpu_sriov_fullaccess(adev) &&
	    adev->gfx.rlc.funcs &&
	    adev->gfx.rlc.funcs->is_rlcg_access_range) {
		if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
			return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v);
	} else {
		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
	}
}

/**
 * amdgpu_io_rreg - read an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 *
 * Returns the 32 bit value from the offset specified.
 */
u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
{
	if (adev->in_pci_err_recovery)
		return 0;

	if ((reg * 4) < adev->rio_mem_size)
		return ioread32(adev->rio_mem + (reg * 4));
	else {
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		return ioread32(adev->rio_mem + (mmMM_DATA * 4));
	}
}

/**
 * amdgpu_io_wreg - write to an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
	if (adev->in_pci_err_recovery)
		return;

	if ((reg * 4) < adev->rio_mem_size)
		iowrite32(v, adev->rio_mem + (reg * 4));
	else {
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
	}
}

/**
 * amdgpu_mm_rdoorbell - read a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
{
	if (adev->in_pci_err_recovery)
		return 0;

	if (index < adev->doorbell.num_doorbells) {
		return readl(adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell - write a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
{
	if (adev->in_pci_err_recovery)
		return;

	if (index < adev->doorbell.num_doorbells) {
		writel(v, adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

/**
 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
{
	if (adev->in_pci_err_recovery)
		return 0;

	if (index < adev->doorbell.num_doorbells) {
		return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
{
	if (adev->in_pci_err_recovery)
		return;

	if (index < adev->doorbell.num_doorbells) {
		atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

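/*
 * Illustration only (not part of the original file): the usual pattern around
 * the doorbell helpers is to post a ring's updated write pointer to its
 * doorbell slot so the hardware starts fetching. The parameters here are
 * assumptions for the sketch; the real ring code goes through its own
 * callbacks.
 */
static void example_commit_ring_wptr(struct amdgpu_device *adev,
				     u32 doorbell_index, u64 wptr)
{
	/* 64-bit doorbells are used on VEGA10 and newer */
	if (adev->asic_type >= CHIP_VEGA10)
		amdgpu_mm_wdoorbell64(adev, doorbell_index, wptr);
	else
		amdgpu_mm_wdoorbell(adev, doorbell_index, (u32)wptr);
}
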
/**
 * amdgpu_device_indirect_rreg - read an indirect register
 *
 * @adev: amdgpu_device pointer
 * @pcie_index: mmio register offset
 * @pcie_data: mmio register offset
 * @reg_addr: indirect register address to read from
 *
 * Returns the value of indirect register @reg_addr
 */
u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
				u32 pcie_index, u32 pcie_data,
				u32 reg_addr)
{
	unsigned long flags;
	u32 r;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	r = readl(pcie_data_offset);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return r;
}

/**
 * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
 *
 * @adev: amdgpu_device pointer
 * @pcie_index: mmio register offset
 * @pcie_data: mmio register offset
 * @reg_addr: indirect register address to read from
 *
 * Returns the value of indirect register @reg_addr
 */
u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
				  u32 pcie_index, u32 pcie_data,
				  u32 reg_addr)
{
	unsigned long flags;
	u64 r;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	/* read low 32 bits */
	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	r = readl(pcie_data_offset);
	/* read high 32 bits */
	writel(reg_addr + 4, pcie_index_offset);
	readl(pcie_index_offset);
	r |= ((u64)readl(pcie_data_offset) << 32);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return r;
}

/**
 * amdgpu_device_indirect_wreg - write an indirect register address
 *
 * @adev: amdgpu_device pointer
 * @pcie_index: mmio register offset
 * @pcie_data: mmio register offset
 * @reg_addr: indirect register offset
 * @reg_data: indirect register data
 *
 */
void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
				 u32 pcie_index, u32 pcie_data,
				 u32 reg_addr, u32 reg_data)
{
	unsigned long flags;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	writel(reg_data, pcie_data_offset);
	readl(pcie_data_offset);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

/**
 * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
 *
 * @adev: amdgpu_device pointer
 * @pcie_index: mmio register offset
 * @pcie_data: mmio register offset
 * @reg_addr: indirect register offset
 * @reg_data: indirect register data
 *
 */
void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
				   u32 pcie_index, u32 pcie_data,
				   u32 reg_addr, u64 reg_data)
{
	unsigned long flags;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	/* write low 32 bits */
	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
	readl(pcie_data_offset);
	/* write high 32 bits */
	writel(reg_addr + 4, pcie_index_offset);
	readl(pcie_index_offset);
	writel((u32)(reg_data >> 32), pcie_data_offset);
	readl(pcie_data_offset);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

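/*
 * Illustration only (not part of the original file): a sketch of how an
 * ASIC-specific file might route its pcie_rreg / pcie_wreg callbacks through
 * the generic indirect helpers above. The EXAMPLE_* index/data offsets are
 * placeholders, not real register offsets.
 */
#define EXAMPLE_PCIE_INDEX	0x38
#define EXAMPLE_PCIE_DATA	0x3c

static u32 example_pcie_rreg(struct amdgpu_device *adev, u32 reg)
{
	return amdgpu_device_indirect_rreg(adev, EXAMPLE_PCIE_INDEX,
					   EXAMPLE_PCIE_DATA, reg);
}

static void example_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
	amdgpu_device_indirect_wreg(adev, EXAMPLE_PCIE_INDEX,
				    EXAMPLE_PCIE_DATA, reg, v);
}
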
/**
 * amdgpu_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg64 - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
{
	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_block_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu_device pointer
 * @block: offset of instance
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
					  uint32_t block, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
		  reg, block);
	BUG();
	return 0;
}

/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
				      uint32_t block,
				      uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
		  reg, block, v);
	BUG();
}

/**
 * amdgpu_device_asic_init - Wrapper for atom asic_init
 *
 * @adev: amdgpu_device pointer
 *
 * Does any asic specific work and then calls atom asic init.
 */
static int amdgpu_device_asic_init(struct amdgpu_device *adev)
{
	amdgpu_asic_pre_asic_init(adev);

	return amdgpu_atom_asic_init(adev->mode_info.atom_context);
}

/**
 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
 *
 * @adev: amdgpu_device pointer
 *
 * Allocates a scratch page of VRAM for use by various things in the
 * driver.
 */
static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
{
	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
				       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
				       &adev->vram_scratch.robj,
				       &adev->vram_scratch.gpu_addr,
				       (void **)&adev->vram_scratch.ptr);
}

/**
 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
 *
 * @adev: amdgpu_device pointer
 *
 * Frees the VRAM scratch page.
 */
static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
}

/**
 * amdgpu_device_program_register_sequence - program an array of registers.
 *
 * @adev: amdgpu_device pointer
 * @registers: pointer to the register array
 * @array_size: size of the register array
 *
 * Programs an array of registers with AND and OR masks.
 * This is a helper for setting golden registers.
 */
void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
					     const u32 *registers,
					     const u32 array_size)
{
	u32 tmp, reg, and_mask, or_mask;
	int i;

	if (array_size % 3)
		return;

	for (i = 0; i < array_size; i += 3) {
		reg = registers[i + 0];
		and_mask = registers[i + 1];
		or_mask = registers[i + 2];

		if (and_mask == 0xffffffff) {
			tmp = or_mask;
		} else {
			tmp = RREG32(reg);
			tmp &= ~and_mask;
			if (adev->family >= AMDGPU_FAMILY_AI)
				tmp |= (or_mask & and_mask);
			else
				tmp |= or_mask;
		}
		WREG32(reg, tmp);
	}
}

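/*
 * Illustration only (not part of the original file): a golden-register table
 * is a flat array of {register, and_mask, or_mask} triples. The offsets and
 * masks below are made-up placeholders, not real golden settings.
 */
static const u32 example_golden_settings[] = {
	/* reg,   and_mask,    or_mask */
	0x1234, 0x0000000f, 0x00000002,
	0x5678, 0xffffffff, 0x00010000,	/* and_mask of ~0 writes or_mask directly */
};

static void example_init_golden_registers(struct amdgpu_device *adev)
{
	amdgpu_device_program_register_sequence(adev,
						example_golden_settings,
						ARRAY_SIZE(example_golden_settings));
}
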
/**
 * amdgpu_device_pci_config_reset - reset the GPU
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using the pci config reset sequence.
 * Only applicable to asics prior to vega10.
 */
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
{
	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
}

/**
 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
 */
int amdgpu_device_pci_reset(struct amdgpu_device *adev)
{
	return pci_reset_function(adev->pdev);
}

/*
 * GPU doorbell aperture helpers function.
 */
/**
 * amdgpu_device_doorbell_init - Init doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Init doorbell driver information (CIK)
 * Returns 0 on success, error on failure.
 */
static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
{

	/* No doorbell on SI hardware generation */
	if (adev->asic_type < CHIP_BONAIRE) {
		adev->doorbell.base = 0;
		adev->doorbell.size = 0;
		adev->doorbell.num_doorbells = 0;
		adev->doorbell.ptr = NULL;
		return 0;
	}

	if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
		return -EINVAL;

	amdgpu_asic_init_doorbell_index(adev);

	/* doorbell bar mapping */
	adev->doorbell.base = pci_resource_start(adev->pdev, 2);
	adev->doorbell.size = pci_resource_len(adev->pdev, 2);

	adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
					     adev->doorbell_index.max_assignment+1);
	if (adev->doorbell.num_doorbells == 0)
		return -EINVAL;

	/* For Vega, reserve and map two pages on the doorbell BAR since the SDMA
	 * paging queue doorbell uses the second page. The
	 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
	 * doorbells are in the first page. So with the paging queue enabled,
	 * the max num_doorbells should be increased by one page (0x400 in dwords).
	 */
	if (adev->asic_type >= CHIP_VEGA10)
		adev->doorbell.num_doorbells += 0x400;

	adev->doorbell.ptr = ioremap(adev->doorbell.base,
				     adev->doorbell.num_doorbells *
				     sizeof(u32));
	if (adev->doorbell.ptr == NULL)
		return -ENOMEM;

	return 0;
}

/**
 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Tear down doorbell driver information (CIK)
 */
static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
{
	iounmap(adev->doorbell.ptr);
	adev->doorbell.ptr = NULL;
}


/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */

/**
 * amdgpu_device_wb_fini - Disable Writeback and free memory
 *
 * @adev: amdgpu_device pointer
 *
 * Disables Writeback and frees the Writeback memory (all asics).
 * Used at driver shutdown.
 */
static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
{
	if (adev->wb.wb_obj) {
		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
				      &adev->wb.gpu_addr,
				      (void **)&adev->wb.wb);
		adev->wb.wb_obj = NULL;
	}
}

/**
 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
 *
 * @adev: amdgpu_device pointer
 *
 * Initializes writeback and allocates writeback memory (all asics).
 * Used at driver startup.
 * Returns 0 on success or an -error on failure.
 */
static int amdgpu_device_wb_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->wb.wb_obj == NULL) {
		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
					    (void **)&adev->wb.wb);
		if (r) {
			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
			return r;
		}

		adev->wb.num_wb = AMDGPU_MAX_WB;
		memset(&adev->wb.used, 0, sizeof(adev->wb.used));

		/* clear wb memory */
		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
	}

	return 0;
}

/**
 * amdgpu_device_wb_get - Allocate a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Allocate a wb slot for use by the driver (all asics).
 * Returns 0 on success or -EINVAL on failure.
 */
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
{
	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);

	if (offset < adev->wb.num_wb) {
		__set_bit(offset, adev->wb.used);
		*wb = offset << 3; /* convert to dw offset */
		return 0;
	} else {
		return -EINVAL;
	}
}

/**
 * amdgpu_device_wb_free - Free a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Free a wb slot allocated for use by the driver (all asics)
 */
void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
{
	wb >>= 3;
	if (wb < adev->wb.num_wb)
		__clear_bit(wb, adev->wb.used);
}

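/*
 * Illustration only (not part of the original file): the usual lifetime of a
 * writeback slot. A caller grabs a dword offset, lets the GPU write status
 * there, reads it back through the CPU mapping, and releases it again.
 * Error handling is trimmed for brevity.
 */
static int example_use_wb_slot(struct amdgpu_device *adev)
{
	u32 wb_offset;
	u32 status;
	int r;

	r = amdgpu_device_wb_get(adev, &wb_offset);
	if (r)
		return r;

	/* GPU-visible address would be adev->wb.gpu_addr + wb_offset * 4 */
	/* CPU-visible value written back by the GPU: */
	status = adev->wb.wb[wb_offset];
	(void)status;

	amdgpu_device_wb_free(adev, wb_offset);
	return 0;
}
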
/**
 * amdgpu_device_resize_fb_bar - try to resize FB BAR
 *
 * @adev: amdgpu_device pointer
 *
 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 * to fail, but if any of the BARs is not accessible after the resize we abort
 * driver loading by returning -ENODEV.
 */
int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
{
	int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
	struct pci_bus *root;
	struct resource *res;
	unsigned i;
	u16 cmd;
	int r;

	/* Bypass for VF */
	if (amdgpu_sriov_vf(adev))
		return 0;

	/* skip if the bios has already enabled large BAR */
	if (adev->gmc.real_vram_size &&
	    (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
		return 0;

	/* Check if the root BUS has 64bit memory resources */
	root = adev->pdev->bus;
	while (root->parent)
		root = root->parent;

	pci_bus_for_each_resource(root, res, i) {
		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
		    res->start > 0x100000000ull)
			break;
	}

	/* Trying to resize is pointless without a root hub window above 4GB */
	if (!res)
		return 0;

	/* Limit the BAR size to what is available */
	rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
			rbar_size);

	/* Disable memory decoding while we change the BAR addresses and size */
	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
	pci_write_config_word(adev->pdev, PCI_COMMAND,
			      cmd & ~PCI_COMMAND_MEMORY);

	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
	amdgpu_device_doorbell_fini(adev);
	if (adev->asic_type >= CHIP_BONAIRE)
		pci_release_resource(adev->pdev, 2);

	pci_release_resource(adev->pdev, 0);

	r = pci_resize_resource(adev->pdev, 0, rbar_size);
	if (r == -ENOSPC)
		DRM_INFO("Not enough PCI address space for a large BAR.");
	else if (r && r != -ENOTSUPP)
		DRM_ERROR("Problem resizing BAR0 (%d).", r);

	pci_assign_unassigned_bus_resources(adev->pdev->bus);

	/* When the doorbell or fb BAR isn't available we have no chance of
	 * using the device.
	 */
	r = amdgpu_device_doorbell_init(adev);
	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
		return -ENODEV;

	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);

	return 0;
}

/*
 * GPU helpers function.
 */
/**
 * amdgpu_device_need_post - check if the hw need post or not
 *
 * @adev: amdgpu_device pointer
 *
 * Check if the asic has been initialized (all asics) at driver startup
 * or post is needed if hw reset is performed.
 * Returns true if need or false if not.
 */
bool amdgpu_device_need_post(struct amdgpu_device *adev)
{
	uint32_t reg;

	if (amdgpu_sriov_vf(adev))
		return false;

	if (amdgpu_passthrough(adev)) {
		/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
		 * some old smc fw still need the driver to do vPost, otherwise gpu hang;
		 * smc fw versions above 22.15 don't have this flaw, so we force
		 * vPost to be executed for smc fw versions below 22.15
		 */
		if (adev->asic_type == CHIP_FIJI) {
			int err;
			uint32_t fw_ver;
			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
			/* force vPost if an error occurred */
			if (err)
				return true;

			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
			if (fw_ver < 0x00160e00)
				return true;
		}
	}

	if (adev->has_hw_reset) {
		adev->has_hw_reset = false;
		return true;
	}

	/* bios scratch used on CIK+ */
	if (adev->asic_type >= CHIP_BONAIRE)
		return amdgpu_atombios_scratch_need_asic_init(adev);

	/* check MEM_SIZE for older asics */
	reg = amdgpu_asic_get_config_memsize(adev);

	if ((reg != 0) && (reg != 0xffffffff))
		return false;

	return true;
}

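/*
 * Illustration only (not part of the original file): a sketch of how early
 * device bring-up typically pairs the check above with the atom asic_init
 * wrapper defined earlier in this file. The surrounding init sequence and
 * error handling are omitted.
 */
static int example_post_if_needed(struct amdgpu_device *adev)
{
	if (!amdgpu_device_need_post(adev))
		return 0;	/* VBIOS already posted the asic */

	DRM_INFO("GPU not posted, posting now...\n");
	return amdgpu_device_asic_init(adev);
}
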
/* if we get transitioned to only one device, take VGA back */
/**
 * amdgpu_device_vga_set_decode - enable/disable vga decode
 *
 * @cookie: amdgpu_device pointer
 * @state: enable/disable vga decode
 *
 * Enable/disable vga decode (all asics).
 * Returns VGA resource flags.
 */
static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
{
	struct amdgpu_device *adev = cookie;
	amdgpu_asic_set_vga_state(adev, state);
	if (state)
		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
	else
		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
}

/**
 * amdgpu_device_check_block_size - validate the vm block size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm block size specified via module parameter.
 * The vm block size defines the number of bits in page table versus page directory,
 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
 * page table and the remaining bits are in the page directory.
 */
static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
{
	/* defines number of bits in page table versus page directory,
	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
	 * page table and the remaining bits are in the page directory */
	if (amdgpu_vm_block_size == -1)
		return;

	if (amdgpu_vm_block_size < 9) {
		dev_warn(adev->dev, "VM page table size (%d) too small\n",
			 amdgpu_vm_block_size);
		amdgpu_vm_block_size = -1;
	}
}

/**
 * amdgpu_device_check_vm_size - validate the vm size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm size in GB specified via module parameter.
 * The VM size is the size of the GPU virtual memory space in GB.
 */
static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
{
	/* no need to check the default value */
	if (amdgpu_vm_size == -1)
		return;

	if (amdgpu_vm_size < 1) {
		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
			 amdgpu_vm_size);
		amdgpu_vm_size = -1;
	}
}

static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
{
	struct sysinfo si;
	bool is_os_64 = (sizeof(void *) == 8);
	uint64_t total_memory;
	uint64_t dram_size_seven_GB = 0x1B8000000;
	uint64_t dram_size_three_GB = 0xB8000000;

	if (amdgpu_smu_memory_pool_size == 0)
		return;

	if (!is_os_64) {
		DRM_WARN("Not 64-bit OS, feature not supported\n");
		goto def_value;
	}
	si_meminfo(&si);
	total_memory = (uint64_t)si.totalram * si.mem_unit;

	if ((amdgpu_smu_memory_pool_size == 1) ||
	    (amdgpu_smu_memory_pool_size == 2)) {
		if (total_memory < dram_size_three_GB)
			goto def_value1;
	} else if ((amdgpu_smu_memory_pool_size == 4) ||
		   (amdgpu_smu_memory_pool_size == 8)) {
		if (total_memory < dram_size_seven_GB)
			goto def_value1;
	} else {
		DRM_WARN("Smu memory pool size not supported\n");
		goto def_value;
	}
	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;

	return;

def_value1:
	DRM_WARN("Not enough system memory\n");
def_value:
	adev->pm.smu_prv_buffer_size = 0;
}

/**
 * amdgpu_device_check_arguments - validate module params
 *
 * @adev: amdgpu_device pointer
 *
 * Validates certain module parameters and updates
 * the associated values used by the driver (all asics).
 */
static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
{
	if (amdgpu_sched_jobs < 4) {
		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = 4;
	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
	}

	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
		/* gart size must be greater or equal to 32M */
		dev_warn(adev->dev, "gart size (%d) too small\n",
			 amdgpu_gart_size);
		amdgpu_gart_size = -1;
	}

	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
		/* gtt size must be greater or equal to 32M */
		dev_warn(adev->dev, "gtt size (%d) too small\n",
			 amdgpu_gtt_size);
		amdgpu_gtt_size = -1;
	}

	/* valid range is between 4 and 9 inclusive */
	if (amdgpu_vm_fragment_size != -1 &&
	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
		dev_warn(adev->dev, "valid range is between 4 and 9\n");
		amdgpu_vm_fragment_size = -1;
	}

	if (amdgpu_sched_hw_submission < 2) {
		dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
			 amdgpu_sched_hw_submission);
		amdgpu_sched_hw_submission = 2;
	} else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
		dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
			 amdgpu_sched_hw_submission);
		amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
	}

	amdgpu_device_check_smu_prv_buffer_size(adev);

	amdgpu_device_check_vm_size(adev);

	amdgpu_device_check_block_size(adev);

	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);

	amdgpu_gmc_tmz_set(adev);

	amdgpu_gmc_noretry_set(adev);

	return 0;
}

/**
 * amdgpu_switcheroo_set_state - set switcheroo state
 *
 * @pdev: pci dev pointer
 * @state: vga_switcheroo state
 *
 * Callback for the switcheroo driver. Suspends or resumes
 * the asics before or after it is powered up using ACPI methods.
 */
static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
					enum vga_switcheroo_state state)
{
	struct drm_device *dev = pci_get_drvdata(pdev);
	int r;

	if (amdgpu_device_supports_atpx(dev) && state == VGA_SWITCHEROO_OFF)
		return;

	if (state == VGA_SWITCHEROO_ON) {
		pr_info("switched on\n");
		/* don't suspend or resume card normally */
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;

		pci_set_power_state(pdev, PCI_D0);
		amdgpu_device_load_pci_state(pdev);
		r = pci_enable_device(pdev);
		if (r)
			DRM_WARN("pci_enable_device failed (%d)\n", r);
		amdgpu_device_resume(dev, true);

		dev->switch_power_state = DRM_SWITCH_POWER_ON;
	} else {
		pr_info("switched off\n");
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
		amdgpu_device_suspend(dev, true);
		amdgpu_device_cache_pci_state(pdev);
		/* Shut down the device */
		pci_disable_device(pdev);
		pci_set_power_state(pdev, PCI_D3cold);
		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
	}
}

/**
 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
 *
 * @pdev: pci dev pointer
 *
 * Callback for the switcheroo driver. Check if the switcheroo
 * state can be changed.
 * Returns true if the state can be changed, false if not.
 */
static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);

	/*
	 * FIXME: open_count is protected by drm_global_mutex but that would lead to
	 * locking inversion with the driver load path. And the access here is
	 * completely racy anyway. So don't bother with locking for now.
	 */
	return atomic_read(&dev->open_count) == 0;
}

static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
	.set_gpu_state = amdgpu_switcheroo_set_state,
	.reprobe = NULL,
	.can_switch = amdgpu_switcheroo_can_switch,
};

/**
 * amdgpu_device_ip_set_clockgating_state - set the CG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: clockgating state (gate or ungate)
 *
 * Sets the requested clockgating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_clockgating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

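/*
 * Illustration only (not part of the original file): gate clock and power for
 * one IP block, e.g. VCN while it is idle. Whether a given asic honors this
 * depends entirely on the callbacks its IP blocks provide.
 */
static void example_gate_vcn(struct amdgpu_device *adev)
{
	amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
					       AMD_CG_STATE_GATE);
	amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
					       AMD_PG_STATE_GATE);
}
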
/**
 * amdgpu_device_ip_set_powergating_state - set the PG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: powergating state (gate or ungate)
 *
 * Sets the requested powergating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_powergating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_powergating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

/**
 * amdgpu_device_ip_get_clockgating_state - get the CG state
 *
 * @adev: amdgpu_device pointer
 * @flags: clockgating feature flags
 *
 * Walks the list of IPs on the device and updates the clockgating
 * flags for each IP.
 * Updates @flags with the feature flags for each hardware IP where
 * clockgating is enabled.
 */
void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
					    u32 *flags)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
	}
}

/**
 * amdgpu_device_ip_wait_for_idle - wait for idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Waits for the requested hardware IP to be idle.
 * Returns 0 for success or a negative error code on failure.
 */
int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
				   enum amd_ip_block_type block_type)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type) {
			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
			if (r)
				return r;
			break;
		}
	}
	return 0;

}

/**
 * amdgpu_device_ip_is_idle - is the hardware IP idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Check if the hardware IP is idle or not.
 * Returns true if the IP is idle, false if not.
 */
bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
			      enum amd_ip_block_type block_type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type)
			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
	}
	return true;

}

/**
 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
 *
 * @adev: amdgpu_device pointer
 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Returns a pointer to the hardware IP block structure
 * if it exists for the asic, otherwise NULL.
 */
struct amdgpu_ip_block *
amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
			      enum amd_ip_block_type type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++)
		if (adev->ip_blocks[i].version->type == type)
			return &adev->ip_blocks[i];

	return NULL;
}

/**
 * amdgpu_device_ip_block_version_cmp
 *
 * @adev: amdgpu_device pointer
 * @type: enum amd_ip_block_type
 * @major: major version
 * @minor: minor version
 *
 * return 0 if equal or greater
 * return 1 if smaller or the ip_block doesn't exist
 */
int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
				       enum amd_ip_block_type type,
				       u32 major, u32 minor)
{
	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);

	if (ip_block && ((ip_block->version->major > major) ||
			 ((ip_block->version->major == major) &&
			  (ip_block->version->minor >= minor))))
		return 0;

	return 1;
}

/**
 * amdgpu_device_ip_block_add
 *
 * @adev: amdgpu_device pointer
 * @ip_block_version: pointer to the IP to add
 *
 * Adds the IP block driver information to the collection of IPs
 * on the asic.
 */
int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
			       const struct amdgpu_ip_block_version *ip_block_version)
{
	if (!ip_block_version)
		return -EINVAL;

	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
		 ip_block_version->funcs->name);

	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;

	return 0;
}

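/*
 * Illustration only (not part of the original file): an asic's set_ip_blocks
 * routine typically registers its hardware IPs in bring-up order. The two
 * blocks named here come from the SOC15 code and are only examples; their
 * headers are not included by this file, and no real asic uses this exact
 * table.
 */
static int example_set_ip_blocks(struct amdgpu_device *adev)
{
	int r;

	r = amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
	if (r)
		return r;
	r = amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
	if (r)
		return r;

	return 0;
}
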
e3ecdffa
AD
1702/**
1703 * amdgpu_device_enable_virtual_display - enable virtual display feature
1704 *
1705 * @adev: amdgpu_device pointer
1706 *
1707 * Enabled the virtual display feature if the user has enabled it via
1708 * the module parameter virtual_display. This feature provides a virtual
1709 * display hardware on headless boards or in virtualized environments.
1710 * This function parses and validates the configuration string specified by
1711 * the user and configues the virtual display configuration (number of
1712 * virtual connectors, crtcs, etc.) specified.
1713 */
483ef985 1714static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1715{
1716 adev->enable_virtual_display = false;
1717
1718 if (amdgpu_virtual_display) {
8f66090b 1719 const char *pci_address_name = pci_name(adev->pdev);
0f66356d 1720 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1721
1722 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1723 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1724 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1725 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1726 if (!strcmp("all", pciaddname)
1727 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1728 long num_crtc;
1729 int res = -1;
1730
9accf2fd 1731 adev->enable_virtual_display = true;
0f66356d
ED
1732
1733 if (pciaddname_tmp)
1734 res = kstrtol(pciaddname_tmp, 10,
1735 &num_crtc);
1736
1737 if (!res) {
1738 if (num_crtc < 1)
1739 num_crtc = 1;
1740 if (num_crtc > 6)
1741 num_crtc = 6;
1742 adev->mode_info.num_crtc = num_crtc;
1743 } else {
1744 adev->mode_info.num_crtc = 1;
1745 }
9accf2fd
ED
1746 break;
1747 }
1748 }
1749
0f66356d
ED
1750 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1751 amdgpu_virtual_display, pci_address_name,
1752 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
1753
1754 kfree(pciaddstr);
1755 }
1756}
1757
e3ecdffa
AD
1758/**
1759 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1760 *
1761 * @adev: amdgpu_device pointer
1762 *
1763 * Parses the asic configuration parameters specified in the gpu info
 1764 * firmware and makes them available to the driver for use in configuring
1765 * the asic.
1766 * Returns 0 on success, -EINVAL on failure.
1767 */
e2a75f88
AD
1768static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1769{
e2a75f88 1770 const char *chip_name;
c0a43457 1771 char fw_name[40];
e2a75f88
AD
1772 int err;
1773 const struct gpu_info_firmware_header_v1_0 *hdr;
1774
ab4fe3e1
HR
1775 adev->firmware.gpu_info_fw = NULL;
1776
72de33f8 1777 if (adev->mman.discovery_bin) {
258620d0 1778 amdgpu_discovery_get_gfx_info(adev);
cc375d8c
TY
1779
1780 /*
1781 * FIXME: The bounding box is still needed by Navi12, so
 1782 * temporarily read it from gpu_info firmware. Should be dropped
1783 * when DAL no longer needs it.
1784 */
1785 if (adev->asic_type != CHIP_NAVI12)
1786 return 0;
258620d0
AD
1787 }
1788
e2a75f88 1789 switch (adev->asic_type) {
e2a75f88
AD
1790#ifdef CONFIG_DRM_AMDGPU_SI
1791 case CHIP_VERDE:
1792 case CHIP_TAHITI:
1793 case CHIP_PITCAIRN:
1794 case CHIP_OLAND:
1795 case CHIP_HAINAN:
1796#endif
1797#ifdef CONFIG_DRM_AMDGPU_CIK
1798 case CHIP_BONAIRE:
1799 case CHIP_HAWAII:
1800 case CHIP_KAVERI:
1801 case CHIP_KABINI:
1802 case CHIP_MULLINS:
1803#endif
da87c30b
AD
1804 case CHIP_TOPAZ:
1805 case CHIP_TONGA:
1806 case CHIP_FIJI:
1807 case CHIP_POLARIS10:
1808 case CHIP_POLARIS11:
1809 case CHIP_POLARIS12:
1810 case CHIP_VEGAM:
1811 case CHIP_CARRIZO:
1812 case CHIP_STONEY:
27c0bc71 1813 case CHIP_VEGA20:
44b3253a 1814 case CHIP_ALDEBARAN:
84d244a3
JC
1815 case CHIP_SIENNA_CICHLID:
1816 case CHIP_NAVY_FLOUNDER:
eac88a5f 1817 case CHIP_DIMGREY_CAVEFISH:
e2a75f88
AD
1818 default:
1819 return 0;
1820 case CHIP_VEGA10:
1821 chip_name = "vega10";
1822 break;
3f76dced
AD
1823 case CHIP_VEGA12:
1824 chip_name = "vega12";
1825 break;
2d2e5e7e 1826 case CHIP_RAVEN:
54f78a76 1827 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
54c4d17e 1828 chip_name = "raven2";
54f78a76 1829 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
741deade 1830 chip_name = "picasso";
54c4d17e
FX
1831 else
1832 chip_name = "raven";
2d2e5e7e 1833 break;
65e60f6e
LM
1834 case CHIP_ARCTURUS:
1835 chip_name = "arcturus";
1836 break;
b51a26a0 1837 case CHIP_RENOIR:
2e62f0b5
PL
1838 if (adev->apu_flags & AMD_APU_IS_RENOIR)
1839 chip_name = "renoir";
1840 else
1841 chip_name = "green_sardine";
b51a26a0 1842 break;
23c6268e
HR
1843 case CHIP_NAVI10:
1844 chip_name = "navi10";
1845 break;
ed42cfe1
XY
1846 case CHIP_NAVI14:
1847 chip_name = "navi14";
1848 break;
42b325e5
XY
1849 case CHIP_NAVI12:
1850 chip_name = "navi12";
1851 break;
4e52a9f8
HR
1852 case CHIP_VANGOGH:
1853 chip_name = "vangogh";
1854 break;
e2a75f88
AD
1855 }
1856
1857 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
ab4fe3e1 1858 err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
e2a75f88
AD
1859 if (err) {
1860 dev_err(adev->dev,
1861 "Failed to load gpu_info firmware \"%s\"\n",
1862 fw_name);
1863 goto out;
1864 }
ab4fe3e1 1865 err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
e2a75f88
AD
1866 if (err) {
1867 dev_err(adev->dev,
1868 "Failed to validate gpu_info firmware \"%s\"\n",
1869 fw_name);
1870 goto out;
1871 }
1872
ab4fe3e1 1873 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
1874 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1875
1876 switch (hdr->version_major) {
1877 case 1:
1878 {
1879 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 1880 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
1881 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1882
cc375d8c
TY
1883 /*
 1884 * Should be dropped when DAL no longer needs it.
1885 */
1886 if (adev->asic_type == CHIP_NAVI12)
ec51d3fa
XY
1887 goto parse_soc_bounding_box;
1888
b5ab16bf
AD
1889 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1890 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1891 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1892 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 1893 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
1894 le32_to_cpu(gpu_info_fw->gc_num_tccs);
1895 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1896 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1897 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1898 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 1899 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
1900 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1901 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
1902 adev->gfx.cu_info.max_waves_per_simd =
1903 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1904 adev->gfx.cu_info.max_scratch_slots_per_cu =
1905 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1906 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 1907 if (hdr->version_minor >= 1) {
35c2e910
HZ
1908 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1909 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1910 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1911 adev->gfx.config.num_sc_per_sh =
1912 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1913 adev->gfx.config.num_packer_per_sc =
1914 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1915 }
ec51d3fa
XY
1916
1917parse_soc_bounding_box:
ec51d3fa
XY
1918 /*
 1919 * soc bounding box info is not integrated in the discovery table,
258620d0 1920 * we always need to parse it from gpu info firmware if needed.
ec51d3fa 1921 */
48321c3d
HW
1922 if (hdr->version_minor == 2) {
1923 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1924 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1925 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1926 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1927 }
e2a75f88
AD
1928 break;
1929 }
1930 default:
1931 dev_err(adev->dev,
1932 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1933 err = -EINVAL;
1934 goto out;
1935 }
1936out:
e2a75f88
AD
1937 return err;
1938}
1939
e3ecdffa
AD
1940/**
1941 * amdgpu_device_ip_early_init - run early init for hardware IPs
1942 *
1943 * @adev: amdgpu_device pointer
1944 *
1945 * Early initialization pass for hardware IPs. The hardware IPs that make
 1946 * up each asic are discovered and each IP's early_init callback is run. This
1947 * is the first stage in initializing the asic.
1948 * Returns 0 on success, negative error code on failure.
1949 */
06ec9070 1950static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 1951{
aaa36a97 1952 int i, r;
d38ceaf9 1953
483ef985 1954 amdgpu_device_enable_virtual_display(adev);
a6be7570 1955
00a979f3 1956 if (amdgpu_sriov_vf(adev)) {
00a979f3 1957 r = amdgpu_virt_request_full_gpu(adev, true);
aaa36a97
AD
1958 if (r)
1959 return r;
00a979f3
WS
1960 }
1961
d38ceaf9 1962 switch (adev->asic_type) {
33f34802
KW
1963#ifdef CONFIG_DRM_AMDGPU_SI
1964 case CHIP_VERDE:
1965 case CHIP_TAHITI:
1966 case CHIP_PITCAIRN:
1967 case CHIP_OLAND:
1968 case CHIP_HAINAN:
295d0daf 1969 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
1970 r = si_set_ip_blocks(adev);
1971 if (r)
1972 return r;
1973 break;
1974#endif
a2e73f56
AD
1975#ifdef CONFIG_DRM_AMDGPU_CIK
1976 case CHIP_BONAIRE:
1977 case CHIP_HAWAII:
1978 case CHIP_KAVERI:
1979 case CHIP_KABINI:
1980 case CHIP_MULLINS:
e1ad2d53 1981 if (adev->flags & AMD_IS_APU)
a2e73f56 1982 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
1983 else
1984 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
1985
1986 r = cik_set_ip_blocks(adev);
1987 if (r)
1988 return r;
1989 break;
1990#endif
da87c30b
AD
1991 case CHIP_TOPAZ:
1992 case CHIP_TONGA:
1993 case CHIP_FIJI:
1994 case CHIP_POLARIS10:
1995 case CHIP_POLARIS11:
1996 case CHIP_POLARIS12:
1997 case CHIP_VEGAM:
1998 case CHIP_CARRIZO:
1999 case CHIP_STONEY:
2000 if (adev->flags & AMD_IS_APU)
2001 adev->family = AMDGPU_FAMILY_CZ;
2002 else
2003 adev->family = AMDGPU_FAMILY_VI;
2004
2005 r = vi_set_ip_blocks(adev);
2006 if (r)
2007 return r;
2008 break;
e48a3cd9
AD
2009 case CHIP_VEGA10:
2010 case CHIP_VEGA12:
e4bd8170 2011 case CHIP_VEGA20:
e48a3cd9 2012 case CHIP_RAVEN:
61cf44c1 2013 case CHIP_ARCTURUS:
b51a26a0 2014 case CHIP_RENOIR:
c00a18ec 2015 case CHIP_ALDEBARAN:
70534d1e 2016 if (adev->flags & AMD_IS_APU)
2ca8a5d2
CZ
2017 adev->family = AMDGPU_FAMILY_RV;
2018 else
2019 adev->family = AMDGPU_FAMILY_AI;
460826e6
KW
2020
2021 r = soc15_set_ip_blocks(adev);
2022 if (r)
2023 return r;
2024 break;
0a5b8c7b 2025 case CHIP_NAVI10:
7ecb5cd4 2026 case CHIP_NAVI14:
4808cf9c 2027 case CHIP_NAVI12:
11e8aef5 2028 case CHIP_SIENNA_CICHLID:
41f446bf 2029 case CHIP_NAVY_FLOUNDER:
144722fa 2030 case CHIP_DIMGREY_CAVEFISH:
4e52a9f8
HR
2031 case CHIP_VANGOGH:
2032 if (adev->asic_type == CHIP_VANGOGH)
2033 adev->family = AMDGPU_FAMILY_VGH;
2034 else
2035 adev->family = AMDGPU_FAMILY_NV;
0a5b8c7b
HR
2036
2037 r = nv_set_ip_blocks(adev);
2038 if (r)
2039 return r;
2040 break;
d38ceaf9
AD
2041 default:
2042 /* FIXME: not supported yet */
2043 return -EINVAL;
2044 }
2045
1884734a 2046 amdgpu_amdkfd_device_probe(adev);
2047
3b94fb10 2048 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 2049 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 2050 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
4215a119
HC
2051 if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2052 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
00f54b97 2053
d38ceaf9
AD
2054 for (i = 0; i < adev->num_ip_blocks; i++) {
2055 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
ed8cf00c
HR
2056 DRM_ERROR("disabled ip block: %d <%s>\n",
2057 i, adev->ip_blocks[i].version->funcs->name);
a1255107 2058 adev->ip_blocks[i].status.valid = false;
d38ceaf9 2059 } else {
a1255107
AD
2060 if (adev->ip_blocks[i].version->funcs->early_init) {
2061 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 2062 if (r == -ENOENT) {
a1255107 2063 adev->ip_blocks[i].status.valid = false;
2c1a2784 2064 } else if (r) {
a1255107
AD
2065 DRM_ERROR("early_init of IP block <%s> failed %d\n",
2066 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2067 return r;
2c1a2784 2068 } else {
a1255107 2069 adev->ip_blocks[i].status.valid = true;
2c1a2784 2070 }
974e6b64 2071 } else {
a1255107 2072 adev->ip_blocks[i].status.valid = true;
d38ceaf9 2073 }
d38ceaf9 2074 }
21a249ca
AD
2075 /* get the vbios after the asic_funcs are set up */
2076 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
2077 r = amdgpu_device_parse_gpu_info_fw(adev);
2078 if (r)
2079 return r;
2080
21a249ca
AD
2081 /* Read BIOS */
2082 if (!amdgpu_get_bios(adev))
2083 return -EINVAL;
2084
2085 r = amdgpu_atombios_init(adev);
2086 if (r) {
2087 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2088 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2089 return r;
2090 }
2091 }
d38ceaf9
AD
2092 }
2093
395d1fb9
NH
2094 adev->cg_flags &= amdgpu_cg_mask;
2095 adev->pg_flags &= amdgpu_pg_mask;
2096
d38ceaf9
AD
2097 return 0;
2098}
2099
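/**
 * amdgpu_device_ip_hw_init_phase1 - run hw init for early hardware IPs (phase 1)
 *
 * @adev: amdgpu_device pointer
 *
 * Runs the hw_init callbacks of the COMMON, IH and (under SR-IOV) PSP blocks
 * whose software init has already completed, so that they are functional
 * before firmware loading and the phase 2 hw init of the remaining blocks.
 * Returns 0 on success, negative error code on failure.
 */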
0a4f2520
RZ
2100static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2101{
2102 int i, r;
2103
2104 for (i = 0; i < adev->num_ip_blocks; i++) {
2105 if (!adev->ip_blocks[i].status.sw)
2106 continue;
2107 if (adev->ip_blocks[i].status.hw)
2108 continue;
2109 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 2110 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
2111 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2112 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2113 if (r) {
2114 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2115 adev->ip_blocks[i].version->funcs->name, r);
2116 return r;
2117 }
2118 adev->ip_blocks[i].status.hw = true;
2119 }
2120 }
2121
2122 return 0;
2123}
2124
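/**
 * amdgpu_device_ip_hw_init_phase2 - run hw init for remaining hardware IPs (phase 2)
 *
 * @adev: amdgpu_device pointer
 *
 * Runs the hw_init callbacks of every IP block whose software init has
 * completed but whose hardware has not been brought up yet, i.e. the blocks
 * not handled in phase 1.
 * Returns 0 on success, negative error code on failure.
 */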
2125static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2126{
2127 int i, r;
2128
2129 for (i = 0; i < adev->num_ip_blocks; i++) {
2130 if (!adev->ip_blocks[i].status.sw)
2131 continue;
2132 if (adev->ip_blocks[i].status.hw)
2133 continue;
2134 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2135 if (r) {
2136 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2137 adev->ip_blocks[i].version->funcs->name, r);
2138 return r;
2139 }
2140 adev->ip_blocks[i].status.hw = true;
2141 }
2142
2143 return 0;
2144}
2145
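/**
 * amdgpu_device_fw_loading - trigger firmware loading via PSP/SMU
 *
 * @adev: amdgpu_device pointer
 *
 * For VEGA10 and newer asics, brings up the PSP block (hw_init, or resume
 * when coming back from suspend or reset) so it can load the firmwares,
 * then loads the SMU firmware on bare metal (or on Tonga under SR-IOV).
 * Returns 0 on success, negative error code on failure.
 */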
7a3e0bb2
RZ
2146static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2147{
2148 int r = 0;
2149 int i;
80f41f84 2150 uint32_t smu_version;
7a3e0bb2
RZ
2151
2152 if (adev->asic_type >= CHIP_VEGA10) {
2153 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
2154 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2155 continue;
2156
2157 /* no need to do the fw loading again if already done*/
2158 if (adev->ip_blocks[i].status.hw == true)
2159 break;
2160
53b3f8f4 2161 if (amdgpu_in_reset(adev) || adev->in_suspend) {
482f0e53
ML
2162 r = adev->ip_blocks[i].version->funcs->resume(adev);
2163 if (r) {
2164 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 2165 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
2166 return r;
2167 }
2168 } else {
2169 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2170 if (r) {
2171 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2172 adev->ip_blocks[i].version->funcs->name, r);
2173 return r;
7a3e0bb2 2174 }
7a3e0bb2 2175 }
482f0e53
ML
2176
2177 adev->ip_blocks[i].status.hw = true;
2178 break;
7a3e0bb2
RZ
2179 }
2180 }
482f0e53 2181
8973d9ec
ED
2182 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2183 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 2184
80f41f84 2185 return r;
7a3e0bb2
RZ
2186}
2187
e3ecdffa
AD
2188/**
2189 * amdgpu_device_ip_init - run init for hardware IPs
2190 *
2191 * @adev: amdgpu_device pointer
2192 *
2193 * Main initialization pass for hardware IPs. The list of all the hardware
2194 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2195 * are run. sw_init initializes the software state associated with each IP
2196 * and hw_init initializes the hardware associated with each IP.
2197 * Returns 0 on success, negative error code on failure.
2198 */
06ec9070 2199static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
2200{
2201 int i, r;
2202
c030f2e4 2203 r = amdgpu_ras_init(adev);
2204 if (r)
2205 return r;
2206
d38ceaf9 2207 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2208 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2209 continue;
a1255107 2210 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 2211 if (r) {
a1255107
AD
2212 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2213 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 2214 goto init_failed;
2c1a2784 2215 }
a1255107 2216 adev->ip_blocks[i].status.sw = true;
bfca0289 2217
d38ceaf9 2218 /* need to do gmc hw init early so we can allocate gpu mem */
a1255107 2219 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
06ec9070 2220 r = amdgpu_device_vram_scratch_init(adev);
2c1a2784
AD
2221 if (r) {
2222 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
72d3f592 2223 goto init_failed;
2c1a2784 2224 }
a1255107 2225 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
2226 if (r) {
2227 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 2228 goto init_failed;
2c1a2784 2229 }
06ec9070 2230 r = amdgpu_device_wb_init(adev);
2c1a2784 2231 if (r) {
06ec9070 2232 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 2233 goto init_failed;
2c1a2784 2234 }
a1255107 2235 adev->ip_blocks[i].status.hw = true;
2493664f
ML
2236
2237 /* right after GMC hw init, we create CSA */
f92d5c61 2238 if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
1e256e27
RZ
2239 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2240 AMDGPU_GEM_DOMAIN_VRAM,
2241 AMDGPU_CSA_SIZE);
2493664f
ML
2242 if (r) {
2243 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 2244 goto init_failed;
2493664f
ML
2245 }
2246 }
d38ceaf9
AD
2247 }
2248 }
2249
c9ffa427
YT
2250 if (amdgpu_sriov_vf(adev))
2251 amdgpu_virt_init_data_exchange(adev);
2252
533aed27
AG
2253 r = amdgpu_ib_pool_init(adev);
2254 if (r) {
2255 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2256 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2257 goto init_failed;
2258 }
2259
c8963ea4
RZ
2260 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2261 if (r)
72d3f592 2262 goto init_failed;
0a4f2520
RZ
2263
2264 r = amdgpu_device_ip_hw_init_phase1(adev);
2265 if (r)
72d3f592 2266 goto init_failed;
0a4f2520 2267
7a3e0bb2
RZ
2268 r = amdgpu_device_fw_loading(adev);
2269 if (r)
72d3f592 2270 goto init_failed;
7a3e0bb2 2271
0a4f2520
RZ
2272 r = amdgpu_device_ip_hw_init_phase2(adev);
2273 if (r)
72d3f592 2274 goto init_failed;
d38ceaf9 2275
121a2bc6
AG
2276 /*
2277 * retired pages will be loaded from eeprom and reserved here,
2278 * it should be called after amdgpu_device_ip_hw_init_phase2 since
2279 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
 2280 * for I2C communication, which is only true at this point.
b82e65a9
GC
2281 *
 2282 * amdgpu_ras_recovery_init may fail, but the upper layers only care about
 2283 * failures caused by a bad gpu situation and stop the amdgpu init process
 2284 * accordingly. For other failure cases, it will still release all
 2285 * the resources and print an error message, rather than returning a
 2286 * negative value to the upper level.
121a2bc6
AG
2287 *
 2288 * Note: theoretically, this should be called before all vram allocations
 2289 * to protect retired pages from being abused.
2290 */
b82e65a9
GC
2291 r = amdgpu_ras_recovery_init(adev);
2292 if (r)
2293 goto init_failed;
121a2bc6 2294
3e2e2ab5
HZ
2295 if (adev->gmc.xgmi.num_physical_nodes > 1)
2296 amdgpu_xgmi_add_device(adev);
1884734a 2297 amdgpu_amdkfd_device_init(adev);
c6332b97 2298
bd607166
KR
2299 amdgpu_fru_get_product_info(adev);
2300
72d3f592 2301init_failed:
c9ffa427 2302 if (amdgpu_sriov_vf(adev))
c6332b97 2303 amdgpu_virt_release_full_gpu(adev, true);
2304
72d3f592 2305 return r;
d38ceaf9
AD
2306}
2307
e3ecdffa
AD
2308/**
2309 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2310 *
2311 * @adev: amdgpu_device pointer
2312 *
2313 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2314 * this function before a GPU reset. If the value is retained after a
 2315 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2316 */
06ec9070 2317static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2318{
2319 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2320}
2321
e3ecdffa
AD
2322/**
2323 * amdgpu_device_check_vram_lost - check if vram is valid
2324 *
2325 * @adev: amdgpu_device pointer
2326 *
2327 * Checks the reset magic value written to the gart pointer in VRAM.
2328 * The driver calls this after a GPU reset to see if the contents of
 2329 * VRAM are lost or not.
2330 * returns true if vram is lost, false if not.
2331 */
06ec9070 2332static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2333{
dadce777
EQ
2334 if (memcmp(adev->gart.ptr, adev->reset_magic,
2335 AMDGPU_RESET_MAGIC_NUM))
2336 return true;
2337
53b3f8f4 2338 if (!amdgpu_in_reset(adev))
dadce777
EQ
2339 return false;
2340
2341 /*
2342 * For all ASICs with baco/mode1 reset, the VRAM is
2343 * always assumed to be lost.
2344 */
2345 switch (amdgpu_asic_reset_method(adev)) {
2346 case AMD_RESET_METHOD_BACO:
2347 case AMD_RESET_METHOD_MODE1:
2348 return true;
2349 default:
2350 return false;
2351 }
0c49e0b8
CZ
2352}
2353
e3ecdffa 2354/**
1112a46b 2355 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2356 *
2357 * @adev: amdgpu_device pointer
b8b72130 2358 * @state: clockgating state (gate or ungate)
e3ecdffa 2359 *
e3ecdffa 2360 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2361 * set_clockgating_state callbacks are run.
 2362 * During late init this pass enables clockgating for the hardware IPs;
 2363 * during fini or suspend it disables clockgating for the hardware IPs.
e3ecdffa
AD
2364 * Returns 0 on success, negative error code on failure.
2365 */
fdd34271 2366
1112a46b
RZ
2367static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2368 enum amd_clockgating_state state)
d38ceaf9 2369{
1112a46b 2370 int i, j, r;
d38ceaf9 2371
4a2ba394
SL
2372 if (amdgpu_emu_mode == 1)
2373 return 0;
2374
1112a46b
RZ
2375 for (j = 0; j < adev->num_ip_blocks; j++) {
2376 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2377 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2378 continue;
4a446d55 2379 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2380 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2381 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2382 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2383 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2384 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2385 /* enable clockgating to save power */
a1255107 2386 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2387 state);
4a446d55
AD
2388 if (r) {
2389 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2390 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2391 return r;
2392 }
b0b00ff1 2393 }
d38ceaf9 2394 }
06b18f61 2395
c9f96fd5
RZ
2396 return 0;
2397}
2398
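/**
 * amdgpu_device_set_pg_state - set powergating for amdgpu device
 *
 * @adev: amdgpu_device pointer
 * @state: powergating state (gate or ungate)
 *
 * The list of all the hardware IPs that make up the asic is walked and the
 * set_powergating_state callbacks are run, skipping UVD/VCE/VCN/JPEG which
 * are handled specially, in the same order as amdgpu_device_set_cg_state().
 * Returns 0 on success, negative error code on failure.
 */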
1112a46b 2399static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
c9f96fd5 2400{
1112a46b 2401 int i, j, r;
06b18f61 2402
c9f96fd5
RZ
2403 if (amdgpu_emu_mode == 1)
2404 return 0;
2405
1112a46b
RZ
2406 for (j = 0; j < adev->num_ip_blocks; j++) {
2407 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2408 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5
RZ
2409 continue;
2410 /* skip CG for VCE/UVD, it's handled specially */
2411 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2412 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2413 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2414 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2415 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2416 /* enable powergating to save power */
2417 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2418 state);
c9f96fd5
RZ
2419 if (r) {
2420 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2421 adev->ip_blocks[i].version->funcs->name, r);
2422 return r;
2423 }
2424 }
2425 }
2dc80b00
S
2426 return 0;
2427}
2428
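/**
 * amdgpu_device_enable_mgpu_fan_boost - enable fan boost on multi-dGPU systems
 *
 * Enables the MGPU fan boost feature on every registered dGPU, but only
 * when two or more dGPUs are present in the system.
 * Returns 0 on success, negative error code on failure.
 */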
beff74bc
AD
2429static int amdgpu_device_enable_mgpu_fan_boost(void)
2430{
2431 struct amdgpu_gpu_instance *gpu_ins;
2432 struct amdgpu_device *adev;
2433 int i, ret = 0;
2434
2435 mutex_lock(&mgpu_info.mutex);
2436
2437 /*
2438 * MGPU fan boost feature should be enabled
2439 * only when there are two or more dGPUs in
2440 * the system
2441 */
2442 if (mgpu_info.num_dgpu < 2)
2443 goto out;
2444
2445 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2446 gpu_ins = &(mgpu_info.gpu_ins[i]);
2447 adev = gpu_ins->adev;
2448 if (!(adev->flags & AMD_IS_APU) &&
f10bb940 2449 !gpu_ins->mgpu_fan_enabled) {
beff74bc
AD
2450 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2451 if (ret)
2452 break;
2453
2454 gpu_ins->mgpu_fan_enabled = 1;
2455 }
2456 }
2457
2458out:
2459 mutex_unlock(&mgpu_info.mutex);
2460
2461 return ret;
2462}
2463
e3ecdffa
AD
2464/**
2465 * amdgpu_device_ip_late_init - run late init for hardware IPs
2466 *
2467 * @adev: amdgpu_device pointer
2468 *
2469 * Late initialization pass for hardware IPs. The list of all the hardware
2470 * IPs that make up the asic is walked and the late_init callbacks are run.
2471 * late_init covers any special initialization that an IP requires
 2472 * after all of them have been initialized or something that needs to happen
2473 * late in the init process.
2474 * Returns 0 on success, negative error code on failure.
2475 */
06ec9070 2476static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2477{
60599a03 2478 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2479 int i = 0, r;
2480
2481 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2482 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2483 continue;
2484 if (adev->ip_blocks[i].version->funcs->late_init) {
2485 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2486 if (r) {
2487 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2488 adev->ip_blocks[i].version->funcs->name, r);
2489 return r;
2490 }
2dc80b00 2491 }
73f847db 2492 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2493 }
2494
a891d239
DL
2495 amdgpu_ras_set_error_query_ready(adev, true);
2496
1112a46b
RZ
2497 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2498 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2499
06ec9070 2500 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2501
beff74bc
AD
2502 r = amdgpu_device_enable_mgpu_fan_boost();
2503 if (r)
2504 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2505
60599a03
EQ
2506
2507 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2508 mutex_lock(&mgpu_info.mutex);
2509
2510 /*
2511 * Reset device p-state to low as this was booted with high.
2512 *
2513 * This should be performed only after all devices from the same
2514 * hive get initialized.
2515 *
 2516 * However, it's not known in advance how many devices are in the hive,
 2517 * as they are counted one by one during device initialization.
2518 *
 2519 * So, we wait for all XGMI interlinked devices to be initialized.
2520 * This may bring some delays as those devices may come from
2521 * different hives. But that should be OK.
2522 */
2523 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2524 for (i = 0; i < mgpu_info.num_gpu; i++) {
2525 gpu_instance = &(mgpu_info.gpu_ins[i]);
2526 if (gpu_instance->adev->flags & AMD_IS_APU)
2527 continue;
2528
d84a430d
JK
2529 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2530 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
2531 if (r) {
2532 DRM_ERROR("pstate setting failed (%d).\n", r);
2533 break;
2534 }
2535 }
2536 }
2537
2538 mutex_unlock(&mgpu_info.mutex);
2539 }
2540
d38ceaf9
AD
2541 return 0;
2542}
2543
e3ecdffa
AD
2544/**
2545 * amdgpu_device_ip_fini - run fini for hardware IPs
2546 *
2547 * @adev: amdgpu_device pointer
2548 *
2549 * Main teardown pass for hardware IPs. The list of all the hardware
2550 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2551 * are run. hw_fini tears down the hardware associated with each IP
2552 * and sw_fini tears down any software state associated with each IP.
2553 * Returns 0 on success, negative error code on failure.
2554 */
06ec9070 2555static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
d38ceaf9
AD
2556{
2557 int i, r;
2558
5278a159
SY
2559 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2560 amdgpu_virt_release_ras_err_handler_data(adev);
2561
c030f2e4 2562 amdgpu_ras_pre_fini(adev);
2563
a82400b5
AG
2564 if (adev->gmc.xgmi.num_physical_nodes > 1)
2565 amdgpu_xgmi_remove_device(adev);
2566
05df1f01 2567 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2568 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2569
26eb6b51
DL
2570 amdgpu_amdkfd_device_fini(adev);
2571
3e96dbfd
AD
2572 /* need to disable SMC first */
2573 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2574 if (!adev->ip_blocks[i].status.hw)
3e96dbfd 2575 continue;
fdd34271 2576 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
a1255107 2577 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3e96dbfd
AD
2578 /* XXX handle errors */
2579 if (r) {
2580 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
a1255107 2581 adev->ip_blocks[i].version->funcs->name, r);
3e96dbfd 2582 }
a1255107 2583 adev->ip_blocks[i].status.hw = false;
3e96dbfd
AD
2584 break;
2585 }
2586 }
2587
d38ceaf9 2588 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2589 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2590 continue;
8201a67a 2591
a1255107 2592 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2593 /* XXX handle errors */
2c1a2784 2594 if (r) {
a1255107
AD
2595 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2596 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2597 }
8201a67a 2598
a1255107 2599 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2600 }
2601
9950cda2 2602
d38ceaf9 2603 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2604 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2605 continue;
c12aba3a
ML
2606
2607 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2608 amdgpu_ucode_free_bo(adev);
1e256e27 2609 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a
ML
2610 amdgpu_device_wb_fini(adev);
2611 amdgpu_device_vram_scratch_fini(adev);
533aed27 2612 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2613 }
2614
a1255107 2615 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2616 /* XXX handle errors */
2c1a2784 2617 if (r) {
a1255107
AD
2618 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2619 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2620 }
a1255107
AD
2621 adev->ip_blocks[i].status.sw = false;
2622 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2623 }
2624
a6dcfd9c 2625 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2626 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2627 continue;
a1255107
AD
2628 if (adev->ip_blocks[i].version->funcs->late_fini)
2629 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2630 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2631 }
2632
c030f2e4 2633 amdgpu_ras_fini(adev);
2634
030308fc 2635 if (amdgpu_sriov_vf(adev))
24136135
ML
2636 if (amdgpu_virt_release_full_gpu(adev, false))
2637 DRM_ERROR("failed to release exclusive mode on fini\n");
2493664f 2638
d38ceaf9
AD
2639 return 0;
2640}
2641
e3ecdffa 2642/**
beff74bc 2643 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2644 *
1112a46b 2645 * @work: work_struct.
e3ecdffa 2646 */
beff74bc 2647static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2648{
2649 struct amdgpu_device *adev =
beff74bc 2650 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2651 int r;
2652
2653 r = amdgpu_ib_ring_tests(adev);
2654 if (r)
2655 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2656}
2657
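/**
 * amdgpu_device_delay_enable_gfx_off - delayed work handler to enable GFXOFF
 *
 * @work: work_struct.
 *
 * Asks the SMU to enter the GFXOFF state once no request to keep the GFX
 * block powered up is pending anymore.
 */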
1e317b99
RZ
2658static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2659{
2660 struct amdgpu_device *adev =
2661 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2662
2663 mutex_lock(&adev->gfx.gfx_off_mutex);
2664 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2665 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2666 adev->gfx.gfx_off_state = true;
2667 }
2668 mutex_unlock(&adev->gfx.gfx_off_mutex);
2669}
2670
e3ecdffa 2671/**
e7854a03 2672 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2673 *
2674 * @adev: amdgpu_device pointer
2675 *
2676 * Main suspend function for hardware IPs. The list of all the hardware
2677 * IPs that make up the asic is walked, clockgating is disabled and the
2678 * suspend callbacks are run. suspend puts the hardware and software state
2679 * in each IP into a state suitable for suspend.
2680 * Returns 0 on success, negative error code on failure.
2681 */
e7854a03
AD
2682static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2683{
2684 int i, r;
2685
b00978de
PL
2686 if (adev->in_poweroff_reboot_com ||
2687 !amdgpu_acpi_is_s0ix_supported(adev) || amdgpu_in_reset(adev)) {
628c36d7
PL
2688 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2689 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2690 }
05df1f01 2691
e7854a03
AD
2692 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2693 if (!adev->ip_blocks[i].status.valid)
2694 continue;
2b9f7848 2695
e7854a03 2696 /* displays are handled separately */
2b9f7848
ND
2697 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
2698 continue;
2699
2700 /* XXX handle errors */
2701 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2702 /* XXX handle errors */
2703 if (r) {
2704 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2705 adev->ip_blocks[i].version->funcs->name, r);
2706 return r;
e7854a03 2707 }
2b9f7848
ND
2708
2709 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
2710 }
2711
e7854a03
AD
2712 return 0;
2713}
2714
2715/**
2716 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2717 *
2718 * @adev: amdgpu_device pointer
2719 *
2720 * Main suspend function for hardware IPs. The list of all the hardware
2721 * IPs that make up the asic is walked, clockgating is disabled and the
2722 * suspend callbacks are run. suspend puts the hardware and software state
2723 * in each IP into a state suitable for suspend.
2724 * Returns 0 on success, negative error code on failure.
2725 */
2726static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2727{
2728 int i, r;
2729
2730 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2731 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2732 continue;
e7854a03
AD
2733 /* displays are handled in phase1 */
2734 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2735 continue;
bff77e86
LM
2736 /* PSP lost connection when err_event_athub occurs */
2737 if (amdgpu_ras_intr_triggered() &&
2738 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2739 adev->ip_blocks[i].status.hw = false;
2740 continue;
2741 }
d38ceaf9 2742 /* XXX handle errors */
a1255107 2743 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 2744 /* XXX handle errors */
2c1a2784 2745 if (r) {
a1255107
AD
2746 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2747 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2748 }
876923fb 2749 adev->ip_blocks[i].status.hw = false;
a3a09142 2750 /* handle putting the SMC in the appropriate state */
86b93fd6
JZ
 2751 if (!amdgpu_sriov_vf(adev)) {
2752 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2753 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
2754 if (r) {
2755 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
2756 adev->mp1_state, r);
2757 return r;
2758 }
a3a09142
AD
2759 }
2760 }
b5507c7e 2761 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2762 }
2763
2764 return 0;
2765}
2766
e7854a03
AD
2767/**
2768 * amdgpu_device_ip_suspend - run suspend for hardware IPs
2769 *
2770 * @adev: amdgpu_device pointer
2771 *
2772 * Main suspend function for hardware IPs. The list of all the hardware
2773 * IPs that make up the asic is walked, clockgating is disabled and the
2774 * suspend callbacks are run. suspend puts the hardware and software state
2775 * in each IP into a state suitable for suspend.
2776 * Returns 0 on success, negative error code on failure.
2777 */
2778int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2779{
2780 int r;
2781
3c73683c
JC
2782 if (amdgpu_sriov_vf(adev)) {
2783 amdgpu_virt_fini_data_exchange(adev);
e7819644 2784 amdgpu_virt_request_full_gpu(adev, false);
3c73683c 2785 }
e7819644 2786
e7854a03
AD
2787 r = amdgpu_device_ip_suspend_phase1(adev);
2788 if (r)
2789 return r;
2790 r = amdgpu_device_ip_suspend_phase2(adev);
2791
e7819644
YT
2792 if (amdgpu_sriov_vf(adev))
2793 amdgpu_virt_release_full_gpu(adev, false);
2794
e7854a03
AD
2795 return r;
2796}
2797
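/**
 * amdgpu_device_ip_reinit_early_sriov - re-init early IPs after SR-IOV reset
 *
 * @adev: amdgpu_device pointer
 *
 * Re-runs the hw_init callbacks for the GMC, COMMON, PSP and IH blocks in
 * that fixed order when recovering an SR-IOV virtual function after a reset.
 * Returns 0 on success, negative error code on failure.
 */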
06ec9070 2798static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2799{
2800 int i, r;
2801
2cb681b6
ML
2802 static enum amd_ip_block_type ip_order[] = {
2803 AMD_IP_BLOCK_TYPE_GMC,
2804 AMD_IP_BLOCK_TYPE_COMMON,
39186aef 2805 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
2806 AMD_IP_BLOCK_TYPE_IH,
2807 };
a90ad3c2 2808
2cb681b6
ML
2809 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2810 int j;
2811 struct amdgpu_ip_block *block;
a90ad3c2 2812
4cd2a96d
J
2813 block = &adev->ip_blocks[i];
2814 block->status.hw = false;
2cb681b6 2815
4cd2a96d 2816 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
2cb681b6 2817
4cd2a96d 2818 if (block->version->type != ip_order[j] ||
2cb681b6
ML
2819 !block->status.valid)
2820 continue;
2821
2822 r = block->version->funcs->hw_init(adev);
0aaeefcc 2823 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2824 if (r)
2825 return r;
482f0e53 2826 block->status.hw = true;
a90ad3c2
ML
2827 }
2828 }
2829
2830 return 0;
2831}
2832
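/**
 * amdgpu_device_ip_reinit_late_sriov - re-init remaining IPs after SR-IOV reset
 *
 * @adev: amdgpu_device pointer
 *
 * Re-initializes the SMC, DCE, GFX, SDMA, UVD, VCE and VCN blocks in that
 * order (SMC through its resume callback, the others through hw_init) when
 * recovering an SR-IOV virtual function after a reset.
 * Returns 0 on success, negative error code on failure.
 */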
06ec9070 2833static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2834{
2835 int i, r;
2836
2cb681b6
ML
2837 static enum amd_ip_block_type ip_order[] = {
2838 AMD_IP_BLOCK_TYPE_SMC,
2839 AMD_IP_BLOCK_TYPE_DCE,
2840 AMD_IP_BLOCK_TYPE_GFX,
2841 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c 2842 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07
JJ
2843 AMD_IP_BLOCK_TYPE_VCE,
2844 AMD_IP_BLOCK_TYPE_VCN
2cb681b6 2845 };
a90ad3c2 2846
2cb681b6
ML
2847 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2848 int j;
2849 struct amdgpu_ip_block *block;
a90ad3c2 2850
2cb681b6
ML
2851 for (j = 0; j < adev->num_ip_blocks; j++) {
2852 block = &adev->ip_blocks[j];
2853
2854 if (block->version->type != ip_order[i] ||
482f0e53
ML
2855 !block->status.valid ||
2856 block->status.hw)
2cb681b6
ML
2857 continue;
2858
895bd048
JZ
2859 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
2860 r = block->version->funcs->resume(adev);
2861 else
2862 r = block->version->funcs->hw_init(adev);
2863
0aaeefcc 2864 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2865 if (r)
2866 return r;
482f0e53 2867 block->status.hw = true;
a90ad3c2
ML
2868 }
2869 }
2870
2871 return 0;
2872}
2873
e3ecdffa
AD
2874/**
2875 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2876 *
2877 * @adev: amdgpu_device pointer
2878 *
2879 * First resume function for hardware IPs. The list of all the hardware
2880 * IPs that make up the asic is walked and the resume callbacks are run for
2881 * COMMON, GMC, and IH. resume puts the hardware into a functional state
2882 * after a suspend and updates the software state as necessary. This
2883 * function is also used for restoring the GPU after a GPU reset.
2884 * Returns 0 on success, negative error code on failure.
2885 */
06ec9070 2886static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
2887{
2888 int i, r;
2889
a90ad3c2 2890 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2891 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 2892 continue;
a90ad3c2 2893 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa
AD
2894 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2895 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
482f0e53 2896
fcf0649f
CZ
2897 r = adev->ip_blocks[i].version->funcs->resume(adev);
2898 if (r) {
2899 DRM_ERROR("resume of IP block <%s> failed %d\n",
2900 adev->ip_blocks[i].version->funcs->name, r);
2901 return r;
2902 }
482f0e53 2903 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
2904 }
2905 }
2906
2907 return 0;
2908}
2909
e3ecdffa
AD
2910/**
2911 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2912 *
2913 * @adev: amdgpu_device pointer
2914 *
 2915 * Second resume function for hardware IPs. The list of all the hardware
2916 * IPs that make up the asic is walked and the resume callbacks are run for
2917 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
2918 * functional state after a suspend and updates the software state as
2919 * necessary. This function is also used for restoring the GPU after a GPU
2920 * reset.
2921 * Returns 0 on success, negative error code on failure.
2922 */
06ec9070 2923static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2924{
2925 int i, r;
2926
2927 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2928 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 2929 continue;
fcf0649f 2930 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 2931 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
2932 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2933 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 2934 continue;
a1255107 2935 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 2936 if (r) {
a1255107
AD
2937 DRM_ERROR("resume of IP block <%s> failed %d\n",
2938 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2939 return r;
2c1a2784 2940 }
482f0e53 2941 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
2942 }
2943
2944 return 0;
2945}
2946
e3ecdffa
AD
2947/**
2948 * amdgpu_device_ip_resume - run resume for hardware IPs
2949 *
2950 * @adev: amdgpu_device pointer
2951 *
2952 * Main resume function for hardware IPs. The hardware IPs
2953 * are split into two resume functions because they are
 2954 * also used in recovering from a GPU reset and some additional
 2955 * steps need to be taken between them. In this case (S3/S4) they are
2956 * run sequentially.
2957 * Returns 0 on success, negative error code on failure.
2958 */
06ec9070 2959static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
2960{
2961 int r;
2962
06ec9070 2963 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
2964 if (r)
2965 return r;
7a3e0bb2
RZ
2966
2967 r = amdgpu_device_fw_loading(adev);
2968 if (r)
2969 return r;
2970
06ec9070 2971 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
2972
2973 return r;
2974}
2975
e3ecdffa
AD
2976/**
2977 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2978 *
2979 * @adev: amdgpu_device pointer
2980 *
2981 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2982 */
4e99a44e 2983static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 2984{
6867e1b5
ML
2985 if (amdgpu_sriov_vf(adev)) {
2986 if (adev->is_atom_fw) {
2987 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2988 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2989 } else {
2990 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2991 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2992 }
2993
2994 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2995 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 2996 }
048765ad
AR
2997}
2998
e3ecdffa
AD
2999/**
3000 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3001 *
3002 * @asic_type: AMD asic type
3003 *
 3004 * Check if there is DC (new modesetting infrastructure) support for an asic.
3005 * returns true if DC has support, false if not.
3006 */
4562236b
HW
3007bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3008{
3009 switch (asic_type) {
3010#if defined(CONFIG_DRM_AMD_DC)
64200c46
MR
3011#if defined(CONFIG_DRM_AMD_DC_SI)
3012 case CHIP_TAHITI:
3013 case CHIP_PITCAIRN:
3014 case CHIP_VERDE:
3015 case CHIP_OLAND:
3016#endif
4562236b 3017 case CHIP_BONAIRE:
0d6fbccb 3018 case CHIP_KAVERI:
367e6687
AD
3019 case CHIP_KABINI:
3020 case CHIP_MULLINS:
d9fda248
HW
3021 /*
3022 * We have systems in the wild with these ASICs that require
3023 * LVDS and VGA support which is not supported with DC.
3024 *
3025 * Fallback to the non-DC driver here by default so as not to
3026 * cause regressions.
3027 */
3028 return amdgpu_dc > 0;
3029 case CHIP_HAWAII:
4562236b
HW
3030 case CHIP_CARRIZO:
3031 case CHIP_STONEY:
4562236b 3032 case CHIP_POLARIS10:
675fd32b 3033 case CHIP_POLARIS11:
2c8ad2d5 3034 case CHIP_POLARIS12:
675fd32b 3035 case CHIP_VEGAM:
4562236b
HW
3036 case CHIP_TONGA:
3037 case CHIP_FIJI:
42f8ffa1 3038 case CHIP_VEGA10:
dca7b401 3039 case CHIP_VEGA12:
c6034aa2 3040 case CHIP_VEGA20:
b86a1aa3 3041#if defined(CONFIG_DRM_AMD_DC_DCN)
fd187853 3042 case CHIP_RAVEN:
b4f199c7 3043 case CHIP_NAVI10:
8fceceb6 3044 case CHIP_NAVI14:
078655d9 3045 case CHIP_NAVI12:
e1c14c43 3046 case CHIP_RENOIR:
81d9bfb8 3047 case CHIP_SIENNA_CICHLID:
a6c5308f 3048 case CHIP_NAVY_FLOUNDER:
7cc656e2 3049 case CHIP_DIMGREY_CAVEFISH:
84b934bc 3050 case CHIP_VANGOGH:
42f8ffa1 3051#endif
fd187853 3052 return amdgpu_dc != 0;
4562236b
HW
3053#endif
3054 default:
93b09a9a 3055 if (amdgpu_dc > 0)
044a48f4 3056 DRM_INFO_ONCE("Display Core has been requested via kernel parameter "
93b09a9a 3057 "but isn't supported by ASIC, ignoring\n");
4562236b
HW
3058 return false;
3059 }
3060}
3061
3062/**
3063 * amdgpu_device_has_dc_support - check if dc is supported
3064 *
982a820b 3065 * @adev: amdgpu_device pointer
4562236b
HW
3066 *
3067 * Returns true for supported, false for not supported
3068 */
3069bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3070{
c997e8e2 3071 if (amdgpu_sriov_vf(adev) || adev->enable_virtual_display)
2555039d
XY
3072 return false;
3073
4562236b
HW
3074 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3075}
3076
d4535e2c
AG
3077
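/**
 * amdgpu_device_xgmi_reset_func - per-device XGMI reset work handler
 *
 * @__work: work_struct embedded in the amdgpu_device
 *
 * Resets one device of an XGMI hive, using a task barrier so that the BACO
 * enter/exit sequence (or the full asic reset) runs in lockstep with the
 * other devices of the hive. The result is stored in adev->asic_reset_res.
 */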
3078static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3079{
3080 struct amdgpu_device *adev =
3081 container_of(__work, struct amdgpu_device, xgmi_reset_work);
d95e8e97 3082 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
d4535e2c 3083
c6a6e2db
AG
3084 /* It's a bug to not have a hive within this function */
3085 if (WARN_ON(!hive))
3086 return;
3087
3088 /*
3089 * Use task barrier to synchronize all xgmi reset works across the
3090 * hive. task_barrier_enter and task_barrier_exit will block
3091 * until all the threads running the xgmi reset works reach
3092 * those points. task_barrier_full will do both blocks.
3093 */
3094 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3095
3096 task_barrier_enter(&hive->tb);
4a580877 3097 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
c6a6e2db
AG
3098
3099 if (adev->asic_reset_res)
3100 goto fail;
3101
3102 task_barrier_exit(&hive->tb);
4a580877 3103 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
c6a6e2db
AG
3104
3105 if (adev->asic_reset_res)
3106 goto fail;
43c4d576
JC
3107
3108 if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count)
3109 adev->mmhub.funcs->reset_ras_error_count(adev);
c6a6e2db
AG
3110 } else {
3111
3112 task_barrier_full(&hive->tb);
3113 adev->asic_reset_res = amdgpu_asic_reset(adev);
3114 }
ce316fa5 3115
c6a6e2db 3116fail:
d4535e2c 3117 if (adev->asic_reset_res)
fed184e9 3118 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
4a580877 3119 adev->asic_reset_res, adev_to_drm(adev)->unique);
d95e8e97 3120 amdgpu_put_xgmi_hive(hive);
d4535e2c
AG
3121}
3122
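/**
 * amdgpu_device_get_job_timeout_settings - parse the lockup_timeout parameter
 *
 * @adev: amdgpu_device pointer
 *
 * Fills in the gfx, compute, sdma and video job timeouts from the
 * amdgpu_lockup_timeout module parameter, falling back to the defaults
 * described below when a value is not specified.
 * Returns 0 on success, negative error code on a malformed parameter.
 */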
71f98027
AD
3123static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3124{
3125 char *input = amdgpu_lockup_timeout;
3126 char *timeout_setting = NULL;
3127 int index = 0;
3128 long timeout;
3129 int ret = 0;
3130
3131 /*
 3132 * By default, the timeout for non-compute jobs is 10000,
 3133 * and there is no timeout enforced on compute jobs.
 3134 * In SR-IOV or passthrough mode, the timeout for compute
b7b2a316 3135 * jobs is 60000 by default.
71f98027
AD
3136 */
3137 adev->gfx_timeout = msecs_to_jiffies(10000);
3138 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
9882e278
ED
3139 if (amdgpu_sriov_vf(adev))
3140 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3141 msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
3142 else if (amdgpu_passthrough(adev))
b7b2a316 3143 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027
AD
3144 else
3145 adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
3146
f440ff44 3147 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 3148 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 3149 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
3150 ret = kstrtol(timeout_setting, 0, &timeout);
3151 if (ret)
3152 return ret;
3153
3154 if (timeout == 0) {
3155 index++;
3156 continue;
3157 } else if (timeout < 0) {
3158 timeout = MAX_SCHEDULE_TIMEOUT;
3159 } else {
3160 timeout = msecs_to_jiffies(timeout);
3161 }
3162
3163 switch (index++) {
3164 case 0:
3165 adev->gfx_timeout = timeout;
3166 break;
3167 case 1:
3168 adev->compute_timeout = timeout;
3169 break;
3170 case 2:
3171 adev->sdma_timeout = timeout;
3172 break;
3173 case 3:
3174 adev->video_timeout = timeout;
3175 break;
3176 default:
3177 break;
3178 }
3179 }
3180 /*
3181 * There is only one value specified and
3182 * it should apply to all non-compute jobs.
3183 */
bcccee89 3184 if (index == 1) {
71f98027 3185 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
3186 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3187 adev->compute_timeout = adev->gfx_timeout;
3188 }
71f98027
AD
3189 }
3190
3191 return ret;
3192}
d4535e2c 3193
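/* device sysfs attributes: product name/number, serial number and PCIe replay count */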
77f3a5cd
ND
3194static const struct attribute *amdgpu_dev_attributes[] = {
3195 &dev_attr_product_name.attr,
3196 &dev_attr_product_number.attr,
3197 &dev_attr_serial_number.attr,
3198 &dev_attr_pcie_replay_count.attr,
3199 NULL
3200};
3201
c9a6b82f 3202
d38ceaf9
AD
3203/**
3204 * amdgpu_device_init - initialize the driver
3205 *
3206 * @adev: amdgpu_device pointer
d38ceaf9
AD
3207 * @flags: driver flags
3208 *
3209 * Initializes the driver info and hw (all asics).
3210 * Returns 0 for success or an error on failure.
3211 * Called at driver startup.
3212 */
3213int amdgpu_device_init(struct amdgpu_device *adev,
d38ceaf9
AD
3214 uint32_t flags)
3215{
8aba21b7
LT
3216 struct drm_device *ddev = adev_to_drm(adev);
3217 struct pci_dev *pdev = adev->pdev;
d38ceaf9 3218 int r, i;
fd496ca8 3219 bool atpx = false;
95844d20 3220 u32 max_MBps;
d38ceaf9
AD
3221
3222 adev->shutdown = false;
d38ceaf9 3223 adev->flags = flags;
4e66d7d2
YZ
3224
3225 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3226 adev->asic_type = amdgpu_force_asic_type;
3227 else
3228 adev->asic_type = flags & AMD_ASIC_MASK;
3229
d38ceaf9 3230 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 3231 if (amdgpu_emu_mode == 1)
8bdab6bb 3232 adev->usec_timeout *= 10;
770d13b1 3233 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
3234 adev->accel_working = false;
3235 adev->num_rings = 0;
3236 adev->mman.buffer_funcs = NULL;
3237 adev->mman.buffer_funcs_ring = NULL;
3238 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 3239 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 3240 adev->gmc.gmc_funcs = NULL;
f54d1867 3241 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 3242 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
3243
3244 adev->smc_rreg = &amdgpu_invalid_rreg;
3245 adev->smc_wreg = &amdgpu_invalid_wreg;
3246 adev->pcie_rreg = &amdgpu_invalid_rreg;
3247 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
3248 adev->pciep_rreg = &amdgpu_invalid_rreg;
3249 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
3250 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3251 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
3252 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3253 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3254 adev->didt_rreg = &amdgpu_invalid_rreg;
3255 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
3256 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3257 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
3258 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3259 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3260
3e39ab90
AD
3261 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3262 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3263 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
3264
 3265 /* mutex initializations are all done here so we
 3266 * can recall functions without locking issues */
d38ceaf9 3267 atomic_set(&adev->irq.ih.lock, 0);
0e5ca0d1 3268 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
3269 mutex_init(&adev->pm.mutex);
3270 mutex_init(&adev->gfx.gpu_clock_mutex);
3271 mutex_init(&adev->srbm_mutex);
b8866c26 3272 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3273 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 3274 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3275 mutex_init(&adev->mn_lock);
e23b74aa 3276 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3277 hash_init(adev->mn_hash);
53b3f8f4 3278 atomic_set(&adev->in_gpu_reset, 0);
6049db43 3279 init_rwsem(&adev->reset_sem);
32eaeae0 3280 mutex_init(&adev->psp.mutex);
bd052211 3281 mutex_init(&adev->notifier_lock);
d38ceaf9 3282
912dfc84
EQ
3283 r = amdgpu_device_check_arguments(adev);
3284 if (r)
3285 return r;
d38ceaf9 3286
d38ceaf9
AD
3287 spin_lock_init(&adev->mmio_idx_lock);
3288 spin_lock_init(&adev->smc_idx_lock);
3289 spin_lock_init(&adev->pcie_idx_lock);
3290 spin_lock_init(&adev->uvd_ctx_idx_lock);
3291 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3292 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3293 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3294 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3295 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3296
0c4e7fa5
CZ
3297 INIT_LIST_HEAD(&adev->shadow_list);
3298 mutex_init(&adev->shadow_list_lock);
3299
beff74bc
AD
3300 INIT_DELAYED_WORK(&adev->delayed_init_work,
3301 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3302 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3303 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3304
d4535e2c
AG
3305 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3306
d23ee13f 3307 adev->gfx.gfx_off_req_count = 1;
b6e79d9a 3308 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3309
b265bdbd
EQ
3310 atomic_set(&adev->throttling_logging_enabled, 1);
3311 /*
3312 * If throttling continues, logging will be performed every minute
3313 * to avoid log flooding. "-1" is subtracted since the thermal
3314 * throttling interrupt comes every second. Thus, the total logging
 3315 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
3316 * for throttling interrupt) = 60 seconds.
3317 */
3318 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3319 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3320
0fa49558
AX
3321 /* Registers mapping */
3322 /* TODO: block userspace mapping of io register */
da69c161
KW
3323 if (adev->asic_type >= CHIP_BONAIRE) {
3324 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3325 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3326 } else {
3327 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3328 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3329 }
d38ceaf9 3330
d38ceaf9
AD
3331 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3332 if (adev->rmmio == NULL) {
3333 return -ENOMEM;
3334 }
3335 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3336 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3337
d38ceaf9
AD
3338 /* io port mapping */
3339 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
3340 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
3341 adev->rio_mem_size = pci_resource_len(adev->pdev, i);
3342 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
3343 break;
3344 }
3345 }
3346 if (adev->rio_mem == NULL)
b64a18c5 3347 DRM_INFO("PCI I/O BAR is not found.\n");
d38ceaf9 3348
b2109d8e
JX
3349 /* enable PCIE atomic ops */
3350 r = pci_enable_atomic_ops_to_root(adev->pdev,
3351 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3352 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3353 if (r) {
3354 adev->have_atomics_support = false;
3355 DRM_INFO("PCIE atomic ops is not supported\n");
3356 } else {
3357 adev->have_atomics_support = true;
3358 }
3359
5494d864
AD
3360 amdgpu_device_get_pcie_info(adev);
3361
b239c017
JX
3362 if (amdgpu_mcbp)
3363 DRM_INFO("MCBP is enabled\n");
3364
5f84cc63
JX
3365 if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
3366 adev->enable_mes = true;
3367
3aa0115d
ML
3368 /* detect hw virtualization here */
3369 amdgpu_detect_virtualization(adev);
3370
dffa11b4
ML
3371 r = amdgpu_device_get_job_timeout_settings(adev);
3372 if (r) {
3373 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4192f7b5 3374 goto failed_unmap;
a190d1c7
XY
3375 }
3376
d38ceaf9 3377 /* early init functions */
06ec9070 3378 r = amdgpu_device_ip_early_init(adev);
d38ceaf9 3379 if (r)
4192f7b5 3380 goto failed_unmap;
d38ceaf9 3381
6585661d
OZ
3382 /* doorbell bar mapping and doorbell index init*/
3383 amdgpu_device_doorbell_init(adev);
3384
d38ceaf9
AD
3385 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
3386 /* this will fail for cards that aren't VGA class devices, just
3387 * ignore it */
38d6be81
AD
3388 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
3389 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
d38ceaf9 3390
fd496ca8
AD
3391 if (amdgpu_device_supports_atpx(ddev))
3392 atpx = true;
3840c5bc
AD
3393 if (amdgpu_has_atpx() &&
3394 (amdgpu_is_atpx_hybrid() ||
3395 amdgpu_has_atpx_dgpu_power_cntl()) &&
3396 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 3397 vga_switcheroo_register_client(adev->pdev,
fd496ca8
AD
3398 &amdgpu_switcheroo_ops, atpx);
3399 if (atpx)
d38ceaf9
AD
3400 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
3401
9475a943
SL
3402 if (amdgpu_emu_mode == 1) {
3403 /* post the asic on emulation mode */
3404 emu_soc_asic_init(adev);
bfca0289 3405 goto fence_driver_init;
9475a943 3406 }
bfca0289 3407
4e99a44e
ML
3408 /* detect if we are with an SRIOV vbios */
3409 amdgpu_device_detect_sriov_bios(adev);
048765ad 3410
95e8e59e
AD
3411 /* check if we need to reset the asic
3412 * E.g., driver was not cleanly unloaded previously, etc.
3413 */
f14899fd 3414 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
95e8e59e
AD
3415 r = amdgpu_asic_reset(adev);
3416 if (r) {
3417 dev_err(adev->dev, "asic reset on init failed\n");
3418 goto failed;
3419 }
3420 }
3421
8f66090b 3422 pci_enable_pcie_error_reporting(adev->pdev);
c9a6b82f 3423
d38ceaf9 3424 /* Post card if necessary */
39c640c0 3425 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3426 if (!adev->bios) {
bec86378 3427 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3428 r = -EINVAL;
3429 goto failed;
d38ceaf9 3430 }
bec86378 3431 DRM_INFO("GPU posting now...\n");
4d2997ab 3432 r = amdgpu_device_asic_init(adev);
4e99a44e
ML
3433 if (r) {
3434 dev_err(adev->dev, "gpu post error!\n");
3435 goto failed;
3436 }
d38ceaf9
AD
3437 }
3438
88b64e95
AD
3439 if (adev->is_atom_fw) {
3440 /* Initialize clocks */
3441 r = amdgpu_atomfirmware_get_clock_info(adev);
3442 if (r) {
3443 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 3444 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
3445 goto failed;
3446 }
3447 } else {
a5bde2f9
AD
3448 /* Initialize clocks */
3449 r = amdgpu_atombios_get_clock_info(adev);
3450 if (r) {
3451 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 3452 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 3453 goto failed;
a5bde2f9
AD
3454 }
3455 /* init i2c buses */
4562236b
HW
3456 if (!amdgpu_device_has_dc_support(adev))
3457 amdgpu_atombios_i2c_init(adev);
2c1a2784 3458 }
d38ceaf9 3459
bfca0289 3460fence_driver_init:
d38ceaf9
AD
3461 /* Fence driver */
3462 r = amdgpu_fence_driver_init(adev);
2c1a2784
AD
3463 if (r) {
3464 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
e23b74aa 3465 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3466 goto failed;
2c1a2784 3467 }
d38ceaf9
AD
3468
3469 /* init the mode config */
4a580877 3470 drm_mode_config_init(adev_to_drm(adev));
d38ceaf9 3471
06ec9070 3472 r = amdgpu_device_ip_init(adev);
d38ceaf9 3473 if (r) {
8840a387 3474 /* failed in exclusive mode due to timeout */
3475 if (amdgpu_sriov_vf(adev) &&
3476 !amdgpu_sriov_runtime(adev) &&
3477 amdgpu_virt_mmio_blocked(adev) &&
3478 !amdgpu_virt_wait_reset(adev)) {
3479 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
3480 /* Don't send request since VF is inactive. */
3481 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3482 adev->virt.ops = NULL;
8840a387 3483 r = -EAGAIN;
3484 goto failed;
3485 }
06ec9070 3486 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3487 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
83ba126a 3488 goto failed;
d38ceaf9
AD
3489 }
3490
d69b8971
YZ
3491 dev_info(adev->dev,
3492 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
3493 adev->gfx.config.max_shader_engines,
3494 adev->gfx.config.max_sh_per_se,
3495 adev->gfx.config.max_cu_per_sh,
3496 adev->gfx.cu_info.number);
3497
d38ceaf9
AD
3498 adev->accel_working = true;
3499
e59c0205
AX
3500 amdgpu_vm_check_compute_bug(adev);
3501
95844d20
MO
3502 /* Initialize the buffer migration limit. */
3503 if (amdgpu_moverate >= 0)
3504 max_MBps = amdgpu_moverate;
3505 else
3506 max_MBps = 8; /* Allow 8 MB/s. */
3507 /* Get a log2 for easy divisions. */
3508 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
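/*
 * Worked example: with the default of 8 MB/s above, log2_max_MBps =
 * ilog2(8) = 3, so the migration accounting can use cheap shifts instead
 * of divisions.
 */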
3509
9bc92b9c
ML
3510 amdgpu_fbdev_init(adev);
3511
d2f52ac8 3512 r = amdgpu_pm_sysfs_init(adev);
7c868b59
YT
3513 if (r) {
3514 adev->pm_sysfs_en = false;
d2f52ac8 3515 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
7c868b59
YT
3516 } else
3517 adev->pm_sysfs_en = true;
d2f52ac8 3518
5bb23532 3519 r = amdgpu_ucode_sysfs_init(adev);
7c868b59
YT
3520 if (r) {
3521 adev->ucode_sysfs_en = false;
5bb23532 3522 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
7c868b59
YT
3523 } else
3524 adev->ucode_sysfs_en = true;
5bb23532 3525
d38ceaf9
AD
3526 if ((amdgpu_testing & 1)) {
3527 if (adev->accel_working)
3528 amdgpu_test_moves(adev);
3529 else
3530 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
3531 }
d38ceaf9
AD
3532 if (amdgpu_benchmarking) {
3533 if (adev->accel_working)
3534 amdgpu_benchmark(adev, amdgpu_benchmarking);
3535 else
3536 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
3537 }
3538
b0adca4d
EQ
3539 /*
3540 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3541 * Otherwise the mgpu fan boost feature will be skipped because the
3542 * gpu instance count would be too low.
3543 */
3544 amdgpu_register_gpu_instance(adev);
3545
d38ceaf9
AD
3546 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3547 * explicit gating rather than handling it automatically.
3548 */
06ec9070 3549 r = amdgpu_device_ip_late_init(adev);
2c1a2784 3550 if (r) {
06ec9070 3551 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
e23b74aa 3552 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
83ba126a 3553 goto failed;
2c1a2784 3554 }
d38ceaf9 3555
108c6a63 3556 /* must succeed. */
511fdbc3 3557 amdgpu_ras_resume(adev);
108c6a63 3558
beff74bc
AD
3559 queue_delayed_work(system_wq, &adev->delayed_init_work,
3560 msecs_to_jiffies(AMDGPU_RESUME_MS));
3561
2c738637
ML
3562 if (amdgpu_sriov_vf(adev))
3563 flush_delayed_work(&adev->delayed_init_work);
3564
77f3a5cd 3565 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
5aea5327 3566 if (r)
77f3a5cd 3567 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166 3568
d155bef0
AB
3569 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3570 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
3571 if (r)
3572 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3573
c1dd4aa6
AG
3574 /* Keep the stored PCI config space at hand for restore after a sudden PCI error */
3575 if (amdgpu_device_cache_pci_state(adev->pdev))
3576 pci_restore_state(pdev);
3577
d38ceaf9 3578 return 0;
83ba126a
AD
3579
3580failed:
89041940 3581 amdgpu_vf_error_trans_all(adev);
fd496ca8 3582 if (atpx)
83ba126a 3583 vga_switcheroo_fini_domain_pm_ops(adev->dev);
8840a387 3584
4192f7b5
AD
3585failed_unmap:
3586 iounmap(adev->rmmio);
3587 adev->rmmio = NULL;
3588
83ba126a 3589 return r;
d38ceaf9
AD
3590}
3591
d38ceaf9
AD
3592/**
3593 * amdgpu_device_fini - tear down the driver
3594 *
3595 * @adev: amdgpu_device pointer
3596 *
3597 * Tear down the driver info (all asics).
3598 * Called at driver shutdown.
3599 */
3600void amdgpu_device_fini(struct amdgpu_device *adev)
3601{
aac89168 3602 dev_info(adev->dev, "amdgpu: finishing device.\n");
9f875167 3603 flush_delayed_work(&adev->delayed_init_work);
bb0cd09b 3604 ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
d0d13fe8 3605 adev->shutdown = true;
9f875167 3606
c1dd4aa6
AG
3607 kfree(adev->pci_state);
3608
752c683d
ML
3609 /* make sure the IB tests have finished before entering exclusive mode
3610 * to avoid preemption during the IB tests
3611 */
519b8b76 3612 if (amdgpu_sriov_vf(adev)) {
752c683d 3613 amdgpu_virt_request_full_gpu(adev, false);
519b8b76
BZ
3614 amdgpu_virt_fini_data_exchange(adev);
3615 }
752c683d 3616
e5b03032
ML
3617 /* disable all interrupts */
3618 amdgpu_irq_disable_all(adev);
ff97cba8
ML
3619 if (adev->mode_info.mode_config_initialized){
3620 if (!amdgpu_device_has_dc_support(adev))
4a580877 3621 drm_helper_force_disable_all(adev_to_drm(adev));
ff97cba8 3622 else
4a580877 3623 drm_atomic_helper_shutdown(adev_to_drm(adev));
ff97cba8 3624 }
d38ceaf9 3625 amdgpu_fence_driver_fini(adev);
7c868b59
YT
3626 if (adev->pm_sysfs_en)
3627 amdgpu_pm_sysfs_fini(adev);
d38ceaf9 3628 amdgpu_fbdev_fini(adev);
e230ac11 3629 amdgpu_device_ip_fini(adev);
75e1658e
ND
3630 release_firmware(adev->firmware.gpu_info_fw);
3631 adev->firmware.gpu_info_fw = NULL;
d38ceaf9
AD
3632 adev->accel_working = false;
3633 /* free i2c buses */
4562236b
HW
3634 if (!amdgpu_device_has_dc_support(adev))
3635 amdgpu_i2c_fini(adev);
bfca0289
SL
3636
3637 if (amdgpu_emu_mode != 1)
3638 amdgpu_atombios_fini(adev);
3639
d38ceaf9
AD
3640 kfree(adev->bios);
3641 adev->bios = NULL;
3840c5bc
AD
3642 if (amdgpu_has_atpx() &&
3643 (amdgpu_is_atpx_hybrid() ||
3644 amdgpu_has_atpx_dgpu_power_cntl()) &&
3645 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 3646 vga_switcheroo_unregister_client(adev->pdev);
fd496ca8 3647 if (amdgpu_device_supports_atpx(adev_to_drm(adev)))
83ba126a 3648 vga_switcheroo_fini_domain_pm_ops(adev->dev);
38d6be81
AD
3649 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
3650 vga_client_register(adev->pdev, NULL, NULL, NULL);
d38ceaf9
AD
3651 if (adev->rio_mem)
3652 pci_iounmap(adev->pdev, adev->rio_mem);
3653 adev->rio_mem = NULL;
3654 iounmap(adev->rmmio);
3655 adev->rmmio = NULL;
06ec9070 3656 amdgpu_device_doorbell_fini(adev);
e9bc1bf7 3657
7c868b59
YT
3658 if (adev->ucode_sysfs_en)
3659 amdgpu_ucode_sysfs_fini(adev);
77f3a5cd
ND
3660
3661 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
d155bef0
AB
3662 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3663 amdgpu_pmu_fini(adev);
72de33f8 3664 if (adev->mman.discovery_bin)
a190d1c7 3665 amdgpu_discovery_fini(adev);
d38ceaf9
AD
3666}
3667
3668
3669/*
3670 * Suspend & resume.
3671 */
3672/**
810ddc3a 3673 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 3674 *
87e3f136 3675 * @dev: drm dev pointer
87e3f136 3676 * @fbcon : notify the fbdev of suspend
d38ceaf9
AD
3677 *
3678 * Puts the hw in the suspend state (all asics).
3679 * Returns 0 for success or an error on failure.
3680 * Called at driver suspend.
3681 */
de185019 3682int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3683{
3684 struct amdgpu_device *adev;
3685 struct drm_crtc *crtc;
3686 struct drm_connector *connector;
f8d2d39e 3687 struct drm_connector_list_iter iter;
5ceb54c6 3688 int r;
d38ceaf9 3689
1348969a 3690 adev = drm_to_adev(dev);
d38ceaf9
AD
3691
3692 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3693 return 0;
3694
44779b43 3695 adev->in_suspend = true;
d38ceaf9
AD
3696 drm_kms_helper_poll_disable(dev);
3697
5f818173
S
3698 if (fbcon)
3699 amdgpu_fbdev_set_suspend(adev, 1);
3700
beff74bc 3701 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 3702
4562236b
HW
3703 if (!amdgpu_device_has_dc_support(adev)) {
3704 /* turn off display hw */
3705 drm_modeset_lock_all(dev);
f8d2d39e
LP
3706 drm_connector_list_iter_begin(dev, &iter);
3707 drm_for_each_connector_iter(connector, &iter)
3708 drm_helper_connector_dpms(connector,
3709 DRM_MODE_DPMS_OFF);
3710 drm_connector_list_iter_end(&iter);
4562236b 3711 drm_modeset_unlock_all(dev);
fe1053b7
AD
3712 /* unpin the front buffers and cursors */
3713 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3714 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3715 struct drm_framebuffer *fb = crtc->primary->fb;
3716 struct amdgpu_bo *robj;
3717
91334223 3718 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3719 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3720 r = amdgpu_bo_reserve(aobj, true);
3721 if (r == 0) {
3722 amdgpu_bo_unpin(aobj);
3723 amdgpu_bo_unreserve(aobj);
3724 }
756e6880 3725 }
756e6880 3726
fe1053b7
AD
3727 if (fb == NULL || fb->obj[0] == NULL) {
3728 continue;
3729 }
3730 robj = gem_to_amdgpu_bo(fb->obj[0]);
3731 /* don't unpin kernel fb objects */
3732 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
3733 r = amdgpu_bo_reserve(robj, true);
3734 if (r == 0) {
3735 amdgpu_bo_unpin(robj);
3736 amdgpu_bo_unreserve(robj);
3737 }
d38ceaf9
AD
3738 }
3739 }
3740 }
fe1053b7 3741
5e6932fe 3742 amdgpu_ras_suspend(adev);
3743
fe1053b7
AD
3744 r = amdgpu_device_ip_suspend_phase1(adev);
3745
ad887af9 3746 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
94fa5660 3747
d38ceaf9
AD
3748 /* evict vram memory */
3749 amdgpu_bo_evict_vram(adev);
3750
5ceb54c6 3751 amdgpu_fence_driver_suspend(adev);
d38ceaf9 3752
b00978de
PL
3753 if (adev->in_poweroff_reboot_com ||
3754 !amdgpu_acpi_is_s0ix_supported(adev) || amdgpu_in_reset(adev))
628c36d7
PL
3755 r = amdgpu_device_ip_suspend_phase2(adev);
3756 else
3757 amdgpu_gfx_state_change_set(adev, sGpuChangeState_D3Entry);
a0a71e49
AD
3758 /* evict remaining vram memory
3759 * This second call to evict vram is to evict the gart page table
3760 * using the CPU.
3761 */
d38ceaf9
AD
3762 amdgpu_bo_evict_vram(adev);
3763
d38ceaf9
AD
3764 return 0;
3765}
3766
3767/**
810ddc3a 3768 * amdgpu_device_resume - initiate device resume
d38ceaf9 3769 *
87e3f136 3770 * @dev: drm dev pointer
87e3f136 3771 * @fbcon : notify the fbdev of resume
d38ceaf9
AD
3772 *
3773 * Bring the hw back to operating state (all asics).
3774 * Returns 0 for success or an error on failure.
3775 * Called at driver resume.
3776 */
de185019 3777int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3778{
3779 struct drm_connector *connector;
f8d2d39e 3780 struct drm_connector_list_iter iter;
1348969a 3781 struct amdgpu_device *adev = drm_to_adev(dev);
756e6880 3782 struct drm_crtc *crtc;
03161a6e 3783 int r = 0;
d38ceaf9
AD
3784
3785 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3786 return 0;
3787
9ca5b8a1 3788 if (amdgpu_acpi_is_s0ix_supported(adev))
628c36d7
PL
3789 amdgpu_gfx_state_change_set(adev, sGpuChangeState_D0Entry);
3790
d38ceaf9 3791 /* post card */
39c640c0 3792 if (amdgpu_device_need_post(adev)) {
4d2997ab 3793 r = amdgpu_device_asic_init(adev);
74b0b157 3794 if (r)
aac89168 3795 dev_err(adev->dev, "amdgpu asic init failed\n");
74b0b157 3796 }
d38ceaf9 3797
06ec9070 3798 r = amdgpu_device_ip_resume(adev);
e6707218 3799 if (r) {
aac89168 3800 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
4d3b9ae5 3801 return r;
e6707218 3802 }
5ceb54c6
AD
3803 amdgpu_fence_driver_resume(adev);
3804
d38ceaf9 3805
06ec9070 3806 r = amdgpu_device_ip_late_init(adev);
03161a6e 3807 if (r)
4d3b9ae5 3808 return r;
d38ceaf9 3809
beff74bc
AD
3810 queue_delayed_work(system_wq, &adev->delayed_init_work,
3811 msecs_to_jiffies(AMDGPU_RESUME_MS));
3812
fe1053b7
AD
3813 if (!amdgpu_device_has_dc_support(adev)) {
3814 /* pin cursors */
3815 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3816 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3817
91334223 3818 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3819 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3820 r = amdgpu_bo_reserve(aobj, true);
3821 if (r == 0) {
3822 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
3823 if (r != 0)
aac89168 3824 dev_err(adev->dev, "Failed to pin cursor BO (%d)\n", r);
fe1053b7
AD
3825 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
3826 amdgpu_bo_unreserve(aobj);
3827 }
756e6880
AD
3828 }
3829 }
3830 }
ad887af9 3831 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
ba997709
YZ
3832 if (r)
3833 return r;
756e6880 3834
96a5d8d4 3835 /* Make sure IB tests flushed */
beff74bc 3836 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 3837
d38ceaf9
AD
3838 /* blat the mode back in */
3839 if (fbcon) {
4562236b
HW
3840 if (!amdgpu_device_has_dc_support(adev)) {
3841 /* pre DCE11 */
3842 drm_helper_resume_force_mode(dev);
3843
3844 /* turn on display hw */
3845 drm_modeset_lock_all(dev);
f8d2d39e
LP
3846
3847 drm_connector_list_iter_begin(dev, &iter);
3848 drm_for_each_connector_iter(connector, &iter)
3849 drm_helper_connector_dpms(connector,
3850 DRM_MODE_DPMS_ON);
3851 drm_connector_list_iter_end(&iter);
3852
4562236b 3853 drm_modeset_unlock_all(dev);
d38ceaf9 3854 }
4d3b9ae5 3855 amdgpu_fbdev_set_suspend(adev, 0);
d38ceaf9
AD
3856 }
3857
3858 drm_kms_helper_poll_enable(dev);
23a1a9e5 3859
5e6932fe 3860 amdgpu_ras_resume(adev);
3861
23a1a9e5
L
3862 /*
3863 * Most of the connector probing functions try to acquire runtime pm
3864 * refs to ensure that the GPU is powered on when connector polling is
3865 * performed. Since we're calling this from a runtime PM callback,
3866 * trying to acquire rpm refs will cause us to deadlock.
3867 *
3868 * Since we're guaranteed to be holding the rpm lock, it's safe to
3869 * temporarily disable the rpm helpers so this doesn't deadlock us.
3870 */
3871#ifdef CONFIG_PM
3872 dev->dev->power.disable_depth++;
3873#endif
4562236b
HW
3874 if (!amdgpu_device_has_dc_support(adev))
3875 drm_helper_hpd_irq_event(dev);
3876 else
3877 drm_kms_helper_hotplug_event(dev);
23a1a9e5
L
3878#ifdef CONFIG_PM
3879 dev->dev->power.disable_depth--;
3880#endif
44779b43
RZ
3881 adev->in_suspend = false;
3882
4d3b9ae5 3883 return 0;
d38ceaf9
AD
3884}
3885
e3ecdffa
AD
3886/**
3887 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
3888 *
3889 * @adev: amdgpu_device pointer
3890 *
3891 * The list of all the hardware IPs that make up the asic is walked and
3892 * the check_soft_reset callbacks are run. check_soft_reset determines
3893 * if the asic is still hung or not.
3894 * Returns true if any of the IPs are still in a hung state, false if not.
3895 */
06ec9070 3896static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
3897{
3898 int i;
3899 bool asic_hang = false;
3900
f993d628
ML
3901 if (amdgpu_sriov_vf(adev))
3902 return true;
3903
8bc04c29
AD
3904 if (amdgpu_asic_need_full_reset(adev))
3905 return true;
3906
63fbf42f 3907 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3908 if (!adev->ip_blocks[i].status.valid)
63fbf42f 3909 continue;
a1255107
AD
3910 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
3911 adev->ip_blocks[i].status.hang =
3912 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
3913 if (adev->ip_blocks[i].status.hang) {
aac89168 3914 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
3915 asic_hang = true;
3916 }
3917 }
3918 return asic_hang;
3919}
3920
e3ecdffa
AD
3921/**
3922 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3923 *
3924 * @adev: amdgpu_device pointer
3925 *
3926 * The list of all the hardware IPs that make up the asic is walked and the
3927 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
3928 * handles any IP specific hardware or software state changes that are
3929 * necessary for a soft reset to succeed.
3930 * Returns 0 on success, negative error code on failure.
3931 */
06ec9070 3932static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
3933{
3934 int i, r = 0;
3935
3936 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3937 if (!adev->ip_blocks[i].status.valid)
d31a501e 3938 continue;
a1255107
AD
3939 if (adev->ip_blocks[i].status.hang &&
3940 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3941 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
3942 if (r)
3943 return r;
3944 }
3945 }
3946
3947 return 0;
3948}
3949
e3ecdffa
AD
3950/**
3951 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3952 *
3953 * @adev: amdgpu_device pointer
3954 *
3955 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
3956 * reset is necessary to recover.
3957 * Returns true if a full asic reset is required, false if not.
3958 */
06ec9070 3959static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 3960{
da146d3b
AD
3961 int i;
3962
8bc04c29
AD
3963 if (amdgpu_asic_need_full_reset(adev))
3964 return true;
3965
da146d3b 3966 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3967 if (!adev->ip_blocks[i].status.valid)
da146d3b 3968 continue;
a1255107
AD
3969 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3970 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3971 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
3972 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3973 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 3974 if (adev->ip_blocks[i].status.hang) {
aac89168 3975 dev_info(adev->dev, "Some block need full reset!\n");
da146d3b
AD
3976 return true;
3977 }
3978 }
35d782fe
CZ
3979 }
3980 return false;
3981}
3982
e3ecdffa
AD
3983/**
3984 * amdgpu_device_ip_soft_reset - do a soft reset
3985 *
3986 * @adev: amdgpu_device pointer
3987 *
3988 * The list of all the hardware IPs that make up the asic is walked and the
3989 * soft_reset callbacks are run if the block is hung. soft_reset handles any
3990 * IP specific hardware or software state changes that are necessary to soft
3991 * reset the IP.
3992 * Returns 0 on success, negative error code on failure.
3993 */
06ec9070 3994static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3995{
3996 int i, r = 0;
3997
3998 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3999 if (!adev->ip_blocks[i].status.valid)
35d782fe 4000 continue;
a1255107
AD
4001 if (adev->ip_blocks[i].status.hang &&
4002 adev->ip_blocks[i].version->funcs->soft_reset) {
4003 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
4004 if (r)
4005 return r;
4006 }
4007 }
4008
4009 return 0;
4010}
4011
e3ecdffa
AD
4012/**
4013 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4014 *
4015 * @adev: amdgpu_device pointer
4016 *
4017 * The list of all the hardware IPs that make up the asic is walked and the
4018 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
4019 * handles any IP specific hardware or software state changes that are
4020 * necessary after the IP has been soft reset.
4021 * Returns 0 on success, negative error code on failure.
4022 */
06ec9070 4023static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4024{
4025 int i, r = 0;
4026
4027 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4028 if (!adev->ip_blocks[i].status.valid)
35d782fe 4029 continue;
a1255107
AD
4030 if (adev->ip_blocks[i].status.hang &&
4031 adev->ip_blocks[i].version->funcs->post_soft_reset)
4032 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
4033 if (r)
4034 return r;
4035 }
4036
4037 return 0;
4038}
4039
e3ecdffa 4040/**
c33adbc7 4041 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
4042 *
4043 * @adev: amdgpu_device pointer
4044 *
4045 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
4046 * restore things like GPUVM page tables after a GPU reset where
4047 * the contents of VRAM might be lost.
403009bf
CK
4048 *
4049 * Returns:
4050 * 0 on success, negative error code on failure.
e3ecdffa 4051 */
c33adbc7 4052static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 4053{
c41d1cf6 4054 struct dma_fence *fence = NULL, *next = NULL;
403009bf
CK
4055 struct amdgpu_bo *shadow;
4056 long r = 1, tmo;
c41d1cf6
ML
4057
4058 if (amdgpu_sriov_runtime(adev))
b045d3af 4059 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
4060 else
4061 tmo = msecs_to_jiffies(100);
4062
aac89168 4063 dev_info(adev->dev, "recover vram bo from shadow start\n");
c41d1cf6 4064 mutex_lock(&adev->shadow_list_lock);
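/*
 * Note: the same tmo budget is carried across all the waits below;
 * dma_fence_wait_timeout() returns the remaining jiffies, so the total
 * time spent waiting on shadow restores is bounded, and any timeout or
 * wait error aborts the loop and fails the recovery with -EIO.
 */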
403009bf
CK
4065 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
4066
4067 /* No need to recover an evicted BO */
4068 if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
b575f10d 4069 shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
403009bf
CK
4070 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
4071 continue;
4072
4073 r = amdgpu_bo_restore_shadow(shadow, &next);
4074 if (r)
4075 break;
4076
c41d1cf6 4077 if (fence) {
1712fb1a 4078 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
4079 dma_fence_put(fence);
4080 fence = next;
1712fb1a 4081 if (tmo == 0) {
4082 r = -ETIMEDOUT;
c41d1cf6 4083 break;
1712fb1a 4084 } else if (tmo < 0) {
4085 r = tmo;
4086 break;
4087 }
403009bf
CK
4088 } else {
4089 fence = next;
c41d1cf6 4090 }
c41d1cf6
ML
4091 }
4092 mutex_unlock(&adev->shadow_list_lock);
4093
403009bf
CK
4094 if (fence)
4095 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
4096 dma_fence_put(fence);
4097
1712fb1a 4098 if (r < 0 || tmo <= 0) {
aac89168 4099 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
4100 return -EIO;
4101 }
c41d1cf6 4102
aac89168 4103 dev_info(adev->dev, "recover vram bo from shadow done\n");
403009bf 4104 return 0;
c41d1cf6
ML
4105}
4106
a90ad3c2 4107
e3ecdffa 4108/**
06ec9070 4109 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e 4110 *
982a820b 4111 * @adev: amdgpu_device pointer
87e3f136 4112 * @from_hypervisor: request from hypervisor
5740682e
ML
4113 *
4114 * Do a VF FLR and reinitialize the ASIC.
4115 * Returns 0 if it succeeded, otherwise an error.
e3ecdffa
AD
4116 */
4117static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4118 bool from_hypervisor)
5740682e
ML
4119{
4120 int r;
4121
4122 if (from_hypervisor)
4123 r = amdgpu_virt_request_full_gpu(adev, true);
4124 else
4125 r = amdgpu_virt_reset_gpu(adev);
4126 if (r)
4127 return r;
a90ad3c2 4128
b639c22c
JZ
4129 amdgpu_amdkfd_pre_reset(adev);
4130
a90ad3c2 4131 /* Resume IP prior to SMC */
06ec9070 4132 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
4133 if (r)
4134 goto error;
a90ad3c2 4135
c9ffa427 4136 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 4137 /* we need recover gart prior to run SMC/CP/SDMA resume */
6c28aed6 4138 amdgpu_gtt_mgr_recover(ttm_manager_type(&adev->mman.bdev, TTM_PL_TT));
a90ad3c2 4139
7a3e0bb2
RZ
4140 r = amdgpu_device_fw_loading(adev);
4141 if (r)
4142 return r;
4143
a90ad3c2 4144 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 4145 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
4146 if (r)
4147 goto error;
a90ad3c2
ML
4148
4149 amdgpu_irq_gpu_reset_resume_helper(adev);
5740682e 4150 r = amdgpu_ib_ring_tests(adev);
f81e8d53 4151 amdgpu_amdkfd_post_reset(adev);
a90ad3c2 4152
abc34253
ED
4153error:
4154 amdgpu_virt_release_full_gpu(adev, true);
c41d1cf6 4155 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 4156 amdgpu_inc_vram_lost(adev);
c33adbc7 4157 r = amdgpu_device_recover_vram(adev);
a90ad3c2
ML
4158 }
4159
4160 return r;
4161}
4162
9a1cddd6 4163/**
4164 * amdgpu_device_has_job_running - check if there is any job in mirror list
4165 *
982a820b 4166 * @adev: amdgpu_device pointer
9a1cddd6 4167 *
4168 * check if there is any job in mirror list
4169 */
4170bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4171{
4172 int i;
4173 struct drm_sched_job *job;
4174
4175 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4176 struct amdgpu_ring *ring = adev->rings[i];
4177
4178 if (!ring || !ring->sched.thread)
4179 continue;
4180
4181 spin_lock(&ring->sched.job_list_lock);
6efa4b46
LT
4182 job = list_first_entry_or_null(&ring->sched.pending_list,
4183 struct drm_sched_job, list);
9a1cddd6 4184 spin_unlock(&ring->sched.job_list_lock);
4185 if (job)
4186 return true;
4187 }
4188 return false;
4189}
4190
12938fad
CK
4191/**
4192 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4193 *
982a820b 4194 * @adev: amdgpu_device pointer
12938fad
CK
4195 *
4196 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4197 * a hung GPU.
4198 */
4199bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4200{
4201 if (!amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 4202 dev_info(adev->dev, "Timeout, but no hardware hang detected.\n");
12938fad
CK
4203 return false;
4204 }
4205
3ba7b418
AG
4206 if (amdgpu_gpu_recovery == 0)
4207 goto disabled;
4208
4209 if (amdgpu_sriov_vf(adev))
4210 return true;
4211
4212 if (amdgpu_gpu_recovery == -1) {
4213 switch (adev->asic_type) {
fc42d47c
AG
4214 case CHIP_BONAIRE:
4215 case CHIP_HAWAII:
3ba7b418
AG
4216 case CHIP_TOPAZ:
4217 case CHIP_TONGA:
4218 case CHIP_FIJI:
4219 case CHIP_POLARIS10:
4220 case CHIP_POLARIS11:
4221 case CHIP_POLARIS12:
4222 case CHIP_VEGAM:
4223 case CHIP_VEGA20:
4224 case CHIP_VEGA10:
4225 case CHIP_VEGA12:
c43b849f 4226 case CHIP_RAVEN:
e9d4cf91 4227 case CHIP_ARCTURUS:
2cb44fb0 4228 case CHIP_RENOIR:
658c6639
AD
4229 case CHIP_NAVI10:
4230 case CHIP_NAVI14:
4231 case CHIP_NAVI12:
131a3c74 4232 case CHIP_SIENNA_CICHLID:
665fe4dc 4233 case CHIP_NAVY_FLOUNDER:
27859ee3 4234 case CHIP_DIMGREY_CAVEFISH:
3ba7b418
AG
4235 break;
4236 default:
4237 goto disabled;
4238 }
12938fad
CK
4239 }
4240
4241 return true;
3ba7b418
AG
4242
4243disabled:
aac89168 4244 dev_info(adev->dev, "GPU recovery disabled.\n");
3ba7b418 4245 return false;
12938fad
CK
4246}
4247
5c03e584
FX
4248int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
4249{
4250 u32 i;
4251 int ret = 0;
4252
4253 amdgpu_atombios_scratch_regs_engine_hung(adev, true);
4254
4255 dev_info(adev->dev, "GPU mode1 reset\n");
4256
4257 /* disable BM */
4258 pci_clear_master(adev->pdev);
4259
4260 amdgpu_device_cache_pci_state(adev->pdev);
4261
4262 if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
4263 dev_info(adev->dev, "GPU smu mode1 reset\n");
4264 ret = amdgpu_dpm_mode1_reset(adev);
4265 } else {
4266 dev_info(adev->dev, "GPU psp mode1 reset\n");
4267 ret = psp_gpu_reset(adev);
4268 }
4269
4270 if (ret)
4271 dev_err(adev->dev, "GPU mode1 reset failed\n");
4272
4273 amdgpu_device_load_pci_state(adev->pdev);
4274
4275 /* wait for asic to come out of reset */
4276 for (i = 0; i < adev->usec_timeout; i++) {
4277 u32 memsize = adev->nbio.funcs->get_memsize(adev);
4278
4279 if (memsize != 0xffffffff)
4280 break;
4281 udelay(1);
4282 }
4283
4284 amdgpu_atombios_scratch_regs_engine_hung(adev, false);
4285 return ret;
4286}
5c6dd71e 4287
26bc5340
AG
4288static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
4289 struct amdgpu_job *job,
4290 bool *need_full_reset_arg)
4291{
4292 int i, r = 0;
4293 bool need_full_reset = *need_full_reset_arg;
71182665 4294
728e7e0c
JZ
4295 amdgpu_debugfs_wait_dump(adev);
4296
b602ca5f
TZ
4297 if (amdgpu_sriov_vf(adev)) {
4298 /* stop the data exchange thread */
4299 amdgpu_virt_fini_data_exchange(adev);
4300 }
4301
71182665 4302 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
4303 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4304 struct amdgpu_ring *ring = adev->rings[i];
4305
51687759 4306 if (!ring || !ring->sched.thread)
0875dc9e 4307 continue;
5740682e 4308
2f9d4084
ML
4309 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
4310 amdgpu_fence_driver_force_completion(ring);
0875dc9e 4311 }
d38ceaf9 4312
222b5f04
AG
4313 if(job)
4314 drm_sched_increase_karma(&job->base);
4315
1d721ed6 4316 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
4317 if (!amdgpu_sriov_vf(adev)) {
4318
4319 if (!need_full_reset)
4320 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4321
4322 if (!need_full_reset) {
4323 amdgpu_device_ip_pre_soft_reset(adev);
4324 r = amdgpu_device_ip_soft_reset(adev);
4325 amdgpu_device_ip_post_soft_reset(adev);
4326 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 4327 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
26bc5340
AG
4328 need_full_reset = true;
4329 }
4330 }
4331
4332 if (need_full_reset)
4333 r = amdgpu_device_ip_suspend(adev);
4334
4335 *need_full_reset_arg = need_full_reset;
4336 }
4337
4338 return r;
4339}
4340
041a62bc 4341static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
26bc5340 4342 struct list_head *device_list_handle,
7ac71382
AG
4343 bool *need_full_reset_arg,
4344 bool skip_hw_reset)
26bc5340
AG
4345{
4346 struct amdgpu_device *tmp_adev = NULL;
4347 bool need_full_reset = *need_full_reset_arg, vram_lost = false;
4348 int r = 0;
4349
4350 /*
4351 * ASIC reset has to be done on all XGMI hive nodes ASAP
4352 * to allow proper links negotiation in FW (within 1 sec)
4353 */
7ac71382 4354 if (!skip_hw_reset && need_full_reset) {
26bc5340 4355 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
041a62bc 4356 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 4357 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
c96cf282 4358 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
4359 r = -EALREADY;
4360 } else
4361 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 4362
041a62bc 4363 if (r) {
aac89168 4364 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4a580877 4365 r, adev_to_drm(tmp_adev)->unique);
041a62bc 4366 break;
ce316fa5
LM
4367 }
4368 }
4369
041a62bc
AG
4370 /* For XGMI wait for all resets to complete before proceed */
4371 if (!r) {
ce316fa5
LM
4372 list_for_each_entry(tmp_adev, device_list_handle,
4373 gmc.xgmi.head) {
4374 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4375 flush_work(&tmp_adev->xgmi_reset_work);
4376 r = tmp_adev->asic_reset_res;
4377 if (r)
4378 break;
ce316fa5
LM
4379 }
4380 }
4381 }
ce316fa5 4382 }
26bc5340 4383
43c4d576
JC
4384 if (!r && amdgpu_ras_intr_triggered()) {
4385 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4386 if (tmp_adev->mmhub.funcs &&
4387 tmp_adev->mmhub.funcs->reset_ras_error_count)
4388 tmp_adev->mmhub.funcs->reset_ras_error_count(tmp_adev);
4389 }
4390
00eaa571 4391 amdgpu_ras_intr_cleared();
43c4d576 4392 }
00eaa571 4393
26bc5340
AG
4394 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4395 if (need_full_reset) {
4396 /* post card */
4d2997ab 4397 if (amdgpu_device_asic_init(tmp_adev))
aac89168 4398 dev_warn(tmp_adev->dev, "asic atom init failed!");
26bc5340
AG
4399
4400 if (!r) {
4401 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
4402 r = amdgpu_device_ip_resume_phase1(tmp_adev);
4403 if (r)
4404 goto out;
4405
4406 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
4407 if (vram_lost) {
77e7f829 4408 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 4409 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
4410 }
4411
6c28aed6 4412 r = amdgpu_gtt_mgr_recover(ttm_manager_type(&tmp_adev->mman.bdev, TTM_PL_TT));
26bc5340
AG
4413 if (r)
4414 goto out;
4415
4416 r = amdgpu_device_fw_loading(tmp_adev);
4417 if (r)
4418 return r;
4419
4420 r = amdgpu_device_ip_resume_phase2(tmp_adev);
4421 if (r)
4422 goto out;
4423
4424 if (vram_lost)
4425 amdgpu_device_fill_reset_magic(tmp_adev);
4426
fdafb359
EQ
4427 /*
4428 * Add this ASIC as tracked as reset was already
4429 * complete successfully.
4430 */
4431 amdgpu_register_gpu_instance(tmp_adev);
4432
7c04ca50 4433 r = amdgpu_device_ip_late_init(tmp_adev);
4434 if (r)
4435 goto out;
4436
565d1941
EQ
4437 amdgpu_fbdev_set_suspend(tmp_adev, 0);
4438
e8fbaf03
GC
4439 /*
4440 * The GPU enters a bad state once the number of pages
4441 * found faulty by ECC reaches the threshold, and RAS
4442 * recovery is scheduled next. So add one check
4443 * here to break recovery if it indeed exceeds the
4444 * bad page threshold, and remind the user to
4445 * retire this GPU or set a bigger
4446 * bad_page_threshold value to fix this the next
4447 * time the driver is probed.
4448 */
11003c68 4449 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
e8fbaf03
GC
4450 /* must succeed. */
4451 amdgpu_ras_resume(tmp_adev);
4452 } else {
4453 r = -EINVAL;
4454 goto out;
4455 }
e79a04d5 4456
26bc5340
AG
4457 /* Update PSP FW topology after reset */
4458 if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
4459 r = amdgpu_xgmi_update_topology(hive, tmp_adev);
4460 }
4461 }
4462
26bc5340
AG
4463out:
4464 if (!r) {
4465 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
4466 r = amdgpu_ib_ring_tests(tmp_adev);
4467 if (r) {
4468 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
4469 r = amdgpu_device_ip_suspend(tmp_adev);
4470 need_full_reset = true;
4471 r = -EAGAIN;
4472 goto end;
4473 }
4474 }
4475
4476 if (!r)
4477 r = amdgpu_device_recover_vram(tmp_adev);
4478 else
4479 tmp_adev->asic_reset_res = r;
4480 }
4481
4482end:
4483 *need_full_reset_arg = need_full_reset;
4484 return r;
4485}
4486
08ebb485
DL
4487static bool amdgpu_device_lock_adev(struct amdgpu_device *adev,
4488 struct amdgpu_hive_info *hive)
26bc5340 4489{
53b3f8f4
DL
4490 if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0)
4491 return false;
4492
08ebb485
DL
4493 if (hive) {
4494 down_write_nest_lock(&adev->reset_sem, &hive->hive_lock);
4495 } else {
4496 down_write(&adev->reset_sem);
4497 }
5740682e 4498
a3a09142
AD
4499 switch (amdgpu_asic_reset_method(adev)) {
4500 case AMD_RESET_METHOD_MODE1:
4501 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
4502 break;
4503 case AMD_RESET_METHOD_MODE2:
4504 adev->mp1_state = PP_MP1_STATE_RESET;
4505 break;
4506 default:
4507 adev->mp1_state = PP_MP1_STATE_NONE;
4508 break;
4509 }
1d721ed6
AG
4510
4511 return true;
26bc5340 4512}
d38ceaf9 4513
26bc5340
AG
4514static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
4515{
89041940 4516 amdgpu_vf_error_trans_all(adev);
a3a09142 4517 adev->mp1_state = PP_MP1_STATE_NONE;
53b3f8f4 4518 atomic_set(&adev->in_gpu_reset, 0);
6049db43 4519 up_write(&adev->reset_sem);
26bc5340
AG
4520}
4521
91fb309d
HC
4522/*
4523 * Lock a list of amdgpu devices in a hive safely. If this is not a hive
4524 * with multiple nodes, it behaves the same as amdgpu_device_lock_adev.
4525 *
4526 * Unlock won't require a roll back.
4527 */
4528static int amdgpu_device_lock_hive_adev(struct amdgpu_device *adev, struct amdgpu_hive_info *hive)
4529{
4530 struct amdgpu_device *tmp_adev = NULL;
4531
4532 if (adev->gmc.xgmi.num_physical_nodes > 1) {
4533 if (!hive) {
4534 dev_err(adev->dev, "Hive is NULL while device has multiple xgmi nodes");
4535 return -ENODEV;
4536 }
4537 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
4538 if (!amdgpu_device_lock_adev(tmp_adev, hive))
4539 goto roll_back;
4540 }
4541 } else if (!amdgpu_device_lock_adev(adev, hive))
4542 return -EAGAIN;
4543
4544 return 0;
4545roll_back:
4546 if (!list_is_first(&tmp_adev->gmc.xgmi.head, &hive->device_list)) {
4547 /*
4548 * If the locking iteration breaks in the middle of a hive,
4549 * it may mean there is a race issue, or that a hive device
4550 * locked up independently. We may or may not be in trouble,
4551 * so try to roll back the locks already taken and
4552 * give out a warning.
4553 */
4554 dev_warn(tmp_adev->dev, "Hive lock iteration broke in the middle. Rolling back to unlock");
4555 list_for_each_entry_continue_reverse(tmp_adev, &hive->device_list, gmc.xgmi.head) {
4556 amdgpu_device_unlock_adev(tmp_adev);
4557 }
4558 }
4559 return -EAGAIN;
4560}
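/*
 * Sketch of the expected call pattern (for illustration only): the recovery
 * path below locks every device in the hive up front with
 * amdgpu_device_lock_hive_adev(), performs the reset, and then unlocks each
 * device individually with amdgpu_device_unlock_adev(); unlocking cannot
 * fail, which is why no roll back is needed on that side.
 */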
4561
3f12acc8
EQ
4562static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
4563{
4564 struct pci_dev *p = NULL;
4565
4566 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4567 adev->pdev->bus->number, 1);
4568 if (p) {
4569 pm_runtime_enable(&(p->dev));
4570 pm_runtime_resume(&(p->dev));
4571 }
4572}
4573
4574static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
4575{
4576 enum amd_reset_method reset_method;
4577 struct pci_dev *p = NULL;
4578 u64 expires;
4579
4580 /*
4581 * For now, only BACO and mode1 reset are confirmed
4582 * to suffer the audio issue if not properly suspended.
4583 */
4584 reset_method = amdgpu_asic_reset_method(adev);
4585 if ((reset_method != AMD_RESET_METHOD_BACO) &&
4586 (reset_method != AMD_RESET_METHOD_MODE1))
4587 return -EINVAL;
4588
4589 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4590 adev->pdev->bus->number, 1);
4591 if (!p)
4592 return -ENODEV;
4593
4594 expires = pm_runtime_autosuspend_expiration(&(p->dev));
4595 if (!expires)
4596 /*
4597 * If we cannot get the audio device autosuspend delay,
4598 * a fixed 4S interval will be used. Since 3S is
4599 * the audio controller's default autosuspend delay setting,
4600 * the 4S used here is guaranteed to cover it.
4601 */
54b7feb9 4602 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
4603
4604 while (!pm_runtime_status_suspended(&(p->dev))) {
4605 if (!pm_runtime_suspend(&(p->dev)))
4606 break;
4607
4608 if (expires < ktime_get_mono_fast_ns()) {
4609 dev_warn(adev->dev, "failed to suspend display audio\n");
4610 /* TODO: abort the succeeding gpu reset? */
4611 return -ETIMEDOUT;
4612 }
4613 }
4614
4615 pm_runtime_disable(&(p->dev));
4616
4617 return 0;
4618}
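/*
 * Note: this helper pairs with amdgpu_device_resume_display_audio() above.
 * The recovery path suspends the audio function before the ASIC reset and
 * resumes it once the reset has finished (tracked via audio_suspended), so
 * the audio driver never sees the hardware change underneath it.
 */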
4619
26bc5340
AG
4620/**
4621 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
4622 *
982a820b 4623 * @adev: amdgpu_device pointer
26bc5340
AG
4624 * @job: which job trigger hang
4625 *
4626 * Attempt to reset the GPU if it has hung (all asics).
4627 * Attempt to do soft-reset or full-reset and reinitialize Asic
4628 * Returns 0 for success or an error on failure.
4629 */
4630
4631int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
4632 struct amdgpu_job *job)
4633{
1d721ed6 4634 struct list_head device_list, *device_list_handle = NULL;
7dd8c205
EQ
4635 bool need_full_reset = false;
4636 bool job_signaled = false;
26bc5340 4637 struct amdgpu_hive_info *hive = NULL;
26bc5340 4638 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 4639 int i, r = 0;
bb5c7235 4640 bool need_emergency_restart = false;
3f12acc8 4641 bool audio_suspended = false;
26bc5340 4642
6e3cd2a9 4643 /*
bb5c7235
WS
4644 * Special case: RAS triggered and full reset isn't supported
4645 */
4646 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
4647
d5ea093e
AG
4648 /*
4649 * Flush RAM to disk so that after reboot
4650 * the user can read the log and see why the system rebooted.
4651 */
bb5c7235 4652 if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
4653 DRM_WARN("Emergency reboot.");
4654
4655 ksys_sync_helper();
4656 emergency_restart();
4657 }
4658
b823821f 4659 dev_info(adev->dev, "GPU %s begin!\n",
bb5c7235 4660 need_emergency_restart ? "jobs stop":"reset");
26bc5340
AG
4661
4662 /*
1d721ed6
AG
4663 * Here we trylock to avoid a chain of resets executing from
4664 * either a trigger by jobs on different adevs in an XGMI hive or jobs on
4665 * different schedulers for the same device while this TO handler is running.
4666 * We always reset all schedulers for a device and all devices for an XGMI
4667 * hive, so that should take care of them too.
26bc5340 4668 */
d95e8e97 4669 hive = amdgpu_get_xgmi_hive(adev);
53b3f8f4
DL
4670 if (hive) {
4671 if (atomic_cmpxchg(&hive->in_reset, 0, 1) != 0) {
4672 DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
4673 job ? job->base.id : -1, hive->hive_id);
d95e8e97 4674 amdgpu_put_xgmi_hive(hive);
91fb309d
HC
4675 if (job)
4676 drm_sched_increase_karma(&job->base);
53b3f8f4
DL
4677 return 0;
4678 }
4679 mutex_lock(&hive->hive_lock);
1d721ed6 4680 }
26bc5340 4681
91fb309d
HC
4682 /*
4683 * Lock the device before we try to operate on the linked list.
4684 * If we didn't get the device lock, don't touch the linked list since
4685 * others may be iterating over it.
4686 */
4687 r = amdgpu_device_lock_hive_adev(adev, hive);
4688 if (r) {
4689 dev_info(adev->dev, "Bailing on TDR for s_job:%llx, as another already in progress",
4690 job ? job->base.id : -1);
4691
4692 /* even we skipped this reset, still need to set the job to guilty */
4693 if (job)
4694 drm_sched_increase_karma(&job->base);
4695 goto skip_recovery;
4696 }
4697
9e94d22c
EQ
4698 /*
4699 * Build list of devices to reset.
4700 * In case we are in XGMI hive mode, resort the device list
4701 * to put adev in the 1st position.
4702 */
4703 INIT_LIST_HEAD(&device_list);
4704 if (adev->gmc.xgmi.num_physical_nodes > 1) {
9e94d22c
EQ
4705 if (!list_is_first(&adev->gmc.xgmi.head, &hive->device_list))
4706 list_rotate_to_front(&adev->gmc.xgmi.head, &hive->device_list);
26bc5340
AG
4707 device_list_handle = &hive->device_list;
4708 } else {
4709 list_add_tail(&adev->gmc.xgmi.head, &device_list);
4710 device_list_handle = &device_list;
4711 }
4712
1d721ed6
AG
4713 /* block all schedulers and reset given job's ring */
4714 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3f12acc8
EQ
4715 /*
4716 * Try to put the audio codec into suspend state
4717 * before gpu reset started.
4718 *
4719 * This is needed because the power domain of the graphics
4720 * device is shared with the AZ power domain. Without this,
4721 * we may change the audio hardware from behind
4722 * the audio driver's back. That will trigger
4723 * some audio codec errors.
4724 */
4725 if (!amdgpu_device_suspend_display_audio(tmp_adev))
4726 audio_suspended = true;
4727
9e94d22c
EQ
4728 amdgpu_ras_set_error_query_ready(tmp_adev, false);
4729
52fb44cf
EQ
4730 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
4731
9e94d22c
EQ
4732 if (!amdgpu_sriov_vf(tmp_adev))
4733 amdgpu_amdkfd_pre_reset(tmp_adev);
4734
12ffa55d
AG
4735 /*
4736 * Mark these ASICs to be reset as untracked first,
4737 * and add them back after the reset has completed.
4738 */
4739 amdgpu_unregister_gpu_instance(tmp_adev);
4740
a2f63ee8 4741 amdgpu_fbdev_set_suspend(tmp_adev, 1);
565d1941 4742
f1c1314b 4743 /* disable ras on ALL IPs */
bb5c7235 4744 if (!need_emergency_restart &&
b823821f 4745 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 4746 amdgpu_ras_suspend(tmp_adev);
4747
1d721ed6
AG
4748 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4749 struct amdgpu_ring *ring = tmp_adev->rings[i];
4750
4751 if (!ring || !ring->sched.thread)
4752 continue;
4753
0b2d2c2e 4754 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 4755
bb5c7235 4756 if (need_emergency_restart)
7c6e68c7 4757 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6 4758 }
8f8c80f4 4759 atomic_inc(&tmp_adev->gpu_reset_counter);
1d721ed6
AG
4760 }
4761
bb5c7235 4762 if (need_emergency_restart)
7c6e68c7
AG
4763 goto skip_sched_resume;
4764
1d721ed6
AG
4765 /*
4766 * Must check guilty signal here since after this point all old
4767 * HW fences are force signaled.
4768 *
4769 * job->base holds a reference to parent fence
4770 */
4771 if (job && job->base.s_fence->parent &&
7dd8c205 4772 dma_fence_is_signaled(job->base.s_fence->parent)) {
1d721ed6 4773 job_signaled = true;
1d721ed6
AG
4774 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
4775 goto skip_hw_reset;
4776 }
4777
26bc5340
AG
4778retry: /* Rest of adevs pre asic reset from XGMI hive. */
4779 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
26bc5340 4780 r = amdgpu_device_pre_asic_reset(tmp_adev,
ded08454 4781 (tmp_adev == adev) ? job : NULL,
26bc5340
AG
4782 &need_full_reset);
4783 /*TODO Should we stop ?*/
4784 if (r) {
aac89168 4785 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
4a580877 4786 r, adev_to_drm(tmp_adev)->unique);
26bc5340
AG
4787 tmp_adev->asic_reset_res = r;
4788 }
4789 }
4790
4791 /* Actual ASIC resets if needed.*/
4792 /* TODO Implement XGMI hive reset logic for SRIOV */
4793 if (amdgpu_sriov_vf(adev)) {
4794 r = amdgpu_device_reset_sriov(adev, job ? false : true);
4795 if (r)
4796 adev->asic_reset_res = r;
4797 } else {
7ac71382 4798 r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset, false);
26bc5340
AG
4799 if (r && r == -EAGAIN)
4800 goto retry;
4801 }
4802
1d721ed6
AG
4803skip_hw_reset:
4804
26bc5340
AG
4805 /* Post ASIC reset for all devs .*/
4806 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
7c6e68c7 4807
1d721ed6
AG
4808 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4809 struct amdgpu_ring *ring = tmp_adev->rings[i];
4810
4811 if (!ring || !ring->sched.thread)
4812 continue;
4813
4814 /* No point to resubmit jobs if we didn't HW reset*/
4815 if (!tmp_adev->asic_reset_res && !job_signaled)
4816 drm_sched_resubmit_jobs(&ring->sched);
4817
4818 drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
4819 }
4820
4821 if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
4a580877 4822 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
1d721ed6
AG
4823 }
4824
4825 tmp_adev->asic_reset_res = 0;
26bc5340
AG
4826
4827 if (r) {
4828 /* bad news, how to tell it to userspace ? */
12ffa55d 4829 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
4830 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
4831 } else {
12ffa55d 4832 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340 4833 }
7c6e68c7 4834 }
26bc5340 4835
7c6e68c7
AG
4836skip_sched_resume:
4837 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4838 /*unlock kfd: SRIOV would do it separately */
bb5c7235 4839 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
7c6e68c7 4840 amdgpu_amdkfd_post_reset(tmp_adev);
3f12acc8
EQ
4841 if (audio_suspended)
4842 amdgpu_device_resume_display_audio(tmp_adev);
26bc5340
AG
4843 amdgpu_device_unlock_adev(tmp_adev);
4844 }
4845
cbfd17f7 4846skip_recovery:
9e94d22c 4847 if (hive) {
53b3f8f4 4848 atomic_set(&hive->in_reset, 0);
9e94d22c 4849 mutex_unlock(&hive->hive_lock);
d95e8e97 4850 amdgpu_put_xgmi_hive(hive);
9e94d22c 4851 }
26bc5340 4852
91fb309d 4853 if (r && r != -EAGAIN)
26bc5340 4854 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
d38ceaf9
AD
4855 return r;
4856}
4857
e3ecdffa
AD
4858/**
4859 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
4860 *
4861 * @adev: amdgpu_device pointer
4862 *
4863 * Fetches and stores in the driver the PCIE capabilities (gen speed
4864 * and lanes) of the slot the device is in. Handles APUs and
4865 * virtualized environments where PCIE config space may not be available.
4866 */
5494d864 4867static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 4868{
5d9a6330 4869 struct pci_dev *pdev;
c5313457
HK
4870 enum pci_bus_speed speed_cap, platform_speed_cap;
4871 enum pcie_link_width platform_link_width;
d0dd7f0c 4872
cd474ba0
AD
4873 if (amdgpu_pcie_gen_cap)
4874 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 4875
cd474ba0
AD
4876 if (amdgpu_pcie_lane_cap)
4877 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 4878
cd474ba0
AD
4879 /* covers APUs as well */
4880 if (pci_is_root_bus(adev->pdev->bus)) {
4881 if (adev->pm.pcie_gen_mask == 0)
4882 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
4883 if (adev->pm.pcie_mlw_mask == 0)
4884 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 4885 return;
cd474ba0 4886 }
d0dd7f0c 4887
c5313457
HK
4888 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
4889 return;
4890
dbaa922b
AD
4891 pcie_bandwidth_available(adev->pdev, NULL,
4892 &platform_speed_cap, &platform_link_width);
c5313457 4893
cd474ba0 4894 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
4895 /* asic caps */
4896 pdev = adev->pdev;
4897 speed_cap = pcie_get_speed_cap(pdev);
4898 if (speed_cap == PCI_SPEED_UNKNOWN) {
4899 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
4900 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4901 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 4902 } else {
2b3a1f51
FX
4903 if (speed_cap == PCIE_SPEED_32_0GT)
4904 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4905 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4906 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4907 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
4908 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
4909 else if (speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
4910 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4911 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4912 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4913 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
4914 else if (speed_cap == PCIE_SPEED_8_0GT)
4915 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4916 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4917 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
4918 else if (speed_cap == PCIE_SPEED_5_0GT)
4919 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4920 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
4921 else
4922 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
4923 }
4924 /* platform caps */
c5313457 4925 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
4926 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4927 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4928 } else {
2b3a1f51
FX
4929 if (platform_speed_cap == PCIE_SPEED_32_0GT)
4930 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4931 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4932 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4933 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
4934 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
4935 else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
4936 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4937 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4938 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4939 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 4940 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
4941 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4942 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4943 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 4944 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
4945 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4946 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4947 else
4948 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
4949
cd474ba0
AD
4950 }
4951 }
4952 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 4953 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
4954 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
4955 } else {
c5313457 4956 switch (platform_link_width) {
5d9a6330 4957 case PCIE_LNK_X32:
cd474ba0
AD
4958 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
4959 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4960 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4961 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4962 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4963 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4964 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4965 break;
5d9a6330 4966 case PCIE_LNK_X16:
cd474ba0
AD
4967 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4968 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4969 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4970 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4971 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4972 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4973 break;
5d9a6330 4974 case PCIE_LNK_X12:
cd474ba0
AD
4975 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4976 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4977 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4978 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4979 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4980 break;
5d9a6330 4981 case PCIE_LNK_X8:
cd474ba0
AD
4982 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4983 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4984 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4985 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4986 break;
5d9a6330 4987 case PCIE_LNK_X4:
cd474ba0
AD
4988 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4989 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4990 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4991 break;
5d9a6330 4992 case PCIE_LNK_X2:
cd474ba0
AD
4993 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4994 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4995 break;
5d9a6330 4996 case PCIE_LNK_X1:
cd474ba0
AD
4997 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
4998 break;
4999 default:
5000 break;
5001 }
d0dd7f0c
AD
5002 }
5003 }
5004}
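/*
 * Illustrative example (not part of the driver): for a Gen3-capable dGPU in
 * a Gen3 x16 slot, both speed_cap and platform_speed_cap report
 * PCIE_SPEED_8_0GT, so pcie_gen_mask ends up with the GEN1|GEN2|GEN3 support
 * bits set for both the ASIC and the platform, and pcie_mlw_mask covers the
 * X1 through X16 link widths.
 */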
d38ceaf9 5005
361dbd01
AD
5006int amdgpu_device_baco_enter(struct drm_device *dev)
5007{
1348969a 5008 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5009 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01 5010
4a580877 5011 if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
361dbd01
AD
5012 return -ENOTSUPP;
5013
6fb33209 5014 if (ras && ras->supported && adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5015 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
5016
9530273e 5017 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
5018}
5019
5020int amdgpu_device_baco_exit(struct drm_device *dev)
5021{
1348969a 5022 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5023 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 5024 int ret = 0;
361dbd01 5025
4a580877 5026 if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
361dbd01
AD
5027 return -ENOTSUPP;
5028
9530273e
EQ
5029 ret = amdgpu_dpm_baco_exit(adev);
5030 if (ret)
5031 return ret;
7a22677b 5032
6fb33209 5033 if (ras && ras->supported && adev->nbio.funcs->enable_doorbell_interrupt)
7a22677b
LM
5034 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
5035
5036 return 0;
361dbd01 5037}
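
/*
 * Illustrative sketch (hypothetical caller, not the driver's actual
 * runtime-PM code): the BACO helpers above are meant to bracket a
 * low-power window, e.g. a runtime-suspend/resume pair.  Treating
 * -ENOTSUPP as "nothing to do" is an assumption made for the example.
 */
static int example_runtime_suspend(struct drm_device *dev)
{
	int r;

	r = amdgpu_device_baco_enter(dev);
	if (r == -ENOTSUPP)
		return 0;	/* board has no BACO support; skip */

	return r;
}

static int example_runtime_resume(struct drm_device *dev)
{
	return amdgpu_device_baco_exit(dev);
}
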
c9a6b82f 5038
acd89fca
AG
5039static void amdgpu_cancel_all_tdr(struct amdgpu_device *adev)
5040{
5041 int i;
5042
5043 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5044 struct amdgpu_ring *ring = adev->rings[i];
5045
5046 if (!ring || !ring->sched.thread)
5047 continue;
5048
5049 cancel_delayed_work_sync(&ring->sched.work_tdr);
5050 }
5051}
5052
c9a6b82f
AG
5053/**
5054 * amdgpu_pci_error_detected - Called when a PCI error is detected.
5055 * @pdev: PCI device struct
5056 * @state: PCI channel state
5057 *
5058 * Description: Called when a PCI error is detected.
5059 *
5060 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
5061 */
5062pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
5063{
5064 struct drm_device *dev = pci_get_drvdata(pdev);
5065 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5066 int i;
c9a6b82f
AG
5067
5068 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
5069
6894305c
AG
5070 if (adev->gmc.xgmi.num_physical_nodes > 1) {
5071 DRM_WARN("No support for XGMI hive yet...");
5072 return PCI_ERS_RESULT_DISCONNECT;
5073 }
5074
c9a6b82f
AG
5075 switch (state) {
5076 case pci_channel_io_normal:
5077 return PCI_ERS_RESULT_CAN_RECOVER;
acd89fca 5078 /* Fatal error, prepare for slot reset */
8a11d283
TZ
5079 case pci_channel_io_frozen:
5080 /*
acd89fca
AG
5081 * Cancel and wait for all TDRs in progress if we fail to
5082 * set adev->in_gpu_reset in amdgpu_device_lock_adev
5083 *
5084 * Locking adev->reset_sem will prevent any external access
5085 * to the GPU during PCI error recovery
5086 */
5087 while (!amdgpu_device_lock_adev(adev, NULL))
5088 amdgpu_cancel_all_tdr(adev);
5089
5090 /*
5091 * Block any work scheduling as we do for regular GPU reset
5092 * for the duration of the recovery
5093 */
5094 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5095 struct amdgpu_ring *ring = adev->rings[i];
5096
5097 if (!ring || !ring->sched.thread)
5098 continue;
5099
5100 drm_sched_stop(&ring->sched, NULL);
5101 }
8f8c80f4 5102 atomic_inc(&adev->gpu_reset_counter);
c9a6b82f
AG
5103 return PCI_ERS_RESULT_NEED_RESET;
5104 case pci_channel_io_perm_failure:
5105 /* Permanent error, prepare for device removal */
5106 return PCI_ERS_RESULT_DISCONNECT;
5107 }
5108
5109 return PCI_ERS_RESULT_NEED_RESET;
5110}
5111
5112/**
5113 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
5114 * @pdev: pointer to PCI device
5115 */
5116pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
5117{
5118
5119 DRM_INFO("PCI error: mmio enabled callback!!\n");
5120
5121 /* TODO - dump whatever for debugging purposes */
5122
5123 /* This is called only if amdgpu_pci_error_detected() returns
5124 * PCI_ERS_RESULT_CAN_RECOVER. Read/write access to the device still
5125 * works, so there is no need to reset the slot.
5126 */
5127
5128 return PCI_ERS_RESULT_RECOVERED;
5129}
5130
5131/**
5132 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
5133 * @pdev: PCI device struct
5134 *
5135 * Description: This routine is called by the pci error recovery
5136 * code after the PCI slot has been reset, just before we
5137 * should resume normal operations.
5138 */
5139pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
5140{
5141 struct drm_device *dev = pci_get_drvdata(pdev);
5142 struct amdgpu_device *adev = drm_to_adev(dev);
362c7b91 5143 int r, i;
7ac71382 5144 bool need_full_reset = true;
362c7b91 5145 u32 memsize;
7ac71382 5146 struct list_head device_list;
c9a6b82f
AG
5147
5148 DRM_INFO("PCI error: slot reset callback!!\n");
5149
7ac71382
AG
5150 INIT_LIST_HEAD(&device_list);
5151 list_add_tail(&adev->gmc.xgmi.head, &device_list);
5152
362c7b91
AG
5153 /* wait for asic to come out of reset */
5154 msleep(500);
5155
7ac71382 5156 /* Restore PCI confspace */
c1dd4aa6 5157 amdgpu_device_load_pci_state(pdev);
c9a6b82f 5158
362c7b91
AG
5159 /* confirm ASIC came out of reset */
5160 for (i = 0; i < adev->usec_timeout; i++) {
5161 memsize = amdgpu_asic_get_config_memsize(adev);
5162
5163 if (memsize != 0xffffffff)
5164 break;
5165 udelay(1);
5166 }
5167 if (memsize == 0xffffffff) {
5168 r = -ETIME;
5169 goto out;
5170 }
5171
8a11d283 5172 adev->in_pci_err_recovery = true;
7ac71382 5173 r = amdgpu_device_pre_asic_reset(adev, NULL, &need_full_reset);
bf36b52e 5174 adev->in_pci_err_recovery = false;
c9a6b82f
AG
5175 if (r)
5176 goto out;
5177
7ac71382 5178 r = amdgpu_do_asic_reset(NULL, &device_list, &need_full_reset, true);
c9a6b82f
AG
5179
5180out:
c9a6b82f 5181 if (!r) {
c1dd4aa6
AG
5182 if (amdgpu_device_cache_pci_state(adev->pdev))
5183 pci_restore_state(adev->pdev);
5184
c9a6b82f
AG
5185 DRM_INFO("PCIe error recovery succeeded\n");
5186 } else {
5187 DRM_ERROR("PCIe error recovery failed, err:%d\n", r);
5188 amdgpu_device_unlock_adev(adev);
5189 }
5190
5191 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
5192}
5193
5194/**
5195 * amdgpu_pci_resume() - resume normal ops after PCI reset
5196 * @pdev: pointer to PCI device
5197 *
5198 * Called when the error recovery driver tells us that it is
505199a3 5199 * OK to resume normal operation.
c9a6b82f
AG
5200 */
5201void amdgpu_pci_resume(struct pci_dev *pdev)
5202{
5203 struct drm_device *dev = pci_get_drvdata(pdev);
5204 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5205 int i;
c9a6b82f 5206
c9a6b82f
AG
5207
5208 DRM_INFO("PCI error: resume callback!!\n");
acd89fca
AG
5209
5210 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5211 struct amdgpu_ring *ring = adev->rings[i];
5212
5213 if (!ring || !ring->sched.thread)
5214 continue;
5215
5216
5217 drm_sched_resubmit_jobs(&ring->sched);
5218 drm_sched_start(&ring->sched, true);
5219 }
5220
5221 amdgpu_device_unlock_adev(adev);
c9a6b82f 5222}
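
/*
 * Sketch of how the four callbacks above plug into the PCI error
 * recovery core.  The driver-side wiring lives in amdgpu_drv.c; the
 * struct and field names below are the standard ones from
 * <linux/pci.h>, but the instance name here is illustrative.
 */
static const struct pci_error_handlers example_amdgpu_pci_err_handler = {
	.error_detected	= amdgpu_pci_error_detected,
	.mmio_enabled	= amdgpu_pci_mmio_enabled,
	.slot_reset	= amdgpu_pci_slot_reset,
	.resume		= amdgpu_pci_resume,
};

/* In the pci_driver definition:  .err_handler = &example_amdgpu_pci_err_handler, */
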
c1dd4aa6
AG
5223
5224bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
5225{
5226 struct drm_device *dev = pci_get_drvdata(pdev);
5227 struct amdgpu_device *adev = drm_to_adev(dev);
5228 int r;
5229
5230 r = pci_save_state(pdev);
5231 if (!r) {
5232 kfree(adev->pci_state);
5233
5234 adev->pci_state = pci_store_saved_state(pdev);
5235
5236 if (!adev->pci_state) {
5237 DRM_ERROR("Failed to store PCI saved state\n");
5238 return false;
5239 }
5240 } else {
5241 DRM_WARN("Failed to save PCI state, err:%d\n", r);
5242 return false;
5243 }
5244
5245 return true;
5246}
5247
5248bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
5249{
5250 struct drm_device *dev = pci_get_drvdata(pdev);
5251 struct amdgpu_device *adev = drm_to_adev(dev);
5252 int r;
5253
5254 if (!adev->pci_state)
5255 return false;
5256
5257 r = pci_load_saved_state(pdev, adev->pci_state);
5258
5259 if (!r) {
5260 pci_restore_state(pdev);
5261 } else {
5262 DRM_WARN("Failed to load PCI state, err:%d\n", r);
5263 return false;
5264 }
5265
5266 return true;
5267}
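
/*
 * Usage sketch (hypothetical call sites): the two helpers above work as
 * a pair.  Config space is cached once the device is fully up, and
 * loaded back after an event that clobbers it, such as the slot reset
 * handled above.
 */
static void example_cache_after_init(struct amdgpu_device *adev)
{
	/* snapshot known-good config space for later recovery */
	if (!amdgpu_device_cache_pci_state(adev->pdev))
		DRM_WARN("PCI config space not cached; recovery may be degraded\n");
}

static void example_restore_after_reset(struct amdgpu_device *adev)
{
	/* put the cached config space back after the slot was reset */
	if (!amdgpu_device_load_pci_state(adev->pdev))
		DRM_WARN("PCI config space not restored\n");
}
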
5268
5269