drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
b1ddf548 28#include <linux/power_supply.h>
0875dc9e 29#include <linux/kthread.h>
fdf2f6c5 30#include <linux/module.h>
d38ceaf9
AD
31#include <linux/console.h>
32#include <linux/slab.h>
fdf2f6c5 33
4562236b 34#include <drm/drm_atomic_helper.h>
fcd70cd3 35#include <drm/drm_probe_helper.h>
d38ceaf9
AD
36#include <drm/amdgpu_drm.h>
37#include <linux/vgaarb.h>
38#include <linux/vga_switcheroo.h>
39#include <linux/efi.h>
40#include "amdgpu.h"
f4b373f4 41#include "amdgpu_trace.h"
d38ceaf9
AD
42#include "amdgpu_i2c.h"
43#include "atom.h"
44#include "amdgpu_atombios.h"
a5bde2f9 45#include "amdgpu_atomfirmware.h"
d0dd7f0c 46#include "amd_pcie.h"
33f34802
KW
47#ifdef CONFIG_DRM_AMDGPU_SI
48#include "si.h"
49#endif
a2e73f56
AD
50#ifdef CONFIG_DRM_AMDGPU_CIK
51#include "cik.h"
52#endif
aaa36a97 53#include "vi.h"
460826e6 54#include "soc15.h"
0a5b8c7b 55#include "nv.h"
d38ceaf9 56#include "bif/bif_4_1_d.h"
9accf2fd 57#include <linux/pci.h>
bec86378 58#include <linux/firmware.h>
89041940 59#include "amdgpu_vf_error.h"
d38ceaf9 60
ba997709 61#include "amdgpu_amdkfd.h"
d2f52ac8 62#include "amdgpu_pm.h"
d38ceaf9 63
5183411b 64#include "amdgpu_xgmi.h"
c030f2e4 65#include "amdgpu_ras.h"
9c7c85f7 66#include "amdgpu_pmu.h"
bd607166 67#include "amdgpu_fru_eeprom.h"
5183411b 68
d5ea093e 69#include <linux/suspend.h>
c6a6e2db 70#include <drm/task_barrier.h>
3f12acc8 71#include <linux/pm_runtime.h>
d5ea093e 72
e2a75f88 73MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
3f76dced 74MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
2d2e5e7e 75MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
ad5a67a7 76MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
54c4d17e 77MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
65e60f6e 78MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
b51a26a0 79MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
23c6268e 80MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
ed42cfe1 81MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
42b325e5 82MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
4e52a9f8 83MODULE_FIRMWARE("amdgpu/vangogh_gpu_info.bin");
e2a75f88 84
2dc80b00
S
85#define AMDGPU_RESUME_MS 2000
86
050091ab 87const char *amdgpu_asic_name[] = {
da69c161
KW
88 "TAHITI",
89 "PITCAIRN",
90 "VERDE",
91 "OLAND",
92 "HAINAN",
d38ceaf9
AD
93 "BONAIRE",
94 "KAVERI",
95 "KABINI",
96 "HAWAII",
97 "MULLINS",
98 "TOPAZ",
99 "TONGA",
48299f95 100 "FIJI",
d38ceaf9 101 "CARRIZO",
139f4917 102 "STONEY",
2cc0c0b5
FC
103 "POLARIS10",
104 "POLARIS11",
c4642a47 105 "POLARIS12",
48ff108d 106 "VEGAM",
d4196f01 107 "VEGA10",
8fab806a 108 "VEGA12",
956fcddc 109 "VEGA20",
2ca8a5d2 110 "RAVEN",
d6c3b24e 111 "ARCTURUS",
1eee4228 112 "RENOIR",
d46b417a 113 "ALDEBARAN",
852a6626 114 "NAVI10",
87dbad02 115 "NAVI14",
9802f5d7 116 "NAVI12",
ccaf72d3 117 "SIENNA_CICHLID",
ddd8fbe7 118 "NAVY_FLOUNDER",
4f1e9a76 119 "VANGOGH",
a2468e04 120 "DIMGREY_CAVEFISH",
d38ceaf9
AD
121 "LAST",
122};
123
dcea6e65
KR
124/**
125 * DOC: pcie_replay_count
126 *
127 * The amdgpu driver provides a sysfs API for reporting the total number
128 * of PCIe replays (NAKs).
129 * The file pcie_replay_count is used for this and returns the total
130 * number of replays as a sum of the NAKs generated and NAKs received.
131 */
132
133static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
134 struct device_attribute *attr, char *buf)
135{
136 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 137 struct amdgpu_device *adev = drm_to_adev(ddev);
dcea6e65
KR
138 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
139
140 return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
141}
142
143static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
144 amdgpu_device_get_pcie_replay_count, NULL);
145
5494d864
AD
146static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
147
bd607166
KR
148/**
149 * DOC: product_name
150 *
151 * The amdgpu driver provides a sysfs API for reporting the product name
152 * for the device.
153 * The file product_name is used for this and returns the product name
154 * as returned from the FRU.
155 * NOTE: This is only available for certain server cards
156 */
157
158static ssize_t amdgpu_device_get_product_name(struct device *dev,
159 struct device_attribute *attr, char *buf)
160{
161 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 162 struct amdgpu_device *adev = drm_to_adev(ddev);
bd607166
KR
163
164 return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_name);
165}
166
167static DEVICE_ATTR(product_name, S_IRUGO,
168 amdgpu_device_get_product_name, NULL);
169
170/**
171 * DOC: product_number
172 *
173 * The amdgpu driver provides a sysfs API for reporting the part number
174 * for the device.
175 * The file product_number is used for this and returns the part number
176 * as returned from the FRU.
177 * NOTE: This is only available for certain server cards
178 */
179
180static ssize_t amdgpu_device_get_product_number(struct device *dev,
181 struct device_attribute *attr, char *buf)
182{
183 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 184 struct amdgpu_device *adev = drm_to_adev(ddev);
bd607166
KR
185
186 return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_number);
187}
188
189static DEVICE_ATTR(product_number, S_IRUGO,
190 amdgpu_device_get_product_number, NULL);
191
192/**
193 * DOC: serial_number
194 *
195 * The amdgpu driver provides a sysfs API for reporting the serial number
196 * for the device.
197 * The file serial_number is used for this and returns the serial number
198 * as returned from the FRU.
199 * NOTE: This is only available for certain server cards
200 */
201
202static ssize_t amdgpu_device_get_serial_number(struct device *dev,
203 struct device_attribute *attr, char *buf)
204{
205 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 206 struct amdgpu_device *adev = drm_to_adev(ddev);
bd607166
KR
207
208 return snprintf(buf, PAGE_SIZE, "%s\n", adev->serial);
209}
210
211static DEVICE_ATTR(serial_number, S_IRUGO,
212 amdgpu_device_get_serial_number, NULL);
213
fd496ca8
AD
214/**
215 * amdgpu_device_supports_atpx - Is the device a dGPU with HG/PX power control
216 *
217 * @dev: drm_device pointer
218 *
219 * Returns true if the device is a dGPU with HG/PX power control,
220 * otherwise return false.
221 */
222bool amdgpu_device_supports_atpx(struct drm_device *dev)
223{
224 struct amdgpu_device *adev = drm_to_adev(dev);
225
226 if (adev->flags & AMD_IS_PX)
227 return true;
228 return false;
229}
230
e3ecdffa 231/**
0330b848 232 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
e3ecdffa
AD
233 *
234 * @dev: drm_device pointer
235 *
236 * Returns true if the device is a dGPU with ACPI power resources,
237 * otherwise return false.
238 */
31af062a 239bool amdgpu_device_supports_boco(struct drm_device *dev)
d38ceaf9 240{
1348969a 241 struct amdgpu_device *adev = drm_to_adev(dev);
d38ceaf9 242
0330b848 243 if (adev->has_pr3)
d38ceaf9
AD
244 return true;
245 return false;
246}
247
a69cba42
AD
248/**
249 * amdgpu_device_supports_baco - Does the device support BACO
250 *
251 * @dev: drm_device pointer
252 *
253 * Returns true if the device supports BACO,
254 * otherwise return false.
255 */
256bool amdgpu_device_supports_baco(struct drm_device *dev)
257{
1348969a 258 struct amdgpu_device *adev = drm_to_adev(dev);
a69cba42
AD
259
260 return amdgpu_asic_supports_baco(adev);
261}
262
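/*
 * Illustrative sketch (not part of the driver): how a caller might pick a
 * runtime power-off strategy from the three helpers above. The enum and the
 * function name amdgpu_example_pick_rpm_mode() are hypothetical; the
 * amdgpu_device_supports_*() calls are the helpers defined in this file.
 */
enum amdgpu_example_rpm_mode {
	AMDGPU_EXAMPLE_RPM_NONE,
	AMDGPU_EXAMPLE_RPM_ATPX,
	AMDGPU_EXAMPLE_RPM_BOCO,
	AMDGPU_EXAMPLE_RPM_BACO,
};

static enum amdgpu_example_rpm_mode
amdgpu_example_pick_rpm_mode(struct drm_device *dev)
{
	/* prefer platform (ACPI) power control when the board provides it */
	if (amdgpu_device_supports_atpx(dev))
		return AMDGPU_EXAMPLE_RPM_ATPX;
	if (amdgpu_device_supports_boco(dev))
		return AMDGPU_EXAMPLE_RPM_BOCO;
	/* otherwise fall back to the asic's own bus-active, chip-off state */
	if (amdgpu_device_supports_baco(dev))
		return AMDGPU_EXAMPLE_RPM_BACO;
	return AMDGPU_EXAMPLE_RPM_NONE;
}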
6e3cd2a9
MCC
263/*
264 * VRAM access helper functions
265 */
266
e35e2b11 267/**
e35e2b11
TY
268 * amdgpu_device_vram_access - read/write a buffer in vram
269 *
270 * @adev: amdgpu_device pointer
271 * @pos: offset of the buffer in vram
272 * @buf: virtual address of the buffer in system memory
273 * @size: read/write size in bytes; the buffer at @buf must be at least @size bytes
274 * @write: true - write to vram, otherwise - read from vram
275 */
276void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
277 uint32_t *buf, size_t size, bool write)
278{
e35e2b11 279 unsigned long flags;
ce05ac56
CK
280 uint32_t hi = ~0;
281 uint64_t last;
282
9d11eb0d
CK
283
284#ifdef CONFIG_64BIT
285 last = min(pos + size, adev->gmc.visible_vram_size);
286 if (last > pos) {
287 void __iomem *addr = adev->mman.aper_base_kaddr + pos;
288 size_t count = last - pos;
289
290 if (write) {
291 memcpy_toio(addr, buf, count);
292 mb();
293 amdgpu_asic_flush_hdp(adev, NULL);
294 } else {
295 amdgpu_asic_invalidate_hdp(adev, NULL);
296 mb();
297 memcpy_fromio(buf, addr, count);
298 }
299
300 if (count == size)
301 return;
302
303 pos += count;
304 buf += count / 4;
305 size -= count;
306 }
307#endif
308
ce05ac56
CK
309 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
310 for (last = pos + size; pos < last; pos += 4) {
311 uint32_t tmp = pos >> 31;
e35e2b11 312
e35e2b11 313 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
ce05ac56
CK
314 if (tmp != hi) {
315 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
316 hi = tmp;
317 }
e35e2b11
TY
318 if (write)
319 WREG32_NO_KIQ(mmMM_DATA, *buf++);
320 else
321 *buf++ = RREG32_NO_KIQ(mmMM_DATA);
e35e2b11 322 }
ce05ac56 323 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
e35e2b11
TY
324}
325
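/*
 * Illustrative sketch (not part of the driver): dumping the first few dwords
 * of VRAM with the helper above. The function name is hypothetical;
 * amdgpu_device_vram_access() is the helper defined above.
 */
static void amdgpu_example_dump_vram_head(struct amdgpu_device *adev)
{
	uint32_t data[4] = {};

	/* read 16 bytes from VRAM offset 0 into the system-memory buffer */
	amdgpu_device_vram_access(adev, 0, data, sizeof(data), false);
	dev_info(adev->dev, "VRAM[0..3]: %08x %08x %08x %08x\n",
		 data[0], data[1], data[2], data[3]);
}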
d38ceaf9 326/*
f7ee1874 327 * register access helper functions.
d38ceaf9 328 */
e3ecdffa 329/**
f7ee1874 330 * amdgpu_device_rreg - read a memory mapped IO or indirect register
e3ecdffa
AD
331 *
332 * @adev: amdgpu_device pointer
333 * @reg: dword aligned register offset
334 * @acc_flags: access flags which require special behavior
335 *
336 * Returns the 32 bit value from the offset specified.
337 */
f7ee1874
HZ
338uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
339 uint32_t reg, uint32_t acc_flags)
d38ceaf9 340{
f4b373f4
TSD
341 uint32_t ret;
342
bf36b52e
AG
343 if (adev->in_pci_err_recovery)
344 return 0;
345
f7ee1874
HZ
346 if ((reg * 4) < adev->rmmio_size) {
347 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
348 amdgpu_sriov_runtime(adev) &&
349 down_read_trylock(&adev->reset_sem)) {
350 ret = amdgpu_kiq_rreg(adev, reg);
351 up_read(&adev->reset_sem);
352 } else {
353 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
354 }
355 } else {
356 ret = adev->pcie_rreg(adev, reg * 4);
81202807 357 }
bc992ba5 358
f7ee1874 359 trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
e78b579d 360
f4b373f4 361 return ret;
d38ceaf9
AD
362}
363
421a2a30
ML
364/*
365 * MMIO register read with byte offset helper function
366 * @offset: byte offset from MMIO start
367 *
368*/
369
e3ecdffa
AD
370/**
371 * amdgpu_mm_rreg8 - read a memory mapped IO register
372 *
373 * @adev: amdgpu_device pointer
374 * @offset: byte aligned register offset
375 *
376 * Returns the 8 bit value from the offset specified.
377 */
7cbbc745
AG
378uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
379{
bf36b52e
AG
380 if (adev->in_pci_err_recovery)
381 return 0;
382
421a2a30
ML
383 if (offset < adev->rmmio_size)
384 return (readb(adev->rmmio + offset));
385 BUG();
386}
387
388/*
389 * MMIO register write with byte offset helper function
390 * @offset: byte offset from MMIO start
391 * @value: the value to be written to the register
392 *
393*/
e3ecdffa
AD
394/**
395 * amdgpu_mm_wreg8 - write a memory mapped IO register
396 *
397 * @adev: amdgpu_device pointer
398 * @offset: byte aligned register offset
399 * @value: 8 bit value to write
400 *
401 * Writes the value specified to the offset specified.
402 */
7cbbc745
AG
403void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
404{
bf36b52e
AG
405 if (adev->in_pci_err_recovery)
406 return;
407
421a2a30
ML
408 if (offset < adev->rmmio_size)
409 writeb(value, adev->rmmio + offset);
410 else
411 BUG();
412}
413
e3ecdffa 414/**
f7ee1874 415 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
e3ecdffa
AD
416 *
417 * @adev: amdgpu_device pointer
418 * @reg: dword aligned register offset
419 * @v: 32 bit value to write to the register
420 * @acc_flags: access flags which require special behavior
421 *
422 * Writes the value specified to the offset specified.
423 */
f7ee1874
HZ
424void amdgpu_device_wreg(struct amdgpu_device *adev,
425 uint32_t reg, uint32_t v,
426 uint32_t acc_flags)
d38ceaf9 427{
bf36b52e
AG
428 if (adev->in_pci_err_recovery)
429 return;
430
f7ee1874
HZ
431 if ((reg * 4) < adev->rmmio_size) {
432 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
433 amdgpu_sriov_runtime(adev) &&
434 down_read_trylock(&adev->reset_sem)) {
435 amdgpu_kiq_wreg(adev, reg, v);
436 up_read(&adev->reset_sem);
437 } else {
438 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
439 }
440 } else {
441 adev->pcie_wreg(adev, reg * 4, v);
81202807 442 }
bc992ba5 443
f7ee1874 444 trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
2e0cc4d4 445}
d38ceaf9 446
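/*
 * Illustrative sketch (not part of the driver): a read-modify-write built on
 * the two helpers above. The function name is hypothetical; passing 0 as
 * acc_flags gives the default behavior also used by the RREG32()/WREG32()
 * macros.
 */
static void amdgpu_example_rmw_reg(struct amdgpu_device *adev, uint32_t reg,
				   uint32_t clear_mask, uint32_t set_mask)
{
	uint32_t tmp = amdgpu_device_rreg(adev, reg, 0);

	tmp &= ~clear_mask;
	tmp |= set_mask;
	amdgpu_device_wreg(adev, reg, tmp, 0);
}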
2e0cc4d4
ML
447/*
448 * amdgpu_mm_wreg_mmio_rlc - write register either with mmio or with RLC path if in range
449 *
450 * this function is invoked only for debugfs register access
451 */
f7ee1874
HZ
452void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
453 uint32_t reg, uint32_t v)
2e0cc4d4 454{
bf36b52e
AG
455 if (adev->in_pci_err_recovery)
456 return;
457
2e0cc4d4 458 if (amdgpu_sriov_fullaccess(adev) &&
f7ee1874
HZ
459 adev->gfx.rlc.funcs &&
460 adev->gfx.rlc.funcs->is_rlcg_access_range) {
2e0cc4d4
ML
461 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
462 return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v);
f7ee1874
HZ
463 } else {
464 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
47ed4e1c 465 }
d38ceaf9
AD
466}
467
d38ceaf9
AD
468/**
469 * amdgpu_mm_rdoorbell - read a doorbell dword
470 *
471 * @adev: amdgpu_device pointer
472 * @index: doorbell index
473 *
474 * Returns the value in the doorbell aperture at the
475 * requested doorbell index (CIK).
476 */
477u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
478{
bf36b52e
AG
479 if (adev->in_pci_err_recovery)
480 return 0;
481
d38ceaf9
AD
482 if (index < adev->doorbell.num_doorbells) {
483 return readl(adev->doorbell.ptr + index);
484 } else {
485 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
486 return 0;
487 }
488}
489
490/**
491 * amdgpu_mm_wdoorbell - write a doorbell dword
492 *
493 * @adev: amdgpu_device pointer
494 * @index: doorbell index
495 * @v: value to write
496 *
497 * Writes @v to the doorbell aperture at the
498 * requested doorbell index (CIK).
499 */
500void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
501{
bf36b52e
AG
502 if (adev->in_pci_err_recovery)
503 return;
504
d38ceaf9
AD
505 if (index < adev->doorbell.num_doorbells) {
506 writel(v, adev->doorbell.ptr + index);
507 } else {
508 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
509 }
510}
511
832be404
KW
512/**
513 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
514 *
515 * @adev: amdgpu_device pointer
516 * @index: doorbell index
517 *
518 * Returns the value in the doorbell aperture at the
519 * requested doorbell index (VEGA10+).
520 */
521u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
522{
bf36b52e
AG
523 if (adev->in_pci_err_recovery)
524 return 0;
525
832be404
KW
526 if (index < adev->doorbell.num_doorbells) {
527 return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
528 } else {
529 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
530 return 0;
531 }
532}
533
534/**
535 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
536 *
537 * @adev: amdgpu_device pointer
538 * @index: doorbell index
539 * @v: value to write
540 *
541 * Writes @v to the doorbell aperture at the
542 * requested doorbell index (VEGA10+).
543 */
544void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
545{
bf36b52e
AG
546 if (adev->in_pci_err_recovery)
547 return;
548
832be404
KW
549 if (index < adev->doorbell.num_doorbells) {
550 atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
551 } else {
552 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
553 }
554}
555
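/*
 * Illustrative sketch (not part of the driver): how ring code typically
 * publishes its write pointer through the doorbell helpers above. The
 * function name is hypothetical; real ring implementations usually go
 * through the WDOORBELL32()/WDOORBELL64() wrappers.
 */
static void amdgpu_example_ring_doorbell(struct amdgpu_device *adev,
					 u32 doorbell_index, u64 wptr)
{
	/* VEGA10 and newer use 64 bit doorbells, older asics use 32 bit */
	if (adev->asic_type >= CHIP_VEGA10)
		amdgpu_mm_wdoorbell64(adev, doorbell_index, wptr);
	else
		amdgpu_mm_wdoorbell(adev, doorbell_index, (u32)wptr);
}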
1bba3683
HZ
556/**
557 * amdgpu_device_indirect_rreg - read an indirect register
558 *
559 * @adev: amdgpu_device pointer
560 * @pcie_index: mmio register offset
561 * @pcie_data: mmio register offset
22f453fb 562 * @reg_addr: indirect register address to read from
1bba3683
HZ
563 *
564 * Returns the value of indirect register @reg_addr
565 */
566u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
567 u32 pcie_index, u32 pcie_data,
568 u32 reg_addr)
569{
570 unsigned long flags;
571 u32 r;
572 void __iomem *pcie_index_offset;
573 void __iomem *pcie_data_offset;
574
575 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
576 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
577 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
578
579 writel(reg_addr, pcie_index_offset);
580 readl(pcie_index_offset);
581 r = readl(pcie_data_offset);
582 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
583
584 return r;
585}
586
587/**
588 * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
589 *
590 * @adev: amdgpu_device pointer
591 * @pcie_index: mmio register offset
592 * @pcie_data: mmio register offset
22f453fb 593 * @reg_addr: indirect register address to read from
1bba3683
HZ
594 *
595 * Returns the value of indirect register @reg_addr
596 */
597u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
598 u32 pcie_index, u32 pcie_data,
599 u32 reg_addr)
600{
601 unsigned long flags;
602 u64 r;
603 void __iomem *pcie_index_offset;
604 void __iomem *pcie_data_offset;
605
606 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
607 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
608 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
609
610 /* read low 32 bits */
611 writel(reg_addr, pcie_index_offset);
612 readl(pcie_index_offset);
613 r = readl(pcie_data_offset);
614 /* read high 32 bits */
615 writel(reg_addr + 4, pcie_index_offset);
616 readl(pcie_index_offset);
617 r |= ((u64)readl(pcie_data_offset) << 32);
618 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
619
620 return r;
621}
622
623/**
624 * amdgpu_device_indirect_wreg - write an indirect register address
625 *
626 * @adev: amdgpu_device pointer
627 * @pcie_index: mmio register offset
628 * @pcie_data: mmio register offset
629 * @reg_addr: indirect register offset
630 * @reg_data: indirect register data
631 *
632 */
633void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
634 u32 pcie_index, u32 pcie_data,
635 u32 reg_addr, u32 reg_data)
636{
637 unsigned long flags;
638 void __iomem *pcie_index_offset;
639 void __iomem *pcie_data_offset;
640
641 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
642 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
643 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
644
645 writel(reg_addr, pcie_index_offset);
646 readl(pcie_index_offset);
647 writel(reg_data, pcie_data_offset);
648 readl(pcie_data_offset);
649 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
650}
651
652/**
653 * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
654 *
655 * @adev: amdgpu_device pointer
656 * @pcie_index: mmio register offset
657 * @pcie_data: mmio register offset
658 * @reg_addr: indirect register offset
659 * @reg_data: indirect register data
660 *
661 */
662void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
663 u32 pcie_index, u32 pcie_data,
664 u32 reg_addr, u64 reg_data)
665{
666 unsigned long flags;
667 void __iomem *pcie_index_offset;
668 void __iomem *pcie_data_offset;
669
670 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
671 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
672 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
673
674 /* write low 32 bits */
675 writel(reg_addr, pcie_index_offset);
676 readl(pcie_index_offset);
677 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
678 readl(pcie_data_offset);
679 /* write high 32 bits */
680 writel(reg_addr + 4, pcie_index_offset);
681 readl(pcie_index_offset);
682 writel((u32)(reg_data >> 32), pcie_data_offset);
683 readl(pcie_data_offset);
684 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
685}
686
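/*
 * Illustrative sketch (not part of the driver): how an asic file might wire
 * its pcie_rreg callback to the indirect helpers above. The function name is
 * hypothetical and the index/data offsets are assumed to come from the
 * asic's NBIO callbacks, in the style of the soc15/nv code.
 */
static u32 amdgpu_example_pcie_rreg(struct amdgpu_device *adev, u32 reg)
{
	u32 pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	u32 pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);

	return amdgpu_device_indirect_rreg(adev, pcie_index, pcie_data, reg);
}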
d38ceaf9
AD
687/**
688 * amdgpu_invalid_rreg - dummy reg read function
689 *
982a820b 690 * @adev: amdgpu_device pointer
d38ceaf9
AD
691 * @reg: offset of register
692 *
693 * Dummy register read function. Used for register blocks
694 * that certain asics don't have (all asics).
695 * Returns the value in the register.
696 */
697static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
698{
699 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
700 BUG();
701 return 0;
702}
703
704/**
705 * amdgpu_invalid_wreg - dummy reg write function
706 *
982a820b 707 * @adev: amdgpu_device pointer
d38ceaf9
AD
708 * @reg: offset of register
709 * @v: value to write to the register
710 *
711 * Dummy register write function. Used for register blocks
712 * that certain asics don't have (all asics).
713 */
714static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
715{
716 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
717 reg, v);
718 BUG();
719}
720
4fa1c6a6
TZ
721/**
722 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
723 *
982a820b 724 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
725 * @reg: offset of register
726 *
727 * Dummy register read function. Used for register blocks
728 * that certain asics don't have (all asics).
729 * Returns the value in the register.
730 */
731static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
732{
733 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
734 BUG();
735 return 0;
736}
737
738/**
739 * amdgpu_invalid_wreg64 - dummy reg write function
740 *
982a820b 741 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
742 * @reg: offset of register
743 * @v: value to write to the register
744 *
745 * Dummy register write function. Used for register blocks
746 * that certain asics don't have (all asics).
747 */
748static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
749{
750 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
751 reg, v);
752 BUG();
753}
754
d38ceaf9
AD
755/**
756 * amdgpu_block_invalid_rreg - dummy reg read function
757 *
982a820b 758 * @adev: amdgpu_device pointer
d38ceaf9
AD
759 * @block: offset of instance
760 * @reg: offset of register
761 *
762 * Dummy register read function. Used for register blocks
763 * that certain asics don't have (all asics).
764 * Returns the value in the register.
765 */
766static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
767 uint32_t block, uint32_t reg)
768{
769 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
770 reg, block);
771 BUG();
772 return 0;
773}
774
775/**
776 * amdgpu_block_invalid_wreg - dummy reg write function
777 *
982a820b 778 * @adev: amdgpu_device pointer
d38ceaf9
AD
779 * @block: offset of instance
780 * @reg: offset of register
781 * @v: value to write to the register
782 *
783 * Dummy block register write function. Used for register blocks
784 * that certain asics don't have (all asics).
785 */
786static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
787 uint32_t block,
788 uint32_t reg, uint32_t v)
789{
790 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
791 reg, block, v);
792 BUG();
793}
794
4d2997ab
AD
795/**
796 * amdgpu_device_asic_init - Wrapper for atom asic_init
797 *
982a820b 798 * @adev: amdgpu_device pointer
4d2997ab
AD
799 *
800 * Does any asic specific work and then calls atom asic init.
801 */
802static int amdgpu_device_asic_init(struct amdgpu_device *adev)
803{
804 amdgpu_asic_pre_asic_init(adev);
805
806 return amdgpu_atom_asic_init(adev->mode_info.atom_context);
807}
808
e3ecdffa
AD
809/**
810 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
811 *
982a820b 812 * @adev: amdgpu_device pointer
e3ecdffa
AD
813 *
814 * Allocates a scratch page of VRAM for use by various things in the
815 * driver.
816 */
06ec9070 817static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
d38ceaf9 818{
a4a02777
CK
819 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
820 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
821 &adev->vram_scratch.robj,
822 &adev->vram_scratch.gpu_addr,
823 (void **)&adev->vram_scratch.ptr);
d38ceaf9
AD
824}
825
e3ecdffa
AD
826/**
827 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
828 *
982a820b 829 * @adev: amdgpu_device pointer
e3ecdffa
AD
830 *
831 * Frees the VRAM scratch page.
832 */
06ec9070 833static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 834{
078af1a3 835 amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
d38ceaf9
AD
836}
837
838/**
9c3f2b54 839 * amdgpu_device_program_register_sequence - program an array of registers.
d38ceaf9
AD
840 *
841 * @adev: amdgpu_device pointer
842 * @registers: pointer to the register array
843 * @array_size: size of the register array
844 *
845 * Programs an array of registers with AND and OR masks.
846 * This is a helper for setting golden registers.
847 */
9c3f2b54
AD
848void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
849 const u32 *registers,
850 const u32 array_size)
d38ceaf9
AD
851{
852 u32 tmp, reg, and_mask, or_mask;
853 int i;
854
855 if (array_size % 3)
856 return;
857
858 for (i = 0; i < array_size; i +=3) {
859 reg = registers[i + 0];
860 and_mask = registers[i + 1];
861 or_mask = registers[i + 2];
862
863 if (and_mask == 0xffffffff) {
864 tmp = or_mask;
865 } else {
866 tmp = RREG32(reg);
867 tmp &= ~and_mask;
e0d07657
HZ
868 if (adev->family >= AMDGPU_FAMILY_AI)
869 tmp |= (or_mask & and_mask);
870 else
871 tmp |= or_mask;
d38ceaf9
AD
872 }
873 WREG32(reg, tmp);
874 }
875}
876
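/*
 * Illustrative sketch (not part of the driver): the register/and_mask/or_mask
 * triplet layout expected by amdgpu_device_program_register_sequence(). The
 * offsets and masks below are made up purely for illustration.
 */
static const u32 example_golden_settings[] = {
	/* register,  and_mask,    or_mask */
	0x0000263c, 0xffffffff, 0x00000190,
	0x00002834, 0xf00fffff, 0x00000400,
};

static void amdgpu_example_init_golden_registers(struct amdgpu_device *adev)
{
	amdgpu_device_program_register_sequence(adev,
						example_golden_settings,
						ARRAY_SIZE(example_golden_settings));
}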
e3ecdffa
AD
877/**
878 * amdgpu_device_pci_config_reset - reset the GPU
879 *
880 * @adev: amdgpu_device pointer
881 *
882 * Resets the GPU using the pci config reset sequence.
883 * Only applicable to asics prior to vega10.
884 */
8111c387 885void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
886{
887 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
888}
889
af484df8
AD
890/**
891 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
892 *
893 * @adev: amdgpu_device pointer
894 *
895 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
896 */
897int amdgpu_device_pci_reset(struct amdgpu_device *adev)
898{
899 return pci_reset_function(adev->pdev);
900}
901
d38ceaf9
AD
902/*
903 * GPU doorbell aperture helpers function.
904 */
905/**
06ec9070 906 * amdgpu_device_doorbell_init - Init doorbell driver information.
d38ceaf9
AD
907 *
908 * @adev: amdgpu_device pointer
909 *
910 * Init doorbell driver information (CIK)
911 * Returns 0 on success, error on failure.
912 */
06ec9070 913static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
d38ceaf9 914{
6585661d 915
705e519e
CK
916 /* No doorbell on SI hardware generation */
917 if (adev->asic_type < CHIP_BONAIRE) {
918 adev->doorbell.base = 0;
919 adev->doorbell.size = 0;
920 adev->doorbell.num_doorbells = 0;
921 adev->doorbell.ptr = NULL;
922 return 0;
923 }
924
d6895ad3
CK
925 if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
926 return -EINVAL;
927
22357775
AD
928 amdgpu_asic_init_doorbell_index(adev);
929
d38ceaf9
AD
930 /* doorbell bar mapping */
931 adev->doorbell.base = pci_resource_start(adev->pdev, 2);
932 adev->doorbell.size = pci_resource_len(adev->pdev, 2);
933
edf600da 934 adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
9564f192 935 adev->doorbell_index.max_assignment+1);
d38ceaf9
AD
936 if (adev->doorbell.num_doorbells == 0)
937 return -EINVAL;
938
ec3db8a6 939 /* For Vega, reserve and map two pages on doorbell BAR since SDMA
88dc26e4
OZ
940 * paging queue doorbells use the second page. The
941 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
942 * doorbells are in the first page. So with paging queue enabled,
943 * the max num_doorbells should be increased by one page (0x400 in dwords)
ec3db8a6
PY
944 */
945 if (adev->asic_type >= CHIP_VEGA10)
88dc26e4 946 adev->doorbell.num_doorbells += 0x400;
ec3db8a6 947
8972e5d2
CK
948 adev->doorbell.ptr = ioremap(adev->doorbell.base,
949 adev->doorbell.num_doorbells *
950 sizeof(u32));
951 if (adev->doorbell.ptr == NULL)
d38ceaf9 952 return -ENOMEM;
d38ceaf9
AD
953
954 return 0;
955}
956
957/**
06ec9070 958 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
d38ceaf9
AD
959 *
960 * @adev: amdgpu_device pointer
961 *
962 * Tear down doorbell driver information (CIK)
963 */
06ec9070 964static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
d38ceaf9
AD
965{
966 iounmap(adev->doorbell.ptr);
967 adev->doorbell.ptr = NULL;
968}
969
22cb0164 970
d38ceaf9
AD
971
972/*
06ec9070 973 * amdgpu_device_wb_*()
455a7bc2 974 * Writeback is the method by which the GPU updates special pages in memory
ea81a173 975 * with the status of certain GPU events (fences, ring pointers, etc.).
d38ceaf9
AD
976 */
977
978/**
06ec9070 979 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
980 *
981 * @adev: amdgpu_device pointer
982 *
983 * Disables Writeback and frees the Writeback memory (all asics).
984 * Used at driver shutdown.
985 */
06ec9070 986static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
d38ceaf9
AD
987{
988 if (adev->wb.wb_obj) {
a76ed485
AD
989 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
990 &adev->wb.gpu_addr,
991 (void **)&adev->wb.wb);
d38ceaf9
AD
992 adev->wb.wb_obj = NULL;
993 }
994}
995
996/**
06ec9070 997 * amdgpu_device_wb_init- Init Writeback driver info and allocate memory
d38ceaf9
AD
998 *
999 * @adev: amdgpu_device pointer
1000 *
455a7bc2 1001 * Initializes writeback and allocates writeback memory (all asics).
d38ceaf9
AD
1002 * Used at driver startup.
1003 * Returns 0 on success or a negative error code on failure.
1004 */
06ec9070 1005static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
1006{
1007 int r;
1008
1009 if (adev->wb.wb_obj == NULL) {
97407b63
AD
1010 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1011 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
1012 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1013 &adev->wb.wb_obj, &adev->wb.gpu_addr,
1014 (void **)&adev->wb.wb);
d38ceaf9
AD
1015 if (r) {
1016 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1017 return r;
1018 }
d38ceaf9
AD
1019
1020 adev->wb.num_wb = AMDGPU_MAX_WB;
1021 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1022
1023 /* clear wb memory */
73469585 1024 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
1025 }
1026
1027 return 0;
1028}
1029
1030/**
131b4b36 1031 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
1032 *
1033 * @adev: amdgpu_device pointer
1034 * @wb: wb index
1035 *
1036 * Allocate a wb slot for use by the driver (all asics).
1037 * Returns 0 on success or -EINVAL on failure.
1038 */
131b4b36 1039int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9
AD
1040{
1041 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 1042
97407b63 1043 if (offset < adev->wb.num_wb) {
7014285a 1044 __set_bit(offset, adev->wb.used);
63ae07ca 1045 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
1046 return 0;
1047 } else {
1048 return -EINVAL;
1049 }
1050}
1051
d38ceaf9 1052/**
131b4b36 1053 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
1054 *
1055 * @adev: amdgpu_device pointer
1056 * @wb: wb index
1057 *
1058 * Free a wb slot allocated for use by the driver (all asics)
1059 */
131b4b36 1060void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 1061{
73469585 1062 wb >>= 3;
d38ceaf9 1063 if (wb < adev->wb.num_wb)
73469585 1064 __clear_bit(wb, adev->wb.used);
d38ceaf9
AD
1065}
1066
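/*
 * Illustrative sketch (not part of the driver): allocating a writeback slot,
 * deriving its CPU and GPU addresses, and releasing it again. The function
 * name is hypothetical; the value returned by amdgpu_device_wb_get() is a
 * dword index into the writeback page, which is how the ring code uses it.
 */
static int amdgpu_example_use_wb(struct amdgpu_device *adev)
{
	u32 wb;
	int r;

	r = amdgpu_device_wb_get(adev, &wb);
	if (r)
		return r;

	/* CPU view and GPU address of the allocated dword */
	adev->wb.wb[wb] = 0;
	dev_info(adev->dev, "wb slot %u at GPU address 0x%llx\n", wb,
		 (unsigned long long)(adev->wb.gpu_addr + wb * 4));

	amdgpu_device_wb_free(adev, wb);
	return 0;
}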
d6895ad3
CK
1067/**
1068 * amdgpu_device_resize_fb_bar - try to resize FB BAR
1069 *
1070 * @adev: amdgpu_device pointer
1071 *
1072 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1073 * to fail, but if any of the BARs is not accessible after the size we abort
1074 * driver loading by returning -ENODEV.
1075 */
1076int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1077{
453f617a 1078 int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
31b8adab
CK
1079 struct pci_bus *root;
1080 struct resource *res;
1081 unsigned i;
d6895ad3
CK
1082 u16 cmd;
1083 int r;
1084
0c03b912 1085 /* Bypass for VF */
1086 if (amdgpu_sriov_vf(adev))
1087 return 0;
1088
b7221f2b
AD
1089 /* skip if the bios has already enabled large BAR */
1090 if (adev->gmc.real_vram_size &&
1091 (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1092 return 0;
1093
31b8adab
CK
1094 /* Check if the root BUS has 64bit memory resources */
1095 root = adev->pdev->bus;
1096 while (root->parent)
1097 root = root->parent;
1098
1099 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 1100 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
1101 res->start > 0x100000000ull)
1102 break;
1103 }
1104
1105 /* Trying to resize is pointless without a root hub window above 4GB */
1106 if (!res)
1107 return 0;
1108
453f617a
ND
1109 /* Limit the BAR size to what is available */
1110 rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1111 rbar_size);
1112
d6895ad3
CK
1113 /* Disable memory decoding while we change the BAR addresses and size */
1114 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1115 pci_write_config_word(adev->pdev, PCI_COMMAND,
1116 cmd & ~PCI_COMMAND_MEMORY);
1117
1118 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
06ec9070 1119 amdgpu_device_doorbell_fini(adev);
d6895ad3
CK
1120 if (adev->asic_type >= CHIP_BONAIRE)
1121 pci_release_resource(adev->pdev, 2);
1122
1123 pci_release_resource(adev->pdev, 0);
1124
1125 r = pci_resize_resource(adev->pdev, 0, rbar_size);
1126 if (r == -ENOSPC)
1127 DRM_INFO("Not enough PCI address space for a large BAR.");
1128 else if (r && r != -ENOTSUPP)
1129 DRM_ERROR("Problem resizing BAR0 (%d).", r);
1130
1131 pci_assign_unassigned_bus_resources(adev->pdev->bus);
1132
1133 /* When the doorbell or fb BAR isn't available we have no chance of
1134 * using the device.
1135 */
06ec9070 1136 r = amdgpu_device_doorbell_init(adev);
d6895ad3
CK
1137 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1138 return -ENODEV;
1139
1140 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1141
1142 return 0;
1143}
a05502e5 1144
d38ceaf9
AD
1145/*
1146 * GPU helpers function.
1147 */
1148/**
39c640c0 1149 * amdgpu_device_need_post - check if the hw need post or not
d38ceaf9
AD
1150 *
1151 * @adev: amdgpu_device pointer
1152 *
c836fec5
JQ
1153 * Check if the asic has been initialized (all asics) at driver startup,
1154 * or if post is needed because a hw reset was performed.
1155 * Returns true if post is needed or false if not.
d38ceaf9 1156 */
39c640c0 1157bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
1158{
1159 uint32_t reg;
1160
bec86378
ML
1161 if (amdgpu_sriov_vf(adev))
1162 return false;
1163
1164 if (amdgpu_passthrough(adev)) {
1da2c326
ML
1165 /* for FIJI: In the whole-GPU pass-through virtualization case, after VM reboot
1166 * some old SMC firmware still needs the driver to do vPost, otherwise the GPU hangs.
1167 * SMC firmware versions above 22.15 don't have this flaw, so we force
1168 * vPost to be executed for SMC versions below 22.15
bec86378
ML
1169 */
1170 if (adev->asic_type == CHIP_FIJI) {
1171 int err;
1172 uint32_t fw_ver;
1173 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1174 /* force vPost if error occurred */
1175 if (err)
1176 return true;
1177
1178 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1da2c326
ML
1179 if (fw_ver < 0x00160e00)
1180 return true;
bec86378 1181 }
bec86378 1182 }
91fe77eb 1183
e3c1b071 1184 /* Don't post if we need to reset whole hive on init */
1185 if (adev->gmc.xgmi.pending_reset)
1186 return false;
1187
91fe77eb 1188 if (adev->has_hw_reset) {
1189 adev->has_hw_reset = false;
1190 return true;
1191 }
1192
1193 /* bios scratch used on CIK+ */
1194 if (adev->asic_type >= CHIP_BONAIRE)
1195 return amdgpu_atombios_scratch_need_asic_init(adev);
1196
1197 /* check MEM_SIZE for older asics */
1198 reg = amdgpu_asic_get_config_memsize(adev);
1199
1200 if ((reg != 0) && (reg != 0xffffffff))
1201 return false;
1202
1203 return true;
bec86378
ML
1204}
1205
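/*
 * Illustrative sketch (not part of the driver): the check-then-post pattern
 * the init path follows. The wrapper name is hypothetical;
 * amdgpu_device_need_post() and amdgpu_device_asic_init() are the functions
 * defined in this file.
 */
static int amdgpu_example_post_if_needed(struct amdgpu_device *adev)
{
	if (!amdgpu_device_need_post(adev))
		return 0;

	/* run atom asic_init to bring the hardware to a known state */
	return amdgpu_device_asic_init(adev);
}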
d38ceaf9
AD
1206/* if we get transitioned to only one device, take VGA back */
1207/**
06ec9070 1208 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9
AD
1209 *
1210 * @cookie: amdgpu_device pointer
1211 * @state: enable/disable vga decode
1212 *
1213 * Enable/disable vga decode (all asics).
1214 * Returns VGA resource flags.
1215 */
06ec9070 1216static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
d38ceaf9
AD
1217{
1218 struct amdgpu_device *adev = cookie;
1219 amdgpu_asic_set_vga_state(adev, state);
1220 if (state)
1221 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1222 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1223 else
1224 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1225}
1226
e3ecdffa
AD
1227/**
1228 * amdgpu_device_check_block_size - validate the vm block size
1229 *
1230 * @adev: amdgpu_device pointer
1231 *
1232 * Validates the vm block size specified via module parameter.
1233 * The vm block size defines number of bits in page table versus page directory,
1234 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1235 * page table and the remaining bits are in the page directory.
1236 */
06ec9070 1237static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
1238{
1239 /* defines number of bits in page table versus page directory,
1240 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1241 * page table and the remaining bits are in the page directory */
bab4fee7
JZ
1242 if (amdgpu_vm_block_size == -1)
1243 return;
a1adf8be 1244
bab4fee7 1245 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
1246 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1247 amdgpu_vm_block_size);
97489129 1248 amdgpu_vm_block_size = -1;
a1adf8be 1249 }
a1adf8be
CZ
1250}
1251
e3ecdffa
AD
1252/**
1253 * amdgpu_device_check_vm_size - validate the vm size
1254 *
1255 * @adev: amdgpu_device pointer
1256 *
1257 * Validates the vm size in GB specified via module parameter.
1258 * The VM size is the size of the GPU virtual memory space in GB.
1259 */
06ec9070 1260static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 1261{
64dab074
AD
1262 /* no need to check the default value */
1263 if (amdgpu_vm_size == -1)
1264 return;
1265
83ca145d
ZJ
1266 if (amdgpu_vm_size < 1) {
1267 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1268 amdgpu_vm_size);
f3368128 1269 amdgpu_vm_size = -1;
83ca145d 1270 }
83ca145d
ZJ
1271}
1272
7951e376
RZ
1273static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1274{
1275 struct sysinfo si;
a9d4fe2f 1276 bool is_os_64 = (sizeof(void *) == 8);
7951e376
RZ
1277 uint64_t total_memory;
1278 uint64_t dram_size_seven_GB = 0x1B8000000;
1279 uint64_t dram_size_three_GB = 0xB8000000;
1280
1281 if (amdgpu_smu_memory_pool_size == 0)
1282 return;
1283
1284 if (!is_os_64) {
1285 DRM_WARN("Not 64-bit OS, feature not supported\n");
1286 goto def_value;
1287 }
1288 si_meminfo(&si);
1289 total_memory = (uint64_t)si.totalram * si.mem_unit;
1290
1291 if ((amdgpu_smu_memory_pool_size == 1) ||
1292 (amdgpu_smu_memory_pool_size == 2)) {
1293 if (total_memory < dram_size_three_GB)
1294 goto def_value1;
1295 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1296 (amdgpu_smu_memory_pool_size == 8)) {
1297 if (total_memory < dram_size_seven_GB)
1298 goto def_value1;
1299 } else {
1300 DRM_WARN("Smu memory pool size not supported\n");
1301 goto def_value;
1302 }
1303 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1304
1305 return;
1306
1307def_value1:
1308 DRM_WARN("Not enough system memory\n");
1309def_value:
1310 adev->pm.smu_prv_buffer_size = 0;
1311}
1312
d38ceaf9 1313/**
06ec9070 1314 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
1315 *
1316 * @adev: amdgpu_device pointer
1317 *
1318 * Validates certain module parameters and updates
1319 * the associated values used by the driver (all asics).
1320 */
912dfc84 1321static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1322{
5b011235
CZ
1323 if (amdgpu_sched_jobs < 4) {
1324 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1325 amdgpu_sched_jobs);
1326 amdgpu_sched_jobs = 4;
76117507 1327 } else if (!is_power_of_2(amdgpu_sched_jobs)){
5b011235
CZ
1328 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1329 amdgpu_sched_jobs);
1330 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1331 }
d38ceaf9 1332
83e74db6 1333 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
1334 /* gart size must be greater than or equal to 32M */
1335 dev_warn(adev->dev, "gart size (%d) too small\n",
1336 amdgpu_gart_size);
83e74db6 1337 amdgpu_gart_size = -1;
d38ceaf9
AD
1338 }
1339
36d38372 1340 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1341 /* gtt size must be greater than or equal to 32M */
36d38372
CK
1342 dev_warn(adev->dev, "gtt size (%d) too small\n",
1343 amdgpu_gtt_size);
1344 amdgpu_gtt_size = -1;
d38ceaf9
AD
1345 }
1346
d07f14be
RH
1347 /* valid range is between 4 and 9 inclusive */
1348 if (amdgpu_vm_fragment_size != -1 &&
1349 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1350 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1351 amdgpu_vm_fragment_size = -1;
1352 }
1353
5d5bd5e3
KW
1354 if (amdgpu_sched_hw_submission < 2) {
1355 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1356 amdgpu_sched_hw_submission);
1357 amdgpu_sched_hw_submission = 2;
1358 } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1359 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1360 amdgpu_sched_hw_submission);
1361 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1362 }
1363
7951e376
RZ
1364 amdgpu_device_check_smu_prv_buffer_size(adev);
1365
06ec9070 1366 amdgpu_device_check_vm_size(adev);
d38ceaf9 1367
06ec9070 1368 amdgpu_device_check_block_size(adev);
6a7f76e7 1369
19aede77 1370 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84 1371
c6252390 1372 amdgpu_gmc_tmz_set(adev);
01a8dcec 1373
9b498efa
AD
1374 amdgpu_gmc_noretry_set(adev);
1375
e3c00faa 1376 return 0;
d38ceaf9
AD
1377}
1378
1379/**
1380 * amdgpu_switcheroo_set_state - set switcheroo state
1381 *
1382 * @pdev: pci dev pointer
1694467b 1383 * @state: vga_switcheroo state
d38ceaf9
AD
1384 *
1385 * Callback for the switcheroo driver. Suspends or resumes
1386 * the asic before or after it is powered up using ACPI methods.
1387 */
8aba21b7
LT
1388static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1389 enum vga_switcheroo_state state)
d38ceaf9
AD
1390{
1391 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 1392 int r;
d38ceaf9 1393
fd496ca8 1394 if (amdgpu_device_supports_atpx(dev) && state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
1395 return;
1396
1397 if (state == VGA_SWITCHEROO_ON) {
dd4fa6c1 1398 pr_info("switched on\n");
d38ceaf9
AD
1399 /* don't suspend or resume card normally */
1400 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1401
8f66090b
TZ
1402 pci_set_power_state(pdev, PCI_D0);
1403 amdgpu_device_load_pci_state(pdev);
1404 r = pci_enable_device(pdev);
de185019
AD
1405 if (r)
1406 DRM_WARN("pci_enable_device failed (%d)\n", r);
1407 amdgpu_device_resume(dev, true);
d38ceaf9 1408
d38ceaf9 1409 dev->switch_power_state = DRM_SWITCH_POWER_ON;
d38ceaf9 1410 } else {
dd4fa6c1 1411 pr_info("switched off\n");
d38ceaf9 1412 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
de185019 1413 amdgpu_device_suspend(dev, true);
8f66090b 1414 amdgpu_device_cache_pci_state(pdev);
de185019 1415 /* Shut down the device */
8f66090b
TZ
1416 pci_disable_device(pdev);
1417 pci_set_power_state(pdev, PCI_D3cold);
d38ceaf9
AD
1418 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1419 }
1420}
1421
1422/**
1423 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1424 *
1425 * @pdev: pci dev pointer
1426 *
1427 * Callback for the switcheroo driver. Check if the switcheroo
1428 * state can be changed.
1429 * Returns true if the state can be changed, false if not.
1430 */
1431static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1432{
1433 struct drm_device *dev = pci_get_drvdata(pdev);
1434
1435 /*
1436 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1437 * locking inversion with the driver load path. And the access here is
1438 * completely racy anyway. So don't bother with locking for now.
1439 */
7e13ad89 1440 return atomic_read(&dev->open_count) == 0;
d38ceaf9
AD
1441}
1442
1443static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1444 .set_gpu_state = amdgpu_switcheroo_set_state,
1445 .reprobe = NULL,
1446 .can_switch = amdgpu_switcheroo_can_switch,
1447};
1448
e3ecdffa
AD
1449/**
1450 * amdgpu_device_ip_set_clockgating_state - set the CG state
1451 *
87e3f136 1452 * @dev: amdgpu_device pointer
e3ecdffa
AD
1453 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1454 * @state: clockgating state (gate or ungate)
1455 *
1456 * Sets the requested clockgating state for all instances of
1457 * the hardware IP specified.
1458 * Returns the error code from the last instance.
1459 */
43fa561f 1460int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1461 enum amd_ip_block_type block_type,
1462 enum amd_clockgating_state state)
d38ceaf9 1463{
43fa561f 1464 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1465 int i, r = 0;
1466
1467 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1468 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1469 continue;
c722865a
RZ
1470 if (adev->ip_blocks[i].version->type != block_type)
1471 continue;
1472 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1473 continue;
1474 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1475 (void *)adev, state);
1476 if (r)
1477 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1478 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1479 }
1480 return r;
1481}
1482
e3ecdffa
AD
1483/**
1484 * amdgpu_device_ip_set_powergating_state - set the PG state
1485 *
87e3f136 1486 * @dev: amdgpu_device pointer
e3ecdffa
AD
1487 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1488 * @state: powergating state (gate or ungate)
1489 *
1490 * Sets the requested powergating state for all instances of
1491 * the hardware IP specified.
1492 * Returns the error code from the last instance.
1493 */
43fa561f 1494int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1495 enum amd_ip_block_type block_type,
1496 enum amd_powergating_state state)
d38ceaf9 1497{
43fa561f 1498 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1499 int i, r = 0;
1500
1501 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1502 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1503 continue;
c722865a
RZ
1504 if (adev->ip_blocks[i].version->type != block_type)
1505 continue;
1506 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1507 continue;
1508 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1509 (void *)adev, state);
1510 if (r)
1511 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1512 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1513 }
1514 return r;
1515}
1516
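/*
 * Illustrative sketch (not part of the driver): gating and ungating a single
 * IP with the two helpers above, the way power-management code drives them.
 * The wrapper name is hypothetical.
 */
static void amdgpu_example_gate_vcn(struct amdgpu_device *adev, bool gate)
{
	amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
					       gate ? AMD_PG_STATE_GATE :
						      AMD_PG_STATE_UNGATE);
	amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
					       gate ? AMD_CG_STATE_GATE :
						      AMD_CG_STATE_UNGATE);
}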
e3ecdffa
AD
1517/**
1518 * amdgpu_device_ip_get_clockgating_state - get the CG state
1519 *
1520 * @adev: amdgpu_device pointer
1521 * @flags: clockgating feature flags
1522 *
1523 * Walks the list of IPs on the device and updates the clockgating
1524 * flags for each IP.
1525 * Updates @flags with the feature flags for each hardware IP where
1526 * clockgating is enabled.
1527 */
2990a1fc
AD
1528void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1529 u32 *flags)
6cb2d4e4
HR
1530{
1531 int i;
1532
1533 for (i = 0; i < adev->num_ip_blocks; i++) {
1534 if (!adev->ip_blocks[i].status.valid)
1535 continue;
1536 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1537 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1538 }
1539}
1540
e3ecdffa
AD
1541/**
1542 * amdgpu_device_ip_wait_for_idle - wait for idle
1543 *
1544 * @adev: amdgpu_device pointer
1545 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1546 *
1547 * Waits for the requested hardware IP to be idle.
1548 * Returns 0 for success or a negative error code on failure.
1549 */
2990a1fc
AD
1550int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1551 enum amd_ip_block_type block_type)
5dbbb60b
AD
1552{
1553 int i, r;
1554
1555 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1556 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1557 continue;
a1255107
AD
1558 if (adev->ip_blocks[i].version->type == block_type) {
1559 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1560 if (r)
1561 return r;
1562 break;
1563 }
1564 }
1565 return 0;
1566
1567}
1568
e3ecdffa
AD
1569/**
1570 * amdgpu_device_ip_is_idle - is the hardware IP idle
1571 *
1572 * @adev: amdgpu_device pointer
1573 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1574 *
1575 * Check if the hardware IP is idle or not.
1576 * Returns true if the IP is idle, false if not.
1577 */
2990a1fc
AD
1578bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1579 enum amd_ip_block_type block_type)
5dbbb60b
AD
1580{
1581 int i;
1582
1583 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1584 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1585 continue;
a1255107
AD
1586 if (adev->ip_blocks[i].version->type == block_type)
1587 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1588 }
1589 return true;
1590
1591}
1592
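/*
 * Illustrative sketch (not part of the driver): a caller that checks for
 * idleness and then waits, using the two helpers above. The wrapper name is
 * hypothetical.
 */
static int amdgpu_example_quiesce_gfx(struct amdgpu_device *adev)
{
	if (amdgpu_device_ip_is_idle(adev, AMD_IP_BLOCK_TYPE_GFX))
		return 0;

	return amdgpu_device_ip_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GFX);
}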
e3ecdffa
AD
1593/**
1594 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1595 *
1596 * @adev: amdgpu_device pointer
87e3f136 1597 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1598 *
1599 * Returns a pointer to the hardware IP block structure
1600 * if it exists for the asic, otherwise NULL.
1601 */
2990a1fc
AD
1602struct amdgpu_ip_block *
1603amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1604 enum amd_ip_block_type type)
d38ceaf9
AD
1605{
1606 int i;
1607
1608 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1609 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1610 return &adev->ip_blocks[i];
1611
1612 return NULL;
1613}
1614
1615/**
2990a1fc 1616 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1617 *
1618 * @adev: amdgpu_device pointer
5fc3aeeb 1619 * @type: enum amd_ip_block_type
d38ceaf9
AD
1620 * @major: major version
1621 * @minor: minor version
1622 *
1623 * return 0 if equal or greater
1624 * return 1 if smaller or the ip_block doesn't exist
1625 */
2990a1fc
AD
1626int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1627 enum amd_ip_block_type type,
1628 u32 major, u32 minor)
d38ceaf9 1629{
2990a1fc 1630 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1631
a1255107
AD
1632 if (ip_block && ((ip_block->version->major > major) ||
1633 ((ip_block->version->major == major) &&
1634 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1635 return 0;
1636
1637 return 1;
1638}
1639
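/*
 * Illustrative sketch (not part of the driver): gating a feature on a minimum
 * IP block version with the helper above. The IP type and version numbers are
 * arbitrary examples.
 */
static bool amdgpu_example_gfx_is_v8_or_newer(struct amdgpu_device *adev)
{
	/* returns 0 when the GFX block is at least version 8.0 */
	return amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GFX,
						  8, 0) == 0;
}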
a1255107 1640/**
2990a1fc 1641 * amdgpu_device_ip_block_add
a1255107
AD
1642 *
1643 * @adev: amdgpu_device pointer
1644 * @ip_block_version: pointer to the IP to add
1645 *
1646 * Adds the IP block driver information to the collection of IPs
1647 * on the asic.
1648 */
2990a1fc
AD
1649int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1650 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1651{
1652 if (!ip_block_version)
1653 return -EINVAL;
1654
e966a725 1655 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1656 ip_block_version->funcs->name);
1657
a1255107
AD
1658 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1659
1660 return 0;
1661}
1662
e3ecdffa
AD
1663/**
1664 * amdgpu_device_enable_virtual_display - enable virtual display feature
1665 *
1666 * @adev: amdgpu_device pointer
1667 *
1668 * Enables the virtual display feature if the user has enabled it via
1669 * the module parameter virtual_display. This feature provides a virtual
1670 * display hardware on headless boards or in virtualized environments.
1671 * This function parses and validates the configuration string specified by
1672 * the user and configures the virtual display configuration (number of
1673 * virtual connectors, crtcs, etc.) specified.
1674 */
483ef985 1675static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1676{
1677 adev->enable_virtual_display = false;
1678
1679 if (amdgpu_virtual_display) {
8f66090b 1680 const char *pci_address_name = pci_name(adev->pdev);
0f66356d 1681 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1682
1683 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1684 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1685 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1686 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1687 if (!strcmp("all", pciaddname)
1688 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1689 long num_crtc;
1690 int res = -1;
1691
9accf2fd 1692 adev->enable_virtual_display = true;
0f66356d
ED
1693
1694 if (pciaddname_tmp)
1695 res = kstrtol(pciaddname_tmp, 10,
1696 &num_crtc);
1697
1698 if (!res) {
1699 if (num_crtc < 1)
1700 num_crtc = 1;
1701 if (num_crtc > 6)
1702 num_crtc = 6;
1703 adev->mode_info.num_crtc = num_crtc;
1704 } else {
1705 adev->mode_info.num_crtc = 1;
1706 }
9accf2fd
ED
1707 break;
1708 }
1709 }
1710
0f66356d
ED
1711 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1712 amdgpu_virtual_display, pci_address_name,
1713 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
1714
1715 kfree(pciaddstr);
1716 }
1717}
1718
e3ecdffa
AD
1719/**
1720 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1721 *
1722 * @adev: amdgpu_device pointer
1723 *
1724 * Parses the asic configuration parameters specified in the gpu info
1725 * firmware and makes them availale to the driver for use in configuring
1726 * the asic.
1727 * Returns 0 on success, -EINVAL on failure.
1728 */
e2a75f88
AD
1729static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1730{
e2a75f88 1731 const char *chip_name;
c0a43457 1732 char fw_name[40];
e2a75f88
AD
1733 int err;
1734 const struct gpu_info_firmware_header_v1_0 *hdr;
1735
ab4fe3e1
HR
1736 adev->firmware.gpu_info_fw = NULL;
1737
72de33f8 1738 if (adev->mman.discovery_bin) {
258620d0 1739 amdgpu_discovery_get_gfx_info(adev);
cc375d8c
TY
1740
1741 /*
1742 * FIXME: The bounding box is still needed by Navi12, so
1743 * temporarily read it from gpu_info firmware. Should be dropped
1744 * when DAL no longer needs it.
1745 */
1746 if (adev->asic_type != CHIP_NAVI12)
1747 return 0;
258620d0
AD
1748 }
1749
e2a75f88 1750 switch (adev->asic_type) {
e2a75f88
AD
1751#ifdef CONFIG_DRM_AMDGPU_SI
1752 case CHIP_VERDE:
1753 case CHIP_TAHITI:
1754 case CHIP_PITCAIRN:
1755 case CHIP_OLAND:
1756 case CHIP_HAINAN:
1757#endif
1758#ifdef CONFIG_DRM_AMDGPU_CIK
1759 case CHIP_BONAIRE:
1760 case CHIP_HAWAII:
1761 case CHIP_KAVERI:
1762 case CHIP_KABINI:
1763 case CHIP_MULLINS:
1764#endif
da87c30b
AD
1765 case CHIP_TOPAZ:
1766 case CHIP_TONGA:
1767 case CHIP_FIJI:
1768 case CHIP_POLARIS10:
1769 case CHIP_POLARIS11:
1770 case CHIP_POLARIS12:
1771 case CHIP_VEGAM:
1772 case CHIP_CARRIZO:
1773 case CHIP_STONEY:
27c0bc71 1774 case CHIP_VEGA20:
44b3253a 1775 case CHIP_ALDEBARAN:
84d244a3
JC
1776 case CHIP_SIENNA_CICHLID:
1777 case CHIP_NAVY_FLOUNDER:
eac88a5f 1778 case CHIP_DIMGREY_CAVEFISH:
e2a75f88
AD
1779 default:
1780 return 0;
1781 case CHIP_VEGA10:
1782 chip_name = "vega10";
1783 break;
3f76dced
AD
1784 case CHIP_VEGA12:
1785 chip_name = "vega12";
1786 break;
2d2e5e7e 1787 case CHIP_RAVEN:
54f78a76 1788 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
54c4d17e 1789 chip_name = "raven2";
54f78a76 1790 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
741deade 1791 chip_name = "picasso";
54c4d17e
FX
1792 else
1793 chip_name = "raven";
2d2e5e7e 1794 break;
65e60f6e
LM
1795 case CHIP_ARCTURUS:
1796 chip_name = "arcturus";
1797 break;
b51a26a0 1798 case CHIP_RENOIR:
2e62f0b5
PL
1799 if (adev->apu_flags & AMD_APU_IS_RENOIR)
1800 chip_name = "renoir";
1801 else
1802 chip_name = "green_sardine";
b51a26a0 1803 break;
23c6268e
HR
1804 case CHIP_NAVI10:
1805 chip_name = "navi10";
1806 break;
ed42cfe1
XY
1807 case CHIP_NAVI14:
1808 chip_name = "navi14";
1809 break;
42b325e5
XY
1810 case CHIP_NAVI12:
1811 chip_name = "navi12";
1812 break;
4e52a9f8
HR
1813 case CHIP_VANGOGH:
1814 chip_name = "vangogh";
1815 break;
e2a75f88
AD
1816 }
1817
1818 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
ab4fe3e1 1819 err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
e2a75f88
AD
1820 if (err) {
1821 dev_err(adev->dev,
1822 "Failed to load gpu_info firmware \"%s\"\n",
1823 fw_name);
1824 goto out;
1825 }
ab4fe3e1 1826 err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
e2a75f88
AD
1827 if (err) {
1828 dev_err(adev->dev,
1829 "Failed to validate gpu_info firmware \"%s\"\n",
1830 fw_name);
1831 goto out;
1832 }
1833
ab4fe3e1 1834 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
1835 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1836
1837 switch (hdr->version_major) {
1838 case 1:
1839 {
1840 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 1841 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
1842 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1843
cc375d8c
TY
1844 /*
1845 * Should be dropped when DAL no longer needs it.
1846 */
1847 if (adev->asic_type == CHIP_NAVI12)
ec51d3fa
XY
1848 goto parse_soc_bounding_box;
1849
b5ab16bf
AD
1850 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1851 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1852 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1853 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 1854 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
1855 le32_to_cpu(gpu_info_fw->gc_num_tccs);
1856 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1857 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1858 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1859 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 1860 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
1861 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1862 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
1863 adev->gfx.cu_info.max_waves_per_simd =
1864 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1865 adev->gfx.cu_info.max_scratch_slots_per_cu =
1866 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1867 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 1868 if (hdr->version_minor >= 1) {
35c2e910
HZ
1869 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1870 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1871 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1872 adev->gfx.config.num_sc_per_sh =
1873 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1874 adev->gfx.config.num_packer_per_sc =
1875 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1876 }
ec51d3fa
XY
1877
1878parse_soc_bounding_box:
ec51d3fa
XY
1879 /*
1880 * soc bounding box info is not integrated in the discovery table,
258620d0 1881 * we always need to parse it from gpu info firmware if needed.
ec51d3fa 1882 */
48321c3d
HW
1883 if (hdr->version_minor == 2) {
1884 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1885 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1886 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1887 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1888 }
e2a75f88
AD
1889 break;
1890 }
1891 default:
1892 dev_err(adev->dev,
1893 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1894 err = -EINVAL;
1895 goto out;
1896 }
1897out:
e2a75f88
AD
1898 return err;
1899}
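/*
 * Firmware layout sketch, as assumed by the parser above:
 *
 *	offset 0:                              struct gpu_info_firmware_header_v1_0
 *	hdr->header.ucode_array_offset_bytes:  struct gpu_info_firmware_v1_0 (gc_* fields)
 *	  minor >= 1 additionally provides num_sc_per_sh / num_packer_per_sc (v1_1)
 *	  minor == 2 additionally provides the DC soc_bounding_box (v1_2)
 *
 * All fields are little-endian, hence the le32_to_cpu() conversions.
 */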
1900
e3ecdffa
AD
1901/**
1902 * amdgpu_device_ip_early_init - run early init for hardware IPs
1903 *
1904 * @adev: amdgpu_device pointer
1905 *
1906 * Early initialization pass for hardware IPs. The hardware IPs that make
1907 * up each asic are discovered and each IP's early_init callback is run. This
1908 * is the first stage in initializing the asic.
1909 * Returns 0 on success, negative error code on failure.
1910 */
06ec9070 1911static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 1912{
aaa36a97 1913 int i, r;
d38ceaf9 1914
483ef985 1915 amdgpu_device_enable_virtual_display(adev);
a6be7570 1916
00a979f3 1917 if (amdgpu_sriov_vf(adev)) {
00a979f3 1918 r = amdgpu_virt_request_full_gpu(adev, true);
aaa36a97
AD
1919 if (r)
1920 return r;
00a979f3
WS
1921 }
1922
d38ceaf9 1923 switch (adev->asic_type) {
33f34802
KW
1924#ifdef CONFIG_DRM_AMDGPU_SI
1925 case CHIP_VERDE:
1926 case CHIP_TAHITI:
1927 case CHIP_PITCAIRN:
1928 case CHIP_OLAND:
1929 case CHIP_HAINAN:
295d0daf 1930 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
1931 r = si_set_ip_blocks(adev);
1932 if (r)
1933 return r;
1934 break;
1935#endif
a2e73f56
AD
1936#ifdef CONFIG_DRM_AMDGPU_CIK
1937 case CHIP_BONAIRE:
1938 case CHIP_HAWAII:
1939 case CHIP_KAVERI:
1940 case CHIP_KABINI:
1941 case CHIP_MULLINS:
e1ad2d53 1942 if (adev->flags & AMD_IS_APU)
a2e73f56 1943 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
1944 else
1945 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
1946
1947 r = cik_set_ip_blocks(adev);
1948 if (r)
1949 return r;
1950 break;
1951#endif
da87c30b
AD
1952 case CHIP_TOPAZ:
1953 case CHIP_TONGA:
1954 case CHIP_FIJI:
1955 case CHIP_POLARIS10:
1956 case CHIP_POLARIS11:
1957 case CHIP_POLARIS12:
1958 case CHIP_VEGAM:
1959 case CHIP_CARRIZO:
1960 case CHIP_STONEY:
1961 if (adev->flags & AMD_IS_APU)
1962 adev->family = AMDGPU_FAMILY_CZ;
1963 else
1964 adev->family = AMDGPU_FAMILY_VI;
1965
1966 r = vi_set_ip_blocks(adev);
1967 if (r)
1968 return r;
1969 break;
e48a3cd9
AD
1970 case CHIP_VEGA10:
1971 case CHIP_VEGA12:
e4bd8170 1972 case CHIP_VEGA20:
e48a3cd9 1973 case CHIP_RAVEN:
61cf44c1 1974 case CHIP_ARCTURUS:
b51a26a0 1975 case CHIP_RENOIR:
c00a18ec 1976 case CHIP_ALDEBARAN:
70534d1e 1977 if (adev->flags & AMD_IS_APU)
2ca8a5d2
CZ
1978 adev->family = AMDGPU_FAMILY_RV;
1979 else
1980 adev->family = AMDGPU_FAMILY_AI;
460826e6
KW
1981
1982 r = soc15_set_ip_blocks(adev);
1983 if (r)
1984 return r;
1985 break;
0a5b8c7b 1986 case CHIP_NAVI10:
7ecb5cd4 1987 case CHIP_NAVI14:
4808cf9c 1988 case CHIP_NAVI12:
11e8aef5 1989 case CHIP_SIENNA_CICHLID:
41f446bf 1990 case CHIP_NAVY_FLOUNDER:
144722fa 1991 case CHIP_DIMGREY_CAVEFISH:
4e52a9f8
HR
1992 case CHIP_VANGOGH:
1993 if (adev->asic_type == CHIP_VANGOGH)
1994 adev->family = AMDGPU_FAMILY_VGH;
1995 else
1996 adev->family = AMDGPU_FAMILY_NV;
0a5b8c7b
HR
1997
1998 r = nv_set_ip_blocks(adev);
1999 if (r)
2000 return r;
2001 break;
d38ceaf9
AD
2002 default:
2003 /* FIXME: not supported yet */
2004 return -EINVAL;
2005 }
2006
1884734a 2007 amdgpu_amdkfd_device_probe(adev);
2008
3b94fb10 2009 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 2010 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 2011 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
4215a119
HC
2012 if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2013 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
00f54b97 2014
d38ceaf9
AD
2015 for (i = 0; i < adev->num_ip_blocks; i++) {
2016 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
ed8cf00c
HR
2017 DRM_ERROR("disabled ip block: %d <%s>\n",
2018 i, adev->ip_blocks[i].version->funcs->name);
a1255107 2019 adev->ip_blocks[i].status.valid = false;
d38ceaf9 2020 } else {
a1255107
AD
2021 if (adev->ip_blocks[i].version->funcs->early_init) {
2022 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 2023 if (r == -ENOENT) {
a1255107 2024 adev->ip_blocks[i].status.valid = false;
2c1a2784 2025 } else if (r) {
a1255107
AD
2026 DRM_ERROR("early_init of IP block <%s> failed %d\n",
2027 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2028 return r;
2c1a2784 2029 } else {
a1255107 2030 adev->ip_blocks[i].status.valid = true;
2c1a2784 2031 }
974e6b64 2032 } else {
a1255107 2033 adev->ip_blocks[i].status.valid = true;
d38ceaf9 2034 }
d38ceaf9 2035 }
21a249ca
AD
2036 /* get the vbios after the asic_funcs are set up */
2037 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
2038 r = amdgpu_device_parse_gpu_info_fw(adev);
2039 if (r)
2040 return r;
2041
21a249ca
AD
2042 /* Read BIOS */
2043 if (!amdgpu_get_bios(adev))
2044 return -EINVAL;
2045
2046 r = amdgpu_atombios_init(adev);
2047 if (r) {
2048 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2049 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2050 return r;
2051 }
2052 }
d38ceaf9
AD
2053 }
2054
395d1fb9
NH
2055 adev->cg_flags &= amdgpu_cg_mask;
2056 adev->pg_flags &= amdgpu_pg_mask;
2057
d38ceaf9
AD
2058 return 0;
2059}
2060
0a4f2520
RZ
2061static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2062{
2063 int i, r;
2064
2065 for (i = 0; i < adev->num_ip_blocks; i++) {
2066 if (!adev->ip_blocks[i].status.sw)
2067 continue;
2068 if (adev->ip_blocks[i].status.hw)
2069 continue;
2070 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 2071 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
2072 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2073 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2074 if (r) {
2075 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2076 adev->ip_blocks[i].version->funcs->name, r);
2077 return r;
2078 }
2079 adev->ip_blocks[i].status.hw = true;
2080 }
2081 }
2082
2083 return 0;
2084}
2085
2086static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2087{
2088 int i, r;
2089
2090 for (i = 0; i < adev->num_ip_blocks; i++) {
2091 if (!adev->ip_blocks[i].status.sw)
2092 continue;
2093 if (adev->ip_blocks[i].status.hw)
2094 continue;
2095 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2096 if (r) {
2097 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2098 adev->ip_blocks[i].version->funcs->name, r);
2099 return r;
2100 }
2101 adev->ip_blocks[i].status.hw = true;
2102 }
2103
2104 return 0;
2105}
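/*
 * Split note: phase 1 above only brings up COMMON, IH and (for SR-IOV) PSP
 * so that firmware can be loaded via amdgpu_device_fw_loading(); phase 2
 * then initializes every remaining block. See amdgpu_device_ip_init() below
 * for the ordering of the two phases around the firmware load.
 */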
2106
7a3e0bb2
RZ
2107static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2108{
2109 int r = 0;
2110 int i;
80f41f84 2111 uint32_t smu_version;
7a3e0bb2
RZ
2112
2113 if (adev->asic_type >= CHIP_VEGA10) {
2114 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
2115 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2116 continue;
2117
e3c1b071 2118 if (!adev->ip_blocks[i].status.sw)
2119 continue;
2120
482f0e53
ML
2121 /* no need to do the fw loading again if already done*/
2122 if (adev->ip_blocks[i].status.hw == true)
2123 break;
2124
53b3f8f4 2125 if (amdgpu_in_reset(adev) || adev->in_suspend) {
482f0e53
ML
2126 r = adev->ip_blocks[i].version->funcs->resume(adev);
2127 if (r) {
2128 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 2129 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
2130 return r;
2131 }
2132 } else {
2133 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2134 if (r) {
2135 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2136 adev->ip_blocks[i].version->funcs->name, r);
2137 return r;
7a3e0bb2 2138 }
7a3e0bb2 2139 }
482f0e53
ML
2140
2141 adev->ip_blocks[i].status.hw = true;
2142 break;
7a3e0bb2
RZ
2143 }
2144 }
482f0e53 2145
8973d9ec
ED
2146 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2147 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 2148
80f41f84 2149 return r;
7a3e0bb2
RZ
2150}
2151
e3ecdffa
AD
2152/**
2153 * amdgpu_device_ip_init - run init for hardware IPs
2154 *
2155 * @adev: amdgpu_device pointer
2156 *
2157 * Main initialization pass for hardware IPs. The list of all the hardware
2158 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2159 * are run. sw_init initializes the software state associated with each IP
2160 * and hw_init initializes the hardware associated with each IP.
2161 * Returns 0 on success, negative error code on failure.
2162 */
06ec9070 2163static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
2164{
2165 int i, r;
2166
c030f2e4 2167 r = amdgpu_ras_init(adev);
2168 if (r)
2169 return r;
2170
d38ceaf9 2171 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2172 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2173 continue;
a1255107 2174 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 2175 if (r) {
a1255107
AD
2176 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2177 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 2178 goto init_failed;
2c1a2784 2179 }
a1255107 2180 adev->ip_blocks[i].status.sw = true;
bfca0289 2181
d38ceaf9 2182 /* need to do gmc hw init early so we can allocate gpu mem */
a1255107 2183 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
06ec9070 2184 r = amdgpu_device_vram_scratch_init(adev);
2c1a2784
AD
2185 if (r) {
2186 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
72d3f592 2187 goto init_failed;
2c1a2784 2188 }
a1255107 2189 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
2190 if (r) {
2191 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 2192 goto init_failed;
2c1a2784 2193 }
06ec9070 2194 r = amdgpu_device_wb_init(adev);
2c1a2784 2195 if (r) {
06ec9070 2196 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 2197 goto init_failed;
2c1a2784 2198 }
a1255107 2199 adev->ip_blocks[i].status.hw = true;
2493664f
ML
2200
2201 /* right after GMC hw init, we create CSA */
f92d5c61 2202 if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
1e256e27
RZ
2203 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2204 AMDGPU_GEM_DOMAIN_VRAM,
2205 AMDGPU_CSA_SIZE);
2493664f
ML
2206 if (r) {
2207 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 2208 goto init_failed;
2493664f
ML
2209 }
2210 }
d38ceaf9
AD
2211 }
2212 }
2213
c9ffa427
YT
2214 if (amdgpu_sriov_vf(adev))
2215 amdgpu_virt_init_data_exchange(adev);
2216
533aed27
AG
2217 r = amdgpu_ib_pool_init(adev);
2218 if (r) {
2219 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2220 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2221 goto init_failed;
2222 }
2223
c8963ea4
RZ
2224 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2225 if (r)
72d3f592 2226 goto init_failed;
0a4f2520
RZ
2227
2228 r = amdgpu_device_ip_hw_init_phase1(adev);
2229 if (r)
72d3f592 2230 goto init_failed;
0a4f2520 2231
7a3e0bb2
RZ
2232 r = amdgpu_device_fw_loading(adev);
2233 if (r)
72d3f592 2234 goto init_failed;
7a3e0bb2 2235
0a4f2520
RZ
2236 r = amdgpu_device_ip_hw_init_phase2(adev);
2237 if (r)
72d3f592 2238 goto init_failed;
d38ceaf9 2239
121a2bc6
AG
2240 /*
2241 * retired pages will be loaded from eeprom and reserved here,
2242 * it should be called after amdgpu_device_ip_hw_init_phase2 since
2243 * for some ASICs the RAS EEPROM code relies on the SMU being fully
2244 * functional for I2C communication, which is only true at this point.
b82e65a9
GC
2245 *
2246 * amdgpu_ras_recovery_init may fail, but the caller only cares about
2247 * failures caused by a bad gpu situation and stops the amdgpu init
2248 * process accordingly. For other failures, it still releases all
2249 * the resources and prints an error message, rather than returning a
2250 * negative value to the upper level.
121a2bc6
AG
2251 *
2252 * Note: theoretically, this should be called before all vram allocations
2253 * to protect retired pages from being used again
2254 */
b82e65a9
GC
2255 r = amdgpu_ras_recovery_init(adev);
2256 if (r)
2257 goto init_failed;
121a2bc6 2258
3e2e2ab5
HZ
2259 if (adev->gmc.xgmi.num_physical_nodes > 1)
2260 amdgpu_xgmi_add_device(adev);
e3c1b071 2261
2262 /* Don't init kfd if whole hive need to be reset during init */
2263 if (!adev->gmc.xgmi.pending_reset)
2264 amdgpu_amdkfd_device_init(adev);
c6332b97 2265
bd607166
KR
2266 amdgpu_fru_get_product_info(adev);
2267
72d3f592 2268init_failed:
c9ffa427 2269 if (amdgpu_sriov_vf(adev))
c6332b97 2270 amdgpu_virt_release_full_gpu(adev, true);
2271
72d3f592 2272 return r;
d38ceaf9
AD
2273}
2274
e3ecdffa
AD
2275/**
2276 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2277 *
2278 * @adev: amdgpu_device pointer
2279 *
2280 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2281 * this function before a GPU reset. If the value is retained after a
2282 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2283 */
06ec9070 2284static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2285{
2286 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2287}
2288
e3ecdffa
AD
2289/**
2290 * amdgpu_device_check_vram_lost - check if vram is valid
2291 *
2292 * @adev: amdgpu_device pointer
2293 *
2294 * Checks the reset magic value written to the gart pointer in VRAM.
2295 * The driver calls this after a GPU reset to see if the contents of
2296 * VRAM have been lost or not.
2297 * returns true if vram is lost, false if not.
2298 */
06ec9070 2299static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2300{
dadce777
EQ
2301 if (memcmp(adev->gart.ptr, adev->reset_magic,
2302 AMDGPU_RESET_MAGIC_NUM))
2303 return true;
2304
53b3f8f4 2305 if (!amdgpu_in_reset(adev))
dadce777
EQ
2306 return false;
2307
2308 /*
2309 * For all ASICs with baco/mode1 reset, the VRAM is
2310 * always assumed to be lost.
2311 */
2312 switch (amdgpu_asic_reset_method(adev)) {
2313 case AMD_RESET_METHOD_BACO:
2314 case AMD_RESET_METHOD_MODE1:
2315 return true;
2316 default:
2317 return false;
2318 }
0c49e0b8
CZ
2319}
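/*
 * Reset-magic sketch: the first AMDGPU_RESET_MAGIC_NUM bytes of the GART
 * table are saved in adev->reset_magic before a reset and compared against
 * the same GART bytes afterwards; a mismatch, or a BACO/mode1 reset method,
 * is treated as VRAM loss.
 */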
2320
e3ecdffa 2321/**
1112a46b 2322 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2323 *
2324 * @adev: amdgpu_device pointer
b8b72130 2325 * @state: clockgating state (gate or ungate)
e3ecdffa 2326 *
e3ecdffa 2327 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2328 * set_clockgating_state callbacks are run.
2329 * During late init this pass enables clockgating for the hardware IPs;
2330 * during fini or suspend it disables clockgating for them.
e3ecdffa
AD
2331 * Returns 0 on success, negative error code on failure.
2332 */
fdd34271 2333
1112a46b
RZ
2334static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2335 enum amd_clockgating_state state)
d38ceaf9 2336{
1112a46b 2337 int i, j, r;
d38ceaf9 2338
4a2ba394
SL
2339 if (amdgpu_emu_mode == 1)
2340 return 0;
2341
1112a46b
RZ
2342 for (j = 0; j < adev->num_ip_blocks; j++) {
2343 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2344 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2345 continue;
4a446d55 2346 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2347 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2348 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2349 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2350 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2351 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2352 /* enable clockgating to save power */
a1255107 2353 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2354 state);
4a446d55
AD
2355 if (r) {
2356 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2357 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2358 return r;
2359 }
b0b00ff1 2360 }
d38ceaf9 2361 }
06b18f61 2362
c9f96fd5
RZ
2363 return 0;
2364}
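/*
 * Ordering note: for AMD_CG_STATE_GATE the blocks are walked front to back
 * (i == j), for AMD_CG_STATE_UNGATE back to front (i == num_ip_blocks - j - 1),
 * so gating follows init order and ungating follows fini order. The
 * powergating walk below uses the same scheme.
 */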
2365
1112a46b 2366static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
c9f96fd5 2367{
1112a46b 2368 int i, j, r;
06b18f61 2369
c9f96fd5
RZ
2370 if (amdgpu_emu_mode == 1)
2371 return 0;
2372
1112a46b
RZ
2373 for (j = 0; j < adev->num_ip_blocks; j++) {
2374 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2375 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5
RZ
2376 continue;
2377 /* skip CG for VCE/UVD, it's handled specially */
2378 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2379 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2380 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2381 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2382 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2383 /* enable powergating to save power */
2384 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2385 state);
c9f96fd5
RZ
2386 if (r) {
2387 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2388 adev->ip_blocks[i].version->funcs->name, r);
2389 return r;
2390 }
2391 }
2392 }
2dc80b00
S
2393 return 0;
2394}
2395
beff74bc
AD
2396static int amdgpu_device_enable_mgpu_fan_boost(void)
2397{
2398 struct amdgpu_gpu_instance *gpu_ins;
2399 struct amdgpu_device *adev;
2400 int i, ret = 0;
2401
2402 mutex_lock(&mgpu_info.mutex);
2403
2404 /*
2405 * MGPU fan boost feature should be enabled
2406 * only when there are two or more dGPUs in
2407 * the system
2408 */
2409 if (mgpu_info.num_dgpu < 2)
2410 goto out;
2411
2412 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2413 gpu_ins = &(mgpu_info.gpu_ins[i]);
2414 adev = gpu_ins->adev;
2415 if (!(adev->flags & AMD_IS_APU) &&
f10bb940 2416 !gpu_ins->mgpu_fan_enabled) {
beff74bc
AD
2417 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2418 if (ret)
2419 break;
2420
2421 gpu_ins->mgpu_fan_enabled = 1;
2422 }
2423 }
2424
2425out:
2426 mutex_unlock(&mgpu_info.mutex);
2427
2428 return ret;
2429}
2430
e3ecdffa
AD
2431/**
2432 * amdgpu_device_ip_late_init - run late init for hardware IPs
2433 *
2434 * @adev: amdgpu_device pointer
2435 *
2436 * Late initialization pass for hardware IPs. The list of all the hardware
2437 * IPs that make up the asic is walked and the late_init callbacks are run.
2438 * late_init covers any special initialization that an IP requires
2439 * after all of them have been initialized or something that needs to happen
2440 * late in the init process.
2441 * Returns 0 on success, negative error code on failure.
2442 */
06ec9070 2443static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2444{
60599a03 2445 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2446 int i = 0, r;
2447
2448 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2449 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2450 continue;
2451 if (adev->ip_blocks[i].version->funcs->late_init) {
2452 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2453 if (r) {
2454 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2455 adev->ip_blocks[i].version->funcs->name, r);
2456 return r;
2457 }
2dc80b00 2458 }
73f847db 2459 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2460 }
2461
a891d239
DL
2462 amdgpu_ras_set_error_query_ready(adev, true);
2463
1112a46b
RZ
2464 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2465 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2466
06ec9070 2467 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2468
beff74bc
AD
2469 r = amdgpu_device_enable_mgpu_fan_boost();
2470 if (r)
2471 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2472
2d02893f 2473 /* For XGMI + passthrough configuration on arcturus, enable light SBR */
2474 if (adev->asic_type == CHIP_ARCTURUS &&
2475 amdgpu_passthrough(adev) &&
2476 adev->gmc.xgmi.num_physical_nodes > 1)
2477 smu_set_light_sbr(&adev->smu, true);
60599a03
EQ
2478
2479 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2480 mutex_lock(&mgpu_info.mutex);
2481
2482 /*
2483 * Reset device p-state to low as this was booted with high.
2484 *
2485 * This should be performed only after all devices from the same
2486 * hive get initialized.
2487 *
2488 * However, it's unknown in advance how many devices are in the hive,
2489 * as they are counted one by one during device initialization.
2490 *
2491 * So, we wait for all XGMI interlinked devices initialized.
2492 * This may bring some delays as those devices may come from
2493 * different hives. But that should be OK.
2494 */
2495 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2496 for (i = 0; i < mgpu_info.num_gpu; i++) {
2497 gpu_instance = &(mgpu_info.gpu_ins[i]);
2498 if (gpu_instance->adev->flags & AMD_IS_APU)
2499 continue;
2500
d84a430d
JK
2501 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2502 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
2503 if (r) {
2504 DRM_ERROR("pstate setting failed (%d).\n", r);
2505 break;
2506 }
2507 }
2508 }
2509
2510 mutex_unlock(&mgpu_info.mutex);
2511 }
2512
d38ceaf9
AD
2513 return 0;
2514}
2515
e3ecdffa
AD
2516/**
2517 * amdgpu_device_ip_fini - run fini for hardware IPs
2518 *
2519 * @adev: amdgpu_device pointer
2520 *
2521 * Main teardown pass for hardware IPs. The list of all the hardware
2522 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2523 * are run. hw_fini tears down the hardware associated with each IP
2524 * and sw_fini tears down any software state associated with each IP.
2525 * Returns 0 on success, negative error code on failure.
2526 */
06ec9070 2527static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
d38ceaf9
AD
2528{
2529 int i, r;
2530
5278a159
SY
2531 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2532 amdgpu_virt_release_ras_err_handler_data(adev);
2533
c030f2e4 2534 amdgpu_ras_pre_fini(adev);
2535
a82400b5
AG
2536 if (adev->gmc.xgmi.num_physical_nodes > 1)
2537 amdgpu_xgmi_remove_device(adev);
2538
05df1f01 2539 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2540 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2541
26eb6b51
DL
2542 amdgpu_amdkfd_device_fini(adev);
2543
3e96dbfd
AD
2544 /* need to disable SMC first */
2545 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2546 if (!adev->ip_blocks[i].status.hw)
3e96dbfd 2547 continue;
fdd34271 2548 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
a1255107 2549 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3e96dbfd
AD
2550 /* XXX handle errors */
2551 if (r) {
2552 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
a1255107 2553 adev->ip_blocks[i].version->funcs->name, r);
3e96dbfd 2554 }
a1255107 2555 adev->ip_blocks[i].status.hw = false;
3e96dbfd
AD
2556 break;
2557 }
2558 }
2559
d38ceaf9 2560 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2561 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2562 continue;
8201a67a 2563
a1255107 2564 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2565 /* XXX handle errors */
2c1a2784 2566 if (r) {
a1255107
AD
2567 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2568 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2569 }
8201a67a 2570
a1255107 2571 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2572 }
2573
9950cda2 2574
d38ceaf9 2575 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2576 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2577 continue;
c12aba3a
ML
2578
2579 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2580 amdgpu_ucode_free_bo(adev);
1e256e27 2581 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a
ML
2582 amdgpu_device_wb_fini(adev);
2583 amdgpu_device_vram_scratch_fini(adev);
533aed27 2584 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2585 }
2586
a1255107 2587 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2588 /* XXX handle errors */
2c1a2784 2589 if (r) {
a1255107
AD
2590 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2591 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2592 }
a1255107
AD
2593 adev->ip_blocks[i].status.sw = false;
2594 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2595 }
2596
a6dcfd9c 2597 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2598 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2599 continue;
a1255107
AD
2600 if (adev->ip_blocks[i].version->funcs->late_fini)
2601 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2602 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2603 }
2604
c030f2e4 2605 amdgpu_ras_fini(adev);
2606
030308fc 2607 if (amdgpu_sriov_vf(adev))
24136135
ML
2608 if (amdgpu_virt_release_full_gpu(adev, false))
2609 DRM_ERROR("failed to release exclusive mode on fini\n");
2493664f 2610
d38ceaf9
AD
2611 return 0;
2612}
2613
e3ecdffa 2614/**
beff74bc 2615 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2616 *
1112a46b 2617 * @work: work_struct.
e3ecdffa 2618 */
beff74bc 2619static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2620{
2621 struct amdgpu_device *adev =
beff74bc 2622 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2623 int r;
2624
2625 r = amdgpu_ib_ring_tests(adev);
2626 if (r)
2627 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2628}
2629
1e317b99
RZ
2630static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2631{
2632 struct amdgpu_device *adev =
2633 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2634
2635 mutex_lock(&adev->gfx.gfx_off_mutex);
2636 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2637 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2638 adev->gfx.gfx_off_state = true;
2639 }
2640 mutex_unlock(&adev->gfx.gfx_off_mutex);
2641}
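/*
 * GFXOFF sketch: gfx_off_req_count starts at 1 (see amdgpu_device_init below)
 * and is expected to drop to zero once all users allow GFXOFF; this delayed
 * worker then asks the SMU to power-gate GFX, recording the result in
 * gfx_off_state so the request is not repeated.
 */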
2642
e3ecdffa 2643/**
e7854a03 2644 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2645 *
2646 * @adev: amdgpu_device pointer
2647 *
2648 * Main suspend function for hardware IPs. The list of all the hardware
2649 * IPs that make up the asic is walked, clockgating is disabled and the
2650 * suspend callbacks are run. suspend puts the hardware and software state
2651 * in each IP into a state suitable for suspend.
2652 * Returns 0 on success, negative error code on failure.
2653 */
e7854a03
AD
2654static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2655{
2656 int i, r;
2657
b00978de
PL
2658 if (adev->in_poweroff_reboot_com ||
2659 !amdgpu_acpi_is_s0ix_supported(adev) || amdgpu_in_reset(adev)) {
628c36d7
PL
2660 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2661 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2662 }
05df1f01 2663
e7854a03
AD
2664 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2665 if (!adev->ip_blocks[i].status.valid)
2666 continue;
2b9f7848 2667
e7854a03 2668 /* displays are handled separately */
2b9f7848
ND
2669 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
2670 continue;
2671
2672 /* XXX handle errors */
2673 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2674 /* XXX handle errors */
2675 if (r) {
2676 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2677 adev->ip_blocks[i].version->funcs->name, r);
2678 return r;
e7854a03 2679 }
2b9f7848
ND
2680
2681 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
2682 }
2683
e7854a03
AD
2684 return 0;
2685}
2686
2687/**
2688 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2689 *
2690 * @adev: amdgpu_device pointer
2691 *
2692 * Main suspend function for hardware IPs. The list of all the hardware
2693 * IPs that make up the asic is walked, clockgating is disabled and the
2694 * suspend callbacks are run. suspend puts the hardware and software state
2695 * in each IP into a state suitable for suspend.
2696 * Returns 0 on success, negative error code on failure.
2697 */
2698static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2699{
2700 int i, r;
2701
2702 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2703 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2704 continue;
e7854a03
AD
2705 /* displays are handled in phase1 */
2706 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2707 continue;
bff77e86
LM
2708 /* PSP lost connection when err_event_athub occurs */
2709 if (amdgpu_ras_intr_triggered() &&
2710 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2711 adev->ip_blocks[i].status.hw = false;
2712 continue;
2713 }
e3c1b071 2714
2715 /* skip unnecessary suspend if we have not initialized these blocks yet */
2716 if (adev->gmc.xgmi.pending_reset &&
2717 !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2718 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
2719 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2720 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
2721 adev->ip_blocks[i].status.hw = false;
2722 continue;
2723 }
d38ceaf9 2724 /* XXX handle errors */
a1255107 2725 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 2726 /* XXX handle errors */
2c1a2784 2727 if (r) {
a1255107
AD
2728 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2729 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2730 }
876923fb 2731 adev->ip_blocks[i].status.hw = false;
a3a09142 2732 /* handle putting the SMC in the appropriate state */
86b93fd6
JZ
2733 if (!amdgpu_sriov_vf(adev)) {
2734 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2735 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
2736 if (r) {
2737 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
2738 adev->mp1_state, r);
2739 return r;
2740 }
a3a09142
AD
2741 }
2742 }
d38ceaf9
AD
2743 }
2744
2745 return 0;
2746}
2747
e7854a03
AD
2748/**
2749 * amdgpu_device_ip_suspend - run suspend for hardware IPs
2750 *
2751 * @adev: amdgpu_device pointer
2752 *
2753 * Main suspend function for hardware IPs. The list of all the hardware
2754 * IPs that make up the asic is walked, clockgating is disabled and the
2755 * suspend callbacks are run. suspend puts the hardware and software state
2756 * in each IP into a state suitable for suspend.
2757 * Returns 0 on success, negative error code on failure.
2758 */
2759int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2760{
2761 int r;
2762
3c73683c
JC
2763 if (amdgpu_sriov_vf(adev)) {
2764 amdgpu_virt_fini_data_exchange(adev);
e7819644 2765 amdgpu_virt_request_full_gpu(adev, false);
3c73683c 2766 }
e7819644 2767
e7854a03
AD
2768 r = amdgpu_device_ip_suspend_phase1(adev);
2769 if (r)
2770 return r;
2771 r = amdgpu_device_ip_suspend_phase2(adev);
2772
e7819644
YT
2773 if (amdgpu_sriov_vf(adev))
2774 amdgpu_virt_release_full_gpu(adev, false);
2775
e7854a03
AD
2776 return r;
2777}
2778
06ec9070 2779static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2780{
2781 int i, r;
2782
2cb681b6
ML
2783 static enum amd_ip_block_type ip_order[] = {
2784 AMD_IP_BLOCK_TYPE_GMC,
2785 AMD_IP_BLOCK_TYPE_COMMON,
39186aef 2786 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
2787 AMD_IP_BLOCK_TYPE_IH,
2788 };
a90ad3c2 2789
2cb681b6
ML
2790 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2791 int j;
2792 struct amdgpu_ip_block *block;
a90ad3c2 2793
4cd2a96d
J
2794 block = &adev->ip_blocks[i];
2795 block->status.hw = false;
2cb681b6 2796
4cd2a96d 2797 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
2cb681b6 2798
4cd2a96d 2799 if (block->version->type != ip_order[j] ||
2cb681b6
ML
2800 !block->status.valid)
2801 continue;
2802
2803 r = block->version->funcs->hw_init(adev);
0aaeefcc 2804 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2805 if (r)
2806 return r;
482f0e53 2807 block->status.hw = true;
a90ad3c2
ML
2808 }
2809 }
2810
2811 return 0;
2812}
2813
06ec9070 2814static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2815{
2816 int i, r;
2817
2cb681b6
ML
2818 static enum amd_ip_block_type ip_order[] = {
2819 AMD_IP_BLOCK_TYPE_SMC,
2820 AMD_IP_BLOCK_TYPE_DCE,
2821 AMD_IP_BLOCK_TYPE_GFX,
2822 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c 2823 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07
JJ
2824 AMD_IP_BLOCK_TYPE_VCE,
2825 AMD_IP_BLOCK_TYPE_VCN
2cb681b6 2826 };
a90ad3c2 2827
2cb681b6
ML
2828 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2829 int j;
2830 struct amdgpu_ip_block *block;
a90ad3c2 2831
2cb681b6
ML
2832 for (j = 0; j < adev->num_ip_blocks; j++) {
2833 block = &adev->ip_blocks[j];
2834
2835 if (block->version->type != ip_order[i] ||
482f0e53
ML
2836 !block->status.valid ||
2837 block->status.hw)
2cb681b6
ML
2838 continue;
2839
895bd048
JZ
2840 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
2841 r = block->version->funcs->resume(adev);
2842 else
2843 r = block->version->funcs->hw_init(adev);
2844
0aaeefcc 2845 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2846 if (r)
2847 return r;
482f0e53 2848 block->status.hw = true;
a90ad3c2
ML
2849 }
2850 }
2851
2852 return 0;
2853}
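/*
 * Re-init order note: during SR-IOV recovery the blocks are brought back in
 * the fixed ip_order[] sequences above rather than in adev->ip_blocks[]
 * order: GMC, COMMON, PSP and IH early, then SMC, DCE, GFX, SDMA, UVD, VCE
 * and VCN; the SMC block is resumed instead of re-running hw_init.
 */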
2854
e3ecdffa
AD
2855/**
2856 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2857 *
2858 * @adev: amdgpu_device pointer
2859 *
2860 * First resume function for hardware IPs. The list of all the hardware
2861 * IPs that make up the asic is walked and the resume callbacks are run for
2862 * COMMON, GMC, and IH. resume puts the hardware into a functional state
2863 * after a suspend and updates the software state as necessary. This
2864 * function is also used for restoring the GPU after a GPU reset.
2865 * Returns 0 on success, negative error code on failure.
2866 */
06ec9070 2867static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
2868{
2869 int i, r;
2870
a90ad3c2 2871 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2872 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 2873 continue;
a90ad3c2 2874 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa
AD
2875 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2876 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
482f0e53 2877
fcf0649f
CZ
2878 r = adev->ip_blocks[i].version->funcs->resume(adev);
2879 if (r) {
2880 DRM_ERROR("resume of IP block <%s> failed %d\n",
2881 adev->ip_blocks[i].version->funcs->name, r);
2882 return r;
2883 }
482f0e53 2884 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
2885 }
2886 }
2887
2888 return 0;
2889}
2890
e3ecdffa
AD
2891/**
2892 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2893 *
2894 * @adev: amdgpu_device pointer
2895 *
2896 * Second resume function for hardware IPs. The list of all the hardware
2897 * IPs that make up the asic is walked and the resume callbacks are run for
2898 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
2899 * functional state after a suspend and updates the software state as
2900 * necessary. This function is also used for restoring the GPU after a GPU
2901 * reset.
2902 * Returns 0 on success, negative error code on failure.
2903 */
06ec9070 2904static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2905{
2906 int i, r;
2907
2908 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2909 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 2910 continue;
fcf0649f 2911 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 2912 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
2913 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2914 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 2915 continue;
a1255107 2916 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 2917 if (r) {
a1255107
AD
2918 DRM_ERROR("resume of IP block <%s> failed %d\n",
2919 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2920 return r;
2c1a2784 2921 }
482f0e53 2922 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
2923 }
2924
2925 return 0;
2926}
2927
e3ecdffa
AD
2928/**
2929 * amdgpu_device_ip_resume - run resume for hardware IPs
2930 *
2931 * @adev: amdgpu_device pointer
2932 *
2933 * Main resume function for hardware IPs. The hardware IPs
2934 * are split into two resume functions because they are
2935 * also used in recovering from a GPU reset and some additional
2936 * steps need to be taken between them. In this case (S3/S4) they are
2937 * run sequentially.
2938 * Returns 0 on success, negative error code on failure.
2939 */
06ec9070 2940static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
2941{
2942 int r;
2943
06ec9070 2944 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
2945 if (r)
2946 return r;
7a3e0bb2
RZ
2947
2948 r = amdgpu_device_fw_loading(adev);
2949 if (r)
2950 return r;
2951
06ec9070 2952 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
2953
2954 return r;
2955}
2956
e3ecdffa
AD
2957/**
2958 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2959 *
2960 * @adev: amdgpu_device pointer
2961 *
2962 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2963 */
4e99a44e 2964static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 2965{
6867e1b5
ML
2966 if (amdgpu_sriov_vf(adev)) {
2967 if (adev->is_atom_fw) {
2968 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2969 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2970 } else {
2971 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2972 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2973 }
2974
2975 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2976 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 2977 }
048765ad
AR
2978}
2979
e3ecdffa
AD
2980/**
2981 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2982 *
2983 * @asic_type: AMD asic type
2984 *
2985 * Check if there is DC (new modesetting infrastructure) support for an asic.
2986 * returns true if DC has support, false if not.
2987 */
4562236b
HW
2988bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2989{
2990 switch (asic_type) {
2991#if defined(CONFIG_DRM_AMD_DC)
64200c46
MR
2992#if defined(CONFIG_DRM_AMD_DC_SI)
2993 case CHIP_TAHITI:
2994 case CHIP_PITCAIRN:
2995 case CHIP_VERDE:
2996 case CHIP_OLAND:
2997#endif
4562236b 2998 case CHIP_BONAIRE:
0d6fbccb 2999 case CHIP_KAVERI:
367e6687
AD
3000 case CHIP_KABINI:
3001 case CHIP_MULLINS:
d9fda248
HW
3002 /*
3003 * We have systems in the wild with these ASICs that require
3004 * LVDS and VGA support which is not supported with DC.
3005 *
3006 * Fallback to the non-DC driver here by default so as not to
3007 * cause regressions.
3008 */
3009 return amdgpu_dc > 0;
3010 case CHIP_HAWAII:
4562236b
HW
3011 case CHIP_CARRIZO:
3012 case CHIP_STONEY:
4562236b 3013 case CHIP_POLARIS10:
675fd32b 3014 case CHIP_POLARIS11:
2c8ad2d5 3015 case CHIP_POLARIS12:
675fd32b 3016 case CHIP_VEGAM:
4562236b
HW
3017 case CHIP_TONGA:
3018 case CHIP_FIJI:
42f8ffa1 3019 case CHIP_VEGA10:
dca7b401 3020 case CHIP_VEGA12:
c6034aa2 3021 case CHIP_VEGA20:
b86a1aa3 3022#if defined(CONFIG_DRM_AMD_DC_DCN)
fd187853 3023 case CHIP_RAVEN:
b4f199c7 3024 case CHIP_NAVI10:
8fceceb6 3025 case CHIP_NAVI14:
078655d9 3026 case CHIP_NAVI12:
e1c14c43 3027 case CHIP_RENOIR:
81d9bfb8 3028 case CHIP_SIENNA_CICHLID:
a6c5308f 3029 case CHIP_NAVY_FLOUNDER:
7cc656e2 3030 case CHIP_DIMGREY_CAVEFISH:
84b934bc 3031 case CHIP_VANGOGH:
42f8ffa1 3032#endif
fd187853 3033 return amdgpu_dc != 0;
4562236b
HW
3034#endif
3035 default:
93b09a9a 3036 if (amdgpu_dc > 0)
044a48f4 3037 DRM_INFO_ONCE("Display Core has been requested via kernel parameter "
93b09a9a 3038 "but isn't supported by ASIC, ignoring\n");
4562236b
HW
3039 return false;
3040 }
3041}
3042
3043/**
3044 * amdgpu_device_has_dc_support - check if dc is supported
3045 *
982a820b 3046 * @adev: amdgpu_device pointer
4562236b
HW
3047 *
3048 * Returns true for supported, false for not supported
3049 */
3050bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3051{
c997e8e2 3052 if (amdgpu_sriov_vf(adev) || adev->enable_virtual_display)
2555039d
XY
3053 return false;
3054
4562236b
HW
3055 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3056}
3057
d4535e2c
AG
3058
3059static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3060{
3061 struct amdgpu_device *adev =
3062 container_of(__work, struct amdgpu_device, xgmi_reset_work);
d95e8e97 3063 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
d4535e2c 3064
c6a6e2db
AG
3065 /* It's a bug to not have a hive within this function */
3066 if (WARN_ON(!hive))
3067 return;
3068
3069 /*
3070 * Use task barrier to synchronize all xgmi reset works across the
3071 * hive. task_barrier_enter and task_barrier_exit will block
3072 * until all the threads running the xgmi reset works reach
3073 * those points. task_barrier_full will do both blocks.
3074 */
3075 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3076
3077 task_barrier_enter(&hive->tb);
4a580877 3078 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
c6a6e2db
AG
3079
3080 if (adev->asic_reset_res)
3081 goto fail;
3082
3083 task_barrier_exit(&hive->tb);
4a580877 3084 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
c6a6e2db
AG
3085
3086 if (adev->asic_reset_res)
3087 goto fail;
43c4d576
JC
3088
3089 if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count)
3090 adev->mmhub.funcs->reset_ras_error_count(adev);
c6a6e2db
AG
3091 } else {
3092
3093 task_barrier_full(&hive->tb);
3094 adev->asic_reset_res = amdgpu_asic_reset(adev);
3095 }
ce316fa5 3096
c6a6e2db 3097fail:
d4535e2c 3098 if (adev->asic_reset_res)
fed184e9 3099 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
4a580877 3100 adev->asic_reset_res, adev_to_drm(adev)->unique);
d95e8e97 3101 amdgpu_put_xgmi_hive(hive);
d4535e2c
AG
3102}
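/*
 * Hive reset sketch (BACO path): each device's worker calls
 *
 *	task_barrier_enter(&hive->tb);	// wait, then enter BACO together
 *	task_barrier_exit(&hive->tb);	// wait, then exit BACO together
 *
 * while non-BACO methods use task_barrier_full() so all nodes reset at the
 * same point.
 */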
3103
71f98027
AD
3104static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3105{
3106 char *input = amdgpu_lockup_timeout;
3107 char *timeout_setting = NULL;
3108 int index = 0;
3109 long timeout;
3110 int ret = 0;
3111
3112 /*
3113 * By default the timeout for non-compute jobs is 10000 ms,
3114 * and there is no timeout enforced on compute jobs.
3115 * In SR-IOV or passthrough mode, the timeout for compute
b7b2a316 3116 * jobs is 60000 ms by default.
71f98027
AD
3117 */
3118 adev->gfx_timeout = msecs_to_jiffies(10000);
3119 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
9882e278
ED
3120 if (amdgpu_sriov_vf(adev))
3121 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3122 msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
3123 else if (amdgpu_passthrough(adev))
b7b2a316 3124 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027
AD
3125 else
3126 adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
3127
f440ff44 3128 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 3129 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 3130 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
3131 ret = kstrtol(timeout_setting, 0, &timeout);
3132 if (ret)
3133 return ret;
3134
3135 if (timeout == 0) {
3136 index++;
3137 continue;
3138 } else if (timeout < 0) {
3139 timeout = MAX_SCHEDULE_TIMEOUT;
3140 } else {
3141 timeout = msecs_to_jiffies(timeout);
3142 }
3143
3144 switch (index++) {
3145 case 0:
3146 adev->gfx_timeout = timeout;
3147 break;
3148 case 1:
3149 adev->compute_timeout = timeout;
3150 break;
3151 case 2:
3152 adev->sdma_timeout = timeout;
3153 break;
3154 case 3:
3155 adev->video_timeout = timeout;
3156 break;
3157 default:
3158 break;
3159 }
3160 }
3161 /*
3162 * There is only one value specified and
3163 * it should apply to all non-compute jobs.
3164 */
bcccee89 3165 if (index == 1) {
71f98027 3166 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
3167 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3168 adev->compute_timeout = adev->gfx_timeout;
3169 }
71f98027
AD
3170 }
3171
3172 return ret;
3173}
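/*
 * Example (illustrative): lockup_timeout=10000,60000,10000,10000 sets the
 * gfx, compute, sdma and video timeouts (in ms) in that order; a 0 keeps
 * the default for that slot, a negative value disables the timeout, and a
 * single value applies to all non-compute queues (and to compute as well
 * under SR-IOV or passthrough).
 */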
d4535e2c 3174
77f3a5cd
ND
3175static const struct attribute *amdgpu_dev_attributes[] = {
3176 &dev_attr_product_name.attr,
3177 &dev_attr_product_number.attr,
3178 &dev_attr_serial_number.attr,
3179 &dev_attr_pcie_replay_count.attr,
3180 NULL
3181};
3182
c9a6b82f 3183
d38ceaf9
AD
3184/**
3185 * amdgpu_device_init - initialize the driver
3186 *
3187 * @adev: amdgpu_device pointer
d38ceaf9
AD
3188 * @flags: driver flags
3189 *
3190 * Initializes the driver info and hw (all asics).
3191 * Returns 0 for success or an error on failure.
3192 * Called at driver startup.
3193 */
3194int amdgpu_device_init(struct amdgpu_device *adev,
d38ceaf9
AD
3195 uint32_t flags)
3196{
8aba21b7
LT
3197 struct drm_device *ddev = adev_to_drm(adev);
3198 struct pci_dev *pdev = adev->pdev;
d38ceaf9 3199 int r, i;
fd496ca8 3200 bool atpx = false;
95844d20 3201 u32 max_MBps;
d38ceaf9
AD
3202
3203 adev->shutdown = false;
d38ceaf9 3204 adev->flags = flags;
4e66d7d2
YZ
3205
3206 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3207 adev->asic_type = amdgpu_force_asic_type;
3208 else
3209 adev->asic_type = flags & AMD_ASIC_MASK;
3210
d38ceaf9 3211 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 3212 if (amdgpu_emu_mode == 1)
8bdab6bb 3213 adev->usec_timeout *= 10;
770d13b1 3214 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
3215 adev->accel_working = false;
3216 adev->num_rings = 0;
3217 adev->mman.buffer_funcs = NULL;
3218 adev->mman.buffer_funcs_ring = NULL;
3219 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 3220 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 3221 adev->gmc.gmc_funcs = NULL;
f54d1867 3222 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 3223 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
3224
3225 adev->smc_rreg = &amdgpu_invalid_rreg;
3226 adev->smc_wreg = &amdgpu_invalid_wreg;
3227 adev->pcie_rreg = &amdgpu_invalid_rreg;
3228 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
3229 adev->pciep_rreg = &amdgpu_invalid_rreg;
3230 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
3231 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3232 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
3233 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3234 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3235 adev->didt_rreg = &amdgpu_invalid_rreg;
3236 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
3237 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3238 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
3239 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3240 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3241
3e39ab90
AD
3242 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3243 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3244 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
3245
3246 /* mutex initializations are all done here so we
3247 * can call these functions without having locking issues */
0e5ca0d1 3248 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
3249 mutex_init(&adev->pm.mutex);
3250 mutex_init(&adev->gfx.gpu_clock_mutex);
3251 mutex_init(&adev->srbm_mutex);
b8866c26 3252 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3253 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 3254 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3255 mutex_init(&adev->mn_lock);
e23b74aa 3256 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3257 hash_init(adev->mn_hash);
53b3f8f4 3258 atomic_set(&adev->in_gpu_reset, 0);
6049db43 3259 init_rwsem(&adev->reset_sem);
32eaeae0 3260 mutex_init(&adev->psp.mutex);
bd052211 3261 mutex_init(&adev->notifier_lock);
d38ceaf9 3262
912dfc84
EQ
3263 r = amdgpu_device_check_arguments(adev);
3264 if (r)
3265 return r;
d38ceaf9 3266
d38ceaf9
AD
3267 spin_lock_init(&adev->mmio_idx_lock);
3268 spin_lock_init(&adev->smc_idx_lock);
3269 spin_lock_init(&adev->pcie_idx_lock);
3270 spin_lock_init(&adev->uvd_ctx_idx_lock);
3271 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3272 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3273 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3274 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3275 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3276
0c4e7fa5
CZ
3277 INIT_LIST_HEAD(&adev->shadow_list);
3278 mutex_init(&adev->shadow_list_lock);
3279
655ce9cb 3280 INIT_LIST_HEAD(&adev->reset_list);
3281
beff74bc
AD
3282 INIT_DELAYED_WORK(&adev->delayed_init_work,
3283 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3284 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3285 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3286
d4535e2c
AG
3287 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3288
d23ee13f 3289 adev->gfx.gfx_off_req_count = 1;
b6e79d9a 3290 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3291
b265bdbd
EQ
3292 atomic_set(&adev->throttling_logging_enabled, 1);
3293 /*
3294 * If throttling continues, logging will be performed every minute
3295 * to avoid log flooding. "-1" is subtracted since the thermal
3296 * throttling interrupt comes every second. Thus, the total logging
3297 * interval is 59 seconds(retelimited printk interval) + 1(waiting
3298 * for throttling interrupt) = 60 seconds.
3299 */
3300 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3301 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3302
0fa49558
AX
3303 /* Registers mapping */
3304 /* TODO: block userspace mapping of io register */
da69c161
KW
3305 if (adev->asic_type >= CHIP_BONAIRE) {
3306 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3307 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3308 } else {
3309 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3310 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3311 }
d38ceaf9 3312
d38ceaf9
AD
3313 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3314 if (adev->rmmio == NULL) {
3315 return -ENOMEM;
3316 }
3317 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3318 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3319
b2109d8e
JX
3320 /* enable PCIE atomic ops */
3321 r = pci_enable_atomic_ops_to_root(adev->pdev,
3322 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3323 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3324 if (r) {
3325 adev->have_atomics_support = false;
3326 DRM_INFO("PCIE atomic ops is not supported\n");
3327 } else {
3328 adev->have_atomics_support = true;
3329 }
3330
5494d864
AD
3331 amdgpu_device_get_pcie_info(adev);
3332
b239c017
JX
3333 if (amdgpu_mcbp)
3334 DRM_INFO("MCBP is enabled\n");
3335
5f84cc63
JX
3336 if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
3337 adev->enable_mes = true;
3338
3aa0115d
ML
3339 /* detect hw virtualization here */
3340 amdgpu_detect_virtualization(adev);
3341
dffa11b4
ML
3342 r = amdgpu_device_get_job_timeout_settings(adev);
3343 if (r) {
3344 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4192f7b5 3345 goto failed_unmap;
a190d1c7
XY
3346 }
3347
d38ceaf9 3348 /* early init functions */
06ec9070 3349 r = amdgpu_device_ip_early_init(adev);
d38ceaf9 3350 if (r)
4192f7b5 3351 goto failed_unmap;
d38ceaf9 3352
6585661d
OZ
3353 /* doorbell bar mapping and doorbell index init*/
3354 amdgpu_device_doorbell_init(adev);
3355
d38ceaf9
AD
3356 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
3357 /* this will fail for cards that aren't VGA class devices, just
3358 * ignore it */
38d6be81
AD
3359 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
3360 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
d38ceaf9 3361
fd496ca8
AD
3362 if (amdgpu_device_supports_atpx(ddev))
3363 atpx = true;
3840c5bc
AD
3364 if (amdgpu_has_atpx() &&
3365 (amdgpu_is_atpx_hybrid() ||
3366 amdgpu_has_atpx_dgpu_power_cntl()) &&
3367 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 3368 vga_switcheroo_register_client(adev->pdev,
fd496ca8
AD
3369 &amdgpu_switcheroo_ops, atpx);
3370 if (atpx)
d38ceaf9
AD
3371 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
3372
9475a943
SL
3373 if (amdgpu_emu_mode == 1) {
3374 /* post the asic on emulation mode */
3375 emu_soc_asic_init(adev);
bfca0289 3376 goto fence_driver_init;
9475a943 3377 }
bfca0289 3378
4e99a44e
ML
3379 /* detect whether we are running with an SRIOV vbios */
3380 amdgpu_device_detect_sriov_bios(adev);
048765ad 3381
95e8e59e
AD
3382 /* check if we need to reset the asic
3383 * E.g., driver was not cleanly unloaded previously, etc.
3384 */
f14899fd 3385 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
e3c1b071 3386 if (adev->gmc.xgmi.num_physical_nodes) {
3387 dev_info(adev->dev, "Pending hive reset.\n");
3388 adev->gmc.xgmi.pending_reset = true;
3389 /* Only need to init the necessary blocks for SMU to handle the reset */
3390 for (i = 0; i < adev->num_ip_blocks; i++) {
3391 if (!adev->ip_blocks[i].status.valid)
3392 continue;
3393 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3394 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3395 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3396 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
751f43e7 3397 DRM_DEBUG("IP %s disabled for hw_init.\n",
e3c1b071 3398 adev->ip_blocks[i].version->funcs->name);
3399 adev->ip_blocks[i].status.hw = true;
3400 }
3401 }
3402 } else {
3403 r = amdgpu_asic_reset(adev);
3404 if (r) {
3405 dev_err(adev->dev, "asic reset on init failed\n");
3406 goto failed;
3407 }
95e8e59e
AD
3408 }
3409 }
3410
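	/* enable PCIe error reporting so bus errors can reach the driver's PCI error handlers (amdgpu_pci_error_detected below) */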
8f66090b 3411 pci_enable_pcie_error_reporting(adev->pdev);
c9a6b82f 3412
d38ceaf9 3413 /* Post card if necessary */
39c640c0 3414 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3415 if (!adev->bios) {
bec86378 3416 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3417 r = -EINVAL;
3418 goto failed;
d38ceaf9 3419 }
bec86378 3420 DRM_INFO("GPU posting now...\n");
4d2997ab 3421 r = amdgpu_device_asic_init(adev);
4e99a44e
ML
3422 if (r) {
3423 dev_err(adev->dev, "gpu post error!\n");
3424 goto failed;
3425 }
d38ceaf9
AD
3426 }
3427
88b64e95
AD
3428 if (adev->is_atom_fw) {
3429 /* Initialize clocks */
3430 r = amdgpu_atomfirmware_get_clock_info(adev);
3431 if (r) {
3432 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 3433 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
3434 goto failed;
3435 }
3436 } else {
a5bde2f9
AD
3437 /* Initialize clocks */
3438 r = amdgpu_atombios_get_clock_info(adev);
3439 if (r) {
3440 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 3441 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 3442 goto failed;
a5bde2f9
AD
3443 }
3444 /* init i2c buses */
4562236b
HW
3445 if (!amdgpu_device_has_dc_support(adev))
3446 amdgpu_atombios_i2c_init(adev);
2c1a2784 3447 }
d38ceaf9 3448
bfca0289 3449fence_driver_init:
d38ceaf9
AD
3450 /* Fence driver */
3451 r = amdgpu_fence_driver_init(adev);
2c1a2784
AD
3452 if (r) {
3453 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
e23b74aa 3454 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3455 goto failed;
2c1a2784 3456 }
d38ceaf9
AD
3457
3458 /* init the mode config */
4a580877 3459 drm_mode_config_init(adev_to_drm(adev));
d38ceaf9 3460
06ec9070 3461 r = amdgpu_device_ip_init(adev);
d38ceaf9 3462 if (r) {
8840a387 3463 /* failed in exclusive mode due to timeout */
3464 if (amdgpu_sriov_vf(adev) &&
3465 !amdgpu_sriov_runtime(adev) &&
3466 amdgpu_virt_mmio_blocked(adev) &&
3467 !amdgpu_virt_wait_reset(adev)) {
3468 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
3469 /* Don't send request since VF is inactive. */
3470 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3471 adev->virt.ops = NULL;
8840a387 3472 r = -EAGAIN;
970fd197 3473 goto release_ras_con;
8840a387 3474 }
06ec9070 3475 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3476 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
970fd197 3477 goto release_ras_con;
d38ceaf9
AD
3478 }
3479
d69b8971
YZ
3480 dev_info(adev->dev,
3481 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
3482 adev->gfx.config.max_shader_engines,
3483 adev->gfx.config.max_sh_per_se,
3484 adev->gfx.config.max_cu_per_sh,
3485 adev->gfx.cu_info.number);
3486
d38ceaf9
AD
3487 adev->accel_working = true;
3488
e59c0205
AX
3489 amdgpu_vm_check_compute_bug(adev);
3490
95844d20
MO
3491 /* Initialize the buffer migration limit. */
3492 if (amdgpu_moverate >= 0)
3493 max_MBps = amdgpu_moverate;
3494 else
3495 max_MBps = 8; /* Allow 8 MB/s. */
3496 /* Get a log2 for easy divisions. */
3497 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3498
9bc92b9c
ML
3499 amdgpu_fbdev_init(adev);
3500
d2f52ac8 3501 r = amdgpu_pm_sysfs_init(adev);
7c868b59
YT
3502 if (r) {
3503 adev->pm_sysfs_en = false;
d2f52ac8 3504 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
7c868b59
YT
3505 } else
3506 adev->pm_sysfs_en = true;
d2f52ac8 3507
5bb23532 3508 r = amdgpu_ucode_sysfs_init(adev);
7c868b59
YT
3509 if (r) {
3510 adev->ucode_sysfs_en = false;
5bb23532 3511 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
7c868b59
YT
3512 } else
3513 adev->ucode_sysfs_en = true;
5bb23532 3514
d38ceaf9
AD
3515 if ((amdgpu_testing & 1)) {
3516 if (adev->accel_working)
3517 amdgpu_test_moves(adev);
3518 else
3519 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
3520 }
d38ceaf9
AD
3521 if (amdgpu_benchmarking) {
3522 if (adev->accel_working)
3523 amdgpu_benchmark(adev, amdgpu_benchmarking);
3524 else
3525 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
3526 }
3527
b0adca4d
EQ
3528 /*
3529 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3530 * Otherwise the mgpu fan boost feature will be skipped because the
3531 * gpu instance count would be too low.
3532 */
3533 amdgpu_register_gpu_instance(adev);
3534
d38ceaf9
AD
3535 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3536 * explicit gating rather than handling it automatically.
3537 */
e3c1b071 3538 if (!adev->gmc.xgmi.pending_reset) {
3539 r = amdgpu_device_ip_late_init(adev);
3540 if (r) {
3541 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
3542 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
970fd197 3543 goto release_ras_con;
e3c1b071 3544 }
3545 /* must succeed. */
3546 amdgpu_ras_resume(adev);
3547 queue_delayed_work(system_wq, &adev->delayed_init_work,
3548 msecs_to_jiffies(AMDGPU_RESUME_MS));
2c1a2784 3549 }
d38ceaf9 3550
2c738637
ML
3551 if (amdgpu_sriov_vf(adev))
3552 flush_delayed_work(&adev->delayed_init_work);
3553
77f3a5cd 3554 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
5aea5327 3555 if (r)
77f3a5cd 3556 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166 3557
d155bef0
AB
3558 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3559 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
3560 if (r)
3561 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3562
c1dd4aa6
AG
3563 /* Keep the stored PCI config space at hand for restore after a sudden PCI error */
3564 if (amdgpu_device_cache_pci_state(adev->pdev))
3565 pci_restore_state(pdev);
3566
e3c1b071 3567 if (adev->gmc.xgmi.pending_reset)
3568 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
3569 msecs_to_jiffies(AMDGPU_RESUME_MS));
3570
d38ceaf9 3571 return 0;
83ba126a 3572
970fd197
SY
3573release_ras_con:
3574 amdgpu_release_ras_context(adev);
3575
83ba126a 3576failed:
89041940 3577 amdgpu_vf_error_trans_all(adev);
fd496ca8 3578 if (atpx)
83ba126a 3579 vga_switcheroo_fini_domain_pm_ops(adev->dev);
8840a387 3580
4192f7b5
AD
3581failed_unmap:
3582 iounmap(adev->rmmio);
3583 adev->rmmio = NULL;
3584
83ba126a 3585 return r;
d38ceaf9
AD
3586}
3587
d38ceaf9
AD
3588/**
3589 * amdgpu_device_fini - tear down the driver
3590 *
3591 * @adev: amdgpu_device pointer
3592 *
3593 * Tear down the driver info (all asics).
3594 * Called at driver shutdown.
3595 */
3596void amdgpu_device_fini(struct amdgpu_device *adev)
3597{
aac89168 3598 dev_info(adev->dev, "amdgpu: finishing device.\n");
9f875167 3599 flush_delayed_work(&adev->delayed_init_work);
bb0cd09b 3600 ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
d0d13fe8 3601 adev->shutdown = true;
9f875167 3602
c1dd4aa6
AG
3603 kfree(adev->pci_state);
3604
752c683d
ML
3605 /* make sure IB tests have finished before entering exclusive mode
3606 * to avoid preemption during the IB tests
3607 */
519b8b76 3608 if (amdgpu_sriov_vf(adev)) {
752c683d 3609 amdgpu_virt_request_full_gpu(adev, false);
519b8b76
BZ
3610 amdgpu_virt_fini_data_exchange(adev);
3611 }
752c683d 3612
e5b03032
ML
3613 /* disable all interrupts */
3614 amdgpu_irq_disable_all(adev);
ff97cba8
ML
3615 if (adev->mode_info.mode_config_initialized){
3616 if (!amdgpu_device_has_dc_support(adev))
4a580877 3617 drm_helper_force_disable_all(adev_to_drm(adev));
ff97cba8 3618 else
4a580877 3619 drm_atomic_helper_shutdown(adev_to_drm(adev));
ff97cba8 3620 }
d38ceaf9 3621 amdgpu_fence_driver_fini(adev);
7c868b59
YT
3622 if (adev->pm_sysfs_en)
3623 amdgpu_pm_sysfs_fini(adev);
d38ceaf9 3624 amdgpu_fbdev_fini(adev);
e230ac11 3625 amdgpu_device_ip_fini(adev);
75e1658e
ND
3626 release_firmware(adev->firmware.gpu_info_fw);
3627 adev->firmware.gpu_info_fw = NULL;
d38ceaf9
AD
3628 adev->accel_working = false;
3629 /* free i2c buses */
4562236b
HW
3630 if (!amdgpu_device_has_dc_support(adev))
3631 amdgpu_i2c_fini(adev);
bfca0289
SL
3632
3633 if (amdgpu_emu_mode != 1)
3634 amdgpu_atombios_fini(adev);
3635
d38ceaf9
AD
3636 kfree(adev->bios);
3637 adev->bios = NULL;
3840c5bc
AD
3638 if (amdgpu_has_atpx() &&
3639 (amdgpu_is_atpx_hybrid() ||
3640 amdgpu_has_atpx_dgpu_power_cntl()) &&
3641 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 3642 vga_switcheroo_unregister_client(adev->pdev);
fd496ca8 3643 if (amdgpu_device_supports_atpx(adev_to_drm(adev)))
83ba126a 3644 vga_switcheroo_fini_domain_pm_ops(adev->dev);
38d6be81
AD
3645 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
3646 vga_client_register(adev->pdev, NULL, NULL, NULL);
d38ceaf9
AD
3647 iounmap(adev->rmmio);
3648 adev->rmmio = NULL;
06ec9070 3649 amdgpu_device_doorbell_fini(adev);
e9bc1bf7 3650
7c868b59
YT
3651 if (adev->ucode_sysfs_en)
3652 amdgpu_ucode_sysfs_fini(adev);
77f3a5cd
ND
3653
3654 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
d155bef0
AB
3655 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3656 amdgpu_pmu_fini(adev);
72de33f8 3657 if (adev->mman.discovery_bin)
a190d1c7 3658 amdgpu_discovery_fini(adev);
d38ceaf9
AD
3659}
3660
3661
3662/*
3663 * Suspend & resume.
3664 */
3665/**
810ddc3a 3666 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 3667 *
87e3f136 3668 * @dev: drm dev pointer
87e3f136 3669 * @fbcon : notify the fbdev of suspend
d38ceaf9
AD
3670 *
3671 * Puts the hw in the suspend state (all asics).
3672 * Returns 0 for success or an error on failure.
3673 * Called at driver suspend.
3674 */
de185019 3675int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3676{
3677 struct amdgpu_device *adev;
3678 struct drm_crtc *crtc;
3679 struct drm_connector *connector;
f8d2d39e 3680 struct drm_connector_list_iter iter;
5ceb54c6 3681 int r;
d38ceaf9 3682
1348969a 3683 adev = drm_to_adev(dev);
d38ceaf9
AD
3684
3685 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3686 return 0;
3687
44779b43 3688 adev->in_suspend = true;
d38ceaf9
AD
3689 drm_kms_helper_poll_disable(dev);
3690
5f818173
S
3691 if (fbcon)
3692 amdgpu_fbdev_set_suspend(adev, 1);
3693
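	/* make sure the delayed init work is not still running while we suspend */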
beff74bc 3694 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 3695
4562236b
HW
3696 if (!amdgpu_device_has_dc_support(adev)) {
3697 /* turn off display hw */
3698 drm_modeset_lock_all(dev);
f8d2d39e
LP
3699 drm_connector_list_iter_begin(dev, &iter);
3700 drm_for_each_connector_iter(connector, &iter)
3701 drm_helper_connector_dpms(connector,
3702 DRM_MODE_DPMS_OFF);
3703 drm_connector_list_iter_end(&iter);
4562236b 3704 drm_modeset_unlock_all(dev);
fe1053b7
AD
3705 /* unpin the front buffers and cursors */
3706 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3707 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3708 struct drm_framebuffer *fb = crtc->primary->fb;
3709 struct amdgpu_bo *robj;
3710
91334223 3711 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3712 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3713 r = amdgpu_bo_reserve(aobj, true);
3714 if (r == 0) {
3715 amdgpu_bo_unpin(aobj);
3716 amdgpu_bo_unreserve(aobj);
3717 }
756e6880 3718 }
756e6880 3719
fe1053b7
AD
3720 if (fb == NULL || fb->obj[0] == NULL) {
3721 continue;
3722 }
3723 robj = gem_to_amdgpu_bo(fb->obj[0]);
3724 /* don't unpin kernel fb objects */
3725 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
3726 r = amdgpu_bo_reserve(robj, true);
3727 if (r == 0) {
3728 amdgpu_bo_unpin(robj);
3729 amdgpu_bo_unreserve(robj);
3730 }
d38ceaf9
AD
3731 }
3732 }
3733 }
fe1053b7 3734
5e6932fe 3735 amdgpu_ras_suspend(adev);
3736
fe1053b7
AD
3737 r = amdgpu_device_ip_suspend_phase1(adev);
3738
ad887af9 3739 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
94fa5660 3740
d38ceaf9
AD
3741 /* evict vram memory */
3742 amdgpu_bo_evict_vram(adev);
3743
5ceb54c6 3744 amdgpu_fence_driver_suspend(adev);
d38ceaf9 3745
b00978de
PL
3746 if (adev->in_poweroff_reboot_com ||
3747 !amdgpu_acpi_is_s0ix_supported(adev) || amdgpu_in_reset(adev))
628c36d7
PL
3748 r = amdgpu_device_ip_suspend_phase2(adev);
3749 else
3750 amdgpu_gfx_state_change_set(adev, sGpuChangeState_D3Entry);
a0a71e49
AD
3751 /* evict remaining vram memory
3752 * This second call to evict vram is to evict the gart page table
3753 * using the CPU.
3754 */
d38ceaf9
AD
3755 amdgpu_bo_evict_vram(adev);
3756
d38ceaf9
AD
3757 return 0;
3758}
3759
3760/**
810ddc3a 3761 * amdgpu_device_resume - initiate device resume
d38ceaf9 3762 *
87e3f136 3763 * @dev: drm dev pointer
87e3f136 3764 * @fbcon : notify the fbdev of resume
d38ceaf9
AD
3765 *
3766 * Bring the hw back to operating state (all asics).
3767 * Returns 0 for success or an error on failure.
3768 * Called at driver resume.
3769 */
de185019 3770int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3771{
3772 struct drm_connector *connector;
f8d2d39e 3773 struct drm_connector_list_iter iter;
1348969a 3774 struct amdgpu_device *adev = drm_to_adev(dev);
756e6880 3775 struct drm_crtc *crtc;
03161a6e 3776 int r = 0;
d38ceaf9
AD
3777
3778 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3779 return 0;
3780
9ca5b8a1 3781 if (amdgpu_acpi_is_s0ix_supported(adev))
628c36d7
PL
3782 amdgpu_gfx_state_change_set(adev, sGpuChangeState_D0Entry);
3783
d38ceaf9 3784 /* post card */
39c640c0 3785 if (amdgpu_device_need_post(adev)) {
4d2997ab 3786 r = amdgpu_device_asic_init(adev);
74b0b157 3787 if (r)
aac89168 3788 dev_err(adev->dev, "amdgpu asic init failed\n");
74b0b157 3789 }
d38ceaf9 3790
06ec9070 3791 r = amdgpu_device_ip_resume(adev);
e6707218 3792 if (r) {
aac89168 3793 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
4d3b9ae5 3794 return r;
e6707218 3795 }
5ceb54c6
AD
3796 amdgpu_fence_driver_resume(adev);
3797
d38ceaf9 3798
06ec9070 3799 r = amdgpu_device_ip_late_init(adev);
03161a6e 3800 if (r)
4d3b9ae5 3801 return r;
d38ceaf9 3802
beff74bc
AD
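	/* re-queue the delayed init work (deferred IB tests etc.) now that the IPs are back up */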
3803 queue_delayed_work(system_wq, &adev->delayed_init_work,
3804 msecs_to_jiffies(AMDGPU_RESUME_MS));
3805
fe1053b7
AD
3806 if (!amdgpu_device_has_dc_support(adev)) {
3807 /* pin cursors */
3808 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3809 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3810
91334223 3811 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3812 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3813 r = amdgpu_bo_reserve(aobj, true);
3814 if (r == 0) {
3815 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
3816 if (r != 0)
aac89168 3817 dev_err(adev->dev, "Failed to pin cursor BO (%d)\n", r);
fe1053b7
AD
3818 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
3819 amdgpu_bo_unreserve(aobj);
3820 }
756e6880
AD
3821 }
3822 }
3823 }
ad887af9 3824 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
ba997709
YZ
3825 if (r)
3826 return r;
756e6880 3827
96a5d8d4 3828 /* Make sure IB tests flushed */
beff74bc 3829 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 3830
d38ceaf9
AD
3831 /* blat the mode back in */
3832 if (fbcon) {
4562236b
HW
3833 if (!amdgpu_device_has_dc_support(adev)) {
3834 /* pre DCE11 */
3835 drm_helper_resume_force_mode(dev);
3836
3837 /* turn on display hw */
3838 drm_modeset_lock_all(dev);
f8d2d39e
LP
3839
3840 drm_connector_list_iter_begin(dev, &iter);
3841 drm_for_each_connector_iter(connector, &iter)
3842 drm_helper_connector_dpms(connector,
3843 DRM_MODE_DPMS_ON);
3844 drm_connector_list_iter_end(&iter);
3845
4562236b 3846 drm_modeset_unlock_all(dev);
d38ceaf9 3847 }
4d3b9ae5 3848 amdgpu_fbdev_set_suspend(adev, 0);
d38ceaf9
AD
3849 }
3850
3851 drm_kms_helper_poll_enable(dev);
23a1a9e5 3852
5e6932fe 3853 amdgpu_ras_resume(adev);
3854
23a1a9e5
L
3855 /*
3856 * Most of the connector probing functions try to acquire runtime pm
3857 * refs to ensure that the GPU is powered on when connector polling is
3858 * performed. Since we're calling this from a runtime PM callback,
3859 * trying to acquire rpm refs will cause us to deadlock.
3860 *
3861 * Since we're guaranteed to be holding the rpm lock, it's safe to
3862 * temporarily disable the rpm helpers so this doesn't deadlock us.
3863 */
3864#ifdef CONFIG_PM
3865 dev->dev->power.disable_depth++;
3866#endif
4562236b
HW
3867 if (!amdgpu_device_has_dc_support(adev))
3868 drm_helper_hpd_irq_event(dev);
3869 else
3870 drm_kms_helper_hotplug_event(dev);
23a1a9e5
L
3871#ifdef CONFIG_PM
3872 dev->dev->power.disable_depth--;
3873#endif
44779b43
RZ
3874 adev->in_suspend = false;
3875
4d3b9ae5 3876 return 0;
d38ceaf9
AD
3877}
3878
e3ecdffa
AD
3879/**
3880 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
3881 *
3882 * @adev: amdgpu_device pointer
3883 *
3884 * The list of all the hardware IPs that make up the asic is walked and
3885 * the check_soft_reset callbacks are run. check_soft_reset determines
3886 * if the asic is still hung or not.
3887 * Returns true if any of the IPs are still in a hung state, false if not.
3888 */
06ec9070 3889static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
3890{
3891 int i;
3892 bool asic_hang = false;
3893
f993d628
ML
3894 if (amdgpu_sriov_vf(adev))
3895 return true;
3896
8bc04c29
AD
3897 if (amdgpu_asic_need_full_reset(adev))
3898 return true;
3899
63fbf42f 3900 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3901 if (!adev->ip_blocks[i].status.valid)
63fbf42f 3902 continue;
a1255107
AD
3903 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
3904 adev->ip_blocks[i].status.hang =
3905 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
3906 if (adev->ip_blocks[i].status.hang) {
aac89168 3907 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
3908 asic_hang = true;
3909 }
3910 }
3911 return asic_hang;
3912}
3913
e3ecdffa
AD
3914/**
3915 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3916 *
3917 * @adev: amdgpu_device pointer
3918 *
3919 * The list of all the hardware IPs that make up the asic is walked and the
3920 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
3921 * handles any IP specific hardware or software state changes that are
3922 * necessary for a soft reset to succeed.
3923 * Returns 0 on success, negative error code on failure.
3924 */
06ec9070 3925static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
3926{
3927 int i, r = 0;
3928
3929 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3930 if (!adev->ip_blocks[i].status.valid)
d31a501e 3931 continue;
a1255107
AD
3932 if (adev->ip_blocks[i].status.hang &&
3933 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3934 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
3935 if (r)
3936 return r;
3937 }
3938 }
3939
3940 return 0;
3941}
3942
e3ecdffa
AD
3943/**
3944 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3945 *
3946 * @adev: amdgpu_device pointer
3947 *
3948 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
3949 * reset is necessary to recover.
3950 * Returns true if a full asic reset is required, false if not.
3951 */
06ec9070 3952static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 3953{
da146d3b
AD
3954 int i;
3955
8bc04c29
AD
3956 if (amdgpu_asic_need_full_reset(adev))
3957 return true;
3958
da146d3b 3959 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3960 if (!adev->ip_blocks[i].status.valid)
da146d3b 3961 continue;
a1255107
AD
3962 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3963 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3964 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
3965 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3966 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 3967 if (adev->ip_blocks[i].status.hang) {
aac89168 3968 dev_info(adev->dev, "Some blocks need a full reset!\n");
da146d3b
AD
3969 return true;
3970 }
3971 }
35d782fe
CZ
3972 }
3973 return false;
3974}
3975
e3ecdffa
AD
3976/**
3977 * amdgpu_device_ip_soft_reset - do a soft reset
3978 *
3979 * @adev: amdgpu_device pointer
3980 *
3981 * The list of all the hardware IPs that make up the asic is walked and the
3982 * soft_reset callbacks are run if the block is hung. soft_reset handles any
3983 * IP specific hardware or software state changes that are necessary to soft
3984 * reset the IP.
3985 * Returns 0 on success, negative error code on failure.
3986 */
06ec9070 3987static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3988{
3989 int i, r = 0;
3990
3991 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3992 if (!adev->ip_blocks[i].status.valid)
35d782fe 3993 continue;
a1255107
AD
3994 if (adev->ip_blocks[i].status.hang &&
3995 adev->ip_blocks[i].version->funcs->soft_reset) {
3996 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
3997 if (r)
3998 return r;
3999 }
4000 }
4001
4002 return 0;
4003}
4004
e3ecdffa
AD
4005/**
4006 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4007 *
4008 * @adev: amdgpu_device pointer
4009 *
4010 * The list of all the hardware IPs that make up the asic is walked and the
4011 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
4012 * handles any IP specific hardware or software state changes that are
4013 * necessary after the IP has been soft reset.
4014 * Returns 0 on success, negative error code on failure.
4015 */
06ec9070 4016static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
4017{
4018 int i, r = 0;
4019
4020 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 4021 if (!adev->ip_blocks[i].status.valid)
35d782fe 4022 continue;
a1255107
AD
4023 if (adev->ip_blocks[i].status.hang &&
4024 adev->ip_blocks[i].version->funcs->post_soft_reset)
4025 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
4026 if (r)
4027 return r;
4028 }
4029
4030 return 0;
4031}
4032
e3ecdffa 4033/**
c33adbc7 4034 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
4035 *
4036 * @adev: amdgpu_device pointer
4037 *
4038 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
4039 * restore things like GPUVM page tables after a GPU reset where
4040 * the contents of VRAM might be lost.
403009bf
CK
4041 *
4042 * Returns:
4043 * 0 on success, negative error code on failure.
e3ecdffa 4044 */
c33adbc7 4045static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 4046{
c41d1cf6 4047 struct dma_fence *fence = NULL, *next = NULL;
403009bf
CK
4048 struct amdgpu_bo *shadow;
4049 long r = 1, tmo;
c41d1cf6
ML
4050
4051 if (amdgpu_sriov_runtime(adev))
b045d3af 4052 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
4053 else
4054 tmo = msecs_to_jiffies(100);
4055
aac89168 4056 dev_info(adev->dev, "recover vram bo from shadow start\n");
c41d1cf6 4057 mutex_lock(&adev->shadow_list_lock);
403009bf
CK
4058 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
4059
4060 /* No need to recover an evicted BO */
4061 if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
b575f10d 4062 shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
403009bf
CK
4063 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
4064 continue;
4065
4066 r = amdgpu_bo_restore_shadow(shadow, &next);
4067 if (r)
4068 break;
4069
c41d1cf6 4070 if (fence) {
1712fb1a 4071 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
4072 dma_fence_put(fence);
4073 fence = next;
1712fb1a 4074 if (tmo == 0) {
4075 r = -ETIMEDOUT;
c41d1cf6 4076 break;
1712fb1a 4077 } else if (tmo < 0) {
4078 r = tmo;
4079 break;
4080 }
403009bf
CK
4081 } else {
4082 fence = next;
c41d1cf6 4083 }
c41d1cf6
ML
4084 }
4085 mutex_unlock(&adev->shadow_list_lock);
4086
403009bf
CK
4087 if (fence)
4088 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
4089 dma_fence_put(fence);
4090
1712fb1a 4091 if (r < 0 || tmo <= 0) {
aac89168 4092 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
4093 return -EIO;
4094 }
c41d1cf6 4095
aac89168 4096 dev_info(adev->dev, "recover vram bo from shadow done\n");
403009bf 4097 return 0;
c41d1cf6
ML
4098}
4099
a90ad3c2 4100
e3ecdffa 4101/**
06ec9070 4102 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e 4103 *
982a820b 4104 * @adev: amdgpu_device pointer
87e3f136 4105 * @from_hypervisor: request from hypervisor
5740682e
ML
4106 *
4107 * do a VF FLR and reinitialize the ASIC
3f48c681 4108 * Returns 0 on success, an error code otherwise.
e3ecdffa
AD
4109 */
4110static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4111 bool from_hypervisor)
5740682e
ML
4112{
4113 int r;
4114
4115 if (from_hypervisor)
4116 r = amdgpu_virt_request_full_gpu(adev, true);
4117 else
4118 r = amdgpu_virt_reset_gpu(adev);
4119 if (r)
4120 return r;
a90ad3c2 4121
b639c22c
JZ
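	/* let KFD quiesce its queues before the VF reset */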
4122 amdgpu_amdkfd_pre_reset(adev);
4123
a90ad3c2 4124 /* Resume IP prior to SMC */
06ec9070 4125 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
4126 if (r)
4127 goto error;
a90ad3c2 4128
c9ffa427 4129 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 4130 /* we need recover gart prior to run SMC/CP/SDMA resume */
6c28aed6 4131 amdgpu_gtt_mgr_recover(ttm_manager_type(&adev->mman.bdev, TTM_PL_TT));
a90ad3c2 4132
7a3e0bb2
RZ
4133 r = amdgpu_device_fw_loading(adev);
4134 if (r)
4135 return r;
4136
a90ad3c2 4137 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 4138 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
4139 if (r)
4140 goto error;
a90ad3c2
ML
4141
4142 amdgpu_irq_gpu_reset_resume_helper(adev);
5740682e 4143 r = amdgpu_ib_ring_tests(adev);
f81e8d53 4144 amdgpu_amdkfd_post_reset(adev);
a90ad3c2 4145
abc34253
ED
4146error:
4147 amdgpu_virt_release_full_gpu(adev, true);
c41d1cf6 4148 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 4149 amdgpu_inc_vram_lost(adev);
c33adbc7 4150 r = amdgpu_device_recover_vram(adev);
a90ad3c2
ML
4151 }
4152
4153 return r;
4154}
4155
9a1cddd6 4156/**
4157 * amdgpu_device_has_job_running - check if there is any job in pending list
4158 *
982a820b 4159 * @adev: amdgpu_device pointer
9a1cddd6 4160 *
4161 * check if there is any job in pending list
4162 */
4163bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4164{
4165 int i;
4166 struct drm_sched_job *job;
4167
4168 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4169 struct amdgpu_ring *ring = adev->rings[i];
4170
4171 if (!ring || !ring->sched.thread)
4172 continue;
4173
4174 spin_lock(&ring->sched.job_list_lock);
6efa4b46
LT
4175 job = list_first_entry_or_null(&ring->sched.pending_list,
4176 struct drm_sched_job, list);
9a1cddd6 4177 spin_unlock(&ring->sched.job_list_lock);
4178 if (job)
4179 return true;
4180 }
4181 return false;
4182}
4183
12938fad
CK
4184/**
4185 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4186 *
982a820b 4187 * @adev: amdgpu_device pointer
12938fad
CK
4188 *
4189 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4190 * a hung GPU.
4191 */
4192bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4193{
4194 if (!amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 4195 dev_info(adev->dev, "Timeout, but no hardware hang detected.\n");
12938fad
CK
4196 return false;
4197 }
4198
3ba7b418
AG
4199 if (amdgpu_gpu_recovery == 0)
4200 goto disabled;
4201
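	/* SR-IOV VFs skip the per-ASIC checks below and always allow recovery */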
4202 if (amdgpu_sriov_vf(adev))
4203 return true;
4204
4205 if (amdgpu_gpu_recovery == -1) {
4206 switch (adev->asic_type) {
fc42d47c
AG
4207 case CHIP_BONAIRE:
4208 case CHIP_HAWAII:
3ba7b418
AG
4209 case CHIP_TOPAZ:
4210 case CHIP_TONGA:
4211 case CHIP_FIJI:
4212 case CHIP_POLARIS10:
4213 case CHIP_POLARIS11:
4214 case CHIP_POLARIS12:
4215 case CHIP_VEGAM:
4216 case CHIP_VEGA20:
4217 case CHIP_VEGA10:
4218 case CHIP_VEGA12:
c43b849f 4219 case CHIP_RAVEN:
e9d4cf91 4220 case CHIP_ARCTURUS:
2cb44fb0 4221 case CHIP_RENOIR:
658c6639
AD
4222 case CHIP_NAVI10:
4223 case CHIP_NAVI14:
4224 case CHIP_NAVI12:
131a3c74 4225 case CHIP_SIENNA_CICHLID:
665fe4dc 4226 case CHIP_NAVY_FLOUNDER:
27859ee3 4227 case CHIP_DIMGREY_CAVEFISH:
3ba7b418
AG
4228 break;
4229 default:
4230 goto disabled;
4231 }
12938fad
CK
4232 }
4233
4234 return true;
3ba7b418
AG
4235
4236disabled:
aac89168 4237 dev_info(adev->dev, "GPU recovery disabled.\n");
3ba7b418 4238 return false;
12938fad
CK
4239}
4240
5c03e584
FX
4241int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
4242{
4243 u32 i;
4244 int ret = 0;
4245
4246 amdgpu_atombios_scratch_regs_engine_hung(adev, true);
4247
4248 dev_info(adev->dev, "GPU mode1 reset\n");
4249
4250 /* disable BM */
4251 pci_clear_master(adev->pdev);
4252
4253 amdgpu_device_cache_pci_state(adev->pdev);
4254
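	/* prefer the SMU mode1 reset when DPM supports it, otherwise fall back to the PSP reset path */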
4255 if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
4256 dev_info(adev->dev, "GPU smu mode1 reset\n");
4257 ret = amdgpu_dpm_mode1_reset(adev);
4258 } else {
4259 dev_info(adev->dev, "GPU psp mode1 reset\n");
4260 ret = psp_gpu_reset(adev);
4261 }
4262
4263 if (ret)
4264 dev_err(adev->dev, "GPU mode1 reset failed\n");
4265
4266 amdgpu_device_load_pci_state(adev->pdev);
4267
4268 /* wait for asic to come out of reset */
4269 for (i = 0; i < adev->usec_timeout; i++) {
4270 u32 memsize = adev->nbio.funcs->get_memsize(adev);
4271
4272 if (memsize != 0xffffffff)
4273 break;
4274 udelay(1);
4275 }
4276
4277 amdgpu_atombios_scratch_regs_engine_hung(adev, false);
4278 return ret;
4279}
5c6dd71e 4280
e3c1b071 4281int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
4282 struct amdgpu_job *job,
4283 bool *need_full_reset_arg)
26bc5340
AG
4284{
4285 int i, r = 0;
4286 bool need_full_reset = *need_full_reset_arg;
71182665 4287
e3c1b071 4288 /* no need to dump if device is not in good state during probe period */
4289 if (!adev->gmc.xgmi.pending_reset)
4290 amdgpu_debugfs_wait_dump(adev);
728e7e0c 4291
b602ca5f
TZ
4292 if (amdgpu_sriov_vf(adev)) {
4293 /* stop the data exchange thread */
4294 amdgpu_virt_fini_data_exchange(adev);
4295 }
4296
71182665 4297 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
4298 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4299 struct amdgpu_ring *ring = adev->rings[i];
4300
51687759 4301 if (!ring || !ring->sched.thread)
0875dc9e 4302 continue;
5740682e 4303
2f9d4084
ML
4304 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
4305 amdgpu_fence_driver_force_completion(ring);
0875dc9e 4306 }
d38ceaf9 4307
222b5f04
AG
4308 if(job)
4309 drm_sched_increase_karma(&job->base);
4310
1d721ed6 4311 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
4312 if (!amdgpu_sriov_vf(adev)) {
4313
4314 if (!need_full_reset)
4315 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4316
4317 if (!need_full_reset) {
4318 amdgpu_device_ip_pre_soft_reset(adev);
4319 r = amdgpu_device_ip_soft_reset(adev);
4320 amdgpu_device_ip_post_soft_reset(adev);
4321 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 4322 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
26bc5340
AG
4323 need_full_reset = true;
4324 }
4325 }
4326
4327 if (need_full_reset)
4328 r = amdgpu_device_ip_suspend(adev);
4329
4330 *need_full_reset_arg = need_full_reset;
4331 }
4332
4333 return r;
4334}
4335
e3c1b071 4336int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
4337 struct list_head *device_list_handle,
4338 bool *need_full_reset_arg,
4339 bool skip_hw_reset)
26bc5340
AG
4340{
4341 struct amdgpu_device *tmp_adev = NULL;
4342 bool need_full_reset = *need_full_reset_arg, vram_lost = false;
4343 int r = 0;
4344
4345 /*
655ce9cb 4346 * ASIC reset has to be done on all XGMI hive nodes ASAP
26bc5340
AG
4347 * to allow proper links negotiation in FW (within 1 sec)
4348 */
7ac71382 4349 if (!skip_hw_reset && need_full_reset) {
655ce9cb 4350 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
041a62bc 4351 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 4352 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
e3c1b071 4353 tmp_adev->gmc.xgmi.pending_reset = false;
c96cf282 4354 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
4355 r = -EALREADY;
4356 } else
4357 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 4358
041a62bc 4359 if (r) {
aac89168 4360 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4a580877 4361 r, adev_to_drm(tmp_adev)->unique);
041a62bc 4362 break;
ce316fa5
LM
4363 }
4364 }
4365
041a62bc
AG
4366 /* For XGMI wait for all resets to complete before proceed */
4367 if (!r) {
655ce9cb 4368 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
ce316fa5
LM
4369 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4370 flush_work(&tmp_adev->xgmi_reset_work);
4371 r = tmp_adev->asic_reset_res;
4372 if (r)
4373 break;
ce316fa5
LM
4374 }
4375 }
4376 }
ce316fa5 4377 }
26bc5340 4378
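	/* if this reset was triggered by a RAS interrupt, clear the per-IP error counters and the interrupt state */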
43c4d576 4379 if (!r && amdgpu_ras_intr_triggered()) {
655ce9cb 4380 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
43c4d576
JC
4381 if (tmp_adev->mmhub.funcs &&
4382 tmp_adev->mmhub.funcs->reset_ras_error_count)
4383 tmp_adev->mmhub.funcs->reset_ras_error_count(tmp_adev);
4384 }
4385
00eaa571 4386 amdgpu_ras_intr_cleared();
43c4d576 4387 }
00eaa571 4388
655ce9cb 4389 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
26bc5340
AG
4390 if (need_full_reset) {
4391 /* post card */
e3c1b071 4392 r = amdgpu_device_asic_init(tmp_adev);
4393 if (r) {
aac89168 4394 dev_warn(tmp_adev->dev, "asic atom init failed!");
e3c1b071 4395 } else {
26bc5340
AG
4396 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
4397 r = amdgpu_device_ip_resume_phase1(tmp_adev);
4398 if (r)
4399 goto out;
4400
4401 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
4402 if (vram_lost) {
77e7f829 4403 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 4404 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
4405 }
4406
6c28aed6 4407 r = amdgpu_gtt_mgr_recover(ttm_manager_type(&tmp_adev->mman.bdev, TTM_PL_TT));
26bc5340
AG
4408 if (r)
4409 goto out;
4410
4411 r = amdgpu_device_fw_loading(tmp_adev);
4412 if (r)
4413 return r;
4414
4415 r = amdgpu_device_ip_resume_phase2(tmp_adev);
4416 if (r)
4417 goto out;
4418
4419 if (vram_lost)
4420 amdgpu_device_fill_reset_magic(tmp_adev);
4421
fdafb359
EQ
4422 /*
4423 * Add this ASIC back as tracked since the reset has already
4424 * completed successfully.
4425 */
4426 amdgpu_register_gpu_instance(tmp_adev);
4427
e3c1b071 4428 if (!hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
4429 amdgpu_xgmi_add_device(tmp_adev);
4430
7c04ca50 4431 r = amdgpu_device_ip_late_init(tmp_adev);
4432 if (r)
4433 goto out;
4434
565d1941
EQ
4435 amdgpu_fbdev_set_suspend(tmp_adev, 0);
4436
e8fbaf03
GC
4437 /*
4438 * The GPU enters a bad state once the number of faulty
4439 * pages detected by ECC has reached the threshold, and
4440 * RAS recovery is scheduled next. So add a check here
4441 * to break recovery if the bad page threshold has
4442 * indeed been exceeded, and remind the user to either
4443 * retire this GPU or set a bigger bad_page_threshold
4444 * value so the driver can get past this check the
4445 * next time it is probed.
4446 */
11003c68 4447 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
e8fbaf03
GC
4448 /* must succeed. */
4449 amdgpu_ras_resume(tmp_adev);
4450 } else {
4451 r = -EINVAL;
4452 goto out;
4453 }
e79a04d5 4454
26bc5340
AG
4455 /* Update PSP FW topology after reset */
4456 if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
4457 r = amdgpu_xgmi_update_topology(hive, tmp_adev);
4458 }
4459 }
4460
26bc5340
AG
4461out:
4462 if (!r) {
4463 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
4464 r = amdgpu_ib_ring_tests(tmp_adev);
4465 if (r) {
4466 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
4467 r = amdgpu_device_ip_suspend(tmp_adev);
4468 need_full_reset = true;
4469 r = -EAGAIN;
4470 goto end;
4471 }
4472 }
4473
4474 if (!r)
4475 r = amdgpu_device_recover_vram(tmp_adev);
4476 else
4477 tmp_adev->asic_reset_res = r;
4478 }
4479
4480end:
4481 *need_full_reset_arg = need_full_reset;
4482 return r;
4483}
4484
08ebb485
DL
4485static bool amdgpu_device_lock_adev(struct amdgpu_device *adev,
4486 struct amdgpu_hive_info *hive)
26bc5340 4487{
53b3f8f4
DL
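	/* bail out if a reset is already in progress on this device */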
4488 if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0)
4489 return false;
4490
08ebb485
DL
4491 if (hive) {
4492 down_write_nest_lock(&adev->reset_sem, &hive->hive_lock);
4493 } else {
4494 down_write(&adev->reset_sem);
4495 }
5740682e 4496
a3a09142
AD
4497 switch (amdgpu_asic_reset_method(adev)) {
4498 case AMD_RESET_METHOD_MODE1:
4499 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
4500 break;
4501 case AMD_RESET_METHOD_MODE2:
4502 adev->mp1_state = PP_MP1_STATE_RESET;
4503 break;
4504 default:
4505 adev->mp1_state = PP_MP1_STATE_NONE;
4506 break;
4507 }
1d721ed6
AG
4508
4509 return true;
26bc5340 4510}
d38ceaf9 4511
26bc5340
AG
4512static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
4513{
89041940 4514 amdgpu_vf_error_trans_all(adev);
a3a09142 4515 adev->mp1_state = PP_MP1_STATE_NONE;
53b3f8f4 4516 atomic_set(&adev->in_gpu_reset, 0);
6049db43 4517 up_write(&adev->reset_sem);
26bc5340
AG
4518}
4519
91fb309d
HC
4520/*
4521 * to safely lock a list of amdgpu devices in a hive; if this is not a
4522 * hive with multiple nodes, it behaves just like amdgpu_device_lock_adev.
4523 *
4524 * unlock won't require roll back.
4525 */
4526static int amdgpu_device_lock_hive_adev(struct amdgpu_device *adev, struct amdgpu_hive_info *hive)
4527{
4528 struct amdgpu_device *tmp_adev = NULL;
4529
4530 if (adev->gmc.xgmi.num_physical_nodes > 1) {
4531 if (!hive) {
4532 dev_err(adev->dev, "Hive is NULL while device has multiple xgmi nodes");
4533 return -ENODEV;
4534 }
4535 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
4536 if (!amdgpu_device_lock_adev(tmp_adev, hive))
4537 goto roll_back;
4538 }
4539 } else if (!amdgpu_device_lock_adev(adev, hive))
4540 return -EAGAIN;
4541
4542 return 0;
4543roll_back:
4544 if (!list_is_first(&tmp_adev->gmc.xgmi.head, &hive->device_list)) {
4545 /*
4546 * if the locking iteration breaks in the middle of a hive,
4547 * it may mean there is a race issue,
4548 * or that a hive device locked up independently.
4549 * We may or may not be in trouble, so try to roll back
4550 * the locks and give out a warning.
4551 */
4552 dev_warn(tmp_adev->dev, "Hive lock iteration broke in the middle. Rolling back to unlock");
4553 list_for_each_entry_continue_reverse(tmp_adev, &hive->device_list, gmc.xgmi.head) {
4554 amdgpu_device_unlock_adev(tmp_adev);
4555 }
4556 }
4557 return -EAGAIN;
4558}
4559
3f12acc8
EQ
4560static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
4561{
4562 struct pci_dev *p = NULL;
4563
4564 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4565 adev->pdev->bus->number, 1);
4566 if (p) {
4567 pm_runtime_enable(&(p->dev));
4568 pm_runtime_resume(&(p->dev));
4569 }
4570}
4571
4572static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
4573{
4574 enum amd_reset_method reset_method;
4575 struct pci_dev *p = NULL;
4576 u64 expires;
4577
4578 /*
4579 * For now, only BACO and mode1 reset are confirmed
4580 * to suffer the audio issue if the audio device is not properly suspended.
4581 */
4582 reset_method = amdgpu_asic_reset_method(adev);
4583 if ((reset_method != AMD_RESET_METHOD_BACO) &&
4584 (reset_method != AMD_RESET_METHOD_MODE1))
4585 return -EINVAL;
4586
4587 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4588 adev->pdev->bus->number, 1);
4589 if (!p)
4590 return -ENODEV;
4591
4592 expires = pm_runtime_autosuspend_expiration(&(p->dev));
4593 if (!expires)
4594 /*
4595 * If we cannot get the audio device autosuspend delay,
4596 * a fixed 4s interval will be used. Since 3s is the audio
4597 * controller's default autosuspend delay setting, the 4s
4598 * used here is guaranteed to cover it.
4599 */
54b7feb9 4600 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
4601
4602 while (!pm_runtime_status_suspended(&(p->dev))) {
4603 if (!pm_runtime_suspend(&(p->dev)))
4604 break;
4605
4606 if (expires < ktime_get_mono_fast_ns()) {
4607 dev_warn(adev->dev, "failed to suspend display audio\n");
4608 /* TODO: abort the succeeding gpu reset? */
4609 return -ETIMEDOUT;
4610 }
4611 }
4612
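	/* keep runtime PM from resuming the audio device until the reset is done */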
4613 pm_runtime_disable(&(p->dev));
4614
4615 return 0;
4616}
4617
26bc5340
AG
4618/**
4619 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
4620 *
982a820b 4621 * @adev: amdgpu_device pointer
26bc5340
AG
4622 * @job: which job trigger hang
4623 *
4624 * Attempt to reset the GPU if it has hung (all asics).
4625 * Attempt to do soft-reset or full-reset and reinitialize Asic
4626 * Returns 0 for success or an error on failure.
4627 */
4628
4629int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
4630 struct amdgpu_job *job)
4631{
1d721ed6 4632 struct list_head device_list, *device_list_handle = NULL;
7dd8c205
EQ
4633 bool need_full_reset = false;
4634 bool job_signaled = false;
26bc5340 4635 struct amdgpu_hive_info *hive = NULL;
26bc5340 4636 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 4637 int i, r = 0;
bb5c7235 4638 bool need_emergency_restart = false;
3f12acc8 4639 bool audio_suspended = false;
26bc5340 4640
6e3cd2a9 4641 /*
bb5c7235
WS
4642 * Special case: RAS triggered and full reset isn't supported
4643 */
4644 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
4645
d5ea093e
AG
4646 /*
4647 * Flush RAM to disk so that after reboot
4648 * the user can read log and see why the system rebooted.
4649 */
bb5c7235 4650 if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
4651 DRM_WARN("Emergency reboot.");
4652
4653 ksys_sync_helper();
4654 emergency_restart();
4655 }
4656
b823821f 4657 dev_info(adev->dev, "GPU %s begin!\n",
bb5c7235 4658 need_emergency_restart ? "jobs stop":"reset");
26bc5340
AG
4659
4660 /*
1d721ed6
AG
4661 * Here we trylock to avoid a chain of resets executing, triggered
4662 * either by jobs on different adevs in an XGMI hive or by jobs on
4663 * different schedulers for the same device, while this TO handler is running.
4664 * We always reset all schedulers for device and all devices for XGMI
4665 * hive so that should take care of them too.
26bc5340 4666 */
d95e8e97 4667 hive = amdgpu_get_xgmi_hive(adev);
53b3f8f4
DL
4668 if (hive) {
4669 if (atomic_cmpxchg(&hive->in_reset, 0, 1) != 0) {
4670 DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
4671 job ? job->base.id : -1, hive->hive_id);
d95e8e97 4672 amdgpu_put_xgmi_hive(hive);
91fb309d
HC
4673 if (job)
4674 drm_sched_increase_karma(&job->base);
53b3f8f4
DL
4675 return 0;
4676 }
4677 mutex_lock(&hive->hive_lock);
1d721ed6 4678 }
26bc5340 4679
91fb309d
HC
4680 /*
4681 * lock the device before we try to operate on the linked list;
4682 * if we didn't get the device lock, don't touch the linked list since
4683 * others may be iterating over it.
4684 */
4685 r = amdgpu_device_lock_hive_adev(adev, hive);
4686 if (r) {
4687 dev_info(adev->dev, "Bailing on TDR for s_job:%llx, as another already in progress",
4688 job ? job->base.id : -1);
4689
4690 /* even though we skipped this reset, we still need to mark the job as guilty */
4691 if (job)
4692 drm_sched_increase_karma(&job->base);
4693 goto skip_recovery;
4694 }
4695
9e94d22c
EQ
4696 /*
4697 * Build list of devices to reset.
4698 * In case we are in XGMI hive mode, resort the device list
4699 * to put adev in the 1st position.
4700 */
4701 INIT_LIST_HEAD(&device_list);
4702 if (adev->gmc.xgmi.num_physical_nodes > 1) {
655ce9cb 4703 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
4704 list_add_tail(&tmp_adev->reset_list, &device_list);
4705 if (!list_is_first(&adev->reset_list, &device_list))
4706 list_rotate_to_front(&adev->reset_list, &device_list);
4707 device_list_handle = &device_list;
26bc5340 4708 } else {
655ce9cb 4709 list_add_tail(&adev->reset_list, &device_list);
26bc5340
AG
4710 device_list_handle = &device_list;
4711 }
4712
1d721ed6 4713 /* block all schedulers and reset given job's ring */
655ce9cb 4714 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
3f12acc8
EQ
4715 /*
4716 * Try to put the audio codec into suspend state
4717 * before the gpu reset starts.
4718 *
4719 * The power domain of the graphics device is shared
4720 * with the AZ power domain. Without this, we may
4721 * change the audio hardware from behind the audio
4722 * driver's back and trigger some audio codec
4723 * errors.
4724 */
4725 if (!amdgpu_device_suspend_display_audio(tmp_adev))
4726 audio_suspended = true;
4727
9e94d22c
EQ
4728 amdgpu_ras_set_error_query_ready(tmp_adev, false);
4729
52fb44cf
EQ
4730 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
4731
9e94d22c
EQ
4732 if (!amdgpu_sriov_vf(tmp_adev))
4733 amdgpu_amdkfd_pre_reset(tmp_adev);
4734
12ffa55d
AG
4735 /*
4736 * Mark these ASICs to be reset as untracked first,
4737 * and add them back after the reset has completed.
4738 */
4739 amdgpu_unregister_gpu_instance(tmp_adev);
4740
a2f63ee8 4741 amdgpu_fbdev_set_suspend(tmp_adev, 1);
565d1941 4742
f1c1314b 4743 /* disable ras on ALL IPs */
bb5c7235 4744 if (!need_emergency_restart &&
b823821f 4745 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 4746 amdgpu_ras_suspend(tmp_adev);
4747
1d721ed6
AG
4748 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4749 struct amdgpu_ring *ring = tmp_adev->rings[i];
4750
4751 if (!ring || !ring->sched.thread)
4752 continue;
4753
0b2d2c2e 4754 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 4755
bb5c7235 4756 if (need_emergency_restart)
7c6e68c7 4757 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6 4758 }
8f8c80f4 4759 atomic_inc(&tmp_adev->gpu_reset_counter);
1d721ed6
AG
4760 }
4761
bb5c7235 4762 if (need_emergency_restart)
7c6e68c7
AG
4763 goto skip_sched_resume;
4764
1d721ed6
AG
4765 /*
4766 * Must check guilty signal here since after this point all old
4767 * HW fences are force signaled.
4768 *
4769 * job->base holds a reference to parent fence
4770 */
4771 if (job && job->base.s_fence->parent &&
7dd8c205 4772 dma_fence_is_signaled(job->base.s_fence->parent)) {
1d721ed6 4773 job_signaled = true;
1d721ed6
AG
4774 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
4775 goto skip_hw_reset;
4776 }
4777
26bc5340 4778retry: /* Rest of adevs pre asic reset from XGMI hive. */
655ce9cb 4779 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
26bc5340 4780 r = amdgpu_device_pre_asic_reset(tmp_adev,
ded08454 4781 (tmp_adev == adev) ? job : NULL,
26bc5340
AG
4782 &need_full_reset);
4783 /*TODO Should we stop ?*/
4784 if (r) {
aac89168 4785 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
4a580877 4786 r, adev_to_drm(tmp_adev)->unique);
26bc5340
AG
4787 tmp_adev->asic_reset_res = r;
4788 }
4789 }
4790
4791 /* Actual ASIC resets if needed.*/
4792 /* TODO Implement XGMI hive reset logic for SRIOV */
4793 if (amdgpu_sriov_vf(adev)) {
4794 r = amdgpu_device_reset_sriov(adev, job ? false : true);
4795 if (r)
4796 adev->asic_reset_res = r;
4797 } else {
7ac71382 4798 r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset, false);
26bc5340
AG
4799 if (r && r == -EAGAIN)
4800 goto retry;
4801 }
4802
1d721ed6
AG
4803skip_hw_reset:
4804
26bc5340 4805 /* Post ASIC reset for all devs .*/
655ce9cb 4806 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
7c6e68c7 4807
1d721ed6
AG
4808 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4809 struct amdgpu_ring *ring = tmp_adev->rings[i];
4810
4811 if (!ring || !ring->sched.thread)
4812 continue;
4813
4814 /* No point to resubmit jobs if we didn't HW reset*/
4815 if (!tmp_adev->asic_reset_res && !job_signaled)
4816 drm_sched_resubmit_jobs(&ring->sched);
4817
4818 drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
4819 }
4820
4821 if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
4a580877 4822 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
1d721ed6
AG
4823 }
4824
4825 tmp_adev->asic_reset_res = 0;
26bc5340
AG
4826
4827 if (r) {
4828 /* bad news, how to tell it to userspace ? */
12ffa55d 4829 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
4830 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
4831 } else {
12ffa55d 4832 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340 4833 }
7c6e68c7 4834 }
26bc5340 4835
7c6e68c7 4836skip_sched_resume:
655ce9cb 4837 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
8e2712e7 4838 /* unlock kfd: SRIOV would do it separately */
bb5c7235 4839 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
7c6e68c7 4840 amdgpu_amdkfd_post_reset(tmp_adev);
8e2712e7 4841
4842 /* kfd_post_reset will do nothing if kfd device is not initialized,
4843 * need to bring up kfd here if it hasn't been initialized before
4844 */
4845 if (!adev->kfd.init_complete)
4846 amdgpu_amdkfd_device_init(adev);
4847
3f12acc8
EQ
4848 if (audio_suspended)
4849 amdgpu_device_resume_display_audio(tmp_adev);
26bc5340
AG
4850 amdgpu_device_unlock_adev(tmp_adev);
4851 }
4852
cbfd17f7 4853skip_recovery:
9e94d22c 4854 if (hive) {
53b3f8f4 4855 atomic_set(&hive->in_reset, 0);
9e94d22c 4856 mutex_unlock(&hive->hive_lock);
d95e8e97 4857 amdgpu_put_xgmi_hive(hive);
9e94d22c 4858 }
26bc5340 4859
91fb309d 4860 if (r && r != -EAGAIN)
26bc5340 4861 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
d38ceaf9
AD
4862 return r;
4863}
4864
e3ecdffa
AD
4865/**
4866 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
4867 *
4868 * @adev: amdgpu_device pointer
4869 *
4870 * Fetches and stores in the driver the PCIE capabilities (gen speed
4871 * and lanes) of the slot the device is in. Handles APUs and
4872 * virtualized environments where PCIE config space may not be available.
4873 */
5494d864 4874static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 4875{
5d9a6330 4876 struct pci_dev *pdev;
c5313457
HK
4877 enum pci_bus_speed speed_cap, platform_speed_cap;
4878 enum pcie_link_width platform_link_width;
d0dd7f0c 4879
cd474ba0
AD
4880 if (amdgpu_pcie_gen_cap)
4881 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 4882
cd474ba0
AD
4883 if (amdgpu_pcie_lane_cap)
4884 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 4885
cd474ba0
AD
4886 /* covers APUs as well */
4887 if (pci_is_root_bus(adev->pdev->bus)) {
4888 if (adev->pm.pcie_gen_mask == 0)
4889 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
4890 if (adev->pm.pcie_mlw_mask == 0)
4891 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 4892 return;
cd474ba0 4893 }
d0dd7f0c 4894
c5313457
HK
4895 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
4896 return;
4897
dbaa922b
AD
4898 pcie_bandwidth_available(adev->pdev, NULL,
4899 &platform_speed_cap, &platform_link_width);
c5313457 4900
cd474ba0 4901 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
4902 /* asic caps */
4903 pdev = adev->pdev;
4904 speed_cap = pcie_get_speed_cap(pdev);
4905 if (speed_cap == PCI_SPEED_UNKNOWN) {
4906 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
4907 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4908 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 4909 } else {
2b3a1f51
FX
4910 if (speed_cap == PCIE_SPEED_32_0GT)
4911 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4912 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4913 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4914 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
4915 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
4916 else if (speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
4917 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4918 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4919 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4920 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
4921 else if (speed_cap == PCIE_SPEED_8_0GT)
4922 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4923 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4924 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
4925 else if (speed_cap == PCIE_SPEED_5_0GT)
4926 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4927 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
4928 else
4929 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
4930 }
4931 /* platform caps */
c5313457 4932 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
4933 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4934 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4935 } else {
2b3a1f51
FX
4936 if (platform_speed_cap == PCIE_SPEED_32_0GT)
4937 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4938 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4939 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4940 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
4941 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
4942 else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
4943 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4944 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4945 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4946 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 4947 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
4948 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4949 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4950 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 4951 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
4952 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4953 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4954 else
4955 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
4956
cd474ba0
AD
4957 }
4958 }
4959 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 4960 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
4961 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
4962 } else {
c5313457 4963 switch (platform_link_width) {
5d9a6330 4964 case PCIE_LNK_X32:
cd474ba0
AD
4965 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
4966 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4967 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4968 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4969 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4970 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4971 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4972 break;
5d9a6330 4973 case PCIE_LNK_X16:
cd474ba0
AD
4974 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4975 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4976 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4977 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4978 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4979 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4980 break;
5d9a6330 4981 case PCIE_LNK_X12:
cd474ba0
AD
4982 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4983 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4984 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4985 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4986 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4987 break;
5d9a6330 4988 case PCIE_LNK_X8:
cd474ba0
AD
4989 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4990 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4991 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4992 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4993 break;
5d9a6330 4994 case PCIE_LNK_X4:
cd474ba0
AD
4995 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4996 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4997 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4998 break;
5d9a6330 4999 case PCIE_LNK_X2:
5000 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5001 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5002 break;
5d9a6330 5003 case PCIE_LNK_X1:
5004 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5005 break;
5006 default:
5007 break;
5008 }
5009 }
5010 }
5011}
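/*
 * Editor's illustrative sketch (hypothetical helper, not part of this file):
 * how a power-management path might reduce the combined mask built above to
 * the highest PCIe gen supported by both the ASIC (CAIL_ASIC_PCIE_*) and the
 * platform (CAIL_PCIE_*), which share adev->pm.pcie_gen_mask.
 */
static unsigned int amdgpu_example_max_common_pcie_gen(struct amdgpu_device *adev)
{
	u32 mask = adev->pm.pcie_gen_mask;

	if ((mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4) &&
	    (mask & CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4))
		return 4;
	if ((mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) &&
	    (mask & CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3))
		return 3;
	if ((mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2) &&
	    (mask & CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2))
		return 2;
	return 1;
}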
d38ceaf9 5012
5013int amdgpu_device_baco_enter(struct drm_device *dev)
5014{
1348969a 5015 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5016 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01 5017
4a580877 5018 if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
5019 return -ENOTSUPP;
5020
6fb33209 5021 if (ras && ras->supported && adev->nbio.funcs->enable_doorbell_interrupt)
5022 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
5023
9530273e 5024 return amdgpu_dpm_baco_enter(adev);
5025}
5026
5027int amdgpu_device_baco_exit(struct drm_device *dev)
5028{
1348969a 5029 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 5030 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 5031 int ret = 0;
361dbd01 5032
4a580877 5033 if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
5034 return -ENOTSUPP;
5035
5036 ret = amdgpu_dpm_baco_exit(adev);
5037 if (ret)
5038 return ret;
7a22677b 5039
6fb33209 5040 if (ras && ras->supported && adev->nbio.funcs->enable_doorbell_interrupt)
5041 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
5042
5043 return 0;
361dbd01 5044}
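/*
 * Editor's illustrative sketch (hypothetical, not part of this file): a full
 * BACO ("Bus Active, Chip Off") power cycle as a reset path might use the
 * two helpers above -- enter BACO to power the chip down while the bus stays
 * active, then exit to bring it back.
 */
static int amdgpu_example_baco_cycle(struct drm_device *dev)
{
	int r;

	r = amdgpu_device_baco_enter(dev);
	if (r)
		return r;

	return amdgpu_device_baco_exit(dev);
}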
c9a6b82f 5045
5046static void amdgpu_cancel_all_tdr(struct amdgpu_device *adev)
5047{
5048 int i;
5049
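	/* Quiesce any scheduler timeout (TDR) work still queued for the rings. */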
5050 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5051 struct amdgpu_ring *ring = adev->rings[i];
5052
5053 if (!ring || !ring->sched.thread)
5054 continue;
5055
5056 cancel_delayed_work_sync(&ring->sched.work_tdr);
5057 }
5058}
5059
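/*
 * Note on how the PCI error-recovery callbacks below fit together, per the
 * kernel's PCI error-recovery model: error_detected() runs first; if it
 * returns PCI_ERS_RESULT_CAN_RECOVER the core calls mmio_enabled(), if it
 * returns PCI_ERS_RESULT_NEED_RESET the slot is reset and slot_reset() is
 * called, and resume() runs once recovery has completed.
 */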
5060/**
5061 * amdgpu_pci_error_detected - Called when a PCI error is detected.
5062 * @pdev: PCI device struct
5063 * @state: PCI channel state
5064 *
5065 * Description: Called when a PCI error is detected.
5066 *
5067 * Return: PCI_ERS_RESULT_CAN_RECOVER, PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
5068 */
5069pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
5070{
5071 struct drm_device *dev = pci_get_drvdata(pdev);
5072 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5073 int i;
5074
5075 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
5076
5077 if (adev->gmc.xgmi.num_physical_nodes > 1) {
5078 DRM_WARN("No support for XGMI hive yet...");
5079 return PCI_ERS_RESULT_DISCONNECT;
5080 }
5081
5082 switch (state) {
5083 case pci_channel_io_normal:
5084 return PCI_ERS_RESULT_CAN_RECOVER;
acd89fca 5085 /* Fatal error, prepare for slot reset */
5086 case pci_channel_io_frozen:
5087 /*
5088 * Cancel and wait for all TDRs in progress if we fail to
5089 * set adev->in_gpu_reset in amdgpu_device_lock_adev().
5090 *
5091 * Locking adev->reset_sem prevents any external access to the
5092 * GPU for the duration of PCI error recovery.
5093 */
5094 while (!amdgpu_device_lock_adev(adev, NULL))
5095 amdgpu_cancel_all_tdr(adev);
5096
5097 /*
5098 * Block any work scheduling as we do for regular GPU reset
5099 * for the duration of the recovery
5100 */
5101 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5102 struct amdgpu_ring *ring = adev->rings[i];
5103
5104 if (!ring || !ring->sched.thread)
5105 continue;
5106
5107 drm_sched_stop(&ring->sched, NULL);
5108 }
8f8c80f4 5109 atomic_inc(&adev->gpu_reset_counter);
5110 return PCI_ERS_RESULT_NEED_RESET;
5111 case pci_channel_io_perm_failure:
5112 /* Permanent error, prepare for device removal */
5113 return PCI_ERS_RESULT_DISCONNECT;
5114 }
5115
5116 return PCI_ERS_RESULT_NEED_RESET;
5117}
5118
5119/**
5120 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
5121 * @pdev: pointer to PCI device
5122 */
5123pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
5124{
5125
5126 DRM_INFO("PCI error: mmio enabled callback!!\n");
5127
5128 /* TODO - dump whatever for debugging purposes */
5129
5130 /* This is called only if amdgpu_pci_error_detected() returns
5131 * PCI_ERS_RESULT_CAN_RECOVER. Read/write access to the device still
5132 * works, so there is no need to reset the slot.
5133 */
5134
5135 return PCI_ERS_RESULT_RECOVERED;
5136}
5137
5138/**
5139 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
5140 * @pdev: PCI device struct
5141 *
5142 * Description: This routine is called by the pci error recovery
5143 * code after the PCI slot has been reset, just before we
5144 * should resume normal operations.
5145 */
5146pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
5147{
5148 struct drm_device *dev = pci_get_drvdata(pdev);
5149 struct amdgpu_device *adev = drm_to_adev(dev);
362c7b91 5150 int r, i;
7ac71382 5151 bool need_full_reset = true;
362c7b91 5152 u32 memsize;
7ac71382 5153 struct list_head device_list;
5154
5155 DRM_INFO("PCI error: slot reset callback!!\n");
5156
7ac71382 5157 INIT_LIST_HEAD(&device_list);
655ce9cb 5158 list_add_tail(&adev->reset_list, &device_list);
7ac71382 5159
5160 /* wait for asic to come out of reset */
5161 msleep(500);
5162
7ac71382 5163 /* Restore PCI confspace */
c1dd4aa6 5164 amdgpu_device_load_pci_state(pdev);
c9a6b82f 5165
5166 /* confirm ASIC came out of reset */
5167 for (i = 0; i < adev->usec_timeout; i++) {
5168 memsize = amdgpu_asic_get_config_memsize(adev);
5169
5170 if (memsize != 0xffffffff)
5171 break;
5172 udelay(1);
5173 }
5174 if (memsize == 0xffffffff) {
5175 r = -ETIME;
5176 goto out;
5177 }
5178
8a11d283 5179 adev->in_pci_err_recovery = true;
7ac71382 5180 r = amdgpu_device_pre_asic_reset(adev, NULL, &need_full_reset);
bf36b52e 5181 adev->in_pci_err_recovery = false;
5182 if (r)
5183 goto out;
5184
7ac71382 5185 r = amdgpu_do_asic_reset(NULL, &device_list, &need_full_reset, true);
5186
5187out:
c9a6b82f 5188 if (!r) {
5189 if (amdgpu_device_cache_pci_state(adev->pdev))
5190 pci_restore_state(adev->pdev);
5191
5192 DRM_INFO("PCIe error recovery succeeded\n");
5193 } else {
5194 DRM_ERROR("PCIe error recovery failed, err:%d\n", r);
5195 amdgpu_device_unlock_adev(adev);
5196 }
5197
5198 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
5199}
5200
5201/**
5202 * amdgpu_pci_resume() - resume normal ops after PCI reset
5203 * @pdev: pointer to PCI device
5204 *
5205 * Called when the error recovery driver tells us that it's
505199a3 5206 * OK to resume normal operation.
5207 */
5208void amdgpu_pci_resume(struct pci_dev *pdev)
5209{
5210 struct drm_device *dev = pci_get_drvdata(pdev);
5211 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5212 int i;
c9a6b82f 5213
5214
5215 DRM_INFO("PCI error: resume callback!!\n");
5216
5217 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5218 struct amdgpu_ring *ring = adev->rings[i];
5219
5220 if (!ring || !ring->sched.thread)
5221 continue;
5222
5224 drm_sched_resubmit_jobs(&ring->sched);
5225 drm_sched_start(&ring->sched, true);
5226 }
5227
5228 amdgpu_device_unlock_adev(adev);
c9a6b82f 5229}
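/*
 * Editor's illustrative sketch: how these four callbacks are typically wired
 * into the driver's struct pci_driver through a struct pci_error_handlers.
 * The actual hookup for amdgpu lives in amdgpu_drv.c; the instance name here
 * is illustrative only.
 */
static const struct pci_error_handlers amdgpu_example_pci_err_handler = {
	.error_detected	= amdgpu_pci_error_detected,
	.mmio_enabled	= amdgpu_pci_mmio_enabled,
	.slot_reset	= amdgpu_pci_slot_reset,
	.resume		= amdgpu_pci_resume,
};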
5230
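/**
 * amdgpu_device_cache_pci_state - cache the device's PCI config space
 * @pdev: PCI device struct
 *
 * Saves the PCI configuration space of @pdev and keeps a kernel-allocated
 * copy in adev->pci_state so that it can be re-applied after a reset.
 *
 * Return: true on success, false otherwise.
 */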
5231bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
5232{
5233 struct drm_device *dev = pci_get_drvdata(pdev);
5234 struct amdgpu_device *adev = drm_to_adev(dev);
5235 int r;
5236
5237 r = pci_save_state(pdev);
5238 if (!r) {
5239 kfree(adev->pci_state);
5240
5241 adev->pci_state = pci_store_saved_state(pdev);
5242
5243 if (!adev->pci_state) {
5244 DRM_ERROR("Failed to store PCI saved state\n");
5245 return false;
5246 }
5247 } else {
5248 DRM_WARN("Failed to save PCI state, err:%d\n", r);
5249 return false;
5250 }
5251
5252 return true;
5253}
5254
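/**
 * amdgpu_device_load_pci_state - restore the cached PCI config space
 * @pdev: PCI device struct
 *
 * Re-loads the configuration space previously saved by
 * amdgpu_device_cache_pci_state() and restores it to the device.
 *
 * Return: true on success, false if no state was cached or the load failed.
 */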
5255bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
5256{
5257 struct drm_device *dev = pci_get_drvdata(pdev);
5258 struct amdgpu_device *adev = drm_to_adev(dev);
5259 int r;
5260
5261 if (!adev->pci_state)
5262 return false;
5263
5264 r = pci_load_saved_state(pdev, adev->pci_state);
5265
5266 if (!r) {
5267 pci_restore_state(pdev);
5268 } else {
5269 DRM_WARN("Failed to load PCI state, err:%d\n", r);
5270 return false;
5271 }
5272
5273 return true;
5274}
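/*
 * Editor's illustrative sketch (hypothetical helper, not part of this file):
 * the intended pairing of the two helpers above -- cache the PCI config
 * space while the device is known-good, then re-load and restore it after an
 * event that may have clobbered it, as the slot-reset path above does.
 */
static void amdgpu_example_pci_state_roundtrip(struct amdgpu_device *adev)
{
	if (!amdgpu_device_cache_pci_state(adev->pdev))
		return;

	/* ... reset or power event that scrambles config space ... */

	if (amdgpu_device_load_pci_state(adev->pdev))
		DRM_INFO("PCI config space restored\n");
}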
5275
5276