drm/amdgpu: add amdgpu_gfx_state_change_set() set gfx power change entry (v2)
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
b1ddf548 28#include <linux/power_supply.h>
0875dc9e 29#include <linux/kthread.h>
fdf2f6c5 30#include <linux/module.h>
d38ceaf9
AD
31#include <linux/console.h>
32#include <linux/slab.h>
fdf2f6c5 33
4562236b 34#include <drm/drm_atomic_helper.h>
fcd70cd3 35#include <drm/drm_probe_helper.h>
d38ceaf9
AD
36#include <drm/amdgpu_drm.h>
37#include <linux/vgaarb.h>
38#include <linux/vga_switcheroo.h>
39#include <linux/efi.h>
40#include "amdgpu.h"
f4b373f4 41#include "amdgpu_trace.h"
d38ceaf9
AD
42#include "amdgpu_i2c.h"
43#include "atom.h"
44#include "amdgpu_atombios.h"
a5bde2f9 45#include "amdgpu_atomfirmware.h"
d0dd7f0c 46#include "amd_pcie.h"
33f34802
KW
47#ifdef CONFIG_DRM_AMDGPU_SI
48#include "si.h"
49#endif
a2e73f56
AD
50#ifdef CONFIG_DRM_AMDGPU_CIK
51#include "cik.h"
52#endif
aaa36a97 53#include "vi.h"
460826e6 54#include "soc15.h"
0a5b8c7b 55#include "nv.h"
d38ceaf9 56#include "bif/bif_4_1_d.h"
9accf2fd 57#include <linux/pci.h>
bec86378 58#include <linux/firmware.h>
89041940 59#include "amdgpu_vf_error.h"
d38ceaf9 60
ba997709 61#include "amdgpu_amdkfd.h"
d2f52ac8 62#include "amdgpu_pm.h"
d38ceaf9 63
5183411b 64#include "amdgpu_xgmi.h"
c030f2e4 65#include "amdgpu_ras.h"
9c7c85f7 66#include "amdgpu_pmu.h"
bd607166 67#include "amdgpu_fru_eeprom.h"
5183411b 68
d5ea093e 69#include <linux/suspend.h>
c6a6e2db 70#include <drm/task_barrier.h>
3f12acc8 71#include <linux/pm_runtime.h>
d5ea093e 72
e2a75f88 73MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
3f76dced 74MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
2d2e5e7e 75MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
ad5a67a7 76MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
54c4d17e 77MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
65e60f6e 78MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
b51a26a0 79MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
23c6268e 80MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
ed42cfe1 81MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
42b325e5 82MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
4e52a9f8 83MODULE_FIRMWARE("amdgpu/vangogh_gpu_info.bin");
2e62f0b5 84MODULE_FIRMWARE("amdgpu/green_sardine_gpu_info.bin");
e2a75f88 85
2dc80b00
S
86#define AMDGPU_RESUME_MS 2000
87
050091ab 88const char *amdgpu_asic_name[] = {
da69c161
KW
89 "TAHITI",
90 "PITCAIRN",
91 "VERDE",
92 "OLAND",
93 "HAINAN",
d38ceaf9
AD
94 "BONAIRE",
95 "KAVERI",
96 "KABINI",
97 "HAWAII",
98 "MULLINS",
99 "TOPAZ",
100 "TONGA",
48299f95 101 "FIJI",
d38ceaf9 102 "CARRIZO",
139f4917 103 "STONEY",
2cc0c0b5
FC
104 "POLARIS10",
105 "POLARIS11",
c4642a47 106 "POLARIS12",
48ff108d 107 "VEGAM",
d4196f01 108 "VEGA10",
8fab806a 109 "VEGA12",
956fcddc 110 "VEGA20",
2ca8a5d2 111 "RAVEN",
d6c3b24e 112 "ARCTURUS",
1eee4228 113 "RENOIR",
852a6626 114 "NAVI10",
87dbad02 115 "NAVI14",
9802f5d7 116 "NAVI12",
ccaf72d3 117 "SIENNA_CICHLID",
ddd8fbe7 118 "NAVY_FLOUNDER",
4f1e9a76 119 "VANGOGH",
a2468e04 120 "DIMGREY_CAVEFISH",
d38ceaf9
AD
121 "LAST",
122};
123
dcea6e65
KR
124/**
125 * DOC: pcie_replay_count
126 *
127 * The amdgpu driver provides a sysfs API for reporting the total number
128 * of PCIe replays (NAKs).
129 * The file pcie_replay_count is used for this and returns the total
130 * number of replays as a sum of the NAKs generated and the NAKs received.
131 */
132
133static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
134 struct device_attribute *attr, char *buf)
135{
136 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 137 struct amdgpu_device *adev = drm_to_adev(ddev);
dcea6e65
KR
138 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
139
140 return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
141}
142
143static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
144 amdgpu_device_get_pcie_replay_count, NULL);
145
5494d864
AD
146static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
147
bd607166
KR
148/**
149 * DOC: product_name
150 *
151 * The amdgpu driver provides a sysfs API for reporting the product name
152 * for the device.
153 * The file product_name is used for this and returns the product name
154 * as returned from the FRU.
155 * NOTE: This is only available for certain server cards.
156 */
157
158static ssize_t amdgpu_device_get_product_name(struct device *dev,
159 struct device_attribute *attr, char *buf)
160{
161 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 162 struct amdgpu_device *adev = drm_to_adev(ddev);
bd607166
KR
163
164 return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_name);
165}
166
167static DEVICE_ATTR(product_name, S_IRUGO,
168 amdgpu_device_get_product_name, NULL);
169
170/**
171 * DOC: product_number
172 *
173 * The amdgpu driver provides a sysfs API for reporting the part number
174 * for the device.
175 * The file product_number is used for this and returns the part number
176 * as returned from the FRU.
177 * NOTE: This is only available for certain server cards.
178 */
179
180static ssize_t amdgpu_device_get_product_number(struct device *dev,
181 struct device_attribute *attr, char *buf)
182{
183 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 184 struct amdgpu_device *adev = drm_to_adev(ddev);
bd607166
KR
185
186 return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_number);
187}
188
189static DEVICE_ATTR(product_number, S_IRUGO,
190 amdgpu_device_get_product_number, NULL);
191
192/**
193 * DOC: serial_number
194 *
195 * The amdgpu driver provides a sysfs API for reporting the serial number
196 * for the device.
197 * The file serial_number is used for this and returns the serial number
198 * as returned from the FRU.
199 * NOTE: This is only available for certain server cards.
200 */
201
202static ssize_t amdgpu_device_get_serial_number(struct device *dev,
203 struct device_attribute *attr, char *buf)
204{
205 struct drm_device *ddev = dev_get_drvdata(dev);
1348969a 206 struct amdgpu_device *adev = drm_to_adev(ddev);
bd607166
KR
207
208 return snprintf(buf, PAGE_SIZE, "%s\n", adev->serial);
209}
210
211static DEVICE_ATTR(serial_number, S_IRUGO,
212 amdgpu_device_get_serial_number, NULL);
213
e3ecdffa 214/**
31af062a 215 * amdgpu_device_supports_boco - Is the device a dGPU with HG/PX power control
e3ecdffa
AD
216 *
217 * @dev: drm_device pointer
218 *
219 * Returns true if the device is a dGPU with HG/PX power control,
220 * otherwise returns false.
221 */
31af062a 222bool amdgpu_device_supports_boco(struct drm_device *dev)
d38ceaf9 223{
1348969a 224 struct amdgpu_device *adev = drm_to_adev(dev);
d38ceaf9 225
2f7d10b3 226 if (adev->flags & AMD_IS_PX)
d38ceaf9
AD
227 return true;
228 return false;
229}
230
a69cba42
AD
231/**
232 * amdgpu_device_supports_baco - Does the device support BACO
233 *
234 * @dev: drm_device pointer
235 *
236 * Returns true if the device supports BACO,
237 * otherwise returns false.
238 */
239bool amdgpu_device_supports_baco(struct drm_device *dev)
240{
1348969a 241 struct amdgpu_device *adev = drm_to_adev(dev);
a69cba42
AD
242
243 return amdgpu_asic_supports_baco(adev);
244}
245
6e3cd2a9
MCC
246/*
247 * VRAM access helper functions
248 */
249
e35e2b11 250/**
e35e2b11
TY
251 * amdgpu_device_vram_access - read/write a buffer in vram
252 *
253 * @adev: amdgpu_device pointer
254 * @pos: offset of the buffer in vram
255 * @buf: virtual address of the buffer in system memory
256 * @size: read/write size in bytes; the buffer at @buf must be at least @size bytes
257 * @write: true - write to vram, otherwise - read from vram
258 */
259void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
260 uint32_t *buf, size_t size, bool write)
261{
e35e2b11 262 unsigned long flags;
ce05ac56
CK
263 uint32_t hi = ~0;
264 uint64_t last;
265
9d11eb0d
CK
266
267#ifdef CONFIG_64BIT
268 last = min(pos + size, adev->gmc.visible_vram_size);
269 if (last > pos) {
270 void __iomem *addr = adev->mman.aper_base_kaddr + pos;
271 size_t count = last - pos;
272
273 if (write) {
274 memcpy_toio(addr, buf, count);
275 mb();
276 amdgpu_asic_flush_hdp(adev, NULL);
277 } else {
278 amdgpu_asic_invalidate_hdp(adev, NULL);
279 mb();
280 memcpy_fromio(buf, addr, count);
281 }
282
283 if (count == size)
284 return;
285
286 pos += count;
287 buf += count / 4;
288 size -= count;
289 }
290#endif
291
ce05ac56
CK
292 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
293 for (last = pos + size; pos < last; pos += 4) {
294 uint32_t tmp = pos >> 31;
e35e2b11 295
e35e2b11 296 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
ce05ac56
CK
297 if (tmp != hi) {
298 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
299 hi = tmp;
300 }
e35e2b11
TY
301 if (write)
302 WREG32_NO_KIQ(mmMM_DATA, *buf++);
303 else
304 *buf++ = RREG32_NO_KIQ(mmMM_DATA);
e35e2b11 305 }
ce05ac56 306 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
e35e2b11
TY
307}
308
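/*
 * Usage sketch (illustrative only; the offset and buffers below are made-up
 * values, not taken from this file): copying a small buffer into VRAM and
 * reading it back through the helper above.
 *
 *	uint32_t data[4] = { 0x1, 0x2, 0x3, 0x4 };
 *	uint32_t check[4];
 *
 *	amdgpu_device_vram_access(adev, 0x1000, data, sizeof(data), true);
 *	amdgpu_device_vram_access(adev, 0x1000, check, sizeof(check), false);
 *
 * The helper picks the visible-BAR fast path or the MM_INDEX/MM_DATA window
 * itself, so callers only provide a VRAM offset, a system-memory buffer and
 * the transfer size in bytes.
 */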
d38ceaf9 309/*
f7ee1874 310 * register access helper functions.
d38ceaf9 311 */
e3ecdffa 312/**
f7ee1874 313 * amdgpu_device_rreg - read a memory mapped IO or indirect register
e3ecdffa
AD
314 *
315 * @adev: amdgpu_device pointer
316 * @reg: dword aligned register offset
317 * @acc_flags: access flags which require special behavior
318 *
319 * Returns the 32 bit value from the offset specified.
320 */
f7ee1874
HZ
321uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
322 uint32_t reg, uint32_t acc_flags)
d38ceaf9 323{
f4b373f4
TSD
324 uint32_t ret;
325
bf36b52e
AG
326 if (adev->in_pci_err_recovery)
327 return 0;
328
f7ee1874
HZ
329 if ((reg * 4) < adev->rmmio_size) {
330 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
331 amdgpu_sriov_runtime(adev) &&
332 down_read_trylock(&adev->reset_sem)) {
333 ret = amdgpu_kiq_rreg(adev, reg);
334 up_read(&adev->reset_sem);
335 } else {
336 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
337 }
338 } else {
339 ret = adev->pcie_rreg(adev, reg * 4);
81202807 340 }
bc992ba5 341
f7ee1874 342 trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
e78b579d 343
f4b373f4 344 return ret;
d38ceaf9
AD
345}
346
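/*
 * Usage sketch (illustrative; mmSOME_REG is a placeholder register name):
 * most callers reach this helper through the RREG32()/WREG32() style macros
 * in amdgpu.h, e.g.
 *
 *	u32 val = RREG32(mmSOME_REG);
 *	u32 raw = RREG32_NO_KIQ(mmSOME_REG);	// skips the KIQ path used under SR-IOV
 *
 * Offsets at or beyond rmmio_size fall back to the indirect pcie_rreg
 * callback, as implemented above.
 */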
421a2a30
ML
347/*
348 * MMIO register byte-read helper function
349 * @offset: byte offset from MMIO start
350 *
351 */
352
e3ecdffa
AD
353/**
354 * amdgpu_mm_rreg8 - read a memory mapped IO register
355 *
356 * @adev: amdgpu_device pointer
357 * @offset: byte aligned register offset
358 *
359 * Returns the 8 bit value from the offset specified.
360 */
7cbbc745
AG
361uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
362{
bf36b52e
AG
363 if (adev->in_pci_err_recovery)
364 return 0;
365
421a2a30
ML
366 if (offset < adev->rmmio_size)
367 return (readb(adev->rmmio + offset));
368 BUG();
369}
370
371/*
372 * MMIO register byte-write helper function
373 * @offset: byte offset from MMIO start
374 * @value: the value to be written to the register
375 *
376 */
e3ecdffa
AD
377/**
378 * amdgpu_mm_wreg8 - write to a memory mapped IO register
379 *
380 * @adev: amdgpu_device pointer
381 * @offset: byte aligned register offset
382 * @value: 8 bit value to write
383 *
384 * Writes the value specified to the offset specified.
385 */
7cbbc745
AG
386void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
387{
bf36b52e
AG
388 if (adev->in_pci_err_recovery)
389 return;
390
421a2a30
ML
391 if (offset < adev->rmmio_size)
392 writeb(value, adev->rmmio + offset);
393 else
394 BUG();
395}
396
e3ecdffa 397/**
f7ee1874 398 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
e3ecdffa
AD
399 *
400 * @adev: amdgpu_device pointer
401 * @reg: dword aligned register offset
402 * @v: 32 bit value to write to the register
403 * @acc_flags: access flags which require special behavior
404 *
405 * Writes the value specified to the offset specified.
406 */
f7ee1874
HZ
407void amdgpu_device_wreg(struct amdgpu_device *adev,
408 uint32_t reg, uint32_t v,
409 uint32_t acc_flags)
d38ceaf9 410{
bf36b52e
AG
411 if (adev->in_pci_err_recovery)
412 return;
413
f7ee1874
HZ
414 if ((reg * 4) < adev->rmmio_size) {
415 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
416 amdgpu_sriov_runtime(adev) &&
417 down_read_trylock(&adev->reset_sem)) {
418 amdgpu_kiq_wreg(adev, reg, v);
419 up_read(&adev->reset_sem);
420 } else {
421 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
422 }
423 } else {
424 adev->pcie_wreg(adev, reg * 4, v);
81202807 425 }
bc992ba5 426
f7ee1874 427 trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
2e0cc4d4 428}
d38ceaf9 429
2e0cc4d4
ML
430/*
431 * amdgpu_mm_wreg_mmio_rlc - write register either with mmio or with RLC path if in range
432 *
433 * This function is invoked only for debugfs register access.
434 */
f7ee1874
HZ
435void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
436 uint32_t reg, uint32_t v)
2e0cc4d4 437{
bf36b52e
AG
438 if (adev->in_pci_err_recovery)
439 return;
440
2e0cc4d4 441 if (amdgpu_sriov_fullaccess(adev) &&
f7ee1874
HZ
442 adev->gfx.rlc.funcs &&
443 adev->gfx.rlc.funcs->is_rlcg_access_range) {
2e0cc4d4
ML
444 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
445 return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v);
f7ee1874
HZ
446 } else {
447 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
47ed4e1c 448 }
d38ceaf9
AD
449}
450
e3ecdffa
AD
451/**
452 * amdgpu_io_rreg - read an IO register
453 *
454 * @adev: amdgpu_device pointer
455 * @reg: dword aligned register offset
456 *
457 * Returns the 32 bit value from the offset specified.
458 */
d38ceaf9
AD
459u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
460{
bf36b52e
AG
461 if (adev->in_pci_err_recovery)
462 return 0;
463
d38ceaf9
AD
464 if ((reg * 4) < adev->rio_mem_size)
465 return ioread32(adev->rio_mem + (reg * 4));
466 else {
467 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
468 return ioread32(adev->rio_mem + (mmMM_DATA * 4));
469 }
470}
471
e3ecdffa
AD
472/**
473 * amdgpu_io_wreg - write to an IO register
474 *
475 * @adev: amdgpu_device pointer
476 * @reg: dword aligned register offset
477 * @v: 32 bit value to write to the register
478 *
479 * Writes the value specified to the offset specified.
480 */
d38ceaf9
AD
481void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
482{
bf36b52e
AG
483 if (adev->in_pci_err_recovery)
484 return;
485
d38ceaf9
AD
486 if ((reg * 4) < adev->rio_mem_size)
487 iowrite32(v, adev->rio_mem + (reg * 4));
488 else {
489 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
490 iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
491 }
492}
493
494/**
495 * amdgpu_mm_rdoorbell - read a doorbell dword
496 *
497 * @adev: amdgpu_device pointer
498 * @index: doorbell index
499 *
500 * Returns the value in the doorbell aperture at the
501 * requested doorbell index (CIK).
502 */
503u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
504{
bf36b52e
AG
505 if (adev->in_pci_err_recovery)
506 return 0;
507
d38ceaf9
AD
508 if (index < adev->doorbell.num_doorbells) {
509 return readl(adev->doorbell.ptr + index);
510 } else {
511 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
512 return 0;
513 }
514}
515
516/**
517 * amdgpu_mm_wdoorbell - write a doorbell dword
518 *
519 * @adev: amdgpu_device pointer
520 * @index: doorbell index
521 * @v: value to write
522 *
523 * Writes @v to the doorbell aperture at the
524 * requested doorbell index (CIK).
525 */
526void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
527{
bf36b52e
AG
528 if (adev->in_pci_err_recovery)
529 return;
530
d38ceaf9
AD
531 if (index < adev->doorbell.num_doorbells) {
532 writel(v, adev->doorbell.ptr + index);
533 } else {
534 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
535 }
536}
537
832be404
KW
538/**
539 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
540 *
541 * @adev: amdgpu_device pointer
542 * @index: doorbell index
543 *
544 * Returns the value in the doorbell aperture at the
545 * requested doorbell index (VEGA10+).
546 */
547u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
548{
bf36b52e
AG
549 if (adev->in_pci_err_recovery)
550 return 0;
551
832be404
KW
552 if (index < adev->doorbell.num_doorbells) {
553 return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
554 } else {
555 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
556 return 0;
557 }
558}
559
560/**
561 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
562 *
563 * @adev: amdgpu_device pointer
564 * @index: doorbell index
565 * @v: value to write
566 *
567 * Writes @v to the doorbell aperture at the
568 * requested doorbell index (VEGA10+).
569 */
570void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
571{
bf36b52e
AG
572 if (adev->in_pci_err_recovery)
573 return;
574
832be404
KW
575 if (index < adev->doorbell.num_doorbells) {
576 atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
577 } else {
578 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
579 }
580}
581
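/*
 * Usage sketch (illustrative; the ring field names are assumed for the
 * example): a ring that owns a doorbell typically publishes its new write
 * pointer through these helpers after committing commands, e.g.
 *
 *	if (ring->use_doorbell)
 *		amdgpu_mm_wdoorbell64(adev, ring->doorbell_index, ring->wptr);
 *
 * On pre-VEGA10 parts the 32-bit amdgpu_mm_wdoorbell() variant is used
 * instead, as described in the kernel-doc above.
 */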
1bba3683
HZ
582/**
583 * amdgpu_device_indirect_rreg - read an indirect register
584 *
585 * @adev: amdgpu_device pointer
586 * @pcie_index: mmio register offset
587 * @pcie_data: mmio register offset
22f453fb 588 * @reg_addr: indirect register address to read from
1bba3683
HZ
589 *
590 * Returns the value of indirect register @reg_addr
591 */
592u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
593 u32 pcie_index, u32 pcie_data,
594 u32 reg_addr)
595{
596 unsigned long flags;
597 u32 r;
598 void __iomem *pcie_index_offset;
599 void __iomem *pcie_data_offset;
600
601 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
602 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
603 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
604
605 writel(reg_addr, pcie_index_offset);
606 readl(pcie_index_offset);
607 r = readl(pcie_data_offset);
608 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
609
610 return r;
611}
612
613/**
614 * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
615 *
616 * @adev: amdgpu_device pointer
617 * @pcie_index: mmio register offset
618 * @pcie_data: mmio register offset
22f453fb 619 * @reg_addr: indirect register address to read from
1bba3683
HZ
620 *
621 * Returns the value of indirect register @reg_addr
622 */
623u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
624 u32 pcie_index, u32 pcie_data,
625 u32 reg_addr)
626{
627 unsigned long flags;
628 u64 r;
629 void __iomem *pcie_index_offset;
630 void __iomem *pcie_data_offset;
631
632 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
633 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
634 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
635
636 /* read low 32 bits */
637 writel(reg_addr, pcie_index_offset);
638 readl(pcie_index_offset);
639 r = readl(pcie_data_offset);
640 /* read high 32 bits */
641 writel(reg_addr + 4, pcie_index_offset);
642 readl(pcie_index_offset);
643 r |= ((u64)readl(pcie_data_offset) << 32);
644 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
645
646 return r;
647}
648
649/**
650 * amdgpu_device_indirect_wreg - write an indirect register address
651 *
652 * @adev: amdgpu_device pointer
653 * @pcie_index: mmio register offset
654 * @pcie_data: mmio register offset
655 * @reg_addr: indirect register offset
656 * @reg_data: indirect register data
657 *
658 */
659void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
660 u32 pcie_index, u32 pcie_data,
661 u32 reg_addr, u32 reg_data)
662{
663 unsigned long flags;
664 void __iomem *pcie_index_offset;
665 void __iomem *pcie_data_offset;
666
667 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
668 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
669 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
670
671 writel(reg_addr, pcie_index_offset);
672 readl(pcie_index_offset);
673 writel(reg_data, pcie_data_offset);
674 readl(pcie_data_offset);
675 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
676}
677
678/**
679 * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
680 *
681 * @adev: amdgpu_device pointer
682 * @pcie_index: mmio register offset
683 * @pcie_data: mmio register offset
684 * @reg_addr: indirect register offset
685 * @reg_data: indirect register data
686 *
687 */
688void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
689 u32 pcie_index, u32 pcie_data,
690 u32 reg_addr, u64 reg_data)
691{
692 unsigned long flags;
693 void __iomem *pcie_index_offset;
694 void __iomem *pcie_data_offset;
695
696 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
697 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
698 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
699
700 /* write low 32 bits */
701 writel(reg_addr, pcie_index_offset);
702 readl(pcie_index_offset);
703 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
704 readl(pcie_data_offset);
705 /* write high 32 bits */
706 writel(reg_addr + 4, pcie_index_offset);
707 readl(pcie_index_offset);
708 writel((u32)(reg_data >> 32), pcie_data_offset);
709 readl(pcie_data_offset);
710 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
711}
712
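/*
 * Usage sketch (illustrative; the nbio callback names are assumed from the
 * SOC code and not defined in this file): an ASIC-specific pcie_rreg
 * callback can be a thin wrapper around these helpers, passing the chip's
 * PCIE index/data register offsets, e.g.
 *
 *	static u32 soc_pcie_rreg(struct amdgpu_device *adev, u32 reg)
 *	{
 *		unsigned long address = adev->nbio.funcs->get_pcie_index_offset(adev);
 *		unsigned long data = adev->nbio.funcs->get_pcie_data_offset(adev);
 *
 *		return amdgpu_device_indirect_rreg(adev, address, data, reg);
 *	}
 *
 * The helpers perform the index-write/read-back/data-read sequence and take
 * pcie_idx_lock, so such wrappers stay this small.
 */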
d38ceaf9
AD
713/**
714 * amdgpu_invalid_rreg - dummy reg read function
715 *
982a820b 716 * @adev: amdgpu_device pointer
d38ceaf9
AD
717 * @reg: offset of register
718 *
719 * Dummy register read function. Used for register blocks
720 * that certain asics don't have (all asics).
721 * Returns the value in the register.
722 */
723static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
724{
725 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
726 BUG();
727 return 0;
728}
729
730/**
731 * amdgpu_invalid_wreg - dummy reg write function
732 *
982a820b 733 * @adev: amdgpu_device pointer
d38ceaf9
AD
734 * @reg: offset of register
735 * @v: value to write to the register
736 *
737 * Dummy register write function. Used for register blocks
738 * that certain asics don't have (all asics).
739 */
740static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
741{
742 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
743 reg, v);
744 BUG();
745}
746
4fa1c6a6
TZ
747/**
748 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
749 *
982a820b 750 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
751 * @reg: offset of register
752 *
753 * Dummy register read function. Used for register blocks
754 * that certain asics don't have (all asics).
755 * Returns the value in the register.
756 */
757static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
758{
759 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
760 BUG();
761 return 0;
762}
763
764/**
765 * amdgpu_invalid_wreg64 - dummy reg write function
766 *
982a820b 767 * @adev: amdgpu_device pointer
4fa1c6a6
TZ
768 * @reg: offset of register
769 * @v: value to write to the register
770 *
771 * Dummy register write function. Used for register blocks
772 * that certain asics don't have (all asics).
773 */
774static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
775{
776 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
777 reg, v);
778 BUG();
779}
780
d38ceaf9
AD
781/**
782 * amdgpu_block_invalid_rreg - dummy reg read function
783 *
982a820b 784 * @adev: amdgpu_device pointer
d38ceaf9
AD
785 * @block: offset of instance
786 * @reg: offset of register
787 *
788 * Dummy register read function. Used for register blocks
789 * that certain asics don't have (all asics).
790 * Returns the value in the register.
791 */
792static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
793 uint32_t block, uint32_t reg)
794{
795 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
796 reg, block);
797 BUG();
798 return 0;
799}
800
801/**
802 * amdgpu_block_invalid_wreg - dummy reg write function
803 *
982a820b 804 * @adev: amdgpu_device pointer
d38ceaf9
AD
805 * @block: offset of instance
806 * @reg: offset of register
807 * @v: value to write to the register
808 *
809 * Dummy register write function. Used for register blocks
810 * that certain asics don't have (all asics).
811 */
812static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
813 uint32_t block,
814 uint32_t reg, uint32_t v)
815{
816 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
817 reg, block, v);
818 BUG();
819}
820
4d2997ab
AD
821/**
822 * amdgpu_device_asic_init - Wrapper for atom asic_init
823 *
982a820b 824 * @adev: amdgpu_device pointer
4d2997ab
AD
825 *
826 * Does any asic specific work and then calls atom asic init.
827 */
828static int amdgpu_device_asic_init(struct amdgpu_device *adev)
829{
830 amdgpu_asic_pre_asic_init(adev);
831
832 return amdgpu_atom_asic_init(adev->mode_info.atom_context);
833}
834
e3ecdffa
AD
835/**
836 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
837 *
982a820b 838 * @adev: amdgpu_device pointer
e3ecdffa
AD
839 *
840 * Allocates a scratch page of VRAM for use by various things in the
841 * driver.
842 */
06ec9070 843static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
d38ceaf9 844{
a4a02777
CK
845 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
846 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
847 &adev->vram_scratch.robj,
848 &adev->vram_scratch.gpu_addr,
849 (void **)&adev->vram_scratch.ptr);
d38ceaf9
AD
850}
851
e3ecdffa
AD
852/**
853 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
854 *
982a820b 855 * @adev: amdgpu_device pointer
e3ecdffa
AD
856 *
857 * Frees the VRAM scratch page.
858 */
06ec9070 859static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 860{
078af1a3 861 amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
d38ceaf9
AD
862}
863
864/**
9c3f2b54 865 * amdgpu_device_program_register_sequence - program an array of registers.
d38ceaf9
AD
866 *
867 * @adev: amdgpu_device pointer
868 * @registers: pointer to the register array
869 * @array_size: size of the register array
870 *
871 * Programs an array of registers with AND and OR masks.
872 * This is a helper for setting golden registers.
873 */
9c3f2b54
AD
874void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
875 const u32 *registers,
876 const u32 array_size)
d38ceaf9
AD
877{
878 u32 tmp, reg, and_mask, or_mask;
879 int i;
880
881 if (array_size % 3)
882 return;
883
884 for (i = 0; i < array_size; i +=3) {
885 reg = registers[i + 0];
886 and_mask = registers[i + 1];
887 or_mask = registers[i + 2];
888
889 if (and_mask == 0xffffffff) {
890 tmp = or_mask;
891 } else {
892 tmp = RREG32(reg);
893 tmp &= ~and_mask;
e0d07657
HZ
894 if (adev->family >= AMDGPU_FAMILY_AI)
895 tmp |= (or_mask & and_mask);
896 else
897 tmp |= or_mask;
d38ceaf9
AD
898 }
899 WREG32(reg, tmp);
900 }
901}
902
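/*
 * Usage sketch (illustrative; the register names and values are made up):
 * golden settings are passed as {register, and_mask, or_mask} triplets, so
 * an array such as
 *
 *	static const u32 golden_settings_example[] =
 *	{
 *		mmSOME_REG,  0xffffff00, 0x00000012,
 *		mmOTHER_REG, 0xffffffff, 0x00000001,	// and_mask of 0xffffffff writes or_mask directly
 *	};
 *
 *	amdgpu_device_program_register_sequence(adev, golden_settings_example,
 *						ARRAY_SIZE(golden_settings_example));
 *
 * read-modify-writes each register: the bits covered by and_mask are
 * cleared and or_mask is OR'ed in (masked by and_mask on AMDGPU_FAMILY_AI
 * and newer).
 */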
e3ecdffa
AD
903/**
904 * amdgpu_device_pci_config_reset - reset the GPU
905 *
906 * @adev: amdgpu_device pointer
907 *
908 * Resets the GPU using the pci config reset sequence.
909 * Only applicable to asics prior to vega10.
910 */
8111c387 911void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
912{
913 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
914}
915
916/*
917 * GPU doorbell aperture helpers function.
918 */
919/**
06ec9070 920 * amdgpu_device_doorbell_init - Init doorbell driver information.
d38ceaf9
AD
921 *
922 * @adev: amdgpu_device pointer
923 *
924 * Init doorbell driver information (CIK)
925 * Returns 0 on success, error on failure.
926 */
06ec9070 927static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
d38ceaf9 928{
6585661d 929
705e519e
CK
930 /* No doorbell on SI hardware generation */
931 if (adev->asic_type < CHIP_BONAIRE) {
932 adev->doorbell.base = 0;
933 adev->doorbell.size = 0;
934 adev->doorbell.num_doorbells = 0;
935 adev->doorbell.ptr = NULL;
936 return 0;
937 }
938
d6895ad3
CK
939 if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
940 return -EINVAL;
941
22357775
AD
942 amdgpu_asic_init_doorbell_index(adev);
943
d38ceaf9
AD
944 /* doorbell bar mapping */
945 adev->doorbell.base = pci_resource_start(adev->pdev, 2);
946 adev->doorbell.size = pci_resource_len(adev->pdev, 2);
947
edf600da 948 adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
9564f192 949 adev->doorbell_index.max_assignment+1);
d38ceaf9
AD
950 if (adev->doorbell.num_doorbells == 0)
951 return -EINVAL;
952
ec3db8a6 953 /* For Vega, reserve and map two pages on doorbell BAR since SDMA
88dc26e4
OZ
954 * paging queue doorbell uses the second page. The
955 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
956 * doorbells are in the first page. So with paging queue enabled,
957 * the max num_doorbells should be increased by one page (0x400 in dwords)
ec3db8a6
PY
958 */
959 if (adev->asic_type >= CHIP_VEGA10)
88dc26e4 960 adev->doorbell.num_doorbells += 0x400;
ec3db8a6 961
8972e5d2
CK
962 adev->doorbell.ptr = ioremap(adev->doorbell.base,
963 adev->doorbell.num_doorbells *
964 sizeof(u32));
965 if (adev->doorbell.ptr == NULL)
d38ceaf9 966 return -ENOMEM;
d38ceaf9
AD
967
968 return 0;
969}
970
971/**
06ec9070 972 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
d38ceaf9
AD
973 *
974 * @adev: amdgpu_device pointer
975 *
976 * Tear down doorbell driver information (CIK)
977 */
06ec9070 978static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
d38ceaf9
AD
979{
980 iounmap(adev->doorbell.ptr);
981 adev->doorbell.ptr = NULL;
982}
983
22cb0164 984
d38ceaf9
AD
985
986/*
06ec9070 987 * amdgpu_device_wb_*()
455a7bc2 988 * Writeback is the method by which the GPU updates special pages in memory
ea81a173 989 * with the status of certain GPU events (fences, ring pointers, etc.).
d38ceaf9
AD
990 */
991
992/**
06ec9070 993 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
994 *
995 * @adev: amdgpu_device pointer
996 *
997 * Disables Writeback and frees the Writeback memory (all asics).
998 * Used at driver shutdown.
999 */
06ec9070 1000static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1001{
1002 if (adev->wb.wb_obj) {
a76ed485
AD
1003 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1004 &adev->wb.gpu_addr,
1005 (void **)&adev->wb.wb);
d38ceaf9
AD
1006 adev->wb.wb_obj = NULL;
1007 }
1008}
1009
1010/**
06ec9070 1011 * amdgpu_device_wb_init- Init Writeback driver info and allocate memory
d38ceaf9
AD
1012 *
1013 * @adev: amdgpu_device pointer
1014 *
455a7bc2 1015 * Initializes writeback and allocates writeback memory (all asics).
d38ceaf9
AD
1016 * Used at driver startup.
1017 * Returns 0 on success or a negative error code on failure.
1018 */
06ec9070 1019static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
1020{
1021 int r;
1022
1023 if (adev->wb.wb_obj == NULL) {
97407b63
AD
1024 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1025 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
1026 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1027 &adev->wb.wb_obj, &adev->wb.gpu_addr,
1028 (void **)&adev->wb.wb);
d38ceaf9
AD
1029 if (r) {
1030 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1031 return r;
1032 }
d38ceaf9
AD
1033
1034 adev->wb.num_wb = AMDGPU_MAX_WB;
1035 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1036
1037 /* clear wb memory */
73469585 1038 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
1039 }
1040
1041 return 0;
1042}
1043
1044/**
131b4b36 1045 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
1046 *
1047 * @adev: amdgpu_device pointer
1048 * @wb: wb index
1049 *
1050 * Allocate a wb slot for use by the driver (all asics).
1051 * Returns 0 on success or -EINVAL on failure.
1052 */
131b4b36 1053int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9
AD
1054{
1055 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 1056
97407b63 1057 if (offset < adev->wb.num_wb) {
7014285a 1058 __set_bit(offset, adev->wb.used);
63ae07ca 1059 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
1060 return 0;
1061 } else {
1062 return -EINVAL;
1063 }
1064}
1065
d38ceaf9 1066/**
131b4b36 1067 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
1068 *
1069 * @adev: amdgpu_device pointer
1070 * @wb: wb index
1071 *
1072 * Free a wb slot allocated for use by the driver (all asics)
1073 */
131b4b36 1074void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 1075{
73469585 1076 wb >>= 3;
d38ceaf9 1077 if (wb < adev->wb.num_wb)
73469585 1078 __clear_bit(wb, adev->wb.used);
d38ceaf9
AD
1079}
1080
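/*
 * Usage sketch (illustrative): a caller that needs a writeback slot, e.g.
 * for a fence or ring pointer, allocates a dword offset, derives the CPU
 * and GPU views from the wb structure initialized above, and releases the
 * slot when done.
 *
 *	u32 wb;
 *	int r = amdgpu_device_wb_get(adev, &wb);
 *
 *	if (!r) {
 *		volatile u32 *cpu_ptr = &adev->wb.wb[wb];
 *		u64 gpu_addr = adev->wb.gpu_addr + (wb * 4);
 *		...
 *		amdgpu_device_wb_free(adev, wb);
 *	}
 */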
d6895ad3
CK
1081/**
1082 * amdgpu_device_resize_fb_bar - try to resize FB BAR
1083 *
1084 * @adev: amdgpu_device pointer
1085 *
1086 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1087 * to fail, but if any of the BARs is not accessible after the size we abort
1088 * driver loading by returning -ENODEV.
1089 */
1090int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1091{
770d13b1 1092 u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
d6895ad3 1093 u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
31b8adab
CK
1094 struct pci_bus *root;
1095 struct resource *res;
1096 unsigned i;
d6895ad3
CK
1097 u16 cmd;
1098 int r;
1099
0c03b912 1100 /* Bypass for VF */
1101 if (amdgpu_sriov_vf(adev))
1102 return 0;
1103
b7221f2b
AD
1104 /* skip if the bios has already enabled large BAR */
1105 if (adev->gmc.real_vram_size &&
1106 (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1107 return 0;
1108
31b8adab
CK
1109 /* Check if the root BUS has 64bit memory resources */
1110 root = adev->pdev->bus;
1111 while (root->parent)
1112 root = root->parent;
1113
1114 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 1115 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
1116 res->start > 0x100000000ull)
1117 break;
1118 }
1119
1120 /* Trying to resize is pointless without a root hub window above 4GB */
1121 if (!res)
1122 return 0;
1123
d6895ad3
CK
1124 /* Disable memory decoding while we change the BAR addresses and size */
1125 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1126 pci_write_config_word(adev->pdev, PCI_COMMAND,
1127 cmd & ~PCI_COMMAND_MEMORY);
1128
1129 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
06ec9070 1130 amdgpu_device_doorbell_fini(adev);
d6895ad3
CK
1131 if (adev->asic_type >= CHIP_BONAIRE)
1132 pci_release_resource(adev->pdev, 2);
1133
1134 pci_release_resource(adev->pdev, 0);
1135
1136 r = pci_resize_resource(adev->pdev, 0, rbar_size);
1137 if (r == -ENOSPC)
1138 DRM_INFO("Not enough PCI address space for a large BAR.");
1139 else if (r && r != -ENOTSUPP)
1140 DRM_ERROR("Problem resizing BAR0 (%d).", r);
1141
1142 pci_assign_unassigned_bus_resources(adev->pdev->bus);
1143
1144 /* When the doorbell or fb BAR isn't available we have no chance of
1145 * using the device.
1146 */
06ec9070 1147 r = amdgpu_device_doorbell_init(adev);
d6895ad3
CK
1148 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1149 return -ENODEV;
1150
1151 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1152
1153 return 0;
1154}
a05502e5 1155
d38ceaf9
AD
1156/*
1157 * GPU helpers function.
1158 */
1159/**
39c640c0 1160 * amdgpu_device_need_post - check if the hw need post or not
d38ceaf9
AD
1161 *
1162 * @adev: amdgpu_device pointer
1163 *
c836fec5
JQ
1164 * Check if the asic has been initialized (all asics) at driver startup
1165 * or post is needed if hw reset is performed.
1166 * Returns true if post is needed or false if not.
d38ceaf9 1167 */
39c640c0 1168bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
1169{
1170 uint32_t reg;
1171
bec86378
ML
1172 if (amdgpu_sriov_vf(adev))
1173 return false;
1174
1175 if (amdgpu_passthrough(adev)) {
1da2c326
ML
1176 /* for FIJI: In the whole-GPU pass-through virtualization case, after a VM reboot
1177 * some old SMC firmware still needs the driver to do vPost, otherwise the GPU
1178 * hangs. SMC firmware versions above 22.15 don't have this flaw, so we force
1179 * vPost to be executed for SMC versions below 22.15
bec86378
ML
1180 */
1181 if (adev->asic_type == CHIP_FIJI) {
1182 int err;
1183 uint32_t fw_ver;
1184 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1185 /* force vPost if error occurred */
1186 if (err)
1187 return true;
1188
1189 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1da2c326
ML
1190 if (fw_ver < 0x00160e00)
1191 return true;
bec86378 1192 }
bec86378 1193 }
91fe77eb 1194
1195 if (adev->has_hw_reset) {
1196 adev->has_hw_reset = false;
1197 return true;
1198 }
1199
1200 /* bios scratch used on CIK+ */
1201 if (adev->asic_type >= CHIP_BONAIRE)
1202 return amdgpu_atombios_scratch_need_asic_init(adev);
1203
1204 /* check MEM_SIZE for older asics */
1205 reg = amdgpu_asic_get_config_memsize(adev);
1206
1207 if ((reg != 0) && (reg != 0xffffffff))
1208 return false;
1209
1210 return true;
bec86378
ML
1211}
1212
d38ceaf9
AD
1213/* if we get transitioned to only one device, take VGA back */
1214/**
06ec9070 1215 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9
AD
1216 *
1217 * @cookie: amdgpu_device pointer
1218 * @state: enable/disable vga decode
1219 *
1220 * Enable/disable vga decode (all asics).
1221 * Returns VGA resource flags.
1222 */
06ec9070 1223static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
d38ceaf9
AD
1224{
1225 struct amdgpu_device *adev = cookie;
1226 amdgpu_asic_set_vga_state(adev, state);
1227 if (state)
1228 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1229 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1230 else
1231 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1232}
1233
e3ecdffa
AD
1234/**
1235 * amdgpu_device_check_block_size - validate the vm block size
1236 *
1237 * @adev: amdgpu_device pointer
1238 *
1239 * Validates the vm block size specified via module parameter.
1240 * The vm block size defines the number of bits in page table versus page directory,
1241 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1242 * page table and the remaining bits are in the page directory.
1243 */
06ec9070 1244static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
1245{
1246 /* defines number of bits in page table versus page directory,
1247 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1248 * page table and the remaining bits are in the page directory */
bab4fee7
JZ
1249 if (amdgpu_vm_block_size == -1)
1250 return;
a1adf8be 1251
bab4fee7 1252 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
1253 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1254 amdgpu_vm_block_size);
97489129 1255 amdgpu_vm_block_size = -1;
a1adf8be 1256 }
a1adf8be
CZ
1257}
1258
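/*
 * Worked example for the block-size math above (illustrative): with 4KB
 * pages there are 12 offset bits, so the minimum vm_block_size of 9 gives
 * 2^9 * 4KB = 2MB of address space covered by one page-table block; the
 * remaining bits of the VM address are resolved through the page-directory
 * levels.
 */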
e3ecdffa
AD
1259/**
1260 * amdgpu_device_check_vm_size - validate the vm size
1261 *
1262 * @adev: amdgpu_device pointer
1263 *
1264 * Validates the vm size in GB specified via module parameter.
1265 * The VM size is the size of the GPU virtual memory space in GB.
1266 */
06ec9070 1267static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 1268{
64dab074
AD
1269 /* no need to check the default value */
1270 if (amdgpu_vm_size == -1)
1271 return;
1272
83ca145d
ZJ
1273 if (amdgpu_vm_size < 1) {
1274 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1275 amdgpu_vm_size);
f3368128 1276 amdgpu_vm_size = -1;
83ca145d 1277 }
83ca145d
ZJ
1278}
1279
7951e376
RZ
1280static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1281{
1282 struct sysinfo si;
a9d4fe2f 1283 bool is_os_64 = (sizeof(void *) == 8);
7951e376
RZ
1284 uint64_t total_memory;
1285 uint64_t dram_size_seven_GB = 0x1B8000000;
1286 uint64_t dram_size_three_GB = 0xB8000000;
1287
1288 if (amdgpu_smu_memory_pool_size == 0)
1289 return;
1290
1291 if (!is_os_64) {
1292 DRM_WARN("Not 64-bit OS, feature not supported\n");
1293 goto def_value;
1294 }
1295 si_meminfo(&si);
1296 total_memory = (uint64_t)si.totalram * si.mem_unit;
1297
1298 if ((amdgpu_smu_memory_pool_size == 1) ||
1299 (amdgpu_smu_memory_pool_size == 2)) {
1300 if (total_memory < dram_size_three_GB)
1301 goto def_value1;
1302 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1303 (amdgpu_smu_memory_pool_size == 8)) {
1304 if (total_memory < dram_size_seven_GB)
1305 goto def_value1;
1306 } else {
1307 DRM_WARN("Smu memory pool size not supported\n");
1308 goto def_value;
1309 }
1310 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1311
1312 return;
1313
1314def_value1:
1315 DRM_WARN("No enough system memory\n");
1316def_value:
1317 adev->pm.smu_prv_buffer_size = 0;
1318}
1319
d38ceaf9 1320/**
06ec9070 1321 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
1322 *
1323 * @adev: amdgpu_device pointer
1324 *
1325 * Validates certain module parameters and updates
1326 * the associated values used by the driver (all asics).
1327 */
912dfc84 1328static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1329{
5b011235
CZ
1330 if (amdgpu_sched_jobs < 4) {
1331 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1332 amdgpu_sched_jobs);
1333 amdgpu_sched_jobs = 4;
76117507 1334 } else if (!is_power_of_2(amdgpu_sched_jobs)){
5b011235
CZ
1335 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1336 amdgpu_sched_jobs);
1337 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1338 }
d38ceaf9 1339
83e74db6 1340 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
1341 /* gart size must be greater or equal to 32M */
1342 dev_warn(adev->dev, "gart size (%d) too small\n",
1343 amdgpu_gart_size);
83e74db6 1344 amdgpu_gart_size = -1;
d38ceaf9
AD
1345 }
1346
36d38372 1347 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1348 /* gtt size must be greater or equal to 32M */
36d38372
CK
1349 dev_warn(adev->dev, "gtt size (%d) too small\n",
1350 amdgpu_gtt_size);
1351 amdgpu_gtt_size = -1;
d38ceaf9
AD
1352 }
1353
d07f14be
RH
1354 /* valid range is between 4 and 9 inclusive */
1355 if (amdgpu_vm_fragment_size != -1 &&
1356 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1357 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1358 amdgpu_vm_fragment_size = -1;
1359 }
1360
5d5bd5e3
KW
1361 if (amdgpu_sched_hw_submission < 2) {
1362 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1363 amdgpu_sched_hw_submission);
1364 amdgpu_sched_hw_submission = 2;
1365 } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1366 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1367 amdgpu_sched_hw_submission);
1368 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1369 }
1370
7951e376
RZ
1371 amdgpu_device_check_smu_prv_buffer_size(adev);
1372
06ec9070 1373 amdgpu_device_check_vm_size(adev);
d38ceaf9 1374
06ec9070 1375 amdgpu_device_check_block_size(adev);
6a7f76e7 1376
19aede77 1377 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84 1378
c6252390 1379 amdgpu_gmc_tmz_set(adev);
01a8dcec 1380
9b498efa
AD
1381 amdgpu_gmc_noretry_set(adev);
1382
e3c00faa 1383 return 0;
d38ceaf9
AD
1384}
1385
1386/**
1387 * amdgpu_switcheroo_set_state - set switcheroo state
1388 *
1389 * @pdev: pci dev pointer
1694467b 1390 * @state: vga_switcheroo state
d38ceaf9
AD
1391 *
1392 * Callback for the switcheroo driver. Suspends or resumes the
1393 * asic before or after it is powered up using ACPI methods.
1394 */
8aba21b7
LT
1395static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1396 enum vga_switcheroo_state state)
d38ceaf9
AD
1397{
1398 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 1399 int r;
d38ceaf9 1400
31af062a 1401 if (amdgpu_device_supports_boco(dev) && state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
1402 return;
1403
1404 if (state == VGA_SWITCHEROO_ON) {
dd4fa6c1 1405 pr_info("switched on\n");
d38ceaf9
AD
1406 /* don't suspend or resume card normally */
1407 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1408
de185019 1409 pci_set_power_state(dev->pdev, PCI_D0);
c1dd4aa6 1410 amdgpu_device_load_pci_state(dev->pdev);
de185019
AD
1411 r = pci_enable_device(dev->pdev);
1412 if (r)
1413 DRM_WARN("pci_enable_device failed (%d)\n", r);
1414 amdgpu_device_resume(dev, true);
d38ceaf9 1415
d38ceaf9
AD
1416 dev->switch_power_state = DRM_SWITCH_POWER_ON;
1417 drm_kms_helper_poll_enable(dev);
1418 } else {
dd4fa6c1 1419 pr_info("switched off\n");
d38ceaf9
AD
1420 drm_kms_helper_poll_disable(dev);
1421 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
de185019 1422 amdgpu_device_suspend(dev, true);
c1dd4aa6 1423 amdgpu_device_cache_pci_state(dev->pdev);
de185019
AD
1424 /* Shut down the device */
1425 pci_disable_device(dev->pdev);
1426 pci_set_power_state(dev->pdev, PCI_D3cold);
d38ceaf9
AD
1427 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1428 }
1429}
1430
1431/**
1432 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1433 *
1434 * @pdev: pci dev pointer
1435 *
1436 * Callback for the switcheroo driver. Check if the switcheroo
1437 * state can be changed.
1438 * Returns true if the state can be changed, false if not.
1439 */
1440static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1441{
1442 struct drm_device *dev = pci_get_drvdata(pdev);
1443
1444 /*
1445 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1446 * locking inversion with the driver load path. And the access here is
1447 * completely racy anyway. So don't bother with locking for now.
1448 */
7e13ad89 1449 return atomic_read(&dev->open_count) == 0;
d38ceaf9
AD
1450}
1451
1452static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1453 .set_gpu_state = amdgpu_switcheroo_set_state,
1454 .reprobe = NULL,
1455 .can_switch = amdgpu_switcheroo_can_switch,
1456};
1457
e3ecdffa
AD
1458/**
1459 * amdgpu_device_ip_set_clockgating_state - set the CG state
1460 *
87e3f136 1461 * @dev: amdgpu_device pointer
e3ecdffa
AD
1462 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1463 * @state: clockgating state (gate or ungate)
1464 *
1465 * Sets the requested clockgating state for all instances of
1466 * the hardware IP specified.
1467 * Returns the error code from the last instance.
1468 */
43fa561f 1469int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1470 enum amd_ip_block_type block_type,
1471 enum amd_clockgating_state state)
d38ceaf9 1472{
43fa561f 1473 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1474 int i, r = 0;
1475
1476 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1477 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1478 continue;
c722865a
RZ
1479 if (adev->ip_blocks[i].version->type != block_type)
1480 continue;
1481 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1482 continue;
1483 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1484 (void *)adev, state);
1485 if (r)
1486 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1487 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1488 }
1489 return r;
1490}
1491
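/*
 * Usage sketch (illustrative): IP code gates or ungates a whole block type
 * through this helper, e.g. clock-gating the GFX block:
 *
 *	amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
 *					       AMD_CG_STATE_GATE);
 *
 * The helper walks every registered IP instance of that type and returns
 * the error code of the last instance, as documented above.
 */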
e3ecdffa
AD
1492/**
1493 * amdgpu_device_ip_set_powergating_state - set the PG state
1494 *
87e3f136 1495 * @dev: amdgpu_device pointer
e3ecdffa
AD
1496 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1497 * @state: powergating state (gate or ungate)
1498 *
1499 * Sets the requested powergating state for all instances of
1500 * the hardware IP specified.
1501 * Returns the error code from the last instance.
1502 */
43fa561f 1503int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1504 enum amd_ip_block_type block_type,
1505 enum amd_powergating_state state)
d38ceaf9 1506{
43fa561f 1507 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1508 int i, r = 0;
1509
1510 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1511 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1512 continue;
c722865a
RZ
1513 if (adev->ip_blocks[i].version->type != block_type)
1514 continue;
1515 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1516 continue;
1517 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1518 (void *)adev, state);
1519 if (r)
1520 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1521 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1522 }
1523 return r;
1524}
1525
e3ecdffa
AD
1526/**
1527 * amdgpu_device_ip_get_clockgating_state - get the CG state
1528 *
1529 * @adev: amdgpu_device pointer
1530 * @flags: clockgating feature flags
1531 *
1532 * Walks the list of IPs on the device and updates the clockgating
1533 * flags for each IP.
1534 * Updates @flags with the feature flags for each hardware IP where
1535 * clockgating is enabled.
1536 */
2990a1fc
AD
1537void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1538 u32 *flags)
6cb2d4e4
HR
1539{
1540 int i;
1541
1542 for (i = 0; i < adev->num_ip_blocks; i++) {
1543 if (!adev->ip_blocks[i].status.valid)
1544 continue;
1545 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1546 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1547 }
1548}
1549
e3ecdffa
AD
1550/**
1551 * amdgpu_device_ip_wait_for_idle - wait for idle
1552 *
1553 * @adev: amdgpu_device pointer
1554 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1555 *
1556 * Waits for the requested hardware IP to be idle.
1557 * Returns 0 for success or a negative error code on failure.
1558 */
2990a1fc
AD
1559int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1560 enum amd_ip_block_type block_type)
5dbbb60b
AD
1561{
1562 int i, r;
1563
1564 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1565 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1566 continue;
a1255107
AD
1567 if (adev->ip_blocks[i].version->type == block_type) {
1568 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1569 if (r)
1570 return r;
1571 break;
1572 }
1573 }
1574 return 0;
1575
1576}
1577
e3ecdffa
AD
1578/**
1579 * amdgpu_device_ip_is_idle - is the hardware IP idle
1580 *
1581 * @adev: amdgpu_device pointer
1582 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1583 *
1584 * Check if the hardware IP is idle or not.
1585 * Returns true if the IP is idle, false if not.
1586 */
2990a1fc
AD
1587bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1588 enum amd_ip_block_type block_type)
5dbbb60b
AD
1589{
1590 int i;
1591
1592 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1593 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1594 continue;
a1255107
AD
1595 if (adev->ip_blocks[i].version->type == block_type)
1596 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1597 }
1598 return true;
1599
1600}
1601
e3ecdffa
AD
1602/**
1603 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1604 *
1605 * @adev: amdgpu_device pointer
87e3f136 1606 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1607 *
1608 * Returns a pointer to the hardware IP block structure
1609 * if it exists for the asic, otherwise NULL.
1610 */
2990a1fc
AD
1611struct amdgpu_ip_block *
1612amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1613 enum amd_ip_block_type type)
d38ceaf9
AD
1614{
1615 int i;
1616
1617 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1618 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1619 return &adev->ip_blocks[i];
1620
1621 return NULL;
1622}
1623
1624/**
2990a1fc 1625 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1626 *
1627 * @adev: amdgpu_device pointer
5fc3aeeb 1628 * @type: enum amd_ip_block_type
d38ceaf9
AD
1629 * @major: major version
1630 * @minor: minor version
1631 *
1632 * return 0 if equal or greater
1633 * return 1 if smaller or the ip_block doesn't exist
1634 */
2990a1fc
AD
1635int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1636 enum amd_ip_block_type type,
1637 u32 major, u32 minor)
d38ceaf9 1638{
2990a1fc 1639 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1640
a1255107
AD
1641 if (ip_block && ((ip_block->version->major > major) ||
1642 ((ip_block->version->major == major) &&
1643 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1644 return 0;
1645
1646 return 1;
1647}
1648
a1255107 1649/**
2990a1fc 1650 * amdgpu_device_ip_block_add
a1255107
AD
1651 *
1652 * @adev: amdgpu_device pointer
1653 * @ip_block_version: pointer to the IP to add
1654 *
1655 * Adds the IP block driver information to the collection of IPs
1656 * on the asic.
1657 */
2990a1fc
AD
1658int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1659 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1660{
1661 if (!ip_block_version)
1662 return -EINVAL;
1663
e966a725 1664 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1665 ip_block_version->funcs->name);
1666
a1255107
AD
1667 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1668
1669 return 0;
1670}
1671
e3ecdffa
AD
1672/**
1673 * amdgpu_device_enable_virtual_display - enable virtual display feature
1674 *
1675 * @adev: amdgpu_device pointer
1676 *
1677 * Enables the virtual display feature if the user has enabled it via
1678 * the module parameter virtual_display. This feature provides a virtual
1679 * display hardware on headless boards or in virtualized environments.
1680 * This function parses and validates the configuration string specified by
1681 * the user and configures the virtual display configuration (number of
1682 * virtual connectors, crtcs, etc.) specified.
1683 */
483ef985 1684static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1685{
1686 adev->enable_virtual_display = false;
1687
1688 if (amdgpu_virtual_display) {
4a580877 1689 struct drm_device *ddev = adev_to_drm(adev);
9accf2fd 1690 const char *pci_address_name = pci_name(ddev->pdev);
0f66356d 1691 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1692
1693 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1694 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1695 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1696 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1697 if (!strcmp("all", pciaddname)
1698 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1699 long num_crtc;
1700 int res = -1;
1701
9accf2fd 1702 adev->enable_virtual_display = true;
0f66356d
ED
1703
1704 if (pciaddname_tmp)
1705 res = kstrtol(pciaddname_tmp, 10,
1706 &num_crtc);
1707
1708 if (!res) {
1709 if (num_crtc < 1)
1710 num_crtc = 1;
1711 if (num_crtc > 6)
1712 num_crtc = 6;
1713 adev->mode_info.num_crtc = num_crtc;
1714 } else {
1715 adev->mode_info.num_crtc = 1;
1716 }
9accf2fd
ED
1717 break;
1718 }
1719 }
1720
0f66356d
ED
1721 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1722 amdgpu_virtual_display, pci_address_name,
1723 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
1724
1725 kfree(pciaddstr);
1726 }
1727}
1728
e3ecdffa
AD
1729/**
1730 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1731 *
1732 * @adev: amdgpu_device pointer
1733 *
1734 * Parses the asic configuration parameters specified in the gpu info
1735 * firmware and makes them available to the driver for use in configuring
1736 * the asic.
1737 * Returns 0 on success, -EINVAL on failure.
1738 */
e2a75f88
AD
1739static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1740{
e2a75f88 1741 const char *chip_name;
c0a43457 1742 char fw_name[40];
e2a75f88
AD
1743 int err;
1744 const struct gpu_info_firmware_header_v1_0 *hdr;
1745
ab4fe3e1
HR
1746 adev->firmware.gpu_info_fw = NULL;
1747
72de33f8 1748 if (adev->mman.discovery_bin) {
258620d0 1749 amdgpu_discovery_get_gfx_info(adev);
cc375d8c
TY
1750
1751 /*
1752 * FIXME: The bounding box is still needed by Navi12, so
1753 * temporarily read it from gpu_info firmware. Should be dropped
1754 * when DAL no longer needs it.
1755 */
1756 if (adev->asic_type != CHIP_NAVI12)
1757 return 0;
258620d0
AD
1758 }
1759
e2a75f88 1760 switch (adev->asic_type) {
e2a75f88
AD
1761#ifdef CONFIG_DRM_AMDGPU_SI
1762 case CHIP_VERDE:
1763 case CHIP_TAHITI:
1764 case CHIP_PITCAIRN:
1765 case CHIP_OLAND:
1766 case CHIP_HAINAN:
1767#endif
1768#ifdef CONFIG_DRM_AMDGPU_CIK
1769 case CHIP_BONAIRE:
1770 case CHIP_HAWAII:
1771 case CHIP_KAVERI:
1772 case CHIP_KABINI:
1773 case CHIP_MULLINS:
1774#endif
da87c30b
AD
1775 case CHIP_TOPAZ:
1776 case CHIP_TONGA:
1777 case CHIP_FIJI:
1778 case CHIP_POLARIS10:
1779 case CHIP_POLARIS11:
1780 case CHIP_POLARIS12:
1781 case CHIP_VEGAM:
1782 case CHIP_CARRIZO:
1783 case CHIP_STONEY:
27c0bc71 1784 case CHIP_VEGA20:
84d244a3
JC
1785 case CHIP_SIENNA_CICHLID:
1786 case CHIP_NAVY_FLOUNDER:
eac88a5f 1787 case CHIP_DIMGREY_CAVEFISH:
e2a75f88
AD
1788 default:
1789 return 0;
1790 case CHIP_VEGA10:
1791 chip_name = "vega10";
1792 break;
3f76dced
AD
1793 case CHIP_VEGA12:
1794 chip_name = "vega12";
1795 break;
2d2e5e7e 1796 case CHIP_RAVEN:
54f78a76 1797 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
54c4d17e 1798 chip_name = "raven2";
54f78a76 1799 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
741deade 1800 chip_name = "picasso";
54c4d17e
FX
1801 else
1802 chip_name = "raven";
2d2e5e7e 1803 break;
65e60f6e
LM
1804 case CHIP_ARCTURUS:
1805 chip_name = "arcturus";
1806 break;
b51a26a0 1807 case CHIP_RENOIR:
2e62f0b5
PL
1808 if (adev->apu_flags & AMD_APU_IS_RENOIR)
1809 chip_name = "renoir";
1810 else
1811 chip_name = "green_sardine";
b51a26a0 1812 break;
23c6268e
HR
1813 case CHIP_NAVI10:
1814 chip_name = "navi10";
1815 break;
ed42cfe1
XY
1816 case CHIP_NAVI14:
1817 chip_name = "navi14";
1818 break;
42b325e5
XY
1819 case CHIP_NAVI12:
1820 chip_name = "navi12";
1821 break;
4e52a9f8
HR
1822 case CHIP_VANGOGH:
1823 chip_name = "vangogh";
1824 break;
e2a75f88
AD
1825 }
1826
1827 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
ab4fe3e1 1828 err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
e2a75f88
AD
1829 if (err) {
1830 dev_err(adev->dev,
1831 "Failed to load gpu_info firmware \"%s\"\n",
1832 fw_name);
1833 goto out;
1834 }
ab4fe3e1 1835 err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
e2a75f88
AD
1836 if (err) {
1837 dev_err(adev->dev,
1838 "Failed to validate gpu_info firmware \"%s\"\n",
1839 fw_name);
1840 goto out;
1841 }
1842
ab4fe3e1 1843 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
1844 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1845
1846 switch (hdr->version_major) {
1847 case 1:
1848 {
1849 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 1850 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
1851 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1852
cc375d8c
TY
1853 /*
1854 * Should be dropped when DAL no longer needs it.
1855 */
1856 if (adev->asic_type == CHIP_NAVI12)
ec51d3fa
XY
1857 goto parse_soc_bounding_box;
1858
b5ab16bf
AD
1859 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1860 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1861 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1862 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 1863 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
1864 le32_to_cpu(gpu_info_fw->gc_num_tccs);
1865 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1866 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1867 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1868 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 1869 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
1870 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1871 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
1872 adev->gfx.cu_info.max_waves_per_simd =
1873 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1874 adev->gfx.cu_info.max_scratch_slots_per_cu =
1875 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1876 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 1877 if (hdr->version_minor >= 1) {
35c2e910
HZ
1878 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1879 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1880 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1881 adev->gfx.config.num_sc_per_sh =
1882 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1883 adev->gfx.config.num_packer_per_sc =
1884 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1885 }
ec51d3fa
XY
1886
1887parse_soc_bounding_box:
ec51d3fa
XY
1888 /*
1889 * soc bounding box info is not integrated in the discovery table,
258620d0 1890 * so we still need to parse it from the gpu info firmware when needed.
ec51d3fa 1891 */
48321c3d
HW
1892 if (hdr->version_minor == 2) {
1893 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1894 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1895 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1896 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1897 }
e2a75f88
AD
1898 break;
1899 }
1900 default:
1901 dev_err(adev->dev,
1902 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1903 err = -EINVAL;
1904 goto out;
1905 }
1906out:
e2a75f88
AD
1907 return err;
1908}
1909
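/*
 * For reference, the snprintf() above derives the firmware file from the
 * chip name, e.g. CHIP_VEGA10 resolves to "amdgpu/vega10_gpu_info.bin",
 * which request_firmware() then loads from the firmware search path.
 */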
e3ecdffa
AD
1910/**
1911 * amdgpu_device_ip_early_init - run early init for hardware IPs
1912 *
1913 * @adev: amdgpu_device pointer
1914 *
1915 * Early initialization pass for hardware IPs. The hardware IPs that make
1916 * up each asic are discovered and each IP's early_init callback is run. This
1917 * is the first stage in initializing the asic.
1918 * Returns 0 on success, negative error code on failure.
1919 */
06ec9070 1920static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 1921{
aaa36a97 1922 int i, r;
d38ceaf9 1923
483ef985 1924 amdgpu_device_enable_virtual_display(adev);
a6be7570 1925
00a979f3 1926 if (amdgpu_sriov_vf(adev)) {
00a979f3 1927 r = amdgpu_virt_request_full_gpu(adev, true);
aaa36a97
AD
1928 if (r)
1929 return r;
00a979f3
WS
1930 }
1931
d38ceaf9 1932 switch (adev->asic_type) {
33f34802
KW
1933#ifdef CONFIG_DRM_AMDGPU_SI
1934 case CHIP_VERDE:
1935 case CHIP_TAHITI:
1936 case CHIP_PITCAIRN:
1937 case CHIP_OLAND:
1938 case CHIP_HAINAN:
295d0daf 1939 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
1940 r = si_set_ip_blocks(adev);
1941 if (r)
1942 return r;
1943 break;
1944#endif
a2e73f56
AD
1945#ifdef CONFIG_DRM_AMDGPU_CIK
1946 case CHIP_BONAIRE:
1947 case CHIP_HAWAII:
1948 case CHIP_KAVERI:
1949 case CHIP_KABINI:
1950 case CHIP_MULLINS:
e1ad2d53 1951 if (adev->flags & AMD_IS_APU)
a2e73f56 1952 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
1953 else
1954 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
1955
1956 r = cik_set_ip_blocks(adev);
1957 if (r)
1958 return r;
1959 break;
1960#endif
da87c30b
AD
1961 case CHIP_TOPAZ:
1962 case CHIP_TONGA:
1963 case CHIP_FIJI:
1964 case CHIP_POLARIS10:
1965 case CHIP_POLARIS11:
1966 case CHIP_POLARIS12:
1967 case CHIP_VEGAM:
1968 case CHIP_CARRIZO:
1969 case CHIP_STONEY:
1970 if (adev->flags & AMD_IS_APU)
1971 adev->family = AMDGPU_FAMILY_CZ;
1972 else
1973 adev->family = AMDGPU_FAMILY_VI;
1974
1975 r = vi_set_ip_blocks(adev);
1976 if (r)
1977 return r;
1978 break;
e48a3cd9
AD
1979 case CHIP_VEGA10:
1980 case CHIP_VEGA12:
e4bd8170 1981 case CHIP_VEGA20:
e48a3cd9 1982 case CHIP_RAVEN:
61cf44c1 1983 case CHIP_ARCTURUS:
b51a26a0 1984 case CHIP_RENOIR:
70534d1e 1985 if (adev->flags & AMD_IS_APU)
2ca8a5d2
CZ
1986 adev->family = AMDGPU_FAMILY_RV;
1987 else
1988 adev->family = AMDGPU_FAMILY_AI;
460826e6
KW
1989
1990 r = soc15_set_ip_blocks(adev);
1991 if (r)
1992 return r;
1993 break;
0a5b8c7b 1994 case CHIP_NAVI10:
7ecb5cd4 1995 case CHIP_NAVI14:
4808cf9c 1996 case CHIP_NAVI12:
11e8aef5 1997 case CHIP_SIENNA_CICHLID:
41f446bf 1998 case CHIP_NAVY_FLOUNDER:
144722fa 1999 case CHIP_DIMGREY_CAVEFISH:
4e52a9f8
HR
2000 case CHIP_VANGOGH:
2001 if (adev->asic_type == CHIP_VANGOGH)
2002 adev->family = AMDGPU_FAMILY_VGH;
2003 else
2004 adev->family = AMDGPU_FAMILY_NV;
0a5b8c7b
HR
2005
2006 r = nv_set_ip_blocks(adev);
2007 if (r)
2008 return r;
2009 break;
d38ceaf9
AD
2010 default:
2011 /* FIXME: not supported yet */
2012 return -EINVAL;
2013 }
2014
1884734a 2015 amdgpu_amdkfd_device_probe(adev);
2016
3b94fb10 2017 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 2018 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 2019 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
00f54b97 2020
d38ceaf9
AD
2021 for (i = 0; i < adev->num_ip_blocks; i++) {
2022 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
ed8cf00c
HR
2023 DRM_ERROR("disabled ip block: %d <%s>\n",
2024 i, adev->ip_blocks[i].version->funcs->name);
a1255107 2025 adev->ip_blocks[i].status.valid = false;
d38ceaf9 2026 } else {
a1255107
AD
2027 if (adev->ip_blocks[i].version->funcs->early_init) {
2028 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 2029 if (r == -ENOENT) {
a1255107 2030 adev->ip_blocks[i].status.valid = false;
2c1a2784 2031 } else if (r) {
a1255107
AD
2032 DRM_ERROR("early_init of IP block <%s> failed %d\n",
2033 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2034 return r;
2c1a2784 2035 } else {
a1255107 2036 adev->ip_blocks[i].status.valid = true;
2c1a2784 2037 }
974e6b64 2038 } else {
a1255107 2039 adev->ip_blocks[i].status.valid = true;
d38ceaf9 2040 }
d38ceaf9 2041 }
21a249ca
AD
2042 /* get the vbios after the asic_funcs are set up */
2043 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
2044 r = amdgpu_device_parse_gpu_info_fw(adev);
2045 if (r)
2046 return r;
2047
21a249ca
AD
2048 /* Read BIOS */
2049 if (!amdgpu_get_bios(adev))
2050 return -EINVAL;
2051
2052 r = amdgpu_atombios_init(adev);
2053 if (r) {
2054 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2055 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2056 return r;
2057 }
2058 }
d38ceaf9
AD
2059 }
2060
395d1fb9
NH
2061 adev->cg_flags &= amdgpu_cg_mask;
2062 adev->pg_flags &= amdgpu_pg_mask;
2063
d38ceaf9
AD
2064 return 0;
2065}
2066
0a4f2520
RZ
2067static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2068{
2069 int i, r;
2070
2071 for (i = 0; i < adev->num_ip_blocks; i++) {
2072 if (!adev->ip_blocks[i].status.sw)
2073 continue;
2074 if (adev->ip_blocks[i].status.hw)
2075 continue;
2076 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 2077 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
2078 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2079 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2080 if (r) {
2081 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2082 adev->ip_blocks[i].version->funcs->name, r);
2083 return r;
2084 }
2085 adev->ip_blocks[i].status.hw = true;
2086 }
2087 }
2088
2089 return 0;
2090}
2091
2092static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2093{
2094 int i, r;
2095
2096 for (i = 0; i < adev->num_ip_blocks; i++) {
2097 if (!adev->ip_blocks[i].status.sw)
2098 continue;
2099 if (adev->ip_blocks[i].status.hw)
2100 continue;
2101 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2102 if (r) {
2103 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2104 adev->ip_blocks[i].version->funcs->name, r);
2105 return r;
2106 }
2107 adev->ip_blocks[i].status.hw = true;
2108 }
2109
2110 return 0;
2111}
2112
7a3e0bb2
RZ
2113static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2114{
2115 int r = 0;
2116 int i;
80f41f84 2117 uint32_t smu_version;
7a3e0bb2
RZ
2118
2119 if (adev->asic_type >= CHIP_VEGA10) {
2120 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
2121 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2122 continue;
2123
2124 /* no need to do the fw loading again if already done*/
2125 if (adev->ip_blocks[i].status.hw == true)
2126 break;
2127
53b3f8f4 2128 if (amdgpu_in_reset(adev) || adev->in_suspend) {
482f0e53
ML
2129 r = adev->ip_blocks[i].version->funcs->resume(adev);
2130 if (r) {
2131 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 2132 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
2133 return r;
2134 }
2135 } else {
2136 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2137 if (r) {
2138 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2139 adev->ip_blocks[i].version->funcs->name, r);
2140 return r;
7a3e0bb2 2141 }
7a3e0bb2 2142 }
482f0e53
ML
2143
2144 adev->ip_blocks[i].status.hw = true;
2145 break;
7a3e0bb2
RZ
2146 }
2147 }
482f0e53 2148
8973d9ec
ED
2149 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2150 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 2151
80f41f84 2152 return r;
7a3e0bb2
RZ
2153}
2154
e3ecdffa
AD
2155/**
2156 * amdgpu_device_ip_init - run init for hardware IPs
2157 *
2158 * @adev: amdgpu_device pointer
2159 *
2160 * Main initialization pass for hardware IPs. The list of all the hardware
2161 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2162 * are run. sw_init initializes the software state associated with each IP
2163 * and hw_init initializes the hardware associated with each IP.
2164 * Returns 0 on success, negative error code on failure.
2165 */
06ec9070 2166static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
2167{
2168 int i, r;
2169
c030f2e4 2170 r = amdgpu_ras_init(adev);
2171 if (r)
2172 return r;
2173
d38ceaf9 2174 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2175 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2176 continue;
a1255107 2177 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 2178 if (r) {
a1255107
AD
2179 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2180 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 2181 goto init_failed;
2c1a2784 2182 }
a1255107 2183 adev->ip_blocks[i].status.sw = true;
bfca0289 2184
d38ceaf9 2185 /* need to do gmc hw init early so we can allocate gpu mem */
a1255107 2186 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
06ec9070 2187 r = amdgpu_device_vram_scratch_init(adev);
2c1a2784
AD
2188 if (r) {
2189 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
72d3f592 2190 goto init_failed;
2c1a2784 2191 }
a1255107 2192 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
2193 if (r) {
2194 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 2195 goto init_failed;
2c1a2784 2196 }
06ec9070 2197 r = amdgpu_device_wb_init(adev);
2c1a2784 2198 if (r) {
06ec9070 2199 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 2200 goto init_failed;
2c1a2784 2201 }
a1255107 2202 adev->ip_blocks[i].status.hw = true;
2493664f
ML
2203
2204 /* right after GMC hw init, we create CSA */
f92d5c61 2205 if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
1e256e27
RZ
2206 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2207 AMDGPU_GEM_DOMAIN_VRAM,
2208 AMDGPU_CSA_SIZE);
2493664f
ML
2209 if (r) {
2210 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 2211 goto init_failed;
2493664f
ML
2212 }
2213 }
d38ceaf9
AD
2214 }
2215 }
2216
c9ffa427
YT
2217 if (amdgpu_sriov_vf(adev))
2218 amdgpu_virt_init_data_exchange(adev);
2219
533aed27
AG
2220 r = amdgpu_ib_pool_init(adev);
2221 if (r) {
2222 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2223 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2224 goto init_failed;
2225 }
2226
c8963ea4
RZ
2227 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2228 if (r)
72d3f592 2229 goto init_failed;
0a4f2520
RZ
2230
2231 r = amdgpu_device_ip_hw_init_phase1(adev);
2232 if (r)
72d3f592 2233 goto init_failed;
0a4f2520 2234
7a3e0bb2
RZ
2235 r = amdgpu_device_fw_loading(adev);
2236 if (r)
72d3f592 2237 goto init_failed;
7a3e0bb2 2238
0a4f2520
RZ
2239 r = amdgpu_device_ip_hw_init_phase2(adev);
2240 if (r)
72d3f592 2241 goto init_failed;
d38ceaf9 2242
121a2bc6
AG
2243 /*
2244 * retired pages will be loaded from eeprom and reserved here,
2245 * it should be called after amdgpu_device_ip_hw_init_phase2 since
2246 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2247 * for I2C communication, which is only true at this point.
b82e65a9
GC
2248 *
2249 * amdgpu_ras_recovery_init may fail, but the upper layers only care
2250 * about failures caused by a bad gpu state, and stop the amdgpu init
2251 * process accordingly. For other failures, it still releases all
2252 * the resources and prints an error message, rather than returning a
2253 * negative value to the upper level.
121a2bc6
AG
2254 *
2255 * Note: theoretically, this should be called before all vram allocations
2256 * to prevent retired pages from being allocated and reused
2257 */
b82e65a9
GC
2258 r = amdgpu_ras_recovery_init(adev);
2259 if (r)
2260 goto init_failed;
121a2bc6 2261
3e2e2ab5
HZ
2262 if (adev->gmc.xgmi.num_physical_nodes > 1)
2263 amdgpu_xgmi_add_device(adev);
1884734a 2264 amdgpu_amdkfd_device_init(adev);
c6332b97 2265
bd607166
KR
2266 amdgpu_fru_get_product_info(adev);
2267
72d3f592 2268init_failed:
c9ffa427 2269 if (amdgpu_sriov_vf(adev))
c6332b97 2270 amdgpu_virt_release_full_gpu(adev, true);
2271
72d3f592 2272 return r;
d38ceaf9
AD
2273}
2274
e3ecdffa
AD
2275/**
2276 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2277 *
2278 * @adev: amdgpu_device pointer
2279 *
2280 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2281 * this function before a GPU reset. If the value is retained after a
2282 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2283 */
06ec9070 2284static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2285{
2286 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2287}
2288
e3ecdffa
AD
2289/**
2290 * amdgpu_device_check_vram_lost - check if vram is valid
2291 *
2292 * @adev: amdgpu_device pointer
2293 *
2294 * Checks the reset magic value written to the gart pointer in VRAM.
2295 * The driver calls this after a GPU reset to see if the contents of
2296 * VRAM are lost or not.
2297 * returns true if vram is lost, false if not.
2298 */
06ec9070 2299static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2300{
dadce777
EQ
2301 if (memcmp(adev->gart.ptr, adev->reset_magic,
2302 AMDGPU_RESET_MAGIC_NUM))
2303 return true;
2304
53b3f8f4 2305 if (!amdgpu_in_reset(adev))
dadce777
EQ
2306 return false;
2307
2308 /*
2309 * For all ASICs with baco/mode1 reset, the VRAM is
2310 * always assumed to be lost.
2311 */
2312 switch (amdgpu_asic_reset_method(adev)) {
2313 case AMD_RESET_METHOD_BACO:
2314 case AMD_RESET_METHOD_MODE1:
2315 return true;
2316 default:
2317 return false;
2318 }
0c49e0b8
CZ
2319}
2320
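/*
 * Sketch of how the two helpers above pair up (the reset path itself is
 * outside this excerpt): the magic is captured once the GART is functional
 * and compared again after a reset to decide whether VRAM contents must be
 * restored, roughly:
 *
 *	amdgpu_device_fill_reset_magic(adev);
 *	... GPU reset ...
 *	vram_lost = amdgpu_device_check_vram_lost(adev);
 */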
e3ecdffa 2321/**
1112a46b 2322 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2323 *
2324 * @adev: amdgpu_device pointer
b8b72130 2325 * @state: clockgating state (gate or ungate)
e3ecdffa 2326 *
e3ecdffa 2327 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2328 * set_clockgating_state callbacks are run.
2329 * The late init pass enables clockgating for hardware IPs;
2330 * the fini or suspend pass disables clockgating for hardware IPs.
e3ecdffa
AD
2331 * Returns 0 on success, negative error code on failure.
2332 */
fdd34271 2333
1112a46b
RZ
2334static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2335 enum amd_clockgating_state state)
d38ceaf9 2336{
1112a46b 2337 int i, j, r;
d38ceaf9 2338
4a2ba394
SL
2339 if (amdgpu_emu_mode == 1)
2340 return 0;
2341
1112a46b
RZ
2342 for (j = 0; j < adev->num_ip_blocks; j++) {
2343 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2344 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2345 continue;
4a446d55 2346 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2347 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2348 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2349 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2350 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2351 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2352 /* enable clockgating to save power */
a1255107 2353 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2354 state);
4a446d55
AD
2355 if (r) {
2356 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2357 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2358 return r;
2359 }
b0b00ff1 2360 }
d38ceaf9 2361 }
06b18f61 2362
c9f96fd5
RZ
2363 return 0;
2364}
2365
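/*
 * Worked example of the index expression used above (and by the
 * powergating variant below): with three IP blocks, gating visits
 * indices 0, 1, 2, while ungating visits 2, 1, 0, i.e. ungating runs
 * in the reverse of the gating order.
 */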
1112a46b 2366static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
c9f96fd5 2367{
1112a46b 2368 int i, j, r;
06b18f61 2369
c9f96fd5
RZ
2370 if (amdgpu_emu_mode == 1)
2371 return 0;
2372
1112a46b
RZ
2373 for (j = 0; j < adev->num_ip_blocks; j++) {
2374 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2375 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5
RZ
2376 continue;
2377 /* skip CG for VCE/UVD, it's handled specially */
2378 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2379 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2380 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2381 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2382 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2383 /* enable powergating to save power */
2384 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2385 state);
c9f96fd5
RZ
2386 if (r) {
2387 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2388 adev->ip_blocks[i].version->funcs->name, r);
2389 return r;
2390 }
2391 }
2392 }
2dc80b00
S
2393 return 0;
2394}
2395
beff74bc
AD
2396static int amdgpu_device_enable_mgpu_fan_boost(void)
2397{
2398 struct amdgpu_gpu_instance *gpu_ins;
2399 struct amdgpu_device *adev;
2400 int i, ret = 0;
2401
2402 mutex_lock(&mgpu_info.mutex);
2403
2404 /*
2405 * MGPU fan boost feature should be enabled
2406 * only when there are two or more dGPUs in
2407 * the system
2408 */
2409 if (mgpu_info.num_dgpu < 2)
2410 goto out;
2411
2412 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2413 gpu_ins = &(mgpu_info.gpu_ins[i]);
2414 adev = gpu_ins->adev;
2415 if (!(adev->flags & AMD_IS_APU) &&
f10bb940 2416 !gpu_ins->mgpu_fan_enabled) {
beff74bc
AD
2417 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2418 if (ret)
2419 break;
2420
2421 gpu_ins->mgpu_fan_enabled = 1;
2422 }
2423 }
2424
2425out:
2426 mutex_unlock(&mgpu_info.mutex);
2427
2428 return ret;
2429}
2430
e3ecdffa
AD
2431/**
2432 * amdgpu_device_ip_late_init - run late init for hardware IPs
2433 *
2434 * @adev: amdgpu_device pointer
2435 *
2436 * Late initialization pass for hardware IPs. The list of all the hardware
2437 * IPs that make up the asic is walked and the late_init callbacks are run.
2438 * late_init covers any special initialization that an IP requires
2439 * after all of the have been initialized or something that needs to happen
2440 * late in the init process.
2441 * Returns 0 on success, negative error code on failure.
2442 */
06ec9070 2443static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2444{
60599a03 2445 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2446 int i = 0, r;
2447
2448 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2449 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2450 continue;
2451 if (adev->ip_blocks[i].version->funcs->late_init) {
2452 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2453 if (r) {
2454 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2455 adev->ip_blocks[i].version->funcs->name, r);
2456 return r;
2457 }
2dc80b00 2458 }
73f847db 2459 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2460 }
2461
a891d239
DL
2462 amdgpu_ras_set_error_query_ready(adev, true);
2463
1112a46b
RZ
2464 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2465 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2466
06ec9070 2467 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2468
beff74bc
AD
2469 r = amdgpu_device_enable_mgpu_fan_boost();
2470 if (r)
2471 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2472
60599a03
EQ
2473
2474 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2475 mutex_lock(&mgpu_info.mutex);
2476
2477 /*
2478 * Reset device p-state to low as this was booted with high.
2479 *
2480 * This should be performed only after all devices from the same
2481 * hive get initialized.
2482 *
2483 * However, the number of devices in the hive is not known in advance,
2484 * as it is counted one by one while the devices initialize.
2485 *
2486 * So, we wait for all XGMI interlinked devices to be initialized.
2487 * This may bring some delays as those devices may come from
2488 * different hives. But that should be OK.
2489 */
2490 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2491 for (i = 0; i < mgpu_info.num_gpu; i++) {
2492 gpu_instance = &(mgpu_info.gpu_ins[i]);
2493 if (gpu_instance->adev->flags & AMD_IS_APU)
2494 continue;
2495
d84a430d
JK
2496 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2497 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
2498 if (r) {
2499 DRM_ERROR("pstate setting failed (%d).\n", r);
2500 break;
2501 }
2502 }
2503 }
2504
2505 mutex_unlock(&mgpu_info.mutex);
2506 }
2507
d38ceaf9
AD
2508 return 0;
2509}
2510
e3ecdffa
AD
2511/**
2512 * amdgpu_device_ip_fini - run fini for hardware IPs
2513 *
2514 * @adev: amdgpu_device pointer
2515 *
2516 * Main teardown pass for hardware IPs. The list of all the hardware
2517 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2518 * are run. hw_fini tears down the hardware associated with each IP
2519 * and sw_fini tears down any software state associated with each IP.
2520 * Returns 0 on success, negative error code on failure.
2521 */
06ec9070 2522static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
d38ceaf9
AD
2523{
2524 int i, r;
2525
5278a159
SY
2526 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2527 amdgpu_virt_release_ras_err_handler_data(adev);
2528
c030f2e4 2529 amdgpu_ras_pre_fini(adev);
2530
a82400b5
AG
2531 if (adev->gmc.xgmi.num_physical_nodes > 1)
2532 amdgpu_xgmi_remove_device(adev);
2533
1884734a 2534 amdgpu_amdkfd_device_fini(adev);
05df1f01
RZ
2535
2536 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2537 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2538
3e96dbfd
AD
2539 /* need to disable SMC first */
2540 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2541 if (!adev->ip_blocks[i].status.hw)
3e96dbfd 2542 continue;
fdd34271 2543 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
a1255107 2544 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3e96dbfd
AD
2545 /* XXX handle errors */
2546 if (r) {
2547 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
a1255107 2548 adev->ip_blocks[i].version->funcs->name, r);
3e96dbfd 2549 }
a1255107 2550 adev->ip_blocks[i].status.hw = false;
3e96dbfd
AD
2551 break;
2552 }
2553 }
2554
d38ceaf9 2555 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2556 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2557 continue;
8201a67a 2558
a1255107 2559 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2560 /* XXX handle errors */
2c1a2784 2561 if (r) {
a1255107
AD
2562 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2563 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2564 }
8201a67a 2565
a1255107 2566 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2567 }
2568
9950cda2 2569
d38ceaf9 2570 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2571 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2572 continue;
c12aba3a
ML
2573
2574 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2575 amdgpu_ucode_free_bo(adev);
1e256e27 2576 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a
ML
2577 amdgpu_device_wb_fini(adev);
2578 amdgpu_device_vram_scratch_fini(adev);
533aed27 2579 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2580 }
2581
a1255107 2582 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2583 /* XXX handle errors */
2c1a2784 2584 if (r) {
a1255107
AD
2585 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2586 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2587 }
a1255107
AD
2588 adev->ip_blocks[i].status.sw = false;
2589 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2590 }
2591
a6dcfd9c 2592 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2593 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2594 continue;
a1255107
AD
2595 if (adev->ip_blocks[i].version->funcs->late_fini)
2596 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2597 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2598 }
2599
c030f2e4 2600 amdgpu_ras_fini(adev);
2601
030308fc 2602 if (amdgpu_sriov_vf(adev))
24136135
ML
2603 if (amdgpu_virt_release_full_gpu(adev, false))
2604 DRM_ERROR("failed to release exclusive mode on fini\n");
2493664f 2605
d38ceaf9
AD
2606 return 0;
2607}
2608
e3ecdffa 2609/**
beff74bc 2610 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2611 *
1112a46b 2612 * @work: work_struct.
e3ecdffa 2613 */
beff74bc 2614static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2615{
2616 struct amdgpu_device *adev =
beff74bc 2617 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2618 int r;
2619
2620 r = amdgpu_ib_ring_tests(adev);
2621 if (r)
2622 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2623}
2624
1e317b99
RZ
2625static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2626{
2627 struct amdgpu_device *adev =
2628 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2629
2630 mutex_lock(&adev->gfx.gfx_off_mutex);
2631 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2632 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2633 adev->gfx.gfx_off_state = true;
2634 }
2635 mutex_unlock(&adev->gfx.gfx_off_mutex);
2636}
2637
e3ecdffa 2638/**
e7854a03 2639 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2640 *
2641 * @adev: amdgpu_device pointer
2642 *
2643 * Main suspend function for hardware IPs. The list of all the hardware
2644 * IPs that make up the asic is walked, clockgating is disabled and the
2645 * suspend callbacks are run. suspend puts the hardware and software state
2646 * in each IP into a state suitable for suspend.
2647 * Returns 0 on success, negative error code on failure.
2648 */
e7854a03
AD
2649static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2650{
2651 int i, r;
2652
ced1ba97
PL
2653 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2654 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2655
e7854a03
AD
2656 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2657 if (!adev->ip_blocks[i].status.valid)
2658 continue;
2b9f7848 2659
e7854a03 2660 /* displays are handled separately */
2b9f7848
ND
2661 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
2662 continue;
2663
2664 /* XXX handle errors */
2665 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2666 /* XXX handle errors */
2667 if (r) {
2668 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2669 adev->ip_blocks[i].version->funcs->name, r);
2670 return r;
e7854a03 2671 }
2b9f7848
ND
2672
2673 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
2674 }
2675
e7854a03
AD
2676 return 0;
2677}
2678
2679/**
2680 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2681 *
2682 * @adev: amdgpu_device pointer
2683 *
2684 * Main suspend function for hardware IPs. The list of all the hardware
2685 * IPs that make up the asic is walked, clockgating is disabled and the
2686 * suspend callbacks are run. suspend puts the hardware and software state
2687 * in each IP into a state suitable for suspend.
2688 * Returns 0 on success, negative error code on failure.
2689 */
2690static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2691{
2692 int i, r;
2693
2694 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2695 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2696 continue;
e7854a03
AD
2697 /* displays are handled in phase1 */
2698 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2699 continue;
bff77e86
LM
2700 /* PSP lost connection when err_event_athub occurs */
2701 if (amdgpu_ras_intr_triggered() &&
2702 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2703 adev->ip_blocks[i].status.hw = false;
2704 continue;
2705 }
d38ceaf9 2706 /* XXX handle errors */
a1255107 2707 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 2708 /* XXX handle errors */
2c1a2784 2709 if (r) {
a1255107
AD
2710 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2711 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2712 }
876923fb 2713 adev->ip_blocks[i].status.hw = false;
a3a09142 2714 /* handle putting the SMC in the appropriate state */
86b93fd6
JZ
2715 if (!amdgpu_sriov_vf(adev)) {
2716 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2717 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
2718 if (r) {
2719 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
2720 adev->mp1_state, r);
2721 return r;
2722 }
a3a09142
AD
2723 }
2724 }
b5507c7e 2725 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2726 }
2727
2728 return 0;
2729}
2730
e7854a03
AD
2731/**
2732 * amdgpu_device_ip_suspend - run suspend for hardware IPs
2733 *
2734 * @adev: amdgpu_device pointer
2735 *
2736 * Main suspend function for hardware IPs. The list of all the hardware
2737 * IPs that make up the asic is walked, clockgating is disabled and the
2738 * suspend callbacks are run. suspend puts the hardware and software state
2739 * in each IP into a state suitable for suspend.
2740 * Returns 0 on success, negative error code on failure.
2741 */
2742int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2743{
2744 int r;
2745
e7819644
YT
2746 if (amdgpu_sriov_vf(adev))
2747 amdgpu_virt_request_full_gpu(adev, false);
2748
e7854a03
AD
2749 r = amdgpu_device_ip_suspend_phase1(adev);
2750 if (r)
2751 return r;
2752 r = amdgpu_device_ip_suspend_phase2(adev);
2753
e7819644
YT
2754 if (amdgpu_sriov_vf(adev))
2755 amdgpu_virt_release_full_gpu(adev, false);
2756
e7854a03
AD
2757 return r;
2758}
2759
06ec9070 2760static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2761{
2762 int i, r;
2763
2cb681b6
ML
2764 static enum amd_ip_block_type ip_order[] = {
2765 AMD_IP_BLOCK_TYPE_GMC,
2766 AMD_IP_BLOCK_TYPE_COMMON,
39186aef 2767 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
2768 AMD_IP_BLOCK_TYPE_IH,
2769 };
a90ad3c2 2770
2cb681b6
ML
2771 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2772 int j;
2773 struct amdgpu_ip_block *block;
a90ad3c2 2774
4cd2a96d
J
2775 block = &adev->ip_blocks[i];
2776 block->status.hw = false;
2cb681b6 2777
4cd2a96d 2778 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
2cb681b6 2779
4cd2a96d 2780 if (block->version->type != ip_order[j] ||
2cb681b6
ML
2781 !block->status.valid)
2782 continue;
2783
2784 r = block->version->funcs->hw_init(adev);
0aaeefcc 2785 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2786 if (r)
2787 return r;
482f0e53 2788 block->status.hw = true;
a90ad3c2
ML
2789 }
2790 }
2791
2792 return 0;
2793}
2794
06ec9070 2795static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2796{
2797 int i, r;
2798
2cb681b6
ML
2799 static enum amd_ip_block_type ip_order[] = {
2800 AMD_IP_BLOCK_TYPE_SMC,
2801 AMD_IP_BLOCK_TYPE_DCE,
2802 AMD_IP_BLOCK_TYPE_GFX,
2803 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c 2804 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07
JJ
2805 AMD_IP_BLOCK_TYPE_VCE,
2806 AMD_IP_BLOCK_TYPE_VCN
2cb681b6 2807 };
a90ad3c2 2808
2cb681b6
ML
2809 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2810 int j;
2811 struct amdgpu_ip_block *block;
a90ad3c2 2812
2cb681b6
ML
2813 for (j = 0; j < adev->num_ip_blocks; j++) {
2814 block = &adev->ip_blocks[j];
2815
2816 if (block->version->type != ip_order[i] ||
482f0e53
ML
2817 !block->status.valid ||
2818 block->status.hw)
2cb681b6
ML
2819 continue;
2820
895bd048
JZ
2821 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
2822 r = block->version->funcs->resume(adev);
2823 else
2824 r = block->version->funcs->hw_init(adev);
2825
0aaeefcc 2826 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2827 if (r)
2828 return r;
482f0e53 2829 block->status.hw = true;
a90ad3c2
ML
2830 }
2831 }
2832
2833 return 0;
2834}
2835
e3ecdffa
AD
2836/**
2837 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2838 *
2839 * @adev: amdgpu_device pointer
2840 *
2841 * First resume function for hardware IPs. The list of all the hardware
2842 * IPs that make up the asic is walked and the resume callbacks are run for
2843 * COMMON, GMC, and IH. resume puts the hardware into a functional state
2844 * after a suspend and updates the software state as necessary. This
2845 * function is also used for restoring the GPU after a GPU reset.
2846 * Returns 0 on success, negative error code on failure.
2847 */
06ec9070 2848static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
2849{
2850 int i, r;
2851
a90ad3c2 2852 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2853 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 2854 continue;
a90ad3c2 2855 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa
AD
2856 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2857 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
482f0e53 2858
fcf0649f
CZ
2859 r = adev->ip_blocks[i].version->funcs->resume(adev);
2860 if (r) {
2861 DRM_ERROR("resume of IP block <%s> failed %d\n",
2862 adev->ip_blocks[i].version->funcs->name, r);
2863 return r;
2864 }
482f0e53 2865 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
2866 }
2867 }
2868
2869 return 0;
2870}
2871
e3ecdffa
AD
2872/**
2873 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2874 *
2875 * @adev: amdgpu_device pointer
2876 *
2877 * Second resume function for hardware IPs. The list of all the hardware
2878 * IPs that make up the asic is walked and the resume callbacks are run for
2879 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
2880 * functional state after a suspend and updates the software state as
2881 * necessary. This function is also used for restoring the GPU after a GPU
2882 * reset.
2883 * Returns 0 on success, negative error code on failure.
2884 */
06ec9070 2885static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2886{
2887 int i, r;
2888
2889 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2890 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 2891 continue;
fcf0649f 2892 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 2893 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
2894 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2895 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 2896 continue;
a1255107 2897 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 2898 if (r) {
a1255107
AD
2899 DRM_ERROR("resume of IP block <%s> failed %d\n",
2900 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2901 return r;
2c1a2784 2902 }
482f0e53 2903 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
2904 }
2905
2906 return 0;
2907}
2908
e3ecdffa
AD
2909/**
2910 * amdgpu_device_ip_resume - run resume for hardware IPs
2911 *
2912 * @adev: amdgpu_device pointer
2913 *
2914 * Main resume function for hardware IPs. The hardware IPs
2915 * are split into two resume functions because they are
2916 * are also used in in recovering from a GPU reset and some additional
2917 * steps need to be take between them. In this case (S3/S4) they are
2918 * run sequentially.
2919 * Returns 0 on success, negative error code on failure.
2920 */
06ec9070 2921static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
2922{
2923 int r;
2924
06ec9070 2925 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
2926 if (r)
2927 return r;
7a3e0bb2
RZ
2928
2929 r = amdgpu_device_fw_loading(adev);
2930 if (r)
2931 return r;
2932
06ec9070 2933 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
2934
2935 return r;
2936}
2937
e3ecdffa
AD
2938/**
2939 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2940 *
2941 * @adev: amdgpu_device pointer
2942 *
2943 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2944 */
4e99a44e 2945static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 2946{
6867e1b5
ML
2947 if (amdgpu_sriov_vf(adev)) {
2948 if (adev->is_atom_fw) {
2949 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2950 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2951 } else {
2952 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2953 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2954 }
2955
2956 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2957 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 2958 }
048765ad
AR
2959}
2960
e3ecdffa
AD
2961/**
2962 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2963 *
2964 * @asic_type: AMD asic type
2965 *
2966 * Check if there is DC (new modesetting infrastructure) support for an asic.
2967 * returns true if DC has support, false if not.
2968 */
4562236b
HW
2969bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2970{
2971 switch (asic_type) {
2972#if defined(CONFIG_DRM_AMD_DC)
64200c46
MR
2973#if defined(CONFIG_DRM_AMD_DC_SI)
2974 case CHIP_TAHITI:
2975 case CHIP_PITCAIRN:
2976 case CHIP_VERDE:
2977 case CHIP_OLAND:
2978#endif
4562236b 2979 case CHIP_BONAIRE:
0d6fbccb 2980 case CHIP_KAVERI:
367e6687
AD
2981 case CHIP_KABINI:
2982 case CHIP_MULLINS:
d9fda248
HW
2983 /*
2984 * We have systems in the wild with these ASICs that require
2985 * LVDS and VGA support which is not supported with DC.
2986 *
2987 * Fallback to the non-DC driver here by default so as not to
2988 * cause regressions.
2989 */
2990 return amdgpu_dc > 0;
2991 case CHIP_HAWAII:
4562236b
HW
2992 case CHIP_CARRIZO:
2993 case CHIP_STONEY:
4562236b 2994 case CHIP_POLARIS10:
675fd32b 2995 case CHIP_POLARIS11:
2c8ad2d5 2996 case CHIP_POLARIS12:
675fd32b 2997 case CHIP_VEGAM:
4562236b
HW
2998 case CHIP_TONGA:
2999 case CHIP_FIJI:
42f8ffa1 3000 case CHIP_VEGA10:
dca7b401 3001 case CHIP_VEGA12:
c6034aa2 3002 case CHIP_VEGA20:
b86a1aa3 3003#if defined(CONFIG_DRM_AMD_DC_DCN)
fd187853 3004 case CHIP_RAVEN:
b4f199c7 3005 case CHIP_NAVI10:
8fceceb6 3006 case CHIP_NAVI14:
078655d9 3007 case CHIP_NAVI12:
e1c14c43 3008 case CHIP_RENOIR:
81d9bfb8 3009 case CHIP_SIENNA_CICHLID:
a6c5308f 3010 case CHIP_NAVY_FLOUNDER:
7cc656e2 3011 case CHIP_DIMGREY_CAVEFISH:
84b934bc 3012 case CHIP_VANGOGH:
42f8ffa1 3013#endif
fd187853 3014 return amdgpu_dc != 0;
4562236b
HW
3015#endif
3016 default:
93b09a9a
SS
3017 if (amdgpu_dc > 0)
3018 DRM_INFO("Display Core has been requested via kernel parameter "
3019 "but isn't supported by ASIC, ignoring\n");
4562236b
HW
3020 return false;
3021 }
3022}
3023
3024/**
3025 * amdgpu_device_has_dc_support - check if dc is supported
3026 *
982a820b 3027 * @adev: amdgpu_device pointer
4562236b
HW
3028 *
3029 * Returns true for supported, false for not supported
3030 */
3031bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3032{
c997e8e2 3033 if (amdgpu_sriov_vf(adev) || adev->enable_virtual_display)
2555039d
XY
3034 return false;
3035
4562236b
HW
3036 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3037}
3038
d4535e2c
AG
3039
3040static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3041{
3042 struct amdgpu_device *adev =
3043 container_of(__work, struct amdgpu_device, xgmi_reset_work);
d95e8e97 3044 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
d4535e2c 3045
c6a6e2db
AG
3046 /* It's a bug to not have a hive within this function */
3047 if (WARN_ON(!hive))
3048 return;
3049
3050 /*
3051 * Use task barrier to synchronize all xgmi reset works across the
3052 * hive. task_barrier_enter and task_barrier_exit will block
3053 * until all the threads running the xgmi reset works reach
3054 * those points. task_barrier_full will do both blocks.
3055 */
3056 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3057
3058 task_barrier_enter(&hive->tb);
4a580877 3059 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
c6a6e2db
AG
3060
3061 if (adev->asic_reset_res)
3062 goto fail;
3063
3064 task_barrier_exit(&hive->tb);
4a580877 3065 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
c6a6e2db
AG
3066
3067 if (adev->asic_reset_res)
3068 goto fail;
43c4d576
JC
3069
3070 if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count)
3071 adev->mmhub.funcs->reset_ras_error_count(adev);
c6a6e2db
AG
3072 } else {
3073
3074 task_barrier_full(&hive->tb);
3075 adev->asic_reset_res = amdgpu_asic_reset(adev);
3076 }
ce316fa5 3077
c6a6e2db 3078fail:
d4535e2c 3079 if (adev->asic_reset_res)
fed184e9 3080 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
4a580877 3081 adev->asic_reset_res, adev_to_drm(adev)->unique);
d95e8e97 3082 amdgpu_put_xgmi_hive(hive);
d4535e2c
AG
3083}
3084
71f98027
AD
3085static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3086{
3087 char *input = amdgpu_lockup_timeout;
3088 char *timeout_setting = NULL;
3089 int index = 0;
3090 long timeout;
3091 int ret = 0;
3092
3093 /*
3094 * By default the timeout for non-compute jobs is 10000 ms,
3095 * and there is no timeout enforced on compute jobs.
3096 * In SR-IOV or passthrough mode, the timeout for compute
b7b2a316 3097 * jobs is 60000 ms by default.
71f98027
AD
3098 */
3099 adev->gfx_timeout = msecs_to_jiffies(10000);
3100 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3101 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
b7b2a316 3102 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027
AD
3103 else
3104 adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
3105
f440ff44 3106 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 3107 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 3108 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
3109 ret = kstrtol(timeout_setting, 0, &timeout);
3110 if (ret)
3111 return ret;
3112
3113 if (timeout == 0) {
3114 index++;
3115 continue;
3116 } else if (timeout < 0) {
3117 timeout = MAX_SCHEDULE_TIMEOUT;
3118 } else {
3119 timeout = msecs_to_jiffies(timeout);
3120 }
3121
3122 switch (index++) {
3123 case 0:
3124 adev->gfx_timeout = timeout;
3125 break;
3126 case 1:
3127 adev->compute_timeout = timeout;
3128 break;
3129 case 2:
3130 adev->sdma_timeout = timeout;
3131 break;
3132 case 3:
3133 adev->video_timeout = timeout;
3134 break;
3135 default:
3136 break;
3137 }
3138 }
3139 /*
3140 * There is only one value specified and
3141 * it should apply to all non-compute jobs.
3142 */
bcccee89 3143 if (index == 1) {
71f98027 3144 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
3145 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3146 adev->compute_timeout = adev->gfx_timeout;
3147 }
71f98027
AD
3148 }
3149
3150 return ret;
3151}
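/*
 * For reference, the parsing above accepts up to four comma-separated
 * values in the order gfx, compute, sdma, video (in ms); 0 keeps the
 * default for that slot and a negative value means no timeout. A single
 * value applies to all non-compute jobs (and to compute jobs as well
 * under SR-IOV or passthrough). Hypothetical example:
 *
 *	amdgpu.lockup_timeout=10000,60000,10000,10000
 */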
d4535e2c 3152
77f3a5cd
ND
3153static const struct attribute *amdgpu_dev_attributes[] = {
3154 &dev_attr_product_name.attr,
3155 &dev_attr_product_number.attr,
3156 &dev_attr_serial_number.attr,
3157 &dev_attr_pcie_replay_count.attr,
3158 NULL
3159};
3160
c9a6b82f 3161
d38ceaf9
AD
3162/**
3163 * amdgpu_device_init - initialize the driver
3164 *
3165 * @adev: amdgpu_device pointer
d38ceaf9
AD
3166 * @flags: driver flags
3167 *
3168 * Initializes the driver info and hw (all asics).
3169 * Returns 0 for success or an error on failure.
3170 * Called at driver startup.
3171 */
3172int amdgpu_device_init(struct amdgpu_device *adev,
d38ceaf9
AD
3173 uint32_t flags)
3174{
8aba21b7
LT
3175 struct drm_device *ddev = adev_to_drm(adev);
3176 struct pci_dev *pdev = adev->pdev;
d38ceaf9 3177 int r, i;
3840c5bc 3178 bool boco = false;
95844d20 3179 u32 max_MBps;
d38ceaf9
AD
3180
3181 adev->shutdown = false;
d38ceaf9 3182 adev->flags = flags;
4e66d7d2
YZ
3183
3184 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3185 adev->asic_type = amdgpu_force_asic_type;
3186 else
3187 adev->asic_type = flags & AMD_ASIC_MASK;
3188
d38ceaf9 3189 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 3190 if (amdgpu_emu_mode == 1)
8bdab6bb 3191 adev->usec_timeout *= 10;
770d13b1 3192 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
3193 adev->accel_working = false;
3194 adev->num_rings = 0;
3195 adev->mman.buffer_funcs = NULL;
3196 adev->mman.buffer_funcs_ring = NULL;
3197 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 3198 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 3199 adev->gmc.gmc_funcs = NULL;
f54d1867 3200 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 3201 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
3202
3203 adev->smc_rreg = &amdgpu_invalid_rreg;
3204 adev->smc_wreg = &amdgpu_invalid_wreg;
3205 adev->pcie_rreg = &amdgpu_invalid_rreg;
3206 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
3207 adev->pciep_rreg = &amdgpu_invalid_rreg;
3208 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
3209 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3210 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
3211 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3212 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3213 adev->didt_rreg = &amdgpu_invalid_rreg;
3214 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
3215 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3216 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
3217 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3218 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3219
3e39ab90
AD
3220 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3221 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3222 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
3223
3224 /* mutex initializations are all done here so we
3225 * can call functions later on without locking issues */
d38ceaf9 3226 atomic_set(&adev->irq.ih.lock, 0);
0e5ca0d1 3227 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
3228 mutex_init(&adev->pm.mutex);
3229 mutex_init(&adev->gfx.gpu_clock_mutex);
3230 mutex_init(&adev->srbm_mutex);
b8866c26 3231 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3232 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 3233 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3234 mutex_init(&adev->mn_lock);
e23b74aa 3235 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3236 hash_init(adev->mn_hash);
53b3f8f4 3237 atomic_set(&adev->in_gpu_reset, 0);
6049db43 3238 init_rwsem(&adev->reset_sem);
32eaeae0 3239 mutex_init(&adev->psp.mutex);
bd052211 3240 mutex_init(&adev->notifier_lock);
d38ceaf9 3241
912dfc84
EQ
3242 r = amdgpu_device_check_arguments(adev);
3243 if (r)
3244 return r;
d38ceaf9 3245
d38ceaf9
AD
3246 spin_lock_init(&adev->mmio_idx_lock);
3247 spin_lock_init(&adev->smc_idx_lock);
3248 spin_lock_init(&adev->pcie_idx_lock);
3249 spin_lock_init(&adev->uvd_ctx_idx_lock);
3250 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3251 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3252 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3253 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3254 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3255
0c4e7fa5
CZ
3256 INIT_LIST_HEAD(&adev->shadow_list);
3257 mutex_init(&adev->shadow_list_lock);
3258
beff74bc
AD
3259 INIT_DELAYED_WORK(&adev->delayed_init_work,
3260 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3261 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3262 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3263
d4535e2c
AG
3264 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3265
d23ee13f 3266 adev->gfx.gfx_off_req_count = 1;
b6e79d9a 3267 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3268
b265bdbd
EQ
3269 atomic_set(&adev->throttling_logging_enabled, 1);
3270 /*
3271 * If throttling continues, logging will be performed every minute
3272 * to avoid log flooding. "-1" is subtracted since the thermal
3273 * throttling interrupt comes every second. Thus, the total logging
3274 * interval is 59 seconds(retelimited printk interval) + 1(waiting
3275 * for throttling interrupt) = 60 seconds.
3276 */
3277 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3278 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3279
0fa49558
AX
3280 /* Registers mapping */
3281 /* TODO: block userspace mapping of io register */
da69c161
KW
3282 if (adev->asic_type >= CHIP_BONAIRE) {
3283 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3284 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3285 } else {
3286 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3287 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3288 }
d38ceaf9 3289
d38ceaf9
AD
3290 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3291 if (adev->rmmio == NULL) {
3292 return -ENOMEM;
3293 }
3294 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3295 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3296
d38ceaf9
AD
3297 /* io port mapping */
3298 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
3299 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
3300 adev->rio_mem_size = pci_resource_len(adev->pdev, i);
3301 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
3302 break;
3303 }
3304 }
3305 if (adev->rio_mem == NULL)
b64a18c5 3306 DRM_INFO("PCI I/O BAR is not found.\n");
d38ceaf9 3307
b2109d8e
JX
3308 /* enable PCIE atomic ops */
3309 r = pci_enable_atomic_ops_to_root(adev->pdev,
3310 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3311 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3312 if (r) {
3313 adev->have_atomics_support = false;
3314 DRM_INFO("PCIE atomic ops is not supported\n");
3315 } else {
3316 adev->have_atomics_support = true;
3317 }
3318
5494d864
AD
3319 amdgpu_device_get_pcie_info(adev);
3320
b239c017
JX
3321 if (amdgpu_mcbp)
3322 DRM_INFO("MCBP is enabled\n");
3323
5f84cc63
JX
3324 if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
3325 adev->enable_mes = true;
3326
3aa0115d
ML
3327 /* detect hw virtualization here */
3328 amdgpu_detect_virtualization(adev);
3329
dffa11b4
ML
3330 r = amdgpu_device_get_job_timeout_settings(adev);
3331 if (r) {
3332 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4192f7b5 3333 goto failed_unmap;
a190d1c7
XY
3334 }
3335
d38ceaf9 3336 /* early init functions */
06ec9070 3337 r = amdgpu_device_ip_early_init(adev);
d38ceaf9 3338 if (r)
4192f7b5 3339 goto failed_unmap;
d38ceaf9 3340
6585661d
OZ
3341 /* doorbell bar mapping and doorbell index init*/
3342 amdgpu_device_doorbell_init(adev);
3343
d38ceaf9
AD
3344 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
3345 /* this will fail for cards that aren't VGA class devices, just
3346 * ignore it */
06ec9070 3347 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
d38ceaf9 3348
31af062a 3349 if (amdgpu_device_supports_boco(ddev))
3840c5bc
AD
3350 boco = true;
3351 if (amdgpu_has_atpx() &&
3352 (amdgpu_is_atpx_hybrid() ||
3353 amdgpu_has_atpx_dgpu_power_cntl()) &&
3354 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 3355 vga_switcheroo_register_client(adev->pdev,
3840c5bc
AD
3356 &amdgpu_switcheroo_ops, boco);
3357 if (boco)
d38ceaf9
AD
3358 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
3359
9475a943
SL
3360 if (amdgpu_emu_mode == 1) {
3361 /* post the asic on emulation mode */
3362 emu_soc_asic_init(adev);
bfca0289 3363 goto fence_driver_init;
9475a943 3364 }
bfca0289 3365
4e99a44e
ML
3366 /* detect if we are with an SRIOV vbios */
3367 amdgpu_device_detect_sriov_bios(adev);
048765ad 3368
95e8e59e
AD
3369 /* check if we need to reset the asic
3370 * E.g., driver was not cleanly unloaded previously, etc.
3371 */
f14899fd 3372 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
95e8e59e
AD
3373 r = amdgpu_asic_reset(adev);
3374 if (r) {
3375 dev_err(adev->dev, "asic reset on init failed\n");
3376 goto failed;
3377 }
3378 }
3379
c9a6b82f
AG
3380 pci_enable_pcie_error_reporting(adev->ddev.pdev);
3381
d38ceaf9 3382 /* Post card if necessary */
39c640c0 3383 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3384 if (!adev->bios) {
bec86378 3385 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3386 r = -EINVAL;
3387 goto failed;
d38ceaf9 3388 }
bec86378 3389 DRM_INFO("GPU posting now...\n");
4d2997ab 3390 r = amdgpu_device_asic_init(adev);
4e99a44e
ML
3391 if (r) {
3392 dev_err(adev->dev, "gpu post error!\n");
3393 goto failed;
3394 }
d38ceaf9
AD
3395 }
3396
88b64e95
AD
3397 if (adev->is_atom_fw) {
3398 /* Initialize clocks */
3399 r = amdgpu_atomfirmware_get_clock_info(adev);
3400 if (r) {
3401 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 3402 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
3403 goto failed;
3404 }
3405 } else {
a5bde2f9
AD
3406 /* Initialize clocks */
3407 r = amdgpu_atombios_get_clock_info(adev);
3408 if (r) {
3409 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 3410 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 3411 goto failed;
a5bde2f9
AD
3412 }
3413 /* init i2c buses */
4562236b
HW
3414 if (!amdgpu_device_has_dc_support(adev))
3415 amdgpu_atombios_i2c_init(adev);
2c1a2784 3416 }
d38ceaf9 3417
bfca0289 3418fence_driver_init:
d38ceaf9
AD
3419 /* Fence driver */
3420 r = amdgpu_fence_driver_init(adev);
2c1a2784
AD
3421 if (r) {
3422 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
e23b74aa 3423 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3424 goto failed;
2c1a2784 3425 }
d38ceaf9
AD
3426
3427 /* init the mode config */
4a580877 3428 drm_mode_config_init(adev_to_drm(adev));
d38ceaf9 3429
06ec9070 3430 r = amdgpu_device_ip_init(adev);
d38ceaf9 3431 if (r) {
8840a387 3432 /* failed in exclusive mode due to timeout */
3433 if (amdgpu_sriov_vf(adev) &&
3434 !amdgpu_sriov_runtime(adev) &&
3435 amdgpu_virt_mmio_blocked(adev) &&
3436 !amdgpu_virt_wait_reset(adev)) {
3437 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
3438 /* Don't send request since VF is inactive. */
3439 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3440 adev->virt.ops = NULL;
8840a387 3441 r = -EAGAIN;
3442 goto failed;
3443 }
06ec9070 3444 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3445 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
83ba126a 3446 goto failed;
d38ceaf9
AD
3447 }
3448
d69b8971
YZ
3449 dev_info(adev->dev,
3450 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
3451 adev->gfx.config.max_shader_engines,
3452 adev->gfx.config.max_sh_per_se,
3453 adev->gfx.config.max_cu_per_sh,
3454 adev->gfx.cu_info.number);
3455
d38ceaf9
AD
3456 adev->accel_working = true;
3457
e59c0205
AX
3458 amdgpu_vm_check_compute_bug(adev);
3459
95844d20
MO
3460 /* Initialize the buffer migration limit. */
3461 if (amdgpu_moverate >= 0)
3462 max_MBps = amdgpu_moverate;
3463 else
3464 max_MBps = 8; /* Allow 8 MB/s. */
3465 /* Get a log2 for easy divisions. */
3466 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
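	/*
	 * Illustrative sketch (assumption, not the driver's actual helper):
	 * because the MB/s limit is rounded to a power of two via ilog2(), a
	 * byte budget for a time slice can be approximated with a shift
	 * instead of a 64-bit division (MB/s * us ~= bytes). "elapsed_us" is
	 * a hypothetical variable used only for illustration.
	 */
#if 0
	u64 approx_bytes = (u64)elapsed_us << adev->mm_stats.log2_max_MBps;
#endif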
3467
9bc92b9c
ML
3468 amdgpu_fbdev_init(adev);
3469
d2f52ac8 3470 r = amdgpu_pm_sysfs_init(adev);
7c868b59
YT
3471 if (r) {
3472 adev->pm_sysfs_en = false;
d2f52ac8 3473 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
7c868b59
YT
3474 } else
3475 adev->pm_sysfs_en = true;
d2f52ac8 3476
5bb23532 3477 r = amdgpu_ucode_sysfs_init(adev);
7c868b59
YT
3478 if (r) {
3479 adev->ucode_sysfs_en = false;
5bb23532 3480 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
7c868b59
YT
3481 } else
3482 adev->ucode_sysfs_en = true;
5bb23532 3483
d38ceaf9
AD
3484 if ((amdgpu_testing & 1)) {
3485 if (adev->accel_working)
3486 amdgpu_test_moves(adev);
3487 else
3488 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
3489 }
d38ceaf9
AD
3490 if (amdgpu_benchmarking) {
3491 if (adev->accel_working)
3492 amdgpu_benchmark(adev, amdgpu_benchmarking);
3493 else
3494 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
3495 }
3496
b0adca4d
EQ
3497 /*
3498 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3499	 * Otherwise the mgpu fan boost feature will be skipped because the
3500	 * gpu instance count would be too low.
3501 */
3502 amdgpu_register_gpu_instance(adev);
3503
d38ceaf9
AD
3504 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3505 * explicit gating rather than handling it automatically.
3506 */
06ec9070 3507 r = amdgpu_device_ip_late_init(adev);
2c1a2784 3508 if (r) {
06ec9070 3509 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
e23b74aa 3510 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
83ba126a 3511 goto failed;
2c1a2784 3512 }
d38ceaf9 3513
108c6a63 3514 /* must succeed. */
511fdbc3 3515 amdgpu_ras_resume(adev);
108c6a63 3516
beff74bc
AD
3517 queue_delayed_work(system_wq, &adev->delayed_init_work,
3518 msecs_to_jiffies(AMDGPU_RESUME_MS));
3519
2c738637
ML
3520 if (amdgpu_sriov_vf(adev))
3521 flush_delayed_work(&adev->delayed_init_work);
3522
77f3a5cd 3523 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
5aea5327 3524 if (r)
77f3a5cd 3525 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166 3526
d155bef0
AB
3527 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3528 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
3529 if (r)
3530 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3531
c1dd4aa6
AG
3532 /* Have stored pci confspace at hand for restore in sudden PCI error */
3533 if (amdgpu_device_cache_pci_state(adev->pdev))
3534 pci_restore_state(pdev);
3535
d38ceaf9 3536 return 0;
83ba126a
AD
3537
3538failed:
89041940 3539 amdgpu_vf_error_trans_all(adev);
3840c5bc 3540 if (boco)
83ba126a 3541 vga_switcheroo_fini_domain_pm_ops(adev->dev);
8840a387 3542
4192f7b5
AD
3543failed_unmap:
3544 iounmap(adev->rmmio);
3545 adev->rmmio = NULL;
3546
83ba126a 3547 return r;
d38ceaf9
AD
3548}
3549
d38ceaf9
AD
3550/**
3551 * amdgpu_device_fini - tear down the driver
3552 *
3553 * @adev: amdgpu_device pointer
3554 *
3555 * Tear down the driver info (all asics).
3556 * Called at driver shutdown.
3557 */
3558void amdgpu_device_fini(struct amdgpu_device *adev)
3559{
aac89168 3560 dev_info(adev->dev, "amdgpu: finishing device.\n");
9f875167 3561 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 3562 adev->shutdown = true;
9f875167 3563
c1dd4aa6
AG
3564 kfree(adev->pci_state);
3565
752c683d
ML
3566	 /* make sure the IB tests have finished before entering exclusive mode
3567	 * to avoid preemption during the IB tests
3568	 */
519b8b76 3569 if (amdgpu_sriov_vf(adev)) {
752c683d 3570 amdgpu_virt_request_full_gpu(adev, false);
519b8b76
BZ
3571 amdgpu_virt_fini_data_exchange(adev);
3572 }
752c683d 3573
e5b03032
ML
3574 /* disable all interrupts */
3575 amdgpu_irq_disable_all(adev);
ff97cba8
ML
3576 if (adev->mode_info.mode_config_initialized){
3577 if (!amdgpu_device_has_dc_support(adev))
4a580877 3578 drm_helper_force_disable_all(adev_to_drm(adev));
ff97cba8 3579 else
4a580877 3580 drm_atomic_helper_shutdown(adev_to_drm(adev));
ff97cba8 3581 }
d38ceaf9 3582 amdgpu_fence_driver_fini(adev);
7c868b59
YT
3583 if (adev->pm_sysfs_en)
3584 amdgpu_pm_sysfs_fini(adev);
d38ceaf9 3585 amdgpu_fbdev_fini(adev);
e230ac11 3586 amdgpu_device_ip_fini(adev);
75e1658e
ND
3587 release_firmware(adev->firmware.gpu_info_fw);
3588 adev->firmware.gpu_info_fw = NULL;
d38ceaf9
AD
3589 adev->accel_working = false;
3590 /* free i2c buses */
4562236b
HW
3591 if (!amdgpu_device_has_dc_support(adev))
3592 amdgpu_i2c_fini(adev);
bfca0289
SL
3593
3594 if (amdgpu_emu_mode != 1)
3595 amdgpu_atombios_fini(adev);
3596
d38ceaf9
AD
3597 kfree(adev->bios);
3598 adev->bios = NULL;
3840c5bc
AD
3599 if (amdgpu_has_atpx() &&
3600 (amdgpu_is_atpx_hybrid() ||
3601 amdgpu_has_atpx_dgpu_power_cntl()) &&
3602 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 3603 vga_switcheroo_unregister_client(adev->pdev);
4a580877 3604 if (amdgpu_device_supports_boco(adev_to_drm(adev)))
83ba126a 3605 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d38ceaf9
AD
3606 vga_client_register(adev->pdev, NULL, NULL, NULL);
3607 if (adev->rio_mem)
3608 pci_iounmap(adev->pdev, adev->rio_mem);
3609 adev->rio_mem = NULL;
3610 iounmap(adev->rmmio);
3611 adev->rmmio = NULL;
06ec9070 3612 amdgpu_device_doorbell_fini(adev);
e9bc1bf7 3613
7c868b59
YT
3614 if (adev->ucode_sysfs_en)
3615 amdgpu_ucode_sysfs_fini(adev);
77f3a5cd
ND
3616
3617 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
d155bef0
AB
3618 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3619 amdgpu_pmu_fini(adev);
72de33f8 3620 if (adev->mman.discovery_bin)
a190d1c7 3621 amdgpu_discovery_fini(adev);
d38ceaf9
AD
3622}
3623
3624
3625/*
3626 * Suspend & resume.
3627 */
3628/**
810ddc3a 3629 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 3630 *
87e3f136 3631 * @dev: drm dev pointer
87e3f136 3632 * @fbcon : notify the fbdev of suspend
d38ceaf9
AD
3633 *
3634 * Puts the hw in the suspend state (all asics).
3635 * Returns 0 for success or an error on failure.
3636 * Called at driver suspend.
3637 */
de185019 3638int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3639{
3640 struct amdgpu_device *adev;
3641 struct drm_crtc *crtc;
3642 struct drm_connector *connector;
f8d2d39e 3643 struct drm_connector_list_iter iter;
5ceb54c6 3644 int r;
d38ceaf9 3645
1348969a 3646 adev = drm_to_adev(dev);
d38ceaf9
AD
3647
3648 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3649 return 0;
3650
44779b43 3651 adev->in_suspend = true;
d38ceaf9
AD
3652 drm_kms_helper_poll_disable(dev);
3653
5f818173
S
3654 if (fbcon)
3655 amdgpu_fbdev_set_suspend(adev, 1);
3656
beff74bc 3657 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 3658
4562236b
HW
3659 if (!amdgpu_device_has_dc_support(adev)) {
3660 /* turn off display hw */
3661 drm_modeset_lock_all(dev);
f8d2d39e
LP
3662 drm_connector_list_iter_begin(dev, &iter);
3663 drm_for_each_connector_iter(connector, &iter)
3664 drm_helper_connector_dpms(connector,
3665 DRM_MODE_DPMS_OFF);
3666 drm_connector_list_iter_end(&iter);
4562236b 3667 drm_modeset_unlock_all(dev);
fe1053b7
AD
3668 /* unpin the front buffers and cursors */
3669 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3670 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3671 struct drm_framebuffer *fb = crtc->primary->fb;
3672 struct amdgpu_bo *robj;
3673
91334223 3674 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3675 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3676 r = amdgpu_bo_reserve(aobj, true);
3677 if (r == 0) {
3678 amdgpu_bo_unpin(aobj);
3679 amdgpu_bo_unreserve(aobj);
3680 }
756e6880 3681 }
756e6880 3682
fe1053b7
AD
3683 if (fb == NULL || fb->obj[0] == NULL) {
3684 continue;
3685 }
3686 robj = gem_to_amdgpu_bo(fb->obj[0]);
3687 /* don't unpin kernel fb objects */
3688 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
3689 r = amdgpu_bo_reserve(robj, true);
3690 if (r == 0) {
3691 amdgpu_bo_unpin(robj);
3692 amdgpu_bo_unreserve(robj);
3693 }
d38ceaf9
AD
3694 }
3695 }
3696 }
fe1053b7 3697
5e6932fe 3698 amdgpu_ras_suspend(adev);
3699
fe1053b7
AD
3700 r = amdgpu_device_ip_suspend_phase1(adev);
3701
94fa5660
EQ
3702 amdgpu_amdkfd_suspend(adev, !fbcon);
3703
d38ceaf9
AD
3704 /* evict vram memory */
3705 amdgpu_bo_evict_vram(adev);
3706
5ceb54c6 3707 amdgpu_fence_driver_suspend(adev);
d38ceaf9 3708
fe1053b7 3709 r = amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 3710
a0a71e49
AD
3711 /* evict remaining vram memory
3712 * This second call to evict vram is to evict the gart page table
3713 * using the CPU.
3714 */
d38ceaf9
AD
3715 amdgpu_bo_evict_vram(adev);
3716
d38ceaf9
AD
3717 return 0;
3718}
3719
3720/**
810ddc3a 3721 * amdgpu_device_resume - initiate device resume
d38ceaf9 3722 *
87e3f136 3723 * @dev: drm dev pointer
87e3f136 3724 * @fbcon : notify the fbdev of resume
d38ceaf9
AD
3725 *
3726 * Bring the hw back to operating state (all asics).
3727 * Returns 0 for success or an error on failure.
3728 * Called at driver resume.
3729 */
de185019 3730int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3731{
3732 struct drm_connector *connector;
f8d2d39e 3733 struct drm_connector_list_iter iter;
1348969a 3734 struct amdgpu_device *adev = drm_to_adev(dev);
756e6880 3735 struct drm_crtc *crtc;
03161a6e 3736 int r = 0;
d38ceaf9
AD
3737
3738 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3739 return 0;
3740
d38ceaf9 3741 /* post card */
39c640c0 3742 if (amdgpu_device_need_post(adev)) {
4d2997ab 3743 r = amdgpu_device_asic_init(adev);
74b0b157 3744 if (r)
aac89168 3745 dev_err(adev->dev, "amdgpu asic init failed\n");
74b0b157 3746 }
d38ceaf9 3747
06ec9070 3748 r = amdgpu_device_ip_resume(adev);
e6707218 3749 if (r) {
aac89168 3750 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
4d3b9ae5 3751 return r;
e6707218 3752 }
5ceb54c6
AD
3753 amdgpu_fence_driver_resume(adev);
3754
d38ceaf9 3755
06ec9070 3756 r = amdgpu_device_ip_late_init(adev);
03161a6e 3757 if (r)
4d3b9ae5 3758 return r;
d38ceaf9 3759
beff74bc
AD
3760 queue_delayed_work(system_wq, &adev->delayed_init_work,
3761 msecs_to_jiffies(AMDGPU_RESUME_MS));
3762
fe1053b7
AD
3763 if (!amdgpu_device_has_dc_support(adev)) {
3764 /* pin cursors */
3765 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3766 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3767
91334223 3768 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3769 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3770 r = amdgpu_bo_reserve(aobj, true);
3771 if (r == 0) {
3772 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
3773 if (r != 0)
aac89168 3774 dev_err(adev->dev, "Failed to pin cursor BO (%d)\n", r);
fe1053b7
AD
3775 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
3776 amdgpu_bo_unreserve(aobj);
3777 }
756e6880
AD
3778 }
3779 }
3780 }
9593f4d6 3781 r = amdgpu_amdkfd_resume(adev, !fbcon);
ba997709
YZ
3782 if (r)
3783 return r;
756e6880 3784
96a5d8d4 3785 /* Make sure IB tests flushed */
beff74bc 3786 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 3787
d38ceaf9
AD
3788 /* blat the mode back in */
3789 if (fbcon) {
4562236b
HW
3790 if (!amdgpu_device_has_dc_support(adev)) {
3791 /* pre DCE11 */
3792 drm_helper_resume_force_mode(dev);
3793
3794 /* turn on display hw */
3795 drm_modeset_lock_all(dev);
f8d2d39e
LP
3796
3797 drm_connector_list_iter_begin(dev, &iter);
3798 drm_for_each_connector_iter(connector, &iter)
3799 drm_helper_connector_dpms(connector,
3800 DRM_MODE_DPMS_ON);
3801 drm_connector_list_iter_end(&iter);
3802
4562236b 3803 drm_modeset_unlock_all(dev);
d38ceaf9 3804 }
4d3b9ae5 3805 amdgpu_fbdev_set_suspend(adev, 0);
d38ceaf9
AD
3806 }
3807
3808 drm_kms_helper_poll_enable(dev);
23a1a9e5 3809
5e6932fe 3810 amdgpu_ras_resume(adev);
3811
23a1a9e5
L
3812 /*
3813 * Most of the connector probing functions try to acquire runtime pm
3814 * refs to ensure that the GPU is powered on when connector polling is
3815 * performed. Since we're calling this from a runtime PM callback,
3816 * trying to acquire rpm refs will cause us to deadlock.
3817 *
3818 * Since we're guaranteed to be holding the rpm lock, it's safe to
3819 * temporarily disable the rpm helpers so this doesn't deadlock us.
3820 */
3821#ifdef CONFIG_PM
3822 dev->dev->power.disable_depth++;
3823#endif
4562236b
HW
3824 if (!amdgpu_device_has_dc_support(adev))
3825 drm_helper_hpd_irq_event(dev);
3826 else
3827 drm_kms_helper_hotplug_event(dev);
23a1a9e5
L
3828#ifdef CONFIG_PM
3829 dev->dev->power.disable_depth--;
3830#endif
44779b43
RZ
3831 adev->in_suspend = false;
3832
4d3b9ae5 3833 return 0;
d38ceaf9
AD
3834}
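/*
 * Illustrative sketch (assumption): the suspend/resume entry points above are
 * typically wired into the driver's PM callbacks roughly as below; the real
 * hookup lives elsewhere in the driver (e.g. amdgpu_drv.c) and may differ.
 * The example_pmops_* names are hypothetical.
 */
#if 0
static int example_pmops_suspend(struct device *dev)
{
	struct drm_device *drm_dev = dev_get_drvdata(dev);

	return amdgpu_device_suspend(drm_dev, true);
}

static int example_pmops_resume(struct device *dev)
{
	struct drm_device *drm_dev = dev_get_drvdata(dev);

	return amdgpu_device_resume(drm_dev, true);
}
#endif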
3835
e3ecdffa
AD
3836/**
3837 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
3838 *
3839 * @adev: amdgpu_device pointer
3840 *
3841 * The list of all the hardware IPs that make up the asic is walked and
3842 * the check_soft_reset callbacks are run. check_soft_reset determines
3843 * if the asic is still hung or not.
3844 * Returns true if any of the IPs are still in a hung state, false if not.
3845 */
06ec9070 3846static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
3847{
3848 int i;
3849 bool asic_hang = false;
3850
f993d628
ML
3851 if (amdgpu_sriov_vf(adev))
3852 return true;
3853
8bc04c29
AD
3854 if (amdgpu_asic_need_full_reset(adev))
3855 return true;
3856
63fbf42f 3857 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3858 if (!adev->ip_blocks[i].status.valid)
63fbf42f 3859 continue;
a1255107
AD
3860 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
3861 adev->ip_blocks[i].status.hang =
3862 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
3863 if (adev->ip_blocks[i].status.hang) {
aac89168 3864 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
3865 asic_hang = true;
3866 }
3867 }
3868 return asic_hang;
3869}
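/*
 * Illustrative sketch (assumption): an IP block's check_soft_reset callback
 * follows the amd_ip_funcs convention of taking an opaque handle (the
 * amdgpu_device) and returning whether the block is hung. The register name
 * and bit below are hypothetical, for illustration only.
 */
#if 0
static bool example_ip_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* hypothetical status register check */
	return !!(RREG32(EXAMPLE_IP_STATUS_REG) & EXAMPLE_IP_HUNG_BIT);
}
#endif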
3870
e3ecdffa
AD
3871/**
3872 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3873 *
3874 * @adev: amdgpu_device pointer
3875 *
3876 * The list of all the hardware IPs that make up the asic is walked and the
3877 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
3878 * handles any IP specific hardware or software state changes that are
3879 * necessary for a soft reset to succeed.
3880 * Returns 0 on success, negative error code on failure.
3881 */
06ec9070 3882static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
3883{
3884 int i, r = 0;
3885
3886 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3887 if (!adev->ip_blocks[i].status.valid)
d31a501e 3888 continue;
a1255107
AD
3889 if (adev->ip_blocks[i].status.hang &&
3890 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3891 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
3892 if (r)
3893 return r;
3894 }
3895 }
3896
3897 return 0;
3898}
3899
e3ecdffa
AD
3900/**
3901 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3902 *
3903 * @adev: amdgpu_device pointer
3904 *
3905 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
3906 * reset is necessary to recover.
3907 * Returns true if a full asic reset is required, false if not.
3908 */
06ec9070 3909static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 3910{
da146d3b
AD
3911 int i;
3912
8bc04c29
AD
3913 if (amdgpu_asic_need_full_reset(adev))
3914 return true;
3915
da146d3b 3916 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3917 if (!adev->ip_blocks[i].status.valid)
da146d3b 3918 continue;
a1255107
AD
3919 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3920 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3921 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
3922 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3923 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 3924 if (adev->ip_blocks[i].status.hang) {
aac89168 3925	 dev_info(adev->dev, "Some blocks need a full reset!\n");
da146d3b
AD
3926 return true;
3927 }
3928 }
35d782fe
CZ
3929 }
3930 return false;
3931}
3932
e3ecdffa
AD
3933/**
3934 * amdgpu_device_ip_soft_reset - do a soft reset
3935 *
3936 * @adev: amdgpu_device pointer
3937 *
3938 * The list of all the hardware IPs that make up the asic is walked and the
3939 * soft_reset callbacks are run if the block is hung. soft_reset handles any
3940 * IP specific hardware or software state changes that are necessary to soft
3941 * reset the IP.
3942 * Returns 0 on success, negative error code on failure.
3943 */
06ec9070 3944static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3945{
3946 int i, r = 0;
3947
3948 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3949 if (!adev->ip_blocks[i].status.valid)
35d782fe 3950 continue;
a1255107
AD
3951 if (adev->ip_blocks[i].status.hang &&
3952 adev->ip_blocks[i].version->funcs->soft_reset) {
3953 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
3954 if (r)
3955 return r;
3956 }
3957 }
3958
3959 return 0;
3960}
3961
e3ecdffa
AD
3962/**
3963 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
3964 *
3965 * @adev: amdgpu_device pointer
3966 *
3967 * The list of all the hardware IPs that make up the asic is walked and the
3968 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
3969 * handles any IP specific hardware or software state changes that are
3970 * necessary after the IP has been soft reset.
3971 * Returns 0 on success, negative error code on failure.
3972 */
06ec9070 3973static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3974{
3975 int i, r = 0;
3976
3977 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3978 if (!adev->ip_blocks[i].status.valid)
35d782fe 3979 continue;
a1255107
AD
3980 if (adev->ip_blocks[i].status.hang &&
3981 adev->ip_blocks[i].version->funcs->post_soft_reset)
3982 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
3983 if (r)
3984 return r;
3985 }
3986
3987 return 0;
3988}
3989
e3ecdffa 3990/**
c33adbc7 3991 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
3992 *
3993 * @adev: amdgpu_device pointer
3994 *
3995 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
3996 * restore things like GPUVM page tables after a GPU reset where
3997 * the contents of VRAM might be lost.
403009bf
CK
3998 *
3999 * Returns:
4000 * 0 on success, negative error code on failure.
e3ecdffa 4001 */
c33adbc7 4002static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 4003{
c41d1cf6 4004 struct dma_fence *fence = NULL, *next = NULL;
403009bf
CK
4005 struct amdgpu_bo *shadow;
4006 long r = 1, tmo;
c41d1cf6
ML
4007
4008 if (amdgpu_sriov_runtime(adev))
b045d3af 4009 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
4010 else
4011 tmo = msecs_to_jiffies(100);
4012
aac89168 4013 dev_info(adev->dev, "recover vram bo from shadow start\n");
c41d1cf6 4014 mutex_lock(&adev->shadow_list_lock);
403009bf
CK
4015 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
4016
4017 /* No need to recover an evicted BO */
4018 if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
b575f10d 4019 shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
403009bf
CK
4020 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
4021 continue;
4022
4023 r = amdgpu_bo_restore_shadow(shadow, &next);
4024 if (r)
4025 break;
4026
c41d1cf6 4027 if (fence) {
1712fb1a 4028 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
4029 dma_fence_put(fence);
4030 fence = next;
1712fb1a 4031 if (tmo == 0) {
4032 r = -ETIMEDOUT;
c41d1cf6 4033 break;
1712fb1a 4034 } else if (tmo < 0) {
4035 r = tmo;
4036 break;
4037 }
403009bf
CK
4038 } else {
4039 fence = next;
c41d1cf6 4040 }
c41d1cf6
ML
4041 }
4042 mutex_unlock(&adev->shadow_list_lock);
4043
403009bf
CK
4044 if (fence)
4045 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
4046 dma_fence_put(fence);
4047
1712fb1a 4048 if (r < 0 || tmo <= 0) {
aac89168 4049 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
4050 return -EIO;
4051 }
c41d1cf6 4052
aac89168 4053 dev_info(adev->dev, "recover vram bo from shadow done\n");
403009bf 4054 return 0;
c41d1cf6
ML
4055}
4056
a90ad3c2 4057
e3ecdffa 4058/**
06ec9070 4059 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e 4060 *
982a820b 4061 * @adev: amdgpu_device pointer
87e3f136 4062 * @from_hypervisor: request from hypervisor
5740682e
ML
4063 *
4064	 * Do a VF FLR and reinitialize the ASIC.
3f48c681 4065	 * Returns 0 on success, negative error code on failure.
e3ecdffa
AD
4066 */
4067static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4068 bool from_hypervisor)
5740682e
ML
4069{
4070 int r;
4071
4072 if (from_hypervisor)
4073 r = amdgpu_virt_request_full_gpu(adev, true);
4074 else
4075 r = amdgpu_virt_reset_gpu(adev);
4076 if (r)
4077 return r;
a90ad3c2 4078
b639c22c
JZ
4079 amdgpu_amdkfd_pre_reset(adev);
4080
a90ad3c2 4081 /* Resume IP prior to SMC */
06ec9070 4082 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
4083 if (r)
4084 goto error;
a90ad3c2 4085
c9ffa427 4086 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 4087 /* we need recover gart prior to run SMC/CP/SDMA resume */
6c28aed6 4088 amdgpu_gtt_mgr_recover(ttm_manager_type(&adev->mman.bdev, TTM_PL_TT));
a90ad3c2 4089
7a3e0bb2
RZ
4090 r = amdgpu_device_fw_loading(adev);
4091 if (r)
4092 return r;
4093
a90ad3c2 4094 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 4095 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
4096 if (r)
4097 goto error;
a90ad3c2
ML
4098
4099 amdgpu_irq_gpu_reset_resume_helper(adev);
5740682e 4100 r = amdgpu_ib_ring_tests(adev);
f81e8d53 4101 amdgpu_amdkfd_post_reset(adev);
a90ad3c2 4102
abc34253
ED
4103error:
4104 amdgpu_virt_release_full_gpu(adev, true);
c41d1cf6 4105 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 4106 amdgpu_inc_vram_lost(adev);
c33adbc7 4107 r = amdgpu_device_recover_vram(adev);
a90ad3c2
ML
4108 }
4109
4110 return r;
4111}
4112
9a1cddd6 4113/**
4114 * amdgpu_device_has_job_running - check if there is any job in mirror list
4115 *
982a820b 4116 * @adev: amdgpu_device pointer
9a1cddd6 4117 *
4118	 * Check if there is any job in the mirror list.
4119 */
4120bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4121{
4122 int i;
4123 struct drm_sched_job *job;
4124
4125 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4126 struct amdgpu_ring *ring = adev->rings[i];
4127
4128 if (!ring || !ring->sched.thread)
4129 continue;
4130
4131 spin_lock(&ring->sched.job_list_lock);
4132 job = list_first_entry_or_null(&ring->sched.ring_mirror_list,
4133 struct drm_sched_job, node);
4134 spin_unlock(&ring->sched.job_list_lock);
4135 if (job)
4136 return true;
4137 }
4138 return false;
4139}
4140
12938fad
CK
4141/**
4142 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4143 *
982a820b 4144 * @adev: amdgpu_device pointer
12938fad
CK
4145 *
4146 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4147 * a hung GPU.
4148 */
4149bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4150{
4151 if (!amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 4152 dev_info(adev->dev, "Timeout, but no hardware hang detected.\n");
12938fad
CK
4153 return false;
4154 }
4155
3ba7b418
AG
4156 if (amdgpu_gpu_recovery == 0)
4157 goto disabled;
4158
4159 if (amdgpu_sriov_vf(adev))
4160 return true;
4161
4162 if (amdgpu_gpu_recovery == -1) {
4163 switch (adev->asic_type) {
fc42d47c
AG
4164 case CHIP_BONAIRE:
4165 case CHIP_HAWAII:
3ba7b418
AG
4166 case CHIP_TOPAZ:
4167 case CHIP_TONGA:
4168 case CHIP_FIJI:
4169 case CHIP_POLARIS10:
4170 case CHIP_POLARIS11:
4171 case CHIP_POLARIS12:
4172 case CHIP_VEGAM:
4173 case CHIP_VEGA20:
4174 case CHIP_VEGA10:
4175 case CHIP_VEGA12:
c43b849f 4176 case CHIP_RAVEN:
e9d4cf91 4177 case CHIP_ARCTURUS:
2cb44fb0 4178 case CHIP_RENOIR:
658c6639
AD
4179 case CHIP_NAVI10:
4180 case CHIP_NAVI14:
4181 case CHIP_NAVI12:
131a3c74 4182 case CHIP_SIENNA_CICHLID:
3ba7b418
AG
4183 break;
4184 default:
4185 goto disabled;
4186 }
12938fad
CK
4187 }
4188
4189 return true;
3ba7b418
AG
4190
4191disabled:
aac89168 4192 dev_info(adev->dev, "GPU recovery disabled.\n");
3ba7b418 4193 return false;
12938fad
CK
4194}
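/*
 * Illustrative sketch (assumption): a job timeout handler would typically
 * consult amdgpu_device_should_recover_gpu() before kicking off a full
 * recovery, roughly as in the fragment below (taken out of a hypothetical
 * TDR callback; the real path lives elsewhere, e.g. amdgpu_job.c).
 */
#if 0
	if (amdgpu_device_should_recover_gpu(ring->adev))
		amdgpu_device_gpu_recover(ring->adev, job);
	else
		drm_sched_suspend_timeout(&ring->sched);
#endif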
4195
5c6dd71e 4196
26bc5340
AG
4197static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
4198 struct amdgpu_job *job,
4199 bool *need_full_reset_arg)
4200{
4201 int i, r = 0;
4202 bool need_full_reset = *need_full_reset_arg;
71182665 4203
728e7e0c
JZ
4204 amdgpu_debugfs_wait_dump(adev);
4205
b602ca5f
TZ
4206 if (amdgpu_sriov_vf(adev)) {
4207 /* stop the data exchange thread */
4208 amdgpu_virt_fini_data_exchange(adev);
4209 }
4210
71182665 4211 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
4212 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4213 struct amdgpu_ring *ring = adev->rings[i];
4214
51687759 4215 if (!ring || !ring->sched.thread)
0875dc9e 4216 continue;
5740682e 4217
2f9d4084
ML
4218 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
4219 amdgpu_fence_driver_force_completion(ring);
0875dc9e 4220 }
d38ceaf9 4221
222b5f04
AG
4222	 if (job)
4223 drm_sched_increase_karma(&job->base);
4224
1d721ed6 4225 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
4226 if (!amdgpu_sriov_vf(adev)) {
4227
4228 if (!need_full_reset)
4229 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4230
4231 if (!need_full_reset) {
4232 amdgpu_device_ip_pre_soft_reset(adev);
4233 r = amdgpu_device_ip_soft_reset(adev);
4234 amdgpu_device_ip_post_soft_reset(adev);
4235 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
aac89168 4236 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
26bc5340
AG
4237 need_full_reset = true;
4238 }
4239 }
4240
4241 if (need_full_reset)
4242 r = amdgpu_device_ip_suspend(adev);
4243
4244 *need_full_reset_arg = need_full_reset;
4245 }
4246
4247 return r;
4248}
4249
041a62bc 4250static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
26bc5340 4251 struct list_head *device_list_handle,
7ac71382
AG
4252 bool *need_full_reset_arg,
4253 bool skip_hw_reset)
26bc5340
AG
4254{
4255 struct amdgpu_device *tmp_adev = NULL;
4256 bool need_full_reset = *need_full_reset_arg, vram_lost = false;
4257 int r = 0;
4258
4259 /*
4260	 * ASIC reset has to be done on all XGMI hive nodes ASAP
4261	 * to allow proper link negotiation in FW (within 1 sec)
4262 */
7ac71382 4263 if (!skip_hw_reset && need_full_reset) {
26bc5340 4264 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
041a62bc 4265 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 4266 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
c96cf282 4267 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
4268 r = -EALREADY;
4269 } else
4270 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 4271
041a62bc 4272 if (r) {
aac89168 4273 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4a580877 4274 r, adev_to_drm(tmp_adev)->unique);
041a62bc 4275 break;
ce316fa5
LM
4276 }
4277 }
4278
041a62bc
AG
4279 /* For XGMI wait for all resets to complete before proceed */
4280 if (!r) {
ce316fa5
LM
4281 list_for_each_entry(tmp_adev, device_list_handle,
4282 gmc.xgmi.head) {
4283 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4284 flush_work(&tmp_adev->xgmi_reset_work);
4285 r = tmp_adev->asic_reset_res;
4286 if (r)
4287 break;
ce316fa5
LM
4288 }
4289 }
4290 }
ce316fa5 4291 }
26bc5340 4292
43c4d576
JC
4293 if (!r && amdgpu_ras_intr_triggered()) {
4294 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4295 if (tmp_adev->mmhub.funcs &&
4296 tmp_adev->mmhub.funcs->reset_ras_error_count)
4297 tmp_adev->mmhub.funcs->reset_ras_error_count(tmp_adev);
4298 }
4299
00eaa571 4300 amdgpu_ras_intr_cleared();
43c4d576 4301 }
00eaa571 4302
26bc5340
AG
4303 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4304 if (need_full_reset) {
4305 /* post card */
4d2997ab 4306 if (amdgpu_device_asic_init(tmp_adev))
aac89168 4307 dev_warn(tmp_adev->dev, "asic atom init failed!");
26bc5340
AG
4308
4309 if (!r) {
4310 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
4311 r = amdgpu_device_ip_resume_phase1(tmp_adev);
4312 if (r)
4313 goto out;
4314
4315 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
4316 if (vram_lost) {
77e7f829 4317 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 4318 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
4319 }
4320
6c28aed6 4321 r = amdgpu_gtt_mgr_recover(ttm_manager_type(&tmp_adev->mman.bdev, TTM_PL_TT));
26bc5340
AG
4322 if (r)
4323 goto out;
4324
4325 r = amdgpu_device_fw_loading(tmp_adev);
4326 if (r)
4327 return r;
4328
4329 r = amdgpu_device_ip_resume_phase2(tmp_adev);
4330 if (r)
4331 goto out;
4332
4333 if (vram_lost)
4334 amdgpu_device_fill_reset_magic(tmp_adev);
4335
fdafb359
EQ
4336 /*
4337 * Add this ASIC as tracked as reset was already
4338 * complete successfully.
4339 */
4340 amdgpu_register_gpu_instance(tmp_adev);
4341
7c04ca50 4342 r = amdgpu_device_ip_late_init(tmp_adev);
4343 if (r)
4344 goto out;
4345
565d1941
EQ
4346 amdgpu_fbdev_set_suspend(tmp_adev, 0);
4347
e8fbaf03
GC
4348 /*
4349	 * The GPU enters a bad state once the number of
4350	 * faulty pages retired by ECC reaches the threshold,
4351	 * and RAS recovery is scheduled next. So add a check
4352	 * here to break recovery if the bad page threshold
4353	 * has indeed been exceeded, and remind the user to
4354	 * retire this GPU or to set a bigger
4355	 * bad_page_threshold value to fix this the next time
4356	 * the driver is probed.
4357 */
4358 if (!amdgpu_ras_check_err_threshold(tmp_adev)) {
4359 /* must succeed. */
4360 amdgpu_ras_resume(tmp_adev);
4361 } else {
4362 r = -EINVAL;
4363 goto out;
4364 }
e79a04d5 4365
26bc5340
AG
4366 /* Update PSP FW topology after reset */
4367 if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
4368 r = amdgpu_xgmi_update_topology(hive, tmp_adev);
4369 }
4370 }
4371
26bc5340
AG
4372out:
4373 if (!r) {
4374 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
4375 r = amdgpu_ib_ring_tests(tmp_adev);
4376 if (r) {
4377 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
4378 r = amdgpu_device_ip_suspend(tmp_adev);
4379 need_full_reset = true;
4380 r = -EAGAIN;
4381 goto end;
4382 }
4383 }
4384
4385 if (!r)
4386 r = amdgpu_device_recover_vram(tmp_adev);
4387 else
4388 tmp_adev->asic_reset_res = r;
4389 }
4390
4391end:
4392 *need_full_reset_arg = need_full_reset;
4393 return r;
4394}
4395
08ebb485
DL
4396static bool amdgpu_device_lock_adev(struct amdgpu_device *adev,
4397 struct amdgpu_hive_info *hive)
26bc5340 4398{
53b3f8f4
DL
4399 if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0)
4400 return false;
4401
08ebb485
DL
4402 if (hive) {
4403 down_write_nest_lock(&adev->reset_sem, &hive->hive_lock);
4404 } else {
4405 down_write(&adev->reset_sem);
4406 }
5740682e 4407
26bc5340 4408 atomic_inc(&adev->gpu_reset_counter);
a3a09142
AD
4409 switch (amdgpu_asic_reset_method(adev)) {
4410 case AMD_RESET_METHOD_MODE1:
4411 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
4412 break;
4413 case AMD_RESET_METHOD_MODE2:
4414 adev->mp1_state = PP_MP1_STATE_RESET;
4415 break;
4416 default:
4417 adev->mp1_state = PP_MP1_STATE_NONE;
4418 break;
4419 }
1d721ed6
AG
4420
4421 return true;
26bc5340 4422}
d38ceaf9 4423
26bc5340
AG
4424static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
4425{
89041940 4426 amdgpu_vf_error_trans_all(adev);
a3a09142 4427 adev->mp1_state = PP_MP1_STATE_NONE;
53b3f8f4 4428 atomic_set(&adev->in_gpu_reset, 0);
6049db43 4429 up_write(&adev->reset_sem);
26bc5340
AG
4430}
4431
3f12acc8
EQ
4432static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
4433{
4434 struct pci_dev *p = NULL;
4435
4436 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4437 adev->pdev->bus->number, 1);
4438 if (p) {
4439 pm_runtime_enable(&(p->dev));
4440 pm_runtime_resume(&(p->dev));
4441 }
4442}
4443
4444static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
4445{
4446 enum amd_reset_method reset_method;
4447 struct pci_dev *p = NULL;
4448 u64 expires;
4449
4450 /*
4451	 * For now, only BACO and mode1 reset are confirmed to
4452	 * suffer the audio issue if the audio device is not properly suspended.
4453 */
4454 reset_method = amdgpu_asic_reset_method(adev);
4455 if ((reset_method != AMD_RESET_METHOD_BACO) &&
4456 (reset_method != AMD_RESET_METHOD_MODE1))
4457 return -EINVAL;
4458
4459 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4460 adev->pdev->bus->number, 1);
4461 if (!p)
4462 return -ENODEV;
4463
4464 expires = pm_runtime_autosuspend_expiration(&(p->dev));
4465 if (!expires)
4466 /*
4467 * If we cannot get the audio device autosuspend delay,
4468	 * use a fixed 4s interval. Since the audio controller's
4469	 * default autosuspend delay setting is 3s, the 4s used
4470	 * here is guaranteed to cover it.
4471 */
54b7feb9 4472 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
4473
4474 while (!pm_runtime_status_suspended(&(p->dev))) {
4475 if (!pm_runtime_suspend(&(p->dev)))
4476 break;
4477
4478 if (expires < ktime_get_mono_fast_ns()) {
4479 dev_warn(adev->dev, "failed to suspend display audio\n");
4480 /* TODO: abort the succeeding gpu reset? */
4481 return -ETIMEDOUT;
4482 }
4483 }
4484
4485 pm_runtime_disable(&(p->dev));
4486
4487 return 0;
4488}
4489
26bc5340
AG
4490/**
4491 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
4492 *
982a820b 4493 * @adev: amdgpu_device pointer
26bc5340
AG
4494 * @job: which job trigger hang
4495 *
4496 * Attempt to reset the GPU if it has hung (all asics).
4497 * Attempt to do soft-reset or full-reset and reinitialize Asic
4498 * Returns 0 for success or an error on failure.
4499 */
4500
4501int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
4502 struct amdgpu_job *job)
4503{
1d721ed6 4504 struct list_head device_list, *device_list_handle = NULL;
7dd8c205
EQ
4505 bool need_full_reset = false;
4506 bool job_signaled = false;
26bc5340 4507 struct amdgpu_hive_info *hive = NULL;
26bc5340 4508 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 4509 int i, r = 0;
bb5c7235 4510 bool need_emergency_restart = false;
3f12acc8 4511 bool audio_suspended = false;
26bc5340 4512
6e3cd2a9 4513 /*
bb5c7235
WS
4514 * Special case: RAS triggered and full reset isn't supported
4515 */
4516 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
4517
d5ea093e
AG
4518 /*
4519 * Flush RAM to disk so that after reboot
4520	 * the user can read the log and see why the system rebooted.
4521 */
bb5c7235 4522 if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
4523 DRM_WARN("Emergency reboot.");
4524
4525 ksys_sync_helper();
4526 emergency_restart();
4527 }
4528
b823821f 4529 dev_info(adev->dev, "GPU %s begin!\n",
bb5c7235 4530 need_emergency_restart ? "jobs stop":"reset");
26bc5340
AG
4531
4532 /*
1d721ed6
AG
4533	 * Here we trylock to avoid a chain of resets executing from
4534	 * either jobs triggered on different adevs in an XGMI hive or jobs on
4535	 * different schedulers for the same device while this TO handler is running.
4536	 * We always reset all schedulers for a device and all devices in an XGMI
4537	 * hive, so that should take care of them too.
26bc5340 4538 */
d95e8e97 4539 hive = amdgpu_get_xgmi_hive(adev);
53b3f8f4
DL
4540 if (hive) {
4541 if (atomic_cmpxchg(&hive->in_reset, 0, 1) != 0) {
4542 DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
4543 job ? job->base.id : -1, hive->hive_id);
d95e8e97 4544 amdgpu_put_xgmi_hive(hive);
53b3f8f4
DL
4545 return 0;
4546 }
4547 mutex_lock(&hive->hive_lock);
1d721ed6 4548 }
26bc5340 4549
9e94d22c
EQ
4550 /*
4551 * Build list of devices to reset.
4552 * In case we are in XGMI hive mode, resort the device list
4553 * to put adev in the 1st position.
4554 */
4555 INIT_LIST_HEAD(&device_list);
4556 if (adev->gmc.xgmi.num_physical_nodes > 1) {
4557 if (!hive)
26bc5340 4558 return -ENODEV;
9e94d22c
EQ
4559 if (!list_is_first(&adev->gmc.xgmi.head, &hive->device_list))
4560 list_rotate_to_front(&adev->gmc.xgmi.head, &hive->device_list);
26bc5340
AG
4561 device_list_handle = &hive->device_list;
4562 } else {
4563 list_add_tail(&adev->gmc.xgmi.head, &device_list);
4564 device_list_handle = &device_list;
4565 }
4566
1d721ed6
AG
4567 /* block all schedulers and reset given job's ring */
4568 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
08ebb485 4569 if (!amdgpu_device_lock_adev(tmp_adev, hive)) {
aac89168 4570 dev_info(tmp_adev->dev, "Bailing on TDR for s_job:%llx, as another already in progress",
9e94d22c 4571 job ? job->base.id : -1);
cbfd17f7
DL
4572 r = 0;
4573 goto skip_recovery;
7c6e68c7
AG
4574 }
4575
3f12acc8
EQ
4576 /*
4577	 * Try to put the audio codec into suspend state
4578	 * before the gpu reset starts.
4579	 *
4580	 * The graphics device shares a power domain with
4581	 * the AZ (audio) power domain. Without this, we
4582	 * may change the audio hardware from behind the
4583	 * audio driver's back, which will trigger some
4584	 * audio codec errors.
4585 */
4586 if (!amdgpu_device_suspend_display_audio(tmp_adev))
4587 audio_suspended = true;
4588
9e94d22c
EQ
4589 amdgpu_ras_set_error_query_ready(tmp_adev, false);
4590
52fb44cf
EQ
4591 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
4592
9e94d22c
EQ
4593 if (!amdgpu_sriov_vf(tmp_adev))
4594 amdgpu_amdkfd_pre_reset(tmp_adev);
4595
12ffa55d
AG
4596 /*
4597	 * Mark these ASICs to be reset as untracked first,
4598	 * and add them back after the reset has completed.
4599 */
4600 amdgpu_unregister_gpu_instance(tmp_adev);
4601
a2f63ee8 4602 amdgpu_fbdev_set_suspend(tmp_adev, 1);
565d1941 4603
f1c1314b 4604 /* disable ras on ALL IPs */
bb5c7235 4605 if (!need_emergency_restart &&
b823821f 4606 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 4607 amdgpu_ras_suspend(tmp_adev);
4608
1d721ed6
AG
4609 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4610 struct amdgpu_ring *ring = tmp_adev->rings[i];
4611
4612 if (!ring || !ring->sched.thread)
4613 continue;
4614
0b2d2c2e 4615 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 4616
bb5c7235 4617 if (need_emergency_restart)
7c6e68c7 4618 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6
AG
4619 }
4620 }
4621
bb5c7235 4622 if (need_emergency_restart)
7c6e68c7
AG
4623 goto skip_sched_resume;
4624
1d721ed6
AG
4625 /*
4626 * Must check guilty signal here since after this point all old
4627 * HW fences are force signaled.
4628 *
4629 * job->base holds a reference to parent fence
4630 */
4631 if (job && job->base.s_fence->parent &&
7dd8c205 4632 dma_fence_is_signaled(job->base.s_fence->parent)) {
1d721ed6 4633 job_signaled = true;
1d721ed6
AG
4634 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
4635 goto skip_hw_reset;
4636 }
4637
26bc5340
AG
4638retry: /* Rest of adevs pre asic reset from XGMI hive. */
4639 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
26bc5340 4640 r = amdgpu_device_pre_asic_reset(tmp_adev,
ded08454 4641 (tmp_adev == adev) ? job : NULL,
26bc5340
AG
4642 &need_full_reset);
4643 /*TODO Should we stop ?*/
4644 if (r) {
aac89168 4645 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
4a580877 4646 r, adev_to_drm(tmp_adev)->unique);
26bc5340
AG
4647 tmp_adev->asic_reset_res = r;
4648 }
4649 }
4650
4651 /* Actual ASIC resets if needed.*/
4652 /* TODO Implement XGMI hive reset logic for SRIOV */
4653 if (amdgpu_sriov_vf(adev)) {
4654 r = amdgpu_device_reset_sriov(adev, job ? false : true);
4655 if (r)
4656 adev->asic_reset_res = r;
4657 } else {
7ac71382 4658 r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset, false);
26bc5340
AG
4659 if (r && r == -EAGAIN)
4660 goto retry;
4661 }
4662
1d721ed6
AG
4663skip_hw_reset:
4664
26bc5340
AG
4665 /* Post ASIC reset for all devs .*/
4666 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
7c6e68c7 4667
1d721ed6
AG
4668 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4669 struct amdgpu_ring *ring = tmp_adev->rings[i];
4670
4671 if (!ring || !ring->sched.thread)
4672 continue;
4673
4674 /* No point to resubmit jobs if we didn't HW reset*/
4675 if (!tmp_adev->asic_reset_res && !job_signaled)
4676 drm_sched_resubmit_jobs(&ring->sched);
4677
4678 drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
4679 }
4680
4681 if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
4a580877 4682 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
1d721ed6
AG
4683 }
4684
4685 tmp_adev->asic_reset_res = 0;
26bc5340
AG
4686
4687 if (r) {
4688 /* bad news, how to tell it to userspace ? */
12ffa55d 4689 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
4690 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
4691 } else {
12ffa55d 4692 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340 4693 }
7c6e68c7 4694 }
26bc5340 4695
7c6e68c7
AG
4696skip_sched_resume:
4697 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4698 /*unlock kfd: SRIOV would do it separately */
bb5c7235 4699 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
7c6e68c7 4700 amdgpu_amdkfd_post_reset(tmp_adev);
3f12acc8
EQ
4701 if (audio_suspended)
4702 amdgpu_device_resume_display_audio(tmp_adev);
26bc5340
AG
4703 amdgpu_device_unlock_adev(tmp_adev);
4704 }
4705
cbfd17f7 4706skip_recovery:
9e94d22c 4707 if (hive) {
53b3f8f4 4708 atomic_set(&hive->in_reset, 0);
9e94d22c 4709 mutex_unlock(&hive->hive_lock);
d95e8e97 4710 amdgpu_put_xgmi_hive(hive);
9e94d22c 4711 }
26bc5340
AG
4712
4713 if (r)
4714 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
d38ceaf9
AD
4715 return r;
4716}
4717
e3ecdffa
AD
4718/**
4719	 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
4720 *
4721 * @adev: amdgpu_device pointer
4722 *
4723	 * Fetches and stores in the driver the PCIE capabilities (gen speed
4724 * and lanes) of the slot the device is in. Handles APUs and
4725 * virtualized environments where PCIE config space may not be available.
4726 */
5494d864 4727static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 4728{
5d9a6330 4729 struct pci_dev *pdev;
c5313457
HK
4730 enum pci_bus_speed speed_cap, platform_speed_cap;
4731 enum pcie_link_width platform_link_width;
d0dd7f0c 4732
cd474ba0
AD
4733 if (amdgpu_pcie_gen_cap)
4734 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 4735
cd474ba0
AD
4736 if (amdgpu_pcie_lane_cap)
4737 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 4738
cd474ba0
AD
4739 /* covers APUs as well */
4740 if (pci_is_root_bus(adev->pdev->bus)) {
4741 if (adev->pm.pcie_gen_mask == 0)
4742 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
4743 if (adev->pm.pcie_mlw_mask == 0)
4744 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 4745 return;
cd474ba0 4746 }
d0dd7f0c 4747
c5313457
HK
4748 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
4749 return;
4750
dbaa922b
AD
4751 pcie_bandwidth_available(adev->pdev, NULL,
4752 &platform_speed_cap, &platform_link_width);
c5313457 4753
cd474ba0 4754 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
4755 /* asic caps */
4756 pdev = adev->pdev;
4757 speed_cap = pcie_get_speed_cap(pdev);
4758 if (speed_cap == PCI_SPEED_UNKNOWN) {
4759 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
4760 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4761 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 4762 } else {
5d9a6330
AD
4763 if (speed_cap == PCIE_SPEED_16_0GT)
4764 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4765 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4766 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4767 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
4768 else if (speed_cap == PCIE_SPEED_8_0GT)
4769 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4770 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4771 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
4772 else if (speed_cap == PCIE_SPEED_5_0GT)
4773 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4774 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
4775 else
4776 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
4777 }
4778 /* platform caps */
c5313457 4779 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
4780 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4781 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4782 } else {
c5313457 4783 if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
4784 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4785 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4786 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4787 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 4788 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
4789 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4790 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4791 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 4792 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
4793 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4794 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4795 else
4796 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
4797
cd474ba0
AD
4798 }
4799 }
4800 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 4801 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
4802 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
4803 } else {
c5313457 4804 switch (platform_link_width) {
5d9a6330 4805 case PCIE_LNK_X32:
cd474ba0
AD
4806 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
4807 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4808 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4809 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4810 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4811 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4812 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4813 break;
5d9a6330 4814 case PCIE_LNK_X16:
cd474ba0
AD
4815 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4816 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4817 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4818 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4819 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4820 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4821 break;
5d9a6330 4822 case PCIE_LNK_X12:
cd474ba0
AD
4823 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4824 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4825 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4826 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4827 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4828 break;
5d9a6330 4829 case PCIE_LNK_X8:
cd474ba0
AD
4830 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4831 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4832 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4833 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4834 break;
5d9a6330 4835 case PCIE_LNK_X4:
cd474ba0
AD
4836 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4837 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4838 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4839 break;
5d9a6330 4840 case PCIE_LNK_X2:
cd474ba0
AD
4841 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4842 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4843 break;
5d9a6330 4844 case PCIE_LNK_X1:
cd474ba0
AD
4845 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
4846 break;
4847 default:
4848 break;
4849 }
d0dd7f0c
AD
4850 }
4851 }
4852}
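/*
 * Illustrative sketch (assumption): once the masks above are populated, a
 * caller can test for a supported generation or link width with a simple
 * bitwise AND against the CAIL_* flags used in this function, e.g.:
 */
#if 0
	if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
		dev_dbg(adev->dev, "platform supports PCIe gen 3\n");
	if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X16)
		dev_dbg(adev->dev, "platform supports a x16 link\n");
#endif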
d38ceaf9 4853
361dbd01
AD
4854int amdgpu_device_baco_enter(struct drm_device *dev)
4855{
1348969a 4856 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 4857 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01 4858
4a580877 4859 if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
361dbd01
AD
4860 return -ENOTSUPP;
4861
7a22677b
LM
4862 if (ras && ras->supported)
4863 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
4864
9530273e 4865 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
4866}
4867
4868int amdgpu_device_baco_exit(struct drm_device *dev)
4869{
1348969a 4870 struct amdgpu_device *adev = drm_to_adev(dev);
7a22677b 4871 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 4872 int ret = 0;
361dbd01 4873
4a580877 4874 if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
361dbd01
AD
4875 return -ENOTSUPP;
4876
9530273e
EQ
4877 ret = amdgpu_dpm_baco_exit(adev);
4878 if (ret)
4879 return ret;
7a22677b
LM
4880
4881 if (ras && ras->supported)
4882 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
4883
4884 return 0;
361dbd01 4885}
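/*
 * Illustrative sketch (assumption): BACO (Bus Active, Chip Off) entry and
 * exit are expected to be used as a pair, e.g. as a fragment of a
 * hypothetical runtime power-down/power-up sequence:
 */
#if 0
	r = amdgpu_device_baco_enter(dev);
	if (r)
		return r;
	/* ... device sits in BACO: bus active, chip powered off ... */
	r = amdgpu_device_baco_exit(dev);
#endif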
c9a6b82f 4886
acd89fca
AG
4887static void amdgpu_cancel_all_tdr(struct amdgpu_device *adev)
4888{
4889 int i;
4890
4891 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4892 struct amdgpu_ring *ring = adev->rings[i];
4893
4894 if (!ring || !ring->sched.thread)
4895 continue;
4896
4897 cancel_delayed_work_sync(&ring->sched.work_tdr);
4898 }
4899}
4900
c9a6b82f
AG
4901/**
4902 * amdgpu_pci_error_detected - Called when a PCI error is detected.
4903 * @pdev: PCI device struct
4904 * @state: PCI channel state
4905 *
4906 * Description: Called when a PCI error is detected.
4907 *
4908 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
4909 */
4910pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
4911{
4912 struct drm_device *dev = pci_get_drvdata(pdev);
4913 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 4914 int i;
c9a6b82f
AG
4915
4916 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
4917
6894305c
AG
4918 if (adev->gmc.xgmi.num_physical_nodes > 1) {
4919 DRM_WARN("No support for XGMI hive yet...");
4920 return PCI_ERS_RESULT_DISCONNECT;
4921 }
4922
c9a6b82f
AG
4923 switch (state) {
4924 case pci_channel_io_normal:
4925 return PCI_ERS_RESULT_CAN_RECOVER;
acd89fca
AG
4926 /* Fatal error, prepare for slot reset */
4927 case pci_channel_io_frozen:
4928 /*
4929	 * Cancel and wait for all TDRs in progress if we fail to
4930	 * set adev->in_gpu_reset in amdgpu_device_lock_adev
4931 *
4932 * Locking adev->reset_sem will prevent any external access
4933 * to GPU during PCI error recovery
4934 */
4935 while (!amdgpu_device_lock_adev(adev, NULL))
4936 amdgpu_cancel_all_tdr(adev);
4937
4938 /*
4939 * Block any work scheduling as we do for regular GPU reset
4940 * for the duration of the recovery
4941 */
4942 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4943 struct amdgpu_ring *ring = adev->rings[i];
4944
4945 if (!ring || !ring->sched.thread)
4946 continue;
4947
4948 drm_sched_stop(&ring->sched, NULL);
4949 }
c9a6b82f
AG
4950 return PCI_ERS_RESULT_NEED_RESET;
4951 case pci_channel_io_perm_failure:
4952 /* Permanent error, prepare for device removal */
4953 return PCI_ERS_RESULT_DISCONNECT;
4954 }
4955
4956 return PCI_ERS_RESULT_NEED_RESET;
4957}
4958
4959/**
4960 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
4961 * @pdev: pointer to PCI device
4962 */
4963pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
4964{
4965
4966 DRM_INFO("PCI error: mmio enabled callback!!\n");
4967
4968 /* TODO - dump whatever for debugging purposes */
4969
4970	 /* This is called only if amdgpu_pci_error_detected returns
4971 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
4972 * works, no need to reset slot.
4973 */
4974
4975 return PCI_ERS_RESULT_RECOVERED;
4976}
4977
4978/**
4979 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
4980 * @pdev: PCI device struct
4981 *
4982 * Description: This routine is called by the pci error recovery
4983 * code after the PCI slot has been reset, just before we
4984 * should resume normal operations.
4985 */
4986pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
4987{
4988 struct drm_device *dev = pci_get_drvdata(pdev);
4989 struct amdgpu_device *adev = drm_to_adev(dev);
362c7b91 4990 int r, i;
7ac71382 4991 bool need_full_reset = true;
362c7b91 4992 u32 memsize;
7ac71382 4993 struct list_head device_list;
c9a6b82f
AG
4994
4995 DRM_INFO("PCI error: slot reset callback!!\n");
4996
7ac71382
AG
4997 INIT_LIST_HEAD(&device_list);
4998 list_add_tail(&adev->gmc.xgmi.head, &device_list);
4999
362c7b91
AG
5000 /* wait for asic to come out of reset */
5001 msleep(500);
5002
7ac71382 5003 /* Restore PCI confspace */
c1dd4aa6 5004 amdgpu_device_load_pci_state(pdev);
c9a6b82f 5005
362c7b91
AG
5006 /* confirm ASIC came out of reset */
5007 for (i = 0; i < adev->usec_timeout; i++) {
5008 memsize = amdgpu_asic_get_config_memsize(adev);
5009
5010 if (memsize != 0xffffffff)
5011 break;
5012 udelay(1);
5013 }
5014 if (memsize == 0xffffffff) {
5015 r = -ETIME;
5016 goto out;
5017 }
5018
362c7b91 5019 adev->in_pci_err_recovery = true;
7ac71382 5020 r = amdgpu_device_pre_asic_reset(adev, NULL, &need_full_reset);
bf36b52e 5021 adev->in_pci_err_recovery = false;
c9a6b82f
AG
5022 if (r)
5023 goto out;
5024
7ac71382 5025 r = amdgpu_do_asic_reset(NULL, &device_list, &need_full_reset, true);
c9a6b82f
AG
5026
5027out:
c9a6b82f 5028 if (!r) {
c1dd4aa6
AG
5029 if (amdgpu_device_cache_pci_state(adev->pdev))
5030 pci_restore_state(adev->pdev);
5031
c9a6b82f
AG
5032 DRM_INFO("PCIe error recovery succeeded\n");
5033 } else {
5034 DRM_ERROR("PCIe error recovery failed, err:%d", r);
5035 amdgpu_device_unlock_adev(adev);
5036 }
5037
5038 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
5039}
5040
5041/**
5042 * amdgpu_pci_resume() - resume normal ops after PCI reset
5043 * @pdev: pointer to PCI device
5044 *
5045 * Called when the error recovery driver tells us that its
5046	 * Called when the error recovery driver tells us that it's
5047 * halted scsi ops to resume.
5048 */
5049void amdgpu_pci_resume(struct pci_dev *pdev)
5050{
5051 struct drm_device *dev = pci_get_drvdata(pdev);
5052 struct amdgpu_device *adev = drm_to_adev(dev);
acd89fca 5053 int i;
c9a6b82f 5054
c9a6b82f
AG
5055
5056 DRM_INFO("PCI error: resume callback!!\n");
acd89fca
AG
5057
5058 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5059 struct amdgpu_ring *ring = adev->rings[i];
5060
5061 if (!ring || !ring->sched.thread)
5062 continue;
5063
5064
5065 drm_sched_resubmit_jobs(&ring->sched);
5066 drm_sched_start(&ring->sched, true);
5067 }
5068
5069 amdgpu_device_unlock_adev(adev);
c9a6b82f 5070}
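/*
 * Illustrative sketch (assumption): the AER callbacks above are hooked up to
 * the PCI core through a struct pci_error_handlers attached to the driver's
 * struct pci_driver; the real wiring lives elsewhere in the driver (e.g.
 * amdgpu_drv.c). The structure name below is hypothetical.
 */
#if 0
static const struct pci_error_handlers example_pci_err_handler = {
	.error_detected	= amdgpu_pci_error_detected,
	.mmio_enabled	= amdgpu_pci_mmio_enabled,
	.slot_reset	= amdgpu_pci_slot_reset,
	.resume		= amdgpu_pci_resume,
};
#endif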
c1dd4aa6
AG
5071
5072bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
5073{
5074 struct drm_device *dev = pci_get_drvdata(pdev);
5075 struct amdgpu_device *adev = drm_to_adev(dev);
5076 int r;
5077
5078 r = pci_save_state(pdev);
5079 if (!r) {
5080 kfree(adev->pci_state);
5081
5082 adev->pci_state = pci_store_saved_state(pdev);
5083
5084 if (!adev->pci_state) {
5085 DRM_ERROR("Failed to store PCI saved state");
5086 return false;
5087 }
5088 } else {
5089 DRM_WARN("Failed to save PCI state, err:%d\n", r);
5090 return false;
5091 }
5092
5093 return true;
5094}
5095
5096bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
5097{
5098 struct drm_device *dev = pci_get_drvdata(pdev);
5099 struct amdgpu_device *adev = drm_to_adev(dev);
5100 int r;
5101
5102 if (!adev->pci_state)
5103 return false;
5104
5105 r = pci_load_saved_state(pdev, adev->pci_state);
5106
5107 if (!r) {
5108 pci_restore_state(pdev);
5109 } else {
5110 DRM_WARN("Failed to load PCI state, err:%d\n", r);
5111 return false;
5112 }
5113
5114 return true;
5115}
5116
5117