drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
b1ddf548 28#include <linux/power_supply.h>
0875dc9e 29#include <linux/kthread.h>
fdf2f6c5 30#include <linux/module.h>
d38ceaf9
AD
31#include <linux/console.h>
32#include <linux/slab.h>
fdf2f6c5 33
4562236b 34#include <drm/drm_atomic_helper.h>
fcd70cd3 35#include <drm/drm_probe_helper.h>
d38ceaf9
AD
36#include <drm/amdgpu_drm.h>
37#include <linux/vgaarb.h>
38#include <linux/vga_switcheroo.h>
39#include <linux/efi.h>
40#include "amdgpu.h"
f4b373f4 41#include "amdgpu_trace.h"
d38ceaf9
AD
42#include "amdgpu_i2c.h"
43#include "atom.h"
44#include "amdgpu_atombios.h"
a5bde2f9 45#include "amdgpu_atomfirmware.h"
d0dd7f0c 46#include "amd_pcie.h"
33f34802
KW
47#ifdef CONFIG_DRM_AMDGPU_SI
48#include "si.h"
49#endif
a2e73f56
AD
50#ifdef CONFIG_DRM_AMDGPU_CIK
51#include "cik.h"
52#endif
aaa36a97 53#include "vi.h"
460826e6 54#include "soc15.h"
0a5b8c7b 55#include "nv.h"
d38ceaf9 56#include "bif/bif_4_1_d.h"
9accf2fd 57#include <linux/pci.h>
bec86378 58#include <linux/firmware.h>
89041940 59#include "amdgpu_vf_error.h"
d38ceaf9 60
ba997709 61#include "amdgpu_amdkfd.h"
d2f52ac8 62#include "amdgpu_pm.h"
d38ceaf9 63
5183411b 64#include "amdgpu_xgmi.h"
c030f2e4 65#include "amdgpu_ras.h"
9c7c85f7 66#include "amdgpu_pmu.h"
bd607166 67#include "amdgpu_fru_eeprom.h"
5183411b 68
d5ea093e 69#include <linux/suspend.h>
c6a6e2db 70#include <drm/task_barrier.h>
3f12acc8 71#include <linux/pm_runtime.h>
d5ea093e 72
e2a75f88 73MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
3f76dced 74MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
2d2e5e7e 75MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
ad5a67a7 76MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
54c4d17e 77MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
65e60f6e 78MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
b51a26a0 79MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
23c6268e 80MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
ed42cfe1 81MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
42b325e5 82MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
c0a43457 83MODULE_FIRMWARE("amdgpu/sienna_cichlid_gpu_info.bin");
e2a75f88 84
2dc80b00
S
85#define AMDGPU_RESUME_MS 2000
86
050091ab 87const char *amdgpu_asic_name[] = {
da69c161
KW
88 "TAHITI",
89 "PITCAIRN",
90 "VERDE",
91 "OLAND",
92 "HAINAN",
d38ceaf9
AD
93 "BONAIRE",
94 "KAVERI",
95 "KABINI",
96 "HAWAII",
97 "MULLINS",
98 "TOPAZ",
99 "TONGA",
48299f95 100 "FIJI",
d38ceaf9 101 "CARRIZO",
139f4917 102 "STONEY",
2cc0c0b5
FC
103 "POLARIS10",
104 "POLARIS11",
c4642a47 105 "POLARIS12",
48ff108d 106 "VEGAM",
d4196f01 107 "VEGA10",
8fab806a 108 "VEGA12",
956fcddc 109 "VEGA20",
2ca8a5d2 110 "RAVEN",
d6c3b24e 111 "ARCTURUS",
1eee4228 112 "RENOIR",
852a6626 113 "NAVI10",
87dbad02 114 "NAVI14",
9802f5d7 115 "NAVI12",
ccaf72d3 116 "SIENNA_CICHLID",
d38ceaf9
AD
117 "LAST",
118};
119
dcea6e65
KR
120/**
121 * DOC: pcie_replay_count
122 *
123 * The amdgpu driver provides a sysfs API for reporting the total number
124 * of PCIe replays (NAKs).
125 * The file pcie_replay_count is used for this and returns the total
126 * number of replays as a sum of the NAKs generated and NAKs received.
127 */
128
129static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
130 struct device_attribute *attr, char *buf)
131{
132 struct drm_device *ddev = dev_get_drvdata(dev);
133 struct amdgpu_device *adev = ddev->dev_private;
134 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
135
136 return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
137}
138
139static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
140 amdgpu_device_get_pcie_replay_count, NULL);
141
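/*
 * Illustrative usage sketch (not part of the driver): once the attribute is
 * registered on the PCI device, the counter can be read from user space.
 * The sysfs path below assumes the GPU is enumerated as card0 and may differ
 * on other systems.
 *
 *	FILE *f = fopen("/sys/class/drm/card0/device/pcie_replay_count", "r");
 *	unsigned long long replays = 0;
 *
 *	if (f) {
 *		if (fscanf(f, "%llu", &replays) != 1)
 *			replays = 0;
 *		fclose(f);
 *	}
 */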
5494d864
AD
142static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
143
bd607166
KR
144/**
145 * DOC: product_name
146 *
147 * The amdgpu driver provides a sysfs API for reporting the product name
148 * for the device
149 * The file serial_number is used for this and returns the product name
150 * as returned from the FRU.
151 * NOTE: This is only available for certain server cards
152 */
153
154static ssize_t amdgpu_device_get_product_name(struct device *dev,
155 struct device_attribute *attr, char *buf)
156{
157 struct drm_device *ddev = dev_get_drvdata(dev);
158 struct amdgpu_device *adev = ddev->dev_private;
159
160 return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_name);
161}
162
163static DEVICE_ATTR(product_name, S_IRUGO,
164 amdgpu_device_get_product_name, NULL);
165
166/**
167 * DOC: product_number
168 *
169 * The amdgpu driver provides a sysfs API for reporting the part number
170 * for the device.
171 * The file product_number is used for this and returns the part number
172 * as returned from the FRU.
173 * NOTE: This is only available for certain server cards
174 */
175
176static ssize_t amdgpu_device_get_product_number(struct device *dev,
177 struct device_attribute *attr, char *buf)
178{
179 struct drm_device *ddev = dev_get_drvdata(dev);
180 struct amdgpu_device *adev = ddev->dev_private;
181
182 return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_number);
183}
184
185static DEVICE_ATTR(product_number, S_IRUGO,
186 amdgpu_device_get_product_number, NULL);
187
188/**
189 * DOC: serial_number
190 *
191 * The amdgpu driver provides a sysfs API for reporting the serial number
192 * for the device
193 * The file serial_number is used for this and returns the serial number
194 * as returned from the FRU.
195 * NOTE: This is only available for certain server cards
196 */
197
198static ssize_t amdgpu_device_get_serial_number(struct device *dev,
199 struct device_attribute *attr, char *buf)
200{
201 struct drm_device *ddev = dev_get_drvdata(dev);
202 struct amdgpu_device *adev = ddev->dev_private;
203
204 return snprintf(buf, PAGE_SIZE, "%s\n", adev->serial);
205}
206
207static DEVICE_ATTR(serial_number, S_IRUGO,
208 amdgpu_device_get_serial_number, NULL);
209
e3ecdffa 210/**
31af062a 211 * amdgpu_device_supports_boco - Is the device a dGPU with HG/PX power control
e3ecdffa
AD
212 *
213 * @dev: drm_device pointer
214 *
215 * Returns true if the device is a dGPU with HG/PX power control,
216 * otherwise returns false.
217 */
31af062a 218bool amdgpu_device_supports_boco(struct drm_device *dev)
d38ceaf9
AD
219{
220 struct amdgpu_device *adev = dev->dev_private;
221
2f7d10b3 222 if (adev->flags & AMD_IS_PX)
d38ceaf9
AD
223 return true;
224 return false;
225}
226
a69cba42
AD
227/**
228 * amdgpu_device_supports_baco - Does the device support BACO
229 *
230 * @dev: drm_device pointer
231 *
232 * Returns true if the device supports BACO,
233 * otherwise returns false.
234 */
235bool amdgpu_device_supports_baco(struct drm_device *dev)
236{
237 struct amdgpu_device *adev = dev->dev_private;
238
239 return amdgpu_asic_supports_baco(adev);
240}
241
e35e2b11
TY
242/**
243 * VRAM access helper functions.
244 *
245 * amdgpu_device_vram_access - read/write a buffer in vram
246 *
247 * @adev: amdgpu_device pointer
248 * @pos: offset of the buffer in vram
249 * @buf: virtual address of the buffer in system memory
250 * @size: read/write size; @buf must be at least @size bytes
251 * @write: true - write to vram, otherwise - read from vram
252 */
253void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
254 uint32_t *buf, size_t size, bool write)
255{
e35e2b11 256 unsigned long flags;
ce05ac56
CK
257 uint32_t hi = ~0;
258 uint64_t last;
259
9d11eb0d
CK
260
261#ifdef CONFIG_64BIT
262 last = min(pos + size, adev->gmc.visible_vram_size);
263 if (last > pos) {
264 void __iomem *addr = adev->mman.aper_base_kaddr + pos;
265 size_t count = last - pos;
266
267 if (write) {
268 memcpy_toio(addr, buf, count);
269 mb();
270 amdgpu_asic_flush_hdp(adev, NULL);
271 } else {
272 amdgpu_asic_invalidate_hdp(adev, NULL);
273 mb();
274 memcpy_fromio(buf, addr, count);
275 }
276
277 if (count == size)
278 return;
279
280 pos += count;
281 buf += count / 4;
282 size -= count;
283 }
284#endif
285
ce05ac56
CK
286 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
287 for (last = pos + size; pos < last; pos += 4) {
288 uint32_t tmp = pos >> 31;
e35e2b11 289
e35e2b11 290 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
ce05ac56
CK
291 if (tmp != hi) {
292 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
293 hi = tmp;
294 }
e35e2b11
TY
295 if (write)
296 WREG32_NO_KIQ(mmMM_DATA, *buf++);
297 else
298 *buf++ = RREG32_NO_KIQ(mmMM_DATA);
e35e2b11 299 }
ce05ac56 300 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
e35e2b11
TY
301}
302
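/*
 * Illustrative usage sketch (hypothetical caller and offset, not part of
 * this file): write a small dword-aligned buffer into VRAM at @vram_offset
 * and read it back for verification.
 *
 *	uint32_t pattern[4] = { 0xdeadbeef, 0x0, 0x12345678, 0x0 };
 *	uint32_t readback[4];
 *
 *	amdgpu_device_vram_access(adev, vram_offset, pattern,
 *				  sizeof(pattern), true);
 *	amdgpu_device_vram_access(adev, vram_offset, readback,
 *				  sizeof(readback), false);
 */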
d38ceaf9 303/*
e78b579d 304 * MMIO register access helper functions.
d38ceaf9 305 */
e3ecdffa 306/**
e78b579d 307 * amdgpu_mm_rreg - read a memory mapped IO register
e3ecdffa
AD
308 *
309 * @adev: amdgpu_device pointer
310 * @reg: dword aligned register offset
311 * @acc_flags: access flags which require special behavior
312 *
313 * Returns the 32 bit value from the offset specified.
314 */
e78b579d
HZ
315uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
316 uint32_t acc_flags)
d38ceaf9 317{
f4b373f4
TSD
318 uint32_t ret;
319
f384ff95 320 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
d33a99c4 321 return amdgpu_kiq_rreg(adev, reg);
bc992ba5 322
ec59847e 323 if ((reg * 4) < adev->rmmio_size)
f4b373f4 324 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
e78b579d
HZ
325 else {
326 unsigned long flags;
327
328 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
329 writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
330 ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
331 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
332 }
333 trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
f4b373f4 334 return ret;
d38ceaf9
AD
335}
336
421a2a30
ML
337/*
338 * MMIO register read with bytes helper functions
339 * @offset: byte offset from MMIO start
340 *
341 */
342
e3ecdffa
AD
343/**
344 * amdgpu_mm_rreg8 - read a memory mapped IO register
345 *
346 * @adev: amdgpu_device pointer
347 * @offset: byte aligned register offset
348 *
349 * Returns the 8 bit value from the offset specified.
350 */
421a2a30
ML
351uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) {
352 if (offset < adev->rmmio_size)
353 return (readb(adev->rmmio + offset));
354 BUG();
355}
356
357/*
358 * MMIO register write with bytes helper functions
359 * @offset: byte offset from MMIO start
360 * @value: the value to be written to the register
361 *
362 */
e3ecdffa
AD
363/**
364 * amdgpu_mm_wreg8 - write to a memory mapped IO register
365 *
366 * @adev: amdgpu_device pointer
367 * @offset: byte aligned register offset
368 * @value: 8 bit value to write
369 *
370 * Writes the value specified to the offset specified.
371 */
421a2a30
ML
372void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) {
373 if (offset < adev->rmmio_size)
374 writeb(value, adev->rmmio + offset);
375 else
376 BUG();
377}
378
e78b579d 379void static inline amdgpu_mm_wreg_mmio(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t acc_flags)
2e0cc4d4 380{
e78b579d 381 trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);
2e0cc4d4 382
ec59847e 383 if ((reg * 4) < adev->rmmio_size)
2e0cc4d4 384 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
e78b579d
HZ
385 else {
386 unsigned long flags;
387
388 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
389 writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
390 writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
391 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
392 }
2e0cc4d4
ML
393}
394
e3ecdffa 395/**
e78b579d 396 * amdgpu_mm_wreg - write to a memory mapped IO register
e3ecdffa
AD
397 *
398 * @adev: amdgpu_device pointer
399 * @reg: dword aligned register offset
400 * @v: 32 bit value to write to the register
401 * @acc_flags: access flags which require special behavior
402 *
403 * Writes the value specified to the offset specified.
404 */
e78b579d
HZ
405void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
406 uint32_t acc_flags)
d38ceaf9 407{
f384ff95 408 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
d33a99c4 409 return amdgpu_kiq_wreg(adev, reg, v);
bc992ba5 410
e78b579d 411 amdgpu_mm_wreg_mmio(adev, reg, v, acc_flags);
2e0cc4d4 412}
d38ceaf9 413
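/*
 * Illustrative sketch (hypothetical register offset and mask, not part of
 * this file): a read/modify/write through the helpers above. Driver code
 * normally goes through the RREG32()/WREG32() wrappers, which expand to
 * these functions.
 *
 *	uint32_t val = amdgpu_mm_rreg(adev, reg_offset, 0);
 *
 *	val |= enable_bit_mask;
 *	amdgpu_mm_wreg(adev, reg_offset, val, 0);
 */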
2e0cc4d4
ML
414/*
415 * amdgpu_mm_wreg_mmio_rlc - write register either with mmio or with RLC path if in range
416 *
417 * this function is invoked only for the debugfs register access
418 */
419void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
420 uint32_t acc_flags)
421{
422 if (amdgpu_sriov_fullaccess(adev) &&
423 adev->gfx.rlc.funcs &&
424 adev->gfx.rlc.funcs->is_rlcg_access_range) {
47ed4e1c 425
2e0cc4d4
ML
426 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
427 return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v);
47ed4e1c 428 }
2e0cc4d4 429
e78b579d 430 amdgpu_mm_wreg_mmio(adev, reg, v, acc_flags);
d38ceaf9
AD
431}
432
e3ecdffa
AD
433/**
434 * amdgpu_io_rreg - read an IO register
435 *
436 * @adev: amdgpu_device pointer
437 * @reg: dword aligned register offset
438 *
439 * Returns the 32 bit value from the offset specified.
440 */
d38ceaf9
AD
441u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
442{
443 if ((reg * 4) < adev->rio_mem_size)
444 return ioread32(adev->rio_mem + (reg * 4));
445 else {
446 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
447 return ioread32(adev->rio_mem + (mmMM_DATA * 4));
448 }
449}
450
e3ecdffa
AD
451/**
452 * amdgpu_io_wreg - write to an IO register
453 *
454 * @adev: amdgpu_device pointer
455 * @reg: dword aligned register offset
456 * @v: 32 bit value to write to the register
457 *
458 * Writes the value specified to the offset specified.
459 */
d38ceaf9
AD
460void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
461{
d38ceaf9
AD
462 if ((reg * 4) < adev->rio_mem_size)
463 iowrite32(v, adev->rio_mem + (reg * 4));
464 else {
465 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
466 iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
467 }
468}
469
470/**
471 * amdgpu_mm_rdoorbell - read a doorbell dword
472 *
473 * @adev: amdgpu_device pointer
474 * @index: doorbell index
475 *
476 * Returns the value in the doorbell aperture at the
477 * requested doorbell index (CIK).
478 */
479u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
480{
481 if (index < adev->doorbell.num_doorbells) {
482 return readl(adev->doorbell.ptr + index);
483 } else {
484 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
485 return 0;
486 }
487}
488
489/**
490 * amdgpu_mm_wdoorbell - write a doorbell dword
491 *
492 * @adev: amdgpu_device pointer
493 * @index: doorbell index
494 * @v: value to write
495 *
496 * Writes @v to the doorbell aperture at the
497 * requested doorbell index (CIK).
498 */
499void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
500{
501 if (index < adev->doorbell.num_doorbells) {
502 writel(v, adev->doorbell.ptr + index);
503 } else {
504 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
505 }
506}
507
832be404
KW
508/**
509 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
510 *
511 * @adev: amdgpu_device pointer
512 * @index: doorbell index
513 *
514 * Returns the value in the doorbell aperture at the
515 * requested doorbell index (VEGA10+).
516 */
517u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
518{
519 if (index < adev->doorbell.num_doorbells) {
520 return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
521 } else {
522 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
523 return 0;
524 }
525}
526
527/**
528 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
529 *
530 * @adev: amdgpu_device pointer
531 * @index: doorbell index
532 * @v: value to write
533 *
534 * Writes @v to the doorbell aperture at the
535 * requested doorbell index (VEGA10+).
536 */
537void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
538{
539 if (index < adev->doorbell.num_doorbells) {
540 atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
541 } else {
542 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
543 }
544}
545
d38ceaf9
AD
546/**
547 * amdgpu_invalid_rreg - dummy reg read function
548 *
549 * @adev: amdgpu device pointer
550 * @reg: offset of register
551 *
552 * Dummy register read function. Used for register blocks
553 * that certain asics don't have (all asics).
554 * Returns the value in the register.
555 */
556static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
557{
558 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
559 BUG();
560 return 0;
561}
562
563/**
564 * amdgpu_invalid_wreg - dummy reg write function
565 *
566 * @adev: amdgpu device pointer
567 * @reg: offset of register
568 * @v: value to write to the register
569 *
570 * Dummy register write function. Used for register blocks
571 * that certain asics don't have (all asics).
572 */
573static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
574{
575 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
576 reg, v);
577 BUG();
578}
579
4fa1c6a6
TZ
580/**
581 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
582 *
583 * @adev: amdgpu device pointer
584 * @reg: offset of register
585 *
586 * Dummy register read function. Used for register blocks
587 * that certain asics don't have (all asics).
588 * Returns the value in the register.
589 */
590static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
591{
592 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
593 BUG();
594 return 0;
595}
596
597/**
598 * amdgpu_invalid_wreg64 - dummy reg write function
599 *
600 * @adev: amdgpu device pointer
601 * @reg: offset of register
602 * @v: value to write to the register
603 *
604 * Dummy register write function. Used for register blocks
605 * that certain asics don't have (all asics).
606 */
607static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
608{
609 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
610 reg, v);
611 BUG();
612}
613
d38ceaf9
AD
614/**
615 * amdgpu_block_invalid_rreg - dummy reg read function
616 *
617 * @adev: amdgpu device pointer
618 * @block: offset of instance
619 * @reg: offset of register
620 *
621 * Dummy register read function. Used for register blocks
622 * that certain asics don't have (all asics).
623 * Returns the value in the register.
624 */
625static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
626 uint32_t block, uint32_t reg)
627{
628 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
629 reg, block);
630 BUG();
631 return 0;
632}
633
634/**
635 * amdgpu_block_invalid_wreg - dummy reg write function
636 *
637 * @adev: amdgpu device pointer
638 * @block: offset of instance
639 * @reg: offset of register
640 * @v: value to write to the register
641 *
642 * Dummy register write function. Used for register blocks
643 * that certain asics don't have (all asics).
644 */
645static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
646 uint32_t block,
647 uint32_t reg, uint32_t v)
648{
649 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
650 reg, block, v);
651 BUG();
652}
653
e3ecdffa
AD
654/**
655 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
656 *
657 * @adev: amdgpu device pointer
658 *
659 * Allocates a scratch page of VRAM for use by various things in the
660 * driver.
661 */
06ec9070 662static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
d38ceaf9 663{
a4a02777
CK
664 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
665 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
666 &adev->vram_scratch.robj,
667 &adev->vram_scratch.gpu_addr,
668 (void **)&adev->vram_scratch.ptr);
d38ceaf9
AD
669}
670
e3ecdffa
AD
671/**
672 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
673 *
674 * @adev: amdgpu device pointer
675 *
676 * Frees the VRAM scratch page.
677 */
06ec9070 678static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 679{
078af1a3 680 amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
d38ceaf9
AD
681}
682
683/**
9c3f2b54 684 * amdgpu_device_program_register_sequence - program an array of registers.
d38ceaf9
AD
685 *
686 * @adev: amdgpu_device pointer
687 * @registers: pointer to the register array
688 * @array_size: size of the register array
689 *
690 * Programs an array of registers with AND and OR masks.
691 * This is a helper for setting golden registers.
692 */
9c3f2b54
AD
693void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
694 const u32 *registers,
695 const u32 array_size)
d38ceaf9
AD
696{
697 u32 tmp, reg, and_mask, or_mask;
698 int i;
699
700 if (array_size % 3)
701 return;
702
703 for (i = 0; i < array_size; i +=3) {
704 reg = registers[i + 0];
705 and_mask = registers[i + 1];
706 or_mask = registers[i + 2];
707
708 if (and_mask == 0xffffffff) {
709 tmp = or_mask;
710 } else {
711 tmp = RREG32(reg);
712 tmp &= ~and_mask;
e0d07657
HZ
713 if (adev->family >= AMDGPU_FAMILY_AI)
714 tmp |= (or_mask & and_mask);
715 else
716 tmp |= or_mask;
d38ceaf9
AD
717 }
718 WREG32(reg, tmp);
719 }
720}
721
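/*
 * Illustrative sketch of a golden register list (hypothetical offsets and
 * masks, not taken from any real ASIC): the array is a sequence of
 * { register, and_mask, or_mask } triplets. An and_mask of 0xffffffff makes
 * the helper write or_mask directly instead of doing a read/modify/write.
 *
 *	static const u32 example_golden_settings[] = {
 *		0x0000315c, 0xffffffff, 0x00000001,
 *		0x00003160, 0x000000ff, 0x00000040,
 *	};
 *
 *	amdgpu_device_program_register_sequence(adev, example_golden_settings,
 *						ARRAY_SIZE(example_golden_settings));
 */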
e3ecdffa
AD
722/**
723 * amdgpu_device_pci_config_reset - reset the GPU
724 *
725 * @adev: amdgpu_device pointer
726 *
727 * Resets the GPU using the pci config reset sequence.
728 * Only applicable to asics prior to vega10.
729 */
8111c387 730void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
731{
732 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
733}
734
735/*
736 * GPU doorbell aperture helpers function.
737 */
738/**
06ec9070 739 * amdgpu_device_doorbell_init - Init doorbell driver information.
d38ceaf9
AD
740 *
741 * @adev: amdgpu_device pointer
742 *
743 * Init doorbell driver information (CIK)
744 * Returns 0 on success, error on failure.
745 */
06ec9070 746static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
d38ceaf9 747{
6585661d 748
705e519e
CK
749 /* No doorbell on SI hardware generation */
750 if (adev->asic_type < CHIP_BONAIRE) {
751 adev->doorbell.base = 0;
752 adev->doorbell.size = 0;
753 adev->doorbell.num_doorbells = 0;
754 adev->doorbell.ptr = NULL;
755 return 0;
756 }
757
d6895ad3
CK
758 if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
759 return -EINVAL;
760
22357775
AD
761 amdgpu_asic_init_doorbell_index(adev);
762
d38ceaf9
AD
763 /* doorbell bar mapping */
764 adev->doorbell.base = pci_resource_start(adev->pdev, 2);
765 adev->doorbell.size = pci_resource_len(adev->pdev, 2);
766
edf600da 767 adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
9564f192 768 adev->doorbell_index.max_assignment+1);
d38ceaf9
AD
769 if (adev->doorbell.num_doorbells == 0)
770 return -EINVAL;
771
ec3db8a6 772 /* For Vega, reserve and map two pages on doorbell BAR since SDMA
88dc26e4
OZ
773 * paging queue doorbell uses the second page. The
774 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
775 * doorbells are in the first page. So with the paging queue enabled,
776 * the max num_doorbells needs one extra page (0x400 in dwords).
ec3db8a6
PY
777 */
778 if (adev->asic_type >= CHIP_VEGA10)
88dc26e4 779 adev->doorbell.num_doorbells += 0x400;
ec3db8a6 780
8972e5d2
CK
781 adev->doorbell.ptr = ioremap(adev->doorbell.base,
782 adev->doorbell.num_doorbells *
783 sizeof(u32));
784 if (adev->doorbell.ptr == NULL)
d38ceaf9 785 return -ENOMEM;
d38ceaf9
AD
786
787 return 0;
788}
789
790/**
06ec9070 791 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
d38ceaf9
AD
792 *
793 * @adev: amdgpu_device pointer
794 *
795 * Tear down doorbell driver information (CIK)
796 */
06ec9070 797static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
d38ceaf9
AD
798{
799 iounmap(adev->doorbell.ptr);
800 adev->doorbell.ptr = NULL;
801}
802
22cb0164 803
d38ceaf9
AD
804
805/*
06ec9070 806 * amdgpu_device_wb_*()
455a7bc2 807 * Writeback is the method by which the GPU updates special pages in memory
ea81a173 808 * with the status of certain GPU events (fences, ring pointers,etc.).
d38ceaf9
AD
809 */
810
811/**
06ec9070 812 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
813 *
814 * @adev: amdgpu_device pointer
815 *
816 * Disables Writeback and frees the Writeback memory (all asics).
817 * Used at driver shutdown.
818 */
06ec9070 819static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
d38ceaf9
AD
820{
821 if (adev->wb.wb_obj) {
a76ed485
AD
822 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
823 &adev->wb.gpu_addr,
824 (void **)&adev->wb.wb);
d38ceaf9
AD
825 adev->wb.wb_obj = NULL;
826 }
827}
828
829/**
06ec9070 830 * amdgpu_device_wb_init- Init Writeback driver info and allocate memory
d38ceaf9
AD
831 *
832 * @adev: amdgpu_device pointer
833 *
455a7bc2 834 * Initializes writeback and allocates writeback memory (all asics).
d38ceaf9
AD
835 * Used at driver startup.
836 * Returns 0 on success or a negative error code on failure.
837 */
06ec9070 838static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
839{
840 int r;
841
842 if (adev->wb.wb_obj == NULL) {
97407b63
AD
843 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
844 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
845 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
846 &adev->wb.wb_obj, &adev->wb.gpu_addr,
847 (void **)&adev->wb.wb);
d38ceaf9
AD
848 if (r) {
849 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
850 return r;
851 }
d38ceaf9
AD
852
853 adev->wb.num_wb = AMDGPU_MAX_WB;
854 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
855
856 /* clear wb memory */
73469585 857 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
858 }
859
860 return 0;
861}
862
863/**
131b4b36 864 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
865 *
866 * @adev: amdgpu_device pointer
867 * @wb: wb index
868 *
869 * Allocate a wb slot for use by the driver (all asics).
870 * Returns 0 on success or -EINVAL on failure.
871 */
131b4b36 872int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9
AD
873{
874 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 875
97407b63 876 if (offset < adev->wb.num_wb) {
7014285a 877 __set_bit(offset, adev->wb.used);
63ae07ca 878 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
879 return 0;
880 } else {
881 return -EINVAL;
882 }
883}
884
d38ceaf9 885/**
131b4b36 886 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
887 *
888 * @adev: amdgpu_device pointer
889 * @wb: wb index
890 *
891 * Free a wb slot allocated for use by the driver (all asics)
892 */
131b4b36 893void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 894{
73469585 895 wb >>= 3;
d38ceaf9 896 if (wb < adev->wb.num_wb)
73469585 897 __clear_bit(wb, adev->wb.used);
d38ceaf9
AD
898}
899
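/*
 * Illustrative usage sketch (hypothetical caller, not part of this file):
 * an IP block typically grabs a writeback slot at init, points the engine
 * at its GPU address, and frees it at teardown. The returned index is a
 * dword offset into adev->wb.wb.
 *
 *	u32 wb;
 *
 *	if (!amdgpu_device_wb_get(adev, &wb)) {
 *		u64 wb_gpu_addr = adev->wb.gpu_addr + (wb * 4);
 *		u32 status = adev->wb.wb[wb];
 *
 *		... program the engine with wb_gpu_addr, poll status ...
 *
 *		amdgpu_device_wb_free(adev, wb);
 *	}
 */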
d6895ad3
CK
900/**
901 * amdgpu_device_resize_fb_bar - try to resize FB BAR
902 *
903 * @adev: amdgpu_device pointer
904 *
905 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
906 * to fail, but if any of the BARs is not accessible after the size we abort
907 * driver loading by returning -ENODEV.
908 */
909int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
910{
770d13b1 911 u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
d6895ad3 912 u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
31b8adab
CK
913 struct pci_bus *root;
914 struct resource *res;
915 unsigned i;
d6895ad3
CK
916 u16 cmd;
917 int r;
918
0c03b912 919 /* Bypass for VF */
920 if (amdgpu_sriov_vf(adev))
921 return 0;
922
b7221f2b
AD
923 /* skip if the bios has already enabled large BAR */
924 if (adev->gmc.real_vram_size &&
925 (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
926 return 0;
927
31b8adab
CK
928 /* Check if the root BUS has 64bit memory resources */
929 root = adev->pdev->bus;
930 while (root->parent)
931 root = root->parent;
932
933 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 934 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
935 res->start > 0x100000000ull)
936 break;
937 }
938
939 /* Trying to resize is pointless without a root hub window above 4GB */
940 if (!res)
941 return 0;
942
d6895ad3
CK
943 /* Disable memory decoding while we change the BAR addresses and size */
944 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
945 pci_write_config_word(adev->pdev, PCI_COMMAND,
946 cmd & ~PCI_COMMAND_MEMORY);
947
948 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
06ec9070 949 amdgpu_device_doorbell_fini(adev);
d6895ad3
CK
950 if (adev->asic_type >= CHIP_BONAIRE)
951 pci_release_resource(adev->pdev, 2);
952
953 pci_release_resource(adev->pdev, 0);
954
955 r = pci_resize_resource(adev->pdev, 0, rbar_size);
956 if (r == -ENOSPC)
957 DRM_INFO("Not enough PCI address space for a large BAR.");
958 else if (r && r != -ENOTSUPP)
959 DRM_ERROR("Problem resizing BAR0 (%d).", r);
960
961 pci_assign_unassigned_bus_resources(adev->pdev->bus);
962
963 /* When the doorbell or fb BAR isn't available we have no chance of
964 * using the device.
965 */
06ec9070 966 r = amdgpu_device_doorbell_init(adev);
d6895ad3
CK
967 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
968 return -ENODEV;
969
970 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
971
972 return 0;
973}
a05502e5 974
d38ceaf9
AD
975/*
976 * GPU helpers function.
977 */
978/**
39c640c0 979 * amdgpu_device_need_post - check if the hw need post or not
d38ceaf9
AD
980 *
981 * @adev: amdgpu_device pointer
982 *
c836fec5
JQ
983 * Check if the asic has been initialized (all asics) at driver startup
984 * or post is needed if hw reset is performed.
985 * Returns true if post is needed, false if not.
d38ceaf9 986 */
39c640c0 987bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
988{
989 uint32_t reg;
990
bec86378
ML
991 if (amdgpu_sriov_vf(adev))
992 return false;
993
994 if (amdgpu_passthrough(adev)) {
1da2c326
ML
995 /* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
996 * some old smc fw still need driver do vPost otherwise gpu hang, while
997 * those smc fw version above 22.15 doesn't have this flaw, so we force
998 * vpost executed for smc version below 22.15
bec86378
ML
999 */
1000 if (adev->asic_type == CHIP_FIJI) {
1001 int err;
1002 uint32_t fw_ver;
1003 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1004 /* force vPost if error occurred */
1005 if (err)
1006 return true;
1007
1008 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1da2c326
ML
1009 if (fw_ver < 0x00160e00)
1010 return true;
bec86378 1011 }
bec86378 1012 }
91fe77eb 1013
1014 if (adev->has_hw_reset) {
1015 adev->has_hw_reset = false;
1016 return true;
1017 }
1018
1019 /* bios scratch used on CIK+ */
1020 if (adev->asic_type >= CHIP_BONAIRE)
1021 return amdgpu_atombios_scratch_need_asic_init(adev);
1022
1023 /* check MEM_SIZE for older asics */
1024 reg = amdgpu_asic_get_config_memsize(adev);
1025
1026 if ((reg != 0) && (reg != 0xffffffff))
1027 return false;
1028
1029 return true;
bec86378
ML
1030}
1031
d38ceaf9
AD
1032/* if we get transitioned to only one device, take VGA back */
1033/**
06ec9070 1034 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9
AD
1035 *
1036 * @cookie: amdgpu_device pointer
1037 * @state: enable/disable vga decode
1038 *
1039 * Enable/disable vga decode (all asics).
1040 * Returns VGA resource flags.
1041 */
06ec9070 1042static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
d38ceaf9
AD
1043{
1044 struct amdgpu_device *adev = cookie;
1045 amdgpu_asic_set_vga_state(adev, state);
1046 if (state)
1047 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1048 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1049 else
1050 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1051}
1052
e3ecdffa
AD
1053/**
1054 * amdgpu_device_check_block_size - validate the vm block size
1055 *
1056 * @adev: amdgpu_device pointer
1057 *
1058 * Validates the vm block size specified via module parameter.
1059 * The vm block size defines the number of bits in page table versus page directory,
1060 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1061 * page table and the remaining bits are in the page directory.
1062 */
06ec9070 1063static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
1064{
1065 /* defines number of bits in page table versus page directory,
1066 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1067 * page table and the remaining bits are in the page directory */
bab4fee7
JZ
1068 if (amdgpu_vm_block_size == -1)
1069 return;
a1adf8be 1070
bab4fee7 1071 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
1072 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1073 amdgpu_vm_block_size);
97489129 1074 amdgpu_vm_block_size = -1;
a1adf8be 1075 }
a1adf8be
CZ
1076}
1077
e3ecdffa
AD
1078/**
1079 * amdgpu_device_check_vm_size - validate the vm size
1080 *
1081 * @adev: amdgpu_device pointer
1082 *
1083 * Validates the vm size in GB specified via module parameter.
1084 * The VM size is the size of the GPU virtual memory space in GB.
1085 */
06ec9070 1086static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 1087{
64dab074
AD
1088 /* no need to check the default value */
1089 if (amdgpu_vm_size == -1)
1090 return;
1091
83ca145d
ZJ
1092 if (amdgpu_vm_size < 1) {
1093 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1094 amdgpu_vm_size);
f3368128 1095 amdgpu_vm_size = -1;
83ca145d 1096 }
83ca145d
ZJ
1097}
1098
7951e376
RZ
1099static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1100{
1101 struct sysinfo si;
a9d4fe2f 1102 bool is_os_64 = (sizeof(void *) == 8);
7951e376
RZ
1103 uint64_t total_memory;
1104 uint64_t dram_size_seven_GB = 0x1B8000000;
1105 uint64_t dram_size_three_GB = 0xB8000000;
1106
1107 if (amdgpu_smu_memory_pool_size == 0)
1108 return;
1109
1110 if (!is_os_64) {
1111 DRM_WARN("Not 64-bit OS, feature not supported\n");
1112 goto def_value;
1113 }
1114 si_meminfo(&si);
1115 total_memory = (uint64_t)si.totalram * si.mem_unit;
1116
1117 if ((amdgpu_smu_memory_pool_size == 1) ||
1118 (amdgpu_smu_memory_pool_size == 2)) {
1119 if (total_memory < dram_size_three_GB)
1120 goto def_value1;
1121 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1122 (amdgpu_smu_memory_pool_size == 8)) {
1123 if (total_memory < dram_size_seven_GB)
1124 goto def_value1;
1125 } else {
1126 DRM_WARN("Smu memory pool size not supported\n");
1127 goto def_value;
1128 }
1129 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1130
1131 return;
1132
1133def_value1:
1134 DRM_WARN("No enough system memory\n");
1135def_value:
1136 adev->pm.smu_prv_buffer_size = 0;
1137}
1138
d38ceaf9 1139/**
06ec9070 1140 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
1141 *
1142 * @adev: amdgpu_device pointer
1143 *
1144 * Validates certain module parameters and updates
1145 * the associated values used by the driver (all asics).
1146 */
912dfc84 1147static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1148{
5b011235
CZ
1149 if (amdgpu_sched_jobs < 4) {
1150 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1151 amdgpu_sched_jobs);
1152 amdgpu_sched_jobs = 4;
76117507 1153 } else if (!is_power_of_2(amdgpu_sched_jobs)){
5b011235
CZ
1154 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1155 amdgpu_sched_jobs);
1156 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1157 }
d38ceaf9 1158
83e74db6 1159 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
1160 /* gart size must be greater or equal to 32M */
1161 dev_warn(adev->dev, "gart size (%d) too small\n",
1162 amdgpu_gart_size);
83e74db6 1163 amdgpu_gart_size = -1;
d38ceaf9
AD
1164 }
1165
36d38372 1166 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1167 /* gtt size must be greater or equal to 32M */
36d38372
CK
1168 dev_warn(adev->dev, "gtt size (%d) too small\n",
1169 amdgpu_gtt_size);
1170 amdgpu_gtt_size = -1;
d38ceaf9
AD
1171 }
1172
d07f14be
RH
1173 /* valid range is between 4 and 9 inclusive */
1174 if (amdgpu_vm_fragment_size != -1 &&
1175 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1176 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1177 amdgpu_vm_fragment_size = -1;
1178 }
1179
5d5bd5e3
KW
1180 if (amdgpu_sched_hw_submission < 2) {
1181 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1182 amdgpu_sched_hw_submission);
1183 amdgpu_sched_hw_submission = 2;
1184 } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1185 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1186 amdgpu_sched_hw_submission);
1187 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1188 }
1189
7951e376
RZ
1190 amdgpu_device_check_smu_prv_buffer_size(adev);
1191
06ec9070 1192 amdgpu_device_check_vm_size(adev);
d38ceaf9 1193
06ec9070 1194 amdgpu_device_check_block_size(adev);
6a7f76e7 1195
19aede77 1196 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84 1197
c6252390 1198 amdgpu_gmc_tmz_set(adev);
01a8dcec 1199
e3c00faa 1200 return 0;
d38ceaf9
AD
1201}
1202
1203/**
1204 * amdgpu_switcheroo_set_state - set switcheroo state
1205 *
1206 * @pdev: pci dev pointer
1694467b 1207 * @state: vga_switcheroo state
d38ceaf9
AD
1208 *
1209 * Callback for the switcheroo driver. Suspends or resumes
1210 * the asics before or after they are powered up using ACPI methods.
1211 */
1212static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
1213{
1214 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 1215 int r;
d38ceaf9 1216
31af062a 1217 if (amdgpu_device_supports_boco(dev) && state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
1218 return;
1219
1220 if (state == VGA_SWITCHEROO_ON) {
dd4fa6c1 1221 pr_info("switched on\n");
d38ceaf9
AD
1222 /* don't suspend or resume card normally */
1223 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1224
de185019
AD
1225 pci_set_power_state(dev->pdev, PCI_D0);
1226 pci_restore_state(dev->pdev);
1227 r = pci_enable_device(dev->pdev);
1228 if (r)
1229 DRM_WARN("pci_enable_device failed (%d)\n", r);
1230 amdgpu_device_resume(dev, true);
d38ceaf9 1231
d38ceaf9
AD
1232 dev->switch_power_state = DRM_SWITCH_POWER_ON;
1233 drm_kms_helper_poll_enable(dev);
1234 } else {
dd4fa6c1 1235 pr_info("switched off\n");
d38ceaf9
AD
1236 drm_kms_helper_poll_disable(dev);
1237 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
de185019
AD
1238 amdgpu_device_suspend(dev, true);
1239 pci_save_state(dev->pdev);
1240 /* Shut down the device */
1241 pci_disable_device(dev->pdev);
1242 pci_set_power_state(dev->pdev, PCI_D3cold);
d38ceaf9
AD
1243 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1244 }
1245}
1246
1247/**
1248 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1249 *
1250 * @pdev: pci dev pointer
1251 *
1252 * Callback for the switcheroo driver. Check of the switcheroo
1253 * state can be changed.
1254 * Returns true if the state can be changed, false if not.
1255 */
1256static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1257{
1258 struct drm_device *dev = pci_get_drvdata(pdev);
1259
1260 /*
1261 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1262 * locking inversion with the driver load path. And the access here is
1263 * completely racy anyway. So don't bother with locking for now.
1264 */
7e13ad89 1265 return atomic_read(&dev->open_count) == 0;
d38ceaf9
AD
1266}
1267
1268static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1269 .set_gpu_state = amdgpu_switcheroo_set_state,
1270 .reprobe = NULL,
1271 .can_switch = amdgpu_switcheroo_can_switch,
1272};
1273
e3ecdffa
AD
1274/**
1275 * amdgpu_device_ip_set_clockgating_state - set the CG state
1276 *
87e3f136 1277 * @dev: amdgpu_device pointer
e3ecdffa
AD
1278 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1279 * @state: clockgating state (gate or ungate)
1280 *
1281 * Sets the requested clockgating state for all instances of
1282 * the hardware IP specified.
1283 * Returns the error code from the last instance.
1284 */
43fa561f 1285int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1286 enum amd_ip_block_type block_type,
1287 enum amd_clockgating_state state)
d38ceaf9 1288{
43fa561f 1289 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1290 int i, r = 0;
1291
1292 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1293 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1294 continue;
c722865a
RZ
1295 if (adev->ip_blocks[i].version->type != block_type)
1296 continue;
1297 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1298 continue;
1299 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1300 (void *)adev, state);
1301 if (r)
1302 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1303 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1304 }
1305 return r;
1306}
1307
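/*
 * Illustrative usage sketch (hypothetical caller, not part of this file):
 * gate the clocks of all GFX IP instances. The device pointer is passed as
 * the opaque void *dev handle.
 *
 *	int r = amdgpu_device_ip_set_clockgating_state(adev,
 *						       AMD_IP_BLOCK_TYPE_GFX,
 *						       AMD_CG_STATE_GATE);
 *	if (r)
 *		DRM_ERROR("failed to gate GFX clocks (%d)\n", r);
 */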
e3ecdffa
AD
1308/**
1309 * amdgpu_device_ip_set_powergating_state - set the PG state
1310 *
87e3f136 1311 * @dev: amdgpu_device pointer
e3ecdffa
AD
1312 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1313 * @state: powergating state (gate or ungate)
1314 *
1315 * Sets the requested powergating state for all instances of
1316 * the hardware IP specified.
1317 * Returns the error code from the last instance.
1318 */
43fa561f 1319int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1320 enum amd_ip_block_type block_type,
1321 enum amd_powergating_state state)
d38ceaf9 1322{
43fa561f 1323 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1324 int i, r = 0;
1325
1326 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1327 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1328 continue;
c722865a
RZ
1329 if (adev->ip_blocks[i].version->type != block_type)
1330 continue;
1331 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1332 continue;
1333 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1334 (void *)adev, state);
1335 if (r)
1336 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1337 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1338 }
1339 return r;
1340}
1341
e3ecdffa
AD
1342/**
1343 * amdgpu_device_ip_get_clockgating_state - get the CG state
1344 *
1345 * @adev: amdgpu_device pointer
1346 * @flags: clockgating feature flags
1347 *
1348 * Walks the list of IPs on the device and updates the clockgating
1349 * flags for each IP.
1350 * Updates @flags with the feature flags for each hardware IP where
1351 * clockgating is enabled.
1352 */
2990a1fc
AD
1353void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1354 u32 *flags)
6cb2d4e4
HR
1355{
1356 int i;
1357
1358 for (i = 0; i < adev->num_ip_blocks; i++) {
1359 if (!adev->ip_blocks[i].status.valid)
1360 continue;
1361 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1362 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1363 }
1364}
1365
e3ecdffa
AD
1366/**
1367 * amdgpu_device_ip_wait_for_idle - wait for idle
1368 *
1369 * @adev: amdgpu_device pointer
1370 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1371 *
1372 * Waits for the requested hardware IP to be idle.
1373 * Returns 0 for success or a negative error code on failure.
1374 */
2990a1fc
AD
1375int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1376 enum amd_ip_block_type block_type)
5dbbb60b
AD
1377{
1378 int i, r;
1379
1380 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1381 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1382 continue;
a1255107
AD
1383 if (adev->ip_blocks[i].version->type == block_type) {
1384 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1385 if (r)
1386 return r;
1387 break;
1388 }
1389 }
1390 return 0;
1391
1392}
1393
e3ecdffa
AD
1394/**
1395 * amdgpu_device_ip_is_idle - is the hardware IP idle
1396 *
1397 * @adev: amdgpu_device pointer
1398 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1399 *
1400 * Check if the hardware IP is idle or not.
1402 * Returns true if the IP is idle, false if not.
1402 */
2990a1fc
AD
1403bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1404 enum amd_ip_block_type block_type)
5dbbb60b
AD
1405{
1406 int i;
1407
1408 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1409 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1410 continue;
a1255107
AD
1411 if (adev->ip_blocks[i].version->type == block_type)
1412 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1413 }
1414 return true;
1415
1416}
1417
e3ecdffa
AD
1418/**
1419 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1420 *
1421 * @adev: amdgpu_device pointer
87e3f136 1422 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1423 *
1424 * Returns a pointer to the hardware IP block structure
1425 * if it exists for the asic, otherwise NULL.
1426 */
2990a1fc
AD
1427struct amdgpu_ip_block *
1428amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1429 enum amd_ip_block_type type)
d38ceaf9
AD
1430{
1431 int i;
1432
1433 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1434 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1435 return &adev->ip_blocks[i];
1436
1437 return NULL;
1438}
1439
1440/**
2990a1fc 1441 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1442 *
1443 * @adev: amdgpu_device pointer
5fc3aeeb 1444 * @type: enum amd_ip_block_type
d38ceaf9
AD
1445 * @major: major version
1446 * @minor: minor version
1447 *
1448 * Return 0 if the IP block version is equal to or greater than (major, minor),
1449 * 1 if it is smaller or the ip_block doesn't exist
1450 */
2990a1fc
AD
1451int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1452 enum amd_ip_block_type type,
1453 u32 major, u32 minor)
d38ceaf9 1454{
2990a1fc 1455 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1456
a1255107
AD
1457 if (ip_block && ((ip_block->version->major > major) ||
1458 ((ip_block->version->major == major) &&
1459 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1460 return 0;
1461
1462 return 1;
1463}
1464
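/*
 * Illustrative usage sketch (hypothetical check, not part of this file):
 * gate a feature on the GFX IP block being at least version 8.1.
 *
 *	if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GFX,
 *						8, 1)) {
 *		... GFX IP is v8.1 or newer ...
 *	}
 */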
a1255107 1465/**
2990a1fc 1466 * amdgpu_device_ip_block_add
a1255107
AD
1467 *
1468 * @adev: amdgpu_device pointer
1469 * @ip_block_version: pointer to the IP to add
1470 *
1471 * Adds the IP block driver information to the collection of IPs
1472 * on the asic.
1473 */
2990a1fc
AD
1474int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1475 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1476{
1477 if (!ip_block_version)
1478 return -EINVAL;
1479
e966a725 1480 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1481 ip_block_version->funcs->name);
1482
a1255107
AD
1483 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1484
1485 return 0;
1486}
1487
e3ecdffa
AD
1488/**
1489 * amdgpu_device_enable_virtual_display - enable virtual display feature
1490 *
1491 * @adev: amdgpu_device pointer
1492 *
1493 * Enables the virtual display feature if the user has enabled it via
1494 * the module parameter virtual_display. This feature provides a virtual
1495 * display hardware on headless boards or in virtualized environments.
1496 * This function parses and validates the configuration string specified by
1497 * the user and configures the virtual display configuration (number of
1498 * virtual connectors, crtcs, etc.) specified.
1499 */
483ef985 1500static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1501{
1502 adev->enable_virtual_display = false;
1503
1504 if (amdgpu_virtual_display) {
1505 struct drm_device *ddev = adev->ddev;
1506 const char *pci_address_name = pci_name(ddev->pdev);
0f66356d 1507 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1508
1509 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1510 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1511 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1512 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1513 if (!strcmp("all", pciaddname)
1514 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1515 long num_crtc;
1516 int res = -1;
1517
9accf2fd 1518 adev->enable_virtual_display = true;
0f66356d
ED
1519
1520 if (pciaddname_tmp)
1521 res = kstrtol(pciaddname_tmp, 10,
1522 &num_crtc);
1523
1524 if (!res) {
1525 if (num_crtc < 1)
1526 num_crtc = 1;
1527 if (num_crtc > 6)
1528 num_crtc = 6;
1529 adev->mode_info.num_crtc = num_crtc;
1530 } else {
1531 adev->mode_info.num_crtc = 1;
1532 }
9accf2fd
ED
1533 break;
1534 }
1535 }
1536
0f66356d
ED
1537 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1538 amdgpu_virtual_display, pci_address_name,
1539 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
1540
1541 kfree(pciaddstr);
1542 }
1543}
1544
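/*
 * Illustrative examples of the virtual_display string parsed above (the PCI
 * address is hypothetical): entries are semicolon-separated
 * "pci-address,crtcs" pairs, and the optional crtc count is clamped to 1-6.
 *
 *	modprobe amdgpu virtual_display=0000:03:00.0,2
 *	modprobe amdgpu virtual_display=all,4
 */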
e3ecdffa
AD
1545/**
1546 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1547 *
1548 * @adev: amdgpu_device pointer
1549 *
1550 * Parses the asic configuration parameters specified in the gpu info
1551 * firmware and makes them available to the driver for use in configuring
1552 * the asic.
1553 * Returns 0 on success, -EINVAL on failure.
1554 */
e2a75f88
AD
1555static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1556{
e2a75f88 1557 const char *chip_name;
c0a43457 1558 char fw_name[40];
e2a75f88
AD
1559 int err;
1560 const struct gpu_info_firmware_header_v1_0 *hdr;
1561
ab4fe3e1
HR
1562 adev->firmware.gpu_info_fw = NULL;
1563
4292b0b2 1564 if (adev->discovery_bin) {
258620d0 1565 amdgpu_discovery_get_gfx_info(adev);
cc375d8c
TY
1566
1567 /*
1568 * FIXME: The bounding box is still needed by Navi12, so
1569 * temporarily read it from gpu_info firmware. Should be dropped
1570 * when DAL no longer needs it.
1571 */
1572 if (adev->asic_type != CHIP_NAVI12)
1573 return 0;
258620d0
AD
1574 }
1575
e2a75f88 1576 switch (adev->asic_type) {
e2a75f88
AD
1577#ifdef CONFIG_DRM_AMDGPU_SI
1578 case CHIP_VERDE:
1579 case CHIP_TAHITI:
1580 case CHIP_PITCAIRN:
1581 case CHIP_OLAND:
1582 case CHIP_HAINAN:
1583#endif
1584#ifdef CONFIG_DRM_AMDGPU_CIK
1585 case CHIP_BONAIRE:
1586 case CHIP_HAWAII:
1587 case CHIP_KAVERI:
1588 case CHIP_KABINI:
1589 case CHIP_MULLINS:
1590#endif
da87c30b
AD
1591 case CHIP_TOPAZ:
1592 case CHIP_TONGA:
1593 case CHIP_FIJI:
1594 case CHIP_POLARIS10:
1595 case CHIP_POLARIS11:
1596 case CHIP_POLARIS12:
1597 case CHIP_VEGAM:
1598 case CHIP_CARRIZO:
1599 case CHIP_STONEY:
27c0bc71 1600 case CHIP_VEGA20:
e2a75f88
AD
1601 default:
1602 return 0;
1603 case CHIP_VEGA10:
1604 chip_name = "vega10";
1605 break;
3f76dced
AD
1606 case CHIP_VEGA12:
1607 chip_name = "vega12";
1608 break;
2d2e5e7e 1609 case CHIP_RAVEN:
54f78a76 1610 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
54c4d17e 1611 chip_name = "raven2";
54f78a76 1612 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
741deade 1613 chip_name = "picasso";
54c4d17e
FX
1614 else
1615 chip_name = "raven";
2d2e5e7e 1616 break;
65e60f6e
LM
1617 case CHIP_ARCTURUS:
1618 chip_name = "arcturus";
1619 break;
b51a26a0
HR
1620 case CHIP_RENOIR:
1621 chip_name = "renoir";
1622 break;
23c6268e
HR
1623 case CHIP_NAVI10:
1624 chip_name = "navi10";
1625 break;
ed42cfe1
XY
1626 case CHIP_NAVI14:
1627 chip_name = "navi14";
1628 break;
42b325e5
XY
1629 case CHIP_NAVI12:
1630 chip_name = "navi12";
1631 break;
c0a43457
LG
1632 case CHIP_SIENNA_CICHLID:
1633 chip_name = "sienna_cichlid";
1634 break;
e2a75f88
AD
1635 }
1636
1637 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
ab4fe3e1 1638 err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
e2a75f88
AD
1639 if (err) {
1640 dev_err(adev->dev,
1641 "Failed to load gpu_info firmware \"%s\"\n",
1642 fw_name);
1643 goto out;
1644 }
ab4fe3e1 1645 err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
e2a75f88
AD
1646 if (err) {
1647 dev_err(adev->dev,
1648 "Failed to validate gpu_info firmware \"%s\"\n",
1649 fw_name);
1650 goto out;
1651 }
1652
ab4fe3e1 1653 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
1654 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1655
1656 switch (hdr->version_major) {
1657 case 1:
1658 {
1659 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 1660 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
1661 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1662
cc375d8c
TY
1663 /*
1664 * Should be dropped when DAL no longer needs it.
1665 */
1666 if (adev->asic_type == CHIP_NAVI12)
1667 goto parse_soc_bounding_box;
1668
b5ab16bf
AD
1669 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1670 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1671 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1672 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 1673 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
1674 le32_to_cpu(gpu_info_fw->gc_num_tccs);
1675 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1676 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1677 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1678 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 1679 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
1680 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1681 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
1682 adev->gfx.cu_info.max_waves_per_simd =
1683 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1684 adev->gfx.cu_info.max_scratch_slots_per_cu =
1685 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1686 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 1687 if (hdr->version_minor >= 1) {
35c2e910
HZ
1688 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1689 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1690 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1691 adev->gfx.config.num_sc_per_sh =
1692 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1693 adev->gfx.config.num_packer_per_sc =
1694 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1695 }
ec51d3fa 1696
cc375d8c 1697parse_soc_bounding_box:
ec51d3fa
XY
1698 /*
1699 * soc bounding box info is not integrated in the discovery table,
258620d0 1700 * we always need to parse it from gpu info firmware if needed.
ec51d3fa 1701 */
48321c3d
HW
1702 if (hdr->version_minor == 2) {
1703 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1704 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1705 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1706 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1707 }
e2a75f88
AD
1708 break;
1709 }
1710 default:
1711 dev_err(adev->dev,
1712 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1713 err = -EINVAL;
1714 goto out;
1715 }
1716out:
e2a75f88
AD
1717 return err;
1718}
1719
e3ecdffa
AD
1720/**
1721 * amdgpu_device_ip_early_init - run early init for hardware IPs
1722 *
1723 * @adev: amdgpu_device pointer
1724 *
1725 * Early initialization pass for hardware IPs. The hardware IPs that make
1726 * up each asic are discovered and each IP's early_init callback is run. This
1727 * is the first stage in initializing the asic.
1728 * Returns 0 on success, negative error code on failure.
1729 */
06ec9070 1730static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 1731{
aaa36a97 1732 int i, r;
d38ceaf9 1733
483ef985 1734 amdgpu_device_enable_virtual_display(adev);
a6be7570 1735
00a979f3 1736 if (amdgpu_sriov_vf(adev)) {
00a979f3 1737 r = amdgpu_virt_request_full_gpu(adev, true);
e3a4d51c 1738 if (r)
00a979f3 1739 return r;
00a979f3
WS
1740 }
1741
d38ceaf9 1742 switch (adev->asic_type) {
33f34802
KW
1743#ifdef CONFIG_DRM_AMDGPU_SI
1744 case CHIP_VERDE:
1745 case CHIP_TAHITI:
1746 case CHIP_PITCAIRN:
1747 case CHIP_OLAND:
1748 case CHIP_HAINAN:
295d0daf 1749 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
1750 r = si_set_ip_blocks(adev);
1751 if (r)
1752 return r;
1753 break;
1754#endif
a2e73f56
AD
1755#ifdef CONFIG_DRM_AMDGPU_CIK
1756 case CHIP_BONAIRE:
1757 case CHIP_HAWAII:
1758 case CHIP_KAVERI:
1759 case CHIP_KABINI:
1760 case CHIP_MULLINS:
e1ad2d53 1761 if (adev->flags & AMD_IS_APU)
a2e73f56 1762 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
1763 else
1764 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
1765
1766 r = cik_set_ip_blocks(adev);
1767 if (r)
1768 return r;
1769 break;
1770#endif
da87c30b
AD
1771 case CHIP_TOPAZ:
1772 case CHIP_TONGA:
1773 case CHIP_FIJI:
1774 case CHIP_POLARIS10:
1775 case CHIP_POLARIS11:
1776 case CHIP_POLARIS12:
1777 case CHIP_VEGAM:
1778 case CHIP_CARRIZO:
1779 case CHIP_STONEY:
1780 if (adev->flags & AMD_IS_APU)
1781 adev->family = AMDGPU_FAMILY_CZ;
1782 else
1783 adev->family = AMDGPU_FAMILY_VI;
1784
1785 r = vi_set_ip_blocks(adev);
1786 if (r)
1787 return r;
1788 break;
e48a3cd9
AD
1789 case CHIP_VEGA10:
1790 case CHIP_VEGA12:
e4bd8170 1791 case CHIP_VEGA20:
e48a3cd9 1792 case CHIP_RAVEN:
61cf44c1 1793 case CHIP_ARCTURUS:
b51a26a0 1794 case CHIP_RENOIR:
70534d1e 1795 if (adev->flags & AMD_IS_APU)
2ca8a5d2
CZ
1796 adev->family = AMDGPU_FAMILY_RV;
1797 else
1798 adev->family = AMDGPU_FAMILY_AI;
460826e6
KW
1799
1800 r = soc15_set_ip_blocks(adev);
1801 if (r)
1802 return r;
1803 break;
0a5b8c7b 1804 case CHIP_NAVI10:
7ecb5cd4 1805 case CHIP_NAVI14:
4808cf9c 1806 case CHIP_NAVI12:
11e8aef5 1807 case CHIP_SIENNA_CICHLID:
0a5b8c7b
HR
1808 adev->family = AMDGPU_FAMILY_NV;
1809
1810 r = nv_set_ip_blocks(adev);
1811 if (r)
1812 return r;
1813 break;
d38ceaf9
AD
1814 default:
1815 /* FIXME: not supported yet */
1816 return -EINVAL;
1817 }
1818
1884734a 1819 amdgpu_amdkfd_device_probe(adev);
1820
3b94fb10 1821 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 1822 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 1823 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
00f54b97 1824
d38ceaf9
AD
1825 for (i = 0; i < adev->num_ip_blocks; i++) {
1826 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
ed8cf00c
HR
1827 DRM_ERROR("disabled ip block: %d <%s>\n",
1828 i, adev->ip_blocks[i].version->funcs->name);
a1255107 1829 adev->ip_blocks[i].status.valid = false;
d38ceaf9 1830 } else {
a1255107
AD
1831 if (adev->ip_blocks[i].version->funcs->early_init) {
1832 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 1833 if (r == -ENOENT) {
a1255107 1834 adev->ip_blocks[i].status.valid = false;
2c1a2784 1835 } else if (r) {
a1255107
AD
1836 DRM_ERROR("early_init of IP block <%s> failed %d\n",
1837 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 1838 return r;
2c1a2784 1839 } else {
a1255107 1840 adev->ip_blocks[i].status.valid = true;
2c1a2784 1841 }
974e6b64 1842 } else {
a1255107 1843 adev->ip_blocks[i].status.valid = true;
d38ceaf9 1844 }
d38ceaf9 1845 }
21a249ca
AD
1846 /* get the vbios after the asic_funcs are set up */
1847 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
1848 r = amdgpu_device_parse_gpu_info_fw(adev);
1849 if (r)
1850 return r;
1851
21a249ca
AD
1852 /* Read BIOS */
1853 if (!amdgpu_get_bios(adev))
1854 return -EINVAL;
1855
1856 r = amdgpu_atombios_init(adev);
1857 if (r) {
1858 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
1859 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
1860 return r;
1861 }
1862 }
d38ceaf9
AD
1863 }
1864
395d1fb9
NH
1865 adev->cg_flags &= amdgpu_cg_mask;
1866 adev->pg_flags &= amdgpu_pg_mask;
1867
d38ceaf9
AD
1868 return 0;
1869}
1870
0a4f2520
RZ
1871static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
1872{
1873 int i, r;
1874
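	/*
	 * Phase 1 only brings up the blocks needed for basic bring-up:
	 * COMMON, IH and, under SR-IOV, PSP. Everything else is deferred to
	 * the firmware loading step and phase 2.
	 */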
1875 for (i = 0; i < adev->num_ip_blocks; i++) {
1876 if (!adev->ip_blocks[i].status.sw)
1877 continue;
1878 if (adev->ip_blocks[i].status.hw)
1879 continue;
1880 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 1881 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
1882 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
1883 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1884 if (r) {
1885 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1886 adev->ip_blocks[i].version->funcs->name, r);
1887 return r;
1888 }
1889 adev->ip_blocks[i].status.hw = true;
1890 }
1891 }
1892
1893 return 0;
1894}
1895
1896static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
1897{
1898 int i, r;
1899
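	/* Phase 2 runs hw_init for every remaining block that phase 1 and
	 * the PSP firmware loading step did not already bring up. */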
1900 for (i = 0; i < adev->num_ip_blocks; i++) {
1901 if (!adev->ip_blocks[i].status.sw)
1902 continue;
1903 if (adev->ip_blocks[i].status.hw)
1904 continue;
1905 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1906 if (r) {
1907 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1908 adev->ip_blocks[i].version->funcs->name, r);
1909 return r;
1910 }
1911 adev->ip_blocks[i].status.hw = true;
1912 }
1913
1914 return 0;
1915}
1916
7a3e0bb2
RZ
1917static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
1918{
1919 int r = 0;
1920 int i;
80f41f84 1921 uint32_t smu_version;
7a3e0bb2
RZ
1922
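	/* PSP-based (front door) firmware loading only exists on VEGA10 and
	 * newer ASICs; older parts handle microcode loading elsewhere. */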
1923 if (adev->asic_type >= CHIP_VEGA10) {
1924 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
1925 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
1926 continue;
1927
1928 /* no need to do the fw loading again if already done */
1929 if (adev->ip_blocks[i].status.hw == true)
1930 break;
1931
1932 if (adev->in_gpu_reset || adev->in_suspend) {
1933 r = adev->ip_blocks[i].version->funcs->resume(adev);
1934 if (r) {
1935 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 1936 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
1937 return r;
1938 }
1939 } else {
1940 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1941 if (r) {
1942 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1943 adev->ip_blocks[i].version->funcs->name, r);
1944 return r;
7a3e0bb2 1945 }
7a3e0bb2 1946 }
482f0e53
ML
1947
1948 adev->ip_blocks[i].status.hw = true;
1949 break;
7a3e0bb2
RZ
1950 }
1951 }
482f0e53 1952
8973d9ec
ED
1953 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
1954 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 1955
80f41f84 1956 return r;
7a3e0bb2
RZ
1957}
1958
e3ecdffa
AD
1959/**
1960 * amdgpu_device_ip_init - run init for hardware IPs
1961 *
1962 * @adev: amdgpu_device pointer
1963 *
1964 * Main initialization pass for hardware IPs. The list of all the hardware
1965 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
1966 * are run. sw_init initializes the software state associated with each IP
1967 * and hw_init initializes the hardware associated with each IP.
1968 * Returns 0 on success, negative error code on failure.
1969 */
06ec9070 1970static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
1971{
1972 int i, r;
1973
c030f2e4 1974 r = amdgpu_ras_init(adev);
1975 if (r)
1976 return r;
1977
d38ceaf9 1978 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1979 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 1980 continue;
a1255107 1981 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 1982 if (r) {
a1255107
AD
1983 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
1984 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 1985 goto init_failed;
2c1a2784 1986 }
a1255107 1987 adev->ip_blocks[i].status.sw = true;
bfca0289 1988
d38ceaf9 1989 /* need to do gmc hw init early so we can allocate gpu mem */
a1255107 1990 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
06ec9070 1991 r = amdgpu_device_vram_scratch_init(adev);
2c1a2784
AD
1992 if (r) {
1993 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
72d3f592 1994 goto init_failed;
2c1a2784 1995 }
a1255107 1996 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
1997 if (r) {
1998 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 1999 goto init_failed;
2c1a2784 2000 }
06ec9070 2001 r = amdgpu_device_wb_init(adev);
2c1a2784 2002 if (r) {
06ec9070 2003 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 2004 goto init_failed;
2c1a2784 2005 }
a1255107 2006 adev->ip_blocks[i].status.hw = true;
2493664f
ML
2007
2008 /* right after GMC hw init, we create CSA */
f92d5c61 2009 if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
1e256e27
RZ
2010 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2011 AMDGPU_GEM_DOMAIN_VRAM,
2012 AMDGPU_CSA_SIZE);
2493664f
ML
2013 if (r) {
2014 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 2015 goto init_failed;
2493664f
ML
2016 }
2017 }
d38ceaf9
AD
2018 }
2019 }
2020
c9ffa427
YT
2021 if (amdgpu_sriov_vf(adev))
2022 amdgpu_virt_init_data_exchange(adev);
2023
533aed27
AG
2024 r = amdgpu_ib_pool_init(adev);
2025 if (r) {
2026 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2027 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2028 goto init_failed;
2029 }
2030
c8963ea4
RZ
2031 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2032 if (r)
72d3f592 2033 goto init_failed;
0a4f2520
RZ
2034
2035 r = amdgpu_device_ip_hw_init_phase1(adev);
2036 if (r)
72d3f592 2037 goto init_failed;
0a4f2520 2038
7a3e0bb2
RZ
2039 r = amdgpu_device_fw_loading(adev);
2040 if (r)
72d3f592 2041 goto init_failed;
7a3e0bb2 2042
0a4f2520
RZ
2043 r = amdgpu_device_ip_hw_init_phase2(adev);
2044 if (r)
72d3f592 2045 goto init_failed;
d38ceaf9 2046
121a2bc6
AG
2047 /*
2048 * retired pages will be loaded from eeprom and reserved here,
2049 * it should be called after amdgpu_device_ip_hw_init_phase2 since
2050 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2051 * for I2C communication, which is only true at this point.
2052 * recovery_init may fail, but it can free all resources allocated by
2053 * itself and its failure should not stop amdgpu init process.
2054 *
2055 * Note: theoretically, this should be called before all vram allocations
2056 * to protect retired pages from being reused
2057 */
2058 amdgpu_ras_recovery_init(adev);
2059
3e2e2ab5
HZ
2060 if (adev->gmc.xgmi.num_physical_nodes > 1)
2061 amdgpu_xgmi_add_device(adev);
1884734a 2062 amdgpu_amdkfd_device_init(adev);
c6332b97 2063
bd607166
KR
2064 amdgpu_fru_get_product_info(adev);
2065
72d3f592 2066init_failed:
c9ffa427 2067 if (amdgpu_sriov_vf(adev))
c6332b97 2068 amdgpu_virt_release_full_gpu(adev, true);
2069
72d3f592 2070 return r;
d38ceaf9
AD
2071}
2072
e3ecdffa
AD
2073/**
2074 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2075 *
2076 * @adev: amdgpu_device pointer
2077 *
2078 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2079 * this function before a GPU reset. If the value is retained after a
2080 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2081 */
06ec9070 2082static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2083{
2084 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2085}
2086
e3ecdffa
AD
2087/**
2088 * amdgpu_device_check_vram_lost - check if vram is valid
2089 *
2090 * @adev: amdgpu_device pointer
2091 *
2092 * Checks the reset magic value written to the gart pointer in VRAM.
2093 * The driver calls this after a GPU reset to see if the contents of
2094 * VRAM are lost or not.
2095 * returns true if vram is lost, false if not.
2096 */
06ec9070 2097static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2098{
dadce777
EQ
2099 if (memcmp(adev->gart.ptr, adev->reset_magic,
2100 AMDGPU_RESET_MAGIC_NUM))
2101 return true;
2102
2103 if (!adev->in_gpu_reset)
2104 return false;
2105
2106 /*
2107 * For all ASICs with baco/mode1 reset, the VRAM is
2108 * always assumed to be lost.
2109 */
2110 switch (amdgpu_asic_reset_method(adev)) {
2111 case AMD_RESET_METHOD_BACO:
2112 case AMD_RESET_METHOD_MODE1:
2113 return true;
2114 default:
2115 return false;
2116 }
0c49e0b8
CZ
2117}
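/*
 * Illustrative sketch only (not part of the driver): how the reset-magic
 * helpers above are typically paired around an ASIC reset. The wrapper
 * below is hypothetical; the functions it calls are the real ones.
 *
 *	static bool example_reset_and_check(struct amdgpu_device *adev)
 *	{
 *		amdgpu_device_fill_reset_magic(adev);	// snapshot before reset
 *		if (amdgpu_asic_reset(adev))		// perform the ASIC reset
 *			return true;			// treat a failed reset as lost
 *		return amdgpu_device_check_vram_lost(adev);
 *	}
 */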
2118
e3ecdffa 2119/**
1112a46b 2120 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2121 *
2122 * @adev: amdgpu_device pointer
b8b72130 2123 * @state: clockgating state (gate or ungate)
e3ecdffa 2124 *
e3ecdffa 2125 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2126 * set_clockgating_state callbacks are run.
2127 * During late init this pass enables clockgating for the hardware IPs;
2128 * during fini or suspend it disables clockgating.
e3ecdffa
AD
2129 * Returns 0 on success, negative error code on failure.
2130 */
fdd34271 2131
1112a46b
RZ
2132static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2133 enum amd_clockgating_state state)
d38ceaf9 2134{
1112a46b 2135 int i, j, r;
d38ceaf9 2136
4a2ba394
SL
2137 if (amdgpu_emu_mode == 1)
2138 return 0;
2139
1112a46b
RZ
2140 for (j = 0; j < adev->num_ip_blocks; j++) {
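		/* gate in IP-list order, ungate in the reverse order */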
2141 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2142 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2143 continue;
4a446d55 2144 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2145 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2146 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2147 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2148 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2149 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2150 /* enable clockgating to save power */
a1255107 2151 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2152 state);
4a446d55
AD
2153 if (r) {
2154 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2155 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2156 return r;
2157 }
b0b00ff1 2158 }
d38ceaf9 2159 }
06b18f61 2160
c9f96fd5
RZ
2161 return 0;
2162}
2163
1112a46b 2164static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
c9f96fd5 2165{
1112a46b 2166 int i, j, r;
06b18f61 2167
c9f96fd5
RZ
2168 if (amdgpu_emu_mode == 1)
2169 return 0;
2170
1112a46b
RZ
2171 for (j = 0; j < adev->num_ip_blocks; j++) {
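		/* gate in IP-list order, ungate in the reverse order */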
2172 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2173 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5
RZ
2174 continue;
2175 /* skip PG for VCE/UVD, it's handled specially */
2176 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2177 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2178 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2179 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2180 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2181 /* enable powergating to save power */
2182 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2183 state);
c9f96fd5
RZ
2184 if (r) {
2185 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2186 adev->ip_blocks[i].version->funcs->name, r);
2187 return r;
2188 }
2189 }
2190 }
2dc80b00
S
2191 return 0;
2192}
2193
beff74bc
AD
2194static int amdgpu_device_enable_mgpu_fan_boost(void)
2195{
2196 struct amdgpu_gpu_instance *gpu_ins;
2197 struct amdgpu_device *adev;
2198 int i, ret = 0;
2199
2200 mutex_lock(&mgpu_info.mutex);
2201
2202 /*
2203 * MGPU fan boost feature should be enabled
2204 * only when there are two or more dGPUs in
2205 * the system
2206 */
2207 if (mgpu_info.num_dgpu < 2)
2208 goto out;
2209
2210 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2211 gpu_ins = &(mgpu_info.gpu_ins[i]);
2212 adev = gpu_ins->adev;
2213 if (!(adev->flags & AMD_IS_APU) &&
2214 !gpu_ins->mgpu_fan_enabled &&
2215 adev->powerplay.pp_funcs &&
2216 adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
2217 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2218 if (ret)
2219 break;
2220
2221 gpu_ins->mgpu_fan_enabled = 1;
2222 }
2223 }
2224
2225out:
2226 mutex_unlock(&mgpu_info.mutex);
2227
2228 return ret;
2229}
2230
e3ecdffa
AD
2231/**
2232 * amdgpu_device_ip_late_init - run late init for hardware IPs
2233 *
2234 * @adev: amdgpu_device pointer
2235 *
2236 * Late initialization pass for hardware IPs. The list of all the hardware
2237 * IPs that make up the asic is walked and the late_init callbacks are run.
2238 * late_init covers any special initialization that an IP requires
2239 * after all of the IPs have been initialized or something that needs to happen
2240 * late in the init process.
2241 * Returns 0 on success, negative error code on failure.
2242 */
06ec9070 2243static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2244{
60599a03 2245 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2246 int i = 0, r;
2247
2248 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2249 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2250 continue;
2251 if (adev->ip_blocks[i].version->funcs->late_init) {
2252 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2253 if (r) {
2254 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2255 adev->ip_blocks[i].version->funcs->name, r);
2256 return r;
2257 }
2dc80b00 2258 }
73f847db 2259 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2260 }
2261
a891d239
DL
2262 amdgpu_ras_set_error_query_ready(adev, true);
2263
1112a46b
RZ
2264 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2265 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2266
06ec9070 2267 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2268
beff74bc
AD
2269 r = amdgpu_device_enable_mgpu_fan_boost();
2270 if (r)
2271 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2272
60599a03
EQ
2273
2274 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2275 mutex_lock(&mgpu_info.mutex);
2276
2277 /*
2278 * Reset device p-state to low as this was booted with high.
2279 *
2280 * This should be performed only after all devices from the same
2281 * hive get initialized.
2282 *
2283 * However, the number of devices in a hive is not known in advance;
2284 * it is counted one by one as the devices are initialized.
2285 *
2286 * So, we wait for all XGMI interlinked devices to be initialized.
2287 * This may bring some delays as those devices may come from
2288 * different hives. But that should be OK.
2289 */
2290 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2291 for (i = 0; i < mgpu_info.num_gpu; i++) {
2292 gpu_instance = &(mgpu_info.gpu_ins[i]);
2293 if (gpu_instance->adev->flags & AMD_IS_APU)
2294 continue;
2295
d84a430d
JK
2296 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2297 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
2298 if (r) {
2299 DRM_ERROR("pstate setting failed (%d).\n", r);
2300 break;
2301 }
2302 }
2303 }
2304
2305 mutex_unlock(&mgpu_info.mutex);
2306 }
2307
d38ceaf9
AD
2308 return 0;
2309}
2310
e3ecdffa
AD
2311/**
2312 * amdgpu_device_ip_fini - run fini for hardware IPs
2313 *
2314 * @adev: amdgpu_device pointer
2315 *
2316 * Main teardown pass for hardware IPs. The list of all the hardware
2317 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2318 * are run. hw_fini tears down the hardware associated with each IP
2319 * and sw_fini tears down any software state associated with each IP.
2320 * Returns 0 on success, negative error code on failure.
2321 */
06ec9070 2322static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
d38ceaf9
AD
2323{
2324 int i, r;
2325
5278a159
SY
2326 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2327 amdgpu_virt_release_ras_err_handler_data(adev);
2328
c030f2e4 2329 amdgpu_ras_pre_fini(adev);
2330
a82400b5
AG
2331 if (adev->gmc.xgmi.num_physical_nodes > 1)
2332 amdgpu_xgmi_remove_device(adev);
2333
1884734a 2334 amdgpu_amdkfd_device_fini(adev);
05df1f01
RZ
2335
2336 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2337 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2338
3e96dbfd
AD
2339 /* need to disable SMC first */
2340 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2341 if (!adev->ip_blocks[i].status.hw)
3e96dbfd 2342 continue;
fdd34271 2343 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
a1255107 2344 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3e96dbfd
AD
2345 /* XXX handle errors */
2346 if (r) {
2347 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
a1255107 2348 adev->ip_blocks[i].version->funcs->name, r);
3e96dbfd 2349 }
a1255107 2350 adev->ip_blocks[i].status.hw = false;
3e96dbfd
AD
2351 break;
2352 }
2353 }
2354
d38ceaf9 2355 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2356 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2357 continue;
8201a67a 2358
a1255107 2359 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2360 /* XXX handle errors */
2c1a2784 2361 if (r) {
a1255107
AD
2362 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2363 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2364 }
8201a67a 2365
a1255107 2366 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2367 }
2368
9950cda2 2369
d38ceaf9 2370 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2371 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2372 continue;
c12aba3a
ML
2373
2374 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2375 amdgpu_ucode_free_bo(adev);
1e256e27 2376 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a
ML
2377 amdgpu_device_wb_fini(adev);
2378 amdgpu_device_vram_scratch_fini(adev);
533aed27 2379 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2380 }
2381
a1255107 2382 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2383 /* XXX handle errors */
2c1a2784 2384 if (r) {
a1255107
AD
2385 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2386 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2387 }
a1255107
AD
2388 adev->ip_blocks[i].status.sw = false;
2389 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2390 }
2391
a6dcfd9c 2392 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2393 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2394 continue;
a1255107
AD
2395 if (adev->ip_blocks[i].version->funcs->late_fini)
2396 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2397 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2398 }
2399
c030f2e4 2400 amdgpu_ras_fini(adev);
2401
030308fc 2402 if (amdgpu_sriov_vf(adev))
24136135
ML
2403 if (amdgpu_virt_release_full_gpu(adev, false))
2404 DRM_ERROR("failed to release exclusive mode on fini\n");
2493664f 2405
d38ceaf9
AD
2406 return 0;
2407}
2408
e3ecdffa 2409/**
beff74bc 2410 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2411 *
1112a46b 2412 * @work: work_struct.
e3ecdffa 2413 */
beff74bc 2414static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2415{
2416 struct amdgpu_device *adev =
beff74bc 2417 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2418 int r;
2419
2420 r = amdgpu_ib_ring_tests(adev);
2421 if (r)
2422 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2423}
2424
1e317b99
RZ
2425static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2426{
2427 struct amdgpu_device *adev =
2428 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2429
2430 mutex_lock(&adev->gfx.gfx_off_mutex);
2431 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2432 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2433 adev->gfx.gfx_off_state = true;
2434 }
2435 mutex_unlock(&adev->gfx.gfx_off_mutex);
2436}
2437
e3ecdffa 2438/**
e7854a03 2439 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2440 *
2441 * @adev: amdgpu_device pointer
2442 *
2443 * Main suspend function for hardware IPs. The list of all the hardware
2444 * IPs that make up the asic is walked, clockgating is disabled and the
2445 * suspend callbacks are run. suspend puts the hardware and software state
2446 * in each IP into a state suitable for suspend.
2447 * Returns 0 on success, negative error code on failure.
2448 */
e7854a03
AD
2449static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2450{
2451 int i, r;
2452
ced1ba97
PL
2453 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2454 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2455
e7854a03
AD
2456 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2457 if (!adev->ip_blocks[i].status.valid)
2458 continue;
2b9f7848 2459
e7854a03 2460 /* displays are handled separately */
2b9f7848
ND
2461 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
2462 continue;
2463
2464 /* XXX handle errors */
2465 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2466 /* XXX handle errors */
2467 if (r) {
2468 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2469 adev->ip_blocks[i].version->funcs->name, r);
2470 return r;
e7854a03 2471 }
2b9f7848
ND
2472
2473 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
2474 }
2475
e7854a03
AD
2476 return 0;
2477}
2478
2479/**
2480 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2481 *
2482 * @adev: amdgpu_device pointer
2483 *
2484 * Main suspend function for hardware IPs. The list of all the hardware
2485 * IPs that make up the asic is walked, clockgating is disabled and the
2486 * suspend callbacks are run. suspend puts the hardware and software state
2487 * in each IP into a state suitable for suspend.
2488 * Returns 0 on success, negative error code on failure.
2489 */
2490static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2491{
2492 int i, r;
2493
2494 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2495 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2496 continue;
e7854a03
AD
2497 /* displays are handled in phase1 */
2498 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2499 continue;
bff77e86
LM
2500 /* PSP lost connection when err_event_athub occurs */
2501 if (amdgpu_ras_intr_triggered() &&
2502 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2503 adev->ip_blocks[i].status.hw = false;
2504 continue;
2505 }
d38ceaf9 2506 /* XXX handle errors */
a1255107 2507 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 2508 /* XXX handle errors */
2c1a2784 2509 if (r) {
a1255107
AD
2510 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2511 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2512 }
876923fb 2513 adev->ip_blocks[i].status.hw = false;
a3a09142 2514 /* handle putting the SMC in the appropriate state */
86b93fd6
JZ
2515 if (!amdgpu_sriov_vf(adev)) {
2516 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2517 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
2518 if (r) {
2519 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
2520 adev->mp1_state, r);
2521 return r;
2522 }
a3a09142
AD
2523 }
2524 }
b5507c7e 2525 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2526 }
2527
2528 return 0;
2529}
2530
e7854a03
AD
2531/**
2532 * amdgpu_device_ip_suspend - run suspend for hardware IPs
2533 *
2534 * @adev: amdgpu_device pointer
2535 *
2536 * Main suspend function for hardware IPs. The list of all the hardware
2537 * IPs that make up the asic is walked, clockgating is disabled and the
2538 * suspend callbacks are run. suspend puts the hardware and software state
2539 * in each IP into a state suitable for suspend.
2540 * Returns 0 on success, negative error code on failure.
2541 */
2542int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2543{
2544 int r;
2545
e7819644
YT
2546 if (amdgpu_sriov_vf(adev))
2547 amdgpu_virt_request_full_gpu(adev, false);
2548
e7854a03
AD
2549 r = amdgpu_device_ip_suspend_phase1(adev);
2550 if (r)
2551 return r;
2552 r = amdgpu_device_ip_suspend_phase2(adev);
2553
e7819644
YT
2554 if (amdgpu_sriov_vf(adev))
2555 amdgpu_virt_release_full_gpu(adev, false);
2556
e7854a03
AD
2557 return r;
2558}
2559
06ec9070 2560static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2561{
2562 int i, r;
2563
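	/* On SR-IOV re-init the blocks must come back up in this fixed order,
	 * not in the order they were originally added. */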
2cb681b6
ML
2564 static enum amd_ip_block_type ip_order[] = {
2565 AMD_IP_BLOCK_TYPE_GMC,
2566 AMD_IP_BLOCK_TYPE_COMMON,
39186aef 2567 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
2568 AMD_IP_BLOCK_TYPE_IH,
2569 };
a90ad3c2 2570
2cb681b6
ML
2571 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2572 int j;
2573 struct amdgpu_ip_block *block;
a90ad3c2 2574
2cb681b6
ML
2575 for (j = 0; j < adev->num_ip_blocks; j++) {
2576 block = &adev->ip_blocks[j];
2577
482f0e53 2578 block->status.hw = false;
2cb681b6
ML
2579 if (block->version->type != ip_order[i] ||
2580 !block->status.valid)
2581 continue;
2582
2583 r = block->version->funcs->hw_init(adev);
0aaeefcc 2584 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2585 if (r)
2586 return r;
482f0e53 2587 block->status.hw = true;
a90ad3c2
ML
2588 }
2589 }
2590
2591 return 0;
2592}
2593
06ec9070 2594static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2595{
2596 int i, r;
2597
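	/* Late SR-IOV re-init: the remaining blocks, again in a fixed order;
	 * note that SMC is resumed rather than re-run through hw_init. */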
2cb681b6
ML
2598 static enum amd_ip_block_type ip_order[] = {
2599 AMD_IP_BLOCK_TYPE_SMC,
2600 AMD_IP_BLOCK_TYPE_DCE,
2601 AMD_IP_BLOCK_TYPE_GFX,
2602 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c 2603 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07
JJ
2604 AMD_IP_BLOCK_TYPE_VCE,
2605 AMD_IP_BLOCK_TYPE_VCN
2cb681b6 2606 };
a90ad3c2 2607
2cb681b6
ML
2608 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2609 int j;
2610 struct amdgpu_ip_block *block;
a90ad3c2 2611
2cb681b6
ML
2612 for (j = 0; j < adev->num_ip_blocks; j++) {
2613 block = &adev->ip_blocks[j];
2614
2615 if (block->version->type != ip_order[i] ||
482f0e53
ML
2616 !block->status.valid ||
2617 block->status.hw)
2cb681b6
ML
2618 continue;
2619
895bd048
JZ
2620 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
2621 r = block->version->funcs->resume(adev);
2622 else
2623 r = block->version->funcs->hw_init(adev);
2624
0aaeefcc 2625 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2626 if (r)
2627 return r;
482f0e53 2628 block->status.hw = true;
a90ad3c2
ML
2629 }
2630 }
2631
2632 return 0;
2633}
2634
e3ecdffa
AD
2635/**
2636 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2637 *
2638 * @adev: amdgpu_device pointer
2639 *
2640 * First resume function for hardware IPs. The list of all the hardware
2641 * IPs that make up the asic is walked and the resume callbacks are run for
2642 * COMMON, GMC, and IH. resume puts the hardware into a functional state
2643 * after a suspend and updates the software state as necessary. This
2644 * function is also used for restoring the GPU after a GPU reset.
2645 * Returns 0 on success, negative error code on failure.
2646 */
06ec9070 2647static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
2648{
2649 int i, r;
2650
a90ad3c2 2651 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2652 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 2653 continue;
a90ad3c2 2654 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa
AD
2655 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2656 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
482f0e53 2657
fcf0649f
CZ
2658 r = adev->ip_blocks[i].version->funcs->resume(adev);
2659 if (r) {
2660 DRM_ERROR("resume of IP block <%s> failed %d\n",
2661 adev->ip_blocks[i].version->funcs->name, r);
2662 return r;
2663 }
482f0e53 2664 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
2665 }
2666 }
2667
2668 return 0;
2669}
2670
e3ecdffa
AD
2671/**
2672 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2673 *
2674 * @adev: amdgpu_device pointer
2675 *
2676 * Second resume function for hardware IPs. The list of all the hardware
2677 * IPs that make up the asic is walked and the resume callbacks are run for
2678 * all blocks except COMMON, GMC, IH, and PSP. resume puts the hardware into a
2679 * functional state after a suspend and updates the software state as
2680 * necessary. This function is also used for restoring the GPU after a GPU
2681 * reset.
2682 * Returns 0 on success, negative error code on failure.
2683 */
06ec9070 2684static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2685{
2686 int i, r;
2687
2688 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2689 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 2690 continue;
fcf0649f 2691 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 2692 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
2693 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2694 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 2695 continue;
a1255107 2696 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 2697 if (r) {
a1255107
AD
2698 DRM_ERROR("resume of IP block <%s> failed %d\n",
2699 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2700 return r;
2c1a2784 2701 }
482f0e53 2702 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
2703 }
2704
2705 return 0;
2706}
2707
e3ecdffa
AD
2708/**
2709 * amdgpu_device_ip_resume - run resume for hardware IPs
2710 *
2711 * @adev: amdgpu_device pointer
2712 *
2713 * Main resume function for hardware IPs. The hardware IPs
2714 * are split into two resume functions because they are
2715 * also used in recovering from a GPU reset and some additional
2716 * steps need to be taken between them. In this case (S3/S4) they are
2717 * run sequentially.
2718 * Returns 0 on success, negative error code on failure.
2719 */
06ec9070 2720static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
2721{
2722 int r;
2723
06ec9070 2724 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
2725 if (r)
2726 return r;
7a3e0bb2
RZ
2727
2728 r = amdgpu_device_fw_loading(adev);
2729 if (r)
2730 return r;
2731
06ec9070 2732 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
2733
2734 return r;
2735}
2736
e3ecdffa
AD
2737/**
2738 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2739 *
2740 * @adev: amdgpu_device pointer
2741 *
2742 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2743 */
4e99a44e 2744static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 2745{
6867e1b5
ML
2746 if (amdgpu_sriov_vf(adev)) {
2747 if (adev->is_atom_fw) {
2748 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2749 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2750 } else {
2751 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2752 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2753 }
2754
2755 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2756 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 2757 }
048765ad
AR
2758}
2759
e3ecdffa
AD
2760/**
2761 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2762 *
2763 * @asic_type: AMD asic type
2764 *
2765 * Check if there is DC (new modesetting infrastructure) support for an asic.
2766 * returns true if DC has support, false if not.
2767 */
4562236b
HW
2768bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2769{
2770 switch (asic_type) {
2771#if defined(CONFIG_DRM_AMD_DC)
2772 case CHIP_BONAIRE:
0d6fbccb 2773 case CHIP_KAVERI:
367e6687
AD
2774 case CHIP_KABINI:
2775 case CHIP_MULLINS:
d9fda248
HW
2776 /*
2777 * We have systems in the wild with these ASICs that require
2778 * LVDS and VGA support which is not supported with DC.
2779 *
2780 * Fallback to the non-DC driver here by default so as not to
2781 * cause regressions.
2782 */
2783 return amdgpu_dc > 0;
2784 case CHIP_HAWAII:
4562236b
HW
2785 case CHIP_CARRIZO:
2786 case CHIP_STONEY:
4562236b 2787 case CHIP_POLARIS10:
675fd32b 2788 case CHIP_POLARIS11:
2c8ad2d5 2789 case CHIP_POLARIS12:
675fd32b 2790 case CHIP_VEGAM:
4562236b
HW
2791 case CHIP_TONGA:
2792 case CHIP_FIJI:
42f8ffa1 2793 case CHIP_VEGA10:
dca7b401 2794 case CHIP_VEGA12:
c6034aa2 2795 case CHIP_VEGA20:
b86a1aa3 2796#if defined(CONFIG_DRM_AMD_DC_DCN)
fd187853 2797 case CHIP_RAVEN:
b4f199c7 2798 case CHIP_NAVI10:
8fceceb6 2799 case CHIP_NAVI14:
078655d9 2800 case CHIP_NAVI12:
e1c14c43 2801 case CHIP_RENOIR:
81d9bfb8
JFZ
2802#endif
2803#if defined(CONFIG_DRM_AMD_DC_DCN3_0)
2804 case CHIP_SIENNA_CICHLID:
42f8ffa1 2805#endif
fd187853 2806 return amdgpu_dc != 0;
4562236b
HW
2807#endif
2808 default:
93b09a9a
SS
2809 if (amdgpu_dc > 0)
2810 DRM_INFO("Display Core has been requested via kernel parameter "
2811 "but isn't supported by ASIC, ignoring\n");
4562236b
HW
2812 return false;
2813 }
2814}
2815
2816/**
2817 * amdgpu_device_has_dc_support - check if dc is supported
2818 *
2819 * @adev: amdgpu_device pointer
2820 *
2821 * Returns true for supported, false for not supported
2822 */
2823bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2824{
2555039d
XY
2825 if (amdgpu_sriov_vf(adev))
2826 return false;
2827
4562236b
HW
2828 return amdgpu_device_asic_has_dc_support(adev->asic_type);
2829}
2830
d4535e2c
AG
2831
2832static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
2833{
2834 struct amdgpu_device *adev =
2835 container_of(__work, struct amdgpu_device, xgmi_reset_work);
c6a6e2db 2836 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
d4535e2c 2837
c6a6e2db
AG
2838 /* It's a bug to not have a hive within this function */
2839 if (WARN_ON(!hive))
2840 return;
2841
2842 /*
2843 * Use task barrier to synchronize all xgmi reset works across the
2844 * hive. task_barrier_enter and task_barrier_exit will block
2845 * until all the threads running the xgmi reset works reach
2846 * those points. task_barrier_full will do both blocks.
2847 */
2848 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
2849
2850 task_barrier_enter(&hive->tb);
2851 adev->asic_reset_res = amdgpu_device_baco_enter(adev->ddev);
2852
2853 if (adev->asic_reset_res)
2854 goto fail;
2855
2856 task_barrier_exit(&hive->tb);
2857 adev->asic_reset_res = amdgpu_device_baco_exit(adev->ddev);
2858
2859 if (adev->asic_reset_res)
2860 goto fail;
43c4d576
JC
2861
2862 if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count)
2863 adev->mmhub.funcs->reset_ras_error_count(adev);
c6a6e2db
AG
2864 } else {
2865
2866 task_barrier_full(&hive->tb);
2867 adev->asic_reset_res = amdgpu_asic_reset(adev);
2868 }
ce316fa5 2869
c6a6e2db 2870fail:
d4535e2c 2871 if (adev->asic_reset_res)
fed184e9 2872 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
d4535e2c
AG
2873 adev->asic_reset_res, adev->ddev->unique);
2874}
2875
71f98027
AD
2876static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
2877{
2878 char *input = amdgpu_lockup_timeout;
2879 char *timeout_setting = NULL;
2880 int index = 0;
2881 long timeout;
2882 int ret = 0;
2883
2884 /*
2885 * By default the timeout for non-compute jobs is 10000 ms,
2886 * and there is no timeout enforced on compute jobs.
2887 * In SR-IOV or passthrough mode, the timeout for compute
b7b2a316 2888 * jobs is 60000 ms by default.
71f98027
AD
2889 */
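	/*
	 * Illustrative example (hypothetical values): a module option of
	 *   amdgpu.lockup_timeout=10000,60000,10000,10000
	 * is parsed below as the GFX, compute, SDMA and video timeouts,
	 * in milliseconds, in that order.
	 */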
2890 adev->gfx_timeout = msecs_to_jiffies(10000);
2891 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
2892 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
b7b2a316 2893 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027
AD
2894 else
2895 adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
2896
f440ff44 2897 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 2898 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 2899 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
2900 ret = kstrtol(timeout_setting, 0, &timeout);
2901 if (ret)
2902 return ret;
2903
2904 if (timeout == 0) {
2905 index++;
2906 continue;
2907 } else if (timeout < 0) {
2908 timeout = MAX_SCHEDULE_TIMEOUT;
2909 } else {
2910 timeout = msecs_to_jiffies(timeout);
2911 }
2912
2913 switch (index++) {
2914 case 0:
2915 adev->gfx_timeout = timeout;
2916 break;
2917 case 1:
2918 adev->compute_timeout = timeout;
2919 break;
2920 case 2:
2921 adev->sdma_timeout = timeout;
2922 break;
2923 case 3:
2924 adev->video_timeout = timeout;
2925 break;
2926 default:
2927 break;
2928 }
2929 }
2930 /*
2931 * There is only one value specified and
2932 * it should apply to all non-compute jobs.
2933 */
bcccee89 2934 if (index == 1) {
71f98027 2935 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
2936 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
2937 adev->compute_timeout = adev->gfx_timeout;
2938 }
71f98027
AD
2939 }
2940
2941 return ret;
2942}
d4535e2c 2943
77f3a5cd
ND
2944static const struct attribute *amdgpu_dev_attributes[] = {
2945 &dev_attr_product_name.attr,
2946 &dev_attr_product_number.attr,
2947 &dev_attr_serial_number.attr,
2948 &dev_attr_pcie_replay_count.attr,
2949 NULL
2950};
2951
d38ceaf9
AD
2952/**
2953 * amdgpu_device_init - initialize the driver
2954 *
2955 * @adev: amdgpu_device pointer
87e3f136 2956 * @ddev: drm dev pointer
d38ceaf9
AD
2957 * @pdev: pci dev pointer
2958 * @flags: driver flags
2959 *
2960 * Initializes the driver info and hw (all asics).
2961 * Returns 0 for success or an error on failure.
2962 * Called at driver startup.
2963 */
2964int amdgpu_device_init(struct amdgpu_device *adev,
2965 struct drm_device *ddev,
2966 struct pci_dev *pdev,
2967 uint32_t flags)
2968{
2969 int r, i;
3840c5bc 2970 bool boco = false;
95844d20 2971 u32 max_MBps;
d38ceaf9
AD
2972
2973 adev->shutdown = false;
2974 adev->dev = &pdev->dev;
2975 adev->ddev = ddev;
2976 adev->pdev = pdev;
2977 adev->flags = flags;
4e66d7d2
YZ
2978
2979 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
2980 adev->asic_type = amdgpu_force_asic_type;
2981 else
2982 adev->asic_type = flags & AMD_ASIC_MASK;
2983
d38ceaf9 2984 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 2985 if (amdgpu_emu_mode == 1)
8bdab6bb 2986 adev->usec_timeout *= 10;
770d13b1 2987 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
2988 adev->accel_working = false;
2989 adev->num_rings = 0;
2990 adev->mman.buffer_funcs = NULL;
2991 adev->mman.buffer_funcs_ring = NULL;
2992 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 2993 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 2994 adev->gmc.gmc_funcs = NULL;
f54d1867 2995 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 2996 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
2997
2998 adev->smc_rreg = &amdgpu_invalid_rreg;
2999 adev->smc_wreg = &amdgpu_invalid_wreg;
3000 adev->pcie_rreg = &amdgpu_invalid_rreg;
3001 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
3002 adev->pciep_rreg = &amdgpu_invalid_rreg;
3003 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
3004 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3005 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
3006 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3007 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3008 adev->didt_rreg = &amdgpu_invalid_rreg;
3009 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
3010 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3011 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
3012 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3013 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3014
3e39ab90
AD
3015 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3016 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3017 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
3018
3019 /* mutex initializations are all done here so we
3020 * can call these functions without locking issues */
d38ceaf9 3021 atomic_set(&adev->irq.ih.lock, 0);
0e5ca0d1 3022 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
3023 mutex_init(&adev->pm.mutex);
3024 mutex_init(&adev->gfx.gpu_clock_mutex);
3025 mutex_init(&adev->srbm_mutex);
b8866c26 3026 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3027 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 3028 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3029 mutex_init(&adev->mn_lock);
e23b74aa 3030 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3031 hash_init(adev->mn_hash);
13a752e3 3032 mutex_init(&adev->lock_reset);
32eaeae0 3033 mutex_init(&adev->psp.mutex);
bd052211 3034 mutex_init(&adev->notifier_lock);
d38ceaf9 3035
912dfc84
EQ
3036 r = amdgpu_device_check_arguments(adev);
3037 if (r)
3038 return r;
d38ceaf9 3039
d38ceaf9
AD
3040 spin_lock_init(&adev->mmio_idx_lock);
3041 spin_lock_init(&adev->smc_idx_lock);
3042 spin_lock_init(&adev->pcie_idx_lock);
3043 spin_lock_init(&adev->uvd_ctx_idx_lock);
3044 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3045 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3046 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3047 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3048 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3049
0c4e7fa5
CZ
3050 INIT_LIST_HEAD(&adev->shadow_list);
3051 mutex_init(&adev->shadow_list_lock);
3052
beff74bc
AD
3053 INIT_DELAYED_WORK(&adev->delayed_init_work,
3054 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3055 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3056 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3057
d4535e2c
AG
3058 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3059
d23ee13f 3060 adev->gfx.gfx_off_req_count = 1;
b6e79d9a 3061 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3062
b265bdbd
EQ
3063 atomic_set(&adev->throttling_logging_enabled, 1);
3064 /*
3065 * If throttling continues, logging will be performed every minute
3066 * to avoid log flooding. "-1" is subtracted since the thermal
3067 * throttling interrupt comes every second. Thus, the total logging
3068 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
3069 * for throttling interrupt) = 60 seconds.
3070 */
3071 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3072 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3073
0fa49558
AX
3074 /* Registers mapping */
3075 /* TODO: block userspace mapping of io register */
da69c161
KW
3076 if (adev->asic_type >= CHIP_BONAIRE) {
3077 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3078 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3079 } else {
3080 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3081 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3082 }
d38ceaf9 3083
d38ceaf9
AD
3084 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3085 if (adev->rmmio == NULL) {
3086 return -ENOMEM;
3087 }
3088 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3089 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3090
d38ceaf9
AD
3091 /* io port mapping */
3092 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
3093 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
3094 adev->rio_mem_size = pci_resource_len(adev->pdev, i);
3095 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
3096 break;
3097 }
3098 }
3099 if (adev->rio_mem == NULL)
b64a18c5 3100 DRM_INFO("PCI I/O BAR is not found.\n");
d38ceaf9 3101
b2109d8e
JX
3102 /* enable PCIE atomic ops */
3103 r = pci_enable_atomic_ops_to_root(adev->pdev,
3104 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3105 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3106 if (r) {
3107 adev->have_atomics_support = false;
3108 DRM_INFO("PCIE atomic ops is not supported\n");
3109 } else {
3110 adev->have_atomics_support = true;
3111 }
3112
5494d864
AD
3113 amdgpu_device_get_pcie_info(adev);
3114
b239c017
JX
3115 if (amdgpu_mcbp)
3116 DRM_INFO("MCBP is enabled\n");
3117
5f84cc63
JX
3118 if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
3119 adev->enable_mes = true;
3120
3aa0115d
ML
3121 /* detect hw virtualization here */
3122 amdgpu_detect_virtualization(adev);
3123
dffa11b4
ML
3124 r = amdgpu_device_get_job_timeout_settings(adev);
3125 if (r) {
3126 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
3127 return r;
a190d1c7
XY
3128 }
3129
d38ceaf9 3130 /* early init functions */
06ec9070 3131 r = amdgpu_device_ip_early_init(adev);
d38ceaf9
AD
3132 if (r)
3133 return r;
3134
6585661d
OZ
3135 /* doorbell bar mapping and doorbell index init*/
3136 amdgpu_device_doorbell_init(adev);
3137
d38ceaf9
AD
3138 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
3139 /* this will fail for cards that aren't VGA class devices, just
3140 * ignore it */
06ec9070 3141 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
d38ceaf9 3142
31af062a 3143 if (amdgpu_device_supports_boco(ddev))
3840c5bc
AD
3144 boco = true;
3145 if (amdgpu_has_atpx() &&
3146 (amdgpu_is_atpx_hybrid() ||
3147 amdgpu_has_atpx_dgpu_power_cntl()) &&
3148 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 3149 vga_switcheroo_register_client(adev->pdev,
3840c5bc
AD
3150 &amdgpu_switcheroo_ops, boco);
3151 if (boco)
d38ceaf9
AD
3152 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
3153
9475a943
SL
3154 if (amdgpu_emu_mode == 1) {
3155 /* post the asic on emulation mode */
3156 emu_soc_asic_init(adev);
bfca0289 3157 goto fence_driver_init;
9475a943 3158 }
bfca0289 3159
4e99a44e
ML
3160 /* detect if we are with an SRIOV vbios */
3161 amdgpu_device_detect_sriov_bios(adev);
048765ad 3162
95e8e59e
AD
3163 /* check if we need to reset the asic
3164 * E.g., driver was not cleanly unloaded previously, etc.
3165 */
f14899fd 3166 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
95e8e59e
AD
3167 r = amdgpu_asic_reset(adev);
3168 if (r) {
3169 dev_err(adev->dev, "asic reset on init failed\n");
3170 goto failed;
3171 }
3172 }
3173
d38ceaf9 3174 /* Post card if necessary */
39c640c0 3175 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3176 if (!adev->bios) {
bec86378 3177 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3178 r = -EINVAL;
3179 goto failed;
d38ceaf9 3180 }
bec86378 3181 DRM_INFO("GPU posting now...\n");
4e99a44e
ML
3182 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3183 if (r) {
3184 dev_err(adev->dev, "gpu post error!\n");
3185 goto failed;
3186 }
d38ceaf9
AD
3187 }
3188
88b64e95
AD
3189 if (adev->is_atom_fw) {
3190 /* Initialize clocks */
3191 r = amdgpu_atomfirmware_get_clock_info(adev);
3192 if (r) {
3193 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 3194 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
3195 goto failed;
3196 }
3197 } else {
a5bde2f9
AD
3198 /* Initialize clocks */
3199 r = amdgpu_atombios_get_clock_info(adev);
3200 if (r) {
3201 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 3202 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 3203 goto failed;
a5bde2f9
AD
3204 }
3205 /* init i2c buses */
4562236b
HW
3206 if (!amdgpu_device_has_dc_support(adev))
3207 amdgpu_atombios_i2c_init(adev);
2c1a2784 3208 }
d38ceaf9 3209
bfca0289 3210fence_driver_init:
d38ceaf9
AD
3211 /* Fence driver */
3212 r = amdgpu_fence_driver_init(adev);
2c1a2784
AD
3213 if (r) {
3214 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
e23b74aa 3215 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3216 goto failed;
2c1a2784 3217 }
d38ceaf9
AD
3218
3219 /* init the mode config */
3220 drm_mode_config_init(adev->ddev);
3221
06ec9070 3222 r = amdgpu_device_ip_init(adev);
d38ceaf9 3223 if (r) {
8840a387 3224 /* failed in exclusive mode due to timeout */
3225 if (amdgpu_sriov_vf(adev) &&
3226 !amdgpu_sriov_runtime(adev) &&
3227 amdgpu_virt_mmio_blocked(adev) &&
3228 !amdgpu_virt_wait_reset(adev)) {
3229 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
3230 /* Don't send request since VF is inactive. */
3231 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3232 adev->virt.ops = NULL;
8840a387 3233 r = -EAGAIN;
3234 goto failed;
3235 }
06ec9070 3236 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3237 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
83ba126a 3238 goto failed;
d38ceaf9
AD
3239 }
3240
d69b8971
YZ
3241 dev_info(adev->dev,
3242 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
3243 adev->gfx.config.max_shader_engines,
3244 adev->gfx.config.max_sh_per_se,
3245 adev->gfx.config.max_cu_per_sh,
3246 adev->gfx.cu_info.number);
3247
d38ceaf9
AD
3248 adev->accel_working = true;
3249
e59c0205
AX
3250 amdgpu_vm_check_compute_bug(adev);
3251
95844d20
MO
3252 /* Initialize the buffer migration limit. */
3253 if (amdgpu_moverate >= 0)
3254 max_MBps = amdgpu_moverate;
3255 else
3256 max_MBps = 8; /* Allow 8 MB/s. */
3257 /* Get a log2 for easy divisions. */
3258 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3259
9bc92b9c
ML
3260 amdgpu_fbdev_init(adev);
3261
d2f52ac8 3262 r = amdgpu_pm_sysfs_init(adev);
7c868b59
YT
3263 if (r) {
3264 adev->pm_sysfs_en = false;
d2f52ac8 3265 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
7c868b59
YT
3266 } else
3267 adev->pm_sysfs_en = true;
d2f52ac8 3268
5bb23532 3269 r = amdgpu_ucode_sysfs_init(adev);
7c868b59
YT
3270 if (r) {
3271 adev->ucode_sysfs_en = false;
5bb23532 3272 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
7c868b59
YT
3273 } else
3274 adev->ucode_sysfs_en = true;
5bb23532 3275
d38ceaf9
AD
3276 if ((amdgpu_testing & 1)) {
3277 if (adev->accel_working)
3278 amdgpu_test_moves(adev);
3279 else
3280 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
3281 }
d38ceaf9
AD
3282 if (amdgpu_benchmarking) {
3283 if (adev->accel_working)
3284 amdgpu_benchmark(adev, amdgpu_benchmarking);
3285 else
3286 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
3287 }
3288
b0adca4d
EQ
3289 /*
3290 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3291 * Otherwise the mgpu fan boost feature will be skipped because the
3292 * gpu instance count would be too low.
3293 */
3294 amdgpu_register_gpu_instance(adev);
3295
d38ceaf9
AD
3296 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3297 * explicit gating rather than handling it automatically.
3298 */
06ec9070 3299 r = amdgpu_device_ip_late_init(adev);
2c1a2784 3300 if (r) {
06ec9070 3301 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
e23b74aa 3302 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
83ba126a 3303 goto failed;
2c1a2784 3304 }
d38ceaf9 3305
108c6a63 3306 /* must succeed. */
511fdbc3 3307 amdgpu_ras_resume(adev);
108c6a63 3308
beff74bc
AD
3309 queue_delayed_work(system_wq, &adev->delayed_init_work,
3310 msecs_to_jiffies(AMDGPU_RESUME_MS));
3311
2c738637
ML
3312 if (amdgpu_sriov_vf(adev))
3313 flush_delayed_work(&adev->delayed_init_work);
3314
77f3a5cd 3315 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
bd607166 3316 if (r) {
77f3a5cd 3317 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166
KR
3318 return r;
3319 }
3320
d155bef0
AB
3321 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3322 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
3323 if (r)
3324 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3325
d38ceaf9 3326 return 0;
83ba126a
AD
3327
3328failed:
89041940 3329 amdgpu_vf_error_trans_all(adev);
3840c5bc 3330 if (boco)
83ba126a 3331 vga_switcheroo_fini_domain_pm_ops(adev->dev);
8840a387 3332
83ba126a 3333 return r;
d38ceaf9
AD
3334}
3335
d38ceaf9
AD
3336/**
3337 * amdgpu_device_fini - tear down the driver
3338 *
3339 * @adev: amdgpu_device pointer
3340 *
3341 * Tear down the driver info (all asics).
3342 * Called at driver shutdown.
3343 */
3344void amdgpu_device_fini(struct amdgpu_device *adev)
3345{
3346 int r;
3347
3348 DRM_INFO("amdgpu: finishing device.\n");
9f875167 3349 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 3350 adev->shutdown = true;
9f875167 3351
752c683d
ML
3352 /* make sure IB tests have finished before entering exclusive mode
3353 * to avoid preemption on the IB tests
3354 */
3355 if (amdgpu_sriov_vf(adev))
3356 amdgpu_virt_request_full_gpu(adev, false);
3357
e5b03032
ML
3358 /* disable all interrupts */
3359 amdgpu_irq_disable_all(adev);
ff97cba8
ML
3360 if (adev->mode_info.mode_config_initialized){
3361 if (!amdgpu_device_has_dc_support(adev))
c2d88e06 3362 drm_helper_force_disable_all(adev->ddev);
ff97cba8
ML
3363 else
3364 drm_atomic_helper_shutdown(adev->ddev);
3365 }
d38ceaf9 3366 amdgpu_fence_driver_fini(adev);
7c868b59
YT
3367 if (adev->pm_sysfs_en)
3368 amdgpu_pm_sysfs_fini(adev);
d38ceaf9 3369 amdgpu_fbdev_fini(adev);
06ec9070 3370 r = amdgpu_device_ip_fini(adev);
75e1658e
ND
3371 release_firmware(adev->firmware.gpu_info_fw);
3372 adev->firmware.gpu_info_fw = NULL;
d38ceaf9
AD
3373 adev->accel_working = false;
3374 /* free i2c buses */
4562236b
HW
3375 if (!amdgpu_device_has_dc_support(adev))
3376 amdgpu_i2c_fini(adev);
bfca0289
SL
3377
3378 if (amdgpu_emu_mode != 1)
3379 amdgpu_atombios_fini(adev);
3380
d38ceaf9
AD
3381 kfree(adev->bios);
3382 adev->bios = NULL;
3840c5bc
AD
3383 if (amdgpu_has_atpx() &&
3384 (amdgpu_is_atpx_hybrid() ||
3385 amdgpu_has_atpx_dgpu_power_cntl()) &&
3386 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 3387 vga_switcheroo_unregister_client(adev->pdev);
3840c5bc 3388 if (amdgpu_device_supports_boco(adev->ddev))
83ba126a 3389 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d38ceaf9
AD
3390 vga_client_register(adev->pdev, NULL, NULL, NULL);
3391 if (adev->rio_mem)
3392 pci_iounmap(adev->pdev, adev->rio_mem);
3393 adev->rio_mem = NULL;
3394 iounmap(adev->rmmio);
3395 adev->rmmio = NULL;
06ec9070 3396 amdgpu_device_doorbell_fini(adev);
e9bc1bf7 3397
7c868b59
YT
3398 if (adev->ucode_sysfs_en)
3399 amdgpu_ucode_sysfs_fini(adev);
77f3a5cd
ND
3400
3401 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
d155bef0
AB
3402 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3403 amdgpu_pmu_fini(adev);
4292b0b2 3404 if (adev->discovery_bin)
a190d1c7 3405 amdgpu_discovery_fini(adev);
d38ceaf9
AD
3406}
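/*
 * Usage sketch (an assumption for illustration, not part of this file): the
 * teardown above is normally reached from the KMS unload callback on driver
 * removal, roughly:
 *
 *	void amdgpu_driver_unload_kms(struct drm_device *dev)
 *	{
 *		struct amdgpu_device *adev = dev->dev_private;
 *
 *		...
 *		amdgpu_device_fini(adev);
 *	}
 */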
3407
3408
3409/*
3410 * Suspend & resume.
3411 */
3412/**
810ddc3a 3413 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 3414 *
87e3f136 3415 * @dev: drm dev pointer
87e3f136 3416 * @fbcon: notify the fbdev of suspend
d38ceaf9
AD
3417 *
3418 * Puts the hw in the suspend state (all asics).
3419 * Returns 0 for success or an error on failure.
3420 * Called at driver suspend.
3421 */
de185019 3422int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3423{
3424 struct amdgpu_device *adev;
3425 struct drm_crtc *crtc;
3426 struct drm_connector *connector;
f8d2d39e 3427 struct drm_connector_list_iter iter;
5ceb54c6 3428 int r;
d38ceaf9
AD
3429
3430 if (dev == NULL || dev->dev_private == NULL) {
3431 return -ENODEV;
3432 }
3433
3434 adev = dev->dev_private;
3435
3436 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3437 return 0;
3438
44779b43 3439 adev->in_suspend = true;
d38ceaf9
AD
3440 drm_kms_helper_poll_disable(dev);
3441
5f818173
S
3442 if (fbcon)
3443 amdgpu_fbdev_set_suspend(adev, 1);
3444
beff74bc 3445 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 3446
4562236b
HW
3447 if (!amdgpu_device_has_dc_support(adev)) {
3448 /* turn off display hw */
3449 drm_modeset_lock_all(dev);
f8d2d39e
LP
3450 drm_connector_list_iter_begin(dev, &iter);
3451 drm_for_each_connector_iter(connector, &iter)
3452 drm_helper_connector_dpms(connector,
3453 DRM_MODE_DPMS_OFF);
3454 drm_connector_list_iter_end(&iter);
4562236b 3455 drm_modeset_unlock_all(dev);
fe1053b7
AD
3456 /* unpin the front buffers and cursors */
3457 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3458 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3459 struct drm_framebuffer *fb = crtc->primary->fb;
3460 struct amdgpu_bo *robj;
3461
91334223 3462 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3463 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3464 r = amdgpu_bo_reserve(aobj, true);
3465 if (r == 0) {
3466 amdgpu_bo_unpin(aobj);
3467 amdgpu_bo_unreserve(aobj);
3468 }
756e6880 3469 }
756e6880 3470
fe1053b7
AD
3471 if (fb == NULL || fb->obj[0] == NULL) {
3472 continue;
3473 }
3474 robj = gem_to_amdgpu_bo(fb->obj[0]);
3475 /* don't unpin kernel fb objects */
3476 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
3477 r = amdgpu_bo_reserve(robj, true);
3478 if (r == 0) {
3479 amdgpu_bo_unpin(robj);
3480 amdgpu_bo_unreserve(robj);
3481 }
d38ceaf9
AD
3482 }
3483 }
3484 }
fe1053b7 3485
5e6932fe 3486 amdgpu_ras_suspend(adev);
3487
fe1053b7
AD
3488 r = amdgpu_device_ip_suspend_phase1(adev);
3489
94fa5660
EQ
3490 amdgpu_amdkfd_suspend(adev, !fbcon);
3491
d38ceaf9
AD
3492 /* evict vram memory */
3493 amdgpu_bo_evict_vram(adev);
3494
5ceb54c6 3495 amdgpu_fence_driver_suspend(adev);
d38ceaf9 3496
fe1053b7 3497 r = amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 3498
a0a71e49
AD
3499 /* evict remaining vram memory
3500 * This second call to evict vram is to evict the gart page table
3501 * using the CPU.
3502 */
d38ceaf9
AD
3503 amdgpu_bo_evict_vram(adev);
3504
d38ceaf9
AD
3505 return 0;
3506}
3507
3508/**
810ddc3a 3509 * amdgpu_device_resume - initiate device resume
d38ceaf9 3510 *
87e3f136 3511 * @dev: drm dev pointer
87e3f136 3512 * @fbcon: notify the fbdev of resume
d38ceaf9
AD
3513 *
3514 * Bring the hw back to operating state (all asics).
3515 * Returns 0 for success or an error on failure.
3516 * Called at driver resume.
3517 */
de185019 3518int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3519{
3520 struct drm_connector *connector;
f8d2d39e 3521 struct drm_connector_list_iter iter;
d38ceaf9 3522 struct amdgpu_device *adev = dev->dev_private;
756e6880 3523 struct drm_crtc *crtc;
03161a6e 3524 int r = 0;
d38ceaf9
AD
3525
3526 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3527 return 0;
3528
d38ceaf9 3529 /* post card */
39c640c0 3530 if (amdgpu_device_need_post(adev)) {
74b0b157 3531 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3532 if (r)
3533 DRM_ERROR("amdgpu asic init failed\n");
3534 }
d38ceaf9 3535
06ec9070 3536 r = amdgpu_device_ip_resume(adev);
e6707218 3537 if (r) {
06ec9070 3538 DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
4d3b9ae5 3539 return r;
e6707218 3540 }
5ceb54c6
AD
3541 amdgpu_fence_driver_resume(adev);
3542
d38ceaf9 3543
06ec9070 3544 r = amdgpu_device_ip_late_init(adev);
03161a6e 3545 if (r)
4d3b9ae5 3546 return r;
d38ceaf9 3547
beff74bc
AD
3548 queue_delayed_work(system_wq, &adev->delayed_init_work,
3549 msecs_to_jiffies(AMDGPU_RESUME_MS));
3550
fe1053b7
AD
3551 if (!amdgpu_device_has_dc_support(adev)) {
3552 /* pin cursors */
3553 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3554 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3555
91334223 3556 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3557 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3558 r = amdgpu_bo_reserve(aobj, true);
3559 if (r == 0) {
3560 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
3561 if (r != 0)
3562 DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
3563 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
3564 amdgpu_bo_unreserve(aobj);
3565 }
756e6880
AD
3566 }
3567 }
3568 }
9593f4d6 3569 r = amdgpu_amdkfd_resume(adev, !fbcon);
ba997709
YZ
3570 if (r)
3571 return r;
756e6880 3572
96a5d8d4 3573 /* Make sure IB tests flushed */
beff74bc 3574 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 3575
d38ceaf9
AD
3576 /* blat the mode back in */
3577 if (fbcon) {
4562236b
HW
3578 if (!amdgpu_device_has_dc_support(adev)) {
3579 /* pre DCE11 */
3580 drm_helper_resume_force_mode(dev);
3581
3582 /* turn on display hw */
3583 drm_modeset_lock_all(dev);
f8d2d39e
LP
3584
3585 drm_connector_list_iter_begin(dev, &iter);
3586 drm_for_each_connector_iter(connector, &iter)
3587 drm_helper_connector_dpms(connector,
3588 DRM_MODE_DPMS_ON);
3589 drm_connector_list_iter_end(&iter);
3590
4562236b 3591 drm_modeset_unlock_all(dev);
d38ceaf9 3592 }
4d3b9ae5 3593 amdgpu_fbdev_set_suspend(adev, 0);
d38ceaf9
AD
3594 }
3595
3596 drm_kms_helper_poll_enable(dev);
23a1a9e5 3597
5e6932fe 3598 amdgpu_ras_resume(adev);
3599
23a1a9e5
L
3600 /*
3601 * Most of the connector probing functions try to acquire runtime pm
3602 * refs to ensure that the GPU is powered on when connector polling is
3603 * performed. Since we're calling this from a runtime PM callback,
3604 * trying to acquire rpm refs will cause us to deadlock.
3605 *
3606 * Since we're guaranteed to be holding the rpm lock, it's safe to
3607 * temporarily disable the rpm helpers so this doesn't deadlock us.
3608 */
3609#ifdef CONFIG_PM
3610 dev->dev->power.disable_depth++;
3611#endif
4562236b
HW
3612 if (!amdgpu_device_has_dc_support(adev))
3613 drm_helper_hpd_irq_event(dev);
3614 else
3615 drm_kms_helper_hotplug_event(dev);
23a1a9e5
L
3616#ifdef CONFIG_PM
3617 dev->dev->power.disable_depth--;
3618#endif
44779b43
RZ
3619 adev->in_suspend = false;
3620
4d3b9ae5 3621 return 0;
d38ceaf9
AD
3622}
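/*
 * Usage sketch (an assumption for illustration, not part of this file): the
 * system sleep callbacks in amdgpu_drv.c are thin wrappers around the two
 * helpers above, roughly:
 *
 *	static int amdgpu_pmops_suspend(struct device *dev)
 *	{
 *		struct drm_device *drm_dev = dev_get_drvdata(dev);
 *
 *		return amdgpu_device_suspend(drm_dev, true);
 *	}
 *
 *	static int amdgpu_pmops_resume(struct device *dev)
 *	{
 *		struct drm_device *drm_dev = dev_get_drvdata(dev);
 *
 *		return amdgpu_device_resume(drm_dev, true);
 *	}
 */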
3623
e3ecdffa
AD
3624/**
3625 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
3626 *
3627 * @adev: amdgpu_device pointer
3628 *
3629 * The list of all the hardware IPs that make up the asic is walked and
3630 * the check_soft_reset callbacks are run. check_soft_reset determines
3631 * if the asic is still hung or not.
3632 * Returns true if any of the IPs are still in a hung state, false if not.
3633 */
06ec9070 3634static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
3635{
3636 int i;
3637 bool asic_hang = false;
3638
f993d628
ML
3639 if (amdgpu_sriov_vf(adev))
3640 return true;
3641
8bc04c29
AD
3642 if (amdgpu_asic_need_full_reset(adev))
3643 return true;
3644
63fbf42f 3645 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3646 if (!adev->ip_blocks[i].status.valid)
63fbf42f 3647 continue;
a1255107
AD
3648 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
3649 adev->ip_blocks[i].status.hang =
3650 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
3651 if (adev->ip_blocks[i].status.hang) {
3652 DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
3653 asic_hang = true;
3654 }
3655 }
3656 return asic_hang;
3657}
3658
e3ecdffa
AD
3659/**
3660 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3661 *
3662 * @adev: amdgpu_device pointer
3663 *
3664 * The list of all the hardware IPs that make up the asic is walked and the
3665 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
3666 * handles any IP specific hardware or software state changes that are
3667 * necessary for a soft reset to succeed.
3668 * Returns 0 on success, negative error code on failure.
3669 */
06ec9070 3670static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
3671{
3672 int i, r = 0;
3673
3674 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3675 if (!adev->ip_blocks[i].status.valid)
d31a501e 3676 continue;
a1255107
AD
3677 if (adev->ip_blocks[i].status.hang &&
3678 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3679 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
3680 if (r)
3681 return r;
3682 }
3683 }
3684
3685 return 0;
3686}
3687
e3ecdffa
AD
3688/**
3689 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3690 *
3691 * @adev: amdgpu_device pointer
3692 *
3693 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
3694 * reset is necessary to recover.
3695 * Returns true if a full asic reset is required, false if not.
3696 */
06ec9070 3697static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 3698{
da146d3b
AD
3699 int i;
3700
8bc04c29
AD
3701 if (amdgpu_asic_need_full_reset(adev))
3702 return true;
3703
da146d3b 3704 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3705 if (!adev->ip_blocks[i].status.valid)
da146d3b 3706 continue;
a1255107
AD
3707 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3708 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3709 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
3710 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3711 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 3712 if (adev->ip_blocks[i].status.hang) {
da146d3b
AD
3713 DRM_INFO("Some blocks need full reset!\n");
3714 return true;
3715 }
3716 }
35d782fe
CZ
3717 }
3718 return false;
3719}
3720
e3ecdffa
AD
3721/**
3722 * amdgpu_device_ip_soft_reset - do a soft reset
3723 *
3724 * @adev: amdgpu_device pointer
3725 *
3726 * The list of all the hardware IPs that make up the asic is walked and the
3727 * soft_reset callbacks are run if the block is hung. soft_reset handles any
3728 * IP specific hardware or software state changes that are necessary to soft
3729 * reset the IP.
3730 * Returns 0 on success, negative error code on failure.
3731 */
06ec9070 3732static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3733{
3734 int i, r = 0;
3735
3736 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3737 if (!adev->ip_blocks[i].status.valid)
35d782fe 3738 continue;
a1255107
AD
3739 if (adev->ip_blocks[i].status.hang &&
3740 adev->ip_blocks[i].version->funcs->soft_reset) {
3741 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
3742 if (r)
3743 return r;
3744 }
3745 }
3746
3747 return 0;
3748}
3749
e3ecdffa
AD
3750/**
3751 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
3752 *
3753 * @adev: amdgpu_device pointer
3754 *
3755 * The list of all the hardware IPs that make up the asic is walked and the
3756 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
3757 * handles any IP specific hardware or software state changes that are
3758 * necessary after the IP has been soft reset.
3759 * Returns 0 on success, negative error code on failure.
3760 */
06ec9070 3761static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3762{
3763 int i, r = 0;
3764
3765 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3766 if (!adev->ip_blocks[i].status.valid)
35d782fe 3767 continue;
a1255107
AD
3768 if (adev->ip_blocks[i].status.hang &&
3769 adev->ip_blocks[i].version->funcs->post_soft_reset)
3770 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
3771 if (r)
3772 return r;
3773 }
3774
3775 return 0;
3776}
3777
e3ecdffa 3778/**
c33adbc7 3779 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
3780 *
3781 * @adev: amdgpu_device pointer
3782 *
3783 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
3784 * restore things like GPUVM page tables after a GPU reset where
3785 * the contents of VRAM might be lost.
403009bf
CK
3786 *
3787 * Returns:
3788 * 0 on success, negative error code on failure.
e3ecdffa 3789 */
c33adbc7 3790static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 3791{
c41d1cf6 3792 struct dma_fence *fence = NULL, *next = NULL;
403009bf
CK
3793 struct amdgpu_bo *shadow;
3794 long r = 1, tmo;
c41d1cf6
ML
3795
3796 if (amdgpu_sriov_runtime(adev))
b045d3af 3797 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
3798 else
3799 tmo = msecs_to_jiffies(100);
3800
3801 DRM_INFO("recover vram bo from shadow start\n");
3802 mutex_lock(&adev->shadow_list_lock);
403009bf
CK
3803 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
3804
3805 /* No need to recover an evicted BO */
3806 if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
b575f10d 3807 shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
403009bf
CK
3808 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
3809 continue;
3810
3811 r = amdgpu_bo_restore_shadow(shadow, &next);
3812 if (r)
3813 break;
3814
c41d1cf6 3815 if (fence) {
1712fb1a 3816 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
3817 dma_fence_put(fence);
3818 fence = next;
1712fb1a 3819 if (tmo == 0) {
3820 r = -ETIMEDOUT;
c41d1cf6 3821 break;
1712fb1a 3822 } else if (tmo < 0) {
3823 r = tmo;
3824 break;
3825 }
403009bf
CK
3826 } else {
3827 fence = next;
c41d1cf6 3828 }
c41d1cf6
ML
3829 }
3830 mutex_unlock(&adev->shadow_list_lock);
3831
403009bf
CK
3832 if (fence)
3833 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
3834 dma_fence_put(fence);
3835
1712fb1a 3836 if (r < 0 || tmo <= 0) {
3837 DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
3838 return -EIO;
3839 }
c41d1cf6 3840
403009bf
CK
3841 DRM_INFO("recover vram bo from shadow done\n");
3842 return 0;
c41d1cf6
ML
3843}
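/*
 * Note (added for clarity, not in the original source): dma_fence_wait_timeout()
 * returns the remaining jiffies on success, 0 on timeout and a negative error
 * code otherwise, which is why the loop above treats tmo == 0 and tmo < 0
 * separately, e.g.:
 *
 *	tmo = dma_fence_wait_timeout(fence, false, msecs_to_jiffies(100));
 *	if (tmo == 0)
 *		r = -ETIMEDOUT;		// timed out
 *	else if (tmo < 0)
 *		r = tmo;		// interrupted or other error
 */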
3844
a90ad3c2 3845
e3ecdffa 3846/**
06ec9070 3847 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e
ML
3848 *
3849 * @adev: amdgpu device pointer
87e3f136 3850 * @from_hypervisor: request from hypervisor
5740682e
ML
3851 *
3852 * do VF FLR and reinitialize the ASIC
3f48c681 3853 * Returns 0 on success, an error code otherwise.
e3ecdffa
AD
3854 */
3855static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
3856 bool from_hypervisor)
5740682e
ML
3857{
3858 int r;
3859
3860 if (from_hypervisor)
3861 r = amdgpu_virt_request_full_gpu(adev, true);
3862 else
3863 r = amdgpu_virt_reset_gpu(adev);
3864 if (r)
3865 return r;
a90ad3c2 3866
b639c22c
JZ
3867 amdgpu_amdkfd_pre_reset(adev);
3868
a90ad3c2 3869 /* Resume IP prior to SMC */
06ec9070 3870 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
3871 if (r)
3872 goto error;
a90ad3c2 3873
c9ffa427 3874 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 3875 /* we need recover gart prior to run SMC/CP/SDMA resume */
c1c7ce8f 3876 amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
a90ad3c2 3877
7a3e0bb2
RZ
3878 r = amdgpu_device_fw_loading(adev);
3879 if (r)
3880 return r;
3881
a90ad3c2 3882 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 3883 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
3884 if (r)
3885 goto error;
a90ad3c2
ML
3886
3887 amdgpu_irq_gpu_reset_resume_helper(adev);
5740682e 3888 r = amdgpu_ib_ring_tests(adev);
f81e8d53 3889 amdgpu_amdkfd_post_reset(adev);
a90ad3c2 3890
abc34253
ED
3891error:
3892 amdgpu_virt_release_full_gpu(adev, true);
c41d1cf6 3893 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 3894 amdgpu_inc_vram_lost(adev);
c33adbc7 3895 r = amdgpu_device_recover_vram(adev);
a90ad3c2
ML
3896 }
3897
3898 return r;
3899}
3900
12938fad
CK
3901/**
3902 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
3903 *
3904 * @adev: amdgpu device pointer
3905 *
3906 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
3907 * a hung GPU.
3908 */
3909bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
3910{
3911 if (!amdgpu_device_ip_check_soft_reset(adev)) {
3912 DRM_INFO("Timeout, but no hardware hang detected.\n");
3913 return false;
3914 }
3915
3ba7b418
AG
3916 if (amdgpu_gpu_recovery == 0)
3917 goto disabled;
3918
3919 if (amdgpu_sriov_vf(adev))
3920 return true;
3921
3922 if (amdgpu_gpu_recovery == -1) {
3923 switch (adev->asic_type) {
fc42d47c
AG
3924 case CHIP_BONAIRE:
3925 case CHIP_HAWAII:
3ba7b418
AG
3926 case CHIP_TOPAZ:
3927 case CHIP_TONGA:
3928 case CHIP_FIJI:
3929 case CHIP_POLARIS10:
3930 case CHIP_POLARIS11:
3931 case CHIP_POLARIS12:
3932 case CHIP_VEGAM:
3933 case CHIP_VEGA20:
3934 case CHIP_VEGA10:
3935 case CHIP_VEGA12:
c43b849f 3936 case CHIP_RAVEN:
e9d4cf91 3937 case CHIP_ARCTURUS:
2cb44fb0 3938 case CHIP_RENOIR:
658c6639
AD
3939 case CHIP_NAVI10:
3940 case CHIP_NAVI14:
3941 case CHIP_NAVI12:
131a3c74 3942 case CHIP_SIENNA_CICHLID:
3ba7b418
AG
3943 break;
3944 default:
3945 goto disabled;
3946 }
12938fad
CK
3947 }
3948
3949 return true;
3ba7b418
AG
3950
3951disabled:
3952 DRM_INFO("GPU recovery disabled.\n");
3953 return false;
12938fad
CK
3954}
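/*
 * Usage sketch (an assumption for illustration, not part of this file): the
 * scheduler timeout handler is the typical caller of the check above,
 * roughly:
 *
 *	static void amdgpu_job_timedout(struct drm_sched_job *s_job)
 *	{
 *		struct amdgpu_job *job = to_amdgpu_job(s_job);
 *		struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
 *
 *		...
 *		if (amdgpu_device_should_recover_gpu(ring->adev))
 *			amdgpu_device_gpu_recover(ring->adev, job);
 *		else
 *			drm_sched_suspend_timeout(&ring->sched);
 *	}
 */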
3955
5c6dd71e 3956
26bc5340
AG
3957static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
3958 struct amdgpu_job *job,
3959 bool *need_full_reset_arg)
3960{
3961 int i, r = 0;
3962 bool need_full_reset = *need_full_reset_arg;
71182665 3963
728e7e0c
JZ
3964 amdgpu_debugfs_wait_dump(adev);
3965
71182665 3966 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
3967 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3968 struct amdgpu_ring *ring = adev->rings[i];
3969
51687759 3970 if (!ring || !ring->sched.thread)
0875dc9e 3971 continue;
5740682e 3972
2f9d4084
ML
3973 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
3974 amdgpu_fence_driver_force_completion(ring);
0875dc9e 3975 }
d38ceaf9 3976
222b5f04
AG
3977 if(job)
3978 drm_sched_increase_karma(&job->base);
3979
1d721ed6 3980 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
3981 if (!amdgpu_sriov_vf(adev)) {
3982
3983 if (!need_full_reset)
3984 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
3985
3986 if (!need_full_reset) {
3987 amdgpu_device_ip_pre_soft_reset(adev);
3988 r = amdgpu_device_ip_soft_reset(adev);
3989 amdgpu_device_ip_post_soft_reset(adev);
3990 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
3991 DRM_INFO("soft reset failed, will fall back to full reset!\n");
3992 need_full_reset = true;
3993 }
3994 }
3995
3996 if (need_full_reset)
3997 r = amdgpu_device_ip_suspend(adev);
3998
3999 *need_full_reset_arg = need_full_reset;
4000 }
4001
4002 return r;
4003}
4004
041a62bc 4005static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
26bc5340
AG
4006 struct list_head *device_list_handle,
4007 bool *need_full_reset_arg)
4008{
4009 struct amdgpu_device *tmp_adev = NULL;
4010 bool need_full_reset = *need_full_reset_arg, vram_lost = false;
4011 int r = 0;
4012
4013 /*
4014 * ASIC reset has to be done on all XGMI hive nodes ASAP
4015 * to allow proper link negotiation in FW (within 1 sec)
4016 */
4017 if (need_full_reset) {
4018 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
041a62bc 4019 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 4020 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
c96cf282 4021 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
4022 r = -EALREADY;
4023 } else
4024 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 4025
041a62bc
AG
4026 if (r) {
4027 DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s",
4028 r, tmp_adev->ddev->unique);
4029 break;
ce316fa5
LM
4030 }
4031 }
4032
041a62bc
AG
4033 /* For XGMI wait for all resets to complete before proceed */
4034 if (!r) {
ce316fa5
LM
4035 list_for_each_entry(tmp_adev, device_list_handle,
4036 gmc.xgmi.head) {
4037 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4038 flush_work(&tmp_adev->xgmi_reset_work);
4039 r = tmp_adev->asic_reset_res;
4040 if (r)
4041 break;
ce316fa5
LM
4042 }
4043 }
4044 }
ce316fa5 4045 }
26bc5340 4046
43c4d576
JC
4047 if (!r && amdgpu_ras_intr_triggered()) {
4048 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4049 if (tmp_adev->mmhub.funcs &&
4050 tmp_adev->mmhub.funcs->reset_ras_error_count)
4051 tmp_adev->mmhub.funcs->reset_ras_error_count(tmp_adev);
4052 }
4053
00eaa571 4054 amdgpu_ras_intr_cleared();
43c4d576 4055 }
00eaa571 4056
26bc5340
AG
4057 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4058 if (need_full_reset) {
4059 /* post card */
4060 if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
4061 DRM_WARN("asic atom init failed!");
4062
4063 if (!r) {
4064 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
4065 r = amdgpu_device_ip_resume_phase1(tmp_adev);
4066 if (r)
4067 goto out;
4068
4069 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
4070 if (vram_lost) {
77e7f829 4071 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 4072 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
4073 }
4074
4075 r = amdgpu_gtt_mgr_recover(
4076 &tmp_adev->mman.bdev.man[TTM_PL_TT]);
4077 if (r)
4078 goto out;
4079
4080 r = amdgpu_device_fw_loading(tmp_adev);
4081 if (r)
4082 return r;
4083
4084 r = amdgpu_device_ip_resume_phase2(tmp_adev);
4085 if (r)
4086 goto out;
4087
4088 if (vram_lost)
4089 amdgpu_device_fill_reset_magic(tmp_adev);
4090
fdafb359
EQ
4091 /*
4092 * Add this ASIC back as tracked, as the reset has already
4093 * completed successfully.
4094 */
4095 amdgpu_register_gpu_instance(tmp_adev);
4096
7c04ca50 4097 r = amdgpu_device_ip_late_init(tmp_adev);
4098 if (r)
4099 goto out;
4100
565d1941
EQ
4101 amdgpu_fbdev_set_suspend(tmp_adev, 0);
4102
e79a04d5 4103 /* must succeed. */
511fdbc3 4104 amdgpu_ras_resume(tmp_adev);
e79a04d5 4105
26bc5340
AG
4106 /* Update PSP FW topology after reset */
4107 if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
4108 r = amdgpu_xgmi_update_topology(hive, tmp_adev);
4109 }
4110 }
4111
4112
4113out:
4114 if (!r) {
4115 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
4116 r = amdgpu_ib_ring_tests(tmp_adev);
4117 if (r) {
4118 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
4119 r = amdgpu_device_ip_suspend(tmp_adev);
4120 need_full_reset = true;
4121 r = -EAGAIN;
4122 goto end;
4123 }
4124 }
4125
4126 if (!r)
4127 r = amdgpu_device_recover_vram(tmp_adev);
4128 else
4129 tmp_adev->asic_reset_res = r;
4130 }
4131
4132end:
4133 *need_full_reset_arg = need_full_reset;
4134 return r;
4135}
4136
1d721ed6 4137static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
26bc5340 4138{
1d721ed6
AG
4139 if (trylock) {
4140 if (!mutex_trylock(&adev->lock_reset))
4141 return false;
4142 } else
4143 mutex_lock(&adev->lock_reset);
5740682e 4144
26bc5340 4145 atomic_inc(&adev->gpu_reset_counter);
2a9b90ae 4146 adev->in_gpu_reset = true;
a3a09142
AD
4147 switch (amdgpu_asic_reset_method(adev)) {
4148 case AMD_RESET_METHOD_MODE1:
4149 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
4150 break;
4151 case AMD_RESET_METHOD_MODE2:
4152 adev->mp1_state = PP_MP1_STATE_RESET;
4153 break;
4154 default:
4155 adev->mp1_state = PP_MP1_STATE_NONE;
4156 break;
4157 }
1d721ed6
AG
4158
4159 return true;
26bc5340 4160}
d38ceaf9 4161
26bc5340
AG
4162static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
4163{
89041940 4164 amdgpu_vf_error_trans_all(adev);
a3a09142 4165 adev->mp1_state = PP_MP1_STATE_NONE;
2a9b90ae 4166 adev->in_gpu_reset = false;
13a752e3 4167 mutex_unlock(&adev->lock_reset);
26bc5340
AG
4168}
4169
3f12acc8
EQ
4170static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
4171{
4172 struct pci_dev *p = NULL;
4173
4174 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4175 adev->pdev->bus->number, 1);
4176 if (p) {
4177 pm_runtime_enable(&(p->dev));
4178 pm_runtime_resume(&(p->dev));
4179 }
4180}
4181
4182static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
4183{
4184 enum amd_reset_method reset_method;
4185 struct pci_dev *p = NULL;
4186 u64 expires;
4187
4188 /*
4189 * For now, only BACO and mode1 reset are confirmed
4190 * to suffer the audio issue without proper suspended.
4191 */
4192 reset_method = amdgpu_asic_reset_method(adev);
4193 if ((reset_method != AMD_RESET_METHOD_BACO) &&
4194 (reset_method != AMD_RESET_METHOD_MODE1))
4195 return -EINVAL;
4196
4197 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4198 adev->pdev->bus->number, 1);
4199 if (!p)
4200 return -ENODEV;
4201
4202 expires = pm_runtime_autosuspend_expiration(&(p->dev));
4203 if (!expires)
4204 /*
4205 * If we cannot get the audio device autosuspend delay,
4206 * a fixed 4S interval will be used. Since 3S is the audio
4207 * controller's default autosuspend delay setting, the 4S
4208 * used here is guaranteed to cover it.
4209 */
54b7feb9 4210 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
4211
4212 while (!pm_runtime_status_suspended(&(p->dev))) {
4213 if (!pm_runtime_suspend(&(p->dev)))
4214 break;
4215
4216 if (expires < ktime_get_mono_fast_ns()) {
4217 dev_warn(adev->dev, "failed to suspend display audio\n");
4218 /* TODO: abort the succeeding gpu reset? */
4219 return -ETIMEDOUT;
4220 }
4221 }
4222
4223 pm_runtime_disable(&(p->dev));
4224
4225 return 0;
4226}
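/*
 * Note (added for clarity, not in the original source): on these boards the
 * HDMI/DP audio controller is exposed as PCI function 1 of the GPU device,
 * which is why both helpers above look it up with
 * pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
 * adev->pdev->bus->number, 1).
 */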
4227
26bc5340
AG
4228/**
4229 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
4230 *
4231 * @adev: amdgpu device pointer
4232 * @job: which job triggered the hang
4233 *
4234 * Attempt to reset the GPU if it has hung (all asics).
4235 * Attempt a soft reset or full reset and reinitialize the ASIC.
4236 * Returns 0 for success or an error on failure.
4237 */
4238
4239int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
4240 struct amdgpu_job *job)
4241{
1d721ed6 4242 struct list_head device_list, *device_list_handle = NULL;
7dd8c205
EQ
4243 bool need_full_reset = false;
4244 bool job_signaled = false;
26bc5340 4245 struct amdgpu_hive_info *hive = NULL;
26bc5340 4246 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 4247 int i, r = 0;
bb5c7235 4248 bool need_emergency_restart = false;
3f12acc8 4249 bool audio_suspended = false;
26bc5340 4250
bb5c7235
WS
4251 /**
4252 * Special case: RAS triggered and full reset isn't supported
4253 */
4254 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
4255
d5ea093e
AG
4256 /*
4257 * Flush RAM to disk so that after reboot
4258 * the user can read the log and see why the system rebooted.
4259 */
bb5c7235 4260 if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
4261 DRM_WARN("Emergency reboot.");
4262
4263 ksys_sync_helper();
4264 emergency_restart();
4265 }
4266
b823821f 4267 dev_info(adev->dev, "GPU %s begin!\n",
bb5c7235 4268 need_emergency_restart ? "jobs stop":"reset");
26bc5340
AG
4269
4270 /*
1d721ed6
AG
4271 * Here we trylock to avoid a chain of resets executing, triggered
4272 * either by jobs on different adevs in the XGMI hive or by jobs on
4273 * different schedulers for the same device, while this TO handler is running.
4274 * We always reset all schedulers for a device and all devices in the XGMI
4275 * hive, so that should take care of them too.
26bc5340 4276 */
7dd8c205 4277 hive = amdgpu_get_xgmi_hive(adev, true);
1d721ed6
AG
4278 if (hive && !mutex_trylock(&hive->reset_lock)) {
4279 DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
0b2d2c2e 4280 job ? job->base.id : -1, hive->hive_id);
9e94d22c 4281 mutex_unlock(&hive->hive_lock);
26bc5340 4282 return 0;
1d721ed6 4283 }
26bc5340 4284
9e94d22c
EQ
4285 /*
4286 * Build list of devices to reset.
4287 * In case we are in XGMI hive mode, resort the device list
4288 * to put adev in the 1st position.
4289 */
4290 INIT_LIST_HEAD(&device_list);
4291 if (adev->gmc.xgmi.num_physical_nodes > 1) {
4292 if (!hive)
26bc5340 4293 return -ENODEV;
9e94d22c
EQ
4294 if (!list_is_first(&adev->gmc.xgmi.head, &hive->device_list))
4295 list_rotate_to_front(&adev->gmc.xgmi.head, &hive->device_list);
26bc5340
AG
4296 device_list_handle = &hive->device_list;
4297 } else {
4298 list_add_tail(&adev->gmc.xgmi.head, &device_list);
4299 device_list_handle = &device_list;
4300 }
4301
1d721ed6
AG
4302 /* block all schedulers and reset given job's ring */
4303 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
9e94d22c
EQ
4304 if (!amdgpu_device_lock_adev(tmp_adev, !hive)) {
4305 DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
4306 job ? job->base.id : -1);
4307 mutex_unlock(&hive->hive_lock);
4308 return 0;
7c6e68c7
AG
4309 }
4310
3f12acc8
EQ
4311 /*
4312 * Try to put the audio codec into suspend state
4313 * before the gpu reset starts.
4314 *
4315 * Because the power domain of the graphics device
4316 * is shared with the AZ power domain, without this
4317 * we may change the audio hardware from behind
4318 * the audio driver's back and trigger
4319 * some audio codec errors.
4320 */
4321 if (!amdgpu_device_suspend_display_audio(tmp_adev))
4322 audio_suspended = true;
4323
9e94d22c
EQ
4324 amdgpu_ras_set_error_query_ready(tmp_adev, false);
4325
52fb44cf
EQ
4326 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
4327
9e94d22c
EQ
4328 if (!amdgpu_sriov_vf(tmp_adev))
4329 amdgpu_amdkfd_pre_reset(tmp_adev);
4330
12ffa55d
AG
4331 /*
4332 * Mark these ASICs to be reset as untracked first,
4333 * and add them back after the reset completes.
4334 */
4335 amdgpu_unregister_gpu_instance(tmp_adev);
4336
a2f63ee8 4337 amdgpu_fbdev_set_suspend(tmp_adev, 1);
565d1941 4338
f1c1314b 4339 /* disable ras on ALL IPs */
bb5c7235 4340 if (!need_emergency_restart &&
b823821f 4341 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 4342 amdgpu_ras_suspend(tmp_adev);
4343
1d721ed6
AG
4344 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4345 struct amdgpu_ring *ring = tmp_adev->rings[i];
4346
4347 if (!ring || !ring->sched.thread)
4348 continue;
4349
0b2d2c2e 4350 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 4351
bb5c7235 4352 if (need_emergency_restart)
7c6e68c7 4353 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6
AG
4354 }
4355 }
4356
bb5c7235 4357 if (need_emergency_restart)
7c6e68c7
AG
4358 goto skip_sched_resume;
4359
1d721ed6
AG
4360 /*
4361 * Must check guilty signal here since after this point all old
4362 * HW fences are force signaled.
4363 *
4364 * job->base holds a reference to parent fence
4365 */
4366 if (job && job->base.s_fence->parent &&
7dd8c205 4367 dma_fence_is_signaled(job->base.s_fence->parent)) {
1d721ed6 4368 job_signaled = true;
1d721ed6
AG
4369 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
4370 goto skip_hw_reset;
4371 }
4372
26bc5340
AG
4373retry: /* Rest of adevs pre asic reset from XGMI hive. */
4374 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
26bc5340
AG
4375 r = amdgpu_device_pre_asic_reset(tmp_adev,
4376 NULL,
4377 &need_full_reset);
4378 /* TODO: Should we stop? */
4379 if (r) {
4380 DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
4381 r, tmp_adev->ddev->unique);
4382 tmp_adev->asic_reset_res = r;
4383 }
4384 }
4385
4386 /* Actual ASIC resets if needed.*/
4387 /* TODO Implement XGMI hive reset logic for SRIOV */
4388 if (amdgpu_sriov_vf(adev)) {
4389 r = amdgpu_device_reset_sriov(adev, job ? false : true);
4390 if (r)
4391 adev->asic_reset_res = r;
4392 } else {
041a62bc 4393 r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
26bc5340
AG
4394 if (r && r == -EAGAIN)
4395 goto retry;
4396 }
4397
1d721ed6
AG
4398skip_hw_reset:
4399
26bc5340
AG
4400 /* Post ASIC reset for all devs. */
4401 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
7c6e68c7 4402
1d721ed6
AG
4403 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4404 struct amdgpu_ring *ring = tmp_adev->rings[i];
4405
4406 if (!ring || !ring->sched.thread)
4407 continue;
4408
4409 /* No point in resubmitting jobs if we didn't HW reset */
4410 if (!tmp_adev->asic_reset_res && !job_signaled)
4411 drm_sched_resubmit_jobs(&ring->sched);
4412
4413 drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
4414 }
4415
4416 if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
4417 drm_helper_resume_force_mode(tmp_adev->ddev);
4418 }
4419
4420 tmp_adev->asic_reset_res = 0;
26bc5340
AG
4421
4422 if (r) {
4423 /* bad news, how do we tell userspace? */
12ffa55d 4424 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
4425 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
4426 } else {
12ffa55d 4427 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340 4428 }
7c6e68c7 4429 }
26bc5340 4430
7c6e68c7
AG
4431skip_sched_resume:
4432 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4433 /* unlock kfd: SRIOV would do it separately */
bb5c7235 4434 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
7c6e68c7 4435 amdgpu_amdkfd_post_reset(tmp_adev);
3f12acc8
EQ
4436 if (audio_suspended)
4437 amdgpu_device_resume_display_audio(tmp_adev);
26bc5340
AG
4438 amdgpu_device_unlock_adev(tmp_adev);
4439 }
4440
9e94d22c 4441 if (hive) {
22d6575b 4442 mutex_unlock(&hive->reset_lock);
9e94d22c
EQ
4443 mutex_unlock(&hive->hive_lock);
4444 }
26bc5340
AG
4445
4446 if (r)
4447 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
d38ceaf9
AD
4448 return r;
4449}
4450
e3ecdffa
AD
4451/**
4452 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
4453 *
4454 * @adev: amdgpu_device pointer
4455 *
4456 * Fetches and stores in the driver the PCIE capabilities (gen speed
4457 * and lanes) of the slot the device is in. Handles APUs and
4458 * virtualized environments where PCIE config space may not be available.
4459 */
5494d864 4460static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 4461{
5d9a6330 4462 struct pci_dev *pdev;
c5313457
HK
4463 enum pci_bus_speed speed_cap, platform_speed_cap;
4464 enum pcie_link_width platform_link_width;
d0dd7f0c 4465
cd474ba0
AD
4466 if (amdgpu_pcie_gen_cap)
4467 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 4468
cd474ba0
AD
4469 if (amdgpu_pcie_lane_cap)
4470 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 4471
cd474ba0
AD
4472 /* covers APUs as well */
4473 if (pci_is_root_bus(adev->pdev->bus)) {
4474 if (adev->pm.pcie_gen_mask == 0)
4475 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
4476 if (adev->pm.pcie_mlw_mask == 0)
4477 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 4478 return;
cd474ba0 4479 }
d0dd7f0c 4480
c5313457
HK
4481 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
4482 return;
4483
dbaa922b
AD
4484 pcie_bandwidth_available(adev->pdev, NULL,
4485 &platform_speed_cap, &platform_link_width);
c5313457 4486
cd474ba0 4487 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
4488 /* asic caps */
4489 pdev = adev->pdev;
4490 speed_cap = pcie_get_speed_cap(pdev);
4491 if (speed_cap == PCI_SPEED_UNKNOWN) {
4492 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
4493 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4494 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 4495 } else {
5d9a6330
AD
4496 if (speed_cap == PCIE_SPEED_16_0GT)
4497 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4498 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4499 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4500 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
4501 else if (speed_cap == PCIE_SPEED_8_0GT)
4502 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4503 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4504 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
4505 else if (speed_cap == PCIE_SPEED_5_0GT)
4506 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4507 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
4508 else
4509 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
4510 }
4511 /* platform caps */
c5313457 4512 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
4513 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4514 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4515 } else {
c5313457 4516 if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
4517 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4518 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4519 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4520 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 4521 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
4522 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4523 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4524 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 4525 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
4526 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4527 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4528 else
4529 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
4530
cd474ba0
AD
4531 }
4532 }
4533 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 4534 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
4535 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
4536 } else {
c5313457 4537 switch (platform_link_width) {
5d9a6330 4538 case PCIE_LNK_X32:
cd474ba0
AD
4539 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
4540 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4541 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4542 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4543 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4544 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4545 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4546 break;
5d9a6330 4547 case PCIE_LNK_X16:
cd474ba0
AD
4548 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4549 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4550 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4551 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4552 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4553 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4554 break;
5d9a6330 4555 case PCIE_LNK_X12:
cd474ba0
AD
4556 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4557 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4558 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4559 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4560 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4561 break;
5d9a6330 4562 case PCIE_LNK_X8:
cd474ba0
AD
4563 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4564 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4565 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4566 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4567 break;
5d9a6330 4568 case PCIE_LNK_X4:
cd474ba0
AD
4569 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4570 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4571 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4572 break;
5d9a6330 4573 case PCIE_LNK_X2:
cd474ba0
AD
4574 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4575 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4576 break;
5d9a6330 4577 case PCIE_LNK_X1:
cd474ba0
AD
4578 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
4579 break;
4580 default:
4581 break;
4582 }
d0dd7f0c
AD
4583 }
4584 }
4585}
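/*
 * Usage sketch (illustrative only): both the platform bits
 * (CAIL_PCIE_LINK_SPEED_SUPPORT_*) and the ASIC bits
 * (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_*) end up in the same mask, so a
 * consumer in the power-management code could test them roughly like:
 *
 *	if ((adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) &&
 *	    (adev->pm.pcie_gen_mask & CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3))
 *		... // both the platform and the ASIC can run the link at gen3
 */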
d38ceaf9 4586
361dbd01
AD
4587int amdgpu_device_baco_enter(struct drm_device *dev)
4588{
4589 struct amdgpu_device *adev = dev->dev_private;
7a22677b 4590 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01
AD
4591
4592 if (!amdgpu_device_supports_baco(adev->ddev))
4593 return -ENOTSUPP;
4594
7a22677b
LM
4595 if (ras && ras->supported)
4596 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
4597
9530273e 4598 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
4599}
4600
4601int amdgpu_device_baco_exit(struct drm_device *dev)
4602{
4603 struct amdgpu_device *adev = dev->dev_private;
7a22677b 4604 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 4605 int ret = 0;
361dbd01
AD
4606
4607 if (!amdgpu_device_supports_baco(adev->ddev))
4608 return -ENOTSUPP;
4609
9530273e
EQ
4610 ret = amdgpu_dpm_baco_exit(adev);
4611 if (ret)
4612 return ret;
7a22677b
LM
4613
4614 if (ras && ras->supported)
4615 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
4616
4617 return 0;
361dbd01 4618}
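/*
 * Usage sketch (an assumption for illustration, not part of this file): the
 * runtime PM callbacks in amdgpu_drv.c are the typical callers of the BACO
 * helpers above, roughly:
 *
 *	// on runtime suspend, after the device state has been saved
 *	if (amdgpu_device_supports_baco(drm_dev))
 *		ret = amdgpu_device_baco_enter(drm_dev);
 *
 *	// on runtime resume, before the device state is restored
 *	if (amdgpu_device_supports_baco(drm_dev))
 *		ret = amdgpu_device_baco_exit(drm_dev);
 */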