drm/amdgpu: add bad gpu tag definition
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c (linux-2.6-block.git)
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
b1ddf548 28#include <linux/power_supply.h>
0875dc9e 29#include <linux/kthread.h>
fdf2f6c5 30#include <linux/module.h>
d38ceaf9
AD
31#include <linux/console.h>
32#include <linux/slab.h>
fdf2f6c5 33
4562236b 34#include <drm/drm_atomic_helper.h>
fcd70cd3 35#include <drm/drm_probe_helper.h>
d38ceaf9
AD
36#include <drm/amdgpu_drm.h>
37#include <linux/vgaarb.h>
38#include <linux/vga_switcheroo.h>
39#include <linux/efi.h>
40#include "amdgpu.h"
f4b373f4 41#include "amdgpu_trace.h"
d38ceaf9
AD
42#include "amdgpu_i2c.h"
43#include "atom.h"
44#include "amdgpu_atombios.h"
a5bde2f9 45#include "amdgpu_atomfirmware.h"
d0dd7f0c 46#include "amd_pcie.h"
33f34802
KW
47#ifdef CONFIG_DRM_AMDGPU_SI
48#include "si.h"
49#endif
a2e73f56
AD
50#ifdef CONFIG_DRM_AMDGPU_CIK
51#include "cik.h"
52#endif
aaa36a97 53#include "vi.h"
460826e6 54#include "soc15.h"
0a5b8c7b 55#include "nv.h"
d38ceaf9 56#include "bif/bif_4_1_d.h"
9accf2fd 57#include <linux/pci.h>
bec86378 58#include <linux/firmware.h>
89041940 59#include "amdgpu_vf_error.h"
d38ceaf9 60
ba997709 61#include "amdgpu_amdkfd.h"
d2f52ac8 62#include "amdgpu_pm.h"
d38ceaf9 63
5183411b 64#include "amdgpu_xgmi.h"
c030f2e4 65#include "amdgpu_ras.h"
9c7c85f7 66#include "amdgpu_pmu.h"
bd607166 67#include "amdgpu_fru_eeprom.h"
5183411b 68
d5ea093e 69#include <linux/suspend.h>
c6a6e2db 70#include <drm/task_barrier.h>
3f12acc8 71#include <linux/pm_runtime.h>
d5ea093e 72
e2a75f88 73MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
3f76dced 74MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
2d2e5e7e 75MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
ad5a67a7 76MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
54c4d17e 77MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
65e60f6e 78MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
b51a26a0 79MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
23c6268e 80MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
ed42cfe1 81MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
42b325e5 82MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
c0a43457 83MODULE_FIRMWARE("amdgpu/sienna_cichlid_gpu_info.bin");
120eb833 84MODULE_FIRMWARE("amdgpu/navy_flounder_gpu_info.bin");
e2a75f88 85
2dc80b00
S
86#define AMDGPU_RESUME_MS 2000
87
050091ab 88const char *amdgpu_asic_name[] = {
da69c161
KW
89 "TAHITI",
90 "PITCAIRN",
91 "VERDE",
92 "OLAND",
93 "HAINAN",
d38ceaf9
AD
94 "BONAIRE",
95 "KAVERI",
96 "KABINI",
97 "HAWAII",
98 "MULLINS",
99 "TOPAZ",
100 "TONGA",
48299f95 101 "FIJI",
d38ceaf9 102 "CARRIZO",
139f4917 103 "STONEY",
2cc0c0b5
FC
104 "POLARIS10",
105 "POLARIS11",
c4642a47 106 "POLARIS12",
48ff108d 107 "VEGAM",
d4196f01 108 "VEGA10",
8fab806a 109 "VEGA12",
956fcddc 110 "VEGA20",
2ca8a5d2 111 "RAVEN",
d6c3b24e 112 "ARCTURUS",
1eee4228 113 "RENOIR",
852a6626 114 "NAVI10",
87dbad02 115 "NAVI14",
9802f5d7 116 "NAVI12",
ccaf72d3 117 "SIENNA_CICHLID",
ddd8fbe7 118 "NAVY_FLOUNDER",
d38ceaf9
AD
119 "LAST",
120};
121
dcea6e65
KR
122/**
123 * DOC: pcie_replay_count
124 *
125 * The amdgpu driver provides a sysfs API for reporting the total number
126 * of PCIe replays (NAKs)
127 * The file pcie_replay_count is used for this and returns the total
128 * number of replays as a sum of the NAKs generated and NAKs received
129 */
130
131static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
132 struct device_attribute *attr, char *buf)
133{
134 struct drm_device *ddev = dev_get_drvdata(dev);
135 struct amdgpu_device *adev = ddev->dev_private;
136 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
137
138 return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
139}
140
141static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
142 amdgpu_device_get_pcie_replay_count, NULL);
143
5494d864
AD
144static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
145
bd607166
KR
146/**
147 * DOC: product_name
148 *
149 * The amdgpu driver provides a sysfs API for reporting the product name
150 * for the device
151 * The file product_name is used for this and returns the product name
152 * as returned from the FRU.
153 * NOTE: This is only available for certain server cards
154 */
155
156static ssize_t amdgpu_device_get_product_name(struct device *dev,
157 struct device_attribute *attr, char *buf)
158{
159 struct drm_device *ddev = dev_get_drvdata(dev);
160 struct amdgpu_device *adev = ddev->dev_private;
161
162 return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_name);
163}
164
165static DEVICE_ATTR(product_name, S_IRUGO,
166 amdgpu_device_get_product_name, NULL);
167
168/**
169 * DOC: product_number
170 *
171 * The amdgpu driver provides a sysfs API for reporting the part number
172 * for the device
173 * The file product_number is used for this and returns the part number
174 * as returned from the FRU.
175 * NOTE: This is only available for certain server cards
176 */
177
178static ssize_t amdgpu_device_get_product_number(struct device *dev,
179 struct device_attribute *attr, char *buf)
180{
181 struct drm_device *ddev = dev_get_drvdata(dev);
182 struct amdgpu_device *adev = ddev->dev_private;
183
184 return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_number);
185}
186
187static DEVICE_ATTR(product_number, S_IRUGO,
188 amdgpu_device_get_product_number, NULL);
189
190/**
191 * DOC: serial_number
192 *
193 * The amdgpu driver provides a sysfs API for reporting the serial number
194 * for the device
195 * The file serial_number is used for this and returns the serial number
196 * as returned from the FRU.
197 * NOTE: This is only available for certain server cards
198 */
199
200static ssize_t amdgpu_device_get_serial_number(struct device *dev,
201 struct device_attribute *attr, char *buf)
202{
203 struct drm_device *ddev = dev_get_drvdata(dev);
204 struct amdgpu_device *adev = ddev->dev_private;
205
206 return snprintf(buf, PAGE_SIZE, "%s\n", adev->serial);
207}
208
209static DEVICE_ATTR(serial_number, S_IRUGO,
210 amdgpu_device_get_serial_number, NULL);
211
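/*
 * Example (userspace sketch, not part of the driver): reading one of the
 * attributes above from the card's PCI device directory in sysfs. The
 * "card0" index in the path is an assumption and varies per system; the
 * same pattern applies to product_name, product_number and serial_number.
 *
 *	char buf[64];
 *	int fd = open("/sys/class/drm/card0/device/pcie_replay_count", O_RDONLY);
 *
 *	if (fd >= 0) {
 *		ssize_t n = read(fd, buf, sizeof(buf) - 1);
 *
 *		if (n > 0) {
 *			buf[n] = '\0';
 *			printf("PCIe replays: %s", buf);
 *		}
 *		close(fd);
 *	}
 */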
e3ecdffa 212/**
31af062a 213 * amdgpu_device_supports_boco - Is the device a dGPU with HG/PX power control
e3ecdffa
AD
214 *
215 * @dev: drm_device pointer
216 *
217 * Returns true if the device is a dGPU with HG/PX power control,
218 * otherwise return false.
219 */
31af062a 220bool amdgpu_device_supports_boco(struct drm_device *dev)
d38ceaf9
AD
221{
222 struct amdgpu_device *adev = dev->dev_private;
223
2f7d10b3 224 if (adev->flags & AMD_IS_PX)
d38ceaf9
AD
225 return true;
226 return false;
227}
228
a69cba42
AD
229/**
230 * amdgpu_device_supports_baco - Does the device support BACO
231 *
232 * @dev: drm_device pointer
233 *
234 * Returns true if the device supports BACO,
235 * otherwise returns false.
236 */
237bool amdgpu_device_supports_baco(struct drm_device *dev)
238{
239 struct amdgpu_device *adev = dev->dev_private;
240
241 return amdgpu_asic_supports_baco(adev);
242}
243
e35e2b11
TY
244/**
245 * VRAM access helper functions.
246 *
247 * amdgpu_device_vram_access - read/write a buffer in vram
248 *
249 * @adev: amdgpu_device pointer
250 * @pos: offset of the buffer in vram
251 * @buf: virtual address of the buffer in system memory
252 * @size: read/write size in bytes; the buffer at @buf must be at least @size bytes
253 * @write: true - write to vram, otherwise - read from vram
254 */
255void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
256 uint32_t *buf, size_t size, bool write)
257{
e35e2b11 258 unsigned long flags;
ce05ac56
CK
259 uint32_t hi = ~0;
260 uint64_t last;
261
9d11eb0d
CK
262
263#ifdef CONFIG_64BIT
264 last = min(pos + size, adev->gmc.visible_vram_size);
265 if (last > pos) {
266 void __iomem *addr = adev->mman.aper_base_kaddr + pos;
267 size_t count = last - pos;
268
269 if (write) {
270 memcpy_toio(addr, buf, count);
271 mb();
272 amdgpu_asic_flush_hdp(adev, NULL);
273 } else {
274 amdgpu_asic_invalidate_hdp(adev, NULL);
275 mb();
276 memcpy_fromio(buf, addr, count);
277 }
278
279 if (count == size)
280 return;
281
282 pos += count;
283 buf += count / 4;
284 size -= count;
285 }
286#endif
287
ce05ac56
CK
288 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
289 for (last = pos + size; pos < last; pos += 4) {
290 uint32_t tmp = pos >> 31;
e35e2b11 291
e35e2b11 292 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
ce05ac56
CK
293 if (tmp != hi) {
294 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
295 hi = tmp;
296 }
e35e2b11
TY
297 if (write)
298 WREG32_NO_KIQ(mmMM_DATA, *buf++);
299 else
300 *buf++ = RREG32_NO_KIQ(mmMM_DATA);
e35e2b11 301 }
ce05ac56 302 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
e35e2b11
TY
303}
304
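/*
 * Illustrative sketch (hypothetical helper, not part of the driver): reading
 * the first four dwords of VRAM through amdgpu_device_vram_access(). The
 * helper takes a dword buffer and a byte size, so the size must be a
 * multiple of four.
 */
#if 0
static void amdgpu_example_dump_vram(struct amdgpu_device *adev)
{
	uint32_t data[4];

	amdgpu_device_vram_access(adev, 0, data, sizeof(data), false);
	dev_info(adev->dev, "VRAM[0..3]: %08x %08x %08x %08x\n",
		 data[0], data[1], data[2], data[3]);
}
#endif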
d38ceaf9 305/*
e78b579d 306 * MMIO register access helper functions.
d38ceaf9 307 */
e3ecdffa 308/**
e78b579d 309 * amdgpu_mm_rreg - read a memory mapped IO register
e3ecdffa
AD
310 *
311 * @adev: amdgpu_device pointer
312 * @reg: dword aligned register offset
313 * @acc_flags: access flags which require special behavior
314 *
315 * Returns the 32 bit value from the offset specified.
316 */
e78b579d
HZ
317uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
318 uint32_t acc_flags)
d38ceaf9 319{
f4b373f4
TSD
320 uint32_t ret;
321
f384ff95 322 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
d33a99c4 323 return amdgpu_kiq_rreg(adev, reg);
bc992ba5 324
ec59847e 325 if ((reg * 4) < adev->rmmio_size)
f4b373f4 326 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
e78b579d
HZ
327 else {
328 unsigned long flags;
329
330 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
331 writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
332 ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
333 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
334 }
335 trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
f4b373f4 336 return ret;
d38ceaf9
AD
337}
338
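/*
 * Illustrative sketch (hypothetical helper): reading a register with the
 * default path and again with AMDGPU_REGS_NO_KIQ, which bypasses the
 * KIQ-based access that is otherwise used when running as an SR-IOV guest.
 */
#if 0
static uint32_t amdgpu_example_read_reg(struct amdgpu_device *adev, uint32_t reg)
{
	uint32_t val;

	val = amdgpu_mm_rreg(adev, reg, 0);			/* may go through KIQ */
	val = amdgpu_mm_rreg(adev, reg, AMDGPU_REGS_NO_KIQ);	/* direct MMIO */
	return val;
}
#endif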
421a2a30
ML
339/*
340 * MMIO register read helper with byte granularity
341 * @offset: byte offset from MMIO start
342 *
343*/
344
e3ecdffa
AD
345/**
346 * amdgpu_mm_rreg8 - read a memory mapped IO register
347 *
348 * @adev: amdgpu_device pointer
349 * @offset: byte aligned register offset
350 *
351 * Returns the 8 bit value from the offset specified.
352 */
421a2a30
ML
353uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) {
354 if (offset < adev->rmmio_size)
355 return (readb(adev->rmmio + offset));
356 BUG();
357}
358
359/*
360 * MMIO register write with bytes helper functions
361 * @offset:bytes offset from MMIO start
362 * @value: the value want to be written to the register
363 *
364*/
e3ecdffa
AD
365/**
366 * amdgpu_mm_wreg8 - write a memory mapped IO register
367 *
368 * @adev: amdgpu_device pointer
369 * @offset: byte aligned register offset
370 * @value: 8 bit value to write
371 *
372 * Writes the value specified to the offset specified.
373 */
421a2a30
ML
374void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) {
375 if (offset < adev->rmmio_size)
376 writeb(value, adev->rmmio + offset);
377 else
378 BUG();
379}
380
e78b579d 381static inline void amdgpu_mm_wreg_mmio(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t acc_flags)
2e0cc4d4 382{
e78b579d 383 trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);
2e0cc4d4 384
ec59847e 385 if ((reg * 4) < adev->rmmio_size)
2e0cc4d4 386 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
e78b579d
HZ
387 else {
388 unsigned long flags;
389
390 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
391 writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
392 writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
393 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
394 }
2e0cc4d4
ML
395}
396
e3ecdffa 397/**
e78b579d 398 * amdgpu_mm_wreg - write to a memory mapped IO register
e3ecdffa
AD
399 *
400 * @adev: amdgpu_device pointer
401 * @reg: dword aligned register offset
402 * @v: 32 bit value to write to the register
403 * @acc_flags: access flags which require special behavior
404 *
405 * Writes the value specified to the offset specified.
406 */
e78b579d
HZ
407void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
408 uint32_t acc_flags)
d38ceaf9 409{
f384ff95 410 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
d33a99c4 411 return amdgpu_kiq_wreg(adev, reg, v);
bc992ba5 412
e78b579d 413 amdgpu_mm_wreg_mmio(adev, reg, v, acc_flags);
2e0cc4d4 414}
d38ceaf9 415
2e0cc4d4
ML
416/*
417 * amdgpu_mm_wreg_mmio_rlc - write a register either via MMIO or via the RLC path if it is in range
418 *
419 * This function is invoked only for debugfs register access.
420 */
421void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
422 uint32_t acc_flags)
423{
424 if (amdgpu_sriov_fullaccess(adev) &&
425 adev->gfx.rlc.funcs &&
426 adev->gfx.rlc.funcs->is_rlcg_access_range) {
47ed4e1c 427
2e0cc4d4
ML
428 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
429 return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v);
47ed4e1c 430 }
2e0cc4d4 431
e78b579d 432 amdgpu_mm_wreg_mmio(adev, reg, v, acc_flags);
d38ceaf9
AD
433}
434
e3ecdffa
AD
435/**
436 * amdgpu_io_rreg - read an IO register
437 *
438 * @adev: amdgpu_device pointer
439 * @reg: dword aligned register offset
440 *
441 * Returns the 32 bit value from the offset specified.
442 */
d38ceaf9
AD
443u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
444{
445 if ((reg * 4) < adev->rio_mem_size)
446 return ioread32(adev->rio_mem + (reg * 4));
447 else {
448 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
449 return ioread32(adev->rio_mem + (mmMM_DATA * 4));
450 }
451}
452
e3ecdffa
AD
453/**
454 * amdgpu_io_wreg - write to an IO register
455 *
456 * @adev: amdgpu_device pointer
457 * @reg: dword aligned register offset
458 * @v: 32 bit value to write to the register
459 *
460 * Writes the value specified to the offset specified.
461 */
d38ceaf9
AD
462void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
463{
d38ceaf9
AD
464 if ((reg * 4) < adev->rio_mem_size)
465 iowrite32(v, adev->rio_mem + (reg * 4));
466 else {
467 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
468 iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
469 }
470}
471
472/**
473 * amdgpu_mm_rdoorbell - read a doorbell dword
474 *
475 * @adev: amdgpu_device pointer
476 * @index: doorbell index
477 *
478 * Returns the value in the doorbell aperture at the
479 * requested doorbell index (CIK).
480 */
481u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
482{
483 if (index < adev->doorbell.num_doorbells) {
484 return readl(adev->doorbell.ptr + index);
485 } else {
486 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
487 return 0;
488 }
489}
490
491/**
492 * amdgpu_mm_wdoorbell - write a doorbell dword
493 *
494 * @adev: amdgpu_device pointer
495 * @index: doorbell index
496 * @v: value to write
497 *
498 * Writes @v to the doorbell aperture at the
499 * requested doorbell index (CIK).
500 */
501void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
502{
503 if (index < adev->doorbell.num_doorbells) {
504 writel(v, adev->doorbell.ptr + index);
505 } else {
506 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
507 }
508}
509
832be404
KW
510/**
511 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
512 *
513 * @adev: amdgpu_device pointer
514 * @index: doorbell index
515 *
516 * Returns the value in the doorbell aperture at the
517 * requested doorbell index (VEGA10+).
518 */
519u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
520{
521 if (index < adev->doorbell.num_doorbells) {
522 return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
523 } else {
524 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
525 return 0;
526 }
527}
528
529/**
530 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
531 *
532 * @adev: amdgpu_device pointer
533 * @index: doorbell index
534 * @v: value to write
535 *
536 * Writes @v to the doorbell aperture at the
537 * requested doorbell index (VEGA10+).
538 */
539void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
540{
541 if (index < adev->doorbell.num_doorbells) {
542 atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
543 } else {
544 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
545 }
546}
547
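/*
 * Illustrative sketch (assumes the usual amdgpu_ring fields and an adev/ring
 * in scope): ringing a doorbell by writing the ring's 64-bit write pointer
 * to its doorbell index, as ring code does on VEGA10 and newer.
 */
#if 0
if (ring->use_doorbell)
	amdgpu_mm_wdoorbell64(adev, ring->doorbell_index, ring->wptr);
#endif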
d38ceaf9
AD
548/**
549 * amdgpu_invalid_rreg - dummy reg read function
550 *
551 * @adev: amdgpu device pointer
552 * @reg: offset of register
553 *
554 * Dummy register read function. Used for register blocks
555 * that certain asics don't have (all asics).
556 * Returns the value in the register.
557 */
558static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
559{
560 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
561 BUG();
562 return 0;
563}
564
565/**
566 * amdgpu_invalid_wreg - dummy reg write function
567 *
568 * @adev: amdgpu device pointer
569 * @reg: offset of register
570 * @v: value to write to the register
571 *
572 * Dummy register write function. Used for register blocks
573 * that certain asics don't have (all asics).
574 */
575static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
576{
577 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
578 reg, v);
579 BUG();
580}
581
4fa1c6a6
TZ
582/**
583 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
584 *
585 * @adev: amdgpu device pointer
586 * @reg: offset of register
587 *
588 * Dummy register read function. Used for register blocks
589 * that certain asics don't have (all asics).
590 * Returns the value in the register.
591 */
592static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
593{
594 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
595 BUG();
596 return 0;
597}
598
599/**
600 * amdgpu_invalid_wreg64 - dummy reg write function
601 *
602 * @adev: amdgpu device pointer
603 * @reg: offset of register
604 * @v: value to write to the register
605 *
606 * Dummy register write function. Used for register blocks
607 * that certain asics don't have (all asics).
608 */
609static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
610{
611 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
612 reg, v);
613 BUG();
614}
615
d38ceaf9
AD
616/**
617 * amdgpu_block_invalid_rreg - dummy reg read function
618 *
619 * @adev: amdgpu device pointer
620 * @block: offset of instance
621 * @reg: offset of register
622 *
623 * Dummy register read function. Used for register blocks
624 * that certain asics don't have (all asics).
625 * Returns the value in the register.
626 */
627static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
628 uint32_t block, uint32_t reg)
629{
630 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
631 reg, block);
632 BUG();
633 return 0;
634}
635
636/**
637 * amdgpu_block_invalid_wreg - dummy reg write function
638 *
639 * @adev: amdgpu device pointer
640 * @block: offset of instance
641 * @reg: offset of register
642 * @v: value to write to the register
643 *
644 * Dummy register write function. Used for register blocks
645 * that certain asics don't have (all asics).
646 */
647static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
648 uint32_t block,
649 uint32_t reg, uint32_t v)
650{
651 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
652 reg, block, v);
653 BUG();
654}
655
e3ecdffa
AD
656/**
657 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
658 *
659 * @adev: amdgpu device pointer
660 *
661 * Allocates a scratch page of VRAM for use by various things in the
662 * driver.
663 */
06ec9070 664static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
d38ceaf9 665{
a4a02777
CK
666 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
667 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
668 &adev->vram_scratch.robj,
669 &adev->vram_scratch.gpu_addr,
670 (void **)&adev->vram_scratch.ptr);
d38ceaf9
AD
671}
672
e3ecdffa
AD
673/**
674 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
675 *
676 * @adev: amdgpu device pointer
677 *
678 * Frees the VRAM scratch page.
679 */
06ec9070 680static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 681{
078af1a3 682 amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
d38ceaf9
AD
683}
684
685/**
9c3f2b54 686 * amdgpu_device_program_register_sequence - program an array of registers.
d38ceaf9
AD
687 *
688 * @adev: amdgpu_device pointer
689 * @registers: pointer to the register array
690 * @array_size: size of the register array
691 *
692 * Programs an array of registers with AND and OR masks.
693 * This is a helper for setting golden registers.
694 */
9c3f2b54
AD
695void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
696 const u32 *registers,
697 const u32 array_size)
d38ceaf9
AD
698{
699 u32 tmp, reg, and_mask, or_mask;
700 int i;
701
702 if (array_size % 3)
703 return;
704
705 for (i = 0; i < array_size; i += 3) {
706 reg = registers[i + 0];
707 and_mask = registers[i + 1];
708 or_mask = registers[i + 2];
709
710 if (and_mask == 0xffffffff) {
711 tmp = or_mask;
712 } else {
713 tmp = RREG32(reg);
714 tmp &= ~and_mask;
e0d07657
HZ
715 if (adev->family >= AMDGPU_FAMILY_AI)
716 tmp |= (or_mask & and_mask);
717 else
718 tmp |= or_mask;
d38ceaf9
AD
719 }
720 WREG32(reg, tmp);
721 }
722}
723
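/*
 * Illustrative sketch (hypothetical register offsets, assumes an adev in
 * scope): the register array is a flat list of {offset, and_mask, or_mask}
 * triples. An and_mask of 0xffffffff writes or_mask directly, otherwise the
 * register is read-modify-written with the masks.
 */
#if 0
static const u32 example_golden_settings[] = {
	/* offset,    and_mask,   or_mask */
	0x0000031e, 0xfffffff0, 0x00000004,
	0x00000340, 0xffffffff, 0x12345678,
};

amdgpu_device_program_register_sequence(adev, example_golden_settings,
					ARRAY_SIZE(example_golden_settings));
#endif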
e3ecdffa
AD
724/**
725 * amdgpu_device_pci_config_reset - reset the GPU
726 *
727 * @adev: amdgpu_device pointer
728 *
729 * Resets the GPU using the pci config reset sequence.
730 * Only applicable to asics prior to vega10.
731 */
8111c387 732void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
733{
734 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
735}
736
737/*
738 * GPU doorbell aperture helpers function.
739 */
740/**
06ec9070 741 * amdgpu_device_doorbell_init - Init doorbell driver information.
d38ceaf9
AD
742 *
743 * @adev: amdgpu_device pointer
744 *
745 * Init doorbell driver information (CIK)
746 * Returns 0 on success, error on failure.
747 */
06ec9070 748static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
d38ceaf9 749{
6585661d 750
705e519e
CK
751 /* No doorbell on SI hardware generation */
752 if (adev->asic_type < CHIP_BONAIRE) {
753 adev->doorbell.base = 0;
754 adev->doorbell.size = 0;
755 adev->doorbell.num_doorbells = 0;
756 adev->doorbell.ptr = NULL;
757 return 0;
758 }
759
d6895ad3
CK
760 if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
761 return -EINVAL;
762
22357775
AD
763 amdgpu_asic_init_doorbell_index(adev);
764
d38ceaf9
AD
765 /* doorbell bar mapping */
766 adev->doorbell.base = pci_resource_start(adev->pdev, 2);
767 adev->doorbell.size = pci_resource_len(adev->pdev, 2);
768
edf600da 769 adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
9564f192 770 adev->doorbell_index.max_assignment+1);
d38ceaf9
AD
771 if (adev->doorbell.num_doorbells == 0)
772 return -EINVAL;
773
ec3db8a6 774 /* For Vega, reserve and map two pages on doorbell BAR since SDMA
88dc26e4
OZ
775 * paging queue doorbell use the second page. The
776 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
777 * doorbells are in the first page. So with paging queue enabled,
778 * the max num_doorbells should be extended by one page (0x400 in dwords).
ec3db8a6
PY
779 */
780 if (adev->asic_type >= CHIP_VEGA10)
88dc26e4 781 adev->doorbell.num_doorbells += 0x400;
ec3db8a6 782
8972e5d2
CK
783 adev->doorbell.ptr = ioremap(adev->doorbell.base,
784 adev->doorbell.num_doorbells *
785 sizeof(u32));
786 if (adev->doorbell.ptr == NULL)
d38ceaf9 787 return -ENOMEM;
d38ceaf9
AD
788
789 return 0;
790}
791
792/**
06ec9070 793 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
d38ceaf9
AD
794 *
795 * @adev: amdgpu_device pointer
796 *
797 * Tear down doorbell driver information (CIK)
798 */
06ec9070 799static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
d38ceaf9
AD
800{
801 iounmap(adev->doorbell.ptr);
802 adev->doorbell.ptr = NULL;
803}
804
22cb0164 805
d38ceaf9
AD
806
807/*
06ec9070 808 * amdgpu_device_wb_*()
455a7bc2 809 * Writeback is the method by which the GPU updates special pages in memory
ea81a173 810 * with the status of certain GPU events (fences, ring pointers, etc.).
d38ceaf9
AD
811 */
812
813/**
06ec9070 814 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
815 *
816 * @adev: amdgpu_device pointer
817 *
818 * Disables Writeback and frees the Writeback memory (all asics).
819 * Used at driver shutdown.
820 */
06ec9070 821static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
d38ceaf9
AD
822{
823 if (adev->wb.wb_obj) {
a76ed485
AD
824 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
825 &adev->wb.gpu_addr,
826 (void **)&adev->wb.wb);
d38ceaf9
AD
827 adev->wb.wb_obj = NULL;
828 }
829}
830
831/**
06ec9070 832 * amdgpu_device_wb_init- Init Writeback driver info and allocate memory
d38ceaf9
AD
833 *
834 * @adev: amdgpu_device pointer
835 *
455a7bc2 836 * Initializes writeback and allocates writeback memory (all asics).
d38ceaf9
AD
837 * Used at driver startup.
838 * Returns 0 on success or a negative error code on failure.
839 */
06ec9070 840static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
841{
842 int r;
843
844 if (adev->wb.wb_obj == NULL) {
97407b63
AD
845 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
846 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
847 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
848 &adev->wb.wb_obj, &adev->wb.gpu_addr,
849 (void **)&adev->wb.wb);
d38ceaf9
AD
850 if (r) {
851 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
852 return r;
853 }
d38ceaf9
AD
854
855 adev->wb.num_wb = AMDGPU_MAX_WB;
856 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
857
858 /* clear wb memory */
73469585 859 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
860 }
861
862 return 0;
863}
864
865/**
131b4b36 866 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
867 *
868 * @adev: amdgpu_device pointer
869 * @wb: wb index
870 *
871 * Allocate a wb slot for use by the driver (all asics).
872 * Returns 0 on success or -EINVAL on failure.
873 */
131b4b36 874int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9
AD
875{
876 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 877
97407b63 878 if (offset < adev->wb.num_wb) {
7014285a 879 __set_bit(offset, adev->wb.used);
63ae07ca 880 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
881 return 0;
882 } else {
883 return -EINVAL;
884 }
885}
886
d38ceaf9 887/**
131b4b36 888 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
889 *
890 * @adev: amdgpu_device pointer
891 * @wb: wb index
892 *
893 * Free a wb slot allocated for use by the driver (all asics)
894 */
131b4b36 895void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 896{
73469585 897 wb >>= 3;
d38ceaf9 898 if (wb < adev->wb.num_wb)
73469585 899 __clear_bit(wb, adev->wb.used);
d38ceaf9
AD
900}
901
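/*
 * Illustrative sketch (hypothetical caller, assumes an adev in scope):
 * allocating a writeback slot, pointing GPU work at it and freeing it again.
 * The index returned by amdgpu_device_wb_get() is a dword offset into
 * adev->wb.wb[], and the slot's GPU address is adev->wb.gpu_addr + index * 4.
 */
#if 0
u32 wb;
int r;

r = amdgpu_device_wb_get(adev, &wb);
if (!r) {
	u64 wb_gpu_addr = adev->wb.gpu_addr + wb * 4;

	adev->wb.wb[wb] = 0;	/* CPU view of the slot */
	/* ... have a ring or fence write its status to wb_gpu_addr ... */
	amdgpu_device_wb_free(adev, wb);
}
#endif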
d6895ad3
CK
902/**
903 * amdgpu_device_resize_fb_bar - try to resize FB BAR
904 *
905 * @adev: amdgpu_device pointer
906 *
907 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
908 * to fail, but if any of the BARs is not accessible after the resize we abort
909 * driver loading by returning -ENODEV.
910 */
911int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
912{
770d13b1 913 u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
d6895ad3 914 u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
31b8adab
CK
915 struct pci_bus *root;
916 struct resource *res;
917 unsigned i;
d6895ad3
CK
918 u16 cmd;
919 int r;
920
0c03b912 921 /* Bypass for VF */
922 if (amdgpu_sriov_vf(adev))
923 return 0;
924
b7221f2b
AD
925 /* skip if the bios has already enabled large BAR */
926 if (adev->gmc.real_vram_size &&
927 (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
928 return 0;
929
31b8adab
CK
930 /* Check if the root BUS has 64bit memory resources */
931 root = adev->pdev->bus;
932 while (root->parent)
933 root = root->parent;
934
935 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 936 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
937 res->start > 0x100000000ull)
938 break;
939 }
940
941 /* Trying to resize is pointless without a root hub window above 4GB */
942 if (!res)
943 return 0;
944
d6895ad3
CK
945 /* Disable memory decoding while we change the BAR addresses and size */
946 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
947 pci_write_config_word(adev->pdev, PCI_COMMAND,
948 cmd & ~PCI_COMMAND_MEMORY);
949
950 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
06ec9070 951 amdgpu_device_doorbell_fini(adev);
d6895ad3
CK
952 if (adev->asic_type >= CHIP_BONAIRE)
953 pci_release_resource(adev->pdev, 2);
954
955 pci_release_resource(adev->pdev, 0);
956
957 r = pci_resize_resource(adev->pdev, 0, rbar_size);
958 if (r == -ENOSPC)
959 DRM_INFO("Not enough PCI address space for a large BAR.");
960 else if (r && r != -ENOTSUPP)
961 DRM_ERROR("Problem resizing BAR0 (%d).", r);
962
963 pci_assign_unassigned_bus_resources(adev->pdev->bus);
964
965 /* When the doorbell or fb BAR isn't available we have no chance of
966 * using the device.
967 */
06ec9070 968 r = amdgpu_device_doorbell_init(adev);
d6895ad3
CK
969 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
970 return -ENODEV;
971
972 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
973
974 return 0;
975}
a05502e5 976
d38ceaf9
AD
977/*
978 * GPU helpers function.
979 */
980/**
39c640c0 981 * amdgpu_device_need_post - check if the hw need post or not
d38ceaf9
AD
982 *
983 * @adev: amdgpu_device pointer
984 *
c836fec5
JQ
985 * Check if the asic has been initialized (all asics) at driver startup,
986 * or whether a post is needed because a hw reset was performed.
987 * Returns true if need or false if not.
d38ceaf9 988 */
39c640c0 989bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
990{
991 uint32_t reg;
992
bec86378
ML
993 if (amdgpu_sriov_vf(adev))
994 return false;
995
996 if (amdgpu_passthrough(adev)) {
1da2c326
ML
997 /* for FIJI: in the whole-GPU pass-through virtualization case, after a VM
998 * reboot some old SMC firmware still needs the driver to do a vPost or the
999 * GPU hangs. SMC firmware above version 22.15 does not have this flaw, so
1000 * force vPost for SMC versions below 22.15.
bec86378
ML
1001 */
1002 if (adev->asic_type == CHIP_FIJI) {
1003 int err;
1004 uint32_t fw_ver;
1005 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1006 /* force vPost if an error occurred */
1007 if (err)
1008 return true;
1009
1010 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1da2c326
ML
1011 if (fw_ver < 0x00160e00)
1012 return true;
bec86378 1013 }
bec86378 1014 }
91fe77eb 1015
1016 if (adev->has_hw_reset) {
1017 adev->has_hw_reset = false;
1018 return true;
1019 }
1020
1021 /* bios scratch used on CIK+ */
1022 if (adev->asic_type >= CHIP_BONAIRE)
1023 return amdgpu_atombios_scratch_need_asic_init(adev);
1024
1025 /* check MEM_SIZE for older asics */
1026 reg = amdgpu_asic_get_config_memsize(adev);
1027
1028 if ((reg != 0) && (reg != 0xffffffff))
1029 return false;
1030
1031 return true;
bec86378
ML
1032}
1033
d38ceaf9
AD
1034/* if we get transitioned to only one device, take VGA back */
1035/**
06ec9070 1036 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9
AD
1037 *
1038 * @cookie: amdgpu_device pointer
1039 * @state: enable/disable vga decode
1040 *
1041 * Enable/disable vga decode (all asics).
1042 * Returns VGA resource flags.
1043 */
06ec9070 1044static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
d38ceaf9
AD
1045{
1046 struct amdgpu_device *adev = cookie;
1047 amdgpu_asic_set_vga_state(adev, state);
1048 if (state)
1049 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1050 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1051 else
1052 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1053}
1054
e3ecdffa
AD
1055/**
1056 * amdgpu_device_check_block_size - validate the vm block size
1057 *
1058 * @adev: amdgpu_device pointer
1059 *
1060 * Validates the vm block size specified via module parameter.
1061 * The vm block size defines number of bits in page table versus page directory,
1062 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1063 * page table and the remaining bits are in the page directory.
1064 */
06ec9070 1065static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
1066{
1067 /* defines number of bits in page table versus page directory,
1068 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1069 * page table and the remaining bits are in the page directory */
bab4fee7
JZ
1070 if (amdgpu_vm_block_size == -1)
1071 return;
a1adf8be 1072
bab4fee7 1073 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
1074 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1075 amdgpu_vm_block_size);
97489129 1076 amdgpu_vm_block_size = -1;
a1adf8be 1077 }
a1adf8be
CZ
1078}
1079
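/*
 * Worked example (assuming 4 KiB pages): with a 12-bit page offset and
 * amdgpu_vm_block_size = 9, a single page table maps 2^(9 + 12) bytes =
 * 2 MiB of GPU virtual address space; the remaining VA bits are translated
 * by the page directory levels above it.
 */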
e3ecdffa
AD
1080/**
1081 * amdgpu_device_check_vm_size - validate the vm size
1082 *
1083 * @adev: amdgpu_device pointer
1084 *
1085 * Validates the vm size in GB specified via module parameter.
1086 * The VM size is the size of the GPU virtual memory space in GB.
1087 */
06ec9070 1088static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 1089{
64dab074
AD
1090 /* no need to check the default value */
1091 if (amdgpu_vm_size == -1)
1092 return;
1093
83ca145d
ZJ
1094 if (amdgpu_vm_size < 1) {
1095 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1096 amdgpu_vm_size);
f3368128 1097 amdgpu_vm_size = -1;
83ca145d 1098 }
83ca145d
ZJ
1099}
1100
7951e376
RZ
1101static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1102{
1103 struct sysinfo si;
a9d4fe2f 1104 bool is_os_64 = (sizeof(void *) == 8);
7951e376
RZ
1105 uint64_t total_memory;
1106 uint64_t dram_size_seven_GB = 0x1B8000000;
1107 uint64_t dram_size_three_GB = 0xB8000000;
1108
1109 if (amdgpu_smu_memory_pool_size == 0)
1110 return;
1111
1112 if (!is_os_64) {
1113 DRM_WARN("Not 64-bit OS, feature not supported\n");
1114 goto def_value;
1115 }
1116 si_meminfo(&si);
1117 total_memory = (uint64_t)si.totalram * si.mem_unit;
1118
1119 if ((amdgpu_smu_memory_pool_size == 1) ||
1120 (amdgpu_smu_memory_pool_size == 2)) {
1121 if (total_memory < dram_size_three_GB)
1122 goto def_value1;
1123 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1124 (amdgpu_smu_memory_pool_size == 8)) {
1125 if (total_memory < dram_size_seven_GB)
1126 goto def_value1;
1127 } else {
1128 DRM_WARN("Smu memory pool size not supported\n");
1129 goto def_value;
1130 }
1131 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1132
1133 return;
1134
1135def_value1:
1136 DRM_WARN("No enough system memory\n");
1137def_value:
1138 adev->pm.smu_prv_buffer_size = 0;
1139}
1140
d38ceaf9 1141/**
06ec9070 1142 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
1143 *
1144 * @adev: amdgpu_device pointer
1145 *
1146 * Validates certain module parameters and updates
1147 * the associated values used by the driver (all asics).
1148 */
912dfc84 1149static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1150{
5b011235
CZ
1151 if (amdgpu_sched_jobs < 4) {
1152 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1153 amdgpu_sched_jobs);
1154 amdgpu_sched_jobs = 4;
76117507 1155 } else if (!is_power_of_2(amdgpu_sched_jobs)){
5b011235
CZ
1156 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1157 amdgpu_sched_jobs);
1158 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1159 }
d38ceaf9 1160
83e74db6 1161 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
1162 /* gart size must be greater or equal to 32M */
1163 dev_warn(adev->dev, "gart size (%d) too small\n",
1164 amdgpu_gart_size);
83e74db6 1165 amdgpu_gart_size = -1;
d38ceaf9
AD
1166 }
1167
36d38372 1168 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1169 /* gtt size must be greater or equal to 32M */
36d38372
CK
1170 dev_warn(adev->dev, "gtt size (%d) too small\n",
1171 amdgpu_gtt_size);
1172 amdgpu_gtt_size = -1;
d38ceaf9
AD
1173 }
1174
d07f14be
RH
1175 /* valid range is between 4 and 9 inclusive */
1176 if (amdgpu_vm_fragment_size != -1 &&
1177 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1178 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1179 amdgpu_vm_fragment_size = -1;
1180 }
1181
5d5bd5e3
KW
1182 if (amdgpu_sched_hw_submission < 2) {
1183 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1184 amdgpu_sched_hw_submission);
1185 amdgpu_sched_hw_submission = 2;
1186 } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1187 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1188 amdgpu_sched_hw_submission);
1189 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1190 }
1191
7951e376
RZ
1192 amdgpu_device_check_smu_prv_buffer_size(adev);
1193
06ec9070 1194 amdgpu_device_check_vm_size(adev);
d38ceaf9 1195
06ec9070 1196 amdgpu_device_check_block_size(adev);
6a7f76e7 1197
19aede77 1198 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84 1199
c6252390 1200 amdgpu_gmc_tmz_set(adev);
01a8dcec 1201
e3c00faa 1202 return 0;
d38ceaf9
AD
1203}
1204
1205/**
1206 * amdgpu_switcheroo_set_state - set switcheroo state
1207 *
1208 * @pdev: pci dev pointer
1694467b 1209 * @state: vga_switcheroo state
d38ceaf9
AD
1210 *
1211 * Callback for the switcheroo driver. Suspends or resumes
1212 * the asic before or after it is powered up using ACPI methods.
1213 */
1214static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
1215{
1216 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 1217 int r;
d38ceaf9 1218
31af062a 1219 if (amdgpu_device_supports_boco(dev) && state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
1220 return;
1221
1222 if (state == VGA_SWITCHEROO_ON) {
dd4fa6c1 1223 pr_info("switched on\n");
d38ceaf9
AD
1224 /* don't suspend or resume card normally */
1225 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1226
de185019
AD
1227 pci_set_power_state(dev->pdev, PCI_D0);
1228 pci_restore_state(dev->pdev);
1229 r = pci_enable_device(dev->pdev);
1230 if (r)
1231 DRM_WARN("pci_enable_device failed (%d)\n", r);
1232 amdgpu_device_resume(dev, true);
d38ceaf9 1233
d38ceaf9
AD
1234 dev->switch_power_state = DRM_SWITCH_POWER_ON;
1235 drm_kms_helper_poll_enable(dev);
1236 } else {
dd4fa6c1 1237 pr_info("switched off\n");
d38ceaf9
AD
1238 drm_kms_helper_poll_disable(dev);
1239 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
de185019
AD
1240 amdgpu_device_suspend(dev, true);
1241 pci_save_state(dev->pdev);
1242 /* Shut down the device */
1243 pci_disable_device(dev->pdev);
1244 pci_set_power_state(dev->pdev, PCI_D3cold);
d38ceaf9
AD
1245 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1246 }
1247}
1248
1249/**
1250 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1251 *
1252 * @pdev: pci dev pointer
1253 *
1254 * Callback for the switcheroo driver. Checks whether the switcheroo
1255 * state can be changed.
1256 * Returns true if the state can be changed, false if not.
1257 */
1258static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1259{
1260 struct drm_device *dev = pci_get_drvdata(pdev);
1261
1262 /*
1263 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1264 * locking inversion with the driver load path. And the access here is
1265 * completely racy anyway. So don't bother with locking for now.
1266 */
7e13ad89 1267 return atomic_read(&dev->open_count) == 0;
d38ceaf9
AD
1268}
1269
1270static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1271 .set_gpu_state = amdgpu_switcheroo_set_state,
1272 .reprobe = NULL,
1273 .can_switch = amdgpu_switcheroo_can_switch,
1274};
1275
e3ecdffa
AD
1276/**
1277 * amdgpu_device_ip_set_clockgating_state - set the CG state
1278 *
87e3f136 1279 * @dev: amdgpu_device pointer
e3ecdffa
AD
1280 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1281 * @state: clockgating state (gate or ungate)
1282 *
1283 * Sets the requested clockgating state for all instances of
1284 * the hardware IP specified.
1285 * Returns the error code from the last instance.
1286 */
43fa561f 1287int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1288 enum amd_ip_block_type block_type,
1289 enum amd_clockgating_state state)
d38ceaf9 1290{
43fa561f 1291 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1292 int i, r = 0;
1293
1294 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1295 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1296 continue;
c722865a
RZ
1297 if (adev->ip_blocks[i].version->type != block_type)
1298 continue;
1299 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1300 continue;
1301 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1302 (void *)adev, state);
1303 if (r)
1304 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1305 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1306 }
1307 return r;
1308}
1309
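/*
 * Illustrative sketch (hypothetical caller, assumes adev and r in scope):
 * gating the clocks of all GFX IP instances. The block type and clockgating
 * state enums come from amd_shared.h.
 */
#if 0
r = amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
					   AMD_CG_STATE_GATE);
if (r)
	dev_warn(adev->dev, "failed to gate GFX clocks (%d)\n", r);
#endif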
e3ecdffa
AD
1310/**
1311 * amdgpu_device_ip_set_powergating_state - set the PG state
1312 *
87e3f136 1313 * @dev: amdgpu_device pointer
e3ecdffa
AD
1314 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1315 * @state: powergating state (gate or ungate)
1316 *
1317 * Sets the requested powergating state for all instances of
1318 * the hardware IP specified.
1319 * Returns the error code from the last instance.
1320 */
43fa561f 1321int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1322 enum amd_ip_block_type block_type,
1323 enum amd_powergating_state state)
d38ceaf9 1324{
43fa561f 1325 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1326 int i, r = 0;
1327
1328 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1329 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1330 continue;
c722865a
RZ
1331 if (adev->ip_blocks[i].version->type != block_type)
1332 continue;
1333 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1334 continue;
1335 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1336 (void *)adev, state);
1337 if (r)
1338 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1339 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1340 }
1341 return r;
1342}
1343
e3ecdffa
AD
1344/**
1345 * amdgpu_device_ip_get_clockgating_state - get the CG state
1346 *
1347 * @adev: amdgpu_device pointer
1348 * @flags: clockgating feature flags
1349 *
1350 * Walks the list of IPs on the device and updates the clockgating
1351 * flags for each IP.
1352 * Updates @flags with the feature flags for each hardware IP where
1353 * clockgating is enabled.
1354 */
2990a1fc
AD
1355void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1356 u32 *flags)
6cb2d4e4
HR
1357{
1358 int i;
1359
1360 for (i = 0; i < adev->num_ip_blocks; i++) {
1361 if (!adev->ip_blocks[i].status.valid)
1362 continue;
1363 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1364 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1365 }
1366}
1367
e3ecdffa
AD
1368/**
1369 * amdgpu_device_ip_wait_for_idle - wait for idle
1370 *
1371 * @adev: amdgpu_device pointer
1372 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1373 *
1374 * Waits for the requested hardware IP to be idle.
1375 * Returns 0 for success or a negative error code on failure.
1376 */
2990a1fc
AD
1377int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1378 enum amd_ip_block_type block_type)
5dbbb60b
AD
1379{
1380 int i, r;
1381
1382 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1383 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1384 continue;
a1255107
AD
1385 if (adev->ip_blocks[i].version->type == block_type) {
1386 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1387 if (r)
1388 return r;
1389 break;
1390 }
1391 }
1392 return 0;
1393
1394}
1395
e3ecdffa
AD
1396/**
1397 * amdgpu_device_ip_is_idle - is the hardware IP idle
1398 *
1399 * @adev: amdgpu_device pointer
1400 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1401 *
1402 * Check if the hardware IP is idle or not.
1403 * Returns true if the IP is idle, false if not.
1404 */
2990a1fc
AD
1405bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1406 enum amd_ip_block_type block_type)
5dbbb60b
AD
1407{
1408 int i;
1409
1410 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1411 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1412 continue;
a1255107
AD
1413 if (adev->ip_blocks[i].version->type == block_type)
1414 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1415 }
1416 return true;
1417
1418}
1419
e3ecdffa
AD
1420/**
1421 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1422 *
1423 * @adev: amdgpu_device pointer
87e3f136 1424 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1425 *
1426 * Returns a pointer to the hardware IP block structure
1427 * if it exists for the asic, otherwise NULL.
1428 */
2990a1fc
AD
1429struct amdgpu_ip_block *
1430amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1431 enum amd_ip_block_type type)
d38ceaf9
AD
1432{
1433 int i;
1434
1435 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1436 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1437 return &adev->ip_blocks[i];
1438
1439 return NULL;
1440}
1441
1442/**
2990a1fc 1443 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1444 *
1445 * @adev: amdgpu_device pointer
5fc3aeeb 1446 * @type: enum amd_ip_block_type
d38ceaf9
AD
1447 * @major: major version
1448 * @minor: minor version
1449 *
1450 * return 0 if equal or greater
1451 * return 1 if smaller or the ip_block doesn't exist
1452 */
2990a1fc
AD
1453int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1454 enum amd_ip_block_type type,
1455 u32 major, u32 minor)
d38ceaf9 1456{
2990a1fc 1457 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1458
a1255107
AD
1459 if (ip_block && ((ip_block->version->major > major) ||
1460 ((ip_block->version->major == major) &&
1461 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1462 return 0;
1463
1464 return 1;
1465}
1466
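/*
 * Illustrative sketch (assumes an adev in scope): gating a feature on the
 * installed GMC IP version. A return value of 0 means the block is at least
 * the requested version, 1 means it is older or not present.
 */
#if 0
if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GMC, 8, 1))
	dev_info(adev->dev, "GMC 8.1 or newer detected\n");
#endif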
a1255107 1467/**
2990a1fc 1468 * amdgpu_device_ip_block_add
a1255107
AD
1469 *
1470 * @adev: amdgpu_device pointer
1471 * @ip_block_version: pointer to the IP to add
1472 *
1473 * Adds the IP block driver information to the collection of IPs
1474 * on the asic.
1475 */
2990a1fc
AD
1476int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1477 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1478{
1479 if (!ip_block_version)
1480 return -EINVAL;
1481
e966a725 1482 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1483 ip_block_version->funcs->name);
1484
a1255107
AD
1485 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1486
1487 return 0;
1488}
1489
e3ecdffa
AD
1490/**
1491 * amdgpu_device_enable_virtual_display - enable virtual display feature
1492 *
1493 * @adev: amdgpu_device pointer
1494 *
1495 * Enables the virtual display feature if the user has enabled it via
1496 * the module parameter virtual_display. This feature provides a virtual
1497 * display hardware on headless boards or in virtualized environments.
1498 * This function parses and validates the configuration string specified by
1499 * the user and configures the virtual display configuration (number of
1500 * virtual connectors, crtcs, etc.) specified.
1501 */
483ef985 1502static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1503{
1504 adev->enable_virtual_display = false;
1505
1506 if (amdgpu_virtual_display) {
1507 struct drm_device *ddev = adev->ddev;
1508 const char *pci_address_name = pci_name(ddev->pdev);
0f66356d 1509 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1510
1511 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1512 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1513 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1514 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1515 if (!strcmp("all", pciaddname)
1516 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1517 long num_crtc;
1518 int res = -1;
1519
9accf2fd 1520 adev->enable_virtual_display = true;
0f66356d
ED
1521
1522 if (pciaddname_tmp)
1523 res = kstrtol(pciaddname_tmp, 10,
1524 &num_crtc);
1525
1526 if (!res) {
1527 if (num_crtc < 1)
1528 num_crtc = 1;
1529 if (num_crtc > 6)
1530 num_crtc = 6;
1531 adev->mode_info.num_crtc = num_crtc;
1532 } else {
1533 adev->mode_info.num_crtc = 1;
1534 }
9accf2fd
ED
1535 break;
1536 }
1537 }
1538
0f66356d
ED
1539 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1540 amdgpu_virtual_display, pci_address_name,
1541 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
1542
1543 kfree(pciaddstr);
1544 }
1545}
1546
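/*
 * Usage note (illustrative, the PCI address is hypothetical): the
 * virtual_display module parameter is a semicolon-separated list of
 * "<pci address>,<number of crtcs>" entries, where "all" matches every
 * device, e.g.:
 *
 *	modprobe amdgpu virtual_display=0000:03:00.0,2
 *	modprobe amdgpu virtual_display=all,1
 */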
e3ecdffa
AD
1547/**
1548 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1549 *
1550 * @adev: amdgpu_device pointer
1551 *
1552 * Parses the asic configuration parameters specified in the gpu info
1553 * firmware and makes them available to the driver for use in configuring
1554 * the asic.
1555 * Returns 0 on success, -EINVAL on failure.
1556 */
e2a75f88
AD
1557static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1558{
e2a75f88 1559 const char *chip_name;
c0a43457 1560 char fw_name[40];
e2a75f88
AD
1561 int err;
1562 const struct gpu_info_firmware_header_v1_0 *hdr;
1563
ab4fe3e1
HR
1564 adev->firmware.gpu_info_fw = NULL;
1565
4292b0b2 1566 if (adev->discovery_bin) {
258620d0 1567 amdgpu_discovery_get_gfx_info(adev);
cc375d8c
TY
1568
1569 /*
1570 * FIXME: The bounding box is still needed by Navi12, so
1571 * temporarily read it from gpu_info firmware. Should be dropped
1572 * when DAL no longer needs it.
1573 */
1574 if (adev->asic_type != CHIP_NAVI12)
1575 return 0;
258620d0
AD
1576 }
1577
e2a75f88 1578 switch (adev->asic_type) {
e2a75f88
AD
1579#ifdef CONFIG_DRM_AMDGPU_SI
1580 case CHIP_VERDE:
1581 case CHIP_TAHITI:
1582 case CHIP_PITCAIRN:
1583 case CHIP_OLAND:
1584 case CHIP_HAINAN:
1585#endif
1586#ifdef CONFIG_DRM_AMDGPU_CIK
1587 case CHIP_BONAIRE:
1588 case CHIP_HAWAII:
1589 case CHIP_KAVERI:
1590 case CHIP_KABINI:
1591 case CHIP_MULLINS:
1592#endif
da87c30b
AD
1593 case CHIP_TOPAZ:
1594 case CHIP_TONGA:
1595 case CHIP_FIJI:
1596 case CHIP_POLARIS10:
1597 case CHIP_POLARIS11:
1598 case CHIP_POLARIS12:
1599 case CHIP_VEGAM:
1600 case CHIP_CARRIZO:
1601 case CHIP_STONEY:
27c0bc71 1602 case CHIP_VEGA20:
e2a75f88
AD
1603 default:
1604 return 0;
1605 case CHIP_VEGA10:
1606 chip_name = "vega10";
1607 break;
3f76dced
AD
1608 case CHIP_VEGA12:
1609 chip_name = "vega12";
1610 break;
2d2e5e7e 1611 case CHIP_RAVEN:
54f78a76 1612 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
54c4d17e 1613 chip_name = "raven2";
54f78a76 1614 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
741deade 1615 chip_name = "picasso";
54c4d17e
FX
1616 else
1617 chip_name = "raven";
2d2e5e7e 1618 break;
65e60f6e
LM
1619 case CHIP_ARCTURUS:
1620 chip_name = "arcturus";
1621 break;
b51a26a0
HR
1622 case CHIP_RENOIR:
1623 chip_name = "renoir";
1624 break;
23c6268e
HR
1625 case CHIP_NAVI10:
1626 chip_name = "navi10";
1627 break;
ed42cfe1
XY
1628 case CHIP_NAVI14:
1629 chip_name = "navi14";
1630 break;
42b325e5
XY
1631 case CHIP_NAVI12:
1632 chip_name = "navi12";
1633 break;
c0a43457
LG
1634 case CHIP_SIENNA_CICHLID:
1635 chip_name = "sienna_cichlid";
1636 break;
120eb833
JC
1637 case CHIP_NAVY_FLOUNDER:
1638 chip_name = "navy_flounder";
1639 break;
e2a75f88
AD
1640 }
1641
1642 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
ab4fe3e1 1643 err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
e2a75f88
AD
1644 if (err) {
1645 dev_err(adev->dev,
1646 "Failed to load gpu_info firmware \"%s\"\n",
1647 fw_name);
1648 goto out;
1649 }
ab4fe3e1 1650 err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
e2a75f88
AD
1651 if (err) {
1652 dev_err(adev->dev,
1653 "Failed to validate gpu_info firmware \"%s\"\n",
1654 fw_name);
1655 goto out;
1656 }
1657
ab4fe3e1 1658 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
1659 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1660
1661 switch (hdr->version_major) {
1662 case 1:
1663 {
1664 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 1665 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
1666 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1667
cc375d8c
TY
1668 /*
1669 * Should be dropped when DAL no longer needs it.
1670 */
1671 if (adev->asic_type == CHIP_NAVI12)
1672 goto parse_soc_bounding_box;
1673
b5ab16bf
AD
1674 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1675 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1676 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1677 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 1678 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
1679 le32_to_cpu(gpu_info_fw->gc_num_tccs);
1680 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1681 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1682 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1683 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 1684 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
1685 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1686 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
1687 adev->gfx.cu_info.max_waves_per_simd =
1688 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1689 adev->gfx.cu_info.max_scratch_slots_per_cu =
1690 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1691 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 1692 if (hdr->version_minor >= 1) {
35c2e910
HZ
1693 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1694 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1695 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1696 adev->gfx.config.num_sc_per_sh =
1697 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1698 adev->gfx.config.num_packer_per_sc =
1699 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1700 }
ec51d3fa 1701
cc375d8c 1702parse_soc_bounding_box:
ec51d3fa
XY
1703 /*
1704 * soc bounding box info is not integrated into the discovery table,
258620d0 1705 * so we always need to parse it from the gpu info firmware when needed.
ec51d3fa 1706 */
48321c3d
HW
1707 if (hdr->version_minor == 2) {
1708 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1709 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1710 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1711 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1712 }
e2a75f88
AD
1713 break;
1714 }
1715 default:
1716 dev_err(adev->dev,
1717 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1718 err = -EINVAL;
1719 goto out;
1720 }
1721out:
e2a75f88
AD
1722 return err;
1723}
1724
e3ecdffa
AD
1725/**
1726 * amdgpu_device_ip_early_init - run early init for hardware IPs
1727 *
1728 * @adev: amdgpu_device pointer
1729 *
1730 * Early initialization pass for hardware IPs. The hardware IPs that make
 1731 * up each asic are discovered and each IP's early_init callback is run. This
1732 * is the first stage in initializing the asic.
1733 * Returns 0 on success, negative error code on failure.
1734 */
06ec9070 1735static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 1736{
aaa36a97 1737 int i, r;
d38ceaf9 1738
483ef985 1739 amdgpu_device_enable_virtual_display(adev);
a6be7570 1740
00a979f3 1741 if (amdgpu_sriov_vf(adev)) {
00a979f3 1742 r = amdgpu_virt_request_full_gpu(adev, true);
e3a4d51c 1743 if (r)
00a979f3 1744 return r;
00a979f3
WS
1745 }
1746
d38ceaf9 1747 switch (adev->asic_type) {
33f34802
KW
1748#ifdef CONFIG_DRM_AMDGPU_SI
1749 case CHIP_VERDE:
1750 case CHIP_TAHITI:
1751 case CHIP_PITCAIRN:
1752 case CHIP_OLAND:
1753 case CHIP_HAINAN:
295d0daf 1754 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
1755 r = si_set_ip_blocks(adev);
1756 if (r)
1757 return r;
1758 break;
1759#endif
a2e73f56
AD
1760#ifdef CONFIG_DRM_AMDGPU_CIK
1761 case CHIP_BONAIRE:
1762 case CHIP_HAWAII:
1763 case CHIP_KAVERI:
1764 case CHIP_KABINI:
1765 case CHIP_MULLINS:
e1ad2d53 1766 if (adev->flags & AMD_IS_APU)
a2e73f56 1767 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
1768 else
1769 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
1770
1771 r = cik_set_ip_blocks(adev);
1772 if (r)
1773 return r;
1774 break;
1775#endif
da87c30b
AD
1776 case CHIP_TOPAZ:
1777 case CHIP_TONGA:
1778 case CHIP_FIJI:
1779 case CHIP_POLARIS10:
1780 case CHIP_POLARIS11:
1781 case CHIP_POLARIS12:
1782 case CHIP_VEGAM:
1783 case CHIP_CARRIZO:
1784 case CHIP_STONEY:
1785 if (adev->flags & AMD_IS_APU)
1786 adev->family = AMDGPU_FAMILY_CZ;
1787 else
1788 adev->family = AMDGPU_FAMILY_VI;
1789
1790 r = vi_set_ip_blocks(adev);
1791 if (r)
1792 return r;
1793 break;
e48a3cd9
AD
1794 case CHIP_VEGA10:
1795 case CHIP_VEGA12:
e4bd8170 1796 case CHIP_VEGA20:
e48a3cd9 1797 case CHIP_RAVEN:
61cf44c1 1798 case CHIP_ARCTURUS:
b51a26a0 1799 case CHIP_RENOIR:
70534d1e 1800 if (adev->flags & AMD_IS_APU)
2ca8a5d2
CZ
1801 adev->family = AMDGPU_FAMILY_RV;
1802 else
1803 adev->family = AMDGPU_FAMILY_AI;
460826e6
KW
1804
1805 r = soc15_set_ip_blocks(adev);
1806 if (r)
1807 return r;
1808 break;
0a5b8c7b 1809 case CHIP_NAVI10:
7ecb5cd4 1810 case CHIP_NAVI14:
4808cf9c 1811 case CHIP_NAVI12:
11e8aef5 1812 case CHIP_SIENNA_CICHLID:
41f446bf 1813 case CHIP_NAVY_FLOUNDER:
0a5b8c7b
HR
1814 adev->family = AMDGPU_FAMILY_NV;
1815
1816 r = nv_set_ip_blocks(adev);
1817 if (r)
1818 return r;
1819 break;
d38ceaf9
AD
1820 default:
1821 /* FIXME: not supported yet */
1822 return -EINVAL;
1823 }
1824
1884734a 1825 amdgpu_amdkfd_device_probe(adev);
1826
3b94fb10 1827 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 1828 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 1829 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
00f54b97 1830
d38ceaf9
AD
1831 for (i = 0; i < adev->num_ip_blocks; i++) {
1832 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
ed8cf00c
HR
1833 DRM_ERROR("disabled ip block: %d <%s>\n",
1834 i, adev->ip_blocks[i].version->funcs->name);
a1255107 1835 adev->ip_blocks[i].status.valid = false;
d38ceaf9 1836 } else {
a1255107
AD
1837 if (adev->ip_blocks[i].version->funcs->early_init) {
1838 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 1839 if (r == -ENOENT) {
a1255107 1840 adev->ip_blocks[i].status.valid = false;
2c1a2784 1841 } else if (r) {
a1255107
AD
1842 DRM_ERROR("early_init of IP block <%s> failed %d\n",
1843 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 1844 return r;
2c1a2784 1845 } else {
a1255107 1846 adev->ip_blocks[i].status.valid = true;
2c1a2784 1847 }
974e6b64 1848 } else {
a1255107 1849 adev->ip_blocks[i].status.valid = true;
d38ceaf9 1850 }
d38ceaf9 1851 }
21a249ca
AD
1852 /* get the vbios after the asic_funcs are set up */
1853 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
1854 r = amdgpu_device_parse_gpu_info_fw(adev);
1855 if (r)
1856 return r;
1857
21a249ca
AD
1858 /* Read BIOS */
1859 if (!amdgpu_get_bios(adev))
1860 return -EINVAL;
1861
1862 r = amdgpu_atombios_init(adev);
1863 if (r) {
1864 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
1865 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
1866 return r;
1867 }
1868 }
d38ceaf9
AD
1869 }
1870
395d1fb9
NH
1871 adev->cg_flags &= amdgpu_cg_mask;
1872 adev->pg_flags &= amdgpu_pg_mask;
1873
d38ceaf9
AD
1874 return 0;
1875}
1876
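/*
 * Phase 1 of hw init: bring up the blocks the rest of the hardware depends
 * on (COMMON and IH, plus PSP when running as an SR-IOV VF) before the
 * remaining IP blocks are initialized in phase 2.
 */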
0a4f2520
RZ
1877static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
1878{
1879 int i, r;
1880
1881 for (i = 0; i < adev->num_ip_blocks; i++) {
1882 if (!adev->ip_blocks[i].status.sw)
1883 continue;
1884 if (adev->ip_blocks[i].status.hw)
1885 continue;
1886 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 1887 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
1888 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
1889 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1890 if (r) {
1891 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1892 adev->ip_blocks[i].version->funcs->name, r);
1893 return r;
1894 }
1895 adev->ip_blocks[i].status.hw = true;
1896 }
1897 }
1898
1899 return 0;
1900}
1901
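/*
 * Phase 2 of hw init: run hw_init on every software-initialized block that
 * phase 1 (and the firmware-loading step) has not already brought up.
 */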
1902static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
1903{
1904 int i, r;
1905
1906 for (i = 0; i < adev->num_ip_blocks; i++) {
1907 if (!adev->ip_blocks[i].status.sw)
1908 continue;
1909 if (adev->ip_blocks[i].status.hw)
1910 continue;
1911 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1912 if (r) {
1913 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1914 adev->ip_blocks[i].version->funcs->name, r);
1915 return r;
1916 }
1917 adev->ip_blocks[i].status.hw = true;
1918 }
1919
1920 return 0;
1921}
1922
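/*
 * Firmware-loading step run between the two hw init phases: on VEGA10 and
 * newer the PSP block is resumed (on reset/suspend paths) or hw_init-ed
 * here, and SMU firmware loading is then requested on bare metal (and on
 * CHIP_TONGA even under SR-IOV).
 */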
7a3e0bb2
RZ
1923static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
1924{
1925 int r = 0;
1926 int i;
80f41f84 1927 uint32_t smu_version;
7a3e0bb2
RZ
1928
1929 if (adev->asic_type >= CHIP_VEGA10) {
1930 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
1931 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
1932 continue;
1933
1934 /* no need to do the fw loading again if already done*/
1935 if (adev->ip_blocks[i].status.hw == true)
1936 break;
1937
df9c8d1a 1938 if (amdgpu_in_reset(adev) || adev->in_suspend) {
482f0e53
ML
1939 r = adev->ip_blocks[i].version->funcs->resume(adev);
1940 if (r) {
1941 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 1942 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
1943 return r;
1944 }
1945 } else {
1946 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1947 if (r) {
1948 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1949 adev->ip_blocks[i].version->funcs->name, r);
1950 return r;
7a3e0bb2 1951 }
7a3e0bb2 1952 }
482f0e53
ML
1953
1954 adev->ip_blocks[i].status.hw = true;
1955 break;
7a3e0bb2
RZ
1956 }
1957 }
482f0e53 1958
8973d9ec
ED
1959 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
1960 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 1961
80f41f84 1962 return r;
7a3e0bb2
RZ
1963}
1964
e3ecdffa
AD
1965/**
1966 * amdgpu_device_ip_init - run init for hardware IPs
1967 *
1968 * @adev: amdgpu_device pointer
1969 *
1970 * Main initialization pass for hardware IPs. The list of all the hardware
1971 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
1972 * are run. sw_init initializes the software state associated with each IP
1973 * and hw_init initializes the hardware associated with each IP.
1974 * Returns 0 on success, negative error code on failure.
1975 */
06ec9070 1976static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
1977{
1978 int i, r;
1979
c030f2e4 1980 r = amdgpu_ras_init(adev);
1981 if (r)
1982 return r;
1983
d38ceaf9 1984 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1985 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 1986 continue;
a1255107 1987 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 1988 if (r) {
a1255107
AD
1989 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
1990 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 1991 goto init_failed;
2c1a2784 1992 }
a1255107 1993 adev->ip_blocks[i].status.sw = true;
bfca0289 1994
d38ceaf9 1995 /* need to do gmc hw init early so we can allocate gpu mem */
a1255107 1996 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
06ec9070 1997 r = amdgpu_device_vram_scratch_init(adev);
2c1a2784
AD
1998 if (r) {
1999 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
72d3f592 2000 goto init_failed;
2c1a2784 2001 }
a1255107 2002 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
2003 if (r) {
2004 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 2005 goto init_failed;
2c1a2784 2006 }
06ec9070 2007 r = amdgpu_device_wb_init(adev);
2c1a2784 2008 if (r) {
06ec9070 2009 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 2010 goto init_failed;
2c1a2784 2011 }
a1255107 2012 adev->ip_blocks[i].status.hw = true;
2493664f
ML
2013
2014 /* right after GMC hw init, we create CSA */
f92d5c61 2015 if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
1e256e27
RZ
2016 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2017 AMDGPU_GEM_DOMAIN_VRAM,
2018 AMDGPU_CSA_SIZE);
2493664f
ML
2019 if (r) {
2020 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 2021 goto init_failed;
2493664f
ML
2022 }
2023 }
d38ceaf9
AD
2024 }
2025 }
2026
c9ffa427
YT
2027 if (amdgpu_sriov_vf(adev))
2028 amdgpu_virt_init_data_exchange(adev);
2029
533aed27
AG
2030 r = amdgpu_ib_pool_init(adev);
2031 if (r) {
2032 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2033 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2034 goto init_failed;
2035 }
2036
c8963ea4
RZ
2037 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2038 if (r)
72d3f592 2039 goto init_failed;
0a4f2520
RZ
2040
2041 r = amdgpu_device_ip_hw_init_phase1(adev);
2042 if (r)
72d3f592 2043 goto init_failed;
0a4f2520 2044
7a3e0bb2
RZ
2045 r = amdgpu_device_fw_loading(adev);
2046 if (r)
72d3f592 2047 goto init_failed;
7a3e0bb2 2048
0a4f2520
RZ
2049 r = amdgpu_device_ip_hw_init_phase2(adev);
2050 if (r)
72d3f592 2051 goto init_failed;
d38ceaf9 2052
121a2bc6
AG
2053 /*
 2054 * Retired pages will be loaded from eeprom and reserved here;
 2055 * this should be called after amdgpu_device_ip_hw_init_phase2 since
 2056 * for some ASICs the RAS EEPROM code relies on the SMU being fully
 2057 * functional for I2C communication, which is only true at this point.
 2058 * recovery_init may fail, but it can free all resources allocated by
 2059 * itself and its failure should not stop the amdgpu init process.
 2060 *
 2061 * Note: theoretically, this should be called before all vram allocations
 2062 * to protect retired pages from being reused.
2063 */
2064 amdgpu_ras_recovery_init(adev);
2065
3e2e2ab5
HZ
2066 if (adev->gmc.xgmi.num_physical_nodes > 1)
2067 amdgpu_xgmi_add_device(adev);
1884734a 2068 amdgpu_amdkfd_device_init(adev);
c6332b97 2069
bd607166
KR
2070 amdgpu_fru_get_product_info(adev);
2071
72d3f592 2072init_failed:
c9ffa427 2073 if (amdgpu_sriov_vf(adev))
c6332b97 2074 amdgpu_virt_release_full_gpu(adev, true);
2075
72d3f592 2076 return r;
d38ceaf9
AD
2077}
2078
e3ecdffa
AD
2079/**
2080 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2081 *
2082 * @adev: amdgpu_device pointer
2083 *
2084 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2085 * this function before a GPU reset. If the value is retained after a
 2086 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2087 */
06ec9070 2088static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2089{
2090 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2091}
2092
e3ecdffa
AD
2093/**
2094 * amdgpu_device_check_vram_lost - check if vram is valid
2095 *
2096 * @adev: amdgpu_device pointer
2097 *
2098 * Checks the reset magic value written to the gart pointer in VRAM.
2099 * The driver calls this after a GPU reset to see if the contents of
 2100 * VRAM have been lost or not.
 2101 * Returns true if VRAM is lost, false if not.
2102 */
06ec9070 2103static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2104{
dadce777
EQ
2105 if (memcmp(adev->gart.ptr, adev->reset_magic,
2106 AMDGPU_RESET_MAGIC_NUM))
2107 return true;
2108
df9c8d1a 2109 if (!amdgpu_in_reset(adev))
dadce777
EQ
2110 return false;
2111
2112 /*
2113 * For all ASICs with baco/mode1 reset, the VRAM is
2114 * always assumed to be lost.
2115 */
2116 switch (amdgpu_asic_reset_method(adev)) {
2117 case AMD_RESET_METHOD_BACO:
2118 case AMD_RESET_METHOD_MODE1:
2119 return true;
2120 default:
2121 return false;
2122 }
0c49e0b8
CZ
2123}
2124
e3ecdffa 2125/**
1112a46b 2126 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2127 *
2128 * @adev: amdgpu_device pointer
b8b72130 2129 * @state: clockgating state (gate or ungate)
e3ecdffa 2130 *
e3ecdffa 2131 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2132 * set_clockgating_state callbacks are run.
 2133 * On late init, this pass enables clockgating for the hardware IPs;
 2134 * on fini or suspend, it disables clockgating for the hardware IPs.
e3ecdffa
AD
2135 * Returns 0 on success, negative error code on failure.
2136 */
fdd34271 2137
1112a46b
RZ
2138static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2139 enum amd_clockgating_state state)
d38ceaf9 2140{
1112a46b 2141 int i, j, r;
d38ceaf9 2142
4a2ba394
SL
2143 if (amdgpu_emu_mode == 1)
2144 return 0;
2145
1112a46b
RZ
2146 for (j = 0; j < adev->num_ip_blocks; j++) {
2147 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2148 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2149 continue;
4a446d55 2150 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2151 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2152 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2153 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2154 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2155 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2156 /* enable clockgating to save power */
a1255107 2157 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2158 state);
4a446d55
AD
2159 if (r) {
2160 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2161 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2162 return r;
2163 }
b0b00ff1 2164 }
d38ceaf9 2165 }
06b18f61 2166
c9f96fd5
RZ
2167 return 0;
2168}
2169
1112a46b 2170static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
c9f96fd5 2171{
1112a46b 2172 int i, j, r;
06b18f61 2173
c9f96fd5
RZ
2174 if (amdgpu_emu_mode == 1)
2175 return 0;
2176
1112a46b
RZ
2177 for (j = 0; j < adev->num_ip_blocks; j++) {
2178 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2179 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5
RZ
2180 continue;
2181 /* skip CG for VCE/UVD, it's handled specially */
2182 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2183 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2184 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2185 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2186 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2187 /* enable powergating to save power */
2188 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2189 state);
c9f96fd5
RZ
2190 if (r) {
2191 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2192 adev->ip_blocks[i].version->funcs->name, r);
2193 return r;
2194 }
2195 }
2196 }
2dc80b00
S
2197 return 0;
2198}
2199
beff74bc
AD
2200static int amdgpu_device_enable_mgpu_fan_boost(void)
2201{
2202 struct amdgpu_gpu_instance *gpu_ins;
2203 struct amdgpu_device *adev;
2204 int i, ret = 0;
2205
2206 mutex_lock(&mgpu_info.mutex);
2207
2208 /*
2209 * MGPU fan boost feature should be enabled
2210 * only when there are two or more dGPUs in
2211 * the system
2212 */
2213 if (mgpu_info.num_dgpu < 2)
2214 goto out;
2215
2216 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2217 gpu_ins = &(mgpu_info.gpu_ins[i]);
2218 adev = gpu_ins->adev;
2219 if (!(adev->flags & AMD_IS_APU) &&
2220 !gpu_ins->mgpu_fan_enabled &&
2221 adev->powerplay.pp_funcs &&
2222 adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
2223 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2224 if (ret)
2225 break;
2226
2227 gpu_ins->mgpu_fan_enabled = 1;
2228 }
2229 }
2230
2231out:
2232 mutex_unlock(&mgpu_info.mutex);
2233
2234 return ret;
2235}
2236
e3ecdffa
AD
2237/**
2238 * amdgpu_device_ip_late_init - run late init for hardware IPs
2239 *
2240 * @adev: amdgpu_device pointer
2241 *
2242 * Late initialization pass for hardware IPs. The list of all the hardware
2243 * IPs that make up the asic is walked and the late_init callbacks are run.
2244 * late_init covers any special initialization that an IP requires
 2245 * after all of them have been initialized or something that needs to happen
2246 * late in the init process.
2247 * Returns 0 on success, negative error code on failure.
2248 */
06ec9070 2249static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2250{
60599a03 2251 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2252 int i = 0, r;
2253
2254 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2255 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2256 continue;
2257 if (adev->ip_blocks[i].version->funcs->late_init) {
2258 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2259 if (r) {
2260 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2261 adev->ip_blocks[i].version->funcs->name, r);
2262 return r;
2263 }
2dc80b00 2264 }
73f847db 2265 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2266 }
2267
a891d239
DL
2268 amdgpu_ras_set_error_query_ready(adev, true);
2269
1112a46b
RZ
2270 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2271 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2272
06ec9070 2273 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2274
beff74bc
AD
2275 r = amdgpu_device_enable_mgpu_fan_boost();
2276 if (r)
2277 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2278
60599a03
EQ
2279
2280 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2281 mutex_lock(&mgpu_info.mutex);
2282
2283 /*
 2284 * Reset the device p-state to low since it was booted high.
 2285 *
 2286 * This should be performed only after all devices from the same
 2287 * hive have been initialized.
 2288 *
 2289 * However, the number of devices in the hive is not known in
 2290 * advance; it is counted one by one as devices initialize.
 2291 *
 2292 * So we wait until all XGMI-linked devices have initialized.
 2293 * This may bring some delays as those devices may come from
 2294 * different hives. But that should be OK.
2295 */
2296 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2297 for (i = 0; i < mgpu_info.num_gpu; i++) {
2298 gpu_instance = &(mgpu_info.gpu_ins[i]);
2299 if (gpu_instance->adev->flags & AMD_IS_APU)
2300 continue;
2301
d84a430d
JK
2302 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2303 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
2304 if (r) {
2305 DRM_ERROR("pstate setting failed (%d).\n", r);
2306 break;
2307 }
2308 }
2309 }
2310
2311 mutex_unlock(&mgpu_info.mutex);
2312 }
2313
d38ceaf9
AD
2314 return 0;
2315}
2316
e3ecdffa
AD
2317/**
2318 * amdgpu_device_ip_fini - run fini for hardware IPs
2319 *
2320 * @adev: amdgpu_device pointer
2321 *
2322 * Main teardown pass for hardware IPs. The list of all the hardware
2323 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2324 * are run. hw_fini tears down the hardware associated with each IP
2325 * and sw_fini tears down any software state associated with each IP.
2326 * Returns 0 on success, negative error code on failure.
2327 */
06ec9070 2328static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
d38ceaf9
AD
2329{
2330 int i, r;
2331
5278a159
SY
2332 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2333 amdgpu_virt_release_ras_err_handler_data(adev);
2334
c030f2e4 2335 amdgpu_ras_pre_fini(adev);
2336
a82400b5
AG
2337 if (adev->gmc.xgmi.num_physical_nodes > 1)
2338 amdgpu_xgmi_remove_device(adev);
2339
1884734a 2340 amdgpu_amdkfd_device_fini(adev);
05df1f01
RZ
2341
2342 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2343 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2344
3e96dbfd
AD
2345 /* need to disable SMC first */
2346 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2347 if (!adev->ip_blocks[i].status.hw)
3e96dbfd 2348 continue;
fdd34271 2349 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
a1255107 2350 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3e96dbfd
AD
2351 /* XXX handle errors */
2352 if (r) {
2353 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
a1255107 2354 adev->ip_blocks[i].version->funcs->name, r);
3e96dbfd 2355 }
a1255107 2356 adev->ip_blocks[i].status.hw = false;
3e96dbfd
AD
2357 break;
2358 }
2359 }
2360
d38ceaf9 2361 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2362 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2363 continue;
8201a67a 2364
a1255107 2365 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2366 /* XXX handle errors */
2c1a2784 2367 if (r) {
a1255107
AD
2368 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2369 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2370 }
8201a67a 2371
a1255107 2372 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2373 }
2374
9950cda2 2375
d38ceaf9 2376 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2377 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2378 continue;
c12aba3a
ML
2379
2380 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2381 amdgpu_ucode_free_bo(adev);
1e256e27 2382 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a
ML
2383 amdgpu_device_wb_fini(adev);
2384 amdgpu_device_vram_scratch_fini(adev);
533aed27 2385 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2386 }
2387
a1255107 2388 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2389 /* XXX handle errors */
2c1a2784 2390 if (r) {
a1255107
AD
2391 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2392 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2393 }
a1255107
AD
2394 adev->ip_blocks[i].status.sw = false;
2395 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2396 }
2397
a6dcfd9c 2398 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2399 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2400 continue;
a1255107
AD
2401 if (adev->ip_blocks[i].version->funcs->late_fini)
2402 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2403 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2404 }
2405
c030f2e4 2406 amdgpu_ras_fini(adev);
2407
030308fc 2408 if (amdgpu_sriov_vf(adev))
24136135
ML
2409 if (amdgpu_virt_release_full_gpu(adev, false))
2410 DRM_ERROR("failed to release exclusive mode on fini\n");
2493664f 2411
d38ceaf9
AD
2412 return 0;
2413}
2414
e3ecdffa 2415/**
beff74bc 2416 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2417 *
1112a46b 2418 * @work: work_struct.
e3ecdffa 2419 */
beff74bc 2420static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2421{
2422 struct amdgpu_device *adev =
beff74bc 2423 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2424 int r;
2425
2426 r = amdgpu_ib_ring_tests(adev);
2427 if (r)
2428 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2429}
2430
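/*
 * Delayed work handler that actually enters GFXOFF: the state is only
 * flipped once no caller holds an outstanding gfx_off request and the SMU
 * accepts the powergating call.
 */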
1e317b99
RZ
2431static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2432{
2433 struct amdgpu_device *adev =
2434 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2435
2436 mutex_lock(&adev->gfx.gfx_off_mutex);
2437 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2438 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2439 adev->gfx.gfx_off_state = true;
2440 }
2441 mutex_unlock(&adev->gfx.gfx_off_mutex);
2442}
2443
e3ecdffa 2444/**
e7854a03 2445 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2446 *
2447 * @adev: amdgpu_device pointer
2448 *
2449 * Main suspend function for hardware IPs. The list of all the hardware
2450 * IPs that make up the asic is walked, clockgating is disabled and the
2451 * suspend callbacks are run. suspend puts the hardware and software state
2452 * in each IP into a state suitable for suspend.
2453 * Returns 0 on success, negative error code on failure.
2454 */
e7854a03
AD
2455static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2456{
2457 int i, r;
2458
ced1ba97
PL
2459 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2460 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2461
e7854a03
AD
2462 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2463 if (!adev->ip_blocks[i].status.valid)
2464 continue;
2b9f7848 2465
e7854a03 2466 /* displays are handled separately */
2b9f7848
ND
2467 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
2468 continue;
2469
2470 /* XXX handle errors */
2471 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2472 /* XXX handle errors */
2473 if (r) {
2474 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2475 adev->ip_blocks[i].version->funcs->name, r);
2476 return r;
e7854a03 2477 }
2b9f7848
ND
2478
2479 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
2480 }
2481
e7854a03
AD
2482 return 0;
2483}
2484
2485/**
2486 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2487 *
2488 * @adev: amdgpu_device pointer
2489 *
2490 * Main suspend function for hardware IPs. The list of all the hardware
2491 * IPs that make up the asic is walked, clockgating is disabled and the
2492 * suspend callbacks are run. suspend puts the hardware and software state
2493 * in each IP into a state suitable for suspend.
2494 * Returns 0 on success, negative error code on failure.
2495 */
2496static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2497{
2498 int i, r;
2499
2500 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2501 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2502 continue;
e7854a03
AD
2503 /* displays are handled in phase1 */
2504 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2505 continue;
bff77e86
LM
2506 /* PSP lost connection when err_event_athub occurs */
2507 if (amdgpu_ras_intr_triggered() &&
2508 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2509 adev->ip_blocks[i].status.hw = false;
2510 continue;
2511 }
d38ceaf9 2512 /* XXX handle errors */
a1255107 2513 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 2514 /* XXX handle errors */
2c1a2784 2515 if (r) {
a1255107
AD
2516 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2517 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2518 }
876923fb 2519 adev->ip_blocks[i].status.hw = false;
a3a09142 2520 /* handle putting the SMC in the appropriate state */
86b93fd6
JZ
 2521 if (!amdgpu_sriov_vf(adev)) {
2522 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2523 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
2524 if (r) {
2525 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
2526 adev->mp1_state, r);
2527 return r;
2528 }
a3a09142
AD
2529 }
2530 }
b5507c7e 2531 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2532 }
2533
2534 return 0;
2535}
2536
e7854a03
AD
2537/**
2538 * amdgpu_device_ip_suspend - run suspend for hardware IPs
2539 *
2540 * @adev: amdgpu_device pointer
2541 *
2542 * Main suspend function for hardware IPs. The list of all the hardware
2543 * IPs that make up the asic is walked, clockgating is disabled and the
2544 * suspend callbacks are run. suspend puts the hardware and software state
2545 * in each IP into a state suitable for suspend.
2546 * Returns 0 on success, negative error code on failure.
2547 */
2548int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2549{
2550 int r;
2551
e7819644
YT
2552 if (amdgpu_sriov_vf(adev))
2553 amdgpu_virt_request_full_gpu(adev, false);
2554
e7854a03
AD
2555 r = amdgpu_device_ip_suspend_phase1(adev);
2556 if (r)
2557 return r;
2558 r = amdgpu_device_ip_suspend_phase2(adev);
2559
e7819644
YT
2560 if (amdgpu_sriov_vf(adev))
2561 amdgpu_virt_release_full_gpu(adev, false);
2562
e7854a03
AD
2563 return r;
2564}
2565
06ec9070 2566static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2567{
2568 int i, r;
2569
2cb681b6
ML
2570 static enum amd_ip_block_type ip_order[] = {
2571 AMD_IP_BLOCK_TYPE_GMC,
2572 AMD_IP_BLOCK_TYPE_COMMON,
39186aef 2573 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
2574 AMD_IP_BLOCK_TYPE_IH,
2575 };
a90ad3c2 2576
392cf6a7
LC
2577 for (i = 0; i < adev->num_ip_blocks; i++)
2578 adev->ip_blocks[i].status.hw = false;
2579
2cb681b6
ML
2580 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2581 int j;
2582 struct amdgpu_ip_block *block;
a90ad3c2 2583
2cb681b6
ML
2584 for (j = 0; j < adev->num_ip_blocks; j++) {
2585 block = &adev->ip_blocks[j];
2586
2587 if (block->version->type != ip_order[i] ||
2588 !block->status.valid)
2589 continue;
2590
2591 r = block->version->funcs->hw_init(adev);
0aaeefcc 2592 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2593 if (r)
2594 return r;
482f0e53 2595 block->status.hw = true;
a90ad3c2
ML
2596 }
2597 }
2598
2599 return 0;
2600}
2601
06ec9070 2602static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2603{
2604 int i, r;
2605
2cb681b6
ML
2606 static enum amd_ip_block_type ip_order[] = {
2607 AMD_IP_BLOCK_TYPE_SMC,
2608 AMD_IP_BLOCK_TYPE_DCE,
2609 AMD_IP_BLOCK_TYPE_GFX,
2610 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c 2611 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07
JJ
2612 AMD_IP_BLOCK_TYPE_VCE,
2613 AMD_IP_BLOCK_TYPE_VCN
2cb681b6 2614 };
a90ad3c2 2615
2cb681b6
ML
2616 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2617 int j;
2618 struct amdgpu_ip_block *block;
a90ad3c2 2619
2cb681b6
ML
2620 for (j = 0; j < adev->num_ip_blocks; j++) {
2621 block = &adev->ip_blocks[j];
2622
2623 if (block->version->type != ip_order[i] ||
482f0e53
ML
2624 !block->status.valid ||
2625 block->status.hw)
2cb681b6
ML
2626 continue;
2627
895bd048
JZ
2628 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
2629 r = block->version->funcs->resume(adev);
2630 else
2631 r = block->version->funcs->hw_init(adev);
2632
0aaeefcc 2633 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2634 if (r)
2635 return r;
482f0e53 2636 block->status.hw = true;
a90ad3c2
ML
2637 }
2638 }
2639
2640 return 0;
2641}
2642
e3ecdffa
AD
2643/**
2644 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2645 *
2646 * @adev: amdgpu_device pointer
2647 *
2648 * First resume function for hardware IPs. The list of all the hardware
2649 * IPs that make up the asic is walked and the resume callbacks are run for
2650 * COMMON, GMC, and IH. resume puts the hardware into a functional state
2651 * after a suspend and updates the software state as necessary. This
2652 * function is also used for restoring the GPU after a GPU reset.
2653 * Returns 0 on success, negative error code on failure.
2654 */
06ec9070 2655static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
2656{
2657 int i, r;
2658
a90ad3c2 2659 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2660 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 2661 continue;
a90ad3c2 2662 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa
AD
2663 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2664 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
482f0e53 2665
fcf0649f
CZ
2666 r = adev->ip_blocks[i].version->funcs->resume(adev);
2667 if (r) {
2668 DRM_ERROR("resume of IP block <%s> failed %d\n",
2669 adev->ip_blocks[i].version->funcs->name, r);
2670 return r;
2671 }
482f0e53 2672 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
2673 }
2674 }
2675
2676 return 0;
2677}
2678
e3ecdffa
AD
2679/**
2680 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2681 *
2682 * @adev: amdgpu_device pointer
2683 *
 2684 * Second resume function for hardware IPs. The list of all the hardware
2685 * IPs that make up the asic is walked and the resume callbacks are run for
2686 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
2687 * functional state after a suspend and updates the software state as
2688 * necessary. This function is also used for restoring the GPU after a GPU
2689 * reset.
2690 * Returns 0 on success, negative error code on failure.
2691 */
06ec9070 2692static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2693{
2694 int i, r;
2695
2696 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2697 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 2698 continue;
fcf0649f 2699 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 2700 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
2701 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2702 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 2703 continue;
a1255107 2704 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 2705 if (r) {
a1255107
AD
2706 DRM_ERROR("resume of IP block <%s> failed %d\n",
2707 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2708 return r;
2c1a2784 2709 }
482f0e53 2710 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
2711 }
2712
2713 return 0;
2714}
2715
e3ecdffa
AD
2716/**
2717 * amdgpu_device_ip_resume - run resume for hardware IPs
2718 *
2719 * @adev: amdgpu_device pointer
2720 *
2721 * Main resume function for hardware IPs. The hardware IPs
 2722 * are split into two resume functions because they are
 2723 * also used in recovering from a GPU reset and some additional
 2724 * steps need to be taken between them. In this case (S3/S4) they are
2725 * run sequentially.
2726 * Returns 0 on success, negative error code on failure.
2727 */
06ec9070 2728static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
2729{
2730 int r;
2731
06ec9070 2732 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
2733 if (r)
2734 return r;
7a3e0bb2
RZ
2735
2736 r = amdgpu_device_fw_loading(adev);
2737 if (r)
2738 return r;
2739
06ec9070 2740 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
2741
2742 return r;
2743}
2744
e3ecdffa
AD
2745/**
2746 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2747 *
2748 * @adev: amdgpu_device pointer
2749 *
2750 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2751 */
4e99a44e 2752static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 2753{
6867e1b5
ML
2754 if (amdgpu_sriov_vf(adev)) {
2755 if (adev->is_atom_fw) {
2756 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2757 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2758 } else {
2759 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2760 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2761 }
2762
2763 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2764 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 2765 }
048765ad
AR
2766}
2767
e3ecdffa
AD
2768/**
2769 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2770 *
2771 * @asic_type: AMD asic type
2772 *
 2773 * Check if there is DC (new modesetting infrastructure) support for an asic.
 2774 * Returns true if DC has support, false if not.
2775 */
4562236b
HW
2776bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2777{
2778 switch (asic_type) {
2779#if defined(CONFIG_DRM_AMD_DC)
64200c46
MR
2780#if defined(CONFIG_DRM_AMD_DC_SI)
2781 case CHIP_TAHITI:
2782 case CHIP_PITCAIRN:
2783 case CHIP_VERDE:
2784 case CHIP_OLAND:
2785#endif
4562236b 2786 case CHIP_BONAIRE:
0d6fbccb 2787 case CHIP_KAVERI:
367e6687
AD
2788 case CHIP_KABINI:
2789 case CHIP_MULLINS:
d9fda248
HW
2790 /*
2791 * We have systems in the wild with these ASICs that require
2792 * LVDS and VGA support which is not supported with DC.
2793 *
2794 * Fallback to the non-DC driver here by default so as not to
2795 * cause regressions.
2796 */
2797 return amdgpu_dc > 0;
2798 case CHIP_HAWAII:
4562236b
HW
2799 case CHIP_CARRIZO:
2800 case CHIP_STONEY:
4562236b 2801 case CHIP_POLARIS10:
675fd32b 2802 case CHIP_POLARIS11:
2c8ad2d5 2803 case CHIP_POLARIS12:
675fd32b 2804 case CHIP_VEGAM:
4562236b
HW
2805 case CHIP_TONGA:
2806 case CHIP_FIJI:
42f8ffa1 2807 case CHIP_VEGA10:
dca7b401 2808 case CHIP_VEGA12:
c6034aa2 2809 case CHIP_VEGA20:
b86a1aa3 2810#if defined(CONFIG_DRM_AMD_DC_DCN)
fd187853 2811 case CHIP_RAVEN:
b4f199c7 2812 case CHIP_NAVI10:
8fceceb6 2813 case CHIP_NAVI14:
078655d9 2814 case CHIP_NAVI12:
e1c14c43 2815 case CHIP_RENOIR:
81d9bfb8
JFZ
2816#endif
2817#if defined(CONFIG_DRM_AMD_DC_DCN3_0)
2818 case CHIP_SIENNA_CICHLID:
a6c5308f 2819 case CHIP_NAVY_FLOUNDER:
42f8ffa1 2820#endif
fd187853 2821 return amdgpu_dc != 0;
4562236b
HW
2822#endif
2823 default:
93b09a9a
SS
2824 if (amdgpu_dc > 0)
2825 DRM_INFO("Display Core has been requested via kernel parameter "
2826 "but isn't supported by ASIC, ignoring\n");
4562236b
HW
2827 return false;
2828 }
2829}
2830
2831/**
2832 * amdgpu_device_has_dc_support - check if dc is supported
2833 *
 2834 * @adev: amdgpu_device pointer
2835 *
2836 * Returns true for supported, false for not supported
2837 */
2838bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2839{
2555039d
XY
2840 if (amdgpu_sriov_vf(adev))
2841 return false;
2842
4562236b
HW
2843 return amdgpu_device_asic_has_dc_support(adev->asic_type);
2844}
2845
d4535e2c
AG
2846
2847static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
2848{
2849 struct amdgpu_device *adev =
2850 container_of(__work, struct amdgpu_device, xgmi_reset_work);
c6a6e2db 2851 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
d4535e2c 2852
c6a6e2db
AG
2853 /* It's a bug to not have a hive within this function */
2854 if (WARN_ON(!hive))
2855 return;
2856
2857 /*
2858 * Use task barrier to synchronize all xgmi reset works across the
2859 * hive. task_barrier_enter and task_barrier_exit will block
2860 * until all the threads running the xgmi reset works reach
2861 * those points. task_barrier_full will do both blocks.
2862 */
2863 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
2864
2865 task_barrier_enter(&hive->tb);
2866 adev->asic_reset_res = amdgpu_device_baco_enter(adev->ddev);
2867
2868 if (adev->asic_reset_res)
2869 goto fail;
2870
2871 task_barrier_exit(&hive->tb);
2872 adev->asic_reset_res = amdgpu_device_baco_exit(adev->ddev);
2873
2874 if (adev->asic_reset_res)
2875 goto fail;
43c4d576
JC
2876
2877 if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count)
2878 adev->mmhub.funcs->reset_ras_error_count(adev);
c6a6e2db
AG
2879 } else {
2880
2881 task_barrier_full(&hive->tb);
2882 adev->asic_reset_res = amdgpu_asic_reset(adev);
2883 }
ce316fa5 2884
c6a6e2db 2885fail:
d4535e2c 2886 if (adev->asic_reset_res)
fed184e9 2887 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
d4535e2c
AG
2888 adev->asic_reset_res, adev->ddev->unique);
2889}
2890
71f98027
AD
2891static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
2892{
2893 char *input = amdgpu_lockup_timeout;
2894 char *timeout_setting = NULL;
2895 int index = 0;
2896 long timeout;
2897 int ret = 0;
2898
2899 /*
 2900 * By default, the timeout for non-compute jobs is 10000 ms,
 2901 * and there is no timeout enforced on compute jobs.
 2902 * In SR-IOV or passthrough mode, the timeout for compute
b7b2a316 2903 * jobs is 60000 ms by default.
71f98027
AD
2904 */
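 /*
  * Example (values are illustrative only): a lockup_timeout string of
  * "10000,60000,6000,8000" sets the gfx, compute, sdma and video timeouts
  * (in ms) in that order, as parsed below; a 0 keeps the default for that
  * slot and a negative value disables the timeout (MAX_SCHEDULE_TIMEOUT).
  */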
2905 adev->gfx_timeout = msecs_to_jiffies(10000);
2906 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
2907 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
b7b2a316 2908 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027
AD
2909 else
2910 adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
2911
f440ff44 2912 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 2913 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 2914 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
2915 ret = kstrtol(timeout_setting, 0, &timeout);
2916 if (ret)
2917 return ret;
2918
2919 if (timeout == 0) {
2920 index++;
2921 continue;
2922 } else if (timeout < 0) {
2923 timeout = MAX_SCHEDULE_TIMEOUT;
2924 } else {
2925 timeout = msecs_to_jiffies(timeout);
2926 }
2927
2928 switch (index++) {
2929 case 0:
2930 adev->gfx_timeout = timeout;
2931 break;
2932 case 1:
2933 adev->compute_timeout = timeout;
2934 break;
2935 case 2:
2936 adev->sdma_timeout = timeout;
2937 break;
2938 case 3:
2939 adev->video_timeout = timeout;
2940 break;
2941 default:
2942 break;
2943 }
2944 }
2945 /*
2946 * There is only one value specified and
2947 * it should apply to all non-compute jobs.
2948 */
bcccee89 2949 if (index == 1) {
71f98027 2950 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
2951 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
2952 adev->compute_timeout = adev->gfx_timeout;
2953 }
71f98027
AD
2954 }
2955
2956 return ret;
2957}
d4535e2c 2958
77f3a5cd
ND
2959static const struct attribute *amdgpu_dev_attributes[] = {
2960 &dev_attr_product_name.attr,
2961 &dev_attr_product_number.attr,
2962 &dev_attr_serial_number.attr,
2963 &dev_attr_pcie_replay_count.attr,
2964 NULL
2965};
2966
d38ceaf9
AD
2967/**
2968 * amdgpu_device_init - initialize the driver
2969 *
2970 * @adev: amdgpu_device pointer
87e3f136 2971 * @ddev: drm dev pointer
d38ceaf9
AD
2972 * @pdev: pci dev pointer
2973 * @flags: driver flags
2974 *
2975 * Initializes the driver info and hw (all asics).
2976 * Returns 0 for success or an error on failure.
2977 * Called at driver startup.
2978 */
2979int amdgpu_device_init(struct amdgpu_device *adev,
2980 struct drm_device *ddev,
2981 struct pci_dev *pdev,
2982 uint32_t flags)
2983{
2984 int r, i;
3840c5bc 2985 bool boco = false;
95844d20 2986 u32 max_MBps;
d38ceaf9
AD
2987
2988 adev->shutdown = false;
2989 adev->dev = &pdev->dev;
2990 adev->ddev = ddev;
2991 adev->pdev = pdev;
2992 adev->flags = flags;
4e66d7d2
YZ
2993
2994 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
2995 adev->asic_type = amdgpu_force_asic_type;
2996 else
2997 adev->asic_type = flags & AMD_ASIC_MASK;
2998
d38ceaf9 2999 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 3000 if (amdgpu_emu_mode == 1)
8bdab6bb 3001 adev->usec_timeout *= 10;
770d13b1 3002 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
3003 adev->accel_working = false;
3004 adev->num_rings = 0;
3005 adev->mman.buffer_funcs = NULL;
3006 adev->mman.buffer_funcs_ring = NULL;
3007 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 3008 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 3009 adev->gmc.gmc_funcs = NULL;
f54d1867 3010 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 3011 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
3012
3013 adev->smc_rreg = &amdgpu_invalid_rreg;
3014 adev->smc_wreg = &amdgpu_invalid_wreg;
3015 adev->pcie_rreg = &amdgpu_invalid_rreg;
3016 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
3017 adev->pciep_rreg = &amdgpu_invalid_rreg;
3018 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
3019 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3020 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
3021 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3022 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3023 adev->didt_rreg = &amdgpu_invalid_rreg;
3024 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
3025 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3026 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
3027 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3028 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3029
3e39ab90
AD
3030 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3031 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3032 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
3033
 3034 /* mutex initializations are all done here so we
 3035 * can call these functions later on without locking issues */
d38ceaf9 3036 atomic_set(&adev->irq.ih.lock, 0);
0e5ca0d1 3037 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
3038 mutex_init(&adev->pm.mutex);
3039 mutex_init(&adev->gfx.gpu_clock_mutex);
3040 mutex_init(&adev->srbm_mutex);
b8866c26 3041 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3042 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 3043 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3044 mutex_init(&adev->mn_lock);
e23b74aa 3045 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3046 hash_init(adev->mn_hash);
df9c8d1a
DL
3047 init_rwsem(&adev->reset_sem);
3048 atomic_set(&adev->in_gpu_reset, 0);
32eaeae0 3049 mutex_init(&adev->psp.mutex);
bd052211 3050 mutex_init(&adev->notifier_lock);
d38ceaf9 3051
912dfc84
EQ
3052 r = amdgpu_device_check_arguments(adev);
3053 if (r)
3054 return r;
d38ceaf9 3055
d38ceaf9
AD
3056 spin_lock_init(&adev->mmio_idx_lock);
3057 spin_lock_init(&adev->smc_idx_lock);
3058 spin_lock_init(&adev->pcie_idx_lock);
3059 spin_lock_init(&adev->uvd_ctx_idx_lock);
3060 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3061 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3062 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3063 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3064 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3065
0c4e7fa5
CZ
3066 INIT_LIST_HEAD(&adev->shadow_list);
3067 mutex_init(&adev->shadow_list_lock);
3068
beff74bc
AD
3069 INIT_DELAYED_WORK(&adev->delayed_init_work,
3070 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3071 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3072 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3073
d4535e2c
AG
3074 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3075
d23ee13f 3076 adev->gfx.gfx_off_req_count = 1;
b6e79d9a 3077 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3078
b265bdbd
EQ
3079 atomic_set(&adev->throttling_logging_enabled, 1);
3080 /*
3081 * If throttling continues, logging will be performed every minute
3082 * to avoid log flooding. "-1" is subtracted since the thermal
3083 * throttling interrupt comes every second. Thus, the total logging
 3084 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
3085 * for throttling interrupt) = 60 seconds.
3086 */
3087 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3088 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3089
0fa49558
AX
3090 /* Registers mapping */
3091 /* TODO: block userspace mapping of io register */
da69c161
KW
3092 if (adev->asic_type >= CHIP_BONAIRE) {
3093 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3094 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3095 } else {
3096 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3097 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3098 }
d38ceaf9 3099
d38ceaf9
AD
3100 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3101 if (adev->rmmio == NULL) {
3102 return -ENOMEM;
3103 }
3104 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3105 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3106
d38ceaf9
AD
3107 /* io port mapping */
3108 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
3109 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
3110 adev->rio_mem_size = pci_resource_len(adev->pdev, i);
3111 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
3112 break;
3113 }
3114 }
3115 if (adev->rio_mem == NULL)
b64a18c5 3116 DRM_INFO("PCI I/O BAR is not found.\n");
d38ceaf9 3117
b2109d8e
JX
3118 /* enable PCIE atomic ops */
3119 r = pci_enable_atomic_ops_to_root(adev->pdev,
3120 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3121 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3122 if (r) {
3123 adev->have_atomics_support = false;
3124 DRM_INFO("PCIE atomic ops is not supported\n");
3125 } else {
3126 adev->have_atomics_support = true;
3127 }
3128
5494d864
AD
3129 amdgpu_device_get_pcie_info(adev);
3130
b239c017
JX
3131 if (amdgpu_mcbp)
3132 DRM_INFO("MCBP is enabled\n");
3133
5f84cc63
JX
3134 if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
3135 adev->enable_mes = true;
3136
3aa0115d
ML
3137 /* detect hw virtualization here */
3138 amdgpu_detect_virtualization(adev);
3139
dffa11b4
ML
3140 r = amdgpu_device_get_job_timeout_settings(adev);
3141 if (r) {
3142 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
3143 return r;
a190d1c7
XY
3144 }
3145
d38ceaf9 3146 /* early init functions */
06ec9070 3147 r = amdgpu_device_ip_early_init(adev);
d38ceaf9
AD
3148 if (r)
3149 return r;
3150
6585661d
OZ
3151 /* doorbell bar mapping and doorbell index init*/
3152 amdgpu_device_doorbell_init(adev);
3153
d38ceaf9
AD
3154 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
3155 /* this will fail for cards that aren't VGA class devices, just
3156 * ignore it */
06ec9070 3157 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
d38ceaf9 3158
31af062a 3159 if (amdgpu_device_supports_boco(ddev))
3840c5bc
AD
3160 boco = true;
3161 if (amdgpu_has_atpx() &&
3162 (amdgpu_is_atpx_hybrid() ||
3163 amdgpu_has_atpx_dgpu_power_cntl()) &&
3164 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 3165 vga_switcheroo_register_client(adev->pdev,
3840c5bc
AD
3166 &amdgpu_switcheroo_ops, boco);
3167 if (boco)
d38ceaf9
AD
3168 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
3169
9475a943
SL
3170 if (amdgpu_emu_mode == 1) {
3171 /* post the asic on emulation mode */
3172 emu_soc_asic_init(adev);
bfca0289 3173 goto fence_driver_init;
9475a943 3174 }
bfca0289 3175
4e99a44e
ML
3176 /* detect if we are with an SRIOV vbios */
3177 amdgpu_device_detect_sriov_bios(adev);
048765ad 3178
95e8e59e
AD
3179 /* check if we need to reset the asic
3180 * E.g., driver was not cleanly unloaded previously, etc.
3181 */
f14899fd 3182 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
95e8e59e
AD
3183 r = amdgpu_asic_reset(adev);
3184 if (r) {
3185 dev_err(adev->dev, "asic reset on init failed\n");
3186 goto failed;
3187 }
3188 }
3189
d38ceaf9 3190 /* Post card if necessary */
39c640c0 3191 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3192 if (!adev->bios) {
bec86378 3193 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3194 r = -EINVAL;
3195 goto failed;
d38ceaf9 3196 }
bec86378 3197 DRM_INFO("GPU posting now...\n");
4e99a44e
ML
3198 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3199 if (r) {
3200 dev_err(adev->dev, "gpu post error!\n");
3201 goto failed;
3202 }
d38ceaf9
AD
3203 }
3204
88b64e95
AD
3205 if (adev->is_atom_fw) {
3206 /* Initialize clocks */
3207 r = amdgpu_atomfirmware_get_clock_info(adev);
3208 if (r) {
3209 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 3210 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
3211 goto failed;
3212 }
3213 } else {
a5bde2f9
AD
3214 /* Initialize clocks */
3215 r = amdgpu_atombios_get_clock_info(adev);
3216 if (r) {
3217 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 3218 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 3219 goto failed;
a5bde2f9
AD
3220 }
3221 /* init i2c buses */
4562236b
HW
3222 if (!amdgpu_device_has_dc_support(adev))
3223 amdgpu_atombios_i2c_init(adev);
2c1a2784 3224 }
d38ceaf9 3225
bfca0289 3226fence_driver_init:
d38ceaf9
AD
3227 /* Fence driver */
3228 r = amdgpu_fence_driver_init(adev);
2c1a2784
AD
3229 if (r) {
3230 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
e23b74aa 3231 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3232 goto failed;
2c1a2784 3233 }
d38ceaf9
AD
3234
3235 /* init the mode config */
3236 drm_mode_config_init(adev->ddev);
3237
06ec9070 3238 r = amdgpu_device_ip_init(adev);
d38ceaf9 3239 if (r) {
8840a387 3240 /* failed in exclusive mode due to timeout */
3241 if (amdgpu_sriov_vf(adev) &&
3242 !amdgpu_sriov_runtime(adev) &&
3243 amdgpu_virt_mmio_blocked(adev) &&
3244 !amdgpu_virt_wait_reset(adev)) {
3245 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
3246 /* Don't send request since VF is inactive. */
3247 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3248 adev->virt.ops = NULL;
8840a387 3249 r = -EAGAIN;
3250 goto failed;
3251 }
06ec9070 3252 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3253 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
83ba126a 3254 goto failed;
d38ceaf9
AD
3255 }
3256
d69b8971
YZ
3257 dev_info(adev->dev,
3258 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
3259 adev->gfx.config.max_shader_engines,
3260 adev->gfx.config.max_sh_per_se,
3261 adev->gfx.config.max_cu_per_sh,
3262 adev->gfx.cu_info.number);
3263
d38ceaf9
AD
3264 adev->accel_working = true;
3265
e59c0205
AX
3266 amdgpu_vm_check_compute_bug(adev);
3267
95844d20
MO
3268 /* Initialize the buffer migration limit. */
3269 if (amdgpu_moverate >= 0)
3270 max_MBps = amdgpu_moverate;
3271 else
3272 max_MBps = 8; /* Allow 8 MB/s. */
3273 /* Get a log2 for easy divisions. */
3274 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3275
9bc92b9c
ML
3276 amdgpu_fbdev_init(adev);
3277
d2f52ac8 3278 r = amdgpu_pm_sysfs_init(adev);
7c868b59
YT
3279 if (r) {
3280 adev->pm_sysfs_en = false;
d2f52ac8 3281 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
7c868b59
YT
3282 } else
3283 adev->pm_sysfs_en = true;
d2f52ac8 3284
5bb23532 3285 r = amdgpu_ucode_sysfs_init(adev);
7c868b59
YT
3286 if (r) {
3287 adev->ucode_sysfs_en = false;
5bb23532 3288 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
7c868b59
YT
3289 } else
3290 adev->ucode_sysfs_en = true;
5bb23532 3291
d38ceaf9
AD
3292 if ((amdgpu_testing & 1)) {
3293 if (adev->accel_working)
3294 amdgpu_test_moves(adev);
3295 else
3296 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
3297 }
d38ceaf9
AD
3298 if (amdgpu_benchmarking) {
3299 if (adev->accel_working)
3300 amdgpu_benchmark(adev, amdgpu_benchmarking);
3301 else
3302 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
3303 }
3304
b0adca4d
EQ
3305 /*
3306 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
 3307 * Otherwise the mgpu fan boost feature will be skipped because
 3308 * the gpu instance count would be too low.
3309 */
3310 amdgpu_register_gpu_instance(adev);
3311
d38ceaf9
AD
3312 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3313 * explicit gating rather than handling it automatically.
3314 */
06ec9070 3315 r = amdgpu_device_ip_late_init(adev);
2c1a2784 3316 if (r) {
06ec9070 3317 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
e23b74aa 3318 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
83ba126a 3319 goto failed;
2c1a2784 3320 }
d38ceaf9 3321
108c6a63 3322 /* must succeed. */
511fdbc3 3323 amdgpu_ras_resume(adev);
108c6a63 3324
beff74bc
AD
3325 queue_delayed_work(system_wq, &adev->delayed_init_work,
3326 msecs_to_jiffies(AMDGPU_RESUME_MS));
3327
2c738637
ML
3328 if (amdgpu_sriov_vf(adev))
3329 flush_delayed_work(&adev->delayed_init_work);
3330
77f3a5cd 3331 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
bd607166 3332 if (r) {
77f3a5cd 3333 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166
KR
3334 return r;
3335 }
3336
d155bef0
AB
3337 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3338 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
3339 if (r)
3340 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3341
d38ceaf9 3342 return 0;
83ba126a
AD
3343
3344failed:
89041940 3345 amdgpu_vf_error_trans_all(adev);
3840c5bc 3346 if (boco)
83ba126a 3347 vga_switcheroo_fini_domain_pm_ops(adev->dev);
8840a387 3348
83ba126a 3349 return r;
d38ceaf9
AD
3350}
3351
d38ceaf9
AD
3352/**
3353 * amdgpu_device_fini - tear down the driver
3354 *
3355 * @adev: amdgpu_device pointer
3356 *
3357 * Tear down the driver info (all asics).
3358 * Called at driver shutdown.
3359 */
3360void amdgpu_device_fini(struct amdgpu_device *adev)
3361{
3362 int r;
3363
3364 DRM_INFO("amdgpu: finishing device.\n");
9f875167 3365 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 3366 adev->shutdown = true;
9f875167 3367
752c683d
ML
3368 /* make sure IB tests have finished before entering exclusive mode
3369 * to avoid preemption during the IB tests
3370 */
3371 if (amdgpu_sriov_vf(adev))
3372 amdgpu_virt_request_full_gpu(adev, false);
3373
e5b03032
ML
3374 /* disable all interrupts */
3375 amdgpu_irq_disable_all(adev);
ff97cba8
ML
3376 if (adev->mode_info.mode_config_initialized){
3377 if (!amdgpu_device_has_dc_support(adev))
c2d88e06 3378 drm_helper_force_disable_all(adev->ddev);
ff97cba8
ML
3379 else
3380 drm_atomic_helper_shutdown(adev->ddev);
3381 }
d38ceaf9 3382 amdgpu_fence_driver_fini(adev);
7c868b59
YT
3383 if (adev->pm_sysfs_en)
3384 amdgpu_pm_sysfs_fini(adev);
d38ceaf9 3385 amdgpu_fbdev_fini(adev);
06ec9070 3386 r = amdgpu_device_ip_fini(adev);
75e1658e
ND
3387 release_firmware(adev->firmware.gpu_info_fw);
3388 adev->firmware.gpu_info_fw = NULL;
d38ceaf9
AD
3389 adev->accel_working = false;
3390 /* free i2c buses */
4562236b
HW
3391 if (!amdgpu_device_has_dc_support(adev))
3392 amdgpu_i2c_fini(adev);
bfca0289
SL
3393
3394 if (amdgpu_emu_mode != 1)
3395 amdgpu_atombios_fini(adev);
3396
d38ceaf9
AD
3397 kfree(adev->bios);
3398 adev->bios = NULL;
3840c5bc
AD
3399 if (amdgpu_has_atpx() &&
3400 (amdgpu_is_atpx_hybrid() ||
3401 amdgpu_has_atpx_dgpu_power_cntl()) &&
3402 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 3403 vga_switcheroo_unregister_client(adev->pdev);
3840c5bc 3404 if (amdgpu_device_supports_boco(adev->ddev))
83ba126a 3405 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d38ceaf9
AD
3406 vga_client_register(adev->pdev, NULL, NULL, NULL);
3407 if (adev->rio_mem)
3408 pci_iounmap(adev->pdev, adev->rio_mem);
3409 adev->rio_mem = NULL;
3410 iounmap(adev->rmmio);
3411 adev->rmmio = NULL;
06ec9070 3412 amdgpu_device_doorbell_fini(adev);
e9bc1bf7 3413
7c868b59
YT
3414 if (adev->ucode_sysfs_en)
3415 amdgpu_ucode_sysfs_fini(adev);
77f3a5cd
ND
3416
3417 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
d155bef0
AB
3418 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3419 amdgpu_pmu_fini(adev);
4292b0b2 3420 if (adev->discovery_bin)
a190d1c7 3421 amdgpu_discovery_fini(adev);
d38ceaf9
AD
3422}
3423
3424
3425/*
3426 * Suspend & resume.
3427 */
3428/**
810ddc3a 3429 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 3430 *
87e3f136 3431 * @dev: drm dev pointer
87e3f136 3432 * @fbcon: notify the fbdev of suspend
d38ceaf9
AD
3433 *
3434 * Puts the hw in the suspend state (all asics).
3435 * Returns 0 for success or an error on failure.
3436 * Called at driver suspend.
3437 */
de185019 3438int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3439{
3440 struct amdgpu_device *adev;
3441 struct drm_crtc *crtc;
3442 struct drm_connector *connector;
f8d2d39e 3443 struct drm_connector_list_iter iter;
5ceb54c6 3444 int r;
d38ceaf9
AD
3445
3446 if (dev == NULL || dev->dev_private == NULL) {
3447 return -ENODEV;
3448 }
3449
3450 adev = dev->dev_private;
3451
3452 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3453 return 0;
3454
44779b43 3455 adev->in_suspend = true;
d38ceaf9
AD
3456 drm_kms_helper_poll_disable(dev);
3457
5f818173
S
3458 if (fbcon)
3459 amdgpu_fbdev_set_suspend(adev, 1);
3460
beff74bc 3461 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 3462
4562236b
HW
3463 if (!amdgpu_device_has_dc_support(adev)) {
3464 /* turn off display hw */
3465 drm_modeset_lock_all(dev);
f8d2d39e
LP
3466 drm_connector_list_iter_begin(dev, &iter);
3467 drm_for_each_connector_iter(connector, &iter)
3468 drm_helper_connector_dpms(connector,
3469 DRM_MODE_DPMS_OFF);
3470 drm_connector_list_iter_end(&iter);
4562236b 3471 drm_modeset_unlock_all(dev);
fe1053b7
AD
3472 /* unpin the front buffers and cursors */
3473 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3474 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3475 struct drm_framebuffer *fb = crtc->primary->fb;
3476 struct amdgpu_bo *robj;
3477
91334223 3478 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3479 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3480 r = amdgpu_bo_reserve(aobj, true);
3481 if (r == 0) {
3482 amdgpu_bo_unpin(aobj);
3483 amdgpu_bo_unreserve(aobj);
3484 }
756e6880 3485 }
756e6880 3486
fe1053b7
AD
3487 if (fb == NULL || fb->obj[0] == NULL) {
3488 continue;
3489 }
3490 robj = gem_to_amdgpu_bo(fb->obj[0]);
3491 /* don't unpin kernel fb objects */
3492 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
3493 r = amdgpu_bo_reserve(robj, true);
3494 if (r == 0) {
3495 amdgpu_bo_unpin(robj);
3496 amdgpu_bo_unreserve(robj);
3497 }
d38ceaf9
AD
3498 }
3499 }
3500 }
fe1053b7 3501
5e6932fe 3502 amdgpu_ras_suspend(adev);
3503
fe1053b7
AD
3504 r = amdgpu_device_ip_suspend_phase1(adev);
3505
94fa5660
EQ
3506 amdgpu_amdkfd_suspend(adev, !fbcon);
3507
d38ceaf9
AD
3508 /* evict vram memory */
3509 amdgpu_bo_evict_vram(adev);
3510
5ceb54c6 3511 amdgpu_fence_driver_suspend(adev);
d38ceaf9 3512
fe1053b7 3513 r = amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 3514
a0a71e49
AD
3515 /* evict remaining vram memory
3516 * This second call to evict vram is to evict the gart page table
3517 * using the CPU.
3518 */
d38ceaf9
AD
3519 amdgpu_bo_evict_vram(adev);
3520
d38ceaf9
AD
3521 return 0;
3522}
3523
3524/**
810ddc3a 3525 * amdgpu_device_resume - initiate device resume
d38ceaf9 3526 *
87e3f136 3527 * @dev: drm dev pointer
87e3f136 3528 * @fbcon: notify the fbdev of resume
d38ceaf9
AD
3529 *
3530 * Bring the hw back to operating state (all asics).
3531 * Returns 0 for success or an error on failure.
3532 * Called at driver resume.
3533 */
de185019 3534int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3535{
3536 struct drm_connector *connector;
f8d2d39e 3537 struct drm_connector_list_iter iter;
d38ceaf9 3538 struct amdgpu_device *adev = dev->dev_private;
756e6880 3539 struct drm_crtc *crtc;
03161a6e 3540 int r = 0;
d38ceaf9
AD
3541
3542 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3543 return 0;
3544
d38ceaf9 3545 /* post card */
39c640c0 3546 if (amdgpu_device_need_post(adev)) {
74b0b157 3547 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3548 if (r)
3549 DRM_ERROR("amdgpu asic init failed\n");
3550 }
d38ceaf9 3551
06ec9070 3552 r = amdgpu_device_ip_resume(adev);
e6707218 3553 if (r) {
06ec9070 3554 DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
4d3b9ae5 3555 return r;
e6707218 3556 }
5ceb54c6
AD
3557 amdgpu_fence_driver_resume(adev);
3558
d38ceaf9 3559
06ec9070 3560 r = amdgpu_device_ip_late_init(adev);
03161a6e 3561 if (r)
4d3b9ae5 3562 return r;
d38ceaf9 3563
beff74bc
AD
3564 queue_delayed_work(system_wq, &adev->delayed_init_work,
3565 msecs_to_jiffies(AMDGPU_RESUME_MS));
3566
fe1053b7
AD
3567 if (!amdgpu_device_has_dc_support(adev)) {
3568 /* pin cursors */
3569 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3570 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3571
91334223 3572 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3573 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3574 r = amdgpu_bo_reserve(aobj, true);
3575 if (r == 0) {
3576 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
3577 if (r != 0)
3578 DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
3579 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
3580 amdgpu_bo_unreserve(aobj);
3581 }
756e6880
AD
3582 }
3583 }
3584 }
9593f4d6 3585 r = amdgpu_amdkfd_resume(adev, !fbcon);
ba997709
YZ
3586 if (r)
3587 return r;
756e6880 3588
96a5d8d4 3589 /* Make sure IB tests flushed */
beff74bc 3590 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 3591
d38ceaf9
AD
3592 /* blat the mode back in */
3593 if (fbcon) {
4562236b
HW
3594 if (!amdgpu_device_has_dc_support(adev)) {
3595 /* pre DCE11 */
3596 drm_helper_resume_force_mode(dev);
3597
3598 /* turn on display hw */
3599 drm_modeset_lock_all(dev);
f8d2d39e
LP
3600
3601 drm_connector_list_iter_begin(dev, &iter);
3602 drm_for_each_connector_iter(connector, &iter)
3603 drm_helper_connector_dpms(connector,
3604 DRM_MODE_DPMS_ON);
3605 drm_connector_list_iter_end(&iter);
3606
4562236b 3607 drm_modeset_unlock_all(dev);
d38ceaf9 3608 }
4d3b9ae5 3609 amdgpu_fbdev_set_suspend(adev, 0);
d38ceaf9
AD
3610 }
3611
3612 drm_kms_helper_poll_enable(dev);
23a1a9e5 3613
5e6932fe 3614 amdgpu_ras_resume(adev);
3615
23a1a9e5
L
3616 /*
3617 * Most of the connector probing functions try to acquire runtime pm
3618 * refs to ensure that the GPU is powered on when connector polling is
3619 * performed. Since we're calling this from a runtime PM callback,
3620 * trying to acquire rpm refs will cause us to deadlock.
3621 *
3622 * Since we're guaranteed to be holding the rpm lock, it's safe to
3623 * temporarily disable the rpm helpers so this doesn't deadlock us.
3624 */
3625#ifdef CONFIG_PM
3626 dev->dev->power.disable_depth++;
3627#endif
4562236b
HW
3628 if (!amdgpu_device_has_dc_support(adev))
3629 drm_helper_hpd_irq_event(dev);
3630 else
3631 drm_kms_helper_hotplug_event(dev);
23a1a9e5
L
3632#ifdef CONFIG_PM
3633 dev->dev->power.disable_depth--;
3634#endif
44779b43
RZ
3635 adev->in_suspend = false;
3636
4d3b9ae5 3637 return 0;
d38ceaf9
AD
3638}
3639
e3ecdffa
AD
3640/**
3641 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
3642 *
3643 * @adev: amdgpu_device pointer
3644 *
3645 * The list of all the hardware IPs that make up the asic is walked and
3646 * the check_soft_reset callbacks are run. check_soft_reset determines
3647 * if the asic is still hung or not.
3648 * Returns true if any of the IPs are still in a hung state, false if not.
3649 */
06ec9070 3650static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
3651{
3652 int i;
3653 bool asic_hang = false;
3654
f993d628
ML
3655 if (amdgpu_sriov_vf(adev))
3656 return true;
3657
8bc04c29
AD
3658 if (amdgpu_asic_need_full_reset(adev))
3659 return true;
3660
63fbf42f 3661 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3662 if (!adev->ip_blocks[i].status.valid)
63fbf42f 3663 continue;
a1255107
AD
3664 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
3665 adev->ip_blocks[i].status.hang =
3666 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
3667 if (adev->ip_blocks[i].status.hang) {
3668 DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
3669 asic_hang = true;
3670 }
3671 }
3672 return asic_hang;
3673}
3674
e3ecdffa
AD
3675/**
3676 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3677 *
3678 * @adev: amdgpu_device pointer
3679 *
3680 * The list of all the hardware IPs that make up the asic is walked and the
3681 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
3682 * handles any IP specific hardware or software state changes that are
3683 * necessary for a soft reset to succeed.
3684 * Returns 0 on success, negative error code on failure.
3685 */
06ec9070 3686static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
3687{
3688 int i, r = 0;
3689
3690 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3691 if (!adev->ip_blocks[i].status.valid)
d31a501e 3692 continue;
a1255107
AD
3693 if (adev->ip_blocks[i].status.hang &&
3694 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3695 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
3696 if (r)
3697 return r;
3698 }
3699 }
3700
3701 return 0;
3702}
3703
e3ecdffa
AD
3704/**
3705 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3706 *
3707 * @adev: amdgpu_device pointer
3708 *
3709 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
3710 * reset is necessary to recover.
3711 * Returns true if a full asic reset is required, false if not.
3712 */
06ec9070 3713static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 3714{
da146d3b
AD
3715 int i;
3716
8bc04c29
AD
3717 if (amdgpu_asic_need_full_reset(adev))
3718 return true;
3719
da146d3b 3720 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3721 if (!adev->ip_blocks[i].status.valid)
da146d3b 3722 continue;
a1255107
AD
3723 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3724 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3725 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
3726 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3727 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 3728 if (adev->ip_blocks[i].status.hang) {
da146d3b
AD
3729 DRM_INFO("Some blocks need full reset!\n");
3730 return true;
3731 }
3732 }
35d782fe
CZ
3733 }
3734 return false;
3735}
3736
e3ecdffa
AD
3737/**
3738 * amdgpu_device_ip_soft_reset - do a soft reset
3739 *
3740 * @adev: amdgpu_device pointer
3741 *
3742 * The list of all the hardware IPs that make up the asic is walked and the
3743 * soft_reset callbacks are run if the block is hung. soft_reset handles any
3744 * IP specific hardware or software state changes that are necessary to soft
3745 * reset the IP.
3746 * Returns 0 on success, negative error code on failure.
3747 */
06ec9070 3748static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3749{
3750 int i, r = 0;
3751
3752 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3753 if (!adev->ip_blocks[i].status.valid)
35d782fe 3754 continue;
a1255107
AD
3755 if (adev->ip_blocks[i].status.hang &&
3756 adev->ip_blocks[i].version->funcs->soft_reset) {
3757 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
3758 if (r)
3759 return r;
3760 }
3761 }
3762
3763 return 0;
3764}
3765
e3ecdffa
AD
3766/**
3767 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
3768 *
3769 * @adev: amdgpu_device pointer
3770 *
3771 * The list of all the hardware IPs that make up the asic is walked and the
3772 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
3773 * handles any IP specific hardware or software state changes that are
3774 * necessary after the IP has been soft reset.
3775 * Returns 0 on success, negative error code on failure.
3776 */
06ec9070 3777static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3778{
3779 int i, r = 0;
3780
3781 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3782 if (!adev->ip_blocks[i].status.valid)
35d782fe 3783 continue;
a1255107
AD
3784 if (adev->ip_blocks[i].status.hang &&
3785 adev->ip_blocks[i].version->funcs->post_soft_reset)
3786 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
3787 if (r)
3788 return r;
3789 }
3790
3791 return 0;
3792}
3793
e3ecdffa 3794/**
c33adbc7 3795 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
3796 *
3797 * @adev: amdgpu_device pointer
3798 *
3799 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
3800 * restore things like GPUVM page tables after a GPU reset where
3801 * the contents of VRAM might be lost.
403009bf
CK
3802 *
3803 * Returns:
3804 * 0 on success, negative error code on failure.
e3ecdffa 3805 */
c33adbc7 3806static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 3807{
c41d1cf6 3808 struct dma_fence *fence = NULL, *next = NULL;
403009bf
CK
3809 struct amdgpu_bo *shadow;
3810 long r = 1, tmo;
c41d1cf6
ML
3811
3812 if (amdgpu_sriov_runtime(adev))
b045d3af 3813 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
3814 else
3815 tmo = msecs_to_jiffies(100);
3816
3817 DRM_INFO("recover vram bo from shadow start\n");
3818 mutex_lock(&adev->shadow_list_lock);
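/* Kick off a shadow->VRAM copy for every shadowed BO and overlap the waits:
 * while the next restore is queued we wait on the previous fence, sharing a
 * single timeout budget (tmo) across the whole list.
 */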
403009bf
CK
3819 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
3820
3821 /* No need to recover an evicted BO */
3822 if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
b575f10d 3823 shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
403009bf
CK
3824 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
3825 continue;
3826
3827 r = amdgpu_bo_restore_shadow(shadow, &next);
3828 if (r)
3829 break;
3830
c41d1cf6 3831 if (fence) {
1712fb1a 3832 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
3833 dma_fence_put(fence);
3834 fence = next;
1712fb1a 3835 if (tmo == 0) {
3836 r = -ETIMEDOUT;
c41d1cf6 3837 break;
1712fb1a 3838 } else if (tmo < 0) {
3839 r = tmo;
3840 break;
3841 }
403009bf
CK
3842 } else {
3843 fence = next;
c41d1cf6 3844 }
c41d1cf6
ML
3845 }
3846 mutex_unlock(&adev->shadow_list_lock);
3847
403009bf
CK
3848 if (fence)
3849 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
3850 dma_fence_put(fence);
3851
1712fb1a 3852 if (r < 0 || tmo <= 0) {
3853 DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
3854 return -EIO;
3855 }
c41d1cf6 3856
403009bf
CK
3857 DRM_INFO("recover vram bo from shadow done\n");
3858 return 0;
c41d1cf6
ML
3859}
3860
a90ad3c2 3861
e3ecdffa 3862/**
06ec9070 3863 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e
ML
3864 *
3865 * @adev: amdgpu device pointer
87e3f136 3866 * @from_hypervisor: request from hypervisor
5740682e
ML
3867 *
3868 * Do a VF FLR and reinitialize the ASIC.
3f48c681 3869 * Returns 0 on success, negative error code on failure.
e3ecdffa
AD
3870 */
3871static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
3872 bool from_hypervisor)
5740682e
ML
3873{
3874 int r;
3875
3876 if (from_hypervisor)
3877 r = amdgpu_virt_request_full_gpu(adev, true);
3878 else
3879 r = amdgpu_virt_reset_gpu(adev);
3880 if (r)
3881 return r;
a90ad3c2 3882
b639c22c
JZ
3883 amdgpu_amdkfd_pre_reset(adev);
3884
a90ad3c2 3885 /* Resume IP prior to SMC */
06ec9070 3886 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
3887 if (r)
3888 goto error;
a90ad3c2 3889
c9ffa427 3890 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 3891 /* we need recover gart prior to run SMC/CP/SDMA resume */
c1c7ce8f 3892 amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
a90ad3c2 3893
7a3e0bb2
RZ
3894 r = amdgpu_device_fw_loading(adev);
3895 if (r)
3896 return r;
3897
a90ad3c2 3898 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 3899 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
3900 if (r)
3901 goto error;
a90ad3c2
ML
3902
3903 amdgpu_irq_gpu_reset_resume_helper(adev);
5740682e 3904 r = amdgpu_ib_ring_tests(adev);
f81e8d53 3905 amdgpu_amdkfd_post_reset(adev);
a90ad3c2 3906
abc34253
ED
3907error:
3908 amdgpu_virt_release_full_gpu(adev, true);
c41d1cf6 3909 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 3910 amdgpu_inc_vram_lost(adev);
c33adbc7 3911 r = amdgpu_device_recover_vram(adev);
a90ad3c2
ML
3912 }
3913
3914 return r;
3915}
3916
12938fad
CK
3917/**
3918 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
3919 *
3920 * @adev: amdgpu device pointer
3921 *
3922 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
3923 * a hung GPU.
3924 */
3925bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
3926{
3927 if (!amdgpu_device_ip_check_soft_reset(adev)) {
3928 DRM_INFO("Timeout, but no hardware hang detected.\n");
3929 return false;
3930 }
3931
3ba7b418
AG
3932 if (amdgpu_gpu_recovery == 0)
3933 goto disabled;
3934
3935 if (amdgpu_sriov_vf(adev))
3936 return true;
3937
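/* amdgpu_gpu_recovery: 0 = never recover, -1 = auto (only for the ASICs
 * listed below), any other value = always attempt recovery.
 */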
3938 if (amdgpu_gpu_recovery == -1) {
3939 switch (adev->asic_type) {
fc42d47c
AG
3940 case CHIP_BONAIRE:
3941 case CHIP_HAWAII:
3ba7b418
AG
3942 case CHIP_TOPAZ:
3943 case CHIP_TONGA:
3944 case CHIP_FIJI:
3945 case CHIP_POLARIS10:
3946 case CHIP_POLARIS11:
3947 case CHIP_POLARIS12:
3948 case CHIP_VEGAM:
3949 case CHIP_VEGA20:
3950 case CHIP_VEGA10:
3951 case CHIP_VEGA12:
c43b849f 3952 case CHIP_RAVEN:
e9d4cf91 3953 case CHIP_ARCTURUS:
2cb44fb0 3954 case CHIP_RENOIR:
658c6639
AD
3955 case CHIP_NAVI10:
3956 case CHIP_NAVI14:
3957 case CHIP_NAVI12:
131a3c74 3958 case CHIP_SIENNA_CICHLID:
3ba7b418
AG
3959 break;
3960 default:
3961 goto disabled;
3962 }
12938fad
CK
3963 }
3964
3965 return true;
3ba7b418
AG
3966
3967disabled:
3968 DRM_INFO("GPU recovery disabled.\n");
3969 return false;
12938fad
CK
3970}
3971
5c6dd71e 3972
26bc5340
AG
3973static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
3974 struct amdgpu_job *job,
3975 bool *need_full_reset_arg)
3976{
3977 int i, r = 0;
3978 bool need_full_reset = *need_full_reset_arg;
71182665 3979
728e7e0c
JZ
3980 amdgpu_debugfs_wait_dump(adev);
3981
71182665 3982 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
3983 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3984 struct amdgpu_ring *ring = adev->rings[i];
3985
51687759 3986 if (!ring || !ring->sched.thread)
0875dc9e 3987 continue;
5740682e 3988
2f9d4084
ML
3989 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
3990 amdgpu_fence_driver_force_completion(ring);
0875dc9e 3991 }
d38ceaf9 3992
222b5f04
AG
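/* Bump the offending job's karma so the DRM scheduler can mark the owning
 * context guilty once its hang limit is exceeded.
 */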
3993 if (job)
3994 drm_sched_increase_karma(&job->base);
3995
1d721ed6 3996 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
3997 if (!amdgpu_sriov_vf(adev)) {
3998
3999 if (!need_full_reset)
4000 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4001
4002 if (!need_full_reset) {
4003 amdgpu_device_ip_pre_soft_reset(adev);
4004 r = amdgpu_device_ip_soft_reset(adev);
4005 amdgpu_device_ip_post_soft_reset(adev);
4006 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
4007 DRM_INFO("soft reset failed, will fallback to full reset!\n");
4008 need_full_reset = true;
4009 }
4010 }
4011
4012 if (need_full_reset)
4013 r = amdgpu_device_ip_suspend(adev);
4014
4015 *need_full_reset_arg = need_full_reset;
4016 }
4017
4018 return r;
4019}
4020
041a62bc 4021static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
26bc5340
AG
4022 struct list_head *device_list_handle,
4023 bool *need_full_reset_arg)
4024{
4025 struct amdgpu_device *tmp_adev = NULL;
4026 bool need_full_reset = *need_full_reset_arg, vram_lost = false;
4027 int r = 0;
4028
4029 /*
4030 * ASIC reset has to be done on all XGMI hive nodes ASAP
4031 * to allow proper link negotiation in FW (within 1 sec)
4032 */
4033 if (need_full_reset) {
4034 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
041a62bc 4035 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 4036 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
c96cf282 4037 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
4038 r = -EALREADY;
4039 } else
4040 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 4041
041a62bc
AG
4042 if (r) {
4043 DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s",
4044 r, tmp_adev->ddev->unique);
4045 break;
ce316fa5
LM
4046 }
4047 }
4048
041a62bc
AG
4049 /* For XGMI wait for all resets to complete before proceed */
4050 if (!r) {
ce316fa5
LM
4051 list_for_each_entry(tmp_adev, device_list_handle,
4052 gmc.xgmi.head) {
4053 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4054 flush_work(&tmp_adev->xgmi_reset_work);
4055 r = tmp_adev->asic_reset_res;
4056 if (r)
4057 break;
ce316fa5
LM
4058 }
4059 }
4060 }
ce316fa5 4061 }
26bc5340 4062
43c4d576
JC
4063 if (!r && amdgpu_ras_intr_triggered()) {
4064 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4065 if (tmp_adev->mmhub.funcs &&
4066 tmp_adev->mmhub.funcs->reset_ras_error_count)
4067 tmp_adev->mmhub.funcs->reset_ras_error_count(tmp_adev);
4068 }
4069
00eaa571 4070 amdgpu_ras_intr_cleared();
43c4d576 4071 }
00eaa571 4072
26bc5340
AG
4073 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4074 if (need_full_reset) {
4075 /* post card */
df9c8d1a
DL
4076 if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context)) {
4077 dev_warn(tmp_adev->dev, "asic atom init failed!");
4078 r = -EAGAIN;
4079 goto out;
4080 }
26bc5340
AG
4081
4082 if (!r) {
4083 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
4084 r = amdgpu_device_ip_resume_phase1(tmp_adev);
4085 if (r)
4086 goto out;
4087
4088 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
4089 if (vram_lost) {
77e7f829 4090 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 4091 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
4092 }
4093
4094 r = amdgpu_gtt_mgr_recover(
4095 &tmp_adev->mman.bdev.man[TTM_PL_TT]);
4096 if (r)
4097 goto out;
4098
4099 r = amdgpu_device_fw_loading(tmp_adev);
4100 if (r)
4101 return r;
4102
4103 r = amdgpu_device_ip_resume_phase2(tmp_adev);
4104 if (r)
4105 goto out;
4106
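/* Re-snapshot the magic bytes that amdgpu_device_check_vram_lost()
 * compares against, now that VRAM contents have changed.
 */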
4107 if (vram_lost)
4108 amdgpu_device_fill_reset_magic(tmp_adev);
4109
fdafb359
EQ
4110 /*
4111 * Add this ASIC as tracked as reset was already
4112 * complete successfully.
4113 */
4114 amdgpu_register_gpu_instance(tmp_adev);
4115
7c04ca50 4116 r = amdgpu_device_ip_late_init(tmp_adev);
4117 if (r)
4118 goto out;
4119
565d1941
EQ
4120 amdgpu_fbdev_set_suspend(tmp_adev, 0);
4121
e79a04d5 4122 /* must succeed. */
511fdbc3 4123 amdgpu_ras_resume(tmp_adev);
e79a04d5 4124
26bc5340
AG
4125 /* Update PSP FW topology after reset */
4126 if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
4127 r = amdgpu_xgmi_update_topology(hive, tmp_adev);
4128 }
4129 }
4130
4131
4132out:
4133 if (!r) {
4134 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
4135 r = amdgpu_ib_ring_tests(tmp_adev);
4136 if (r) {
4137 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
4138 r = amdgpu_device_ip_suspend(tmp_adev);
4139 need_full_reset = true;
4140 r = -EAGAIN;
4141 goto end;
4142 }
4143 }
4144
4145 if (!r)
4146 r = amdgpu_device_recover_vram(tmp_adev);
4147 else
4148 tmp_adev->asic_reset_res = r;
4149 }
4150
4151end:
4152 *need_full_reset_arg = need_full_reset;
4153 return r;
4154}
4155
df9c8d1a 4156static bool amdgpu_device_lock_adev(struct amdgpu_device *adev)
26bc5340 4157{
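/* Claim exclusive reset ownership: if in_gpu_reset was already set, another
 * reset is in flight, so bail out without touching reset_sem.
 */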
df9c8d1a
DL
4158 if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0)
4159 return false;
4160
4161 down_write(&adev->reset_sem);
5740682e 4162
26bc5340 4163 atomic_inc(&adev->gpu_reset_counter);
a3a09142
AD
4164 switch (amdgpu_asic_reset_method(adev)) {
4165 case AMD_RESET_METHOD_MODE1:
4166 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
4167 break;
4168 case AMD_RESET_METHOD_MODE2:
4169 adev->mp1_state = PP_MP1_STATE_RESET;
4170 break;
4171 default:
4172 adev->mp1_state = PP_MP1_STATE_NONE;
4173 break;
4174 }
1d721ed6
AG
4175
4176 return true;
26bc5340 4177}
d38ceaf9 4178
26bc5340
AG
4179static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
4180{
89041940 4181 amdgpu_vf_error_trans_all(adev);
a3a09142 4182 adev->mp1_state = PP_MP1_STATE_NONE;
df9c8d1a
DL
4183 atomic_set(&adev->in_gpu_reset, 0);
4184 up_write(&adev->reset_sem);
26bc5340
AG
4185}
4186
3f12acc8
EQ
4187static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
4188{
4189 struct pci_dev *p = NULL;
4190
4191 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4192 adev->pdev->bus->number, 1);
4193 if (p) {
4194 pm_runtime_enable(&(p->dev));
4195 pm_runtime_resume(&(p->dev));
4196 }
4197}
4198
4199static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
4200{
4201 enum amd_reset_method reset_method;
4202 struct pci_dev *p = NULL;
4203 u64 expires;
4204
4205 /*
4206 * For now, only BACO and mode1 reset are confirmed
4207 * to suffer from the audio issue if the audio device is not properly suspended.
4208 */
4209 reset_method = amdgpu_asic_reset_method(adev);
4210 if ((reset_method != AMD_RESET_METHOD_BACO) &&
4211 (reset_method != AMD_RESET_METHOD_MODE1))
4212 return -EINVAL;
4213
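/* devfn 1 (device 0, function 1) on the GPU's own bus is normally the GPU's
 * companion HD Audio (display audio) controller.
 */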
4214 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4215 adev->pdev->bus->number, 1);
4216 if (!p)
4217 return -ENODEV;
4218
4219 expires = pm_runtime_autosuspend_expiration(&(p->dev));
4220 if (!expires)
4221 /*
4222 * If we cannot get the audio device autosuspend delay,
4223 * a fixed 4s interval is used. Since the audio controller's
4224 * default autosuspend delay setting is 3s, the 4s used
4225 * here is guaranteed to cover that.
4226 */
54b7feb9 4227 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
4228
4229 while (!pm_runtime_status_suspended(&(p->dev))) {
4230 if (!pm_runtime_suspend(&(p->dev)))
4231 break;
4232
4233 if (expires < ktime_get_mono_fast_ns()) {
4234 dev_warn(adev->dev, "failed to suspend display audio\n");
4235 /* TODO: abort the succeeding gpu reset? */
4236 return -ETIMEDOUT;
4237 }
4238 }
4239
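/* Keep the audio device suspended for the duration of the reset;
 * amdgpu_device_resume_display_audio() re-enables runtime PM afterwards.
 */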
4240 pm_runtime_disable(&(p->dev));
4241
4242 return 0;
4243}
4244
26bc5340
AG
4245/**
4246 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
4247 *
4248 * @adev: amdgpu device pointer
4249 * @job: which job triggered the hang
4250 *
4251 * Attempt to reset the GPU if it has hung (all asics).
4252 * Attempt to do a soft reset or full reset and reinitialize the ASIC.
4253 * Returns 0 for success or an error on failure.
4254 */
4255
4256int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
4257 struct amdgpu_job *job)
4258{
1d721ed6 4259 struct list_head device_list, *device_list_handle = NULL;
7dd8c205
EQ
4260 bool need_full_reset = false;
4261 bool job_signaled = false;
26bc5340 4262 struct amdgpu_hive_info *hive = NULL;
26bc5340 4263 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 4264 int i, r = 0;
bb5c7235 4265 bool need_emergency_restart = false;
3f12acc8 4266 bool audio_suspended = false;
26bc5340 4267
bb5c7235
WS
4268 /*
4269 * Special case: RAS triggered and full reset isn't supported
4270 */
4271 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
4272
d5ea093e
AG
4273 /*
4274 * Flush RAM to disk so that after reboot
4275 * the user can read log and see why the system rebooted.
4276 */
bb5c7235 4277 if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
4278 DRM_WARN("Emergency reboot.");
4279
4280 ksys_sync_helper();
4281 emergency_restart();
4282 }
4283
b823821f 4284 dev_info(adev->dev, "GPU %s begin!\n",
bb5c7235 4285 need_emergency_restart ? "jobs stop":"reset");
26bc5340
AG
4286
4287 /*
1d721ed6
AG
4288 * Here we trylock to avoid a chain of resets executing from
4289 * either jobs triggered on different adevs in the XGMI hive or jobs on
4290 * different schedulers for the same device while this TO handler is running.
4291 * We always reset all schedulers for a device and all devices in an XGMI
4292 * hive, so that should take care of them too.
26bc5340 4293 */
df9c8d1a
DL
4294 hive = amdgpu_get_xgmi_hive(adev, false);
4295 if (hive) {
4296 if (atomic_cmpxchg(&hive->in_reset, 0, 1) != 0) {
4297 DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
4298 job ? job->base.id : -1, hive->hive_id);
4299 return 0;
4300 }
4301 mutex_lock(&hive->hive_lock);
1d721ed6 4302 }
26bc5340 4303
9e94d22c
EQ
4304 /*
4305 * Build list of devices to reset.
4306 * In case we are in XGMI hive mode, resort the device list
4307 * to put adev in the 1st position.
4308 */
4309 INIT_LIST_HEAD(&device_list);
4310 if (adev->gmc.xgmi.num_physical_nodes > 1) {
4311 if (!hive)
26bc5340 4312 return -ENODEV;
9e94d22c
EQ
4313 if (!list_is_first(&adev->gmc.xgmi.head, &hive->device_list))
4314 list_rotate_to_front(&adev->gmc.xgmi.head, &hive->device_list);
26bc5340
AG
4315 device_list_handle = &hive->device_list;
4316 } else {
4317 list_add_tail(&adev->gmc.xgmi.head, &device_list);
4318 device_list_handle = &device_list;
4319 }
4320
1d721ed6
AG
4321 /* block all schedulers and reset given job's ring */
4322 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
df9c8d1a 4323 if (!amdgpu_device_lock_adev(tmp_adev)) {
9e94d22c
EQ
4324 DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
4325 job ? job->base.id : -1);
df9c8d1a
DL
4326 r = 0;
4327 goto skip_recovery;
7c6e68c7
AG
4328 }
4329
3f12acc8
EQ
4330 /*
4331 * Try to put the audio codec into suspend state
4332 * before the gpu reset starts.
4333 *
4334 * The graphics device's power domain is shared with
4335 * the AZ (audio) power domain. Without this, we may
4336 * change the audio hardware from behind the audio
4337 * driver's back, which would trigger some audio
4338 * codec errors.
4339 */
4340 if (!amdgpu_device_suspend_display_audio(tmp_adev))
4341 audio_suspended = true;
4342
9e94d22c
EQ
4343 amdgpu_ras_set_error_query_ready(tmp_adev, false);
4344
52fb44cf
EQ
4345 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
4346
9e94d22c
EQ
4347 if (!amdgpu_sriov_vf(tmp_adev))
4348 amdgpu_amdkfd_pre_reset(tmp_adev);
4349
12ffa55d
AG
4350 /*
4351 * Mark these ASICs to be reset as untracked first,
4352 * and add them back after the reset completes.
4353 */
4354 amdgpu_unregister_gpu_instance(tmp_adev);
4355
a2f63ee8 4356 amdgpu_fbdev_set_suspend(tmp_adev, 1);
565d1941 4357
f1c1314b 4358 /* disable ras on ALL IPs */
bb5c7235 4359 if (!need_emergency_restart &&
b823821f 4360 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 4361 amdgpu_ras_suspend(tmp_adev);
4362
1d721ed6
AG
4363 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4364 struct amdgpu_ring *ring = tmp_adev->rings[i];
4365
4366 if (!ring || !ring->sched.thread)
4367 continue;
4368
0b2d2c2e 4369 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 4370
bb5c7235 4371 if (need_emergency_restart)
7c6e68c7 4372 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6
AG
4373 }
4374 }
4375
bb5c7235 4376 if (need_emergency_restart)
7c6e68c7
AG
4377 goto skip_sched_resume;
4378
1d721ed6
AG
4379 /*
4380 * Must check guilty signal here since after this point all old
4381 * HW fences are force signaled.
4382 *
4383 * job->base holds a reference to parent fence
4384 */
4385 if (job && job->base.s_fence->parent &&
7dd8c205 4386 dma_fence_is_signaled(job->base.s_fence->parent)) {
1d721ed6 4387 job_signaled = true;
1d721ed6
AG
4388 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
4389 goto skip_hw_reset;
4390 }
4391
26bc5340
AG
4392retry: /* Rest of adevs pre asic reset from XGMI hive. */
4393 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
26bc5340
AG
4394 r = amdgpu_device_pre_asic_reset(tmp_adev,
4395 NULL,
4396 &need_full_reset);
4397 /* TODO: Should we stop? */
4398 if (r) {
4399 DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
4400 r, tmp_adev->ddev->unique);
4401 tmp_adev->asic_reset_res = r;
4402 }
4403 }
4404
4405 /* Actual ASIC resets if needed.*/
4406 /* TODO Implement XGMI hive reset logic for SRIOV */
4407 if (amdgpu_sriov_vf(adev)) {
4408 r = amdgpu_device_reset_sriov(adev, job ? false : true);
4409 if (r)
4410 adev->asic_reset_res = r;
4411 } else {
041a62bc 4412 r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
26bc5340
AG
4413 if (r && r == -EAGAIN)
4414 goto retry;
4415 }
4416
1d721ed6
AG
4417skip_hw_reset:
4418
26bc5340
AG
4419 /* Post ASIC reset for all devs .*/
4420 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
7c6e68c7 4421
1d721ed6
AG
4422 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4423 struct amdgpu_ring *ring = tmp_adev->rings[i];
4424
4425 if (!ring || !ring->sched.thread)
4426 continue;
4427
4428 /* No point in resubmitting jobs if we didn't HW reset */
4429 if (!tmp_adev->asic_reset_res && !job_signaled)
4430 drm_sched_resubmit_jobs(&ring->sched);
4431
4432 drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
4433 }
4434
4435 if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
4436 drm_helper_resume_force_mode(tmp_adev->ddev);
4437 }
4438
4439 tmp_adev->asic_reset_res = 0;
26bc5340
AG
4440
4441 if (r) {
4442 /* bad news, how to tell it to userspace ? */
12ffa55d 4443 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
4444 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
4445 } else {
12ffa55d 4446 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340 4447 }
7c6e68c7 4448 }
26bc5340 4449
7c6e68c7
AG
4450skip_sched_resume:
4451 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4452 /* unlock kfd: SRIOV would do it separately */
bb5c7235 4453 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
7c6e68c7 4454 amdgpu_amdkfd_post_reset(tmp_adev);
3f12acc8
EQ
4455 if (audio_suspended)
4456 amdgpu_device_resume_display_audio(tmp_adev);
26bc5340
AG
4457 amdgpu_device_unlock_adev(tmp_adev);
4458 }
4459
df9c8d1a 4460skip_recovery:
9e94d22c 4461 if (hive) {
df9c8d1a 4462 atomic_set(&hive->in_reset, 0);
9e94d22c
EQ
4463 mutex_unlock(&hive->hive_lock);
4464 }
26bc5340
AG
4465
4466 if (r)
4467 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
d38ceaf9
AD
4468 return r;
4469}
4470
e3ecdffa
AD
4471/**
4472 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
4473 *
4474 * @adev: amdgpu_device pointer
4475 *
4476 * Fetches and stores in the driver the PCIE capabilities (gen speed
4477 * and lanes) of the slot the device is in. Handles APUs and
4478 * virtualized environments where PCIE config space may not be available.
4479 */
5494d864 4480static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 4481{
5d9a6330 4482 struct pci_dev *pdev;
c5313457
HK
4483 enum pci_bus_speed speed_cap, platform_speed_cap;
4484 enum pcie_link_width platform_link_width;
d0dd7f0c 4485
cd474ba0
AD
4486 if (amdgpu_pcie_gen_cap)
4487 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 4488
cd474ba0
AD
4489 if (amdgpu_pcie_lane_cap)
4490 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 4491
cd474ba0
AD
4492 /* covers APUs as well */
4493 if (pci_is_root_bus(adev->pdev->bus)) {
4494 if (adev->pm.pcie_gen_mask == 0)
4495 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
4496 if (adev->pm.pcie_mlw_mask == 0)
4497 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 4498 return;
cd474ba0 4499 }
d0dd7f0c 4500
c5313457
HK
4501 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
4502 return;
4503
dbaa922b
AD
4504 pcie_bandwidth_available(adev->pdev, NULL,
4505 &platform_speed_cap, &platform_link_width);
c5313457 4506
cd474ba0 4507 if (adev->pm.pcie_gen_mask == 0) {
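/* Both masks below are cumulative: a Gen4-capable link advertises
 * GEN1..GEN4 support, a Gen3 link GEN1..GEN3, and so on.
 */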
5d9a6330
AD
4508 /* asic caps */
4509 pdev = adev->pdev;
4510 speed_cap = pcie_get_speed_cap(pdev);
4511 if (speed_cap == PCI_SPEED_UNKNOWN) {
4512 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
4513 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4514 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 4515 } else {
5d9a6330
AD
4516 if (speed_cap == PCIE_SPEED_16_0GT)
4517 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4518 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4519 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4520 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
4521 else if (speed_cap == PCIE_SPEED_8_0GT)
4522 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4523 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4524 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
4525 else if (speed_cap == PCIE_SPEED_5_0GT)
4526 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4527 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
4528 else
4529 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
4530 }
4531 /* platform caps */
c5313457 4532 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
4533 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4534 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4535 } else {
c5313457 4536 if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
4537 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4538 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4539 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4540 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 4541 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
4542 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4543 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4544 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 4545 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
4546 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4547 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4548 else
4549 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
4550
cd474ba0
AD
4551 }
4552 }
4553 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 4554 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
4555 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
4556 } else {
c5313457 4557 switch (platform_link_width) {
5d9a6330 4558 case PCIE_LNK_X32:
cd474ba0
AD
4559 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
4560 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4561 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4562 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4563 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4564 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4565 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4566 break;
5d9a6330 4567 case PCIE_LNK_X16:
cd474ba0
AD
4568 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4569 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4570 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4571 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4572 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4573 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4574 break;
5d9a6330 4575 case PCIE_LNK_X12:
cd474ba0
AD
4576 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4577 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4578 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4579 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4580 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4581 break;
5d9a6330 4582 case PCIE_LNK_X8:
cd474ba0
AD
4583 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4584 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4585 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4586 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4587 break;
5d9a6330 4588 case PCIE_LNK_X4:
cd474ba0
AD
4589 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4590 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4591 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4592 break;
5d9a6330 4593 case PCIE_LNK_X2:
cd474ba0
AD
4594 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4595 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4596 break;
5d9a6330 4597 case PCIE_LNK_X1:
cd474ba0
AD
4598 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
4599 break;
4600 default:
4601 break;
4602 }
d0dd7f0c
AD
4603 }
4604 }
4605}
d38ceaf9 4606
361dbd01
AD
4607int amdgpu_device_baco_enter(struct drm_device *dev)
4608{
4609 struct amdgpu_device *adev = dev->dev_private;
7a22677b 4610 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01
AD
4611
4612 if (!amdgpu_device_supports_baco(adev->ddev))
4613 return -ENOTSUPP;
4614
7a22677b
LM
4615 if (ras && ras->supported)
4616 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
4617
9530273e 4618 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
4619}
4620
4621int amdgpu_device_baco_exit(struct drm_device *dev)
4622{
4623 struct amdgpu_device *adev = dev->dev_private;
7a22677b 4624 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 4625 int ret = 0;
361dbd01
AD
4626
4627 if (!amdgpu_device_supports_baco(adev->ddev))
4628 return -ENOTSUPP;
4629
9530273e
EQ
4630 ret = amdgpu_dpm_baco_exit(adev);
4631 if (ret)
4632 return ret;
7a22677b
LM
4633
4634 if (ras && ras->supported)
4635 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
4636
4637 return 0;
361dbd01 4638}