[linux-2.6-block.git] / drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
d38ceaf9
AD
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
b1ddf548 28#include <linux/power_supply.h>
0875dc9e 29#include <linux/kthread.h>
fdf2f6c5 30#include <linux/module.h>
d38ceaf9
AD
31#include <linux/console.h>
32#include <linux/slab.h>
fdf2f6c5 33
4562236b 34#include <drm/drm_atomic_helper.h>
fcd70cd3 35#include <drm/drm_probe_helper.h>
d38ceaf9
AD
36#include <drm/amdgpu_drm.h>
37#include <linux/vgaarb.h>
38#include <linux/vga_switcheroo.h>
39#include <linux/efi.h>
40#include "amdgpu.h"
f4b373f4 41#include "amdgpu_trace.h"
d38ceaf9
AD
42#include "amdgpu_i2c.h"
43#include "atom.h"
44#include "amdgpu_atombios.h"
a5bde2f9 45#include "amdgpu_atomfirmware.h"
d0dd7f0c 46#include "amd_pcie.h"
33f34802
KW
47#ifdef CONFIG_DRM_AMDGPU_SI
48#include "si.h"
49#endif
a2e73f56
AD
50#ifdef CONFIG_DRM_AMDGPU_CIK
51#include "cik.h"
52#endif
aaa36a97 53#include "vi.h"
460826e6 54#include "soc15.h"
0a5b8c7b 55#include "nv.h"
d38ceaf9 56#include "bif/bif_4_1_d.h"
9accf2fd 57#include <linux/pci.h>
bec86378 58#include <linux/firmware.h>
89041940 59#include "amdgpu_vf_error.h"
d38ceaf9 60
ba997709 61#include "amdgpu_amdkfd.h"
d2f52ac8 62#include "amdgpu_pm.h"
d38ceaf9 63
5183411b 64#include "amdgpu_xgmi.h"
c030f2e4 65#include "amdgpu_ras.h"
9c7c85f7 66#include "amdgpu_pmu.h"
bd607166 67#include "amdgpu_fru_eeprom.h"
5183411b 68
d5ea093e 69#include <linux/suspend.h>
c6a6e2db 70#include <drm/task_barrier.h>
3f12acc8 71#include <linux/pm_runtime.h>
d5ea093e 72
e2a75f88 73MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
3f76dced 74MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
2d2e5e7e 75MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
ad5a67a7 76MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
54c4d17e 77MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
65e60f6e 78MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
b51a26a0 79MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
23c6268e 80MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
ed42cfe1 81MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
42b325e5 82MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
c0a43457 83MODULE_FIRMWARE("amdgpu/sienna_cichlid_gpu_info.bin");
120eb833 84MODULE_FIRMWARE("amdgpu/navy_flounder_gpu_info.bin");
e2a75f88 85
2dc80b00
S
86#define AMDGPU_RESUME_MS 2000
87
050091ab 88const char *amdgpu_asic_name[] = {
da69c161
KW
89 "TAHITI",
90 "PITCAIRN",
91 "VERDE",
92 "OLAND",
93 "HAINAN",
d38ceaf9
AD
94 "BONAIRE",
95 "KAVERI",
96 "KABINI",
97 "HAWAII",
98 "MULLINS",
99 "TOPAZ",
100 "TONGA",
48299f95 101 "FIJI",
d38ceaf9 102 "CARRIZO",
139f4917 103 "STONEY",
2cc0c0b5
FC
104 "POLARIS10",
105 "POLARIS11",
c4642a47 106 "POLARIS12",
48ff108d 107 "VEGAM",
d4196f01 108 "VEGA10",
8fab806a 109 "VEGA12",
956fcddc 110 "VEGA20",
2ca8a5d2 111 "RAVEN",
d6c3b24e 112 "ARCTURUS",
1eee4228 113 "RENOIR",
852a6626 114 "NAVI10",
87dbad02 115 "NAVI14",
9802f5d7 116 "NAVI12",
ccaf72d3 117 "SIENNA_CICHLID",
ddd8fbe7 118 "NAVY_FLOUNDER",
d38ceaf9
AD
119 "LAST",
120};
121
dcea6e65
KR
122/**
123 * DOC: pcie_replay_count
124 *
125 * The amdgpu driver provides a sysfs API for reporting the total number
126 * of PCIe replays (NAKs)
127 * The file pcie_replay_count is used for this and returns the total
128 * number of replays as a sum of the NAKs generated and NAKs received
129 */
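/*
 * Illustrative note (not part of the driver): the attribute below is created
 * on the PCI device, so from user space the counter is typically read with
 * something like "cat /sys/class/drm/card0/device/pcie_replay_count"; the
 * exact card index depends on the system.
 */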
130
131static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
132 struct device_attribute *attr, char *buf)
133{
134 struct drm_device *ddev = dev_get_drvdata(dev);
135 struct amdgpu_device *adev = ddev->dev_private;
136 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
137
138 return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
139}
140
141static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
142 amdgpu_device_get_pcie_replay_count, NULL);
143
5494d864
AD
144static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
145
bd607166
KR
146/**
147 * DOC: product_name
148 *
149 * The amdgpu driver provides a sysfs API for reporting the product name
150 * for the device.
151 * The file product_name is used for this and returns the product name
152 * as returned from the FRU.
153 * NOTE: This is only available for certain server cards
154 */
155
156static ssize_t amdgpu_device_get_product_name(struct device *dev,
157 struct device_attribute *attr, char *buf)
158{
159 struct drm_device *ddev = dev_get_drvdata(dev);
160 struct amdgpu_device *adev = ddev->dev_private;
161
162 return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_name);
163}
164
165static DEVICE_ATTR(product_name, S_IRUGO,
166 amdgpu_device_get_product_name, NULL);
167
168/**
169 * DOC: product_number
170 *
171 * The amdgpu driver provides a sysfs API for reporting the part number
172 * for the device.
173 * The file product_number is used for this and returns the part number
174 * as returned from the FRU.
175 * NOTE: This is only available for certain server cards
176 */
177
178static ssize_t amdgpu_device_get_product_number(struct device *dev,
179 struct device_attribute *attr, char *buf)
180{
181 struct drm_device *ddev = dev_get_drvdata(dev);
182 struct amdgpu_device *adev = ddev->dev_private;
183
184 return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_number);
185}
186
187static DEVICE_ATTR(product_number, S_IRUGO,
188 amdgpu_device_get_product_number, NULL);
189
190/**
191 * DOC: serial_number
192 *
193 * The amdgpu driver provides a sysfs API for reporting the serial number
194 * for the device
195 * The file serial_number is used for this and returns the serial number
196 * as returned from the FRU.
197 * NOTE: This is only available for certain server cards
198 */
199
200static ssize_t amdgpu_device_get_serial_number(struct device *dev,
201 struct device_attribute *attr, char *buf)
202{
203 struct drm_device *ddev = dev_get_drvdata(dev);
204 struct amdgpu_device *adev = ddev->dev_private;
205
206 return snprintf(buf, PAGE_SIZE, "%s\n", adev->serial);
207}
208
209static DEVICE_ATTR(serial_number, S_IRUGO,
210 amdgpu_device_get_serial_number, NULL);
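/*
 * Illustrative note (not part of the driver): the three FRU-backed attributes
 * above (product_name, product_number, serial_number) are plain text files
 * under the PCI device directory, e.g.
 * "cat /sys/class/drm/card0/device/product_name", and are only populated on
 * the server cards that carry FRU data.
 */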
211
e3ecdffa 212/**
31af062a 213 * amdgpu_device_supports_boco - Is the device a dGPU with HG/PX power control
e3ecdffa
AD
214 *
215 * @dev: drm_device pointer
216 *
217 * Returns true if the device is a dGPU with HG/PX power control,
218 * otherwise return false.
219 */
31af062a 220bool amdgpu_device_supports_boco(struct drm_device *dev)
d38ceaf9
AD
221{
222 struct amdgpu_device *adev = dev->dev_private;
223
2f7d10b3 224 if (adev->flags & AMD_IS_PX)
d38ceaf9
AD
225 return true;
226 return false;
227}
228
a69cba42
AD
229/**
230 * amdgpu_device_supports_baco - Does the device support BACO
231 *
232 * @dev: drm_device pointer
233 *
234 * Returns true if the device supports BACO,
235 * otherwise return false.
236 */
237bool amdgpu_device_supports_baco(struct drm_device *dev)
238{
239 struct amdgpu_device *adev = dev->dev_private;
240
241 return amdgpu_asic_supports_baco(adev);
242}
243
e35e2b11
TY
244/**
245 * VRAM access helper functions.
246 *
247 * amdgpu_device_vram_access - read/write a buffer in vram
248 *
249 * @adev: amdgpu_device pointer
250 * @pos: offset of the buffer in vram
251 * @buf: virtual address of the buffer in system memory
252 * @size: read/write size in bytes; the buffer at @buf must be at least @size bytes
253 * @write: true - write to vram, otherwise - read from vram
254 */
255void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
256 uint32_t *buf, size_t size, bool write)
257{
e35e2b11 258 unsigned long flags;
ce05ac56
CK
259 uint32_t hi = ~0;
260 uint64_t last;
261
9d11eb0d
CK
262
263#ifdef CONFIG_64BIT
264 last = min(pos + size, adev->gmc.visible_vram_size);
265 if (last > pos) {
266 void __iomem *addr = adev->mman.aper_base_kaddr + pos;
267 size_t count = last - pos;
268
269 if (write) {
270 memcpy_toio(addr, buf, count);
271 mb();
272 amdgpu_asic_flush_hdp(adev, NULL);
273 } else {
274 amdgpu_asic_invalidate_hdp(adev, NULL);
275 mb();
276 memcpy_fromio(buf, addr, count);
277 }
278
279 if (count == size)
280 return;
281
282 pos += count;
283 buf += count / 4;
284 size -= count;
285 }
286#endif
287
ce05ac56
CK
288 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
289 for (last = pos + size; pos < last; pos += 4) {
290 uint32_t tmp = pos >> 31;
e35e2b11 291
e35e2b11 292 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
ce05ac56
CK
293 if (tmp != hi) {
294 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
295 hi = tmp;
296 }
e35e2b11
TY
297 if (write)
298 WREG32_NO_KIQ(mmMM_DATA, *buf++);
299 else
300 *buf++ = RREG32_NO_KIQ(mmMM_DATA);
e35e2b11 301 }
ce05ac56 302 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
e35e2b11
TY
303}
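/*
 * Usage sketch (illustrative only, not called anywhere in this file): reading
 * a single dword at a caller-chosen byte offset into VRAM with the helper
 * above might look like
 *
 *	uint32_t val;
 *
 *	amdgpu_device_vram_access(adev, vram_offset, &val, sizeof(val), false);
 *
 * where "vram_offset" is a hypothetical byte offset supplied by the caller.
 */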
304
d38ceaf9 305/*
e78b579d 306 * MMIO register access helper functions.
d38ceaf9 307 */
e3ecdffa 308/**
e78b579d 309 * amdgpu_mm_rreg - read a memory mapped IO register
e3ecdffa
AD
310 *
311 * @adev: amdgpu_device pointer
312 * @reg: dword aligned register offset
313 * @acc_flags: access flags which require special behavior
314 *
315 * Returns the 32 bit value from the offset specified.
316 */
e78b579d
HZ
317uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
318 uint32_t acc_flags)
d38ceaf9 319{
f4b373f4
TSD
320 uint32_t ret;
321
f384ff95 322 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
d33a99c4 323 return amdgpu_kiq_rreg(adev, reg);
bc992ba5 324
ec59847e 325 if ((reg * 4) < adev->rmmio_size)
f4b373f4 326 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
e78b579d
HZ
327 else {
328 unsigned long flags;
329
330 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
331 writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
332 ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
333 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
334 }
335 trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
f4b373f4 336 return ret;
d38ceaf9
AD
337}
338
421a2a30
ML
339/*
340 * MMIO register read with byte access helper
341 * @offset: byte offset from MMIO start
342 *
343 */
344
e3ecdffa
AD
345/**
346 * amdgpu_mm_rreg8 - read a memory mapped IO register
347 *
348 * @adev: amdgpu_device pointer
349 * @offset: byte aligned register offset
350 *
351 * Returns the 8 bit value from the offset specified.
352 */
421a2a30
ML
353uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) {
354 if (offset < adev->rmmio_size)
355 return (readb(adev->rmmio + offset));
356 BUG();
357}
358
359/*
360 * MMIO register write with byte access helper
361 * @offset: byte offset from MMIO start
362 * @value: the value to be written to the register
363 *
364 */
e3ecdffa
AD
365/**
366 * amdgpu_mm_wreg8 - write to a memory mapped IO register
367 *
368 * @adev: amdgpu_device pointer
369 * @offset: byte aligned register offset
370 * @value: 8 bit value to write
371 *
372 * Writes the value specified to the offset specified.
373 */
421a2a30
ML
374void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) {
375 if (offset < adev->rmmio_size)
376 writeb(value, adev->rmmio + offset);
377 else
378 BUG();
379}
380
e78b579d 381static inline void amdgpu_mm_wreg_mmio(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t acc_flags)
2e0cc4d4 382{
e78b579d 383 trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);
2e0cc4d4 384
ec59847e 385 if ((reg * 4) < adev->rmmio_size)
2e0cc4d4 386 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
e78b579d
HZ
387 else {
388 unsigned long flags;
389
390 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
391 writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
392 writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
393 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
394 }
2e0cc4d4
ML
395}
396
e3ecdffa 397/**
e78b579d 398 * amdgpu_mm_wreg - write to a memory mapped IO register
e3ecdffa
AD
399 *
400 * @adev: amdgpu_device pointer
401 * @reg: dword aligned register offset
402 * @v: 32 bit value to write to the register
403 * @acc_flags: access flags which require special behavior
404 *
405 * Writes the value specified to the offset specified.
406 */
e78b579d
HZ
407void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
408 uint32_t acc_flags)
d38ceaf9 409{
f384ff95 410 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
d33a99c4 411 return amdgpu_kiq_wreg(adev, reg, v);
bc992ba5 412
e78b579d 413 amdgpu_mm_wreg_mmio(adev, reg, v, acc_flags);
2e0cc4d4 414}
d38ceaf9 415
2e0cc4d4
ML
416/*
417 * amdgpu_mm_wreg_mmio_rlc - write register either with mmio or with RLC path if in range
418 *
419 * this function is invoked only the debugfs register access
420 * */
421void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
422 uint32_t acc_flags)
423{
424 if (amdgpu_sriov_fullaccess(adev) &&
425 adev->gfx.rlc.funcs &&
426 adev->gfx.rlc.funcs->is_rlcg_access_range) {
47ed4e1c 427
2e0cc4d4
ML
428 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
429 return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v);
47ed4e1c 430 }
2e0cc4d4 431
e78b579d 432 amdgpu_mm_wreg_mmio(adev, reg, v, acc_flags);
d38ceaf9
AD
433}
434
e3ecdffa
AD
435/**
436 * amdgpu_io_rreg - read an IO register
437 *
438 * @adev: amdgpu_device pointer
439 * @reg: dword aligned register offset
440 *
441 * Returns the 32 bit value from the offset specified.
442 */
d38ceaf9
AD
443u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
444{
445 if ((reg * 4) < adev->rio_mem_size)
446 return ioread32(adev->rio_mem + (reg * 4));
447 else {
448 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
449 return ioread32(adev->rio_mem + (mmMM_DATA * 4));
450 }
451}
452
e3ecdffa
AD
453/**
454 * amdgpu_io_wreg - write to an IO register
455 *
456 * @adev: amdgpu_device pointer
457 * @reg: dword aligned register offset
458 * @v: 32 bit value to write to the register
459 *
460 * Writes the value specified to the offset specified.
461 */
d38ceaf9
AD
462void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
463{
d38ceaf9
AD
464 if ((reg * 4) < adev->rio_mem_size)
465 iowrite32(v, adev->rio_mem + (reg * 4));
466 else {
467 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
468 iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
469 }
470}
471
472/**
473 * amdgpu_mm_rdoorbell - read a doorbell dword
474 *
475 * @adev: amdgpu_device pointer
476 * @index: doorbell index
477 *
478 * Returns the value in the doorbell aperture at the
479 * requested doorbell index (CIK).
480 */
481u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
482{
483 if (index < adev->doorbell.num_doorbells) {
484 return readl(adev->doorbell.ptr + index);
485 } else {
486 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
487 return 0;
488 }
489}
490
491/**
492 * amdgpu_mm_wdoorbell - write a doorbell dword
493 *
494 * @adev: amdgpu_device pointer
495 * @index: doorbell index
496 * @v: value to write
497 *
498 * Writes @v to the doorbell aperture at the
499 * requested doorbell index (CIK).
500 */
501void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
502{
503 if (index < adev->doorbell.num_doorbells) {
504 writel(v, adev->doorbell.ptr + index);
505 } else {
506 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
507 }
508}
509
832be404
KW
510/**
511 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
512 *
513 * @adev: amdgpu_device pointer
514 * @index: doorbell index
515 *
516 * Returns the value in the doorbell aperture at the
517 * requested doorbell index (VEGA10+).
518 */
519u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
520{
521 if (index < adev->doorbell.num_doorbells) {
522 return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
523 } else {
524 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
525 return 0;
526 }
527}
528
529/**
530 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
531 *
532 * @adev: amdgpu_device pointer
533 * @index: doorbell index
534 * @v: value to write
535 *
536 * Writes @v to the doorbell aperture at the
537 * requested doorbell index (VEGA10+).
538 */
539void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
540{
541 if (index < adev->doorbell.num_doorbells) {
542 atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
543 } else {
544 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
545 }
546}
547
d38ceaf9
AD
548/**
549 * amdgpu_invalid_rreg - dummy reg read function
550 *
551 * @adev: amdgpu device pointer
552 * @reg: offset of register
553 *
554 * Dummy register read function. Used for register blocks
555 * that certain asics don't have (all asics).
556 * Returns the value in the register.
557 */
558static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
559{
560 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
561 BUG();
562 return 0;
563}
564
565/**
566 * amdgpu_invalid_wreg - dummy reg write function
567 *
568 * @adev: amdgpu device pointer
569 * @reg: offset of register
570 * @v: value to write to the register
571 *
572 * Dummy register write function. Used for register blocks
573 * that certain asics don't have (all asics).
574 */
575static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
576{
577 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
578 reg, v);
579 BUG();
580}
581
4fa1c6a6
TZ
582/**
583 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
584 *
585 * @adev: amdgpu device pointer
586 * @reg: offset of register
587 *
588 * Dummy register read function. Used for register blocks
589 * that certain asics don't have (all asics).
590 * Returns the value in the register.
591 */
592static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
593{
594 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
595 BUG();
596 return 0;
597}
598
599/**
600 * amdgpu_invalid_wreg64 - dummy reg write function
601 *
602 * @adev: amdgpu device pointer
603 * @reg: offset of register
604 * @v: value to write to the register
605 *
606 * Dummy register write function. Used for register blocks
607 * that certain asics don't have (all asics).
608 */
609static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
610{
611 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
612 reg, v);
613 BUG();
614}
615
d38ceaf9
AD
616/**
617 * amdgpu_block_invalid_rreg - dummy reg read function
618 *
619 * @adev: amdgpu device pointer
620 * @block: offset of instance
621 * @reg: offset of register
622 *
623 * Dummy register read function. Used for register blocks
624 * that certain asics don't have (all asics).
625 * Returns the value in the register.
626 */
627static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
628 uint32_t block, uint32_t reg)
629{
630 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
631 reg, block);
632 BUG();
633 return 0;
634}
635
636/**
637 * amdgpu_block_invalid_wreg - dummy reg write function
638 *
639 * @adev: amdgpu device pointer
640 * @block: offset of instance
641 * @reg: offset of register
642 * @v: value to write to the register
643 *
644 * Dummy register write function. Used for register blocks
645 * that certain asics don't have (all asics).
646 */
647static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
648 uint32_t block,
649 uint32_t reg, uint32_t v)
650{
651 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
652 reg, block, v);
653 BUG();
654}
655
e3ecdffa
AD
656/**
657 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
658 *
659 * @adev: amdgpu device pointer
660 *
661 * Allocates a scratch page of VRAM for use by various things in the
662 * driver.
663 */
06ec9070 664static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
d38ceaf9 665{
a4a02777
CK
666 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
667 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
668 &adev->vram_scratch.robj,
669 &adev->vram_scratch.gpu_addr,
670 (void **)&adev->vram_scratch.ptr);
d38ceaf9
AD
671}
672
e3ecdffa
AD
673/**
674 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
675 *
676 * @adev: amdgpu device pointer
677 *
678 * Frees the VRAM scratch page.
679 */
06ec9070 680static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 681{
078af1a3 682 amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
d38ceaf9
AD
683}
684
685/**
9c3f2b54 686 * amdgpu_device_program_register_sequence - program an array of registers.
d38ceaf9
AD
687 *
688 * @adev: amdgpu_device pointer
689 * @registers: pointer to the register array
690 * @array_size: size of the register array
691 *
692 * Programs an array of registers with AND and OR masks.
693 * This is a helper for setting golden registers.
694 */
9c3f2b54
AD
695void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
696 const u32 *registers,
697 const u32 array_size)
d38ceaf9
AD
698{
699 u32 tmp, reg, and_mask, or_mask;
700 int i;
701
702 if (array_size % 3)
703 return;
704
705 for (i = 0; i < array_size; i +=3) {
706 reg = registers[i + 0];
707 and_mask = registers[i + 1];
708 or_mask = registers[i + 2];
709
710 if (and_mask == 0xffffffff) {
711 tmp = or_mask;
712 } else {
713 tmp = RREG32(reg);
714 tmp &= ~and_mask;
e0d07657
HZ
715 if (adev->family >= AMDGPU_FAMILY_AI)
716 tmp |= (or_mask & and_mask);
717 else
718 tmp |= or_mask;
d38ceaf9
AD
719 }
720 WREG32(reg, tmp);
721 }
722}
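/*
 * Illustrative only: the register array consumed above is a flat list of
 * {register, and_mask, or_mask} triplets, e.g. a hypothetical golden setting
 * such as
 *
 *	static const u32 example_golden_regs[] = {
 *		mmSOME_REG, 0xffffff0f, 0x00000020,
 *	};
 *
 *	amdgpu_device_program_register_sequence(adev, example_golden_regs,
 *						ARRAY_SIZE(example_golden_regs));
 *
 * where "mmSOME_REG" is a made-up register name and an and_mask of
 * 0xffffffff means "write or_mask verbatim".
 */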
723
e3ecdffa
AD
724/**
725 * amdgpu_device_pci_config_reset - reset the GPU
726 *
727 * @adev: amdgpu_device pointer
728 *
729 * Resets the GPU using the pci config reset sequence.
730 * Only applicable to asics prior to vega10.
731 */
8111c387 732void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
733{
734 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
735}
736
737/*
738 * GPU doorbell aperture helpers function.
739 */
740/**
06ec9070 741 * amdgpu_device_doorbell_init - Init doorbell driver information.
d38ceaf9
AD
742 *
743 * @adev: amdgpu_device pointer
744 *
745 * Init doorbell driver information (CIK)
746 * Returns 0 on success, error on failure.
747 */
06ec9070 748static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
d38ceaf9 749{
6585661d 750
705e519e
CK
751 /* No doorbell on SI hardware generation */
752 if (adev->asic_type < CHIP_BONAIRE) {
753 adev->doorbell.base = 0;
754 adev->doorbell.size = 0;
755 adev->doorbell.num_doorbells = 0;
756 adev->doorbell.ptr = NULL;
757 return 0;
758 }
759
d6895ad3
CK
760 if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
761 return -EINVAL;
762
22357775
AD
763 amdgpu_asic_init_doorbell_index(adev);
764
d38ceaf9
AD
765 /* doorbell bar mapping */
766 adev->doorbell.base = pci_resource_start(adev->pdev, 2);
767 adev->doorbell.size = pci_resource_len(adev->pdev, 2);
768
edf600da 769 adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
9564f192 770 adev->doorbell_index.max_assignment+1);
d38ceaf9
AD
771 if (adev->doorbell.num_doorbells == 0)
772 return -EINVAL;
773
ec3db8a6 774 /* For Vega, reserve and map two pages on doorbell BAR since SDMA
88dc26e4
OZ
775 * paging queue doorbell uses the second page. The
776 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
777 * doorbells are in the first page. So with paging queue enabled,
778 * the max num_doorbells should be extended by one page (0x400 in dwords).
ec3db8a6
PY
779 */
780 if (adev->asic_type >= CHIP_VEGA10)
88dc26e4 781 adev->doorbell.num_doorbells += 0x400;
ec3db8a6 782
8972e5d2
CK
783 adev->doorbell.ptr = ioremap(adev->doorbell.base,
784 adev->doorbell.num_doorbells *
785 sizeof(u32));
786 if (adev->doorbell.ptr == NULL)
d38ceaf9 787 return -ENOMEM;
d38ceaf9
AD
788
789 return 0;
790}
791
792/**
06ec9070 793 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
d38ceaf9
AD
794 *
795 * @adev: amdgpu_device pointer
796 *
797 * Tear down doorbell driver information (CIK)
798 */
06ec9070 799static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
d38ceaf9
AD
800{
801 iounmap(adev->doorbell.ptr);
802 adev->doorbell.ptr = NULL;
803}
804
22cb0164 805
d38ceaf9
AD
806
807/*
06ec9070 808 * amdgpu_device_wb_*()
455a7bc2 809 * Writeback is the method by which the GPU updates special pages in memory
ea81a173 810 * with the status of certain GPU events (fences, ring pointers, etc.).
d38ceaf9
AD
811 */
812
813/**
06ec9070 814 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
815 *
816 * @adev: amdgpu_device pointer
817 *
818 * Disables Writeback and frees the Writeback memory (all asics).
819 * Used at driver shutdown.
820 */
06ec9070 821static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
d38ceaf9
AD
822{
823 if (adev->wb.wb_obj) {
a76ed485
AD
824 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
825 &adev->wb.gpu_addr,
826 (void **)&adev->wb.wb);
d38ceaf9
AD
827 adev->wb.wb_obj = NULL;
828 }
829}
830
831/**
06ec9070 832 * amdgpu_device_wb_init- Init Writeback driver info and allocate memory
d38ceaf9
AD
833 *
834 * @adev: amdgpu_device pointer
835 *
455a7bc2 836 * Initializes writeback and allocates writeback memory (all asics).
d38ceaf9
AD
837 * Used at driver startup.
838 * Returns 0 on success or a negative error code on failure.
839 */
06ec9070 840static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
841{
842 int r;
843
844 if (adev->wb.wb_obj == NULL) {
97407b63
AD
845 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
846 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
847 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
848 &adev->wb.wb_obj, &adev->wb.gpu_addr,
849 (void **)&adev->wb.wb);
d38ceaf9
AD
850 if (r) {
851 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
852 return r;
853 }
d38ceaf9
AD
854
855 adev->wb.num_wb = AMDGPU_MAX_WB;
856 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
857
858 /* clear wb memory */
73469585 859 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
860 }
861
862 return 0;
863}
864
865/**
131b4b36 866 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
867 *
868 * @adev: amdgpu_device pointer
869 * @wb: wb index
870 *
871 * Allocate a wb slot for use by the driver (all asics).
872 * Returns 0 on success or -EINVAL on failure.
873 */
131b4b36 874int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9
AD
875{
876 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 877
97407b63 878 if (offset < adev->wb.num_wb) {
7014285a 879 __set_bit(offset, adev->wb.used);
63ae07ca 880 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
881 return 0;
882 } else {
883 return -EINVAL;
884 }
885}
886
d38ceaf9 887/**
131b4b36 888 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
889 *
890 * @adev: amdgpu_device pointer
891 * @wb: wb index
892 *
893 * Free a wb slot allocated for use by the driver (all asics)
894 */
131b4b36 895void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 896{
73469585 897 wb >>= 3;
d38ceaf9 898 if (wb < adev->wb.num_wb)
73469585 899 __clear_bit(wb, adev->wb.used);
d38ceaf9
AD
900}
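/*
 * Usage sketch (illustrative only): a ring or IP block typically grabs a
 * writeback slot at init time and releases it on teardown, e.g.
 *
 *	u32 wb;
 *
 *	if (!amdgpu_device_wb_get(adev, &wb)) {
 *		// CPU view: adev->wb.wb[wb], GPU view: adev->wb.gpu_addr + wb * 4
 *		amdgpu_device_wb_free(adev, wb);
 *	}
 *
 * The returned index is a dword offset; each allocation covers one 256-bit
 * (8-dword) slot, which is why amdgpu_device_wb_get() shifts by 3.
 */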
901
d6895ad3
CK
902/**
903 * amdgpu_device_resize_fb_bar - try to resize FB BAR
904 *
905 * @adev: amdgpu_device pointer
906 *
907 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
908 * to fail, but if any of the BARs is not accessible after the resize we abort
909 * driver loading by returning -ENODEV.
910 */
911int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
912{
770d13b1 913 u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
d6895ad3 914 u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
31b8adab
CK
915 struct pci_bus *root;
916 struct resource *res;
917 unsigned i;
d6895ad3
CK
918 u16 cmd;
919 int r;
920
0c03b912 921 /* Bypass for VF */
922 if (amdgpu_sriov_vf(adev))
923 return 0;
924
b7221f2b
AD
925 /* skip if the bios has already enabled large BAR */
926 if (adev->gmc.real_vram_size &&
927 (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
928 return 0;
929
31b8adab
CK
930 /* Check if the root BUS has 64bit memory resources */
931 root = adev->pdev->bus;
932 while (root->parent)
933 root = root->parent;
934
935 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 936 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
937 res->start > 0x100000000ull)
938 break;
939 }
940
941 /* Trying to resize is pointless without a root hub window above 4GB */
942 if (!res)
943 return 0;
944
d6895ad3
CK
945 /* Disable memory decoding while we change the BAR addresses and size */
946 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
947 pci_write_config_word(adev->pdev, PCI_COMMAND,
948 cmd & ~PCI_COMMAND_MEMORY);
949
950 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
06ec9070 951 amdgpu_device_doorbell_fini(adev);
d6895ad3
CK
952 if (adev->asic_type >= CHIP_BONAIRE)
953 pci_release_resource(adev->pdev, 2);
954
955 pci_release_resource(adev->pdev, 0);
956
957 r = pci_resize_resource(adev->pdev, 0, rbar_size);
958 if (r == -ENOSPC)
959 DRM_INFO("Not enough PCI address space for a large BAR.");
960 else if (r && r != -ENOTSUPP)
961 DRM_ERROR("Problem resizing BAR0 (%d).", r);
962
963 pci_assign_unassigned_bus_resources(adev->pdev->bus);
964
965 /* When the doorbell or fb BAR isn't available we have no chance of
966 * using the device.
967 */
06ec9070 968 r = amdgpu_device_doorbell_init(adev);
d6895ad3
CK
969 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
970 return -ENODEV;
971
972 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
973
974 return 0;
975}
a05502e5 976
d38ceaf9
AD
977/*
978 * GPU helpers function.
979 */
980/**
39c640c0 981 * amdgpu_device_need_post - check if the hw need post or not
d38ceaf9
AD
982 *
983 * @adev: amdgpu_device pointer
984 *
c836fec5
JQ
985 * Check if the asic has been initialized (all asics) at driver startup,
986 * or whether a post is needed because a hw reset was performed.
987 * Returns true if post is needed or false if not.
d38ceaf9 988 */
39c640c0 989bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
990{
991 uint32_t reg;
992
bec86378
ML
993 if (amdgpu_sriov_vf(adev))
994 return false;
995
996 if (amdgpu_passthrough(adev)) {
1da2c326
ML
997 /* for FIJI: In the whole-GPU pass-through virtualization case, after a VM reboot
998 * some old SMC firmware still needs the driver to do a vPost, otherwise the GPU hangs.
999 * SMC firmware versions above 22.15 don't have this flaw, so we force
1000 * vPost to be executed for SMC versions below 22.15
bec86378
ML
1001 */
1002 if (adev->asic_type == CHIP_FIJI) {
1003 int err;
1004 uint32_t fw_ver;
1005 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1006 /* force vPost if an error occurred */
1007 if (err)
1008 return true;
1009
1010 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1da2c326
ML
1011 if (fw_ver < 0x00160e00)
1012 return true;
bec86378 1013 }
bec86378 1014 }
91fe77eb 1015
1016 if (adev->has_hw_reset) {
1017 adev->has_hw_reset = false;
1018 return true;
1019 }
1020
1021 /* bios scratch used on CIK+ */
1022 if (adev->asic_type >= CHIP_BONAIRE)
1023 return amdgpu_atombios_scratch_need_asic_init(adev);
1024
1025 /* check MEM_SIZE for older asics */
1026 reg = amdgpu_asic_get_config_memsize(adev);
1027
1028 if ((reg != 0) && (reg != 0xffffffff))
1029 return false;
1030
1031 return true;
bec86378
ML
1032}
1033
d38ceaf9
AD
1034/* if we get transitioned to only one device, take VGA back */
1035/**
06ec9070 1036 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9
AD
1037 *
1038 * @cookie: amdgpu_device pointer
1039 * @state: enable/disable vga decode
1040 *
1041 * Enable/disable vga decode (all asics).
1042 * Returns VGA resource flags.
1043 */
06ec9070 1044static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
d38ceaf9
AD
1045{
1046 struct amdgpu_device *adev = cookie;
1047 amdgpu_asic_set_vga_state(adev, state);
1048 if (state)
1049 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1050 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1051 else
1052 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1053}
1054
e3ecdffa
AD
1055/**
1056 * amdgpu_device_check_block_size - validate the vm block size
1057 *
1058 * @adev: amdgpu_device pointer
1059 *
1060 * Validates the vm block size specified via module parameter.
1061 * The vm block size defines number of bits in page table versus page directory,
1062 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1063 * page table and the remaining bits are in the page directory.
1064 */
06ec9070 1065static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
1066{
1067 /* defines number of bits in page table versus page directory,
1068 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1069 * page table and the remaining bits are in the page directory */
bab4fee7
JZ
1070 if (amdgpu_vm_block_size == -1)
1071 return;
a1adf8be 1072
bab4fee7 1073 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
1074 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1075 amdgpu_vm_block_size);
97489129 1076 amdgpu_vm_block_size = -1;
a1adf8be 1077 }
a1adf8be
CZ
1078}
1079
e3ecdffa
AD
1080/**
1081 * amdgpu_device_check_vm_size - validate the vm size
1082 *
1083 * @adev: amdgpu_device pointer
1084 *
1085 * Validates the vm size in GB specified via module parameter.
1086 * The VM size is the size of the GPU virtual memory space in GB.
1087 */
06ec9070 1088static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 1089{
64dab074
AD
1090 /* no need to check the default value */
1091 if (amdgpu_vm_size == -1)
1092 return;
1093
83ca145d
ZJ
1094 if (amdgpu_vm_size < 1) {
1095 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1096 amdgpu_vm_size);
f3368128 1097 amdgpu_vm_size = -1;
83ca145d 1098 }
83ca145d
ZJ
1099}
1100
7951e376
RZ
1101static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1102{
1103 struct sysinfo si;
a9d4fe2f 1104 bool is_os_64 = (sizeof(void *) == 8);
7951e376
RZ
1105 uint64_t total_memory;
1106 uint64_t dram_size_seven_GB = 0x1B8000000;
1107 uint64_t dram_size_three_GB = 0xB8000000;
1108
1109 if (amdgpu_smu_memory_pool_size == 0)
1110 return;
1111
1112 if (!is_os_64) {
1113 DRM_WARN("Not 64-bit OS, feature not supported\n");
1114 goto def_value;
1115 }
1116 si_meminfo(&si);
1117 total_memory = (uint64_t)si.totalram * si.mem_unit;
1118
1119 if ((amdgpu_smu_memory_pool_size == 1) ||
1120 (amdgpu_smu_memory_pool_size == 2)) {
1121 if (total_memory < dram_size_three_GB)
1122 goto def_value1;
1123 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1124 (amdgpu_smu_memory_pool_size == 8)) {
1125 if (total_memory < dram_size_seven_GB)
1126 goto def_value1;
1127 } else {
1128 DRM_WARN("Smu memory pool size not supported\n");
1129 goto def_value;
1130 }
1131 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1132
1133 return;
1134
1135def_value1:
1136 DRM_WARN("No enough system memory\n");
1137def_value:
1138 adev->pm.smu_prv_buffer_size = 0;
1139}
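/*
 * Illustrative note: amdgpu_smu_memory_pool_size is expressed in units of
 * 256 MiB (the "<< 28" above), so the accepted values 1, 2, 4 and 8 map to
 * 256 MiB, 512 MiB, 1 GiB and 2 GiB pools, gated on the amount of system
 * memory actually present.
 */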
1140
d38ceaf9 1141/**
06ec9070 1142 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
1143 *
1144 * @adev: amdgpu_device pointer
1145 *
1146 * Validates certain module parameters and updates
1147 * the associated values used by the driver (all asics).
1148 */
912dfc84 1149static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1150{
5b011235
CZ
1151 if (amdgpu_sched_jobs < 4) {
1152 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1153 amdgpu_sched_jobs);
1154 amdgpu_sched_jobs = 4;
76117507 1155 } else if (!is_power_of_2(amdgpu_sched_jobs)){
5b011235
CZ
1156 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1157 amdgpu_sched_jobs);
1158 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1159 }
d38ceaf9 1160
83e74db6 1161 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
1162 /* gart size must be greater or equal to 32M */
1163 dev_warn(adev->dev, "gart size (%d) too small\n",
1164 amdgpu_gart_size);
83e74db6 1165 amdgpu_gart_size = -1;
d38ceaf9
AD
1166 }
1167
36d38372 1168 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1169 /* gtt size must be greater or equal to 32M */
36d38372
CK
1170 dev_warn(adev->dev, "gtt size (%d) too small\n",
1171 amdgpu_gtt_size);
1172 amdgpu_gtt_size = -1;
d38ceaf9
AD
1173 }
1174
d07f14be
RH
1175 /* valid range is between 4 and 9 inclusive */
1176 if (amdgpu_vm_fragment_size != -1 &&
1177 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1178 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1179 amdgpu_vm_fragment_size = -1;
1180 }
1181
5d5bd5e3
KW
1182 if (amdgpu_sched_hw_submission < 2) {
1183 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1184 amdgpu_sched_hw_submission);
1185 amdgpu_sched_hw_submission = 2;
1186 } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1187 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1188 amdgpu_sched_hw_submission);
1189 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1190 }
1191
7951e376
RZ
1192 amdgpu_device_check_smu_prv_buffer_size(adev);
1193
06ec9070 1194 amdgpu_device_check_vm_size(adev);
d38ceaf9 1195
06ec9070 1196 amdgpu_device_check_block_size(adev);
6a7f76e7 1197
19aede77 1198 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84 1199
c6252390 1200 amdgpu_gmc_tmz_set(adev);
01a8dcec 1201
e3c00faa 1202 return 0;
d38ceaf9
AD
1203}
1204
1205/**
1206 * amdgpu_switcheroo_set_state - set switcheroo state
1207 *
1208 * @pdev: pci dev pointer
1694467b 1209 * @state: vga_switcheroo state
d38ceaf9
AD
1210 *
1211 * Callback for the switcheroo driver. Suspends or resumes
1212 * the asics before or after it is powered up using ACPI methods.
1213 */
1214static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
1215{
1216 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 1217 int r;
d38ceaf9 1218
31af062a 1219 if (amdgpu_device_supports_boco(dev) && state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
1220 return;
1221
1222 if (state == VGA_SWITCHEROO_ON) {
dd4fa6c1 1223 pr_info("switched on\n");
d38ceaf9
AD
1224 /* don't suspend or resume card normally */
1225 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1226
de185019
AD
1227 pci_set_power_state(dev->pdev, PCI_D0);
1228 pci_restore_state(dev->pdev);
1229 r = pci_enable_device(dev->pdev);
1230 if (r)
1231 DRM_WARN("pci_enable_device failed (%d)\n", r);
1232 amdgpu_device_resume(dev, true);
d38ceaf9 1233
d38ceaf9
AD
1234 dev->switch_power_state = DRM_SWITCH_POWER_ON;
1235 drm_kms_helper_poll_enable(dev);
1236 } else {
dd4fa6c1 1237 pr_info("switched off\n");
d38ceaf9
AD
1238 drm_kms_helper_poll_disable(dev);
1239 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
de185019
AD
1240 amdgpu_device_suspend(dev, true);
1241 pci_save_state(dev->pdev);
1242 /* Shut down the device */
1243 pci_disable_device(dev->pdev);
1244 pci_set_power_state(dev->pdev, PCI_D3cold);
d38ceaf9
AD
1245 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1246 }
1247}
1248
1249/**
1250 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1251 *
1252 * @pdev: pci dev pointer
1253 *
1254 * Callback for the switcheroo driver. Check if the switcheroo
1255 * state can be changed.
1256 * Returns true if the state can be changed, false if not.
1257 */
1258static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1259{
1260 struct drm_device *dev = pci_get_drvdata(pdev);
1261
1262 /*
1263 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1264 * locking inversion with the driver load path. And the access here is
1265 * completely racy anyway. So don't bother with locking for now.
1266 */
7e13ad89 1267 return atomic_read(&dev->open_count) == 0;
d38ceaf9
AD
1268}
1269
1270static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1271 .set_gpu_state = amdgpu_switcheroo_set_state,
1272 .reprobe = NULL,
1273 .can_switch = amdgpu_switcheroo_can_switch,
1274};
1275
e3ecdffa
AD
1276/**
1277 * amdgpu_device_ip_set_clockgating_state - set the CG state
1278 *
87e3f136 1279 * @dev: amdgpu_device pointer
e3ecdffa
AD
1280 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1281 * @state: clockgating state (gate or ungate)
1282 *
1283 * Sets the requested clockgating state for all instances of
1284 * the hardware IP specified.
1285 * Returns the error code from the last instance.
1286 */
43fa561f 1287int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1288 enum amd_ip_block_type block_type,
1289 enum amd_clockgating_state state)
d38ceaf9 1290{
43fa561f 1291 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1292 int i, r = 0;
1293
1294 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1295 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1296 continue;
c722865a
RZ
1297 if (adev->ip_blocks[i].version->type != block_type)
1298 continue;
1299 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1300 continue;
1301 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1302 (void *)adev, state);
1303 if (r)
1304 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1305 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1306 }
1307 return r;
1308}
1309
e3ecdffa
AD
1310/**
1311 * amdgpu_device_ip_set_powergating_state - set the PG state
1312 *
87e3f136 1313 * @dev: amdgpu_device pointer
e3ecdffa
AD
1314 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1315 * @state: powergating state (gate or ungate)
1316 *
1317 * Sets the requested powergating state for all instances of
1318 * the hardware IP specified.
1319 * Returns the error code from the last instance.
1320 */
43fa561f 1321int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1322 enum amd_ip_block_type block_type,
1323 enum amd_powergating_state state)
d38ceaf9 1324{
43fa561f 1325 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1326 int i, r = 0;
1327
1328 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1329 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1330 continue;
c722865a
RZ
1331 if (adev->ip_blocks[i].version->type != block_type)
1332 continue;
1333 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1334 continue;
1335 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1336 (void *)adev, state);
1337 if (r)
1338 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1339 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1340 }
1341 return r;
1342}
1343
e3ecdffa
AD
1344/**
1345 * amdgpu_device_ip_get_clockgating_state - get the CG state
1346 *
1347 * @adev: amdgpu_device pointer
1348 * @flags: clockgating feature flags
1349 *
1350 * Walks the list of IPs on the device and updates the clockgating
1351 * flags for each IP.
1352 * Updates @flags with the feature flags for each hardware IP where
1353 * clockgating is enabled.
1354 */
2990a1fc
AD
1355void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1356 u32 *flags)
6cb2d4e4
HR
1357{
1358 int i;
1359
1360 for (i = 0; i < adev->num_ip_blocks; i++) {
1361 if (!adev->ip_blocks[i].status.valid)
1362 continue;
1363 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1364 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1365 }
1366}
1367
e3ecdffa
AD
1368/**
1369 * amdgpu_device_ip_wait_for_idle - wait for idle
1370 *
1371 * @adev: amdgpu_device pointer
1372 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1373 *
1374 * Waits for the request hardware IP to be idle.
1375 * Returns 0 for success or a negative error code on failure.
1376 */
2990a1fc
AD
1377int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1378 enum amd_ip_block_type block_type)
5dbbb60b
AD
1379{
1380 int i, r;
1381
1382 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1383 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1384 continue;
a1255107
AD
1385 if (adev->ip_blocks[i].version->type == block_type) {
1386 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1387 if (r)
1388 return r;
1389 break;
1390 }
1391 }
1392 return 0;
1393
1394}
1395
e3ecdffa
AD
1396/**
1397 * amdgpu_device_ip_is_idle - is the hardware IP idle
1398 *
1399 * @adev: amdgpu_device pointer
1400 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1401 *
1402 * Check if the hardware IP is idle or not.
1404 * Returns true if the IP is idle, false if not.
1404 */
2990a1fc
AD
1405bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1406 enum amd_ip_block_type block_type)
5dbbb60b
AD
1407{
1408 int i;
1409
1410 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1411 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1412 continue;
a1255107
AD
1413 if (adev->ip_blocks[i].version->type == block_type)
1414 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1415 }
1416 return true;
1417
1418}
1419
e3ecdffa
AD
1420/**
1421 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1422 *
1423 * @adev: amdgpu_device pointer
87e3f136 1424 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1425 *
1426 * Returns a pointer to the hardware IP block structure
1427 * if it exists for the asic, otherwise NULL.
1428 */
2990a1fc
AD
1429struct amdgpu_ip_block *
1430amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1431 enum amd_ip_block_type type)
d38ceaf9
AD
1432{
1433 int i;
1434
1435 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1436 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1437 return &adev->ip_blocks[i];
1438
1439 return NULL;
1440}
1441
1442/**
2990a1fc 1443 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1444 *
1445 * @adev: amdgpu_device pointer
5fc3aeeb 1446 * @type: enum amd_ip_block_type
d38ceaf9
AD
1447 * @major: major version
1448 * @minor: minor version
1449 *
1450 * return 0 if equal or greater
1451 * return 1 if smaller or the ip_block doesn't exist
1452 */
2990a1fc
AD
1453int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1454 enum amd_ip_block_type type,
1455 u32 major, u32 minor)
d38ceaf9 1456{
2990a1fc 1457 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1458
a1255107
AD
1459 if (ip_block && ((ip_block->version->major > major) ||
1460 ((ip_block->version->major == major) &&
1461 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1462 return 0;
1463
1464 return 1;
1465}
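/*
 * Usage sketch (illustrative only): callers use the 0 / 1 return value above
 * as a "new enough?" test, e.g.
 *
 *	if (amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_SMC,
 *					       7, 1) == 0) {
 *		// the SMC IP block is present and at least version 7.1
 *	}
 */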
1466
a1255107 1467/**
2990a1fc 1468 * amdgpu_device_ip_block_add
a1255107
AD
1469 *
1470 * @adev: amdgpu_device pointer
1471 * @ip_block_version: pointer to the IP to add
1472 *
1473 * Adds the IP block driver information to the collection of IPs
1474 * on the asic.
1475 */
2990a1fc
AD
1476int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1477 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1478{
1479 if (!ip_block_version)
1480 return -EINVAL;
1481
e966a725 1482 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1483 ip_block_version->funcs->name);
1484
a1255107
AD
1485 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1486
1487 return 0;
1488}
1489
e3ecdffa
AD
1490/**
1491 * amdgpu_device_enable_virtual_display - enable virtual display feature
1492 *
1493 * @adev: amdgpu_device pointer
1494 *
1495 * Enables the virtual display feature if the user has enabled it via
1496 * the module parameter virtual_display. This feature provides virtual
1497 * display hardware on headless boards or in virtualized environments.
1498 * This function parses and validates the configuration string specified by
1499 * the user and configures the virtual display configuration (number of
1500 * virtual connectors, crtcs, etc.) specified.
1501 */
483ef985 1502static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1503{
1504 adev->enable_virtual_display = false;
1505
1506 if (amdgpu_virtual_display) {
1507 struct drm_device *ddev = adev->ddev;
1508 const char *pci_address_name = pci_name(ddev->pdev);
0f66356d 1509 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1510
1511 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1512 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1513 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1514 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1515 if (!strcmp("all", pciaddname)
1516 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1517 long num_crtc;
1518 int res = -1;
1519
9accf2fd 1520 adev->enable_virtual_display = true;
0f66356d
ED
1521
1522 if (pciaddname_tmp)
1523 res = kstrtol(pciaddname_tmp, 10,
1524 &num_crtc);
1525
1526 if (!res) {
1527 if (num_crtc < 1)
1528 num_crtc = 1;
1529 if (num_crtc > 6)
1530 num_crtc = 6;
1531 adev->mode_info.num_crtc = num_crtc;
1532 } else {
1533 adev->mode_info.num_crtc = 1;
1534 }
9accf2fd
ED
1535 break;
1536 }
1537 }
1538
0f66356d
ED
1539 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1540 amdgpu_virtual_display, pci_address_name,
1541 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
1542
1543 kfree(pciaddstr);
1544 }
1545}
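/*
 * Illustrative only: the virtual_display module parameter parsed above is a
 * semicolon separated list of "pci_address,crtc_count" entries, e.g.
 *
 *	modprobe amdgpu virtual_display=0000:04:00.0,2
 *
 * or "all,1" to enable one virtual crtc on every device. The PCI address
 * here is made up; crtc_count is clamped to the 1..6 range by the code above.
 */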
1546
e3ecdffa
AD
1547/**
1548 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1549 *
1550 * @adev: amdgpu_device pointer
1551 *
1552 * Parses the asic configuration parameters specified in the gpu info
1553 * firmware and makes them available to the driver for use in configuring
1554 * the asic.
1555 * Returns 0 on success, -EINVAL on failure.
1556 */
e2a75f88
AD
1557static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1558{
e2a75f88 1559 const char *chip_name;
c0a43457 1560 char fw_name[40];
e2a75f88
AD
1561 int err;
1562 const struct gpu_info_firmware_header_v1_0 *hdr;
1563
ab4fe3e1
HR
1564 adev->firmware.gpu_info_fw = NULL;
1565
4292b0b2 1566 if (adev->discovery_bin) {
258620d0 1567 amdgpu_discovery_get_gfx_info(adev);
cc375d8c
TY
1568
1569 /*
1570 * FIXME: The bounding box is still needed by Navi12, so
1571 * temporarily read it from gpu_info firmware. Should be dropped
1572 * when DAL no longer needs it.
1573 */
1574 if (adev->asic_type != CHIP_NAVI12)
1575 return 0;
258620d0
AD
1576 }
1577
e2a75f88 1578 switch (adev->asic_type) {
e2a75f88
AD
1579#ifdef CONFIG_DRM_AMDGPU_SI
1580 case CHIP_VERDE:
1581 case CHIP_TAHITI:
1582 case CHIP_PITCAIRN:
1583 case CHIP_OLAND:
1584 case CHIP_HAINAN:
1585#endif
1586#ifdef CONFIG_DRM_AMDGPU_CIK
1587 case CHIP_BONAIRE:
1588 case CHIP_HAWAII:
1589 case CHIP_KAVERI:
1590 case CHIP_KABINI:
1591 case CHIP_MULLINS:
1592#endif
da87c30b
AD
1593 case CHIP_TOPAZ:
1594 case CHIP_TONGA:
1595 case CHIP_FIJI:
1596 case CHIP_POLARIS10:
1597 case CHIP_POLARIS11:
1598 case CHIP_POLARIS12:
1599 case CHIP_VEGAM:
1600 case CHIP_CARRIZO:
1601 case CHIP_STONEY:
27c0bc71 1602 case CHIP_VEGA20:
e2a75f88
AD
1603 default:
1604 return 0;
1605 case CHIP_VEGA10:
1606 chip_name = "vega10";
1607 break;
3f76dced
AD
1608 case CHIP_VEGA12:
1609 chip_name = "vega12";
1610 break;
2d2e5e7e 1611 case CHIP_RAVEN:
54f78a76 1612 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
54c4d17e 1613 chip_name = "raven2";
54f78a76 1614 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
741deade 1615 chip_name = "picasso";
54c4d17e
FX
1616 else
1617 chip_name = "raven";
2d2e5e7e 1618 break;
65e60f6e
LM
1619 case CHIP_ARCTURUS:
1620 chip_name = "arcturus";
1621 break;
b51a26a0
HR
1622 case CHIP_RENOIR:
1623 chip_name = "renoir";
1624 break;
23c6268e
HR
1625 case CHIP_NAVI10:
1626 chip_name = "navi10";
1627 break;
ed42cfe1
XY
1628 case CHIP_NAVI14:
1629 chip_name = "navi14";
1630 break;
42b325e5
XY
1631 case CHIP_NAVI12:
1632 chip_name = "navi12";
1633 break;
c0a43457
LG
1634 case CHIP_SIENNA_CICHLID:
1635 chip_name = "sienna_cichlid";
1636 break;
120eb833
JC
1637 case CHIP_NAVY_FLOUNDER:
1638 chip_name = "navy_flounder";
1639 break;
e2a75f88
AD
1640 }
1641
1642 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
ab4fe3e1 1643 err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
e2a75f88
AD
1644 if (err) {
1645 dev_err(adev->dev,
1646 "Failed to load gpu_info firmware \"%s\"\n",
1647 fw_name);
1648 goto out;
1649 }
ab4fe3e1 1650 err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
e2a75f88
AD
1651 if (err) {
1652 dev_err(adev->dev,
1653 "Failed to validate gpu_info firmware \"%s\"\n",
1654 fw_name);
1655 goto out;
1656 }
1657
ab4fe3e1 1658 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
1659 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1660
1661 switch (hdr->version_major) {
1662 case 1:
1663 {
1664 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 1665 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
1666 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1667
cc375d8c
TY
1668 /*
1669 * Should be dropped when DAL no longer needs it.
1670 */
1671 if (adev->asic_type == CHIP_NAVI12)
1672 goto parse_soc_bounding_box;
1673
b5ab16bf
AD
1674 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1675 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1676 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1677 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 1678 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
1679 le32_to_cpu(gpu_info_fw->gc_num_tccs);
1680 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1681 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1682 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1683 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 1684 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
1685 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1686 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
1687 adev->gfx.cu_info.max_waves_per_simd =
1688 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1689 adev->gfx.cu_info.max_scratch_slots_per_cu =
1690 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1691 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 1692 if (hdr->version_minor >= 1) {
35c2e910
HZ
1693 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1694 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1695 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1696 adev->gfx.config.num_sc_per_sh =
1697 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1698 adev->gfx.config.num_packer_per_sc =
1699 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1700 }
ec51d3fa 1701
cc375d8c 1702parse_soc_bounding_box:
ec51d3fa
XY
1703 /*
1704 * soc bounding box info is not integrated in disocovery table,
258620d0 1705 * we always need to parse it from gpu info firmware if needed.
ec51d3fa 1706 */
48321c3d
HW
1707 if (hdr->version_minor == 2) {
1708 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1709 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1710 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1711 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1712 }
e2a75f88
AD
1713 break;
1714 }
1715 default:
1716 dev_err(adev->dev,
1717 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1718 err = -EINVAL;
1719 goto out;
1720 }
1721out:
e2a75f88
AD
1722 return err;
1723}
1724
e3ecdffa
AD
1725/**
1726 * amdgpu_device_ip_early_init - run early init for hardware IPs
1727 *
1728 * @adev: amdgpu_device pointer
1729 *
1730 * Early initialization pass for hardware IPs. The hardware IPs that make
1731 * up each asic are discovered and each IP's early_init callback is run. This
1732 * is the first stage in initializing the asic.
1733 * Returns 0 on success, negative error code on failure.
1734 */
06ec9070 1735static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 1736{
aaa36a97 1737 int i, r;
d38ceaf9 1738
483ef985 1739 amdgpu_device_enable_virtual_display(adev);
a6be7570 1740
00a979f3 1741 if (amdgpu_sriov_vf(adev)) {
00a979f3 1742 r = amdgpu_virt_request_full_gpu(adev, true);
e3a4d51c 1743 if (r)
00a979f3 1744 return r;
00a979f3
WS
1745 }
1746
d38ceaf9 1747 switch (adev->asic_type) {
33f34802
KW
1748#ifdef CONFIG_DRM_AMDGPU_SI
1749 case CHIP_VERDE:
1750 case CHIP_TAHITI:
1751 case CHIP_PITCAIRN:
1752 case CHIP_OLAND:
1753 case CHIP_HAINAN:
295d0daf 1754 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
1755 r = si_set_ip_blocks(adev);
1756 if (r)
1757 return r;
1758 break;
1759#endif
a2e73f56
AD
1760#ifdef CONFIG_DRM_AMDGPU_CIK
1761 case CHIP_BONAIRE:
1762 case CHIP_HAWAII:
1763 case CHIP_KAVERI:
1764 case CHIP_KABINI:
1765 case CHIP_MULLINS:
e1ad2d53 1766 if (adev->flags & AMD_IS_APU)
a2e73f56 1767 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
1768 else
1769 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
1770
1771 r = cik_set_ip_blocks(adev);
1772 if (r)
1773 return r;
1774 break;
1775#endif
da87c30b
AD
1776 case CHIP_TOPAZ:
1777 case CHIP_TONGA:
1778 case CHIP_FIJI:
1779 case CHIP_POLARIS10:
1780 case CHIP_POLARIS11:
1781 case CHIP_POLARIS12:
1782 case CHIP_VEGAM:
1783 case CHIP_CARRIZO:
1784 case CHIP_STONEY:
1785 if (adev->flags & AMD_IS_APU)
1786 adev->family = AMDGPU_FAMILY_CZ;
1787 else
1788 adev->family = AMDGPU_FAMILY_VI;
1789
1790 r = vi_set_ip_blocks(adev);
1791 if (r)
1792 return r;
1793 break;
e48a3cd9
AD
1794 case CHIP_VEGA10:
1795 case CHIP_VEGA12:
e4bd8170 1796 case CHIP_VEGA20:
e48a3cd9 1797 case CHIP_RAVEN:
61cf44c1 1798 case CHIP_ARCTURUS:
b51a26a0 1799 case CHIP_RENOIR:
70534d1e 1800 if (adev->flags & AMD_IS_APU)
2ca8a5d2
CZ
1801 adev->family = AMDGPU_FAMILY_RV;
1802 else
1803 adev->family = AMDGPU_FAMILY_AI;
460826e6
KW
1804
1805 r = soc15_set_ip_blocks(adev);
1806 if (r)
1807 return r;
1808 break;
0a5b8c7b 1809 case CHIP_NAVI10:
7ecb5cd4 1810 case CHIP_NAVI14:
4808cf9c 1811 case CHIP_NAVI12:
11e8aef5 1812 case CHIP_SIENNA_CICHLID:
41f446bf 1813 case CHIP_NAVY_FLOUNDER:
0a5b8c7b
HR
1814 adev->family = AMDGPU_FAMILY_NV;
1815
1816 r = nv_set_ip_blocks(adev);
1817 if (r)
1818 return r;
1819 break;
d38ceaf9
AD
1820 default:
1821 /* FIXME: not supported yet */
1822 return -EINVAL;
1823 }
1824
1884734a 1825 amdgpu_amdkfd_device_probe(adev);
1826
3b94fb10 1827 adev->pm.pp_feature = amdgpu_pp_feature_mask;
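/* GFXOFF is not used for SR-IOV VFs or when KFD runs without HWS */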
a35ad98b 1828 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 1829 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
00f54b97 1830
d38ceaf9
AD
1831 for (i = 0; i < adev->num_ip_blocks; i++) {
1832 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
ed8cf00c
HR
1833 DRM_ERROR("disabled ip block: %d <%s>\n",
1834 i, adev->ip_blocks[i].version->funcs->name);
a1255107 1835 adev->ip_blocks[i].status.valid = false;
d38ceaf9 1836 } else {
a1255107
AD
1837 if (adev->ip_blocks[i].version->funcs->early_init) {
1838 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 1839 if (r == -ENOENT) {
a1255107 1840 adev->ip_blocks[i].status.valid = false;
2c1a2784 1841 } else if (r) {
a1255107
AD
1842 DRM_ERROR("early_init of IP block <%s> failed %d\n",
1843 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 1844 return r;
2c1a2784 1845 } else {
a1255107 1846 adev->ip_blocks[i].status.valid = true;
2c1a2784 1847 }
974e6b64 1848 } else {
a1255107 1849 adev->ip_blocks[i].status.valid = true;
d38ceaf9 1850 }
d38ceaf9 1851 }
21a249ca
AD
1852 /* get the vbios after the asic_funcs are set up */
1853 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
1854 r = amdgpu_device_parse_gpu_info_fw(adev);
1855 if (r)
1856 return r;
1857
21a249ca
AD
1858 /* Read BIOS */
1859 if (!amdgpu_get_bios(adev))
1860 return -EINVAL;
1861
1862 r = amdgpu_atombios_init(adev);
1863 if (r) {
1864 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
1865 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
1866 return r;
1867 }
1868 }
d38ceaf9
AD
1869 }
1870
395d1fb9
NH
1871 adev->cg_flags &= amdgpu_cg_mask;
1872 adev->pg_flags &= amdgpu_pg_mask;
1873
d38ceaf9
AD
1874 return 0;
1875}
1876
0a4f2520
RZ
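/*
 * Phase 1 of hw_init: only the COMMON and IH blocks (plus PSP when
 * running under SR-IOV) are brought up here; the remaining blocks are
 * handled in phase 2.
 */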
1877static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
1878{
1879 int i, r;
1880
1881 for (i = 0; i < adev->num_ip_blocks; i++) {
1882 if (!adev->ip_blocks[i].status.sw)
1883 continue;
1884 if (adev->ip_blocks[i].status.hw)
1885 continue;
1886 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 1887 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
1888 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
1889 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1890 if (r) {
1891 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1892 adev->ip_blocks[i].version->funcs->name, r);
1893 return r;
1894 }
1895 adev->ip_blocks[i].status.hw = true;
1896 }
1897 }
1898
1899 return 0;
1900}
1901
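/* Phase 2 of hw_init: bring up every remaining block that has finished sw_init */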
1902static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
1903{
1904 int i, r;
1905
1906 for (i = 0; i < adev->num_ip_blocks; i++) {
1907 if (!adev->ip_blocks[i].status.sw)
1908 continue;
1909 if (adev->ip_blocks[i].status.hw)
1910 continue;
1911 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1912 if (r) {
1913 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1914 adev->ip_blocks[i].version->funcs->name, r);
1915 return r;
1916 }
1917 adev->ip_blocks[i].status.hw = true;
1918 }
1919
1920 return 0;
1921}
1922
7a3e0bb2
RZ
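/*
 * Microcode loading: on VEGA10 and later the PSP block is brought up first
 * (resumed on reset/suspend paths, run through hw_init otherwise), then the
 * SMU firmware is loaded unless running as an SR-IOV VF (Tonga excepted).
 */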
1923static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
1924{
1925 int r = 0;
1926 int i;
80f41f84 1927 uint32_t smu_version;
7a3e0bb2
RZ
1928
1929 if (adev->asic_type >= CHIP_VEGA10) {
1930 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
1931 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
1932 continue;
1933
1934 /* no need to do the fw loading again if already done */
1935 if (adev->ip_blocks[i].status.hw == true)
1936 break;
1937
df9c8d1a 1938 if (amdgpu_in_reset(adev) || adev->in_suspend) {
482f0e53
ML
1939 r = adev->ip_blocks[i].version->funcs->resume(adev);
1940 if (r) {
1941 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 1942 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
1943 return r;
1944 }
1945 } else {
1946 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1947 if (r) {
1948 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1949 adev->ip_blocks[i].version->funcs->name, r);
1950 return r;
7a3e0bb2 1951 }
7a3e0bb2 1952 }
482f0e53
ML
1953
1954 adev->ip_blocks[i].status.hw = true;
1955 break;
7a3e0bb2
RZ
1956 }
1957 }
482f0e53 1958
8973d9ec
ED
1959 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
1960 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 1961
80f41f84 1962 return r;
7a3e0bb2
RZ
1963}
1964
e3ecdffa
AD
1965/**
1966 * amdgpu_device_ip_init - run init for hardware IPs
1967 *
1968 * @adev: amdgpu_device pointer
1969 *
1970 * Main initialization pass for hardware IPs. The list of all the hardware
1971 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
1972 * are run. sw_init initializes the software state associated with each IP
1973 * and hw_init initializes the hardware associated with each IP.
1974 * Returns 0 on success, negative error code on failure.
1975 */
06ec9070 1976static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
1977{
1978 int i, r;
1979
c030f2e4 1980 r = amdgpu_ras_init(adev);
1981 if (r)
1982 return r;
1983
d38ceaf9 1984 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1985 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 1986 continue;
a1255107 1987 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 1988 if (r) {
a1255107
AD
1989 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
1990 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 1991 goto init_failed;
2c1a2784 1992 }
a1255107 1993 adev->ip_blocks[i].status.sw = true;
bfca0289 1994
d38ceaf9 1995 /* need to do gmc hw init early so we can allocate gpu mem */
a1255107 1996 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
06ec9070 1997 r = amdgpu_device_vram_scratch_init(adev);
2c1a2784
AD
1998 if (r) {
1999 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
72d3f592 2000 goto init_failed;
2c1a2784 2001 }
a1255107 2002 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
2003 if (r) {
2004 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 2005 goto init_failed;
2c1a2784 2006 }
06ec9070 2007 r = amdgpu_device_wb_init(adev);
2c1a2784 2008 if (r) {
06ec9070 2009 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 2010 goto init_failed;
2c1a2784 2011 }
a1255107 2012 adev->ip_blocks[i].status.hw = true;
2493664f
ML
2013
2014 /* right after GMC hw init, we create CSA */
f92d5c61 2015 if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
1e256e27
RZ
2016 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2017 AMDGPU_GEM_DOMAIN_VRAM,
2018 AMDGPU_CSA_SIZE);
2493664f
ML
2019 if (r) {
2020 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 2021 goto init_failed;
2493664f
ML
2022 }
2023 }
d38ceaf9
AD
2024 }
2025 }
2026
c9ffa427
YT
2027 if (amdgpu_sriov_vf(adev))
2028 amdgpu_virt_init_data_exchange(adev);
2029
533aed27
AG
2030 r = amdgpu_ib_pool_init(adev);
2031 if (r) {
2032 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2033 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2034 goto init_failed;
2035 }
2036
c8963ea4
RZ
2037 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2038 if (r)
72d3f592 2039 goto init_failed;
0a4f2520
RZ
2040
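/* bring the hw up in two phases, with firmware loading in between */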
2041 r = amdgpu_device_ip_hw_init_phase1(adev);
2042 if (r)
72d3f592 2043 goto init_failed;
0a4f2520 2044
7a3e0bb2
RZ
2045 r = amdgpu_device_fw_loading(adev);
2046 if (r)
72d3f592 2047 goto init_failed;
7a3e0bb2 2048
0a4f2520
RZ
2049 r = amdgpu_device_ip_hw_init_phase2(adev);
2050 if (r)
72d3f592 2051 goto init_failed;
d38ceaf9 2052
121a2bc6
AG
2053 /*
2054 * Retired pages will be loaded from eeprom and reserved here;
2055 * this should be called after amdgpu_device_ip_hw_init_phase2, since
2056 * for some ASICs the RAS EEPROM code relies on the SMU being fully
2057 * functional for I2C communication, which is only true at this point.
2058 * recovery_init may fail, but it can free all resources allocated by
2059 * itself and its failure should not stop amdgpu init process.
2060 *
2061 * Note: theoretically, this should be called before all vram allocations
2062 * to prevent the retired pages from being allocated by anything else.
2063 */
2064 amdgpu_ras_recovery_init(adev);
2065
3e2e2ab5
HZ
2066 if (adev->gmc.xgmi.num_physical_nodes > 1)
2067 amdgpu_xgmi_add_device(adev);
1884734a 2068 amdgpu_amdkfd_device_init(adev);
c6332b97 2069
bd607166
KR
2070 amdgpu_fru_get_product_info(adev);
2071
72d3f592 2072init_failed:
c9ffa427 2073 if (amdgpu_sriov_vf(adev))
c6332b97 2074 amdgpu_virt_release_full_gpu(adev, true);
2075
72d3f592 2076 return r;
d38ceaf9
AD
2077}
2078
e3ecdffa
AD
2079/**
2080 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2081 *
2082 * @adev: amdgpu_device pointer
2083 *
2084 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2085 * this function before a GPU reset. If the value is retained after a
2086 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2087 */
06ec9070 2088static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2089{
2090 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2091}
2092
e3ecdffa
AD
2093/**
2094 * amdgpu_device_check_vram_lost - check if vram is valid
2095 *
2096 * @adev: amdgpu_device pointer
2097 *
2098 * Checks the reset magic value written to the gart pointer in VRAM.
2099 * The driver calls this after a GPU reset to see if the contents of
2100 * VRAM have been lost or not.
2101 * Returns true if VRAM is lost, false if not.
2102 */
06ec9070 2103static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2104{
dadce777
EQ
2105 if (memcmp(adev->gart.ptr, adev->reset_magic,
2106 AMDGPU_RESET_MAGIC_NUM))
2107 return true;
2108
df9c8d1a 2109 if (!amdgpu_in_reset(adev))
dadce777
EQ
2110 return false;
2111
2112 /*
2113 * For all ASICs with baco/mode1 reset, the VRAM is
2114 * always assumed to be lost.
2115 */
2116 switch (amdgpu_asic_reset_method(adev)) {
2117 case AMD_RESET_METHOD_BACO:
2118 case AMD_RESET_METHOD_MODE1:
2119 return true;
2120 default:
2121 return false;
2122 }
0c49e0b8
CZ
2123}
2124
e3ecdffa 2125/**
1112a46b 2126 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2127 *
2128 * @adev: amdgpu_device pointer
b8b72130 2129 * @state: clockgating state (gate or ungate)
e3ecdffa 2130 *
e3ecdffa 2131 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2132 * set_clockgating_state callbacks are run.
2133 * On late init, this pass enables clockgating for the hardware IPs;
2134 * on fini or suspend, it disables clockgating for the hardware IPs.
e3ecdffa
AD
2135 * Returns 0 on success, negative error code on failure.
2136 */
fdd34271 2137
1112a46b
RZ
2138static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2139 enum amd_clockgating_state state)
d38ceaf9 2140{
1112a46b 2141 int i, j, r;
d38ceaf9 2142
4a2ba394
SL
2143 if (amdgpu_emu_mode == 1)
2144 return 0;
2145
1112a46b
RZ
2146 for (j = 0; j < adev->num_ip_blocks; j++) {
2147 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2148 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2149 continue;
4a446d55 2150 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2151 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2152 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2153 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2154 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2155 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2156 /* enable clockgating to save power */
a1255107 2157 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2158 state);
4a446d55
AD
2159 if (r) {
2160 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2161 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2162 return r;
2163 }
b0b00ff1 2164 }
d38ceaf9 2165 }
06b18f61 2166
c9f96fd5
RZ
2167 return 0;
2168}
2169
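/* Same walk as amdgpu_device_set_cg_state(), but for powergating */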
1112a46b 2170static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
c9f96fd5 2171{
1112a46b 2172 int i, j, r;
06b18f61 2173
c9f96fd5
RZ
2174 if (amdgpu_emu_mode == 1)
2175 return 0;
2176
1112a46b
RZ
2177 for (j = 0; j < adev->num_ip_blocks; j++) {
2178 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2179 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5
RZ
2180 continue;
2181 /* skip CG for VCE/UVD, it's handled specially */
2182 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2183 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2184 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2185 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2186 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2187 /* enable powergating to save power */
2188 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2189 state);
c9f96fd5
RZ
2190 if (r) {
2191 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2192 adev->ip_blocks[i].version->funcs->name, r);
2193 return r;
2194 }
2195 }
2196 }
2dc80b00
S
2197 return 0;
2198}
2199
beff74bc
AD
2200static int amdgpu_device_enable_mgpu_fan_boost(void)
2201{
2202 struct amdgpu_gpu_instance *gpu_ins;
2203 struct amdgpu_device *adev;
2204 int i, ret = 0;
2205
2206 mutex_lock(&mgpu_info.mutex);
2207
2208 /*
2209 * MGPU fan boost feature should be enabled
2210 * only when there are two or more dGPUs in
2211 * the system
2212 */
2213 if (mgpu_info.num_dgpu < 2)
2214 goto out;
2215
2216 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2217 gpu_ins = &(mgpu_info.gpu_ins[i]);
2218 adev = gpu_ins->adev;
2219 if (!(adev->flags & AMD_IS_APU) &&
2220 !gpu_ins->mgpu_fan_enabled &&
2221 adev->powerplay.pp_funcs &&
2222 adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
2223 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2224 if (ret)
2225 break;
2226
2227 gpu_ins->mgpu_fan_enabled = 1;
2228 }
2229 }
2230
2231out:
2232 mutex_unlock(&mgpu_info.mutex);
2233
2234 return ret;
2235}
2236
e3ecdffa
AD
2237/**
2238 * amdgpu_device_ip_late_init - run late init for hardware IPs
2239 *
2240 * @adev: amdgpu_device pointer
2241 *
2242 * Late initialization pass for hardware IPs. The list of all the hardware
2243 * IPs that make up the asic is walked and the late_init callbacks are run.
2244 * late_init covers any special initialization that an IP requires
2245 * after all of the IPs have been initialized or something that needs to happen
2246 * late in the init process.
2247 * Returns 0 on success, negative error code on failure.
2248 */
06ec9070 2249static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2250{
60599a03 2251 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2252 int i = 0, r;
2253
2254 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2255 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2256 continue;
2257 if (adev->ip_blocks[i].version->funcs->late_init) {
2258 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2259 if (r) {
2260 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2261 adev->ip_blocks[i].version->funcs->name, r);
2262 return r;
2263 }
2dc80b00 2264 }
73f847db 2265 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2266 }
2267
a891d239
DL
2268 amdgpu_ras_set_error_query_ready(adev, true);
2269
1112a46b
RZ
2270 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2271 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2272
06ec9070 2273 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2274
beff74bc
AD
2275 r = amdgpu_device_enable_mgpu_fan_boost();
2276 if (r)
2277 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2278
60599a03
EQ
2279
2280 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2281 mutex_lock(&mgpu_info.mutex);
2282
2283 /*
2284 * Reset the device p-state to low, as it was booted with a high p-state.
2285 *
2286 * This should be performed only after all devices from the same
2287 * hive get initialized.
2288 *
2289 * However, the number of devices in the hive is not known in advance;
2290 * it is counted one by one as the devices initialize.
2291 *
2292 * So, we wait for all XGMI interlinked devices to be initialized.
2293 * This may bring some delays as those devices may come from
2294 * different hives. But that should be OK.
2295 */
2296 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2297 for (i = 0; i < mgpu_info.num_gpu; i++) {
2298 gpu_instance = &(mgpu_info.gpu_ins[i]);
2299 if (gpu_instance->adev->flags & AMD_IS_APU)
2300 continue;
2301
d84a430d
JK
2302 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2303 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
2304 if (r) {
2305 DRM_ERROR("pstate setting failed (%d).\n", r);
2306 break;
2307 }
2308 }
2309 }
2310
2311 mutex_unlock(&mgpu_info.mutex);
2312 }
2313
d38ceaf9
AD
2314 return 0;
2315}
2316
e3ecdffa
AD
2317/**
2318 * amdgpu_device_ip_fini - run fini for hardware IPs
2319 *
2320 * @adev: amdgpu_device pointer
2321 *
2322 * Main teardown pass for hardware IPs. The list of all the hardware
2323 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2324 * are run. hw_fini tears down the hardware associated with each IP
2325 * and sw_fini tears down any software state associated with each IP.
2326 * Returns 0 on success, negative error code on failure.
2327 */
06ec9070 2328static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
d38ceaf9
AD
2329{
2330 int i, r;
2331
5278a159
SY
2332 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2333 amdgpu_virt_release_ras_err_handler_data(adev);
2334
c030f2e4 2335 amdgpu_ras_pre_fini(adev);
2336
a82400b5
AG
2337 if (adev->gmc.xgmi.num_physical_nodes > 1)
2338 amdgpu_xgmi_remove_device(adev);
2339
1884734a 2340 amdgpu_amdkfd_device_fini(adev);
05df1f01
RZ
2341
2342 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2343 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2344
3e96dbfd
AD
2345 /* need to disable SMC first */
2346 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2347 if (!adev->ip_blocks[i].status.hw)
3e96dbfd 2348 continue;
fdd34271 2349 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
a1255107 2350 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3e96dbfd
AD
2351 /* XXX handle errors */
2352 if (r) {
2353 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
a1255107 2354 adev->ip_blocks[i].version->funcs->name, r);
3e96dbfd 2355 }
a1255107 2356 adev->ip_blocks[i].status.hw = false;
3e96dbfd
AD
2357 break;
2358 }
2359 }
2360
d38ceaf9 2361 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2362 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2363 continue;
8201a67a 2364
a1255107 2365 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2366 /* XXX handle errors */
2c1a2784 2367 if (r) {
a1255107
AD
2368 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2369 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2370 }
8201a67a 2371
a1255107 2372 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2373 }
2374
9950cda2 2375
d38ceaf9 2376 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2377 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2378 continue;
c12aba3a
ML
2379
2380 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2381 amdgpu_ucode_free_bo(adev);
1e256e27 2382 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a
ML
2383 amdgpu_device_wb_fini(adev);
2384 amdgpu_device_vram_scratch_fini(adev);
533aed27 2385 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2386 }
2387
a1255107 2388 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2389 /* XXX handle errors */
2c1a2784 2390 if (r) {
a1255107
AD
2391 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2392 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2393 }
a1255107
AD
2394 adev->ip_blocks[i].status.sw = false;
2395 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2396 }
2397
a6dcfd9c 2398 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2399 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2400 continue;
a1255107
AD
2401 if (adev->ip_blocks[i].version->funcs->late_fini)
2402 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2403 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2404 }
2405
c030f2e4 2406 amdgpu_ras_fini(adev);
2407
030308fc 2408 if (amdgpu_sriov_vf(adev))
24136135
ML
2409 if (amdgpu_virt_release_full_gpu(adev, false))
2410 DRM_ERROR("failed to release exclusive mode on fini\n");
2493664f 2411
d38ceaf9
AD
2412 return 0;
2413}
2414
e3ecdffa 2415/**
beff74bc 2416 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2417 *
1112a46b 2418 * @work: work_struct.
e3ecdffa 2419 */
beff74bc 2420static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2421{
2422 struct amdgpu_device *adev =
beff74bc 2423 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2424 int r;
2425
2426 r = amdgpu_ib_ring_tests(adev);
2427 if (r)
2428 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2429}
2430
1e317b99
RZ
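/*
 * Delayed work that actually enables GFXOFF once no block holds a
 * gfx_off request anymore (gfx_off_req_count has dropped to zero).
 */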
2431static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2432{
2433 struct amdgpu_device *adev =
2434 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2435
2436 mutex_lock(&adev->gfx.gfx_off_mutex);
2437 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2438 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2439 adev->gfx.gfx_off_state = true;
2440 }
2441 mutex_unlock(&adev->gfx.gfx_off_mutex);
2442}
2443
e3ecdffa 2444/**
e7854a03 2445 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2446 *
2447 * @adev: amdgpu_device pointer
2448 *
2449 * Main suspend function for hardware IPs. The list of all the hardware
2450 * IPs that make up the asic is walked, clockgating is disabled and the
2451 * suspend callbacks are run. suspend puts the hardware and software state
2452 * in each IP into a state suitable for suspend.
2453 * Returns 0 on success, negative error code on failure.
2454 */
e7854a03
AD
2455static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2456{
2457 int i, r;
2458
ced1ba97
PL
2459 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2460 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2461
e7854a03
AD
2462 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2463 if (!adev->ip_blocks[i].status.valid)
2464 continue;
2b9f7848 2465
e7854a03 2466 /* displays are handled separately */
2b9f7848
ND
2467 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
2468 continue;
2469
2470 /* XXX handle errors */
2471 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2472 /* XXX handle errors */
2473 if (r) {
2474 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2475 adev->ip_blocks[i].version->funcs->name, r);
2476 return r;
e7854a03 2477 }
2b9f7848
ND
2478
2479 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
2480 }
2481
e7854a03
AD
2482 return 0;
2483}
2484
2485/**
2486 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2487 *
2488 * @adev: amdgpu_device pointer
2489 *
2490 * Main suspend function for hardware IPs. The list of all the hardware
2491 * IPs that make up the asic is walked, clockgating is disabled and the
2492 * suspend callbacks are run. suspend puts the hardware and software state
2493 * in each IP into a state suitable for suspend.
2494 * Returns 0 on success, negative error code on failure.
2495 */
2496static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2497{
2498 int i, r;
2499
2500 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2501 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2502 continue;
e7854a03
AD
2503 /* displays are handled in phase1 */
2504 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2505 continue;
bff77e86
LM
2506 /* PSP lost connection when err_event_athub occurs */
2507 if (amdgpu_ras_intr_triggered() &&
2508 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2509 adev->ip_blocks[i].status.hw = false;
2510 continue;
2511 }
d38ceaf9 2512 /* XXX handle errors */
a1255107 2513 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 2514 /* XXX handle errors */
2c1a2784 2515 if (r) {
a1255107
AD
2516 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2517 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2518 }
876923fb 2519 adev->ip_blocks[i].status.hw = false;
a3a09142 2520 /* handle putting the SMC in the appropriate state */
86b93fd6
JZ
2521 if (!amdgpu_sriov_vf(adev)) {
2522 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2523 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
2524 if (r) {
2525 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
2526 adev->mp1_state, r);
2527 return r;
2528 }
a3a09142
AD
2529 }
2530 }
b5507c7e 2531 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2532 }
2533
2534 return 0;
2535}
2536
e7854a03
AD
2537/**
2538 * amdgpu_device_ip_suspend - run suspend for hardware IPs
2539 *
2540 * @adev: amdgpu_device pointer
2541 *
2542 * Main suspend function for hardware IPs. The list of all the hardware
2543 * IPs that make up the asic is walked, clockgating is disabled and the
2544 * suspend callbacks are run. suspend puts the hardware and software state
2545 * in each IP into a state suitable for suspend.
2546 * Returns 0 on success, negative error code on failure.
2547 */
2548int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2549{
2550 int r;
2551
e7819644
YT
2552 if (amdgpu_sriov_vf(adev))
2553 amdgpu_virt_request_full_gpu(adev, false);
2554
e7854a03
AD
2555 r = amdgpu_device_ip_suspend_phase1(adev);
2556 if (r)
2557 return r;
2558 r = amdgpu_device_ip_suspend_phase2(adev);
2559
e7819644
YT
2560 if (amdgpu_sriov_vf(adev))
2561 amdgpu_virt_release_full_gpu(adev, false);
2562
e7854a03
AD
2563 return r;
2564}
2565
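/*
 * SR-IOV re-init: bring the early group of IP blocks back up in the
 * fixed order given by ip_order[] below (GMC, COMMON, PSP, IH).
 */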
06ec9070 2566static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2567{
2568 int i, r;
2569
2cb681b6
ML
2570 static enum amd_ip_block_type ip_order[] = {
2571 AMD_IP_BLOCK_TYPE_GMC,
2572 AMD_IP_BLOCK_TYPE_COMMON,
39186aef 2573 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
2574 AMD_IP_BLOCK_TYPE_IH,
2575 };
a90ad3c2 2576
2cb681b6
ML
2577 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2578 int j;
2579 struct amdgpu_ip_block *block;
a90ad3c2 2580
2cb681b6
ML
2581 for (j = 0; j < adev->num_ip_blocks; j++) {
2582 block = &adev->ip_blocks[j];
2583
482f0e53 2584 block->status.hw = false;
2cb681b6
ML
2585 if (block->version->type != ip_order[i] ||
2586 !block->status.valid)
2587 continue;
2588
2589 r = block->version->funcs->hw_init(adev);
0aaeefcc 2590 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2591 if (r)
2592 return r;
482f0e53 2593 block->status.hw = true;
a90ad3c2
ML
2594 }
2595 }
2596
2597 return 0;
2598}
2599
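/*
 * SR-IOV re-init for the remaining blocks; note that the SMC block is
 * resumed rather than re-run through hw_init.
 */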
06ec9070 2600static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2601{
2602 int i, r;
2603
2cb681b6
ML
2604 static enum amd_ip_block_type ip_order[] = {
2605 AMD_IP_BLOCK_TYPE_SMC,
2606 AMD_IP_BLOCK_TYPE_DCE,
2607 AMD_IP_BLOCK_TYPE_GFX,
2608 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c 2609 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07
JJ
2610 AMD_IP_BLOCK_TYPE_VCE,
2611 AMD_IP_BLOCK_TYPE_VCN
2cb681b6 2612 };
a90ad3c2 2613
2cb681b6
ML
2614 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2615 int j;
2616 struct amdgpu_ip_block *block;
a90ad3c2 2617
2cb681b6
ML
2618 for (j = 0; j < adev->num_ip_blocks; j++) {
2619 block = &adev->ip_blocks[j];
2620
2621 if (block->version->type != ip_order[i] ||
482f0e53
ML
2622 !block->status.valid ||
2623 block->status.hw)
2cb681b6
ML
2624 continue;
2625
895bd048
JZ
2626 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
2627 r = block->version->funcs->resume(adev);
2628 else
2629 r = block->version->funcs->hw_init(adev);
2630
0aaeefcc 2631 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2632 if (r)
2633 return r;
482f0e53 2634 block->status.hw = true;
a90ad3c2
ML
2635 }
2636 }
2637
2638 return 0;
2639}
2640
e3ecdffa
AD
2641/**
2642 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2643 *
2644 * @adev: amdgpu_device pointer
2645 *
2646 * First resume function for hardware IPs. The list of all the hardware
2647 * IPs that make up the asic is walked and the resume callbacks are run for
2648 * COMMON, GMC, and IH. resume puts the hardware into a functional state
2649 * after a suspend and updates the software state as necessary. This
2650 * function is also used for restoring the GPU after a GPU reset.
2651 * Returns 0 on success, negative error code on failure.
2652 */
06ec9070 2653static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
2654{
2655 int i, r;
2656
a90ad3c2 2657 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2658 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 2659 continue;
a90ad3c2 2660 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa
AD
2661 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2662 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
482f0e53 2663
fcf0649f
CZ
2664 r = adev->ip_blocks[i].version->funcs->resume(adev);
2665 if (r) {
2666 DRM_ERROR("resume of IP block <%s> failed %d\n",
2667 adev->ip_blocks[i].version->funcs->name, r);
2668 return r;
2669 }
482f0e53 2670 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
2671 }
2672 }
2673
2674 return 0;
2675}
2676
e3ecdffa
AD
2677/**
2678 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2679 *
2680 * @adev: amdgpu_device pointer
2681 *
2682 * Second resume function for hardware IPs. The list of all the hardware
2683 * IPs that make up the asic is walked and the resume callbacks are run for
2684 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
2685 * functional state after a suspend and updates the software state as
2686 * necessary. This function is also used for restoring the GPU after a GPU
2687 * reset.
2688 * Returns 0 on success, negative error code on failure.
2689 */
06ec9070 2690static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2691{
2692 int i, r;
2693
2694 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2695 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 2696 continue;
fcf0649f 2697 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 2698 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
2699 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2700 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 2701 continue;
a1255107 2702 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 2703 if (r) {
a1255107
AD
2704 DRM_ERROR("resume of IP block <%s> failed %d\n",
2705 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2706 return r;
2c1a2784 2707 }
482f0e53 2708 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
2709 }
2710
2711 return 0;
2712}
2713
e3ecdffa
AD
2714/**
2715 * amdgpu_device_ip_resume - run resume for hardware IPs
2716 *
2717 * @adev: amdgpu_device pointer
2718 *
2719 * Main resume function for hardware IPs. The hardware IPs
2720 * are split into two resume functions because they are
2721 * also used in recovering from a GPU reset and some additional
2722 * steps need to be taken between them. In this case (S3/S4) they are
2723 * run sequentially.
2724 * Returns 0 on success, negative error code on failure.
2725 */
06ec9070 2726static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
2727{
2728 int r;
2729
06ec9070 2730 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
2731 if (r)
2732 return r;
7a3e0bb2
RZ
2733
2734 r = amdgpu_device_fw_loading(adev);
2735 if (r)
2736 return r;
2737
06ec9070 2738 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
2739
2740 return r;
2741}
2742
e3ecdffa
AD
2743/**
2744 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2745 *
2746 * @adev: amdgpu_device pointer
2747 *
2748 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2749 */
4e99a44e 2750static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 2751{
6867e1b5
ML
2752 if (amdgpu_sriov_vf(adev)) {
2753 if (adev->is_atom_fw) {
2754 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2755 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2756 } else {
2757 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2758 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2759 }
2760
2761 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2762 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 2763 }
048765ad
AR
2764}
2765
e3ecdffa
AD
2766/**
2767 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2768 *
2769 * @asic_type: AMD asic type
2770 *
2771 * Check if there is DC (new modesetting infrastructure) support for an asic.
2772 * Returns true if DC has support, false if not.
2773 */
4562236b
HW
2774bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2775{
2776 switch (asic_type) {
2777#if defined(CONFIG_DRM_AMD_DC)
2778 case CHIP_BONAIRE:
0d6fbccb 2779 case CHIP_KAVERI:
367e6687
AD
2780 case CHIP_KABINI:
2781 case CHIP_MULLINS:
d9fda248
HW
2782 /*
2783 * We have systems in the wild with these ASICs that require
2784 * LVDS and VGA support which is not supported with DC.
2785 *
2786 * Fallback to the non-DC driver here by default so as not to
2787 * cause regressions.
2788 */
2789 return amdgpu_dc > 0;
2790 case CHIP_HAWAII:
4562236b
HW
2791 case CHIP_CARRIZO:
2792 case CHIP_STONEY:
4562236b 2793 case CHIP_POLARIS10:
675fd32b 2794 case CHIP_POLARIS11:
2c8ad2d5 2795 case CHIP_POLARIS12:
675fd32b 2796 case CHIP_VEGAM:
4562236b
HW
2797 case CHIP_TONGA:
2798 case CHIP_FIJI:
42f8ffa1 2799 case CHIP_VEGA10:
dca7b401 2800 case CHIP_VEGA12:
c6034aa2 2801 case CHIP_VEGA20:
b86a1aa3 2802#if defined(CONFIG_DRM_AMD_DC_DCN)
fd187853 2803 case CHIP_RAVEN:
b4f199c7 2804 case CHIP_NAVI10:
8fceceb6 2805 case CHIP_NAVI14:
078655d9 2806 case CHIP_NAVI12:
e1c14c43 2807 case CHIP_RENOIR:
81d9bfb8
JFZ
2808#endif
2809#if defined(CONFIG_DRM_AMD_DC_DCN3_0)
2810 case CHIP_SIENNA_CICHLID:
a6c5308f 2811 case CHIP_NAVY_FLOUNDER:
42f8ffa1 2812#endif
fd187853 2813 return amdgpu_dc != 0;
4562236b
HW
2814#endif
2815 default:
93b09a9a
SS
2816 if (amdgpu_dc > 0)
2817 DRM_INFO("Display Core has been requested via kernel parameter "
2818 "but isn't supported by ASIC, ignoring\n");
4562236b
HW
2819 return false;
2820 }
2821}
2822
2823/**
2824 * amdgpu_device_has_dc_support - check if dc is supported
2825 *
2826 * @adev: amdgpu_device pointer
2827 *
2828 * Returns true for supported, false for not supported
2829 */
2830bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2831{
2555039d
XY
2832 if (amdgpu_sriov_vf(adev))
2833 return false;
2834
4562236b
HW
2835 return amdgpu_device_asic_has_dc_support(adev->asic_type);
2836}
2837
d4535e2c
AG
2838
2839static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
2840{
2841 struct amdgpu_device *adev =
2842 container_of(__work, struct amdgpu_device, xgmi_reset_work);
c6a6e2db 2843 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
d4535e2c 2844
c6a6e2db
AG
2845 /* It's a bug to not have a hive within this function */
2846 if (WARN_ON(!hive))
2847 return;
2848
2849 /*
2850 * Use task barrier to synchronize all xgmi reset works across the
2851 * hive. task_barrier_enter and task_barrier_exit will block
2852 * until all the threads running the xgmi reset works reach
2853 * those points. task_barrier_full will do both blocks.
2854 */
2855 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
2856
2857 task_barrier_enter(&hive->tb);
2858 adev->asic_reset_res = amdgpu_device_baco_enter(adev->ddev);
2859
2860 if (adev->asic_reset_res)
2861 goto fail;
2862
2863 task_barrier_exit(&hive->tb);
2864 adev->asic_reset_res = amdgpu_device_baco_exit(adev->ddev);
2865
2866 if (adev->asic_reset_res)
2867 goto fail;
43c4d576
JC
2868
2869 if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count)
2870 adev->mmhub.funcs->reset_ras_error_count(adev);
c6a6e2db
AG
2871 } else {
2872
2873 task_barrier_full(&hive->tb);
2874 adev->asic_reset_res = amdgpu_asic_reset(adev);
2875 }
ce316fa5 2876
c6a6e2db 2877fail:
d4535e2c 2878 if (adev->asic_reset_res)
fed184e9 2879 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
d4535e2c
AG
2880 adev->asic_reset_res, adev->ddev->unique);
2881}
2882
71f98027
AD
2883static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
2884{
2885 char *input = amdgpu_lockup_timeout;
2886 char *timeout_setting = NULL;
2887 int index = 0;
2888 long timeout;
2889 int ret = 0;
2890
2891 /*
2892 * By default the timeout for non-compute jobs is 10000 ms,
2893 * and there is no timeout enforced on compute jobs.
2894 * In SR-IOV or passthrough mode, the timeout for compute
b7b2a316 2895 * jobs is 60000 ms by default.
71f98027
AD
2896 */
2897 adev->gfx_timeout = msecs_to_jiffies(10000);
2898 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
2899 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
b7b2a316 2900 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027
AD
2901 else
2902 adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
2903
f440ff44 2904 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 2905 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 2906 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
2907 ret = kstrtol(timeout_setting, 0, &timeout);
2908 if (ret)
2909 return ret;
2910
2911 if (timeout == 0) {
2912 index++;
2913 continue;
2914 } else if (timeout < 0) {
2915 timeout = MAX_SCHEDULE_TIMEOUT;
2916 } else {
2917 timeout = msecs_to_jiffies(timeout);
2918 }
2919
2920 switch (index++) {
2921 case 0:
2922 adev->gfx_timeout = timeout;
2923 break;
2924 case 1:
2925 adev->compute_timeout = timeout;
2926 break;
2927 case 2:
2928 adev->sdma_timeout = timeout;
2929 break;
2930 case 3:
2931 adev->video_timeout = timeout;
2932 break;
2933 default:
2934 break;
2935 }
2936 }
2937 /*
2938 * There is only one value specified and
2939 * it should apply to all non-compute jobs.
2940 */
bcccee89 2941 if (index == 1) {
71f98027 2942 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
2943 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
2944 adev->compute_timeout = adev->gfx_timeout;
2945 }
71f98027
AD
2946 }
2947
2948 return ret;
2949}
d4535e2c 2950
77f3a5cd
ND
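/* device sysfs attributes, created in amdgpu_device_init() and removed in amdgpu_device_fini() */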
2951static const struct attribute *amdgpu_dev_attributes[] = {
2952 &dev_attr_product_name.attr,
2953 &dev_attr_product_number.attr,
2954 &dev_attr_serial_number.attr,
2955 &dev_attr_pcie_replay_count.attr,
2956 NULL
2957};
2958
d38ceaf9
AD
2959/**
2960 * amdgpu_device_init - initialize the driver
2961 *
2962 * @adev: amdgpu_device pointer
87e3f136 2963 * @ddev: drm dev pointer
d38ceaf9
AD
2964 * @pdev: pci dev pointer
2965 * @flags: driver flags
2966 *
2967 * Initializes the driver info and hw (all asics).
2968 * Returns 0 for success or an error on failure.
2969 * Called at driver startup.
2970 */
2971int amdgpu_device_init(struct amdgpu_device *adev,
2972 struct drm_device *ddev,
2973 struct pci_dev *pdev,
2974 uint32_t flags)
2975{
2976 int r, i;
3840c5bc 2977 bool boco = false;
95844d20 2978 u32 max_MBps;
d38ceaf9
AD
2979
2980 adev->shutdown = false;
2981 adev->dev = &pdev->dev;
2982 adev->ddev = ddev;
2983 adev->pdev = pdev;
2984 adev->flags = flags;
4e66d7d2
YZ
2985
2986 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
2987 adev->asic_type = amdgpu_force_asic_type;
2988 else
2989 adev->asic_type = flags & AMD_ASIC_MASK;
2990
d38ceaf9 2991 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 2992 if (amdgpu_emu_mode == 1)
8bdab6bb 2993 adev->usec_timeout *= 10;
770d13b1 2994 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
2995 adev->accel_working = false;
2996 adev->num_rings = 0;
2997 adev->mman.buffer_funcs = NULL;
2998 adev->mman.buffer_funcs_ring = NULL;
2999 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 3000 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 3001 adev->gmc.gmc_funcs = NULL;
f54d1867 3002 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 3003 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
3004
3005 adev->smc_rreg = &amdgpu_invalid_rreg;
3006 adev->smc_wreg = &amdgpu_invalid_wreg;
3007 adev->pcie_rreg = &amdgpu_invalid_rreg;
3008 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
3009 adev->pciep_rreg = &amdgpu_invalid_rreg;
3010 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
3011 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3012 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
3013 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3014 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3015 adev->didt_rreg = &amdgpu_invalid_rreg;
3016 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
3017 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3018 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
3019 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3020 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3021
3e39ab90
AD
3022 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3023 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3024 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
3025
3026 /* mutex initializations are all done here so we
3027 * can call these functions without locking issues */
d38ceaf9 3028 atomic_set(&adev->irq.ih.lock, 0);
0e5ca0d1 3029 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
3030 mutex_init(&adev->pm.mutex);
3031 mutex_init(&adev->gfx.gpu_clock_mutex);
3032 mutex_init(&adev->srbm_mutex);
b8866c26 3033 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3034 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 3035 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3036 mutex_init(&adev->mn_lock);
e23b74aa 3037 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3038 hash_init(adev->mn_hash);
df9c8d1a
DL
3039 init_rwsem(&adev->reset_sem);
3040 atomic_set(&adev->in_gpu_reset, 0);
32eaeae0 3041 mutex_init(&adev->psp.mutex);
bd052211 3042 mutex_init(&adev->notifier_lock);
d38ceaf9 3043
912dfc84
EQ
3044 r = amdgpu_device_check_arguments(adev);
3045 if (r)
3046 return r;
d38ceaf9 3047
d38ceaf9
AD
3048 spin_lock_init(&adev->mmio_idx_lock);
3049 spin_lock_init(&adev->smc_idx_lock);
3050 spin_lock_init(&adev->pcie_idx_lock);
3051 spin_lock_init(&adev->uvd_ctx_idx_lock);
3052 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3053 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3054 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3055 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3056 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3057
0c4e7fa5
CZ
3058 INIT_LIST_HEAD(&adev->shadow_list);
3059 mutex_init(&adev->shadow_list_lock);
3060
beff74bc
AD
3061 INIT_DELAYED_WORK(&adev->delayed_init_work,
3062 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3063 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3064 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3065
d4535e2c
AG
3066 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3067
d23ee13f 3068 adev->gfx.gfx_off_req_count = 1;
b6e79d9a 3069 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3070
b265bdbd
EQ
3071 atomic_set(&adev->throttling_logging_enabled, 1);
3072 /*
3073 * If throttling continues, logging will be performed every minute
3074 * to avoid log flooding. "-1" is subtracted since the thermal
3075 * throttling interrupt comes every second. Thus, the total logging
3076 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
3077 * for throttling interrupt) = 60 seconds.
3078 */
3079 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3080 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3081
0fa49558
AX
3082 /* Registers mapping */
3083 /* TODO: block userspace mapping of io register */
da69c161
KW
3084 if (adev->asic_type >= CHIP_BONAIRE) {
3085 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3086 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3087 } else {
3088 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3089 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3090 }
d38ceaf9 3091
d38ceaf9
AD
3092 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3093 if (adev->rmmio == NULL) {
3094 return -ENOMEM;
3095 }
3096 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3097 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3098
d38ceaf9
AD
3099 /* io port mapping */
3100 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
3101 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
3102 adev->rio_mem_size = pci_resource_len(adev->pdev, i);
3103 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
3104 break;
3105 }
3106 }
3107 if (adev->rio_mem == NULL)
b64a18c5 3108 DRM_INFO("PCI I/O BAR is not found.\n");
d38ceaf9 3109
b2109d8e
JX
3110 /* enable PCIE atomic ops */
3111 r = pci_enable_atomic_ops_to_root(adev->pdev,
3112 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3113 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3114 if (r) {
3115 adev->have_atomics_support = false;
3116 DRM_INFO("PCIE atomic ops is not supported\n");
3117 } else {
3118 adev->have_atomics_support = true;
3119 }
3120
5494d864
AD
3121 amdgpu_device_get_pcie_info(adev);
3122
b239c017
JX
3123 if (amdgpu_mcbp)
3124 DRM_INFO("MCBP is enabled\n");
3125
5f84cc63
JX
3126 if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
3127 adev->enable_mes = true;
3128
3aa0115d
ML
3129 /* detect hw virtualization here */
3130 amdgpu_detect_virtualization(adev);
3131
dffa11b4
ML
3132 r = amdgpu_device_get_job_timeout_settings(adev);
3133 if (r) {
3134 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
3135 return r;
a190d1c7
XY
3136 }
3137
d38ceaf9 3138 /* early init functions */
06ec9070 3139 r = amdgpu_device_ip_early_init(adev);
d38ceaf9
AD
3140 if (r)
3141 return r;
3142
6585661d
OZ
3143 /* doorbell bar mapping and doorbell index init*/
3144 amdgpu_device_doorbell_init(adev);
3145
d38ceaf9
AD
3146 /* if we have more than one VGA card, then disable the amdgpu VGA resources */
3147 /* this will fail for cards that aren't VGA class devices, just
3148 * ignore it */
06ec9070 3149 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
d38ceaf9 3150
31af062a 3151 if (amdgpu_device_supports_boco(ddev))
3840c5bc
AD
3152 boco = true;
3153 if (amdgpu_has_atpx() &&
3154 (amdgpu_is_atpx_hybrid() ||
3155 amdgpu_has_atpx_dgpu_power_cntl()) &&
3156 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 3157 vga_switcheroo_register_client(adev->pdev,
3840c5bc
AD
3158 &amdgpu_switcheroo_ops, boco);
3159 if (boco)
d38ceaf9
AD
3160 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
3161
9475a943
SL
3162 if (amdgpu_emu_mode == 1) {
3163 /* post the asic on emulation mode */
3164 emu_soc_asic_init(adev);
bfca0289 3165 goto fence_driver_init;
9475a943 3166 }
bfca0289 3167
4e99a44e
ML
3168 /* detect if we are with an SRIOV vbios */
3169 amdgpu_device_detect_sriov_bios(adev);
048765ad 3170
95e8e59e
AD
3171 /* check if we need to reset the asic
3172 * E.g., driver was not cleanly unloaded previously, etc.
3173 */
f14899fd 3174 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
95e8e59e
AD
3175 r = amdgpu_asic_reset(adev);
3176 if (r) {
3177 dev_err(adev->dev, "asic reset on init failed\n");
3178 goto failed;
3179 }
3180 }
3181
d38ceaf9 3182 /* Post card if necessary */
39c640c0 3183 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3184 if (!adev->bios) {
bec86378 3185 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3186 r = -EINVAL;
3187 goto failed;
d38ceaf9 3188 }
bec86378 3189 DRM_INFO("GPU posting now...\n");
4e99a44e
ML
3190 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3191 if (r) {
3192 dev_err(adev->dev, "gpu post error!\n");
3193 goto failed;
3194 }
d38ceaf9
AD
3195 }
3196
88b64e95
AD
3197 if (adev->is_atom_fw) {
3198 /* Initialize clocks */
3199 r = amdgpu_atomfirmware_get_clock_info(adev);
3200 if (r) {
3201 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 3202 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
3203 goto failed;
3204 }
3205 } else {
a5bde2f9
AD
3206 /* Initialize clocks */
3207 r = amdgpu_atombios_get_clock_info(adev);
3208 if (r) {
3209 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 3210 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 3211 goto failed;
a5bde2f9
AD
3212 }
3213 /* init i2c buses */
4562236b
HW
3214 if (!amdgpu_device_has_dc_support(adev))
3215 amdgpu_atombios_i2c_init(adev);
2c1a2784 3216 }
d38ceaf9 3217
bfca0289 3218fence_driver_init:
d38ceaf9
AD
3219 /* Fence driver */
3220 r = amdgpu_fence_driver_init(adev);
2c1a2784
AD
3221 if (r) {
3222 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
e23b74aa 3223 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3224 goto failed;
2c1a2784 3225 }
d38ceaf9
AD
3226
3227 /* init the mode config */
3228 drm_mode_config_init(adev->ddev);
3229
06ec9070 3230 r = amdgpu_device_ip_init(adev);
d38ceaf9 3231 if (r) {
8840a387 3232 /* failed in exclusive mode due to timeout */
3233 if (amdgpu_sriov_vf(adev) &&
3234 !amdgpu_sriov_runtime(adev) &&
3235 amdgpu_virt_mmio_blocked(adev) &&
3236 !amdgpu_virt_wait_reset(adev)) {
3237 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
3238 /* Don't send request since VF is inactive. */
3239 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3240 adev->virt.ops = NULL;
8840a387 3241 r = -EAGAIN;
3242 goto failed;
3243 }
06ec9070 3244 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3245 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
83ba126a 3246 goto failed;
d38ceaf9
AD
3247 }
3248
d69b8971
YZ
3249 dev_info(adev->dev,
3250 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
3251 adev->gfx.config.max_shader_engines,
3252 adev->gfx.config.max_sh_per_se,
3253 adev->gfx.config.max_cu_per_sh,
3254 adev->gfx.cu_info.number);
3255
d38ceaf9
AD
3256 adev->accel_working = true;
3257
e59c0205
AX
3258 amdgpu_vm_check_compute_bug(adev);
3259
95844d20
MO
3260 /* Initialize the buffer migration limit. */
3261 if (amdgpu_moverate >= 0)
3262 max_MBps = amdgpu_moverate;
3263 else
3264 max_MBps = 8; /* Allow 8 MB/s. */
3265 /* Get a log2 for easy divisions. */
3266 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3267
9bc92b9c
ML
3268 amdgpu_fbdev_init(adev);
3269
d2f52ac8 3270 r = amdgpu_pm_sysfs_init(adev);
7c868b59
YT
3271 if (r) {
3272 adev->pm_sysfs_en = false;
d2f52ac8 3273 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
7c868b59
YT
3274 } else
3275 adev->pm_sysfs_en = true;
d2f52ac8 3276
5bb23532 3277 r = amdgpu_ucode_sysfs_init(adev);
7c868b59
YT
3278 if (r) {
3279 adev->ucode_sysfs_en = false;
5bb23532 3280 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
7c868b59
YT
3281 } else
3282 adev->ucode_sysfs_en = true;
5bb23532 3283
d38ceaf9
AD
3284 if ((amdgpu_testing & 1)) {
3285 if (adev->accel_working)
3286 amdgpu_test_moves(adev);
3287 else
3288 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
3289 }
d38ceaf9
AD
3290 if (amdgpu_benchmarking) {
3291 if (adev->accel_working)
3292 amdgpu_benchmark(adev, amdgpu_benchmarking);
3293 else
3294 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
3295 }
3296
b0adca4d
EQ
3297 /*
3298 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3299 * Otherwise the mgpu fan boost feature will be skipped because this
3300 * gpu instance would not yet be counted.
3301 */
3302 amdgpu_register_gpu_instance(adev);
3303
d38ceaf9
AD
3304 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3305 * explicit gating rather than handling it automatically.
3306 */
06ec9070 3307 r = amdgpu_device_ip_late_init(adev);
2c1a2784 3308 if (r) {
06ec9070 3309 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
e23b74aa 3310 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
83ba126a 3311 goto failed;
2c1a2784 3312 }
d38ceaf9 3313
108c6a63 3314 /* must succeed. */
511fdbc3 3315 amdgpu_ras_resume(adev);
108c6a63 3316
beff74bc
AD
3317 queue_delayed_work(system_wq, &adev->delayed_init_work,
3318 msecs_to_jiffies(AMDGPU_RESUME_MS));
3319
2c738637
ML
3320 if (amdgpu_sriov_vf(adev))
3321 flush_delayed_work(&adev->delayed_init_work);
3322
77f3a5cd 3323 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
bd607166 3324 if (r) {
77f3a5cd 3325 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166
KR
3326 return r;
3327 }
3328
d155bef0
AB
3329 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3330 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
3331 if (r)
3332 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3333
d38ceaf9 3334 return 0;
83ba126a
AD
3335
3336failed:
89041940 3337 amdgpu_vf_error_trans_all(adev);
3840c5bc 3338 if (boco)
83ba126a 3339 vga_switcheroo_fini_domain_pm_ops(adev->dev);
8840a387 3340
83ba126a 3341 return r;
d38ceaf9
AD
3342}
3343
d38ceaf9
AD
3344/**
3345 * amdgpu_device_fini - tear down the driver
3346 *
3347 * @adev: amdgpu_device pointer
3348 *
3349 * Tear down the driver info (all asics).
3350 * Called at driver shutdown.
3351 */
3352void amdgpu_device_fini(struct amdgpu_device *adev)
3353{
3354 int r;
3355
3356 DRM_INFO("amdgpu: finishing device.\n");
9f875167 3357 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 3358 adev->shutdown = true;
9f875167 3359
752c683d
ML
3360	/* make sure IB tests have finished before entering exclusive mode
3361	 * to avoid preemption during the IB tests
3362	 */
3363 if (amdgpu_sriov_vf(adev))
3364 amdgpu_virt_request_full_gpu(adev, false);
3365
e5b03032
ML
3366 /* disable all interrupts */
3367 amdgpu_irq_disable_all(adev);
ff97cba8
ML
3368 if (adev->mode_info.mode_config_initialized){
3369 if (!amdgpu_device_has_dc_support(adev))
c2d88e06 3370 drm_helper_force_disable_all(adev->ddev);
ff97cba8
ML
3371 else
3372 drm_atomic_helper_shutdown(adev->ddev);
3373 }
d38ceaf9 3374 amdgpu_fence_driver_fini(adev);
7c868b59
YT
3375 if (adev->pm_sysfs_en)
3376 amdgpu_pm_sysfs_fini(adev);
d38ceaf9 3377 amdgpu_fbdev_fini(adev);
06ec9070 3378 r = amdgpu_device_ip_fini(adev);
75e1658e
ND
3379 release_firmware(adev->firmware.gpu_info_fw);
3380 adev->firmware.gpu_info_fw = NULL;
d38ceaf9
AD
3381 adev->accel_working = false;
3382 /* free i2c buses */
4562236b
HW
3383 if (!amdgpu_device_has_dc_support(adev))
3384 amdgpu_i2c_fini(adev);
bfca0289
SL
3385
3386 if (amdgpu_emu_mode != 1)
3387 amdgpu_atombios_fini(adev);
3388
d38ceaf9
AD
3389 kfree(adev->bios);
3390 adev->bios = NULL;
3840c5bc
AD
3391 if (amdgpu_has_atpx() &&
3392 (amdgpu_is_atpx_hybrid() ||
3393 amdgpu_has_atpx_dgpu_power_cntl()) &&
3394 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 3395 vga_switcheroo_unregister_client(adev->pdev);
3840c5bc 3396 if (amdgpu_device_supports_boco(adev->ddev))
83ba126a 3397 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d38ceaf9
AD
3398 vga_client_register(adev->pdev, NULL, NULL, NULL);
3399 if (adev->rio_mem)
3400 pci_iounmap(adev->pdev, adev->rio_mem);
3401 adev->rio_mem = NULL;
3402 iounmap(adev->rmmio);
3403 adev->rmmio = NULL;
06ec9070 3404 amdgpu_device_doorbell_fini(adev);
e9bc1bf7 3405
7c868b59
YT
3406 if (adev->ucode_sysfs_en)
3407 amdgpu_ucode_sysfs_fini(adev);
77f3a5cd
ND
3408
3409 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
d155bef0
AB
3410 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3411 amdgpu_pmu_fini(adev);
4292b0b2 3412 if (adev->discovery_bin)
a190d1c7 3413 amdgpu_discovery_fini(adev);
d38ceaf9
AD
3414}
3415
3416
3417/*
3418 * Suspend & resume.
3419 */
3420/**
810ddc3a 3421 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 3422 *
87e3f136 3423 * @dev: drm dev pointer
87e3f136 3424 * @fbcon: notify the fbdev of suspend
d38ceaf9
AD
3425 *
3426 * Puts the hw in the suspend state (all asics).
3427 * Returns 0 for success or an error on failure.
3428 * Called at driver suspend.
3429 */
de185019 3430int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3431{
3432 struct amdgpu_device *adev;
3433 struct drm_crtc *crtc;
3434 struct drm_connector *connector;
f8d2d39e 3435 struct drm_connector_list_iter iter;
5ceb54c6 3436 int r;
d38ceaf9
AD
3437
3438 if (dev == NULL || dev->dev_private == NULL) {
3439 return -ENODEV;
3440 }
3441
3442 adev = dev->dev_private;
3443
3444 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3445 return 0;
3446
44779b43 3447 adev->in_suspend = true;
d38ceaf9
AD
3448 drm_kms_helper_poll_disable(dev);
3449
5f818173
S
3450 if (fbcon)
3451 amdgpu_fbdev_set_suspend(adev, 1);
3452
beff74bc 3453 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 3454
4562236b
HW
3455 if (!amdgpu_device_has_dc_support(adev)) {
3456 /* turn off display hw */
3457 drm_modeset_lock_all(dev);
f8d2d39e
LP
3458 drm_connector_list_iter_begin(dev, &iter);
3459 drm_for_each_connector_iter(connector, &iter)
3460 drm_helper_connector_dpms(connector,
3461 DRM_MODE_DPMS_OFF);
3462 drm_connector_list_iter_end(&iter);
4562236b 3463 drm_modeset_unlock_all(dev);
fe1053b7
AD
3464 /* unpin the front buffers and cursors */
3465 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3466 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3467 struct drm_framebuffer *fb = crtc->primary->fb;
3468 struct amdgpu_bo *robj;
3469
91334223 3470 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3471 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3472 r = amdgpu_bo_reserve(aobj, true);
3473 if (r == 0) {
3474 amdgpu_bo_unpin(aobj);
3475 amdgpu_bo_unreserve(aobj);
3476 }
756e6880 3477 }
756e6880 3478
fe1053b7
AD
3479 if (fb == NULL || fb->obj[0] == NULL) {
3480 continue;
3481 }
3482 robj = gem_to_amdgpu_bo(fb->obj[0]);
3483 /* don't unpin kernel fb objects */
3484 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
3485 r = amdgpu_bo_reserve(robj, true);
3486 if (r == 0) {
3487 amdgpu_bo_unpin(robj);
3488 amdgpu_bo_unreserve(robj);
3489 }
d38ceaf9
AD
3490 }
3491 }
3492 }
fe1053b7 3493
5e6932fe 3494 amdgpu_ras_suspend(adev);
3495
fe1053b7
AD
3496 r = amdgpu_device_ip_suspend_phase1(adev);
3497
94fa5660
EQ
3498 amdgpu_amdkfd_suspend(adev, !fbcon);
3499
d38ceaf9
AD
3500 /* evict vram memory */
3501 amdgpu_bo_evict_vram(adev);
3502
5ceb54c6 3503 amdgpu_fence_driver_suspend(adev);
d38ceaf9 3504
fe1053b7 3505 r = amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 3506
a0a71e49
AD
3507 /* evict remaining vram memory
3508 * This second call to evict vram is to evict the gart page table
3509 * using the CPU.
3510 */
d38ceaf9
AD
3511 amdgpu_bo_evict_vram(adev);
3512
d38ceaf9
AD
3513 return 0;
3514}
3515
3516/**
810ddc3a 3517 * amdgpu_device_resume - initiate device resume
d38ceaf9 3518 *
87e3f136 3519 * @dev: drm dev pointer
87e3f136 3520 * @fbcon: notify the fbdev of resume
d38ceaf9
AD
3521 *
3522 * Bring the hw back to operating state (all asics).
3523 * Returns 0 for success or an error on failure.
3524 * Called at driver resume.
3525 */
de185019 3526int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3527{
3528 struct drm_connector *connector;
f8d2d39e 3529 struct drm_connector_list_iter iter;
d38ceaf9 3530 struct amdgpu_device *adev = dev->dev_private;
756e6880 3531 struct drm_crtc *crtc;
03161a6e 3532 int r = 0;
d38ceaf9
AD
3533
3534 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3535 return 0;
3536
d38ceaf9 3537 /* post card */
39c640c0 3538 if (amdgpu_device_need_post(adev)) {
74b0b157 3539 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3540 if (r)
3541 DRM_ERROR("amdgpu asic init failed\n");
3542 }
d38ceaf9 3543
06ec9070 3544 r = amdgpu_device_ip_resume(adev);
e6707218 3545 if (r) {
06ec9070 3546 DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
4d3b9ae5 3547 return r;
e6707218 3548 }
5ceb54c6
AD
3549 amdgpu_fence_driver_resume(adev);
3550
d38ceaf9 3551
06ec9070 3552 r = amdgpu_device_ip_late_init(adev);
03161a6e 3553 if (r)
4d3b9ae5 3554 return r;
d38ceaf9 3555
beff74bc
AD
3556 queue_delayed_work(system_wq, &adev->delayed_init_work,
3557 msecs_to_jiffies(AMDGPU_RESUME_MS));
3558
fe1053b7
AD
3559 if (!amdgpu_device_has_dc_support(adev)) {
3560 /* pin cursors */
3561 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3562 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3563
91334223 3564 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3565 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3566 r = amdgpu_bo_reserve(aobj, true);
3567 if (r == 0) {
3568 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
3569 if (r != 0)
3570 DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
3571 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
3572 amdgpu_bo_unreserve(aobj);
3573 }
756e6880
AD
3574 }
3575 }
3576 }
9593f4d6 3577 r = amdgpu_amdkfd_resume(adev, !fbcon);
ba997709
YZ
3578 if (r)
3579 return r;
756e6880 3580
96a5d8d4 3581 /* Make sure IB tests flushed */
beff74bc 3582 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 3583
d38ceaf9
AD
3584 /* blat the mode back in */
3585 if (fbcon) {
4562236b
HW
3586 if (!amdgpu_device_has_dc_support(adev)) {
3587 /* pre DCE11 */
3588 drm_helper_resume_force_mode(dev);
3589
3590 /* turn on display hw */
3591 drm_modeset_lock_all(dev);
f8d2d39e
LP
3592
3593 drm_connector_list_iter_begin(dev, &iter);
3594 drm_for_each_connector_iter(connector, &iter)
3595 drm_helper_connector_dpms(connector,
3596 DRM_MODE_DPMS_ON);
3597 drm_connector_list_iter_end(&iter);
3598
4562236b 3599 drm_modeset_unlock_all(dev);
d38ceaf9 3600 }
4d3b9ae5 3601 amdgpu_fbdev_set_suspend(adev, 0);
d38ceaf9
AD
3602 }
3603
3604 drm_kms_helper_poll_enable(dev);
23a1a9e5 3605
5e6932fe 3606 amdgpu_ras_resume(adev);
3607
23a1a9e5
L
3608 /*
3609 * Most of the connector probing functions try to acquire runtime pm
3610 * refs to ensure that the GPU is powered on when connector polling is
3611 * performed. Since we're calling this from a runtime PM callback,
3612 * trying to acquire rpm refs will cause us to deadlock.
3613 *
3614 * Since we're guaranteed to be holding the rpm lock, it's safe to
3615 * temporarily disable the rpm helpers so this doesn't deadlock us.
3616 */
3617#ifdef CONFIG_PM
3618 dev->dev->power.disable_depth++;
3619#endif
4562236b
HW
3620 if (!amdgpu_device_has_dc_support(adev))
3621 drm_helper_hpd_irq_event(dev);
3622 else
3623 drm_kms_helper_hotplug_event(dev);
23a1a9e5
L
3624#ifdef CONFIG_PM
3625 dev->dev->power.disable_depth--;
3626#endif
44779b43
RZ
3627 adev->in_suspend = false;
3628
4d3b9ae5 3629 return 0;
d38ceaf9
AD
3630}
3631
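The suspend and resume paths above both use the same discipline around the cursor buffer objects: reserve the BO, (un)pin it, and always drop the reservation again even if the pin failed. A small userspace sketch of that pattern follows; struct bo and the bo_*() helpers are illustrative stand-ins, not the amdgpu_bo API.

#include <stdio.h>

/* Sketch of the reserve/pin/unreserve pattern used for the cursor BOs above.
 * All types and helpers here are placeholders for illustration only. */
struct bo { int reserved, pinned; };

static int bo_reserve(struct bo *b)    { b->reserved = 1; return 0; }
static void bo_unreserve(struct bo *b) { b->reserved = 0; }
static int bo_pin(struct bo *b)        { b->pinned = 1; return 0; }

static int pin_cursor(struct bo *cursor)
{
        int r = bo_reserve(cursor);

        if (r == 0) {
                r = bo_pin(cursor);
                if (r != 0)
                        fprintf(stderr, "Failed to pin cursor BO (%d)\n", r);
                bo_unreserve(cursor);   /* release the reservation either way */
        }
        return r;
}

int main(void)
{
        struct bo cursor = { 0, 0 };

        printf("pin result: %d, pinned=%d\n", pin_cursor(&cursor), cursor.pinned);
        return 0;
}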
e3ecdffa
AD
3632/**
3633 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
3634 *
3635 * @adev: amdgpu_device pointer
3636 *
3637 * The list of all the hardware IPs that make up the asic is walked and
3638 * the check_soft_reset callbacks are run. check_soft_reset determines
3639 * if the asic is still hung or not.
3640 * Returns true if any of the IPs are still in a hung state, false if not.
3641 */
06ec9070 3642static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
3643{
3644 int i;
3645 bool asic_hang = false;
3646
f993d628
ML
3647 if (amdgpu_sriov_vf(adev))
3648 return true;
3649
8bc04c29
AD
3650 if (amdgpu_asic_need_full_reset(adev))
3651 return true;
3652
63fbf42f 3653 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3654 if (!adev->ip_blocks[i].status.valid)
63fbf42f 3655 continue;
a1255107
AD
3656 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
3657 adev->ip_blocks[i].status.hang =
3658 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
3659 if (adev->ip_blocks[i].status.hang) {
3660 DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
3661 asic_hang = true;
3662 }
3663 }
3664 return asic_hang;
3665}
3666
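The check above is one instance of the driver's general per-IP-block dispatch: every block carries a set of optional callbacks, and the device loop skips invalid blocks and only invokes the callbacks a block actually provides. The following is a condensed userspace sketch of that pattern under assumed names (struct ip_block, gfx_check, sdma_check are all hypothetical), not the real amd_ip_funcs layout.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct ip_block {
        const char *name;
        bool valid;
        bool hang;
        bool (*check_soft_reset)(void);   /* optional callback */
};

static bool gfx_check(void)  { return true;  }  /* pretend GFX is hung */
static bool sdma_check(void) { return false; }

static bool device_check_soft_reset(struct ip_block *blocks, size_t n)
{
        bool asic_hang = false;

        for (size_t i = 0; i < n; i++) {
                if (!blocks[i].valid)
                        continue;
                if (blocks[i].check_soft_reset)
                        blocks[i].hang = blocks[i].check_soft_reset();
                if (blocks[i].hang) {
                        printf("IP block:%s is hung!\n", blocks[i].name);
                        asic_hang = true;
                }
        }
        return asic_hang;
}

int main(void)
{
        struct ip_block blocks[] = {
                { "gfx",  true, false, gfx_check  },
                { "sdma", true, false, sdma_check },
                { "vcn",  true, false, NULL },  /* no callback: state left as-is */
        };

        printf("asic hang: %d\n",
               device_check_soft_reset(blocks, sizeof(blocks) / sizeof(blocks[0])));
        return 0;
}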
e3ecdffa
AD
3667/**
3668 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3669 *
3670 * @adev: amdgpu_device pointer
3671 *
3672 * The list of all the hardware IPs that make up the asic is walked and the
3673 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
3674 * handles any IP specific hardware or software state changes that are
3675 * necessary for a soft reset to succeed.
3676 * Returns 0 on success, negative error code on failure.
3677 */
06ec9070 3678static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
3679{
3680 int i, r = 0;
3681
3682 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3683 if (!adev->ip_blocks[i].status.valid)
d31a501e 3684 continue;
a1255107
AD
3685 if (adev->ip_blocks[i].status.hang &&
3686 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3687 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
3688 if (r)
3689 return r;
3690 }
3691 }
3692
3693 return 0;
3694}
3695
e3ecdffa
AD
3696/**
3697 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3698 *
3699 * @adev: amdgpu_device pointer
3700 *
3701 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
3702 * reset is necessary to recover.
3703 * Returns true if a full asic reset is required, false if not.
3704 */
06ec9070 3705static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 3706{
da146d3b
AD
3707 int i;
3708
8bc04c29
AD
3709 if (amdgpu_asic_need_full_reset(adev))
3710 return true;
3711
da146d3b 3712 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3713 if (!adev->ip_blocks[i].status.valid)
da146d3b 3714 continue;
a1255107
AD
3715 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3716 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3717 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
3718 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3719 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 3720 if (adev->ip_blocks[i].status.hang) {
da146d3b
AD
3721 DRM_INFO("Some block need full reset!\n");
3722 return true;
3723 }
3724 }
35d782fe
CZ
3725 }
3726 return false;
3727}
3728
e3ecdffa
AD
3729/**
3730 * amdgpu_device_ip_soft_reset - do a soft reset
3731 *
3732 * @adev: amdgpu_device pointer
3733 *
3734 * The list of all the hardware IPs that make up the asic is walked and the
3735 * soft_reset callbacks are run if the block is hung. soft_reset handles any
3736 * IP specific hardware or software state changes that are necessary to soft
3737 * reset the IP.
3738 * Returns 0 on success, negative error code on failure.
3739 */
06ec9070 3740static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3741{
3742 int i, r = 0;
3743
3744 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3745 if (!adev->ip_blocks[i].status.valid)
35d782fe 3746 continue;
a1255107
AD
3747 if (adev->ip_blocks[i].status.hang &&
3748 adev->ip_blocks[i].version->funcs->soft_reset) {
3749 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
3750 if (r)
3751 return r;
3752 }
3753 }
3754
3755 return 0;
3756}
3757
e3ecdffa
AD
3758/**
3759 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
3760 *
3761 * @adev: amdgpu_device pointer
3762 *
3763 * The list of all the hardware IPs that make up the asic is walked and the
3764 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
3765 * handles any IP specific hardware or software state changes that are
3766 * necessary after the IP has been soft reset.
3767 * Returns 0 on success, negative error code on failure.
3768 */
06ec9070 3769static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3770{
3771 int i, r = 0;
3772
3773 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3774 if (!adev->ip_blocks[i].status.valid)
35d782fe 3775 continue;
a1255107
AD
3776 if (adev->ip_blocks[i].status.hang &&
3777 adev->ip_blocks[i].version->funcs->post_soft_reset)
3778 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
3779 if (r)
3780 return r;
3781 }
3782
3783 return 0;
3784}
3785
e3ecdffa 3786/**
c33adbc7 3787 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
3788 *
3789 * @adev: amdgpu_device pointer
3790 *
3791 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
3792 * restore things like GPUVM page tables after a GPU reset where
3793 * the contents of VRAM might be lost.
403009bf
CK
3794 *
3795 * Returns:
3796 * 0 on success, negative error code on failure.
e3ecdffa 3797 */
c33adbc7 3798static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 3799{
c41d1cf6 3800 struct dma_fence *fence = NULL, *next = NULL;
403009bf
CK
3801 struct amdgpu_bo *shadow;
3802 long r = 1, tmo;
c41d1cf6
ML
3803
3804 if (amdgpu_sriov_runtime(adev))
b045d3af 3805 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
3806 else
3807 tmo = msecs_to_jiffies(100);
3808
3809 DRM_INFO("recover vram bo from shadow start\n");
3810 mutex_lock(&adev->shadow_list_lock);
403009bf
CK
3811 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
3812
3813 /* No need to recover an evicted BO */
3814 if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
b575f10d 3815 shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
403009bf
CK
3816 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
3817 continue;
3818
3819 r = amdgpu_bo_restore_shadow(shadow, &next);
3820 if (r)
3821 break;
3822
c41d1cf6 3823 if (fence) {
1712fb1a 3824 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
3825 dma_fence_put(fence);
3826 fence = next;
1712fb1a 3827 if (tmo == 0) {
3828 r = -ETIMEDOUT;
c41d1cf6 3829 break;
1712fb1a 3830 } else if (tmo < 0) {
3831 r = tmo;
3832 break;
3833 }
403009bf
CK
3834 } else {
3835 fence = next;
c41d1cf6 3836 }
c41d1cf6
ML
3837 }
3838 mutex_unlock(&adev->shadow_list_lock);
3839
403009bf
CK
3840 if (fence)
3841 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
3842 dma_fence_put(fence);
3843
1712fb1a 3844 if (r < 0 || tmo <= 0) {
3845 DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
3846 return -EIO;
3847 }
c41d1cf6 3848
403009bf
CK
3849 DRM_INFO("recover vram bo from shadow done\n");
3850 return 0;
c41d1cf6
ML
3851}
3852
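The VRAM recovery loop above chains one wait budget across all shadow restores: each dma_fence_wait_timeout() call returns the time that remains, 0 means the budget ran out, and the leftover is carried into the next wait. Here is a tiny userspace sketch of that shrinking-budget pattern; wait_sim() is a made-up stand-in, not a kernel API.

#include <stdio.h>

/* Each simulated wait consumes part of the remaining budget and returns what
 * is left (0 on timeout), mirroring how the timeout is threaded from one
 * shadow BO fence to the next above. */
static long wait_sim(long cost, long budget)
{
        return (cost >= budget) ? 0 : budget - cost;
}

int main(void)
{
        long tmo = 100;                                 /* total budget, e.g. jiffies */
        const long per_bo_cost[] = { 30, 20, 70 };      /* pretend per-fence waits */
        long r = 1;

        for (unsigned i = 0; i < sizeof(per_bo_cost) / sizeof(per_bo_cost[0]); i++) {
                tmo = wait_sim(per_bo_cost[i], tmo);
                if (tmo == 0) {
                        r = -1;                         /* -ETIMEDOUT in the driver */
                        break;
                }
        }

        if (r < 0)
                printf("recover timed out\n");
        else
                printf("recover done, %ld of budget left\n", tmo);
        return 0;
}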
a90ad3c2 3853
e3ecdffa 3854/**
06ec9070 3855 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e
ML
3856 *
3857 * @adev: amdgpu device pointer
87e3f136 3858 * @from_hypervisor: request from hypervisor
5740682e
ML
3859 *
3860 * Do VF FLR and reinitialize the ASIC.
3861 * Returns 0 on success, negative error code on failure.
e3ecdffa
AD
3862 */
3863static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
3864 bool from_hypervisor)
5740682e
ML
3865{
3866 int r;
3867
3868 if (from_hypervisor)
3869 r = amdgpu_virt_request_full_gpu(adev, true);
3870 else
3871 r = amdgpu_virt_reset_gpu(adev);
3872 if (r)
3873 return r;
a90ad3c2 3874
b639c22c
JZ
3875 amdgpu_amdkfd_pre_reset(adev);
3876
a90ad3c2 3877 /* Resume IP prior to SMC */
06ec9070 3878 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
3879 if (r)
3880 goto error;
a90ad3c2 3881
c9ffa427 3882 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 3883 /* we need recover gart prior to run SMC/CP/SDMA resume */
c1c7ce8f 3884 amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
a90ad3c2 3885
7a3e0bb2
RZ
3886 r = amdgpu_device_fw_loading(adev);
3887 if (r)
3888 return r;
3889
a90ad3c2 3890 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 3891 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
3892 if (r)
3893 goto error;
a90ad3c2
ML
3894
3895 amdgpu_irq_gpu_reset_resume_helper(adev);
5740682e 3896 r = amdgpu_ib_ring_tests(adev);
f81e8d53 3897 amdgpu_amdkfd_post_reset(adev);
a90ad3c2 3898
abc34253
ED
3899error:
3900 amdgpu_virt_release_full_gpu(adev, true);
c41d1cf6 3901 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 3902 amdgpu_inc_vram_lost(adev);
c33adbc7 3903 r = amdgpu_device_recover_vram(adev);
a90ad3c2
ML
3904 }
3905
3906 return r;
3907}
3908
12938fad
CK
3909/**
3910 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
3911 *
3912 * @adev: amdgpu device pointer
3913 *
3914 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
3915 * a hung GPU.
3916 */
3917bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
3918{
3919 if (!amdgpu_device_ip_check_soft_reset(adev)) {
3920 DRM_INFO("Timeout, but no hardware hang detected.\n");
3921 return false;
3922 }
3923
3ba7b418
AG
3924 if (amdgpu_gpu_recovery == 0)
3925 goto disabled;
3926
3927 if (amdgpu_sriov_vf(adev))
3928 return true;
3929
3930 if (amdgpu_gpu_recovery == -1) {
3931 switch (adev->asic_type) {
fc42d47c
AG
3932 case CHIP_BONAIRE:
3933 case CHIP_HAWAII:
3ba7b418
AG
3934 case CHIP_TOPAZ:
3935 case CHIP_TONGA:
3936 case CHIP_FIJI:
3937 case CHIP_POLARIS10:
3938 case CHIP_POLARIS11:
3939 case CHIP_POLARIS12:
3940 case CHIP_VEGAM:
3941 case CHIP_VEGA20:
3942 case CHIP_VEGA10:
3943 case CHIP_VEGA12:
c43b849f 3944 case CHIP_RAVEN:
e9d4cf91 3945 case CHIP_ARCTURUS:
2cb44fb0 3946 case CHIP_RENOIR:
658c6639
AD
3947 case CHIP_NAVI10:
3948 case CHIP_NAVI14:
3949 case CHIP_NAVI12:
131a3c74 3950 case CHIP_SIENNA_CICHLID:
3ba7b418
AG
3951 break;
3952 default:
3953 goto disabled;
3954 }
12938fad
CK
3955 }
3956
3957 return true;
3ba7b418
AG
3958
3959disabled:
3960 DRM_INFO("GPU recovery disabled.\n");
3961 return false;
12938fad
CK
3962}
3963
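The policy above can be read as three layers: an explicit module parameter wins, SR-IOV VFs always attempt recovery, and the "auto" value (-1) consults a per-ASIC allowlist. A condensed userspace sketch of that decision follows; it omits the soft-reset probe and uses placeholder chip names, so it is illustrative only.

#include <stdbool.h>
#include <stdio.h>

enum chip { CHIP_TAHITI, CHIP_BONAIRE, CHIP_VEGA10, CHIP_NAVI10 };  /* placeholders */

static bool should_recover(int gpu_recovery, bool sriov, enum chip asic)
{
        if (gpu_recovery == 0)
                return false;           /* explicitly disabled */
        if (sriov)
                return true;            /* VFs always attempt recovery */
        if (gpu_recovery == -1) {       /* auto: per-ASIC allowlist */
                switch (asic) {
                case CHIP_BONAIRE:
                case CHIP_VEGA10:
                case CHIP_NAVI10:
                        return true;
                default:
                        return false;
                }
        }
        return true;                    /* explicitly enabled */
}

int main(void)
{
        printf("%d %d %d\n",
               should_recover(-1, false, CHIP_TAHITI),
               should_recover(-1, false, CHIP_NAVI10),
               should_recover(0,  true,  CHIP_TAHITI));
        return 0;
}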
5c6dd71e 3964
26bc5340
AG
3965static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
3966 struct amdgpu_job *job,
3967 bool *need_full_reset_arg)
3968{
3969 int i, r = 0;
3970 bool need_full_reset = *need_full_reset_arg;
71182665 3971
728e7e0c
JZ
3972 amdgpu_debugfs_wait_dump(adev);
3973
71182665 3974 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
3975 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3976 struct amdgpu_ring *ring = adev->rings[i];
3977
51687759 3978 if (!ring || !ring->sched.thread)
0875dc9e 3979 continue;
5740682e 3980
2f9d4084
ML
3981 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
3982 amdgpu_fence_driver_force_completion(ring);
0875dc9e 3983 }
d38ceaf9 3984
222b5f04
AG
3985	if (job)
3986 drm_sched_increase_karma(&job->base);
3987
1d721ed6 3988 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
3989 if (!amdgpu_sriov_vf(adev)) {
3990
3991 if (!need_full_reset)
3992 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
3993
3994 if (!need_full_reset) {
3995 amdgpu_device_ip_pre_soft_reset(adev);
3996 r = amdgpu_device_ip_soft_reset(adev);
3997 amdgpu_device_ip_post_soft_reset(adev);
3998 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
3999 DRM_INFO("soft reset failed, will fallback to full reset!\n");
4000 need_full_reset = true;
4001 }
4002 }
4003
4004 if (need_full_reset)
4005 r = amdgpu_device_ip_suspend(adev);
4006
4007 *need_full_reset_arg = need_full_reset;
4008 }
4009
4010 return r;
4011}
4012
041a62bc 4013static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
26bc5340
AG
4014 struct list_head *device_list_handle,
4015 bool *need_full_reset_arg)
4016{
4017 struct amdgpu_device *tmp_adev = NULL;
4018 bool need_full_reset = *need_full_reset_arg, vram_lost = false;
4019 int r = 0;
4020
4021 /*
4022	 * ASIC reset has to be done on all XGMI hive nodes ASAP
4023	 * to allow proper link negotiation in FW (within 1 sec)
4024 */
4025 if (need_full_reset) {
4026 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
041a62bc 4027 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 4028 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
c96cf282 4029 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
4030 r = -EALREADY;
4031 } else
4032 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 4033
041a62bc
AG
4034 if (r) {
4035 DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s",
4036 r, tmp_adev->ddev->unique);
4037 break;
ce316fa5
LM
4038 }
4039 }
4040
041a62bc
AG
4041 /* For XGMI wait for all resets to complete before proceed */
4042 if (!r) {
ce316fa5
LM
4043 list_for_each_entry(tmp_adev, device_list_handle,
4044 gmc.xgmi.head) {
4045 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4046 flush_work(&tmp_adev->xgmi_reset_work);
4047 r = tmp_adev->asic_reset_res;
4048 if (r)
4049 break;
ce316fa5
LM
4050 }
4051 }
4052 }
ce316fa5 4053 }
26bc5340 4054
43c4d576
JC
4055 if (!r && amdgpu_ras_intr_triggered()) {
4056 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4057 if (tmp_adev->mmhub.funcs &&
4058 tmp_adev->mmhub.funcs->reset_ras_error_count)
4059 tmp_adev->mmhub.funcs->reset_ras_error_count(tmp_adev);
4060 }
4061
00eaa571 4062 amdgpu_ras_intr_cleared();
43c4d576 4063 }
00eaa571 4064
26bc5340
AG
4065 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4066 if (need_full_reset) {
4067 /* post card */
df9c8d1a
DL
4068 if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context)) {
4069 dev_warn(tmp_adev->dev, "asic atom init failed!");
4070 r = -EAGAIN;
4071 goto out;
4072 }
26bc5340
AG
4073
4074 if (!r) {
4075 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
4076 r = amdgpu_device_ip_resume_phase1(tmp_adev);
4077 if (r)
4078 goto out;
4079
4080 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
4081 if (vram_lost) {
77e7f829 4082 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 4083 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
4084 }
4085
4086 r = amdgpu_gtt_mgr_recover(
4087 &tmp_adev->mman.bdev.man[TTM_PL_TT]);
4088 if (r)
4089 goto out;
4090
4091 r = amdgpu_device_fw_loading(tmp_adev);
4092 if (r)
4093 return r;
4094
4095 r = amdgpu_device_ip_resume_phase2(tmp_adev);
4096 if (r)
4097 goto out;
4098
4099 if (vram_lost)
4100 amdgpu_device_fill_reset_magic(tmp_adev);
4101
fdafb359
EQ
4102 /*
4103 * Add this ASIC as tracked as reset was already
4104 * complete successfully.
4105 */
4106 amdgpu_register_gpu_instance(tmp_adev);
4107
7c04ca50 4108 r = amdgpu_device_ip_late_init(tmp_adev);
4109 if (r)
4110 goto out;
4111
565d1941
EQ
4112 amdgpu_fbdev_set_suspend(tmp_adev, 0);
4113
e79a04d5 4114 /* must succeed. */
511fdbc3 4115 amdgpu_ras_resume(tmp_adev);
e79a04d5 4116
26bc5340
AG
4117 /* Update PSP FW topology after reset */
4118 if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
4119 r = amdgpu_xgmi_update_topology(hive, tmp_adev);
4120 }
4121 }
4122
4123
4124out:
4125 if (!r) {
4126 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
4127 r = amdgpu_ib_ring_tests(tmp_adev);
4128 if (r) {
4129 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
4130 r = amdgpu_device_ip_suspend(tmp_adev);
4131 need_full_reset = true;
4132 r = -EAGAIN;
4133 goto end;
4134 }
4135 }
4136
4137 if (!r)
4138 r = amdgpu_device_recover_vram(tmp_adev);
4139 else
4140 tmp_adev->asic_reset_res = r;
4141 }
4142
4143end:
4144 *need_full_reset_arg = need_full_reset;
4145 return r;
4146}
4147
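In the full-reset path above, every XGMI hive node's reset is queued to run concurrently and the caller then waits for all of them before resuming any node. A userspace sketch of that fan-out/fan-in shape follows, with pthreads standing in for the kernel workqueue items; node_reset() is hypothetical. Build with: cc -pthread.

#include <pthread.h>
#include <stdio.h>

#define NODES 2

static void *node_reset(void *arg)
{
        long id = (long)arg;

        printf("resetting node %ld\n", id);
        return NULL;
}

int main(void)
{
        pthread_t workers[NODES];

        /* "queue_work" equivalent: launch every node's reset in parallel. */
        for (long i = 0; i < NODES; i++)
                pthread_create(&workers[i], NULL, node_reset, (void *)i);

        /* "flush_work" equivalent: wait for all resets before proceeding. */
        for (long i = 0; i < NODES; i++)
                pthread_join(&workers[i], NULL);

        printf("all hive nodes reset, safe to resume\n");
        return 0;
}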
df9c8d1a 4148static bool amdgpu_device_lock_adev(struct amdgpu_device *adev)
26bc5340 4149{
df9c8d1a
DL
4150 if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0)
4151 return false;
4152
4153 down_write(&adev->reset_sem);
5740682e 4154
26bc5340 4155 atomic_inc(&adev->gpu_reset_counter);
a3a09142
AD
4156 switch (amdgpu_asic_reset_method(adev)) {
4157 case AMD_RESET_METHOD_MODE1:
4158 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
4159 break;
4160 case AMD_RESET_METHOD_MODE2:
4161 adev->mp1_state = PP_MP1_STATE_RESET;
4162 break;
4163 default:
4164 adev->mp1_state = PP_MP1_STATE_NONE;
4165 break;
4166 }
1d721ed6
AG
4167
4168 return true;
26bc5340 4169}
d38ceaf9 4170
26bc5340
AG
4171static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
4172{
89041940 4173 amdgpu_vf_error_trans_all(adev);
a3a09142 4174 adev->mp1_state = PP_MP1_STATE_NONE;
df9c8d1a
DL
4175 atomic_set(&adev->in_gpu_reset, 0);
4176 up_write(&adev->reset_sem);
26bc5340
AG
4177}
4178
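The lock/unlock pair above is a trylock-style gate: an atomic 0 -> 1 compare-and-exchange decides whether this caller owns the reset, and a semaphore then excludes other users for its duration. Here is a userspace sketch of the same shape, with C11 atomics and a pthread rwlock standing in for in_gpu_reset and reset_sem; it is not the driver code. Build with: cc -pthread.

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct dev_sim {
        atomic_int in_gpu_reset;
        pthread_rwlock_t reset_sem;
        int gpu_reset_counter;
};

static bool lock_adev(struct dev_sim *d)
{
        int expected = 0;

        /* Only one caller wins the 0 -> 1 transition. */
        if (!atomic_compare_exchange_strong(&d->in_gpu_reset, &expected, 1))
                return false;

        pthread_rwlock_wrlock(&d->reset_sem);
        d->gpu_reset_counter++;
        return true;
}

static void unlock_adev(struct dev_sim *d)
{
        atomic_store(&d->in_gpu_reset, 0);
        pthread_rwlock_unlock(&d->reset_sem);
}

int main(void)
{
        struct dev_sim d = { .in_gpu_reset = 0,
                             .reset_sem = PTHREAD_RWLOCK_INITIALIZER };

        printf("first lock: %d\n", lock_adev(&d));      /* 1: we own the reset */
        printf("second lock: %d\n", lock_adev(&d));     /* 0: already in reset */
        unlock_adev(&d);
        return 0;
}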
3f12acc8
EQ
4179static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
4180{
4181 struct pci_dev *p = NULL;
4182
4183 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4184 adev->pdev->bus->number, 1);
4185 if (p) {
4186 pm_runtime_enable(&(p->dev));
4187 pm_runtime_resume(&(p->dev));
4188 }
4189}
4190
4191static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
4192{
4193 enum amd_reset_method reset_method;
4194 struct pci_dev *p = NULL;
4195 u64 expires;
4196
4197 /*
4198	 * For now, only BACO and mode1 reset are confirmed
4199	 * to suffer the audio issue if the audio device is not properly suspended.
4200 */
4201 reset_method = amdgpu_asic_reset_method(adev);
4202 if ((reset_method != AMD_RESET_METHOD_BACO) &&
4203 (reset_method != AMD_RESET_METHOD_MODE1))
4204 return -EINVAL;
4205
4206 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4207 adev->pdev->bus->number, 1);
4208 if (!p)
4209 return -ENODEV;
4210
4211 expires = pm_runtime_autosuspend_expiration(&(p->dev));
4212 if (!expires)
4213 /*
4214 * If we cannot get the audio device autosuspend delay,
4215		 * a fixed 4 second interval is used. Since the audio
4216		 * controller's default autosuspend delay is 3 seconds,
4217		 * the 4 seconds used here are guaranteed to cover it.
4218 */
54b7feb9 4219 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
4220
4221 while (!pm_runtime_status_suspended(&(p->dev))) {
4222 if (!pm_runtime_suspend(&(p->dev)))
4223 break;
4224
4225 if (expires < ktime_get_mono_fast_ns()) {
4226 dev_warn(adev->dev, "failed to suspend display audio\n");
4227 /* TODO: abort the succeeding gpu reset? */
4228 return -ETIMEDOUT;
4229 }
4230 }
4231
4232 pm_runtime_disable(&(p->dev));
4233
4234 return 0;
4235}
4236
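The audio suspend helper above is a deadline-bounded polling loop: keep trying until the device reports suspended or a monotonic-clock deadline passes. A userspace sketch of that loop follows; mono_ns() and suspended() are assumed stand-ins (the latter flips to true after a few calls purely so the example terminates).

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define NSEC_PER_SEC 1000000000ULL

/* Stand-in for ktime_get_mono_fast_ns(): monotonic time in nanoseconds. */
static uint64_t mono_ns(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (uint64_t)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
}

/* Pretend "is the audio device suspended yet?" check; illustrative only. */
static bool suspended(void)
{
        static int calls;

        return ++calls > 3;
}

int main(void)
{
        /* Same shape as above: poll until suspended or a ~4 s deadline passes. */
        uint64_t expires = mono_ns() + 4 * NSEC_PER_SEC;

        while (!suspended()) {
                if (mono_ns() > expires) {
                        fprintf(stderr, "failed to suspend display audio\n");
                        return 1;       /* -ETIMEDOUT in the driver */
                }
        }

        printf("audio suspended before deadline\n");
        return 0;
}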
26bc5340
AG
4237/**
4238 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
4239 *
4240 * @adev: amdgpu device pointer
4241 * @job: which job trigger hang
4242 *
4243 * Attempt to reset the GPU if it has hung (all asics).
4244 * Attempt to do soft-reset or full-reset and reinitialize Asic
4245 * Returns 0 for success or an error on failure.
4246 */
4247
4248int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
4249 struct amdgpu_job *job)
4250{
1d721ed6 4251 struct list_head device_list, *device_list_handle = NULL;
7dd8c205
EQ
4252 bool need_full_reset = false;
4253 bool job_signaled = false;
26bc5340 4254 struct amdgpu_hive_info *hive = NULL;
26bc5340 4255 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 4256 int i, r = 0;
bb5c7235 4257 bool need_emergency_restart = false;
3f12acc8 4258 bool audio_suspended = false;
26bc5340 4259
bb5c7235
WS
4260 /**
4261 * Special case: RAS triggered and full reset isn't supported
4262 */
4263 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
4264
d5ea093e
AG
4265 /*
4266 * Flush RAM to disk so that after reboot
4267	 * the user can read the log and see why the system rebooted.
4268 */
bb5c7235 4269 if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
4270 DRM_WARN("Emergency reboot.");
4271
4272 ksys_sync_helper();
4273 emergency_restart();
4274 }
4275
b823821f 4276 dev_info(adev->dev, "GPU %s begin!\n",
bb5c7235 4277 need_emergency_restart ? "jobs stop":"reset");
26bc5340
AG
4278
4279 /*
1d721ed6
AG
4280	 * Here we trylock to avoid a chain of resets executing from
4281	 * either a trigger by jobs on different adevs in an XGMI hive or jobs on
4282	 * different schedulers for the same device while this TO handler is running.
4283	 * We always reset all schedulers for a device and all devices for an XGMI
4284	 * hive, so that should take care of them too.
26bc5340 4285 */
df9c8d1a
DL
4286 hive = amdgpu_get_xgmi_hive(adev, false);
4287 if (hive) {
4288 if (atomic_cmpxchg(&hive->in_reset, 0, 1) != 0) {
4289 DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
4290 job ? job->base.id : -1, hive->hive_id);
4291 return 0;
4292 }
4293 mutex_lock(&hive->hive_lock);
1d721ed6 4294 }
26bc5340 4295
9e94d22c
EQ
4296 /*
4297 * Build list of devices to reset.
4298 * In case we are in XGMI hive mode, resort the device list
4299 * to put adev in the 1st position.
4300 */
4301 INIT_LIST_HEAD(&device_list);
4302 if (adev->gmc.xgmi.num_physical_nodes > 1) {
4303 if (!hive)
26bc5340 4304 return -ENODEV;
9e94d22c
EQ
4305 if (!list_is_first(&adev->gmc.xgmi.head, &hive->device_list))
4306 list_rotate_to_front(&adev->gmc.xgmi.head, &hive->device_list);
26bc5340
AG
4307 device_list_handle = &hive->device_list;
4308 } else {
4309 list_add_tail(&adev->gmc.xgmi.head, &device_list);
4310 device_list_handle = &device_list;
4311 }
4312
1d721ed6
AG
4313 /* block all schedulers and reset given job's ring */
4314 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
df9c8d1a 4315 if (!amdgpu_device_lock_adev(tmp_adev)) {
9e94d22c
EQ
4316 DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
4317 job ? job->base.id : -1);
df9c8d1a
DL
4318 r = 0;
4319 goto skip_recovery;
7c6e68c7
AG
4320 }
4321
3f12acc8
EQ
4322 /*
4323		 * Try to put the audio codec into suspend state
4324		 * before gpu reset is started.
4325		 *
4326		 * The power domain of the graphics device is shared
4327		 * with the AZ power domain. Without this, we may
4328		 * change the audio hardware behind the audio
4329		 * driver's back, which will trigger audio codec
4330		 * errors.
4331 */
4332 if (!amdgpu_device_suspend_display_audio(tmp_adev))
4333 audio_suspended = true;
4334
9e94d22c
EQ
4335 amdgpu_ras_set_error_query_ready(tmp_adev, false);
4336
52fb44cf
EQ
4337 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
4338
9e94d22c
EQ
4339 if (!amdgpu_sriov_vf(tmp_adev))
4340 amdgpu_amdkfd_pre_reset(tmp_adev);
4341
12ffa55d
AG
4342 /*
4343 * Mark these ASICs to be reseted as untracked first
4344 * And add them back after reset completed
4345 */
4346 amdgpu_unregister_gpu_instance(tmp_adev);
4347
a2f63ee8 4348 amdgpu_fbdev_set_suspend(tmp_adev, 1);
565d1941 4349
f1c1314b 4350 /* disable ras on ALL IPs */
bb5c7235 4351 if (!need_emergency_restart &&
b823821f 4352 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 4353 amdgpu_ras_suspend(tmp_adev);
4354
1d721ed6
AG
4355 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4356 struct amdgpu_ring *ring = tmp_adev->rings[i];
4357
4358 if (!ring || !ring->sched.thread)
4359 continue;
4360
0b2d2c2e 4361 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 4362
bb5c7235 4363 if (need_emergency_restart)
7c6e68c7 4364 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6
AG
4365 }
4366 }
4367
bb5c7235 4368 if (need_emergency_restart)
7c6e68c7
AG
4369 goto skip_sched_resume;
4370
1d721ed6
AG
4371 /*
4372 * Must check guilty signal here since after this point all old
4373 * HW fences are force signaled.
4374 *
4375 * job->base holds a reference to parent fence
4376 */
4377 if (job && job->base.s_fence->parent &&
7dd8c205 4378 dma_fence_is_signaled(job->base.s_fence->parent)) {
1d721ed6 4379 job_signaled = true;
1d721ed6
AG
4380 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
4381 goto skip_hw_reset;
4382 }
4383
26bc5340
AG
4384retry: /* Rest of adevs pre asic reset from XGMI hive. */
4385 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
26bc5340
AG
4386 r = amdgpu_device_pre_asic_reset(tmp_adev,
4387 NULL,
4388 &need_full_reset);
4389 /*TODO Should we stop ?*/
4390 if (r) {
4391 DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
4392 r, tmp_adev->ddev->unique);
4393 tmp_adev->asic_reset_res = r;
4394 }
4395 }
4396
4397 /* Actual ASIC resets if needed.*/
4398 /* TODO Implement XGMI hive reset logic for SRIOV */
4399 if (amdgpu_sriov_vf(adev)) {
4400 r = amdgpu_device_reset_sriov(adev, job ? false : true);
4401 if (r)
4402 adev->asic_reset_res = r;
4403 } else {
041a62bc 4404 r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
26bc5340
AG
4405 if (r && r == -EAGAIN)
4406 goto retry;
4407 }
4408
1d721ed6
AG
4409skip_hw_reset:
4410
26bc5340
AG
4411	/* Post ASIC reset for all devs. */
4412 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
7c6e68c7 4413
1d721ed6
AG
4414 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4415 struct amdgpu_ring *ring = tmp_adev->rings[i];
4416
4417 if (!ring || !ring->sched.thread)
4418 continue;
4419
4420			/* No point in resubmitting jobs if we didn't HW reset */
4421 if (!tmp_adev->asic_reset_res && !job_signaled)
4422 drm_sched_resubmit_jobs(&ring->sched);
4423
4424 drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
4425 }
4426
4427 if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
4428 drm_helper_resume_force_mode(tmp_adev->ddev);
4429 }
4430
4431 tmp_adev->asic_reset_res = 0;
26bc5340
AG
4432
4433 if (r) {
4434 /* bad news, how to tell it to userspace ? */
12ffa55d 4435 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
4436 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
4437 } else {
12ffa55d 4438 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340 4439 }
7c6e68c7 4440 }
26bc5340 4441
7c6e68c7
AG
4442skip_sched_resume:
4443 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4444 /*unlock kfd: SRIOV would do it separately */
bb5c7235 4445 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
7c6e68c7 4446 amdgpu_amdkfd_post_reset(tmp_adev);
3f12acc8
EQ
4447 if (audio_suspended)
4448 amdgpu_device_resume_display_audio(tmp_adev);
26bc5340
AG
4449 amdgpu_device_unlock_adev(tmp_adev);
4450 }
4451
df9c8d1a 4452skip_recovery:
9e94d22c 4453 if (hive) {
df9c8d1a 4454 atomic_set(&hive->in_reset, 0);
9e94d22c
EQ
4455 mutex_unlock(&hive->hive_lock);
4456 }
26bc5340
AG
4457
4458 if (r)
4459 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
d38ceaf9
AD
4460 return r;
4461}
4462
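Stripped of the locking, audio, and scheduler handling, the control flow of the recovery function above is: pre-reset every device in the list, perform the ASIC reset, and redo the whole sequence when the reset path reports -EAGAIN (for example after failed post-reset IB tests). A condensed userspace sketch of that retry loop follows; the helpers and EAGAIN_SIM are placeholders, not the driver functions.

#include <stdio.h>

#define EAGAIN_SIM 11

static int pre_asic_reset(int dev)  { (void)dev; return 0; }

static int do_asic_reset(int attempt)
{
        /* Pretend the first attempt needs a full retry. */
        return attempt == 0 ? -EAGAIN_SIM : 0;
}

int main(void)
{
        const int devices[] = { 0, 1 };  /* e.g. two nodes of an XGMI hive */
        int attempt = 0, r;

retry:
        for (unsigned i = 0; i < sizeof(devices) / sizeof(devices[0]); i++)
                pre_asic_reset(devices[i]);

        r = do_asic_reset(attempt++);
        if (r == -EAGAIN_SIM)
                goto retry;

        printf("recovery finished after %d attempt(s), r=%d\n", attempt, r);
        return 0;
}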
e3ecdffa
AD
4463/**
4464 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
4465 *
4466 * @adev: amdgpu_device pointer
4467 *
4468 * Fetches and stores in the driver the PCIE capabilities (gen speed
4469 * and lanes) of the slot the device is in. Handles APUs and
4470 * virtualized environments where PCIE config space may not be available.
4471 */
5494d864 4472static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 4473{
5d9a6330 4474 struct pci_dev *pdev;
c5313457
HK
4475 enum pci_bus_speed speed_cap, platform_speed_cap;
4476 enum pcie_link_width platform_link_width;
d0dd7f0c 4477
cd474ba0
AD
4478 if (amdgpu_pcie_gen_cap)
4479 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 4480
cd474ba0
AD
4481 if (amdgpu_pcie_lane_cap)
4482 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 4483
cd474ba0
AD
4484 /* covers APUs as well */
4485 if (pci_is_root_bus(adev->pdev->bus)) {
4486 if (adev->pm.pcie_gen_mask == 0)
4487 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
4488 if (adev->pm.pcie_mlw_mask == 0)
4489 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 4490 return;
cd474ba0 4491 }
d0dd7f0c 4492
c5313457
HK
4493 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
4494 return;
4495
dbaa922b
AD
4496 pcie_bandwidth_available(adev->pdev, NULL,
4497 &platform_speed_cap, &platform_link_width);
c5313457 4498
cd474ba0 4499 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
4500 /* asic caps */
4501 pdev = adev->pdev;
4502 speed_cap = pcie_get_speed_cap(pdev);
4503 if (speed_cap == PCI_SPEED_UNKNOWN) {
4504 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
4505 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4506 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 4507 } else {
5d9a6330
AD
4508 if (speed_cap == PCIE_SPEED_16_0GT)
4509 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4510 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4511 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4512 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
4513 else if (speed_cap == PCIE_SPEED_8_0GT)
4514 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4515 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4516 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
4517 else if (speed_cap == PCIE_SPEED_5_0GT)
4518 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4519 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
4520 else
4521 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
4522 }
4523 /* platform caps */
c5313457 4524 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
4525 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4526 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4527 } else {
c5313457 4528 if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
4529 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4530 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4531 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4532 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 4533 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
4534 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4535 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4536 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 4537 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
4538 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4539 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4540 else
4541 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
4542
cd474ba0
AD
4543 }
4544 }
4545 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 4546 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
4547 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
4548 } else {
c5313457 4549 switch (platform_link_width) {
5d9a6330 4550 case PCIE_LNK_X32:
cd474ba0
AD
4551 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
4552 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4553 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4554 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4555 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4556 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4557 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4558 break;
5d9a6330 4559 case PCIE_LNK_X16:
cd474ba0
AD
4560 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4561 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4562 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4563 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4564 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4565 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4566 break;
5d9a6330 4567 case PCIE_LNK_X12:
cd474ba0
AD
4568 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4569 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4570 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4571 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4572 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4573 break;
5d9a6330 4574 case PCIE_LNK_X8:
cd474ba0
AD
4575 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4576 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4577 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4578 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4579 break;
5d9a6330 4580 case PCIE_LNK_X4:
cd474ba0
AD
4581 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4582 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4583 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4584 break;
5d9a6330 4585 case PCIE_LNK_X2:
cd474ba0
AD
4586 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4587 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4588 break;
5d9a6330 4589 case PCIE_LNK_X1:
cd474ba0
AD
4590 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
4591 break;
4592 default:
4593 break;
4594 }
d0dd7f0c
AD
4595 }
4596 }
4597}
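The gen-mask construction above is cumulative: a link that can train at gen N also supports every lower gen, so each higher speed cap simply ORs in one more bit. A small userspace sketch of that idea follows; the GEN* bit values and the speed enum are placeholders, not the driver's CAIL_* definitions.

#include <stdio.h>

#define GEN1 0x1
#define GEN2 0x2
#define GEN3 0x4
#define GEN4 0x8

enum speed { SPEED_2_5GT, SPEED_5_0GT, SPEED_8_0GT, SPEED_16_0GT };

static unsigned int gen_mask(enum speed cap)
{
        switch (cap) {
        case SPEED_16_0GT:
                return GEN1 | GEN2 | GEN3 | GEN4;
        case SPEED_8_0GT:
                return GEN1 | GEN2 | GEN3;
        case SPEED_5_0GT:
                return GEN1 | GEN2;
        default:
                return GEN1;    /* unknown caps fall back to gen 1 only */
        }
}

int main(void)
{
        printf("gen mask for 8.0GT/s cap: 0x%x\n", gen_mask(SPEED_8_0GT));
        return 0;
}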
d38ceaf9 4598
361dbd01
AD
4599int amdgpu_device_baco_enter(struct drm_device *dev)
4600{
4601 struct amdgpu_device *adev = dev->dev_private;
7a22677b 4602 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01
AD
4603
4604 if (!amdgpu_device_supports_baco(adev->ddev))
4605 return -ENOTSUPP;
4606
7a22677b
LM
4607 if (ras && ras->supported)
4608 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
4609
9530273e 4610 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
4611}
4612
4613int amdgpu_device_baco_exit(struct drm_device *dev)
4614{
4615 struct amdgpu_device *adev = dev->dev_private;
7a22677b 4616 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 4617 int ret = 0;
361dbd01
AD
4618
4619 if (!amdgpu_device_supports_baco(adev->ddev))
4620 return -ENOTSUPP;
4621
9530273e
EQ
4622 ret = amdgpu_dpm_baco_exit(adev);
4623 if (ret)
4624 return ret;
7a22677b
LM
4625
4626 if (ras && ras->supported)
4627 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
4628
4629 return 0;
361dbd01 4630}
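The BACO helpers above pair their side effects: when RAS is supported, the doorbell interrupt is disabled before entering BACO and re-enabled only after a successful exit. A small userspace sketch of that pairing follows; all helpers are illustrative stand-ins for the nbio/dpm callbacks, not the driver code.

#include <stdbool.h>
#include <stdio.h>

static bool ras_supported = true;

static void enable_doorbell_interrupt(bool on)
{
        printf("doorbell interrupt %s\n", on ? "enabled" : "disabled");
}

static int dpm_baco_enter(void) { return 0; }
static int dpm_baco_exit(void)  { return 0; }

static int baco_enter(void)
{
        if (ras_supported)
                enable_doorbell_interrupt(false);
        return dpm_baco_enter();
}

static int baco_exit(void)
{
        int ret = dpm_baco_exit();

        if (ret)
                return ret;     /* leave doorbells off if the exit failed */
        if (ras_supported)
                enable_doorbell_interrupt(true);
        return 0;
}

int main(void)
{
        printf("enter: %d\n", baco_enter());
        printf("exit:  %d\n", baco_exit());
        return 0;
}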