/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/console.h>
#include <linux/slab.h>

#include <drm/drm_atomic_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "nv.h"
#include "bif/bif_4_1_d.h"
#include <linux/pci.h>
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
#include "amdgpu_pmu.h"
#include "amdgpu_fru_eeprom.h"

#include <linux/suspend.h>
#include <drm/task_barrier.h>
#include <linux/pm_runtime.h>

MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");

#define AMDGPU_RESUME_MS	2000

const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"ARCTURUS",
	"RENOIR",
	"NAVI10",
	"NAVI14",
	"NAVI12",
	"LAST",
};

/**
 * DOC: pcie_replay_count
 *
 * The amdgpu driver provides a sysfs API for reporting the total number
 * of PCIe replays (NAKs).
 * The file pcie_replay_count is used for this and returns the total
 * number of replays as a sum of the NAKs generated and NAKs received.
 */

static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;
	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);

	return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
}

static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
		amdgpu_device_get_pcie_replay_count, NULL);

static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);

/**
 * DOC: product_name
 *
 * The amdgpu driver provides a sysfs API for reporting the product name
 * for the device.
 * The file product_name is used for this and returns the product name
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_product_name(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;

	return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_name);
}

static DEVICE_ATTR(product_name, S_IRUGO,
		amdgpu_device_get_product_name, NULL);

/**
 * DOC: product_number
 *
 * The amdgpu driver provides a sysfs API for reporting the part number
 * for the device.
 * The file product_number is used for this and returns the part number
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_product_number(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;

	return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_number);
}

static DEVICE_ATTR(product_number, S_IRUGO,
		amdgpu_device_get_product_number, NULL);

/**
 * DOC: serial_number
 *
 * The amdgpu driver provides a sysfs API for reporting the serial number
 * for the device.
 * The file serial_number is used for this and returns the serial number
 * as returned from the FRU.
 * NOTE: This is only available for certain server cards
 */

static ssize_t amdgpu_device_get_serial_number(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;

	return snprintf(buf, PAGE_SIZE, "%s\n", adev->serial);
}

static DEVICE_ATTR(serial_number, S_IRUGO,
		amdgpu_device_get_serial_number, NULL);

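/*
 * Usage note (illustrative, not part of the original file): these
 * attributes are read-only and are typically read from user space via
 * the PCI device's sysfs directory, e.g.:
 *
 *   cat /sys/class/drm/card0/device/serial_number
 *
 * The card index and exact path depend on the system configuration;
 * treat the path above as an example rather than a guarantee.
 */
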
/**
 * amdgpu_device_supports_boco - Is the device a dGPU with HG/PX power control
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with HG/PX power control,
 * otherwise return false.
 */
bool amdgpu_device_supports_boco(struct drm_device *dev)
{
	struct amdgpu_device *adev = dev->dev_private;

	if (adev->flags & AMD_IS_PX)
		return true;
	return false;
}

/**
 * amdgpu_device_supports_baco - Does the device support BACO
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device supports BACO,
 * otherwise return false.
 */
bool amdgpu_device_supports_baco(struct drm_device *dev)
{
	struct amdgpu_device *adev = dev->dev_private;

	return amdgpu_asic_supports_baco(adev);
}

/**
 * VRAM access helper functions.
 *
 * amdgpu_device_vram_access - read/write a buffer in vram
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size; the buffer at @buf must be at least @size bytes
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
			       uint32_t *buf, size_t size, bool write)
{
	unsigned long flags;
	uint32_t hi = ~0;
	uint64_t last;

#ifdef CONFIG_64BIT
	last = min(pos + size, adev->gmc.visible_vram_size);
	if (last > pos) {
		void __iomem *addr = adev->mman.aper_base_kaddr + pos;
		size_t count = last - pos;

		if (write) {
			memcpy_toio(addr, buf, count);
			mb();
			amdgpu_asic_flush_hdp(adev, NULL);
		} else {
			amdgpu_asic_invalidate_hdp(adev, NULL);
			mb();
			memcpy_fromio(buf, addr, count);
		}

		if (count == size)
			return;

		pos += count;
		buf += count / 4;
		size -= count;
	}
#endif

	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
	for (last = pos + size; pos < last; pos += 4) {
		uint32_t tmp = pos >> 31;

		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
		if (tmp != hi) {
			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
			hi = tmp;
		}
		if (write)
			WREG32_NO_KIQ(mmMM_DATA, *buf++);
		else
			*buf++ = RREG32_NO_KIQ(mmMM_DATA);
	}
	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
}

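/*
 * Illustrative usage sketch (not part of the original file): dumping the
 * first 256 bytes of VRAM into a caller-provided buffer. The buffer size
 * and offset below are made up for the example.
 *
 *   uint32_t data[64];
 *
 *   amdgpu_device_vram_access(adev, 0, data, sizeof(data), false);
 *
 * Callers are expected to pass a @size that fits the buffer; the helper
 * falls back to MM_INDEX/MM_DATA register cycling for any range that is
 * not CPU-visible through the BAR.
 */
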
/*
 * device register access helper functions.
 */
/**
 * amdgpu_device_rreg - read a register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, uint32_t reg,
			    uint32_t acc_flags)
{
	uint32_t ret;

	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
		return amdgpu_kiq_rreg(adev, reg);

	if ((reg * 4) < adev->rmmio_size)
		ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
	else
		ret = adev->pcie_rreg(adev, (reg * 4));
	trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
	return ret;
}

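/*
 * Illustrative note (not from the original source): most callers go
 * through the RREG32()/WREG32() wrappers rather than calling
 * amdgpu_device_rreg() directly, e.g.:
 *
 *   uint32_t val = RREG32(mmMM_INDEX);
 *   uint32_t raw = RREG32_NO_KIQ(mmMM_INDEX);
 *
 * The _NO_KIQ variant is assumed here to pass AMDGPU_REGS_NO_KIQ in
 * @acc_flags so the access bypasses the KIQ path under SR-IOV.
 */
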
/*
 * MMIO register read with bytes helper functions
 * @offset: byte offset from MMIO start
 */

/**
 * amdgpu_mm_rreg8 - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 *
 * Returns the 8 bit value from the offset specified.
 */
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
{
	if (offset < adev->rmmio_size)
		return (readb(adev->rmmio + offset));
	BUG();
}

/*
 * MMIO register write with bytes helper functions
 * @offset: byte offset from MMIO start
 * @value: the value to be written to the register
 */

/**
 * amdgpu_mm_wreg8 - write to a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
{
	if (offset < adev->rmmio_size)
		writeb(value, adev->rmmio + offset);
	else
		BUG();
}

static inline void amdgpu_device_wreg_no_kiq(struct amdgpu_device *adev, uint32_t reg,
					     uint32_t v, uint32_t acc_flags)
{
	trace_amdgpu_device_wreg(adev->pdev->device, reg, v);

	if ((reg * 4) < adev->rmmio_size)
		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
	else
		adev->pcie_wreg(adev, (reg * 4), v);
}

/**
 * amdgpu_device_wreg - write to a register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_device_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
			uint32_t acc_flags)
{
	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
		return amdgpu_kiq_wreg(adev, reg, v);

	amdgpu_device_wreg_no_kiq(adev, reg, v, acc_flags);
}

/*
 * amdgpu_mm_wreg_mmio_rlc - write a register either with mmio or with the RLC path if in range
 *
 * this function is invoked only for debugfs register access
 */
void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
			     uint32_t acc_flags)
{
	if (amdgpu_sriov_fullaccess(adev) &&
	    adev->gfx.rlc.funcs &&
	    adev->gfx.rlc.funcs->is_rlcg_access_range) {

		if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
			return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v);
	}

	amdgpu_device_wreg_no_kiq(adev, reg, v, acc_flags);
}

/**
 * amdgpu_io_rreg - read an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 *
 * Returns the 32 bit value from the offset specified.
 */
u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
{
	if ((reg * 4) < adev->rio_mem_size)
		return ioread32(adev->rio_mem + (reg * 4));
	else {
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		return ioread32(adev->rio_mem + (mmMM_DATA * 4));
	}
}

/**
 * amdgpu_io_wreg - write to an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
	if ((reg * 4) < adev->rio_mem_size)
		iowrite32(v, adev->rio_mem + (reg * 4));
	else {
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
	}
}

/**
 * amdgpu_mm_rdoorbell - read a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
{
	if (index < adev->doorbell.num_doorbells) {
		return readl(adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell - write a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
{
	if (index < adev->doorbell.num_doorbells) {
		writel(v, adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

/**
 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
{
	if (index < adev->doorbell.num_doorbells) {
		return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
{
	if (index < adev->doorbell.num_doorbells) {
		atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

/**
 * amdgpu_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg64 - dummy 64 bit reg write function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
{
	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_block_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
					  uint32_t block, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
		  reg, block);
	BUG();
	return 0;
}

/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
				      uint32_t block,
				      uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
		  reg, block, v);
	BUG();
}

/**
 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
 *
 * @adev: amdgpu device pointer
 *
 * Allocates a scratch page of VRAM for use by various things in the
 * driver.
 */
static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
{
	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
				       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
				       &adev->vram_scratch.robj,
				       &adev->vram_scratch.gpu_addr,
				       (void **)&adev->vram_scratch.ptr);
}

/**
 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
 *
 * @adev: amdgpu device pointer
 *
 * Frees the VRAM scratch page.
 */
static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
}

/**
 * amdgpu_device_program_register_sequence - program an array of registers.
 *
 * @adev: amdgpu_device pointer
 * @registers: pointer to the register array
 * @array_size: size of the register array
 *
 * Programs an array of registers with AND and OR masks.
 * This is a helper for setting golden registers.
 */
void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
					     const u32 *registers,
					     const u32 array_size)
{
	u32 tmp, reg, and_mask, or_mask;
	int i;

	if (array_size % 3)
		return;

	for (i = 0; i < array_size; i += 3) {
		reg = registers[i + 0];
		and_mask = registers[i + 1];
		or_mask = registers[i + 2];

		if (and_mask == 0xffffffff) {
			tmp = or_mask;
		} else {
			tmp = RREG32(reg);
			tmp &= ~and_mask;
			if (adev->family >= AMDGPU_FAMILY_AI)
				tmp |= (or_mask & and_mask);
			else
				tmp |= or_mask;
		}
		WREG32(reg, tmp);
	}
}

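/*
 * Illustrative usage sketch (not part of the original file): a golden
 * register list is a flat array of {offset, and_mask, or_mask} triplets.
 * The register choice and values below are made up for the example.
 *
 *   static const u32 example_golden_settings[] = {
 *           mmMM_INDEX, 0xffffffff, 0x00000000,
 *   };
 *
 *   amdgpu_device_program_register_sequence(adev, example_golden_settings,
 *                                           ARRAY_SIZE(example_golden_settings));
 *
 * With an and_mask of 0xffffffff the or_mask is written verbatim; otherwise
 * the masked bits are cleared and or_mask is merged into the current value.
 */
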
/**
 * amdgpu_device_pci_config_reset - reset the GPU
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using the pci config reset sequence.
 * Only applicable to asics prior to vega10.
 */
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
{
	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
}

/*
 * GPU doorbell aperture helper functions.
 */
/**
 * amdgpu_device_doorbell_init - Init doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Init doorbell driver information (CIK)
 * Returns 0 on success, error on failure.
 */
static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
{

	/* No doorbell on SI hardware generation */
	if (adev->asic_type < CHIP_BONAIRE) {
		adev->doorbell.base = 0;
		adev->doorbell.size = 0;
		adev->doorbell.num_doorbells = 0;
		adev->doorbell.ptr = NULL;
		return 0;
	}

	if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
		return -EINVAL;

	amdgpu_asic_init_doorbell_index(adev);

	/* doorbell bar mapping */
	adev->doorbell.base = pci_resource_start(adev->pdev, 2);
	adev->doorbell.size = pci_resource_len(adev->pdev, 2);

	adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
					     adev->doorbell_index.max_assignment+1);
	if (adev->doorbell.num_doorbells == 0)
		return -EINVAL;

	/* For Vega, reserve and map two pages on the doorbell BAR since the SDMA
	 * paging queue doorbell uses the second page. The
	 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
	 * doorbells are in the first page. So with the paging queue enabled,
	 * num_doorbells is increased by one page (0x400 in dwords).
	 */
	if (adev->asic_type >= CHIP_VEGA10)
		adev->doorbell.num_doorbells += 0x400;

	adev->doorbell.ptr = ioremap(adev->doorbell.base,
				     adev->doorbell.num_doorbells *
				     sizeof(u32));
	if (adev->doorbell.ptr == NULL)
		return -ENOMEM;

	return 0;
}

/**
 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Tear down doorbell driver information (CIK)
 */
static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
{
	iounmap(adev->doorbell.ptr);
	adev->doorbell.ptr = NULL;
}



/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */

/**
 * amdgpu_device_wb_fini - Disable Writeback and free memory
 *
 * @adev: amdgpu_device pointer
 *
 * Disables Writeback and frees the Writeback memory (all asics).
 * Used at driver shutdown.
 */
static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
{
	if (adev->wb.wb_obj) {
		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
				      &adev->wb.gpu_addr,
				      (void **)&adev->wb.wb);
		adev->wb.wb_obj = NULL;
	}
}

/**
 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
 *
 * @adev: amdgpu_device pointer
 *
 * Initializes writeback and allocates writeback memory (all asics).
 * Used at driver startup.
 * Returns 0 on success or an -error on failure.
 */
static int amdgpu_device_wb_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->wb.wb_obj == NULL) {
		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
					    (void **)&adev->wb.wb);
		if (r) {
			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
			return r;
		}

		adev->wb.num_wb = AMDGPU_MAX_WB;
		memset(&adev->wb.used, 0, sizeof(adev->wb.used));

		/* clear wb memory */
		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
	}

	return 0;
}

/**
 * amdgpu_device_wb_get - Allocate a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Allocate a wb slot for use by the driver (all asics).
 * Returns 0 on success or -EINVAL on failure.
 */
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
{
	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);

	if (offset < adev->wb.num_wb) {
		__set_bit(offset, adev->wb.used);
		*wb = offset << 3; /* convert to dw offset */
		return 0;
	} else {
		return -EINVAL;
	}
}

/**
 * amdgpu_device_wb_free - Free a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Free a wb slot allocated for use by the driver (all asics)
 */
void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
{
	wb >>= 3;
	if (wb < adev->wb.num_wb)
		__clear_bit(wb, adev->wb.used);
}

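/*
 * Illustrative usage sketch (not part of the original file): a ring
 * typically reserves a writeback slot at init time and releases it on
 * teardown. The variable name below is made up for the example.
 *
 *   u32 wb_index;
 *
 *   if (amdgpu_device_wb_get(adev, &wb_index))
 *           return -EINVAL;
 *   ... point the ring's rptr/fence writeback at
 *       adev->wb.gpu_addr + wb_index * 4 ...
 *   amdgpu_device_wb_free(adev, wb_index);
 *
 * The returned index is already converted to a dword offset into the
 * writeback buffer.
 */
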
/**
 * amdgpu_device_resize_fb_bar - try to resize FB BAR
 *
 * @adev: amdgpu_device pointer
 *
 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 * to fail, but if any of the BARs is not accessible after the resize we abort
 * driver loading by returning -ENODEV.
 */
int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
{
	u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
	u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
	struct pci_bus *root;
	struct resource *res;
	unsigned i;
	u16 cmd;
	int r;

	/* Bypass for VF */
	if (amdgpu_sriov_vf(adev))
		return 0;

	/* Check if the root BUS has 64bit memory resources */
	root = adev->pdev->bus;
	while (root->parent)
		root = root->parent;

	pci_bus_for_each_resource(root, res, i) {
		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
		    res->start > 0x100000000ull)
			break;
	}

	/* Trying to resize is pointless without a root hub window above 4GB */
	if (!res)
		return 0;

	/* Disable memory decoding while we change the BAR addresses and size */
	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
	pci_write_config_word(adev->pdev, PCI_COMMAND,
			      cmd & ~PCI_COMMAND_MEMORY);

	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
	amdgpu_device_doorbell_fini(adev);
	if (adev->asic_type >= CHIP_BONAIRE)
		pci_release_resource(adev->pdev, 2);

	pci_release_resource(adev->pdev, 0);

	r = pci_resize_resource(adev->pdev, 0, rbar_size);
	if (r == -ENOSPC)
		DRM_INFO("Not enough PCI address space for a large BAR.");
	else if (r && r != -ENOTSUPP)
		DRM_ERROR("Problem resizing BAR0 (%d).", r);

	pci_assign_unassigned_bus_resources(adev->pdev->bus);

	/* When the doorbell or fb BAR isn't available we have no chance of
	 * using the device.
	 */
	r = amdgpu_device_doorbell_init(adev);
	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
		return -ENODEV;

	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);

	return 0;
}

/*
 * GPU helper functions.
 */
/**
 * amdgpu_device_need_post - check if the hw needs post or not
 *
 * @adev: amdgpu_device pointer
 *
 * Check if the asic has been initialized (all asics) at driver startup
 * or post is needed if hw reset is performed.
 * Returns true if post is needed, false if not.
 */
bool amdgpu_device_need_post(struct amdgpu_device *adev)
{
	uint32_t reg;

	if (amdgpu_sriov_vf(adev))
		return false;

	if (amdgpu_passthrough(adev)) {
		/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
		 * some old smc fw still need driver do vPost otherwise gpu hang, while
		 * those smc fw version above 22.15 doesn't have this flaw, so we force
		 * vpost executed for smc version below 22.15
		 */
		if (adev->asic_type == CHIP_FIJI) {
			int err;
			uint32_t fw_ver;
			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
			/* force vPost if error occurred */
			if (err)
				return true;

			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
			if (fw_ver < 0x00160e00)
				return true;
		}
	}

	if (adev->has_hw_reset) {
		adev->has_hw_reset = false;
		return true;
	}

	/* bios scratch used on CIK+ */
	if (adev->asic_type >= CHIP_BONAIRE)
		return amdgpu_atombios_scratch_need_asic_init(adev);

	/* check MEM_SIZE for older asics */
	reg = amdgpu_asic_get_config_memsize(adev);

	if ((reg != 0) && (reg != 0xffffffff))
		return false;

	return true;
}

/* if we get transitioned to only one device, take VGA back */
/**
 * amdgpu_device_vga_set_decode - enable/disable vga decode
 *
 * @cookie: amdgpu_device pointer
 * @state: enable/disable vga decode
 *
 * Enable/disable vga decode (all asics).
 * Returns VGA resource flags.
 */
static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
{
	struct amdgpu_device *adev = cookie;
	amdgpu_asic_set_vga_state(adev, state);
	if (state)
		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
	else
		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
}

/**
 * amdgpu_device_check_block_size - validate the vm block size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm block size specified via module parameter.
 * The vm block size defines the number of bits in the page table versus the
 * page directory: a page is 4KB so we have 12 bits of offset, a minimum of
 * 9 bits in the page table, and the remaining bits are in the page directory.
 */
static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
{
	/* defines number of bits in page table versus page directory,
	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
	 * page table and the remaining bits are in the page directory */
	if (amdgpu_vm_block_size == -1)
		return;

	if (amdgpu_vm_block_size < 9) {
		dev_warn(adev->dev, "VM page table size (%d) too small\n",
			 amdgpu_vm_block_size);
		amdgpu_vm_block_size = -1;
	}
}

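/*
 * Illustrative note (not from the original source): with the default
 * minimum of 9 bits per page table and a 4KB page (12 offset bits), one
 * page-table block covers 2^(12 + 9) bytes = 2MB of GPU virtual address
 * space; larger vm_block_size values grow that coverage at the cost of
 * bigger page tables.
 */
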
/**
 * amdgpu_device_check_vm_size - validate the vm size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm size in GB specified via module parameter.
 * The VM size is the size of the GPU virtual memory space in GB.
 */
static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
{
	/* no need to check the default value */
	if (amdgpu_vm_size == -1)
		return;

	if (amdgpu_vm_size < 1) {
		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
			 amdgpu_vm_size);
		amdgpu_vm_size = -1;
	}
}

static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
{
	struct sysinfo si;
	bool is_os_64 = (sizeof(void *) == 8);
	uint64_t total_memory;
	uint64_t dram_size_seven_GB = 0x1B8000000;
	uint64_t dram_size_three_GB = 0xB8000000;

	if (amdgpu_smu_memory_pool_size == 0)
		return;

	if (!is_os_64) {
		DRM_WARN("Not 64-bit OS, feature not supported\n");
		goto def_value;
	}
	si_meminfo(&si);
	total_memory = (uint64_t)si.totalram * si.mem_unit;

	if ((amdgpu_smu_memory_pool_size == 1) ||
	    (amdgpu_smu_memory_pool_size == 2)) {
		if (total_memory < dram_size_three_GB)
			goto def_value1;
	} else if ((amdgpu_smu_memory_pool_size == 4) ||
		   (amdgpu_smu_memory_pool_size == 8)) {
		if (total_memory < dram_size_seven_GB)
			goto def_value1;
	} else {
		DRM_WARN("Smu memory pool size not supported\n");
		goto def_value;
	}
	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;

	return;

def_value1:
	DRM_WARN("Not enough system memory\n");
def_value:
	adev->pm.smu_prv_buffer_size = 0;
}

/**
 * amdgpu_device_check_arguments - validate module params
 *
 * @adev: amdgpu_device pointer
 *
 * Validates certain module parameters and updates
 * the associated values used by the driver (all asics).
 */
static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
{
	if (amdgpu_sched_jobs < 4) {
		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = 4;
	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
	}

	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
		/* gart size must be greater or equal to 32M */
		dev_warn(adev->dev, "gart size (%d) too small\n",
			 amdgpu_gart_size);
		amdgpu_gart_size = -1;
	}

	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
		/* gtt size must be greater or equal to 32M */
		dev_warn(adev->dev, "gtt size (%d) too small\n",
			 amdgpu_gtt_size);
		amdgpu_gtt_size = -1;
	}

	/* valid range is between 4 and 9 inclusive */
	if (amdgpu_vm_fragment_size != -1 &&
	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
		dev_warn(adev->dev, "valid range is between 4 and 9\n");
		amdgpu_vm_fragment_size = -1;
	}

	amdgpu_device_check_smu_prv_buffer_size(adev);

	amdgpu_device_check_vm_size(adev);

	amdgpu_device_check_block_size(adev);

	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);

	amdgpu_gmc_tmz_set(adev);

	return 0;
}

/**
 * amdgpu_switcheroo_set_state - set switcheroo state
 *
 * @pdev: pci dev pointer
 * @state: vga_switcheroo state
 *
 * Callback for the switcheroo driver. Suspends or resumes the
 * asics before or after it is powered up using ACPI methods.
 */
static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
{
	struct drm_device *dev = pci_get_drvdata(pdev);
	int r;

	if (amdgpu_device_supports_boco(dev) && state == VGA_SWITCHEROO_OFF)
		return;

	if (state == VGA_SWITCHEROO_ON) {
		pr_info("switched on\n");
		/* don't suspend or resume card normally */
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;

		pci_set_power_state(dev->pdev, PCI_D0);
		pci_restore_state(dev->pdev);
		r = pci_enable_device(dev->pdev);
		if (r)
			DRM_WARN("pci_enable_device failed (%d)\n", r);
		amdgpu_device_resume(dev, true);

		dev->switch_power_state = DRM_SWITCH_POWER_ON;
		drm_kms_helper_poll_enable(dev);
	} else {
		pr_info("switched off\n");
		drm_kms_helper_poll_disable(dev);
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
		amdgpu_device_suspend(dev, true);
		pci_save_state(dev->pdev);
		/* Shut down the device */
		pci_disable_device(dev->pdev);
		pci_set_power_state(dev->pdev, PCI_D3cold);
		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
	}
}

/**
 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
 *
 * @pdev: pci dev pointer
 *
 * Callback for the switcheroo driver. Check if the switcheroo
 * state can be changed.
 * Returns true if the state can be changed, false if not.
 */
static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);

	/*
	 * FIXME: open_count is protected by drm_global_mutex but that would lead to
	 * locking inversion with the driver load path. And the access here is
	 * completely racy anyway. So don't bother with locking for now.
	 */
	return atomic_read(&dev->open_count) == 0;
}

static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
	.set_gpu_state = amdgpu_switcheroo_set_state,
	.reprobe = NULL,
	.can_switch = amdgpu_switcheroo_can_switch,
};

/**
 * amdgpu_device_ip_set_clockgating_state - set the CG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: clockgating state (gate or ungate)
 *
 * Sets the requested clockgating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_clockgating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

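/*
 * Illustrative usage sketch (not part of the original file): an IP driver
 * or the power-management code gates or ungates the graphics clocks like
 * this; the chosen block type and state are only an example.
 *
 *   amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
 *                                          AMD_CG_STATE_GATE);
 *
 * The call fans out to every valid IP block instance of the given type and
 * returns the error code from the last instance that was updated.
 */
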
/**
 * amdgpu_device_ip_set_powergating_state - set the PG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: powergating state (gate or ungate)
 *
 * Sets the requested powergating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_powergating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_powergating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

/**
 * amdgpu_device_ip_get_clockgating_state - get the CG state
 *
 * @adev: amdgpu_device pointer
 * @flags: clockgating feature flags
 *
 * Walks the list of IPs on the device and updates the clockgating
 * flags for each IP.
 * Updates @flags with the feature flags for each hardware IP where
 * clockgating is enabled.
 */
void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
					    u32 *flags)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
	}
}

/**
 * amdgpu_device_ip_wait_for_idle - wait for idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Waits for the requested hardware IP to be idle.
 * Returns 0 for success or a negative error code on failure.
 */
int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
				   enum amd_ip_block_type block_type)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type) {
			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
			if (r)
				return r;
			break;
		}
	}
	return 0;

}

/**
 * amdgpu_device_ip_is_idle - is the hardware IP idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Check if the hardware IP is idle or not.
 * Returns true if the IP is idle, false if not.
 */
bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
			      enum amd_ip_block_type block_type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type)
			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
	}
	return true;

}

/**
 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
 *
 * @adev: amdgpu_device pointer
 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Returns a pointer to the hardware IP block structure
 * if it exists for the asic, otherwise NULL.
 */
struct amdgpu_ip_block *
amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
			      enum amd_ip_block_type type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++)
		if (adev->ip_blocks[i].version->type == type)
			return &adev->ip_blocks[i];

	return NULL;
}

/**
 * amdgpu_device_ip_block_version_cmp
 *
 * @adev: amdgpu_device pointer
 * @type: enum amd_ip_block_type
 * @major: major version
 * @minor: minor version
 *
 * return 0 if equal or greater
 * return 1 if smaller or the ip_block doesn't exist
 */
int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
				       enum amd_ip_block_type type,
				       u32 major, u32 minor)
{
	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);

	if (ip_block && ((ip_block->version->major > major) ||
			((ip_block->version->major == major) &&
			(ip_block->version->minor >= minor))))
		return 0;

	return 1;
}

/**
 * amdgpu_device_ip_block_add
 *
 * @adev: amdgpu_device pointer
 * @ip_block_version: pointer to the IP to add
 *
 * Adds the IP block driver information to the collection of IPs
 * on the asic.
 */
int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
			       const struct amdgpu_ip_block_version *ip_block_version)
{
	if (!ip_block_version)
		return -EINVAL;

	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
		 ip_block_version->funcs->name);

	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;

	return 0;
}

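/*
 * Illustrative usage sketch (not part of the original file): the ASIC
 * specific set_ip_blocks() helpers register their IP drivers in order,
 * roughly like the following; the block names are only an example.
 *
 *   r = amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
 *   if (r)
 *           return r;
 *   r = amdgpu_device_ip_block_add(adev, &gmc_v8_0_ip_block);
 *   if (r)
 *           return r;
 *
 * The registration order matters because later init/resume passes walk
 * adev->ip_blocks[] in the order the blocks were added.
 */
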
/**
 * amdgpu_device_enable_virtual_display - enable virtual display feature
 *
 * @adev: amdgpu_device pointer
 *
 * Enables the virtual display feature if the user has enabled it via
 * the module parameter virtual_display. This feature provides a virtual
 * display hardware on headless boards or in virtualized environments.
 * This function parses and validates the configuration string specified by
 * the user and configures the virtual display configuration (number of
 * virtual connectors, crtcs, etc.) specified.
 */
static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
{
	adev->enable_virtual_display = false;

	if (amdgpu_virtual_display) {
		struct drm_device *ddev = adev->ddev;
		const char *pci_address_name = pci_name(ddev->pdev);
		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;

		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
		pciaddstr_tmp = pciaddstr;
		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
			pciaddname = strsep(&pciaddname_tmp, ",");
			if (!strcmp("all", pciaddname)
			    || !strcmp(pci_address_name, pciaddname)) {
				long num_crtc;
				int res = -1;

				adev->enable_virtual_display = true;

				if (pciaddname_tmp)
					res = kstrtol(pciaddname_tmp, 10,
						      &num_crtc);

				if (!res) {
					if (num_crtc < 1)
						num_crtc = 1;
					if (num_crtc > 6)
						num_crtc = 6;
					adev->mode_info.num_crtc = num_crtc;
				} else {
					adev->mode_info.num_crtc = 1;
				}
				break;
			}
		}

		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
			 amdgpu_virtual_display, pci_address_name,
			 adev->enable_virtual_display, adev->mode_info.num_crtc);

		kfree(pciaddstr);
	}
}

/**
 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
 *
 * @adev: amdgpu_device pointer
 *
 * Parses the asic configuration parameters specified in the gpu info
 * firmware and makes them available to the driver for use in configuring
 * the asic.
 * Returns 0 on success, -EINVAL on failure.
 */
static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	const struct gpu_info_firmware_header_v1_0 *hdr;

	adev->firmware.gpu_info_fw = NULL;

	if (adev->discovery_bin) {
		amdgpu_discovery_get_gfx_info(adev);
		return 0;
	}

	switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_VERDE:
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_OLAND:
	case CHIP_HAINAN:
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
#endif
	case CHIP_TOPAZ:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
	case CHIP_VEGA20:
	default:
		return 0;
	case CHIP_VEGA10:
		chip_name = "vega10";
		break;
	case CHIP_VEGA12:
		chip_name = "vega12";
		break;
	case CHIP_RAVEN:
		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
			chip_name = "raven2";
		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
			chip_name = "picasso";
		else
			chip_name = "raven";
		break;
	case CHIP_ARCTURUS:
		chip_name = "arcturus";
		break;
	case CHIP_RENOIR:
		chip_name = "renoir";
		break;
	case CHIP_NAVI10:
		chip_name = "navi10";
		break;
	case CHIP_NAVI14:
		chip_name = "navi14";
		break;
	case CHIP_NAVI12:
		chip_name = "navi12";
		break;
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
	err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
	if (err) {
		dev_err(adev->dev,
			"Failed to load gpu_info firmware \"%s\"\n",
			fw_name);
		goto out;
	}
	err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
	if (err) {
		dev_err(adev->dev,
			"Failed to validate gpu_info firmware \"%s\"\n",
			fw_name);
		goto out;
	}

	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);

	switch (hdr->version_major) {
	case 1:
	{
		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
								le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
		adev->gfx.config.max_texture_channel_caches =
			le32_to_cpu(gpu_info_fw->gc_num_tccs);
		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
		adev->gfx.config.double_offchip_lds_buf =
			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
		adev->gfx.cu_info.max_waves_per_simd =
			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
		adev->gfx.cu_info.max_scratch_slots_per_cu =
			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
		if (hdr->version_minor >= 1) {
			const struct gpu_info_firmware_v1_1 *gpu_info_fw =
				(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			adev->gfx.config.num_sc_per_sh =
				le32_to_cpu(gpu_info_fw->num_sc_per_sh);
			adev->gfx.config.num_packer_per_sc =
				le32_to_cpu(gpu_info_fw->num_packer_per_sc);
		}

		/*
		 * soc bounding box info is not integrated in the discovery table,
		 * we always need to parse it from gpu info firmware if needed.
		 */
		if (hdr->version_minor == 2) {
			const struct gpu_info_firmware_v1_2 *gpu_info_fw =
				(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
		}
		break;
	}
	default:
		dev_err(adev->dev,
			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
		err = -EINVAL;
		goto out;
	}
out:
	return err;
}

e3ecdffa
AD
1675/**
1676 * amdgpu_device_ip_early_init - run early init for hardware IPs
1677 *
1678 * @adev: amdgpu_device pointer
1679 *
1680 * Early initialization pass for hardware IPs. The hardware IPs that make
1681 * up each asic are discovered each IP's early_init callback is run. This
1682 * is the first stage in initializing the asic.
1683 * Returns 0 on success, negative error code on failure.
1684 */
06ec9070 1685static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 1686{
aaa36a97 1687 int i, r;
d38ceaf9 1688
483ef985 1689 amdgpu_device_enable_virtual_display(adev);
a6be7570 1690
d38ceaf9 1691 switch (adev->asic_type) {
33f34802
KW
1692#ifdef CONFIG_DRM_AMDGPU_SI
1693 case CHIP_VERDE:
1694 case CHIP_TAHITI:
1695 case CHIP_PITCAIRN:
1696 case CHIP_OLAND:
1697 case CHIP_HAINAN:
295d0daf 1698 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
1699 r = si_set_ip_blocks(adev);
1700 if (r)
1701 return r;
1702 break;
1703#endif
a2e73f56
AD
1704#ifdef CONFIG_DRM_AMDGPU_CIK
1705 case CHIP_BONAIRE:
1706 case CHIP_HAWAII:
1707 case CHIP_KAVERI:
1708 case CHIP_KABINI:
1709 case CHIP_MULLINS:
e1ad2d53 1710 if (adev->flags & AMD_IS_APU)
a2e73f56 1711 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
1712 else
1713 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
1714
1715 r = cik_set_ip_blocks(adev);
1716 if (r)
1717 return r;
1718 break;
1719#endif
da87c30b
AD
1720 case CHIP_TOPAZ:
1721 case CHIP_TONGA:
1722 case CHIP_FIJI:
1723 case CHIP_POLARIS10:
1724 case CHIP_POLARIS11:
1725 case CHIP_POLARIS12:
1726 case CHIP_VEGAM:
1727 case CHIP_CARRIZO:
1728 case CHIP_STONEY:
1729 if (adev->flags & AMD_IS_APU)
1730 adev->family = AMDGPU_FAMILY_CZ;
1731 else
1732 adev->family = AMDGPU_FAMILY_VI;
1733
1734 r = vi_set_ip_blocks(adev);
1735 if (r)
1736 return r;
1737 break;
e48a3cd9
AD
1738 case CHIP_VEGA10:
1739 case CHIP_VEGA12:
e4bd8170 1740 case CHIP_VEGA20:
e48a3cd9 1741 case CHIP_RAVEN:
61cf44c1 1742 case CHIP_ARCTURUS:
b51a26a0 1743 case CHIP_RENOIR:
70534d1e 1744 if (adev->flags & AMD_IS_APU)
2ca8a5d2
CZ
1745 adev->family = AMDGPU_FAMILY_RV;
1746 else
1747 adev->family = AMDGPU_FAMILY_AI;
460826e6
KW
1748
1749 r = soc15_set_ip_blocks(adev);
1750 if (r)
1751 return r;
1752 break;
0a5b8c7b 1753 case CHIP_NAVI10:
7ecb5cd4 1754 case CHIP_NAVI14:
4808cf9c 1755 case CHIP_NAVI12:
0a5b8c7b
HR
1756 adev->family = AMDGPU_FAMILY_NV;
1757
1758 r = nv_set_ip_blocks(adev);
1759 if (r)
1760 return r;
1761 break;
d38ceaf9
AD
1762 default:
1763 /* FIXME: not supported yet */
1764 return -EINVAL;
1765 }
1766
1884734a 1767 amdgpu_amdkfd_device_probe(adev);
1768
3149d9da 1769 if (amdgpu_sriov_vf(adev)) {
122078de
ML
1770 /* handle vbios stuff prior full access mode for new handshake */
1771 if (adev->virt.req_init_data_ver == 1) {
1772 if (!amdgpu_get_bios(adev)) {
1773 DRM_ERROR("failed to get vbios\n");
1774 return -EINVAL;
1775 }
1776
1777 r = amdgpu_atombios_init(adev);
1778 if (r) {
1779 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
1780 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
1781 return r;
1782 }
1783 }
2f294132 1784 }
122078de 1785
2f294132
ML
 1786	 /* we need to send REQ_GPU here for the legacy handshake, otherwise the vbios
 1787	  * will not be prepared by the host for this VF */
1788 if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver < 1) {
3149d9da
XY
1789 r = amdgpu_virt_request_full_gpu(adev, true);
1790 if (r)
2f294132 1791 return r;
3149d9da
XY
1792 }
1793
3b94fb10 1794 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 1795 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 1796 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
00f54b97 1797
d38ceaf9
AD
1798 for (i = 0; i < adev->num_ip_blocks; i++) {
1799 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
ed8cf00c
HR
1800 DRM_ERROR("disabled ip block: %d <%s>\n",
1801 i, adev->ip_blocks[i].version->funcs->name);
a1255107 1802 adev->ip_blocks[i].status.valid = false;
d38ceaf9 1803 } else {
a1255107
AD
1804 if (adev->ip_blocks[i].version->funcs->early_init) {
1805 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 1806 if (r == -ENOENT) {
a1255107 1807 adev->ip_blocks[i].status.valid = false;
2c1a2784 1808 } else if (r) {
a1255107
AD
1809 DRM_ERROR("early_init of IP block <%s> failed %d\n",
1810 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 1811 return r;
2c1a2784 1812 } else {
a1255107 1813 adev->ip_blocks[i].status.valid = true;
2c1a2784 1814 }
974e6b64 1815 } else {
a1255107 1816 adev->ip_blocks[i].status.valid = true;
d38ceaf9 1817 }
d38ceaf9 1818 }
21a249ca
AD
1819 /* get the vbios after the asic_funcs are set up */
1820 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
1821 r = amdgpu_device_parse_gpu_info_fw(adev);
1822 if (r)
1823 return r;
1824
122078de
ML
1825 /* skip vbios handling for new handshake */
1826 if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver == 1)
1827 continue;
1828
21a249ca
AD
1829 /* Read BIOS */
1830 if (!amdgpu_get_bios(adev))
1831 return -EINVAL;
1832
1833 r = amdgpu_atombios_init(adev);
1834 if (r) {
1835 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
1836 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
1837 return r;
1838 }
1839 }
d38ceaf9
AD
1840 }
1841
395d1fb9
NH
1842 adev->cg_flags &= amdgpu_cg_mask;
1843 adev->pg_flags &= amdgpu_pg_mask;
1844
d38ceaf9
AD
1845 return 0;
1846}
1847
0a4f2520
RZ
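/* Note: phase 1 below brings up only the blocks that must be ready early
 * (COMMON, IH, and PSP when running as an SR-IOV VF); every other block is
 * initialized afterwards by amdgpu_device_ip_hw_init_phase2().
 */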
1848static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
1849{
1850 int i, r;
1851
1852 for (i = 0; i < adev->num_ip_blocks; i++) {
1853 if (!adev->ip_blocks[i].status.sw)
1854 continue;
1855 if (adev->ip_blocks[i].status.hw)
1856 continue;
1857 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 1858 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
1859 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
1860 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1861 if (r) {
1862 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1863 adev->ip_blocks[i].version->funcs->name, r);
1864 return r;
1865 }
1866 adev->ip_blocks[i].status.hw = true;
1867 }
1868 }
1869
1870 return 0;
1871}
1872
1873static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
1874{
1875 int i, r;
1876
1877 for (i = 0; i < adev->num_ip_blocks; i++) {
1878 if (!adev->ip_blocks[i].status.sw)
1879 continue;
1880 if (adev->ip_blocks[i].status.hw)
1881 continue;
1882 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1883 if (r) {
1884 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1885 adev->ip_blocks[i].version->funcs->name, r);
1886 return r;
1887 }
1888 adev->ip_blocks[i].status.hw = true;
1889 }
1890
1891 return 0;
1892}
1893
7a3e0bb2
RZ
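/* Note: firmware loading goes through the PSP block on VEGA10 and newer;
 * resume() is used when coming back from a GPU reset or suspend, hw_init()
 * otherwise, and the SMU firmware is then loaded on bare metal (or Tonga).
 */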
1894static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
1895{
1896 int r = 0;
1897 int i;
80f41f84 1898 uint32_t smu_version;
7a3e0bb2
RZ
1899
1900 if (adev->asic_type >= CHIP_VEGA10) {
1901 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
1902 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
1903 continue;
1904
 1905			/* no need to do the fw loading again if already done */
1906 if (adev->ip_blocks[i].status.hw == true)
1907 break;
1908
1909 if (adev->in_gpu_reset || adev->in_suspend) {
1910 r = adev->ip_blocks[i].version->funcs->resume(adev);
1911 if (r) {
1912 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 1913 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
1914 return r;
1915 }
1916 } else {
1917 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1918 if (r) {
1919 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1920 adev->ip_blocks[i].version->funcs->name, r);
1921 return r;
7a3e0bb2 1922 }
7a3e0bb2 1923 }
482f0e53
ML
1924
1925 adev->ip_blocks[i].status.hw = true;
1926 break;
7a3e0bb2
RZ
1927 }
1928 }
482f0e53 1929
8973d9ec
ED
1930 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
1931 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 1932
80f41f84 1933 return r;
7a3e0bb2
RZ
1934}
1935
e3ecdffa
AD
1936/**
1937 * amdgpu_device_ip_init - run init for hardware IPs
1938 *
1939 * @adev: amdgpu_device pointer
1940 *
1941 * Main initialization pass for hardware IPs. The list of all the hardware
1942 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
1943 * are run. sw_init initializes the software state associated with each IP
1944 * and hw_init initializes the hardware associated with each IP.
1945 * Returns 0 on success, negative error code on failure.
1946 */
06ec9070 1947static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
1948{
1949 int i, r;
1950
c030f2e4 1951 r = amdgpu_ras_init(adev);
1952 if (r)
1953 return r;
1954
2f294132
ML
1955 if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver > 0) {
1956 r = amdgpu_virt_request_full_gpu(adev, true);
1957 if (r)
1958 return -EAGAIN;
1959 }
1960
d38ceaf9 1961 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1962 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 1963 continue;
a1255107 1964 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 1965 if (r) {
a1255107
AD
1966 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
1967 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 1968 goto init_failed;
2c1a2784 1969 }
a1255107 1970 adev->ip_blocks[i].status.sw = true;
bfca0289 1971
d38ceaf9 1972 /* need to do gmc hw init early so we can allocate gpu mem */
a1255107 1973 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
06ec9070 1974 r = amdgpu_device_vram_scratch_init(adev);
2c1a2784
AD
1975 if (r) {
1976 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
72d3f592 1977 goto init_failed;
2c1a2784 1978 }
a1255107 1979 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
1980 if (r) {
1981 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 1982 goto init_failed;
2c1a2784 1983 }
06ec9070 1984 r = amdgpu_device_wb_init(adev);
2c1a2784 1985 if (r) {
06ec9070 1986 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 1987 goto init_failed;
2c1a2784 1988 }
a1255107 1989 adev->ip_blocks[i].status.hw = true;
2493664f
ML
1990
1991 /* right after GMC hw init, we create CSA */
f92d5c61 1992 if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
1e256e27
RZ
1993 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
1994 AMDGPU_GEM_DOMAIN_VRAM,
1995 AMDGPU_CSA_SIZE);
2493664f
ML
1996 if (r) {
1997 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 1998 goto init_failed;
2493664f
ML
1999 }
2000 }
d38ceaf9
AD
2001 }
2002 }
2003
c9ffa427
YT
2004 if (amdgpu_sriov_vf(adev))
2005 amdgpu_virt_init_data_exchange(adev);
2006
533aed27
AG
2007 r = amdgpu_ib_pool_init(adev);
2008 if (r) {
2009 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2010 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2011 goto init_failed;
2012 }
2013
c8963ea4
RZ
2014 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2015 if (r)
72d3f592 2016 goto init_failed;
0a4f2520
RZ
2017
2018 r = amdgpu_device_ip_hw_init_phase1(adev);
2019 if (r)
72d3f592 2020 goto init_failed;
0a4f2520 2021
7a3e0bb2
RZ
2022 r = amdgpu_device_fw_loading(adev);
2023 if (r)
72d3f592 2024 goto init_failed;
7a3e0bb2 2025
0a4f2520
RZ
2026 r = amdgpu_device_ip_hw_init_phase2(adev);
2027 if (r)
72d3f592 2028 goto init_failed;
d38ceaf9 2029
121a2bc6
AG
2030 /*
 2031	 * retired pages will be loaded from eeprom and reserved here,
 2032	 * it should be called after amdgpu_device_ip_hw_init_phase2 since
 2033	 * for some ASICs the RAS EEPROM code relies on the SMU being fully
 2034	 * functional for I2C communication, which is only true at this point.
 2035	 * recovery_init may fail, but it can free all resources allocated by
 2036	 * itself and its failure should not stop the amdgpu init process.
 2037	 *
 2038	 * Note: theoretically, this should be called before all vram allocations
 2039	 * to protect retired pages from being abused.
2040 */
2041 amdgpu_ras_recovery_init(adev);
2042
3e2e2ab5
HZ
2043 if (adev->gmc.xgmi.num_physical_nodes > 1)
2044 amdgpu_xgmi_add_device(adev);
1884734a 2045 amdgpu_amdkfd_device_init(adev);
c6332b97 2046
bd607166
KR
2047 amdgpu_fru_get_product_info(adev);
2048
72d3f592 2049init_failed:
c9ffa427 2050 if (amdgpu_sriov_vf(adev))
c6332b97 2051 amdgpu_virt_release_full_gpu(adev, true);
2052
72d3f592 2053 return r;
d38ceaf9
AD
2054}
2055
e3ecdffa
AD
2056/**
2057 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2058 *
2059 * @adev: amdgpu_device pointer
2060 *
2061 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2062 * this function before a GPU reset. If the value is retained after a
 2063	 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2064 */
06ec9070 2065static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2066{
2067 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2068}
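/* The magic written above is compared by amdgpu_device_check_vram_lost()
 * below to decide whether VRAM contents survived a reset.
 */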
2069
e3ecdffa
AD
2070/**
2071 * amdgpu_device_check_vram_lost - check if vram is valid
2072 *
2073 * @adev: amdgpu_device pointer
2074 *
2075 * Checks the reset magic value written to the gart pointer in VRAM.
 2076	 * The driver calls this after a GPU reset to see if the contents of
 2077	 * VRAM have been lost or not.
 2078	 * Returns true if vram is lost, false if not.
2079 */
06ec9070 2080static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2081{
dadce777
EQ
2082 if (memcmp(adev->gart.ptr, adev->reset_magic,
2083 AMDGPU_RESET_MAGIC_NUM))
2084 return true;
2085
2086 if (!adev->in_gpu_reset)
2087 return false;
2088
2089 /*
2090 * For all ASICs with baco/mode1 reset, the VRAM is
2091 * always assumed to be lost.
2092 */
2093 switch (amdgpu_asic_reset_method(adev)) {
2094 case AMD_RESET_METHOD_BACO:
2095 case AMD_RESET_METHOD_MODE1:
2096 return true;
2097 default:
2098 return false;
2099 }
0c49e0b8
CZ
2100}
2101
e3ecdffa 2102/**
1112a46b 2103 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2104 *
2105 * @adev: amdgpu_device pointer
b8b72130 2106 * @state: clockgating state (gate or ungate)
e3ecdffa 2107 *
e3ecdffa 2108 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2109 * set_clockgating_state callbacks are run.
 2110	 * The late initialization pass uses this to enable clockgating for hardware IPs;
 2111	 * the fini and suspend passes use it to disable clockgating for hardware IPs.
e3ecdffa
AD
2112 * Returns 0 on success, negative error code on failure.
2113 */
fdd34271 2114
1112a46b
RZ
2115static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2116 enum amd_clockgating_state state)
d38ceaf9 2117{
1112a46b 2118 int i, j, r;
d38ceaf9 2119
4a2ba394
SL
2120 if (amdgpu_emu_mode == 1)
2121 return 0;
2122
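	/* Gating walks the IP list front to back; ungating walks it in
	 * reverse order (see the index calculation below). */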
1112a46b
RZ
2123 for (j = 0; j < adev->num_ip_blocks; j++) {
2124 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2125 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2126 continue;
4a446d55 2127 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2128 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2129 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2130 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2131 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2132 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2133 /* enable clockgating to save power */
a1255107 2134 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2135 state);
4a446d55
AD
2136 if (r) {
2137 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2138 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2139 return r;
2140 }
b0b00ff1 2141 }
d38ceaf9 2142 }
06b18f61 2143
c9f96fd5
RZ
2144 return 0;
2145}
2146
1112a46b 2147static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
c9f96fd5 2148{
1112a46b 2149 int i, j, r;
06b18f61 2150
c9f96fd5
RZ
2151 if (amdgpu_emu_mode == 1)
2152 return 0;
2153
1112a46b
RZ
2154 for (j = 0; j < adev->num_ip_blocks; j++) {
2155 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2156 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5
RZ
2157 continue;
 2158		/* skip PG for VCE/UVD, it's handled specially */
2159 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2160 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2161 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2162 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2163 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2164 /* enable powergating to save power */
2165 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2166 state);
c9f96fd5
RZ
2167 if (r) {
2168 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2169 adev->ip_blocks[i].version->funcs->name, r);
2170 return r;
2171 }
2172 }
2173 }
2dc80b00
S
2174 return 0;
2175}
2176
beff74bc
AD
2177static int amdgpu_device_enable_mgpu_fan_boost(void)
2178{
2179 struct amdgpu_gpu_instance *gpu_ins;
2180 struct amdgpu_device *adev;
2181 int i, ret = 0;
2182
2183 mutex_lock(&mgpu_info.mutex);
2184
2185 /*
2186 * MGPU fan boost feature should be enabled
2187 * only when there are two or more dGPUs in
2188 * the system
2189 */
2190 if (mgpu_info.num_dgpu < 2)
2191 goto out;
2192
2193 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2194 gpu_ins = &(mgpu_info.gpu_ins[i]);
2195 adev = gpu_ins->adev;
2196 if (!(adev->flags & AMD_IS_APU) &&
2197 !gpu_ins->mgpu_fan_enabled &&
2198 adev->powerplay.pp_funcs &&
2199 adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
2200 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2201 if (ret)
2202 break;
2203
2204 gpu_ins->mgpu_fan_enabled = 1;
2205 }
2206 }
2207
2208out:
2209 mutex_unlock(&mgpu_info.mutex);
2210
2211 return ret;
2212}
2213
e3ecdffa
AD
2214/**
2215 * amdgpu_device_ip_late_init - run late init for hardware IPs
2216 *
2217 * @adev: amdgpu_device pointer
2218 *
2219 * Late initialization pass for hardware IPs. The list of all the hardware
2220 * IPs that make up the asic is walked and the late_init callbacks are run.
2221 * late_init covers any special initialization that an IP requires
 2222	 * after all of the IPs have been initialized or something that needs to happen
2223 * late in the init process.
2224 * Returns 0 on success, negative error code on failure.
2225 */
06ec9070 2226static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2227{
60599a03 2228 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2229 int i = 0, r;
2230
2231 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2232 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2233 continue;
2234 if (adev->ip_blocks[i].version->funcs->late_init) {
2235 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2236 if (r) {
2237 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2238 adev->ip_blocks[i].version->funcs->name, r);
2239 return r;
2240 }
2dc80b00 2241 }
73f847db 2242 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2243 }
2244
a891d239
DL
2245 amdgpu_ras_set_error_query_ready(adev, true);
2246
1112a46b
RZ
2247 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2248 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2249
06ec9070 2250 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2251
beff74bc
AD
2252 r = amdgpu_device_enable_mgpu_fan_boost();
2253 if (r)
2254 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2255
60599a03
EQ
2256
2257 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2258 mutex_lock(&mgpu_info.mutex);
2259
2260 /*
 2261		 * Reset the device p-state to low, as it was booted with a high p-state.
 2262		 *
 2263		 * This should be performed only after all devices from the same
 2264		 * hive get initialized.
 2265		 *
 2266		 * However, the number of devices in the hive is not known in advance,
 2267		 * as it is counted one by one during device initialization.
 2268		 *
 2269		 * So, we wait for all XGMI interlinked devices to be initialized.
2270 * This may bring some delays as those devices may come from
2271 * different hives. But that should be OK.
2272 */
2273 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2274 for (i = 0; i < mgpu_info.num_gpu; i++) {
2275 gpu_instance = &(mgpu_info.gpu_ins[i]);
2276 if (gpu_instance->adev->flags & AMD_IS_APU)
2277 continue;
2278
d84a430d
JK
2279 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2280 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
2281 if (r) {
2282 DRM_ERROR("pstate setting failed (%d).\n", r);
2283 break;
2284 }
2285 }
2286 }
2287
2288 mutex_unlock(&mgpu_info.mutex);
2289 }
2290
d38ceaf9
AD
2291 return 0;
2292}
2293
e3ecdffa
AD
2294/**
2295 * amdgpu_device_ip_fini - run fini for hardware IPs
2296 *
2297 * @adev: amdgpu_device pointer
2298 *
2299 * Main teardown pass for hardware IPs. The list of all the hardware
2300 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2301 * are run. hw_fini tears down the hardware associated with each IP
2302 * and sw_fini tears down any software state associated with each IP.
2303 * Returns 0 on success, negative error code on failure.
2304 */
06ec9070 2305static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
d38ceaf9
AD
2306{
2307 int i, r;
2308
c030f2e4 2309 amdgpu_ras_pre_fini(adev);
2310
a82400b5
AG
2311 if (adev->gmc.xgmi.num_physical_nodes > 1)
2312 amdgpu_xgmi_remove_device(adev);
2313
1884734a 2314 amdgpu_amdkfd_device_fini(adev);
05df1f01
RZ
2315
2316 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2317 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2318
3e96dbfd
AD
2319 /* need to disable SMC first */
2320 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2321 if (!adev->ip_blocks[i].status.hw)
3e96dbfd 2322 continue;
fdd34271 2323 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
a1255107 2324 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3e96dbfd
AD
2325 /* XXX handle errors */
2326 if (r) {
2327 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
a1255107 2328 adev->ip_blocks[i].version->funcs->name, r);
3e96dbfd 2329 }
a1255107 2330 adev->ip_blocks[i].status.hw = false;
3e96dbfd
AD
2331 break;
2332 }
2333 }
2334
d38ceaf9 2335 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2336 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2337 continue;
8201a67a 2338
a1255107 2339 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2340 /* XXX handle errors */
2c1a2784 2341 if (r) {
a1255107
AD
2342 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2343 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2344 }
8201a67a 2345
a1255107 2346 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2347 }
2348
9950cda2 2349
d38ceaf9 2350 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2351 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2352 continue;
c12aba3a
ML
2353
2354 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2355 amdgpu_ucode_free_bo(adev);
1e256e27 2356 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a
ML
2357 amdgpu_device_wb_fini(adev);
2358 amdgpu_device_vram_scratch_fini(adev);
533aed27 2359 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2360 }
2361
a1255107 2362 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2363 /* XXX handle errors */
2c1a2784 2364 if (r) {
a1255107
AD
2365 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2366 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2367 }
a1255107
AD
2368 adev->ip_blocks[i].status.sw = false;
2369 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2370 }
2371
a6dcfd9c 2372 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2373 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2374 continue;
a1255107
AD
2375 if (adev->ip_blocks[i].version->funcs->late_fini)
2376 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2377 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2378 }
2379
c030f2e4 2380 amdgpu_ras_fini(adev);
2381
030308fc 2382 if (amdgpu_sriov_vf(adev))
24136135
ML
2383 if (amdgpu_virt_release_full_gpu(adev, false))
2384 DRM_ERROR("failed to release exclusive mode on fini\n");
2493664f 2385
d38ceaf9
AD
2386 return 0;
2387}
2388
e3ecdffa 2389/**
beff74bc 2390 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2391 *
1112a46b 2392 * @work: work_struct.
e3ecdffa 2393 */
beff74bc 2394static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2395{
2396 struct amdgpu_device *adev =
beff74bc 2397 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2398 int r;
2399
2400 r = amdgpu_ib_ring_tests(adev);
2401 if (r)
2402 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2403}
2404
1e317b99
RZ
2405static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2406{
2407 struct amdgpu_device *adev =
2408 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2409
2410 mutex_lock(&adev->gfx.gfx_off_mutex);
2411 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2412 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2413 adev->gfx.gfx_off_state = true;
2414 }
2415 mutex_unlock(&adev->gfx.gfx_off_mutex);
2416}
2417
e3ecdffa 2418/**
e7854a03 2419 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2420 *
2421 * @adev: amdgpu_device pointer
2422 *
2423 * Main suspend function for hardware IPs. The list of all the hardware
2424 * IPs that make up the asic is walked, clockgating is disabled and the
2425 * suspend callbacks are run. suspend puts the hardware and software state
2426 * in each IP into a state suitable for suspend.
2427 * Returns 0 on success, negative error code on failure.
2428 */
e7854a03
AD
2429static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2430{
2431 int i, r;
2432
ced1ba97
PL
2433 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2434 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2435
e7854a03
AD
2436 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2437 if (!adev->ip_blocks[i].status.valid)
2438 continue;
2439 /* displays are handled separately */
2440 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
e7854a03
AD
2441 /* XXX handle errors */
2442 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2443 /* XXX handle errors */
2444 if (r) {
2445 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2446 adev->ip_blocks[i].version->funcs->name, r);
482f0e53 2447 return r;
e7854a03 2448 }
482f0e53 2449 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
2450 }
2451 }
2452
e7854a03
AD
2453 return 0;
2454}
2455
2456/**
2457 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2458 *
2459 * @adev: amdgpu_device pointer
2460 *
2461 * Main suspend function for hardware IPs. The list of all the hardware
2462 * IPs that make up the asic is walked, clockgating is disabled and the
2463 * suspend callbacks are run. suspend puts the hardware and software state
2464 * in each IP into a state suitable for suspend.
2465 * Returns 0 on success, negative error code on failure.
2466 */
2467static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2468{
2469 int i, r;
2470
2471 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2472 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2473 continue;
e7854a03
AD
2474 /* displays are handled in phase1 */
2475 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2476 continue;
bff77e86
LM
2477 /* PSP lost connection when err_event_athub occurs */
2478 if (amdgpu_ras_intr_triggered() &&
2479 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2480 adev->ip_blocks[i].status.hw = false;
2481 continue;
2482 }
d38ceaf9 2483 /* XXX handle errors */
a1255107 2484 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 2485 /* XXX handle errors */
2c1a2784 2486 if (r) {
a1255107
AD
2487 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2488 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2489 }
876923fb 2490 adev->ip_blocks[i].status.hw = false;
a3a09142 2491 /* handle putting the SMC in the appropriate state */
86b93fd6
JZ
 2492		if (!amdgpu_sriov_vf(adev)) {
2493 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2494 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
2495 if (r) {
2496 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
2497 adev->mp1_state, r);
2498 return r;
2499 }
a3a09142
AD
2500 }
2501 }
b5507c7e 2502 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2503 }
2504
2505 return 0;
2506}
2507
e7854a03
AD
2508/**
2509 * amdgpu_device_ip_suspend - run suspend for hardware IPs
2510 *
2511 * @adev: amdgpu_device pointer
2512 *
2513 * Main suspend function for hardware IPs. The list of all the hardware
2514 * IPs that make up the asic is walked, clockgating is disabled and the
2515 * suspend callbacks are run. suspend puts the hardware and software state
2516 * in each IP into a state suitable for suspend.
2517 * Returns 0 on success, negative error code on failure.
2518 */
2519int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2520{
2521 int r;
2522
e7819644
YT
2523 if (amdgpu_sriov_vf(adev))
2524 amdgpu_virt_request_full_gpu(adev, false);
2525
e7854a03
AD
2526 r = amdgpu_device_ip_suspend_phase1(adev);
2527 if (r)
2528 return r;
2529 r = amdgpu_device_ip_suspend_phase2(adev);
2530
e7819644
YT
2531 if (amdgpu_sriov_vf(adev))
2532 amdgpu_virt_release_full_gpu(adev, false);
2533
e7854a03
AD
2534 return r;
2535}
2536
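/* The two reinit helpers below are used on the SR-IOV recovery path (caller
 * not shown here): the early pass restores GMC, COMMON, PSP and IH in that
 * order, and the late pass brings back the remaining blocks.
 */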
06ec9070 2537static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2538{
2539 int i, r;
2540
2cb681b6
ML
2541 static enum amd_ip_block_type ip_order[] = {
2542 AMD_IP_BLOCK_TYPE_GMC,
2543 AMD_IP_BLOCK_TYPE_COMMON,
39186aef 2544 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
2545 AMD_IP_BLOCK_TYPE_IH,
2546 };
a90ad3c2 2547
2cb681b6
ML
2548 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2549 int j;
2550 struct amdgpu_ip_block *block;
a90ad3c2 2551
2cb681b6
ML
2552 for (j = 0; j < adev->num_ip_blocks; j++) {
2553 block = &adev->ip_blocks[j];
2554
482f0e53 2555 block->status.hw = false;
2cb681b6
ML
2556 if (block->version->type != ip_order[i] ||
2557 !block->status.valid)
2558 continue;
2559
2560 r = block->version->funcs->hw_init(adev);
0aaeefcc 2561 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2562 if (r)
2563 return r;
482f0e53 2564 block->status.hw = true;
a90ad3c2
ML
2565 }
2566 }
2567
2568 return 0;
2569}
2570
06ec9070 2571static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2572{
2573 int i, r;
2574
2cb681b6
ML
2575 static enum amd_ip_block_type ip_order[] = {
2576 AMD_IP_BLOCK_TYPE_SMC,
2577 AMD_IP_BLOCK_TYPE_DCE,
2578 AMD_IP_BLOCK_TYPE_GFX,
2579 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c 2580 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07
JJ
2581 AMD_IP_BLOCK_TYPE_VCE,
2582 AMD_IP_BLOCK_TYPE_VCN
2cb681b6 2583 };
a90ad3c2 2584
2cb681b6
ML
2585 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2586 int j;
2587 struct amdgpu_ip_block *block;
a90ad3c2 2588
2cb681b6
ML
2589 for (j = 0; j < adev->num_ip_blocks; j++) {
2590 block = &adev->ip_blocks[j];
2591
2592 if (block->version->type != ip_order[i] ||
482f0e53
ML
2593 !block->status.valid ||
2594 block->status.hw)
2cb681b6
ML
2595 continue;
2596
895bd048
JZ
2597 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
2598 r = block->version->funcs->resume(adev);
2599 else
2600 r = block->version->funcs->hw_init(adev);
2601
0aaeefcc 2602 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2603 if (r)
2604 return r;
482f0e53 2605 block->status.hw = true;
a90ad3c2
ML
2606 }
2607 }
2608
2609 return 0;
2610}
2611
e3ecdffa
AD
2612/**
2613 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2614 *
2615 * @adev: amdgpu_device pointer
2616 *
2617 * First resume function for hardware IPs. The list of all the hardware
2618 * IPs that make up the asic is walked and the resume callbacks are run for
2619 * COMMON, GMC, and IH. resume puts the hardware into a functional state
2620 * after a suspend and updates the software state as necessary. This
2621 * function is also used for restoring the GPU after a GPU reset.
2622 * Returns 0 on success, negative error code on failure.
2623 */
06ec9070 2624static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
2625{
2626 int i, r;
2627
a90ad3c2 2628 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2629 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 2630 continue;
a90ad3c2 2631 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa
AD
2632 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2633 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
482f0e53 2634
fcf0649f
CZ
2635 r = adev->ip_blocks[i].version->funcs->resume(adev);
2636 if (r) {
2637 DRM_ERROR("resume of IP block <%s> failed %d\n",
2638 adev->ip_blocks[i].version->funcs->name, r);
2639 return r;
2640 }
482f0e53 2641 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
2642 }
2643 }
2644
2645 return 0;
2646}
2647
e3ecdffa
AD
2648/**
2649 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2650 *
2651 * @adev: amdgpu_device pointer
2652 *
 2653	 * Second resume function for hardware IPs. The list of all the hardware
2654 * IPs that make up the asic is walked and the resume callbacks are run for
2655 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
2656 * functional state after a suspend and updates the software state as
2657 * necessary. This function is also used for restoring the GPU after a GPU
2658 * reset.
2659 * Returns 0 on success, negative error code on failure.
2660 */
06ec9070 2661static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2662{
2663 int i, r;
2664
2665 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2666 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 2667 continue;
fcf0649f 2668 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 2669 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
2670 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2671 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 2672 continue;
a1255107 2673 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 2674 if (r) {
a1255107
AD
2675 DRM_ERROR("resume of IP block <%s> failed %d\n",
2676 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2677 return r;
2c1a2784 2678 }
482f0e53 2679 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
2680 }
2681
2682 return 0;
2683}
2684
e3ecdffa
AD
2685/**
2686 * amdgpu_device_ip_resume - run resume for hardware IPs
2687 *
2688 * @adev: amdgpu_device pointer
2689 *
2690 * Main resume function for hardware IPs. The hardware IPs
 2691	 * are split into two resume functions because they are
 2692	 * also used in recovering from a GPU reset and some additional
 2693	 * steps need to be taken between them. In this case (S3/S4) they are
2694 * run sequentially.
2695 * Returns 0 on success, negative error code on failure.
2696 */
06ec9070 2697static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
2698{
2699 int r;
2700
06ec9070 2701 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
2702 if (r)
2703 return r;
7a3e0bb2
RZ
2704
2705 r = amdgpu_device_fw_loading(adev);
2706 if (r)
2707 return r;
2708
06ec9070 2709 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
2710
2711 return r;
2712}
2713
e3ecdffa
AD
2714/**
2715 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2716 *
2717 * @adev: amdgpu_device pointer
2718 *
2719 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2720 */
4e99a44e 2721static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 2722{
6867e1b5
ML
2723 if (amdgpu_sriov_vf(adev)) {
2724 if (adev->is_atom_fw) {
2725 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2726 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2727 } else {
2728 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2729 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2730 }
2731
2732 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2733 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 2734 }
048765ad
AR
2735}
2736
e3ecdffa
AD
2737/**
2738 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2739 *
2740 * @asic_type: AMD asic type
2741 *
 2742	 * Check if there is DC (new modesetting infrastructure) support for an asic.
2743 * returns true if DC has support, false if not.
2744 */
4562236b
HW
2745bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2746{
2747 switch (asic_type) {
2748#if defined(CONFIG_DRM_AMD_DC)
2749 case CHIP_BONAIRE:
0d6fbccb 2750 case CHIP_KAVERI:
367e6687
AD
2751 case CHIP_KABINI:
2752 case CHIP_MULLINS:
d9fda248
HW
2753 /*
2754 * We have systems in the wild with these ASICs that require
2755 * LVDS and VGA support which is not supported with DC.
2756 *
2757 * Fallback to the non-DC driver here by default so as not to
2758 * cause regressions.
2759 */
2760 return amdgpu_dc > 0;
2761 case CHIP_HAWAII:
4562236b
HW
2762 case CHIP_CARRIZO:
2763 case CHIP_STONEY:
4562236b 2764 case CHIP_POLARIS10:
675fd32b 2765 case CHIP_POLARIS11:
2c8ad2d5 2766 case CHIP_POLARIS12:
675fd32b 2767 case CHIP_VEGAM:
4562236b
HW
2768 case CHIP_TONGA:
2769 case CHIP_FIJI:
42f8ffa1 2770 case CHIP_VEGA10:
dca7b401 2771 case CHIP_VEGA12:
c6034aa2 2772 case CHIP_VEGA20:
b86a1aa3 2773#if defined(CONFIG_DRM_AMD_DC_DCN)
fd187853 2774 case CHIP_RAVEN:
b4f199c7 2775 case CHIP_NAVI10:
8fceceb6 2776 case CHIP_NAVI14:
078655d9 2777 case CHIP_NAVI12:
e1c14c43 2778 case CHIP_RENOIR:
42f8ffa1 2779#endif
fd187853 2780 return amdgpu_dc != 0;
4562236b
HW
2781#endif
2782 default:
93b09a9a
SS
2783 if (amdgpu_dc > 0)
2784 DRM_INFO("Display Core has been requested via kernel parameter "
2785 "but isn't supported by ASIC, ignoring\n");
4562236b
HW
2786 return false;
2787 }
2788}
2789
2790/**
2791 * amdgpu_device_has_dc_support - check if dc is supported
2792 *
 2793	 * @adev: amdgpu_device pointer
2794 *
2795 * Returns true for supported, false for not supported
2796 */
2797bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2798{
2555039d
XY
2799 if (amdgpu_sriov_vf(adev))
2800 return false;
2801
4562236b
HW
2802 return amdgpu_device_asic_has_dc_support(adev->asic_type);
2803}
2804
d4535e2c
AG
2805
2806static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
2807{
2808 struct amdgpu_device *adev =
2809 container_of(__work, struct amdgpu_device, xgmi_reset_work);
c6a6e2db 2810 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
d4535e2c 2811
c6a6e2db
AG
2812 /* It's a bug to not have a hive within this function */
2813 if (WARN_ON(!hive))
2814 return;
2815
2816 /*
2817 * Use task barrier to synchronize all xgmi reset works across the
2818 * hive. task_barrier_enter and task_barrier_exit will block
2819 * until all the threads running the xgmi reset works reach
2820 * those points. task_barrier_full will do both blocks.
2821 */
2822 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
2823
2824 task_barrier_enter(&hive->tb);
2825 adev->asic_reset_res = amdgpu_device_baco_enter(adev->ddev);
2826
2827 if (adev->asic_reset_res)
2828 goto fail;
2829
2830 task_barrier_exit(&hive->tb);
2831 adev->asic_reset_res = amdgpu_device_baco_exit(adev->ddev);
2832
2833 if (adev->asic_reset_res)
2834 goto fail;
43c4d576
JC
2835
2836 if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count)
2837 adev->mmhub.funcs->reset_ras_error_count(adev);
c6a6e2db
AG
2838 } else {
2839
2840 task_barrier_full(&hive->tb);
2841 adev->asic_reset_res = amdgpu_asic_reset(adev);
2842 }
ce316fa5 2843
c6a6e2db 2844fail:
d4535e2c 2845 if (adev->asic_reset_res)
fed184e9 2846 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
d4535e2c
AG
2847 adev->asic_reset_res, adev->ddev->unique);
2848}
2849
71f98027
AD
2850static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
2851{
2852 char *input = amdgpu_lockup_timeout;
2853 char *timeout_setting = NULL;
2854 int index = 0;
2855 long timeout;
2856 int ret = 0;
2857
2858 /*
 2859	 * By default the timeout for non-compute jobs is 10000.
 2860	 * And there is no timeout enforced on compute jobs.
 2861	 * In SR-IOV or passthrough mode, the timeout for compute
b7b2a316	 2862	 * jobs is 60000 by default.
71f98027
AD
2863 */
2864 adev->gfx_timeout = msecs_to_jiffies(10000);
2865 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
2866 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
b7b2a316 2867 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027
AD
2868 else
2869 adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
2870
f440ff44 2871 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 2872 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 2873 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
2874 ret = kstrtol(timeout_setting, 0, &timeout);
2875 if (ret)
2876 return ret;
2877
2878 if (timeout == 0) {
2879 index++;
2880 continue;
2881 } else if (timeout < 0) {
2882 timeout = MAX_SCHEDULE_TIMEOUT;
2883 } else {
2884 timeout = msecs_to_jiffies(timeout);
2885 }
2886
2887 switch (index++) {
2888 case 0:
2889 adev->gfx_timeout = timeout;
2890 break;
2891 case 1:
2892 adev->compute_timeout = timeout;
2893 break;
2894 case 2:
2895 adev->sdma_timeout = timeout;
2896 break;
2897 case 3:
2898 adev->video_timeout = timeout;
2899 break;
2900 default:
2901 break;
2902 }
2903 }
2904 /*
2905 * There is only one value specified and
2906 * it should apply to all non-compute jobs.
2907 */
bcccee89 2908 if (index == 1) {
71f98027 2909 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
2910 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
2911 adev->compute_timeout = adev->gfx_timeout;
2912 }
71f98027
AD
2913 }
2914
2915 return ret;
2916}
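/* Illustrative example only (the module parameter name comes from
 * amdgpu_drv.c, not shown here): amdgpu.lockup_timeout=10000,60000,10000,10000
 * sets the gfx, compute, sdma and video timeouts in ms; a single value such as
 * amdgpu.lockup_timeout=10000 applies to all non-compute queues (and also to
 * compute under SR-IOV/passthrough); 0 keeps the default and a negative value
 * means no timeout, matching the parsing above.
 */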
d4535e2c 2917
77f3a5cd
ND
2918static const struct attribute *amdgpu_dev_attributes[] = {
2919 &dev_attr_product_name.attr,
2920 &dev_attr_product_number.attr,
2921 &dev_attr_serial_number.attr,
2922 &dev_attr_pcie_replay_count.attr,
2923 NULL
2924};
2925
d38ceaf9
AD
2926/**
2927 * amdgpu_device_init - initialize the driver
2928 *
2929 * @adev: amdgpu_device pointer
87e3f136 2930 * @ddev: drm dev pointer
d38ceaf9
AD
2931 * @pdev: pci dev pointer
2932 * @flags: driver flags
2933 *
2934 * Initializes the driver info and hw (all asics).
2935 * Returns 0 for success or an error on failure.
2936 * Called at driver startup.
2937 */
2938int amdgpu_device_init(struct amdgpu_device *adev,
2939 struct drm_device *ddev,
2940 struct pci_dev *pdev,
2941 uint32_t flags)
2942{
2943 int r, i;
3840c5bc 2944 bool boco = false;
95844d20 2945 u32 max_MBps;
d38ceaf9
AD
2946
2947 adev->shutdown = false;
2948 adev->dev = &pdev->dev;
2949 adev->ddev = ddev;
2950 adev->pdev = pdev;
2951 adev->flags = flags;
4e66d7d2
YZ
2952
2953 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
2954 adev->asic_type = amdgpu_force_asic_type;
2955 else
2956 adev->asic_type = flags & AMD_ASIC_MASK;
2957
d38ceaf9 2958 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 2959 if (amdgpu_emu_mode == 1)
8bdab6bb 2960 adev->usec_timeout *= 10;
770d13b1 2961 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
2962 adev->accel_working = false;
2963 adev->num_rings = 0;
2964 adev->mman.buffer_funcs = NULL;
2965 adev->mman.buffer_funcs_ring = NULL;
2966 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 2967 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 2968 adev->gmc.gmc_funcs = NULL;
f54d1867 2969 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 2970 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
2971
2972 adev->smc_rreg = &amdgpu_invalid_rreg;
2973 adev->smc_wreg = &amdgpu_invalid_wreg;
2974 adev->pcie_rreg = &amdgpu_invalid_rreg;
2975 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
2976 adev->pciep_rreg = &amdgpu_invalid_rreg;
2977 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
2978 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
2979 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
2980 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
2981 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
2982 adev->didt_rreg = &amdgpu_invalid_rreg;
2983 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
2984 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
2985 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
2986 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
2987 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
2988
3e39ab90
AD
2989 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
2990 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
2991 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
2992
 2993	 /* mutex initializations are all done here so we
 2994	  * can recall functions without having locking issues */
d38ceaf9 2995 atomic_set(&adev->irq.ih.lock, 0);
0e5ca0d1 2996 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
2997 mutex_init(&adev->pm.mutex);
2998 mutex_init(&adev->gfx.gpu_clock_mutex);
2999 mutex_init(&adev->srbm_mutex);
b8866c26 3000 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3001 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 3002 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3003 mutex_init(&adev->mn_lock);
e23b74aa 3004 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3005 hash_init(adev->mn_hash);
13a752e3 3006 mutex_init(&adev->lock_reset);
32eaeae0 3007 mutex_init(&adev->psp.mutex);
bd052211 3008 mutex_init(&adev->notifier_lock);
d38ceaf9 3009
912dfc84
EQ
3010 r = amdgpu_device_check_arguments(adev);
3011 if (r)
3012 return r;
d38ceaf9 3013
d38ceaf9
AD
3014 spin_lock_init(&adev->mmio_idx_lock);
3015 spin_lock_init(&adev->smc_idx_lock);
3016 spin_lock_init(&adev->pcie_idx_lock);
3017 spin_lock_init(&adev->uvd_ctx_idx_lock);
3018 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3019 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3020 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3021 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3022 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3023
0c4e7fa5
CZ
3024 INIT_LIST_HEAD(&adev->shadow_list);
3025 mutex_init(&adev->shadow_list_lock);
3026
beff74bc
AD
3027 INIT_DELAYED_WORK(&adev->delayed_init_work,
3028 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3029 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3030 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3031
d4535e2c
AG
3032 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3033
d23ee13f 3034 adev->gfx.gfx_off_req_count = 1;
b6e79d9a 3035 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3036
b265bdbd
EQ
3037 atomic_set(&adev->throttling_logging_enabled, 1);
3038 /*
3039 * If throttling continues, logging will be performed every minute
3040 * to avoid log flooding. "-1" is subtracted since the thermal
3041 * throttling interrupt comes every second. Thus, the total logging
3042 * interval is 59 seconds(retelimited printk interval) + 1(waiting
3043 * for throttling interrupt) = 60 seconds.
3044 */
3045 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3046 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3047
0fa49558
AX
3048 /* Registers mapping */
3049 /* TODO: block userspace mapping of io register */
da69c161
KW
3050 if (adev->asic_type >= CHIP_BONAIRE) {
3051 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3052 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3053 } else {
3054 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3055 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3056 }
d38ceaf9 3057
d38ceaf9
AD
3058 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3059 if (adev->rmmio == NULL) {
3060 return -ENOMEM;
3061 }
3062 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3063 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3064
d38ceaf9
AD
3065 /* io port mapping */
3066 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
3067 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
3068 adev->rio_mem_size = pci_resource_len(adev->pdev, i);
3069 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
3070 break;
3071 }
3072 }
3073 if (adev->rio_mem == NULL)
b64a18c5 3074 DRM_INFO("PCI I/O BAR is not found.\n");
d38ceaf9 3075
b2109d8e
JX
3076 /* enable PCIE atomic ops */
3077 r = pci_enable_atomic_ops_to_root(adev->pdev,
3078 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3079 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3080 if (r) {
3081 adev->have_atomics_support = false;
3082 DRM_INFO("PCIE atomic ops is not supported\n");
3083 } else {
3084 adev->have_atomics_support = true;
3085 }
3086
5494d864
AD
3087 amdgpu_device_get_pcie_info(adev);
3088
b239c017
JX
3089 if (amdgpu_mcbp)
3090 DRM_INFO("MCBP is enabled\n");
3091
5f84cc63
JX
3092 if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
3093 adev->enable_mes = true;
3094
3aa0115d
ML
3095 /* detect hw virtualization here */
3096 amdgpu_detect_virtualization(adev);
3097
dffa11b4
ML
3098 r = amdgpu_device_get_job_timeout_settings(adev);
3099 if (r) {
3100 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
3101 return r;
a190d1c7
XY
3102 }
3103
d38ceaf9 3104 /* early init functions */
06ec9070 3105 r = amdgpu_device_ip_early_init(adev);
d38ceaf9
AD
3106 if (r)
3107 return r;
3108
6585661d
OZ
3109 /* doorbell bar mapping and doorbell index init*/
3110 amdgpu_device_doorbell_init(adev);
3111
d38ceaf9
AD
3112 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
3113 /* this will fail for cards that aren't VGA class devices, just
3114 * ignore it */
06ec9070 3115 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
d38ceaf9 3116
31af062a 3117 if (amdgpu_device_supports_boco(ddev))
3840c5bc
AD
3118 boco = true;
3119 if (amdgpu_has_atpx() &&
3120 (amdgpu_is_atpx_hybrid() ||
3121 amdgpu_has_atpx_dgpu_power_cntl()) &&
3122 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 3123 vga_switcheroo_register_client(adev->pdev,
3840c5bc
AD
3124 &amdgpu_switcheroo_ops, boco);
3125 if (boco)
d38ceaf9
AD
3126 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
3127
9475a943
SL
3128 if (amdgpu_emu_mode == 1) {
3129 /* post the asic on emulation mode */
3130 emu_soc_asic_init(adev);
bfca0289 3131 goto fence_driver_init;
9475a943 3132 }
bfca0289 3133
4e99a44e
ML
3134 /* detect if we are with an SRIOV vbios */
3135 amdgpu_device_detect_sriov_bios(adev);
048765ad 3136
95e8e59e
AD
3137 /* check if we need to reset the asic
3138 * E.g., driver was not cleanly unloaded previously, etc.
3139 */
f14899fd 3140 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
95e8e59e
AD
3141 r = amdgpu_asic_reset(adev);
3142 if (r) {
3143 dev_err(adev->dev, "asic reset on init failed\n");
3144 goto failed;
3145 }
3146 }
3147
d38ceaf9 3148 /* Post card if necessary */
39c640c0 3149 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3150 if (!adev->bios) {
bec86378 3151 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3152 r = -EINVAL;
3153 goto failed;
d38ceaf9 3154 }
bec86378 3155 DRM_INFO("GPU posting now...\n");
4e99a44e
ML
3156 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3157 if (r) {
3158 dev_err(adev->dev, "gpu post error!\n");
3159 goto failed;
3160 }
d38ceaf9
AD
3161 }
3162
88b64e95
AD
3163 if (adev->is_atom_fw) {
3164 /* Initialize clocks */
3165 r = amdgpu_atomfirmware_get_clock_info(adev);
3166 if (r) {
3167 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 3168 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
3169 goto failed;
3170 }
3171 } else {
a5bde2f9
AD
3172 /* Initialize clocks */
3173 r = amdgpu_atombios_get_clock_info(adev);
3174 if (r) {
3175 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 3176 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 3177 goto failed;
a5bde2f9
AD
3178 }
3179 /* init i2c buses */
4562236b
HW
3180 if (!amdgpu_device_has_dc_support(adev))
3181 amdgpu_atombios_i2c_init(adev);
2c1a2784 3182 }
d38ceaf9 3183
bfca0289 3184fence_driver_init:
d38ceaf9
AD
3185 /* Fence driver */
3186 r = amdgpu_fence_driver_init(adev);
2c1a2784
AD
3187 if (r) {
3188 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
e23b74aa 3189 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3190 goto failed;
2c1a2784 3191 }
d38ceaf9
AD
3192
3193 /* init the mode config */
3194 drm_mode_config_init(adev->ddev);
3195
06ec9070 3196 r = amdgpu_device_ip_init(adev);
d38ceaf9 3197 if (r) {
8840a387 3198 /* failed in exclusive mode due to timeout */
3199 if (amdgpu_sriov_vf(adev) &&
3200 !amdgpu_sriov_runtime(adev) &&
3201 amdgpu_virt_mmio_blocked(adev) &&
3202 !amdgpu_virt_wait_reset(adev)) {
3203 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
3204 /* Don't send request since VF is inactive. */
3205 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3206 adev->virt.ops = NULL;
8840a387 3207 r = -EAGAIN;
3208 goto failed;
3209 }
06ec9070 3210 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3211 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
83ba126a 3212 goto failed;
d38ceaf9
AD
3213 }
3214
d69b8971
YZ
3215 dev_info(adev->dev,
3216 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
3217 adev->gfx.config.max_shader_engines,
3218 adev->gfx.config.max_sh_per_se,
3219 adev->gfx.config.max_cu_per_sh,
3220 adev->gfx.cu_info.number);
3221
d38ceaf9
AD
3222 adev->accel_working = true;
3223
e59c0205
AX
3224 amdgpu_vm_check_compute_bug(adev);
3225
95844d20
MO
3226 /* Initialize the buffer migration limit. */
3227 if (amdgpu_moverate >= 0)
3228 max_MBps = amdgpu_moverate;
3229 else
3230 max_MBps = 8; /* Allow 8 MB/s. */
3231 /* Get a log2 for easy divisions. */
3232 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3233
9bc92b9c
ML
3234 amdgpu_fbdev_init(adev);
3235
d2f52ac8 3236 r = amdgpu_pm_sysfs_init(adev);
7c868b59
YT
3237 if (r) {
3238 adev->pm_sysfs_en = false;
d2f52ac8 3239 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
7c868b59
YT
3240 } else
3241 adev->pm_sysfs_en = true;
d2f52ac8 3242
5bb23532 3243 r = amdgpu_ucode_sysfs_init(adev);
7c868b59
YT
3244 if (r) {
3245 adev->ucode_sysfs_en = false;
5bb23532 3246 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
7c868b59
YT
3247 } else
3248 adev->ucode_sysfs_en = true;
5bb23532 3249
d38ceaf9
AD
3250 if ((amdgpu_testing & 1)) {
3251 if (adev->accel_working)
3252 amdgpu_test_moves(adev);
3253 else
3254 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
3255 }
d38ceaf9
AD
3256 if (amdgpu_benchmarking) {
3257 if (adev->accel_working)
3258 amdgpu_benchmark(adev, amdgpu_benchmarking);
3259 else
3260 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
3261 }
3262
b0adca4d
EQ
3263 /*
3264 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
 3265	 * Otherwise the mgpu fan boost feature will be skipped because the
 3266	 * gpu instance count would be too low.
3267 */
3268 amdgpu_register_gpu_instance(adev);
3269
d38ceaf9
AD
3270 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3271 * explicit gating rather than handling it automatically.
3272 */
06ec9070 3273 r = amdgpu_device_ip_late_init(adev);
2c1a2784 3274 if (r) {
06ec9070 3275 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
e23b74aa 3276 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
83ba126a 3277 goto failed;
2c1a2784 3278 }
d38ceaf9 3279
108c6a63 3280 /* must succeed. */
511fdbc3 3281 amdgpu_ras_resume(adev);
108c6a63 3282
beff74bc
AD
3283 queue_delayed_work(system_wq, &adev->delayed_init_work,
3284 msecs_to_jiffies(AMDGPU_RESUME_MS));
3285
77f3a5cd 3286 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
bd607166 3287 if (r) {
77f3a5cd 3288 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166
KR
3289 return r;
3290 }
3291
d155bef0
AB
3292 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3293 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
3294 if (r)
3295 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3296
d38ceaf9 3297 return 0;
83ba126a
AD
3298
3299failed:
89041940 3300 amdgpu_vf_error_trans_all(adev);
3840c5bc 3301 if (boco)
83ba126a 3302 vga_switcheroo_fini_domain_pm_ops(adev->dev);
8840a387 3303
83ba126a 3304 return r;
d38ceaf9
AD
3305}
3306
d38ceaf9
AD
3307/**
3308 * amdgpu_device_fini - tear down the driver
3309 *
3310 * @adev: amdgpu_device pointer
3311 *
3312 * Tear down the driver info (all asics).
3313 * Called at driver shutdown.
3314 */
3315void amdgpu_device_fini(struct amdgpu_device *adev)
3316{
3317 int r;
3318
3319 DRM_INFO("amdgpu: finishing device.\n");
9f875167 3320 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 3321 adev->shutdown = true;
9f875167 3322
752c683d
ML
 3323	 /* make sure IB tests have finished before entering exclusive mode
 3324	  * to avoid preemption on IB test
 3325	  */
3326 if (amdgpu_sriov_vf(adev))
3327 amdgpu_virt_request_full_gpu(adev, false);
3328
e5b03032
ML
3329 /* disable all interrupts */
3330 amdgpu_irq_disable_all(adev);
ff97cba8
ML
3331 if (adev->mode_info.mode_config_initialized){
3332 if (!amdgpu_device_has_dc_support(adev))
c2d88e06 3333 drm_helper_force_disable_all(adev->ddev);
ff97cba8
ML
3334 else
3335 drm_atomic_helper_shutdown(adev->ddev);
3336 }
d38ceaf9 3337 amdgpu_fence_driver_fini(adev);
7c868b59
YT
3338 if (adev->pm_sysfs_en)
3339 amdgpu_pm_sysfs_fini(adev);
d38ceaf9 3340 amdgpu_fbdev_fini(adev);
06ec9070 3341 r = amdgpu_device_ip_fini(adev);
ab4fe3e1
HR
3342 if (adev->firmware.gpu_info_fw) {
3343 release_firmware(adev->firmware.gpu_info_fw);
3344 adev->firmware.gpu_info_fw = NULL;
3345 }
d38ceaf9
AD
3346 adev->accel_working = false;
3347 /* free i2c buses */
4562236b
HW
3348 if (!amdgpu_device_has_dc_support(adev))
3349 amdgpu_i2c_fini(adev);
bfca0289
SL
3350
3351 if (amdgpu_emu_mode != 1)
3352 amdgpu_atombios_fini(adev);
3353
d38ceaf9
AD
3354 kfree(adev->bios);
3355 adev->bios = NULL;
3840c5bc
AD
3356 if (amdgpu_has_atpx() &&
3357 (amdgpu_is_atpx_hybrid() ||
3358 amdgpu_has_atpx_dgpu_power_cntl()) &&
3359 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 3360 vga_switcheroo_unregister_client(adev->pdev);
3840c5bc 3361 if (amdgpu_device_supports_boco(adev->ddev))
83ba126a 3362 vga_switcheroo_fini_domain_pm_ops(adev->dev);
3363 vga_client_register(adev->pdev, NULL, NULL, NULL);
3364 if (adev->rio_mem)
3365 pci_iounmap(adev->pdev, adev->rio_mem);
3366 adev->rio_mem = NULL;
3367 iounmap(adev->rmmio);
3368 adev->rmmio = NULL;
06ec9070 3369 amdgpu_device_doorbell_fini(adev);
e9bc1bf7 3370
3371 if (adev->ucode_sysfs_en)
3372 amdgpu_ucode_sysfs_fini(adev);
3373
3374 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
3375 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3376 amdgpu_pmu_fini(adev);
4292b0b2 3377 if (adev->discovery_bin)
a190d1c7 3378 amdgpu_discovery_fini(adev);
3379}
3380
3381
3382/*
3383 * Suspend & resume.
3384 */
3385/**
810ddc3a 3386 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 3387 *
3388 * @dev: drm dev pointer
 3389 * @fbcon: notify the fbdev of suspend
3391 *
3392 * Puts the hw in the suspend state (all asics).
3393 * Returns 0 for success or an error on failure.
3394 * Called at driver suspend.
3395 */
de185019 3396int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
3397{
3398 struct amdgpu_device *adev;
3399 struct drm_crtc *crtc;
3400 struct drm_connector *connector;
f8d2d39e 3401 struct drm_connector_list_iter iter;
5ceb54c6 3402 int r;
3403
3404 if (dev == NULL || dev->dev_private == NULL) {
3405 return -ENODEV;
3406 }
3407
3408 adev = dev->dev_private;
3409
3410 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3411 return 0;
3412
44779b43 3413 adev->in_suspend = true;
3414 drm_kms_helper_poll_disable(dev);
3415
3416 if (fbcon)
3417 amdgpu_fbdev_set_suspend(adev, 1);
3418
beff74bc 3419 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 3420
3421 if (!amdgpu_device_has_dc_support(adev)) {
3422 /* turn off display hw */
3423 drm_modeset_lock_all(dev);
3424 drm_connector_list_iter_begin(dev, &iter);
3425 drm_for_each_connector_iter(connector, &iter)
3426 drm_helper_connector_dpms(connector,
3427 DRM_MODE_DPMS_OFF);
3428 drm_connector_list_iter_end(&iter);
4562236b 3429 drm_modeset_unlock_all(dev);
3430 /* unpin the front buffers and cursors */
3431 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3432 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3433 struct drm_framebuffer *fb = crtc->primary->fb;
3434 struct amdgpu_bo *robj;
3435
91334223 3436 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
3437 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3438 r = amdgpu_bo_reserve(aobj, true);
3439 if (r == 0) {
3440 amdgpu_bo_unpin(aobj);
3441 amdgpu_bo_unreserve(aobj);
3442 }
756e6880 3443 }
756e6880 3444
3445 if (fb == NULL || fb->obj[0] == NULL) {
3446 continue;
3447 }
3448 robj = gem_to_amdgpu_bo(fb->obj[0]);
3449 /* don't unpin kernel fb objects */
3450 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
3451 r = amdgpu_bo_reserve(robj, true);
3452 if (r == 0) {
3453 amdgpu_bo_unpin(robj);
3454 amdgpu_bo_unreserve(robj);
3455 }
3456 }
3457 }
3458 }
fe1053b7 3459
5e6932fe 3460 amdgpu_ras_suspend(adev);
3461
3462 r = amdgpu_device_ip_suspend_phase1(adev);
3463
3464 amdgpu_amdkfd_suspend(adev, !fbcon);
3465
3466 /* evict vram memory */
3467 amdgpu_bo_evict_vram(adev);
3468
5ceb54c6 3469 amdgpu_fence_driver_suspend(adev);
d38ceaf9 3470
fe1053b7 3471 r = amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 3472
3473 /* evict remaining vram memory
3474 * This second call to evict vram is to evict the gart page table
3475 * using the CPU.
3476 */
3477 amdgpu_bo_evict_vram(adev);
3478
3479 return 0;
3480}
3481
3482/**
810ddc3a 3483 * amdgpu_device_resume - initiate device resume
d38ceaf9 3484 *
3485 * @dev: drm dev pointer
 3486 * @fbcon: notify the fbdev of resume
3488 *
3489 * Bring the hw back to operating state (all asics).
3490 * Returns 0 for success or an error on failure.
3491 * Called at driver resume.
3492 */
de185019 3493int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
3494{
3495 struct drm_connector *connector;
f8d2d39e 3496 struct drm_connector_list_iter iter;
d38ceaf9 3497 struct amdgpu_device *adev = dev->dev_private;
756e6880 3498 struct drm_crtc *crtc;
03161a6e 3499 int r = 0;
3500
3501 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3502 return 0;
3503
d38ceaf9 3504 /* post card */
39c640c0 3505 if (amdgpu_device_need_post(adev)) {
74b0b157 3506 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3507 if (r)
3508 DRM_ERROR("amdgpu asic init failed\n");
3509 }
d38ceaf9 3510
06ec9070 3511 r = amdgpu_device_ip_resume(adev);
e6707218 3512 if (r) {
06ec9070 3513 DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
4d3b9ae5 3514 return r;
e6707218 3515 }
3516 amdgpu_fence_driver_resume(adev);
3517
d38ceaf9 3518
06ec9070 3519 r = amdgpu_device_ip_late_init(adev);
03161a6e 3520 if (r)
4d3b9ae5 3521 return r;
d38ceaf9 3522
3523 queue_delayed_work(system_wq, &adev->delayed_init_work,
3524 msecs_to_jiffies(AMDGPU_RESUME_MS));
3525
3526 if (!amdgpu_device_has_dc_support(adev)) {
3527 /* pin cursors */
3528 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3529 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3530
91334223 3531 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
3532 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3533 r = amdgpu_bo_reserve(aobj, true);
3534 if (r == 0) {
3535 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
3536 if (r != 0)
3537 DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
3538 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
3539 amdgpu_bo_unreserve(aobj);
3540 }
3541 }
3542 }
3543 }
9593f4d6 3544 r = amdgpu_amdkfd_resume(adev, !fbcon);
3545 if (r)
3546 return r;
756e6880 3547
96a5d8d4 3548 /* Make sure IB tests flushed */
beff74bc 3549 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 3550
3551 /* blat the mode back in */
3552 if (fbcon) {
3553 if (!amdgpu_device_has_dc_support(adev)) {
3554 /* pre DCE11 */
3555 drm_helper_resume_force_mode(dev);
3556
3557 /* turn on display hw */
3558 drm_modeset_lock_all(dev);
3559
3560 drm_connector_list_iter_begin(dev, &iter);
3561 drm_for_each_connector_iter(connector, &iter)
3562 drm_helper_connector_dpms(connector,
3563 DRM_MODE_DPMS_ON);
3564 drm_connector_list_iter_end(&iter);
3565
4562236b 3566 drm_modeset_unlock_all(dev);
d38ceaf9 3567 }
4d3b9ae5 3568 amdgpu_fbdev_set_suspend(adev, 0);
3569 }
3570
3571 drm_kms_helper_poll_enable(dev);
23a1a9e5 3572
5e6932fe 3573 amdgpu_ras_resume(adev);
3574
3575 /*
3576 * Most of the connector probing functions try to acquire runtime pm
3577 * refs to ensure that the GPU is powered on when connector polling is
3578 * performed. Since we're calling this from a runtime PM callback,
3579 * trying to acquire rpm refs will cause us to deadlock.
3580 *
3581 * Since we're guaranteed to be holding the rpm lock, it's safe to
3582 * temporarily disable the rpm helpers so this doesn't deadlock us.
3583 */
3584#ifdef CONFIG_PM
3585 dev->dev->power.disable_depth++;
3586#endif
3587 if (!amdgpu_device_has_dc_support(adev))
3588 drm_helper_hpd_irq_event(dev);
3589 else
3590 drm_kms_helper_hotplug_event(dev);
3591#ifdef CONFIG_PM
3592 dev->dev->power.disable_depth--;
3593#endif
3594 adev->in_suspend = false;
3595
4d3b9ae5 3596 return 0;
3597}
3598
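/*
 * Illustrative sketch (not part of this file): the driver's system sleep
 * callbacks (registered elsewhere, e.g. in amdgpu_drv.c) are expected to
 * funnel into amdgpu_device_suspend()/amdgpu_device_resume() above, roughly
 * as below. The callback names and the error handling are simplified
 * assumptions, not the exact upstream implementation.
 */
#if 0	/* example only, never compiled */
static int example_pmops_suspend(struct device *dev)
{
	struct drm_device *drm_dev = dev_get_drvdata(dev);

	/* fbcon = true: also tell the fbdev emulation we are suspending */
	return amdgpu_device_suspend(drm_dev, true);
}

static int example_pmops_resume(struct device *dev)
{
	struct drm_device *drm_dev = dev_get_drvdata(dev);

	return amdgpu_device_resume(drm_dev, true);
}
#endif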
3599/**
3600 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
3601 *
3602 * @adev: amdgpu_device pointer
3603 *
3604 * The list of all the hardware IPs that make up the asic is walked and
3605 * the check_soft_reset callbacks are run. check_soft_reset determines
3606 * if the asic is still hung or not.
3607 * Returns true if any of the IPs are still in a hung state, false if not.
3608 */
06ec9070 3609static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
3610{
3611 int i;
3612 bool asic_hang = false;
3613
3614 if (amdgpu_sriov_vf(adev))
3615 return true;
3616
3617 if (amdgpu_asic_need_full_reset(adev))
3618 return true;
3619
63fbf42f 3620 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3621 if (!adev->ip_blocks[i].status.valid)
63fbf42f 3622 continue;
3623 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
3624 adev->ip_blocks[i].status.hang =
3625 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
3626 if (adev->ip_blocks[i].status.hang) {
3627 DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
3628 asic_hang = true;
3629 }
3630 }
3631 return asic_hang;
3632}
3633
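/*
 * Illustrative sketch (not part of this file): an IP block participates in
 * the walk above by filling in the check_soft_reset hook of its amd_ip_funcs.
 * The helper example_ip_engine_busy() is hypothetical; a real block reads its
 * own status registers instead.
 */
#if 0	/* example only, never compiled */
static bool example_ip_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* Return true while the engine still looks hung so that
	 * amdgpu_device_ip_check_soft_reset() reports the hang. */
	return example_ip_engine_busy(adev);
}
#endif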
3634/**
3635 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3636 *
3637 * @adev: amdgpu_device pointer
3638 *
3639 * The list of all the hardware IPs that make up the asic is walked and the
3640 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
3641 * handles any IP specific hardware or software state changes that are
3642 * necessary for a soft reset to succeed.
3643 * Returns 0 on success, negative error code on failure.
3644 */
06ec9070 3645static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
3646{
3647 int i, r = 0;
3648
3649 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3650 if (!adev->ip_blocks[i].status.valid)
d31a501e 3651 continue;
3652 if (adev->ip_blocks[i].status.hang &&
3653 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3654 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
3655 if (r)
3656 return r;
3657 }
3658 }
3659
3660 return 0;
3661}
3662
3663/**
3664 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3665 *
3666 * @adev: amdgpu_device pointer
3667 *
3668 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
3669 * reset is necessary to recover.
3670 * Returns true if a full asic reset is required, false if not.
3671 */
06ec9070 3672static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 3673{
3674 int i;
3675
3676 if (amdgpu_asic_need_full_reset(adev))
3677 return true;
3678
da146d3b 3679 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3680 if (!adev->ip_blocks[i].status.valid)
da146d3b 3681 continue;
3682 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3683 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3684 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
3685 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3686 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 3687 if (adev->ip_blocks[i].status.hang) {
3688 DRM_INFO("Some block need full reset!\n");
3689 return true;
3690 }
3691 }
3692 }
3693 return false;
3694}
3695
3696/**
3697 * amdgpu_device_ip_soft_reset - do a soft reset
3698 *
3699 * @adev: amdgpu_device pointer
3700 *
3701 * The list of all the hardware IPs that make up the asic is walked and the
3702 * soft_reset callbacks are run if the block is hung. soft_reset handles any
3703 * IP specific hardware or software state changes that are necessary to soft
3704 * reset the IP.
3705 * Returns 0 on success, negative error code on failure.
3706 */
06ec9070 3707static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
3708{
3709 int i, r = 0;
3710
3711 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3712 if (!adev->ip_blocks[i].status.valid)
35d782fe 3713 continue;
3714 if (adev->ip_blocks[i].status.hang &&
3715 adev->ip_blocks[i].version->funcs->soft_reset) {
3716 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
3717 if (r)
3718 return r;
3719 }
3720 }
3721
3722 return 0;
3723}
3724
3725/**
3726 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
3727 *
3728 * @adev: amdgpu_device pointer
3729 *
3730 * The list of all the hardware IPs that make up the asic is walked and the
3731 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
3732 * handles any IP specific hardware or software state changes that are
3733 * necessary after the IP has been soft reset.
3734 * Returns 0 on success, negative error code on failure.
3735 */
06ec9070 3736static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
3737{
3738 int i, r = 0;
3739
3740 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3741 if (!adev->ip_blocks[i].status.valid)
35d782fe 3742 continue;
3743 if (adev->ip_blocks[i].status.hang &&
3744 adev->ip_blocks[i].version->funcs->post_soft_reset)
3745 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
3746 if (r)
3747 return r;
3748 }
3749
3750 return 0;
3751}
3752
e3ecdffa 3753/**
c33adbc7 3754 * amdgpu_device_recover_vram - Recover some VRAM contents
3755 *
3756 * @adev: amdgpu_device pointer
3757 *
3758 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
3759 * restore things like GPUVM page tables after a GPU reset where
3760 * the contents of VRAM might be lost.
3761 *
3762 * Returns:
3763 * 0 on success, negative error code on failure.
e3ecdffa 3764 */
c33adbc7 3765static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 3766{
c41d1cf6 3767 struct dma_fence *fence = NULL, *next = NULL;
3768 struct amdgpu_bo *shadow;
3769 long r = 1, tmo;
3770
3771 if (amdgpu_sriov_runtime(adev))
b045d3af 3772 tmo = msecs_to_jiffies(8000);
3773 else
3774 tmo = msecs_to_jiffies(100);
3775
3776 DRM_INFO("recover vram bo from shadow start\n");
3777 mutex_lock(&adev->shadow_list_lock);
3778 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
3779
3780 /* No need to recover an evicted BO */
3781 if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
b575f10d 3782 shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
3783 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
3784 continue;
3785
3786 r = amdgpu_bo_restore_shadow(shadow, &next);
3787 if (r)
3788 break;
3789
c41d1cf6 3790 if (fence) {
1712fb1a 3791 tmo = dma_fence_wait_timeout(fence, false, tmo);
3792 dma_fence_put(fence);
3793 fence = next;
1712fb1a 3794 if (tmo == 0) {
3795 r = -ETIMEDOUT;
c41d1cf6 3796 break;
1712fb1a 3797 } else if (tmo < 0) {
3798 r = tmo;
3799 break;
3800 }
3801 } else {
3802 fence = next;
c41d1cf6 3803 }
3804 }
3805 mutex_unlock(&adev->shadow_list_lock);
3806
3807 if (fence)
3808 tmo = dma_fence_wait_timeout(fence, false, tmo);
3809 dma_fence_put(fence);
3810
1712fb1a 3811 if (r < 0 || tmo <= 0) {
3812 DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
3813 return -EIO;
3814 }
c41d1cf6 3815
3816 DRM_INFO("recover vram bo from shadow done\n");
3817 return 0;
3818}
3819
a90ad3c2 3820
e3ecdffa 3821/**
06ec9070 3822 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
3823 *
3824 * @adev: amdgpu device pointer
87e3f136 3825 * @from_hypervisor: request from hypervisor
3826 *
 3827 * Do a VF FLR and reinitialize the ASIC.
 3828 * Returns 0 on success, negative error code on failure.
3829 */
3830static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
3831 bool from_hypervisor)
3832{
3833 int r;
3834
3835 if (from_hypervisor)
3836 r = amdgpu_virt_request_full_gpu(adev, true);
3837 else
3838 r = amdgpu_virt_reset_gpu(adev);
3839 if (r)
3840 return r;
a90ad3c2 3841
3842 amdgpu_amdkfd_pre_reset(adev);
3843
a90ad3c2 3844 /* Resume IP prior to SMC */
06ec9070 3845 r = amdgpu_device_ip_reinit_early_sriov(adev);
3846 if (r)
3847 goto error;
a90ad3c2 3848
c9ffa427 3849 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 3850 /* we need recover gart prior to run SMC/CP/SDMA resume */
c1c7ce8f 3851 amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
a90ad3c2 3852
3853 r = amdgpu_device_fw_loading(adev);
3854 if (r)
3855 return r;
3856
a90ad3c2 3857 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 3858 r = amdgpu_device_ip_reinit_late_sriov(adev);
3859 if (r)
3860 goto error;
3861
3862 amdgpu_irq_gpu_reset_resume_helper(adev);
5740682e 3863 r = amdgpu_ib_ring_tests(adev);
f81e8d53 3864 amdgpu_amdkfd_post_reset(adev);
a90ad3c2 3865
3866error:
3867 amdgpu_virt_release_full_gpu(adev, true);
c41d1cf6 3868 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 3869 amdgpu_inc_vram_lost(adev);
c33adbc7 3870 r = amdgpu_device_recover_vram(adev);
3871 }
3872
3873 return r;
3874}
3875
3876/**
3877 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
3878 *
3879 * @adev: amdgpu device pointer
3880 *
3881 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
3882 * a hung GPU.
3883 */
3884bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
3885{
3886 if (!amdgpu_device_ip_check_soft_reset(adev)) {
3887 DRM_INFO("Timeout, but no hardware hang detected.\n");
3888 return false;
3889 }
3890
3891 if (amdgpu_gpu_recovery == 0)
3892 goto disabled;
3893
3894 if (amdgpu_sriov_vf(adev))
3895 return true;
3896
3897 if (amdgpu_gpu_recovery == -1) {
3898 switch (adev->asic_type) {
3899 case CHIP_BONAIRE:
3900 case CHIP_HAWAII:
3901 case CHIP_TOPAZ:
3902 case CHIP_TONGA:
3903 case CHIP_FIJI:
3904 case CHIP_POLARIS10:
3905 case CHIP_POLARIS11:
3906 case CHIP_POLARIS12:
3907 case CHIP_VEGAM:
3908 case CHIP_VEGA20:
3909 case CHIP_VEGA10:
3910 case CHIP_VEGA12:
c43b849f 3911 case CHIP_RAVEN:
e9d4cf91 3912 case CHIP_ARCTURUS:
2cb44fb0 3913 case CHIP_RENOIR:
3914 case CHIP_NAVI10:
3915 case CHIP_NAVI14:
3916 case CHIP_NAVI12:
3917 break;
3918 default:
3919 goto disabled;
3920 }
3921 }
3922
3923 return true;
3924
3925disabled:
3926 DRM_INFO("GPU recovery disabled.\n");
3927 return false;
3928}
3929
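/*
 * Illustrative sketch (not part of this file): a job timeout handler is
 * expected to gate its recovery attempt on the check above, roughly as below.
 * The surrounding handler is simplified and hypothetical; only the two
 * amdgpu_device_* calls are taken from this file.
 */
#if 0	/* example only, never compiled */
static void example_job_timedout(struct amdgpu_ring *ring, struct amdgpu_job *job)
{
	if (amdgpu_device_should_recover_gpu(ring->adev))
		amdgpu_device_gpu_recover(ring->adev, job);
	else
		DRM_INFO("GPU recovery disabled, leaving the job hanging\n");
}
#endif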
5c6dd71e 3930
3931static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
3932 struct amdgpu_job *job,
3933 bool *need_full_reset_arg)
3934{
3935 int i, r = 0;
3936 bool need_full_reset = *need_full_reset_arg;
71182665 3937
3938 amdgpu_debugfs_wait_dump(adev);
3939
71182665 3940 /* block all schedulers and reset given job's ring */
3941 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3942 struct amdgpu_ring *ring = adev->rings[i];
3943
51687759 3944 if (!ring || !ring->sched.thread)
0875dc9e 3945 continue;
5740682e 3946
3947 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
3948 amdgpu_fence_driver_force_completion(ring);
0875dc9e 3949 }
d38ceaf9 3950
3951 if(job)
3952 drm_sched_increase_karma(&job->base);
3953
1d721ed6 3954 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
3955 if (!amdgpu_sriov_vf(adev)) {
3956
3957 if (!need_full_reset)
3958 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
3959
3960 if (!need_full_reset) {
3961 amdgpu_device_ip_pre_soft_reset(adev);
3962 r = amdgpu_device_ip_soft_reset(adev);
3963 amdgpu_device_ip_post_soft_reset(adev);
3964 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
3965 DRM_INFO("soft reset failed, will fallback to full reset!\n");
3966 need_full_reset = true;
3967 }
3968 }
3969
3970 if (need_full_reset)
3971 r = amdgpu_device_ip_suspend(adev);
3972
3973 *need_full_reset_arg = need_full_reset;
3974 }
3975
3976 return r;
3977}
3978
041a62bc 3979static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
3980 struct list_head *device_list_handle,
3981 bool *need_full_reset_arg)
3982{
3983 struct amdgpu_device *tmp_adev = NULL;
3984 bool need_full_reset = *need_full_reset_arg, vram_lost = false;
3985 int r = 0;
3986
3987 /*
 3988 * ASIC reset has to be done on all XGMI hive nodes ASAP
 3989 * to allow proper link negotiation in FW (within 1 sec)
3990 */
3991 if (need_full_reset) {
3992 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
041a62bc 3993 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 3994 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
c96cf282 3995 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
3996 r = -EALREADY;
3997 } else
3998 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 3999
4000 if (r) {
4001 DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s",
4002 r, tmp_adev->ddev->unique);
4003 break;
4004 }
4005 }
4006
4007 /* For XGMI wait for all resets to complete before proceed */
4008 if (!r) {
4009 list_for_each_entry(tmp_adev, device_list_handle,
4010 gmc.xgmi.head) {
4011 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4012 flush_work(&tmp_adev->xgmi_reset_work);
4013 r = tmp_adev->asic_reset_res;
4014 if (r)
4015 break;
4016 }
4017 }
4018 }
ce316fa5 4019 }
26bc5340 4020
4021 if (!r && amdgpu_ras_intr_triggered()) {
4022 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4023 if (tmp_adev->mmhub.funcs &&
4024 tmp_adev->mmhub.funcs->reset_ras_error_count)
4025 tmp_adev->mmhub.funcs->reset_ras_error_count(tmp_adev);
4026 }
4027
00eaa571 4028 amdgpu_ras_intr_cleared();
43c4d576 4029 }
00eaa571 4030
4031 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4032 if (need_full_reset) {
4033 /* post card */
4034 if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
4035 DRM_WARN("asic atom init failed!");
4036
4037 if (!r) {
4038 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
4039 r = amdgpu_device_ip_resume_phase1(tmp_adev);
4040 if (r)
4041 goto out;
4042
4043 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
4044 if (vram_lost) {
77e7f829 4045 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 4046 amdgpu_inc_vram_lost(tmp_adev);
4047 }
4048
4049 r = amdgpu_gtt_mgr_recover(
4050 &tmp_adev->mman.bdev.man[TTM_PL_TT]);
4051 if (r)
4052 goto out;
4053
4054 r = amdgpu_device_fw_loading(tmp_adev);
4055 if (r)
4056 return r;
4057
4058 r = amdgpu_device_ip_resume_phase2(tmp_adev);
4059 if (r)
4060 goto out;
4061
4062 if (vram_lost)
4063 amdgpu_device_fill_reset_magic(tmp_adev);
4064
4065 /*
 4066 * Add this ASIC back as tracked since the reset
 4067 * already completed successfully.
4068 */
4069 amdgpu_register_gpu_instance(tmp_adev);
4070
7c04ca50 4071 r = amdgpu_device_ip_late_init(tmp_adev);
4072 if (r)
4073 goto out;
4074
4075 amdgpu_fbdev_set_suspend(tmp_adev, 0);
4076
e79a04d5 4077 /* must succeed. */
511fdbc3 4078 amdgpu_ras_resume(tmp_adev);
e79a04d5 4079
4080 /* Update PSP FW topology after reset */
4081 if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
4082 r = amdgpu_xgmi_update_topology(hive, tmp_adev);
4083 }
4084 }
4085
4086
4087out:
4088 if (!r) {
4089 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
4090 r = amdgpu_ib_ring_tests(tmp_adev);
4091 if (r) {
4092 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
4093 r = amdgpu_device_ip_suspend(tmp_adev);
4094 need_full_reset = true;
4095 r = -EAGAIN;
4096 goto end;
4097 }
4098 }
4099
4100 if (!r)
4101 r = amdgpu_device_recover_vram(tmp_adev);
4102 else
4103 tmp_adev->asic_reset_res = r;
4104 }
4105
4106end:
4107 *need_full_reset_arg = need_full_reset;
4108 return r;
4109}
4110
1d721ed6 4111static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
26bc5340 4112{
4113 if (trylock) {
4114 if (!mutex_trylock(&adev->lock_reset))
4115 return false;
4116 } else
4117 mutex_lock(&adev->lock_reset);
5740682e 4118
26bc5340 4119 atomic_inc(&adev->gpu_reset_counter);
2a9b90ae 4120 adev->in_gpu_reset = true;
4121 switch (amdgpu_asic_reset_method(adev)) {
4122 case AMD_RESET_METHOD_MODE1:
4123 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
4124 break;
4125 case AMD_RESET_METHOD_MODE2:
4126 adev->mp1_state = PP_MP1_STATE_RESET;
4127 break;
4128 default:
4129 adev->mp1_state = PP_MP1_STATE_NONE;
4130 break;
4131 }
4132
4133 return true;
26bc5340 4134}
d38ceaf9 4135
4136static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
4137{
89041940 4138 amdgpu_vf_error_trans_all(adev);
a3a09142 4139 adev->mp1_state = PP_MP1_STATE_NONE;
2a9b90ae 4140 adev->in_gpu_reset = false;
13a752e3 4141 mutex_unlock(&adev->lock_reset);
4142}
4143
4144static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
4145{
4146 struct pci_dev *p = NULL;
4147
4148 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4149 adev->pdev->bus->number, 1);
4150 if (p) {
4151 pm_runtime_enable(&(p->dev));
4152 pm_runtime_resume(&(p->dev));
4153 }
4154}
4155
4156static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
4157{
4158 enum amd_reset_method reset_method;
4159 struct pci_dev *p = NULL;
4160 u64 expires;
4161
4162 /*
 4163 * For now, only BACO and mode1 reset are confirmed to
 4164 * suffer from the audio issue if the audio device is not properly suspended.
4165 */
4166 reset_method = amdgpu_asic_reset_method(adev);
4167 if ((reset_method != AMD_RESET_METHOD_BACO) &&
4168 (reset_method != AMD_RESET_METHOD_MODE1))
4169 return -EINVAL;
4170
4171 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4172 adev->pdev->bus->number, 1);
4173 if (!p)
4174 return -ENODEV;
4175
4176 expires = pm_runtime_autosuspend_expiration(&(p->dev));
4177 if (!expires)
4178 /*
 4179 * If we cannot get the audio device autosuspend delay,
 4180 * a fixed 4S interval will be used. Since 3S is the
 4181 * audio controller's default autosuspend delay setting,
 4182 * the 4S used here is guaranteed to cover that.
4183 */
54b7feb9 4184 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
4185
4186 while (!pm_runtime_status_suspended(&(p->dev))) {
4187 if (!pm_runtime_suspend(&(p->dev)))
4188 break;
4189
4190 if (expires < ktime_get_mono_fast_ns()) {
4191 dev_warn(adev->dev, "failed to suspend display audio\n");
4192 /* TODO: abort the succeeding gpu reset? */
4193 return -ETIMEDOUT;
4194 }
4195 }
4196
4197 pm_runtime_disable(&(p->dev));
4198
4199 return 0;
4200}
4201
4202/**
4203 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
4204 *
4205 * @adev: amdgpu device pointer
4206 * @job: which job trigger hang
4207 *
4208 * Attempt to reset the GPU if it has hung (all asics).
 4209 * Attempt to do a soft-reset or full-reset and reinitialize the ASIC.
4210 * Returns 0 for success or an error on failure.
4211 */
4212
4213int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
4214 struct amdgpu_job *job)
4215{
1d721ed6 4216 struct list_head device_list, *device_list_handle = NULL;
4217 bool need_full_reset = false;
4218 bool job_signaled = false;
26bc5340 4219 struct amdgpu_hive_info *hive = NULL;
26bc5340 4220 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 4221 int i, r = 0;
7c6e68c7 4222 bool in_ras_intr = amdgpu_ras_intr_triggered();
4223 bool use_baco =
4224 (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ?
4225 true : false;
3f12acc8 4226 bool audio_suspended = false;
26bc5340 4227
4228 /*
4229 * Flush RAM to disk so that after reboot
4230 * the user can read log and see why the system rebooted.
4231 */
b823821f 4232 if (in_ras_intr && !use_baco && amdgpu_ras_get_context(adev)->reboot) {
4233
4234 DRM_WARN("Emergency reboot.");
4235
4236 ksys_sync_helper();
4237 emergency_restart();
4238 }
4239
4240 dev_info(adev->dev, "GPU %s begin!\n",
4241 (in_ras_intr && !use_baco) ? "jobs stop":"reset");
4242
4243 /*
 4244 * Here we trylock to avoid a chain of resets executing, triggered
 4245 * either by jobs on different adevs in an XGMI hive or by jobs on
 4246 * different schedulers for the same device, while this TO handler is running.
 4247 * We always reset all schedulers for a device and all devices in an XGMI
 4248 * hive, so that should take care of them too.
26bc5340 4249 */
7dd8c205 4250 hive = amdgpu_get_xgmi_hive(adev, true);
4251 if (hive && !mutex_trylock(&hive->reset_lock)) {
4252 DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
0b2d2c2e 4253 job ? job->base.id : -1, hive->hive_id);
9e94d22c 4254 mutex_unlock(&hive->hive_lock);
26bc5340 4255 return 0;
1d721ed6 4256 }
26bc5340 4257
4258 /*
4259 * Build list of devices to reset.
4260 * In case we are in XGMI hive mode, resort the device list
4261 * to put adev in the 1st position.
4262 */
4263 INIT_LIST_HEAD(&device_list);
4264 if (adev->gmc.xgmi.num_physical_nodes > 1) {
4265 if (!hive)
26bc5340 4266 return -ENODEV;
4267 if (!list_is_first(&adev->gmc.xgmi.head, &hive->device_list))
4268 list_rotate_to_front(&adev->gmc.xgmi.head, &hive->device_list);
4269 device_list_handle = &hive->device_list;
4270 } else {
4271 list_add_tail(&adev->gmc.xgmi.head, &device_list);
4272 device_list_handle = &device_list;
4273 }
4274
4275 /* block all schedulers and reset given job's ring */
4276 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4277 if (!amdgpu_device_lock_adev(tmp_adev, !hive)) {
4278 DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
4279 job ? job->base.id : -1);
4280 mutex_unlock(&hive->hive_lock);
4281 return 0;
4282 }
4283
4284 /*
 4285 * Try to put the audio codec into suspend state
 4286 * before gpu reset is started.
 4287 *
 4288 * The power domain of the graphics device is
 4289 * shared with the AZ power domain. Without this,
 4290 * we may change the audio hardware behind
 4291 * the audio driver's back. That will trigger
 4292 * some audio codec errors.
4293 */
4294 if (!amdgpu_device_suspend_display_audio(tmp_adev))
4295 audio_suspended = true;
4296
4297 amdgpu_ras_set_error_query_ready(tmp_adev, false);
4298
4299 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
4300
4301 if (!amdgpu_sriov_vf(tmp_adev))
4302 amdgpu_amdkfd_pre_reset(tmp_adev);
4303
4304 /*
 4305 * Mark these ASICs to be reset as untracked first,
 4306 * and add them back after the reset completes.
4307 */
4308 amdgpu_unregister_gpu_instance(tmp_adev);
4309
a2f63ee8 4310 amdgpu_fbdev_set_suspend(tmp_adev, 1);
565d1941 4311
f1c1314b 4312 /* disable ras on ALL IPs */
4313 if (!(in_ras_intr && !use_baco) &&
4314 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 4315 amdgpu_ras_suspend(tmp_adev);
4316
4317 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4318 struct amdgpu_ring *ring = tmp_adev->rings[i];
4319
4320 if (!ring || !ring->sched.thread)
4321 continue;
4322
0b2d2c2e 4323 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 4324
b823821f 4325 if (in_ras_intr && !use_baco)
7c6e68c7 4326 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
4327 }
4328 }
4329
b823821f 4330 if (in_ras_intr && !use_baco)
4331 goto skip_sched_resume;
4332
4333 /*
4334 * Must check guilty signal here since after this point all old
4335 * HW fences are force signaled.
4336 *
4337 * job->base holds a reference to parent fence
4338 */
4339 if (job && job->base.s_fence->parent &&
7dd8c205 4340 dma_fence_is_signaled(job->base.s_fence->parent)) {
1d721ed6 4341 job_signaled = true;
4342 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
4343 goto skip_hw_reset;
4344 }
4345
4346retry: /* Rest of adevs pre asic reset from XGMI hive. */
4347 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4348 r = amdgpu_device_pre_asic_reset(tmp_adev,
4349 NULL,
4350 &need_full_reset);
4351 /*TODO Should we stop ?*/
4352 if (r) {
4353 DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
4354 r, tmp_adev->ddev->unique);
4355 tmp_adev->asic_reset_res = r;
4356 }
4357 }
4358
4359 /* Actual ASIC resets if needed.*/
4360 /* TODO Implement XGMI hive reset logic for SRIOV */
4361 if (amdgpu_sriov_vf(adev)) {
4362 r = amdgpu_device_reset_sriov(adev, job ? false : true);
4363 if (r)
4364 adev->asic_reset_res = r;
4365 } else {
041a62bc 4366 r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
4367 if (r && r == -EAGAIN)
4368 goto retry;
4369 }
4370
4371skip_hw_reset:
4372
 4373 /* Post ASIC reset for all devs. */
4374 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
7c6e68c7 4375
4376 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4377 struct amdgpu_ring *ring = tmp_adev->rings[i];
4378
4379 if (!ring || !ring->sched.thread)
4380 continue;
4381
4382 /* No point to resubmit jobs if we didn't HW reset*/
4383 if (!tmp_adev->asic_reset_res && !job_signaled)
4384 drm_sched_resubmit_jobs(&ring->sched);
4385
4386 drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
4387 }
4388
4389 if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
4390 drm_helper_resume_force_mode(tmp_adev->ddev);
4391 }
4392
4393 tmp_adev->asic_reset_res = 0;
4394
4395 if (r) {
4396 /* bad news, how to tell it to userspace ? */
12ffa55d 4397 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
4398 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
4399 } else {
12ffa55d 4400 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340 4401 }
7c6e68c7 4402 }
26bc5340 4403
4404skip_sched_resume:
4405 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4406 /*unlock kfd: SRIOV would do it separately */
b823821f 4407 if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev))
7c6e68c7 4408 amdgpu_amdkfd_post_reset(tmp_adev);
4409 if (audio_suspended)
4410 amdgpu_device_resume_display_audio(tmp_adev);
4411 amdgpu_device_unlock_adev(tmp_adev);
4412 }
4413
9e94d22c 4414 if (hive) {
22d6575b 4415 mutex_unlock(&hive->reset_lock);
4416 mutex_unlock(&hive->hive_lock);
4417 }
4418
4419 if (r)
4420 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
4421 return r;
4422}
4423
4424/**
 4425 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
4426 *
4427 * @adev: amdgpu_device pointer
4428 *
 4429 * Fetches and stores in the driver the PCIE capabilities (gen speed
4430 * and lanes) of the slot the device is in. Handles APUs and
4431 * virtualized environments where PCIE config space may not be available.
4432 */
5494d864 4433static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 4434{
5d9a6330 4435 struct pci_dev *pdev;
4436 enum pci_bus_speed speed_cap, platform_speed_cap;
4437 enum pcie_link_width platform_link_width;
d0dd7f0c 4438
4439 if (amdgpu_pcie_gen_cap)
4440 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 4441
4442 if (amdgpu_pcie_lane_cap)
4443 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 4444
4445 /* covers APUs as well */
4446 if (pci_is_root_bus(adev->pdev->bus)) {
4447 if (adev->pm.pcie_gen_mask == 0)
4448 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
4449 if (adev->pm.pcie_mlw_mask == 0)
4450 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 4451 return;
cd474ba0 4452 }
d0dd7f0c 4453
4454 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
4455 return;
4456
4457 pcie_bandwidth_available(adev->pdev, NULL,
4458 &platform_speed_cap, &platform_link_width);
c5313457 4459
cd474ba0 4460 if (adev->pm.pcie_gen_mask == 0) {
4461 /* asic caps */
4462 pdev = adev->pdev;
4463 speed_cap = pcie_get_speed_cap(pdev);
4464 if (speed_cap == PCI_SPEED_UNKNOWN) {
4465 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4466 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4467 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 4468 } else {
4469 if (speed_cap == PCIE_SPEED_16_0GT)
4470 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4471 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4472 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4473 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
4474 else if (speed_cap == PCIE_SPEED_8_0GT)
4475 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4476 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4477 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
4478 else if (speed_cap == PCIE_SPEED_5_0GT)
4479 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4480 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
4481 else
4482 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
4483 }
4484 /* platform caps */
c5313457 4485 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
4486 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4487 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4488 } else {
c5313457 4489 if (platform_speed_cap == PCIE_SPEED_16_0GT)
4490 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4491 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4492 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4493 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 4494 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
4495 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4496 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4497 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 4498 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
4499 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4500 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4501 else
4502 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
4503
4504 }
4505 }
4506 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 4507 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
4508 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
4509 } else {
c5313457 4510 switch (platform_link_width) {
5d9a6330 4511 case PCIE_LNK_X32:
4512 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
4513 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4514 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4515 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4516 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4517 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4518 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4519 break;
5d9a6330 4520 case PCIE_LNK_X16:
4521 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4522 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4523 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4524 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4525 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4526 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4527 break;
5d9a6330 4528 case PCIE_LNK_X12:
4529 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4530 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4531 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4532 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4533 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4534 break;
5d9a6330 4535 case PCIE_LNK_X8:
4536 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4537 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4538 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4539 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4540 break;
5d9a6330 4541 case PCIE_LNK_X4:
4542 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4543 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4544 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4545 break;
5d9a6330 4546 case PCIE_LNK_X2:
4547 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4548 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4549 break;
5d9a6330 4550 case PCIE_LNK_X1:
4551 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
4552 break;
4553 default:
4554 break;
4555 }
4556 }
4557 }
4558}
d38ceaf9 4559
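/*
 * Illustrative sketch (not part of this file): consumers test the cached
 * masks filled in above to decide which PCIe gen to program. Both the
 * platform (CAIL_PCIE_*) and ASIC (CAIL_ASIC_PCIE_*) flags live in the same
 * gen mask; the helper name below is an assumption.
 */
#if 0	/* example only, never compiled */
static bool example_supports_pcie_gen3(struct amdgpu_device *adev)
{
	return (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) &&
	       (adev->pm.pcie_gen_mask & CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
}
#endif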
4560int amdgpu_device_baco_enter(struct drm_device *dev)
4561{
4562 struct amdgpu_device *adev = dev->dev_private;
7a22677b 4563 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
4564
4565 if (!amdgpu_device_supports_baco(adev->ddev))
4566 return -ENOTSUPP;
4567
4568 if (ras && ras->supported)
4569 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
4570
9530273e 4571 return amdgpu_dpm_baco_enter(adev);
4572}
4573
4574int amdgpu_device_baco_exit(struct drm_device *dev)
4575{
4576 struct amdgpu_device *adev = dev->dev_private;
7a22677b 4577 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 4578 int ret = 0;
4579
4580 if (!amdgpu_device_supports_baco(adev->ddev))
4581 return -ENOTSUPP;
4582
4583 ret = amdgpu_dpm_baco_exit(adev);
4584 if (ret)
4585 return ret;
4586
4587 if (ras && ras->supported)
4588 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
4589
4590 return 0;
361dbd01 4591}
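/*
 * Illustrative sketch (not part of this file): a runtime-PM suspend path that
 * wants to use BACO is expected to bracket the low-power window with the two
 * helpers above, roughly as below. The callback name is an assumption;
 * amdgpu_device_baco_enter() already rechecks BACO support internally.
 */
#if 0	/* example only, never compiled */
static int example_runtime_suspend(struct drm_device *dev)
{
	if (!amdgpu_device_supports_baco(dev))
		return -ENOTSUPP;

	return amdgpu_device_baco_enter(dev);
}
#endif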