drm/amd/powerplay: give better names for the thermal IRQ related APIs
[linux-2.6-block.git] / drivers / gpu / drm / amd / amdgpu / amdgpu_device.c
CommitLineData
d38ceaf9
AD
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
b1ddf548 28#include <linux/power_supply.h>
0875dc9e 29#include <linux/kthread.h>
fdf2f6c5 30#include <linux/module.h>
d38ceaf9
AD
31#include <linux/console.h>
32#include <linux/slab.h>
fdf2f6c5 33
4562236b 34#include <drm/drm_atomic_helper.h>
fcd70cd3 35#include <drm/drm_probe_helper.h>
d38ceaf9
AD
36#include <drm/amdgpu_drm.h>
37#include <linux/vgaarb.h>
38#include <linux/vga_switcheroo.h>
39#include <linux/efi.h>
40#include "amdgpu.h"
f4b373f4 41#include "amdgpu_trace.h"
d38ceaf9
AD
42#include "amdgpu_i2c.h"
43#include "atom.h"
44#include "amdgpu_atombios.h"
a5bde2f9 45#include "amdgpu_atomfirmware.h"
d0dd7f0c 46#include "amd_pcie.h"
33f34802
KW
47#ifdef CONFIG_DRM_AMDGPU_SI
48#include "si.h"
49#endif
a2e73f56
AD
50#ifdef CONFIG_DRM_AMDGPU_CIK
51#include "cik.h"
52#endif
aaa36a97 53#include "vi.h"
460826e6 54#include "soc15.h"
0a5b8c7b 55#include "nv.h"
d38ceaf9 56#include "bif/bif_4_1_d.h"
9accf2fd 57#include <linux/pci.h>
bec86378 58#include <linux/firmware.h>
89041940 59#include "amdgpu_vf_error.h"
d38ceaf9 60
ba997709 61#include "amdgpu_amdkfd.h"
d2f52ac8 62#include "amdgpu_pm.h"
d38ceaf9 63
5183411b 64#include "amdgpu_xgmi.h"
c030f2e4 65#include "amdgpu_ras.h"
9c7c85f7 66#include "amdgpu_pmu.h"
bd607166 67#include "amdgpu_fru_eeprom.h"
5183411b 68
d5ea093e 69#include <linux/suspend.h>
c6a6e2db 70#include <drm/task_barrier.h>
3f12acc8 71#include <linux/pm_runtime.h>
d5ea093e 72
e2a75f88 73MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
3f76dced 74MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
2d2e5e7e 75MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
ad5a67a7 76MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
54c4d17e 77MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
65e60f6e 78MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
b51a26a0 79MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
23c6268e 80MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
ed42cfe1 81MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
42b325e5 82MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
e2a75f88 83
2dc80b00
S
84#define AMDGPU_RESUME_MS 2000
85
050091ab 86const char *amdgpu_asic_name[] = {
da69c161
KW
87 "TAHITI",
88 "PITCAIRN",
89 "VERDE",
90 "OLAND",
91 "HAINAN",
d38ceaf9
AD
92 "BONAIRE",
93 "KAVERI",
94 "KABINI",
95 "HAWAII",
96 "MULLINS",
97 "TOPAZ",
98 "TONGA",
48299f95 99 "FIJI",
d38ceaf9 100 "CARRIZO",
139f4917 101 "STONEY",
2cc0c0b5
FC
102 "POLARIS10",
103 "POLARIS11",
c4642a47 104 "POLARIS12",
48ff108d 105 "VEGAM",
d4196f01 106 "VEGA10",
8fab806a 107 "VEGA12",
956fcddc 108 "VEGA20",
2ca8a5d2 109 "RAVEN",
d6c3b24e 110 "ARCTURUS",
1eee4228 111 "RENOIR",
852a6626 112 "NAVI10",
87dbad02 113 "NAVI14",
9802f5d7 114 "NAVI12",
d38ceaf9
AD
115 "LAST",
116};
117
dcea6e65
KR
118/**
119 * DOC: pcie_replay_count
120 *
121 * The amdgpu driver provides a sysfs API for reporting the total number
122 * of PCIe replays (NAKs)
123 * The file pcie_replay_count is used for this and returns the total
124 * number of replays as a sum of the NAKs generated and NAKs received
125 */
126
127static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
128 struct device_attribute *attr, char *buf)
129{
130 struct drm_device *ddev = dev_get_drvdata(dev);
131 struct amdgpu_device *adev = ddev->dev_private;
132 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
133
134 return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
135}
136
137static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
138 amdgpu_device_get_pcie_replay_count, NULL);
139
5494d864
AD
140static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
141
bd607166
KR
142/**
143 * DOC: product_name
144 *
145 * The amdgpu driver provides a sysfs API for reporting the product name
146 * for the device
147 * The file serial_number is used for this and returns the product name
148 * as returned from the FRU.
149 * NOTE: This is only available for certain server cards
150 */
151
152static ssize_t amdgpu_device_get_product_name(struct device *dev,
153 struct device_attribute *attr, char *buf)
154{
155 struct drm_device *ddev = dev_get_drvdata(dev);
156 struct amdgpu_device *adev = ddev->dev_private;
157
158 return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_name);
159}
160
161static DEVICE_ATTR(product_name, S_IRUGO,
162 amdgpu_device_get_product_name, NULL);
163
164/**
165 * DOC: product_number
166 *
167 * The amdgpu driver provides a sysfs API for reporting the part number
168 * for the device
169 * The file serial_number is used for this and returns the part number
170 * as returned from the FRU.
171 * NOTE: This is only available for certain server cards
172 */
173
174static ssize_t amdgpu_device_get_product_number(struct device *dev,
175 struct device_attribute *attr, char *buf)
176{
177 struct drm_device *ddev = dev_get_drvdata(dev);
178 struct amdgpu_device *adev = ddev->dev_private;
179
180 return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_number);
181}
182
183static DEVICE_ATTR(product_number, S_IRUGO,
184 amdgpu_device_get_product_number, NULL);
185
186/**
187 * DOC: serial_number
188 *
189 * The amdgpu driver provides a sysfs API for reporting the serial number
190 * for the device
191 * The file serial_number is used for this and returns the serial number
192 * as returned from the FRU.
193 * NOTE: This is only available for certain server cards
194 */
195
196static ssize_t amdgpu_device_get_serial_number(struct device *dev,
197 struct device_attribute *attr, char *buf)
198{
199 struct drm_device *ddev = dev_get_drvdata(dev);
200 struct amdgpu_device *adev = ddev->dev_private;
201
202 return snprintf(buf, PAGE_SIZE, "%s\n", adev->serial);
203}
204
205static DEVICE_ATTR(serial_number, S_IRUGO,
206 amdgpu_device_get_serial_number, NULL);
207
e3ecdffa 208/**
31af062a 209 * amdgpu_device_supports_boco - Is the device a dGPU with HG/PX power control
e3ecdffa
AD
210 *
211 * @dev: drm_device pointer
212 *
213 * Returns true if the device is a dGPU with HG/PX power control,
214 * otherwise return false.
215 */
31af062a 216bool amdgpu_device_supports_boco(struct drm_device *dev)
d38ceaf9
AD
217{
218 struct amdgpu_device *adev = dev->dev_private;
219
2f7d10b3 220 if (adev->flags & AMD_IS_PX)
d38ceaf9
AD
221 return true;
222 return false;
223}
224
a69cba42
AD
225/**
226 * amdgpu_device_supports_baco - Does the device support BACO
227 *
228 * @dev: drm_device pointer
229 *
230 * Returns true if the device supporte BACO,
231 * otherwise return false.
232 */
233bool amdgpu_device_supports_baco(struct drm_device *dev)
234{
235 struct amdgpu_device *adev = dev->dev_private;
236
237 return amdgpu_asic_supports_baco(adev);
238}
239
e35e2b11
TY
240/**
241 * VRAM access helper functions.
242 *
243 * amdgpu_device_vram_access - read/write a buffer in vram
244 *
245 * @adev: amdgpu_device pointer
246 * @pos: offset of the buffer in vram
247 * @buf: virtual address of the buffer in system memory
248 * @size: read/write size, sizeof(@buf) must > @size
249 * @write: true - write to vram, otherwise - read from vram
250 */
251void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
252 uint32_t *buf, size_t size, bool write)
253{
e35e2b11 254 unsigned long flags;
ce05ac56
CK
255 uint32_t hi = ~0;
256 uint64_t last;
257
9d11eb0d
CK
258
259#ifdef CONFIG_64BIT
260 last = min(pos + size, adev->gmc.visible_vram_size);
261 if (last > pos) {
262 void __iomem *addr = adev->mman.aper_base_kaddr + pos;
263 size_t count = last - pos;
264
265 if (write) {
266 memcpy_toio(addr, buf, count);
267 mb();
268 amdgpu_asic_flush_hdp(adev, NULL);
269 } else {
270 amdgpu_asic_invalidate_hdp(adev, NULL);
271 mb();
272 memcpy_fromio(buf, addr, count);
273 }
274
275 if (count == size)
276 return;
277
278 pos += count;
279 buf += count / 4;
280 size -= count;
281 }
282#endif
283
ce05ac56
CK
284 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
285 for (last = pos + size; pos < last; pos += 4) {
286 uint32_t tmp = pos >> 31;
e35e2b11 287
e35e2b11 288 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
ce05ac56
CK
289 if (tmp != hi) {
290 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
291 hi = tmp;
292 }
e35e2b11
TY
293 if (write)
294 WREG32_NO_KIQ(mmMM_DATA, *buf++);
295 else
296 *buf++ = RREG32_NO_KIQ(mmMM_DATA);
e35e2b11 297 }
ce05ac56 298 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
e35e2b11
TY
299}
300
d38ceaf9 301/*
2eee0229 302 * device register access helper functions.
d38ceaf9 303 */
e3ecdffa 304/**
2eee0229 305 * amdgpu_device_rreg - read a register
e3ecdffa
AD
306 *
307 * @adev: amdgpu_device pointer
308 * @reg: dword aligned register offset
309 * @acc_flags: access flags which require special behavior
310 *
311 * Returns the 32 bit value from the offset specified.
312 */
2eee0229
HZ
313uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, uint32_t reg,
314 uint32_t acc_flags)
d38ceaf9 315{
f4b373f4
TSD
316 uint32_t ret;
317
f384ff95 318 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
d33a99c4 319 return amdgpu_kiq_rreg(adev, reg);
bc992ba5 320
ec59847e 321 if ((reg * 4) < adev->rmmio_size)
f4b373f4 322 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
2eee0229
HZ
323 else
324 ret = adev->pcie_rreg(adev, (reg * 4));
325 trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
f4b373f4 326 return ret;
d38ceaf9
AD
327}
328
421a2a30
ML
329/*
330 * MMIO register read with bytes helper functions
331 * @offset:bytes offset from MMIO start
332 *
333*/
334
e3ecdffa
AD
335/**
336 * amdgpu_mm_rreg8 - read a memory mapped IO register
337 *
338 * @adev: amdgpu_device pointer
339 * @offset: byte aligned register offset
340 *
341 * Returns the 8 bit value from the offset specified.
342 */
421a2a30
ML
343uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) {
344 if (offset < adev->rmmio_size)
345 return (readb(adev->rmmio + offset));
346 BUG();
347}
348
349/*
350 * MMIO register write with bytes helper functions
351 * @offset:bytes offset from MMIO start
352 * @value: the value want to be written to the register
353 *
354*/
e3ecdffa
AD
355/**
356 * amdgpu_mm_wreg8 - read a memory mapped IO register
357 *
358 * @adev: amdgpu_device pointer
359 * @offset: byte aligned register offset
360 * @value: 8 bit value to write
361 *
362 * Writes the value specified to the offset specified.
363 */
421a2a30
ML
364void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) {
365 if (offset < adev->rmmio_size)
366 writeb(value, adev->rmmio + offset);
367 else
368 BUG();
369}
370
2eee0229
HZ
371void static inline amdgpu_device_wreg_no_kiq(struct amdgpu_device *adev, uint32_t reg,
372 uint32_t v, uint32_t acc_flags)
2e0cc4d4 373{
2eee0229 374 trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
2e0cc4d4 375
ec59847e 376 if ((reg * 4) < adev->rmmio_size)
2e0cc4d4 377 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
2eee0229
HZ
378 else
379 adev->pcie_wreg(adev, (reg * 4), v);
2e0cc4d4
ML
380}
381
e3ecdffa 382/**
2eee0229 383 * amdgpu_device_wreg - write to a register
e3ecdffa
AD
384 *
385 * @adev: amdgpu_device pointer
386 * @reg: dword aligned register offset
387 * @v: 32 bit value to write to the register
388 * @acc_flags: access flags which require special behavior
389 *
390 * Writes the value specified to the offset specified.
391 */
2eee0229
HZ
392void amdgpu_device_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
393 uint32_t acc_flags)
d38ceaf9 394{
f384ff95 395 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
d33a99c4 396 return amdgpu_kiq_wreg(adev, reg, v);
bc992ba5 397
2eee0229 398 amdgpu_device_wreg_no_kiq(adev, reg, v, acc_flags);
2e0cc4d4 399}
d38ceaf9 400
2e0cc4d4
ML
401/*
402 * amdgpu_mm_wreg_mmio_rlc - write register either with mmio or with RLC path if in range
403 *
404 * this function is invoked only the debugfs register access
405 * */
406void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
407 uint32_t acc_flags)
408{
409 if (amdgpu_sriov_fullaccess(adev) &&
410 adev->gfx.rlc.funcs &&
411 adev->gfx.rlc.funcs->is_rlcg_access_range) {
47ed4e1c 412
2e0cc4d4
ML
413 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
414 return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v);
47ed4e1c 415 }
2e0cc4d4 416
2eee0229 417 amdgpu_device_wreg_no_kiq(adev, reg, v, acc_flags);
d38ceaf9
AD
418}
419
e3ecdffa
AD
420/**
421 * amdgpu_io_rreg - read an IO register
422 *
423 * @adev: amdgpu_device pointer
424 * @reg: dword aligned register offset
425 *
426 * Returns the 32 bit value from the offset specified.
427 */
d38ceaf9
AD
428u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
429{
430 if ((reg * 4) < adev->rio_mem_size)
431 return ioread32(adev->rio_mem + (reg * 4));
432 else {
433 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
434 return ioread32(adev->rio_mem + (mmMM_DATA * 4));
435 }
436}
437
e3ecdffa
AD
438/**
439 * amdgpu_io_wreg - write to an IO register
440 *
441 * @adev: amdgpu_device pointer
442 * @reg: dword aligned register offset
443 * @v: 32 bit value to write to the register
444 *
445 * Writes the value specified to the offset specified.
446 */
d38ceaf9
AD
447void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
448{
d38ceaf9
AD
449 if ((reg * 4) < adev->rio_mem_size)
450 iowrite32(v, adev->rio_mem + (reg * 4));
451 else {
452 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
453 iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
454 }
455}
456
457/**
458 * amdgpu_mm_rdoorbell - read a doorbell dword
459 *
460 * @adev: amdgpu_device pointer
461 * @index: doorbell index
462 *
463 * Returns the value in the doorbell aperture at the
464 * requested doorbell index (CIK).
465 */
466u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
467{
468 if (index < adev->doorbell.num_doorbells) {
469 return readl(adev->doorbell.ptr + index);
470 } else {
471 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
472 return 0;
473 }
474}
475
476/**
477 * amdgpu_mm_wdoorbell - write a doorbell dword
478 *
479 * @adev: amdgpu_device pointer
480 * @index: doorbell index
481 * @v: value to write
482 *
483 * Writes @v to the doorbell aperture at the
484 * requested doorbell index (CIK).
485 */
486void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
487{
488 if (index < adev->doorbell.num_doorbells) {
489 writel(v, adev->doorbell.ptr + index);
490 } else {
491 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
492 }
493}
494
832be404
KW
495/**
496 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
497 *
498 * @adev: amdgpu_device pointer
499 * @index: doorbell index
500 *
501 * Returns the value in the doorbell aperture at the
502 * requested doorbell index (VEGA10+).
503 */
504u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
505{
506 if (index < adev->doorbell.num_doorbells) {
507 return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
508 } else {
509 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
510 return 0;
511 }
512}
513
514/**
515 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
516 *
517 * @adev: amdgpu_device pointer
518 * @index: doorbell index
519 * @v: value to write
520 *
521 * Writes @v to the doorbell aperture at the
522 * requested doorbell index (VEGA10+).
523 */
524void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
525{
526 if (index < adev->doorbell.num_doorbells) {
527 atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
528 } else {
529 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
530 }
531}
532
d38ceaf9
AD
533/**
534 * amdgpu_invalid_rreg - dummy reg read function
535 *
536 * @adev: amdgpu device pointer
537 * @reg: offset of register
538 *
539 * Dummy register read function. Used for register blocks
540 * that certain asics don't have (all asics).
541 * Returns the value in the register.
542 */
543static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
544{
545 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
546 BUG();
547 return 0;
548}
549
550/**
551 * amdgpu_invalid_wreg - dummy reg write function
552 *
553 * @adev: amdgpu device pointer
554 * @reg: offset of register
555 * @v: value to write to the register
556 *
557 * Dummy register read function. Used for register blocks
558 * that certain asics don't have (all asics).
559 */
560static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
561{
562 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
563 reg, v);
564 BUG();
565}
566
4fa1c6a6
TZ
567/**
568 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
569 *
570 * @adev: amdgpu device pointer
571 * @reg: offset of register
572 *
573 * Dummy register read function. Used for register blocks
574 * that certain asics don't have (all asics).
575 * Returns the value in the register.
576 */
577static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
578{
579 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
580 BUG();
581 return 0;
582}
583
584/**
585 * amdgpu_invalid_wreg64 - dummy reg write function
586 *
587 * @adev: amdgpu device pointer
588 * @reg: offset of register
589 * @v: value to write to the register
590 *
591 * Dummy register read function. Used for register blocks
592 * that certain asics don't have (all asics).
593 */
594static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
595{
596 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
597 reg, v);
598 BUG();
599}
600
d38ceaf9
AD
601/**
602 * amdgpu_block_invalid_rreg - dummy reg read function
603 *
604 * @adev: amdgpu device pointer
605 * @block: offset of instance
606 * @reg: offset of register
607 *
608 * Dummy register read function. Used for register blocks
609 * that certain asics don't have (all asics).
610 * Returns the value in the register.
611 */
612static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
613 uint32_t block, uint32_t reg)
614{
615 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
616 reg, block);
617 BUG();
618 return 0;
619}
620
621/**
622 * amdgpu_block_invalid_wreg - dummy reg write function
623 *
624 * @adev: amdgpu device pointer
625 * @block: offset of instance
626 * @reg: offset of register
627 * @v: value to write to the register
628 *
629 * Dummy register read function. Used for register blocks
630 * that certain asics don't have (all asics).
631 */
632static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
633 uint32_t block,
634 uint32_t reg, uint32_t v)
635{
636 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
637 reg, block, v);
638 BUG();
639}
640
e3ecdffa
AD
641/**
642 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
643 *
644 * @adev: amdgpu device pointer
645 *
646 * Allocates a scratch page of VRAM for use by various things in the
647 * driver.
648 */
06ec9070 649static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
d38ceaf9 650{
a4a02777
CK
651 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
652 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
653 &adev->vram_scratch.robj,
654 &adev->vram_scratch.gpu_addr,
655 (void **)&adev->vram_scratch.ptr);
d38ceaf9
AD
656}
657
e3ecdffa
AD
658/**
659 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
660 *
661 * @adev: amdgpu device pointer
662 *
663 * Frees the VRAM scratch page.
664 */
06ec9070 665static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 666{
078af1a3 667 amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
d38ceaf9
AD
668}
669
670/**
9c3f2b54 671 * amdgpu_device_program_register_sequence - program an array of registers.
d38ceaf9
AD
672 *
673 * @adev: amdgpu_device pointer
674 * @registers: pointer to the register array
675 * @array_size: size of the register array
676 *
677 * Programs an array or registers with and and or masks.
678 * This is a helper for setting golden registers.
679 */
9c3f2b54
AD
680void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
681 const u32 *registers,
682 const u32 array_size)
d38ceaf9
AD
683{
684 u32 tmp, reg, and_mask, or_mask;
685 int i;
686
687 if (array_size % 3)
688 return;
689
690 for (i = 0; i < array_size; i +=3) {
691 reg = registers[i + 0];
692 and_mask = registers[i + 1];
693 or_mask = registers[i + 2];
694
695 if (and_mask == 0xffffffff) {
696 tmp = or_mask;
697 } else {
698 tmp = RREG32(reg);
699 tmp &= ~and_mask;
e0d07657
HZ
700 if (adev->family >= AMDGPU_FAMILY_AI)
701 tmp |= (or_mask & and_mask);
702 else
703 tmp |= or_mask;
d38ceaf9
AD
704 }
705 WREG32(reg, tmp);
706 }
707}
708
e3ecdffa
AD
709/**
710 * amdgpu_device_pci_config_reset - reset the GPU
711 *
712 * @adev: amdgpu_device pointer
713 *
714 * Resets the GPU using the pci config reset sequence.
715 * Only applicable to asics prior to vega10.
716 */
8111c387 717void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
718{
719 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
720}
721
722/*
723 * GPU doorbell aperture helpers function.
724 */
725/**
06ec9070 726 * amdgpu_device_doorbell_init - Init doorbell driver information.
d38ceaf9
AD
727 *
728 * @adev: amdgpu_device pointer
729 *
730 * Init doorbell driver information (CIK)
731 * Returns 0 on success, error on failure.
732 */
06ec9070 733static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
d38ceaf9 734{
6585661d 735
705e519e
CK
736 /* No doorbell on SI hardware generation */
737 if (adev->asic_type < CHIP_BONAIRE) {
738 adev->doorbell.base = 0;
739 adev->doorbell.size = 0;
740 adev->doorbell.num_doorbells = 0;
741 adev->doorbell.ptr = NULL;
742 return 0;
743 }
744
d6895ad3
CK
745 if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
746 return -EINVAL;
747
22357775
AD
748 amdgpu_asic_init_doorbell_index(adev);
749
d38ceaf9
AD
750 /* doorbell bar mapping */
751 adev->doorbell.base = pci_resource_start(adev->pdev, 2);
752 adev->doorbell.size = pci_resource_len(adev->pdev, 2);
753
edf600da 754 adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
9564f192 755 adev->doorbell_index.max_assignment+1);
d38ceaf9
AD
756 if (adev->doorbell.num_doorbells == 0)
757 return -EINVAL;
758
ec3db8a6 759 /* For Vega, reserve and map two pages on doorbell BAR since SDMA
88dc26e4
OZ
760 * paging queue doorbell use the second page. The
761 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
762 * doorbells are in the first page. So with paging queue enabled,
763 * the max num_doorbells should + 1 page (0x400 in dword)
ec3db8a6
PY
764 */
765 if (adev->asic_type >= CHIP_VEGA10)
88dc26e4 766 adev->doorbell.num_doorbells += 0x400;
ec3db8a6 767
8972e5d2
CK
768 adev->doorbell.ptr = ioremap(adev->doorbell.base,
769 adev->doorbell.num_doorbells *
770 sizeof(u32));
771 if (adev->doorbell.ptr == NULL)
d38ceaf9 772 return -ENOMEM;
d38ceaf9
AD
773
774 return 0;
775}
776
777/**
06ec9070 778 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
d38ceaf9
AD
779 *
780 * @adev: amdgpu_device pointer
781 *
782 * Tear down doorbell driver information (CIK)
783 */
06ec9070 784static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
d38ceaf9
AD
785{
786 iounmap(adev->doorbell.ptr);
787 adev->doorbell.ptr = NULL;
788}
789
22cb0164 790
d38ceaf9
AD
791
792/*
06ec9070 793 * amdgpu_device_wb_*()
455a7bc2 794 * Writeback is the method by which the GPU updates special pages in memory
ea81a173 795 * with the status of certain GPU events (fences, ring pointers,etc.).
d38ceaf9
AD
796 */
797
798/**
06ec9070 799 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
800 *
801 * @adev: amdgpu_device pointer
802 *
803 * Disables Writeback and frees the Writeback memory (all asics).
804 * Used at driver shutdown.
805 */
06ec9070 806static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
d38ceaf9
AD
807{
808 if (adev->wb.wb_obj) {
a76ed485
AD
809 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
810 &adev->wb.gpu_addr,
811 (void **)&adev->wb.wb);
d38ceaf9
AD
812 adev->wb.wb_obj = NULL;
813 }
814}
815
816/**
06ec9070 817 * amdgpu_device_wb_init- Init Writeback driver info and allocate memory
d38ceaf9
AD
818 *
819 * @adev: amdgpu_device pointer
820 *
455a7bc2 821 * Initializes writeback and allocates writeback memory (all asics).
d38ceaf9
AD
822 * Used at driver startup.
823 * Returns 0 on success or an -error on failure.
824 */
06ec9070 825static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
826{
827 int r;
828
829 if (adev->wb.wb_obj == NULL) {
97407b63
AD
830 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
831 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
832 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
833 &adev->wb.wb_obj, &adev->wb.gpu_addr,
834 (void **)&adev->wb.wb);
d38ceaf9
AD
835 if (r) {
836 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
837 return r;
838 }
d38ceaf9
AD
839
840 adev->wb.num_wb = AMDGPU_MAX_WB;
841 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
842
843 /* clear wb memory */
73469585 844 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
845 }
846
847 return 0;
848}
849
850/**
131b4b36 851 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
852 *
853 * @adev: amdgpu_device pointer
854 * @wb: wb index
855 *
856 * Allocate a wb slot for use by the driver (all asics).
857 * Returns 0 on success or -EINVAL on failure.
858 */
131b4b36 859int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9
AD
860{
861 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 862
97407b63 863 if (offset < adev->wb.num_wb) {
7014285a 864 __set_bit(offset, adev->wb.used);
63ae07ca 865 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
866 return 0;
867 } else {
868 return -EINVAL;
869 }
870}
871
d38ceaf9 872/**
131b4b36 873 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
874 *
875 * @adev: amdgpu_device pointer
876 * @wb: wb index
877 *
878 * Free a wb slot allocated for use by the driver (all asics)
879 */
131b4b36 880void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 881{
73469585 882 wb >>= 3;
d38ceaf9 883 if (wb < adev->wb.num_wb)
73469585 884 __clear_bit(wb, adev->wb.used);
d38ceaf9
AD
885}
886
d6895ad3
CK
887/**
888 * amdgpu_device_resize_fb_bar - try to resize FB BAR
889 *
890 * @adev: amdgpu_device pointer
891 *
892 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
893 * to fail, but if any of the BARs is not accessible after the size we abort
894 * driver loading by returning -ENODEV.
895 */
896int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
897{
770d13b1 898 u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
d6895ad3 899 u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
31b8adab
CK
900 struct pci_bus *root;
901 struct resource *res;
902 unsigned i;
d6895ad3
CK
903 u16 cmd;
904 int r;
905
0c03b912 906 /* Bypass for VF */
907 if (amdgpu_sriov_vf(adev))
908 return 0;
909
31b8adab
CK
910 /* Check if the root BUS has 64bit memory resources */
911 root = adev->pdev->bus;
912 while (root->parent)
913 root = root->parent;
914
915 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 916 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
917 res->start > 0x100000000ull)
918 break;
919 }
920
921 /* Trying to resize is pointless without a root hub window above 4GB */
922 if (!res)
923 return 0;
924
d6895ad3
CK
925 /* Disable memory decoding while we change the BAR addresses and size */
926 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
927 pci_write_config_word(adev->pdev, PCI_COMMAND,
928 cmd & ~PCI_COMMAND_MEMORY);
929
930 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
06ec9070 931 amdgpu_device_doorbell_fini(adev);
d6895ad3
CK
932 if (adev->asic_type >= CHIP_BONAIRE)
933 pci_release_resource(adev->pdev, 2);
934
935 pci_release_resource(adev->pdev, 0);
936
937 r = pci_resize_resource(adev->pdev, 0, rbar_size);
938 if (r == -ENOSPC)
939 DRM_INFO("Not enough PCI address space for a large BAR.");
940 else if (r && r != -ENOTSUPP)
941 DRM_ERROR("Problem resizing BAR0 (%d).", r);
942
943 pci_assign_unassigned_bus_resources(adev->pdev->bus);
944
945 /* When the doorbell or fb BAR isn't available we have no chance of
946 * using the device.
947 */
06ec9070 948 r = amdgpu_device_doorbell_init(adev);
d6895ad3
CK
949 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
950 return -ENODEV;
951
952 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
953
954 return 0;
955}
a05502e5 956
d38ceaf9
AD
957/*
958 * GPU helpers function.
959 */
960/**
39c640c0 961 * amdgpu_device_need_post - check if the hw need post or not
d38ceaf9
AD
962 *
963 * @adev: amdgpu_device pointer
964 *
c836fec5
JQ
965 * Check if the asic has been initialized (all asics) at driver startup
966 * or post is needed if hw reset is performed.
967 * Returns true if need or false if not.
d38ceaf9 968 */
39c640c0 969bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
970{
971 uint32_t reg;
972
bec86378
ML
973 if (amdgpu_sriov_vf(adev))
974 return false;
975
976 if (amdgpu_passthrough(adev)) {
1da2c326
ML
977 /* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
978 * some old smc fw still need driver do vPost otherwise gpu hang, while
979 * those smc fw version above 22.15 doesn't have this flaw, so we force
980 * vpost executed for smc version below 22.15
bec86378
ML
981 */
982 if (adev->asic_type == CHIP_FIJI) {
983 int err;
984 uint32_t fw_ver;
985 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
986 /* force vPost if error occured */
987 if (err)
988 return true;
989
990 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1da2c326
ML
991 if (fw_ver < 0x00160e00)
992 return true;
bec86378 993 }
bec86378 994 }
91fe77eb 995
996 if (adev->has_hw_reset) {
997 adev->has_hw_reset = false;
998 return true;
999 }
1000
1001 /* bios scratch used on CIK+ */
1002 if (adev->asic_type >= CHIP_BONAIRE)
1003 return amdgpu_atombios_scratch_need_asic_init(adev);
1004
1005 /* check MEM_SIZE for older asics */
1006 reg = amdgpu_asic_get_config_memsize(adev);
1007
1008 if ((reg != 0) && (reg != 0xffffffff))
1009 return false;
1010
1011 return true;
bec86378
ML
1012}
1013
d38ceaf9
AD
1014/* if we get transitioned to only one device, take VGA back */
1015/**
06ec9070 1016 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9
AD
1017 *
1018 * @cookie: amdgpu_device pointer
1019 * @state: enable/disable vga decode
1020 *
1021 * Enable/disable vga decode (all asics).
1022 * Returns VGA resource flags.
1023 */
06ec9070 1024static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
d38ceaf9
AD
1025{
1026 struct amdgpu_device *adev = cookie;
1027 amdgpu_asic_set_vga_state(adev, state);
1028 if (state)
1029 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1030 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1031 else
1032 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1033}
1034
e3ecdffa
AD
1035/**
1036 * amdgpu_device_check_block_size - validate the vm block size
1037 *
1038 * @adev: amdgpu_device pointer
1039 *
1040 * Validates the vm block size specified via module parameter.
1041 * The vm block size defines number of bits in page table versus page directory,
1042 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1043 * page table and the remaining bits are in the page directory.
1044 */
06ec9070 1045static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
1046{
1047 /* defines number of bits in page table versus page directory,
1048 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1049 * page table and the remaining bits are in the page directory */
bab4fee7
JZ
1050 if (amdgpu_vm_block_size == -1)
1051 return;
a1adf8be 1052
bab4fee7 1053 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
1054 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1055 amdgpu_vm_block_size);
97489129 1056 amdgpu_vm_block_size = -1;
a1adf8be 1057 }
a1adf8be
CZ
1058}
1059
e3ecdffa
AD
1060/**
1061 * amdgpu_device_check_vm_size - validate the vm size
1062 *
1063 * @adev: amdgpu_device pointer
1064 *
1065 * Validates the vm size in GB specified via module parameter.
1066 * The VM size is the size of the GPU virtual memory space in GB.
1067 */
06ec9070 1068static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 1069{
64dab074
AD
1070 /* no need to check the default value */
1071 if (amdgpu_vm_size == -1)
1072 return;
1073
83ca145d
ZJ
1074 if (amdgpu_vm_size < 1) {
1075 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1076 amdgpu_vm_size);
f3368128 1077 amdgpu_vm_size = -1;
83ca145d 1078 }
83ca145d
ZJ
1079}
1080
7951e376
RZ
1081static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1082{
1083 struct sysinfo si;
a9d4fe2f 1084 bool is_os_64 = (sizeof(void *) == 8);
7951e376
RZ
1085 uint64_t total_memory;
1086 uint64_t dram_size_seven_GB = 0x1B8000000;
1087 uint64_t dram_size_three_GB = 0xB8000000;
1088
1089 if (amdgpu_smu_memory_pool_size == 0)
1090 return;
1091
1092 if (!is_os_64) {
1093 DRM_WARN("Not 64-bit OS, feature not supported\n");
1094 goto def_value;
1095 }
1096 si_meminfo(&si);
1097 total_memory = (uint64_t)si.totalram * si.mem_unit;
1098
1099 if ((amdgpu_smu_memory_pool_size == 1) ||
1100 (amdgpu_smu_memory_pool_size == 2)) {
1101 if (total_memory < dram_size_three_GB)
1102 goto def_value1;
1103 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1104 (amdgpu_smu_memory_pool_size == 8)) {
1105 if (total_memory < dram_size_seven_GB)
1106 goto def_value1;
1107 } else {
1108 DRM_WARN("Smu memory pool size not supported\n");
1109 goto def_value;
1110 }
1111 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1112
1113 return;
1114
1115def_value1:
1116 DRM_WARN("No enough system memory\n");
1117def_value:
1118 adev->pm.smu_prv_buffer_size = 0;
1119}
1120
d38ceaf9 1121/**
06ec9070 1122 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
1123 *
1124 * @adev: amdgpu_device pointer
1125 *
1126 * Validates certain module parameters and updates
1127 * the associated values used by the driver (all asics).
1128 */
912dfc84 1129static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1130{
5b011235
CZ
1131 if (amdgpu_sched_jobs < 4) {
1132 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1133 amdgpu_sched_jobs);
1134 amdgpu_sched_jobs = 4;
76117507 1135 } else if (!is_power_of_2(amdgpu_sched_jobs)){
5b011235
CZ
1136 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1137 amdgpu_sched_jobs);
1138 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1139 }
d38ceaf9 1140
83e74db6 1141 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
1142 /* gart size must be greater or equal to 32M */
1143 dev_warn(adev->dev, "gart size (%d) too small\n",
1144 amdgpu_gart_size);
83e74db6 1145 amdgpu_gart_size = -1;
d38ceaf9
AD
1146 }
1147
36d38372 1148 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1149 /* gtt size must be greater or equal to 32M */
36d38372
CK
1150 dev_warn(adev->dev, "gtt size (%d) too small\n",
1151 amdgpu_gtt_size);
1152 amdgpu_gtt_size = -1;
d38ceaf9
AD
1153 }
1154
d07f14be
RH
1155 /* valid range is between 4 and 9 inclusive */
1156 if (amdgpu_vm_fragment_size != -1 &&
1157 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1158 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1159 amdgpu_vm_fragment_size = -1;
1160 }
1161
7951e376
RZ
1162 amdgpu_device_check_smu_prv_buffer_size(adev);
1163
06ec9070 1164 amdgpu_device_check_vm_size(adev);
d38ceaf9 1165
06ec9070 1166 amdgpu_device_check_block_size(adev);
6a7f76e7 1167
19aede77 1168 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84 1169
c6252390 1170 amdgpu_gmc_tmz_set(adev);
01a8dcec 1171
e3c00faa 1172 return 0;
d38ceaf9
AD
1173}
1174
1175/**
1176 * amdgpu_switcheroo_set_state - set switcheroo state
1177 *
1178 * @pdev: pci dev pointer
1694467b 1179 * @state: vga_switcheroo state
d38ceaf9
AD
1180 *
1181 * Callback for the switcheroo driver. Suspends or resumes the
1182 * the asics before or after it is powered up using ACPI methods.
1183 */
1184static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
1185{
1186 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 1187 int r;
d38ceaf9 1188
31af062a 1189 if (amdgpu_device_supports_boco(dev) && state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
1190 return;
1191
1192 if (state == VGA_SWITCHEROO_ON) {
dd4fa6c1 1193 pr_info("switched on\n");
d38ceaf9
AD
1194 /* don't suspend or resume card normally */
1195 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1196
de185019
AD
1197 pci_set_power_state(dev->pdev, PCI_D0);
1198 pci_restore_state(dev->pdev);
1199 r = pci_enable_device(dev->pdev);
1200 if (r)
1201 DRM_WARN("pci_enable_device failed (%d)\n", r);
1202 amdgpu_device_resume(dev, true);
d38ceaf9 1203
d38ceaf9
AD
1204 dev->switch_power_state = DRM_SWITCH_POWER_ON;
1205 drm_kms_helper_poll_enable(dev);
1206 } else {
dd4fa6c1 1207 pr_info("switched off\n");
d38ceaf9
AD
1208 drm_kms_helper_poll_disable(dev);
1209 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
de185019
AD
1210 amdgpu_device_suspend(dev, true);
1211 pci_save_state(dev->pdev);
1212 /* Shut down the device */
1213 pci_disable_device(dev->pdev);
1214 pci_set_power_state(dev->pdev, PCI_D3cold);
d38ceaf9
AD
1215 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1216 }
1217}
1218
1219/**
1220 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1221 *
1222 * @pdev: pci dev pointer
1223 *
1224 * Callback for the switcheroo driver. Check of the switcheroo
1225 * state can be changed.
1226 * Returns true if the state can be changed, false if not.
1227 */
1228static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1229{
1230 struct drm_device *dev = pci_get_drvdata(pdev);
1231
1232 /*
1233 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1234 * locking inversion with the driver load path. And the access here is
1235 * completely racy anyway. So don't bother with locking for now.
1236 */
7e13ad89 1237 return atomic_read(&dev->open_count) == 0;
d38ceaf9
AD
1238}
1239
1240static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1241 .set_gpu_state = amdgpu_switcheroo_set_state,
1242 .reprobe = NULL,
1243 .can_switch = amdgpu_switcheroo_can_switch,
1244};
1245
e3ecdffa
AD
1246/**
1247 * amdgpu_device_ip_set_clockgating_state - set the CG state
1248 *
87e3f136 1249 * @dev: amdgpu_device pointer
e3ecdffa
AD
1250 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1251 * @state: clockgating state (gate or ungate)
1252 *
1253 * Sets the requested clockgating state for all instances of
1254 * the hardware IP specified.
1255 * Returns the error code from the last instance.
1256 */
43fa561f 1257int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1258 enum amd_ip_block_type block_type,
1259 enum amd_clockgating_state state)
d38ceaf9 1260{
43fa561f 1261 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1262 int i, r = 0;
1263
1264 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1265 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1266 continue;
c722865a
RZ
1267 if (adev->ip_blocks[i].version->type != block_type)
1268 continue;
1269 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1270 continue;
1271 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1272 (void *)adev, state);
1273 if (r)
1274 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1275 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1276 }
1277 return r;
1278}
1279
e3ecdffa
AD
1280/**
1281 * amdgpu_device_ip_set_powergating_state - set the PG state
1282 *
87e3f136 1283 * @dev: amdgpu_device pointer
e3ecdffa
AD
1284 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1285 * @state: powergating state (gate or ungate)
1286 *
1287 * Sets the requested powergating state for all instances of
1288 * the hardware IP specified.
1289 * Returns the error code from the last instance.
1290 */
43fa561f 1291int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1292 enum amd_ip_block_type block_type,
1293 enum amd_powergating_state state)
d38ceaf9 1294{
43fa561f 1295 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1296 int i, r = 0;
1297
1298 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1299 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1300 continue;
c722865a
RZ
1301 if (adev->ip_blocks[i].version->type != block_type)
1302 continue;
1303 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1304 continue;
1305 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1306 (void *)adev, state);
1307 if (r)
1308 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1309 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1310 }
1311 return r;
1312}
1313
e3ecdffa
AD
1314/**
1315 * amdgpu_device_ip_get_clockgating_state - get the CG state
1316 *
1317 * @adev: amdgpu_device pointer
1318 * @flags: clockgating feature flags
1319 *
1320 * Walks the list of IPs on the device and updates the clockgating
1321 * flags for each IP.
1322 * Updates @flags with the feature flags for each hardware IP where
1323 * clockgating is enabled.
1324 */
2990a1fc
AD
1325void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1326 u32 *flags)
6cb2d4e4
HR
1327{
1328 int i;
1329
1330 for (i = 0; i < adev->num_ip_blocks; i++) {
1331 if (!adev->ip_blocks[i].status.valid)
1332 continue;
1333 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1334 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1335 }
1336}
1337
e3ecdffa
AD
1338/**
1339 * amdgpu_device_ip_wait_for_idle - wait for idle
1340 *
1341 * @adev: amdgpu_device pointer
1342 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1343 *
1344 * Waits for the request hardware IP to be idle.
1345 * Returns 0 for success or a negative error code on failure.
1346 */
2990a1fc
AD
1347int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1348 enum amd_ip_block_type block_type)
5dbbb60b
AD
1349{
1350 int i, r;
1351
1352 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1353 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1354 continue;
a1255107
AD
1355 if (adev->ip_blocks[i].version->type == block_type) {
1356 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1357 if (r)
1358 return r;
1359 break;
1360 }
1361 }
1362 return 0;
1363
1364}
1365
e3ecdffa
AD
1366/**
1367 * amdgpu_device_ip_is_idle - is the hardware IP idle
1368 *
1369 * @adev: amdgpu_device pointer
1370 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1371 *
1372 * Check if the hardware IP is idle or not.
1373 * Returns true if it the IP is idle, false if not.
1374 */
2990a1fc
AD
1375bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1376 enum amd_ip_block_type block_type)
5dbbb60b
AD
1377{
1378 int i;
1379
1380 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1381 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1382 continue;
a1255107
AD
1383 if (adev->ip_blocks[i].version->type == block_type)
1384 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1385 }
1386 return true;
1387
1388}
1389
e3ecdffa
AD
1390/**
1391 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1392 *
1393 * @adev: amdgpu_device pointer
87e3f136 1394 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1395 *
1396 * Returns a pointer to the hardware IP block structure
1397 * if it exists for the asic, otherwise NULL.
1398 */
2990a1fc
AD
1399struct amdgpu_ip_block *
1400amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1401 enum amd_ip_block_type type)
d38ceaf9
AD
1402{
1403 int i;
1404
1405 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1406 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1407 return &adev->ip_blocks[i];
1408
1409 return NULL;
1410}
1411
1412/**
2990a1fc 1413 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1414 *
1415 * @adev: amdgpu_device pointer
5fc3aeeb 1416 * @type: enum amd_ip_block_type
d38ceaf9
AD
1417 * @major: major version
1418 * @minor: minor version
1419 *
1420 * return 0 if equal or greater
1421 * return 1 if smaller or the ip_block doesn't exist
1422 */
2990a1fc
AD
1423int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1424 enum amd_ip_block_type type,
1425 u32 major, u32 minor)
d38ceaf9 1426{
2990a1fc 1427 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1428
a1255107
AD
1429 if (ip_block && ((ip_block->version->major > major) ||
1430 ((ip_block->version->major == major) &&
1431 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1432 return 0;
1433
1434 return 1;
1435}
1436
a1255107 1437/**
2990a1fc 1438 * amdgpu_device_ip_block_add
a1255107
AD
1439 *
1440 * @adev: amdgpu_device pointer
1441 * @ip_block_version: pointer to the IP to add
1442 *
1443 * Adds the IP block driver information to the collection of IPs
1444 * on the asic.
1445 */
2990a1fc
AD
1446int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1447 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1448{
1449 if (!ip_block_version)
1450 return -EINVAL;
1451
e966a725 1452 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1453 ip_block_version->funcs->name);
1454
a1255107
AD
1455 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1456
1457 return 0;
1458}
1459
e3ecdffa
AD
1460/**
1461 * amdgpu_device_enable_virtual_display - enable virtual display feature
1462 *
1463 * @adev: amdgpu_device pointer
1464 *
1465 * Enabled the virtual display feature if the user has enabled it via
1466 * the module parameter virtual_display. This feature provides a virtual
1467 * display hardware on headless boards or in virtualized environments.
1468 * This function parses and validates the configuration string specified by
1469 * the user and configues the virtual display configuration (number of
1470 * virtual connectors, crtcs, etc.) specified.
1471 */
483ef985 1472static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1473{
1474 adev->enable_virtual_display = false;
1475
1476 if (amdgpu_virtual_display) {
1477 struct drm_device *ddev = adev->ddev;
1478 const char *pci_address_name = pci_name(ddev->pdev);
0f66356d 1479 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1480
1481 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1482 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1483 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1484 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1485 if (!strcmp("all", pciaddname)
1486 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1487 long num_crtc;
1488 int res = -1;
1489
9accf2fd 1490 adev->enable_virtual_display = true;
0f66356d
ED
1491
1492 if (pciaddname_tmp)
1493 res = kstrtol(pciaddname_tmp, 10,
1494 &num_crtc);
1495
1496 if (!res) {
1497 if (num_crtc < 1)
1498 num_crtc = 1;
1499 if (num_crtc > 6)
1500 num_crtc = 6;
1501 adev->mode_info.num_crtc = num_crtc;
1502 } else {
1503 adev->mode_info.num_crtc = 1;
1504 }
9accf2fd
ED
1505 break;
1506 }
1507 }
1508
0f66356d
ED
1509 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1510 amdgpu_virtual_display, pci_address_name,
1511 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
1512
1513 kfree(pciaddstr);
1514 }
1515}
1516
e3ecdffa
AD
1517/**
1518 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1519 *
1520 * @adev: amdgpu_device pointer
1521 *
1522 * Parses the asic configuration parameters specified in the gpu info
1523 * firmware and makes them availale to the driver for use in configuring
1524 * the asic.
1525 * Returns 0 on success, -EINVAL on failure.
1526 */
e2a75f88
AD
1527static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1528{
e2a75f88
AD
1529 const char *chip_name;
1530 char fw_name[30];
1531 int err;
1532 const struct gpu_info_firmware_header_v1_0 *hdr;
1533
ab4fe3e1
HR
1534 adev->firmware.gpu_info_fw = NULL;
1535
e2a75f88 1536 switch (adev->asic_type) {
e2a75f88
AD
1537#ifdef CONFIG_DRM_AMDGPU_SI
1538 case CHIP_VERDE:
1539 case CHIP_TAHITI:
1540 case CHIP_PITCAIRN:
1541 case CHIP_OLAND:
1542 case CHIP_HAINAN:
1543#endif
1544#ifdef CONFIG_DRM_AMDGPU_CIK
1545 case CHIP_BONAIRE:
1546 case CHIP_HAWAII:
1547 case CHIP_KAVERI:
1548 case CHIP_KABINI:
1549 case CHIP_MULLINS:
1550#endif
da87c30b
AD
1551 case CHIP_TOPAZ:
1552 case CHIP_TONGA:
1553 case CHIP_FIJI:
1554 case CHIP_POLARIS10:
1555 case CHIP_POLARIS11:
1556 case CHIP_POLARIS12:
1557 case CHIP_VEGAM:
1558 case CHIP_CARRIZO:
1559 case CHIP_STONEY:
27c0bc71 1560 case CHIP_VEGA20:
e2a75f88
AD
1561 default:
1562 return 0;
1563 case CHIP_VEGA10:
1564 chip_name = "vega10";
1565 break;
3f76dced
AD
1566 case CHIP_VEGA12:
1567 chip_name = "vega12";
1568 break;
2d2e5e7e 1569 case CHIP_RAVEN:
54f78a76 1570 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
54c4d17e 1571 chip_name = "raven2";
54f78a76 1572 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
741deade 1573 chip_name = "picasso";
54c4d17e
FX
1574 else
1575 chip_name = "raven";
2d2e5e7e 1576 break;
65e60f6e
LM
1577 case CHIP_ARCTURUS:
1578 chip_name = "arcturus";
1579 break;
b51a26a0
HR
1580 case CHIP_RENOIR:
1581 chip_name = "renoir";
1582 break;
23c6268e
HR
1583 case CHIP_NAVI10:
1584 chip_name = "navi10";
1585 break;
ed42cfe1
XY
1586 case CHIP_NAVI14:
1587 chip_name = "navi14";
1588 break;
42b325e5
XY
1589 case CHIP_NAVI12:
1590 chip_name = "navi12";
1591 break;
e2a75f88
AD
1592 }
1593
1594 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
ab4fe3e1 1595 err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
e2a75f88
AD
1596 if (err) {
1597 dev_err(adev->dev,
1598 "Failed to load gpu_info firmware \"%s\"\n",
1599 fw_name);
1600 goto out;
1601 }
ab4fe3e1 1602 err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
e2a75f88
AD
1603 if (err) {
1604 dev_err(adev->dev,
1605 "Failed to validate gpu_info firmware \"%s\"\n",
1606 fw_name);
1607 goto out;
1608 }
1609
ab4fe3e1 1610 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
1611 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1612
1613 switch (hdr->version_major) {
1614 case 1:
1615 {
1616 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 1617 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
1618 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1619
6ba57b7a
AD
1620 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) {
1621 amdgpu_discovery_get_gfx_info(adev);
ec51d3fa 1622 goto parse_soc_bounding_box;
6ba57b7a 1623 }
ec51d3fa 1624
b5ab16bf
AD
1625 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1626 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1627 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1628 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 1629 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
1630 le32_to_cpu(gpu_info_fw->gc_num_tccs);
1631 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1632 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1633 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1634 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 1635 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
1636 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1637 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
1638 adev->gfx.cu_info.max_waves_per_simd =
1639 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1640 adev->gfx.cu_info.max_scratch_slots_per_cu =
1641 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1642 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 1643 if (hdr->version_minor >= 1) {
35c2e910
HZ
1644 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1645 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1646 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1647 adev->gfx.config.num_sc_per_sh =
1648 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1649 adev->gfx.config.num_packer_per_sc =
1650 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1651 }
ec51d3fa
XY
1652
1653parse_soc_bounding_box:
ec51d3fa
XY
1654 /*
1655 * soc bounding box info is not integrated in disocovery table,
1656 * we always need to parse it from gpu info firmware.
1657 */
48321c3d
HW
1658 if (hdr->version_minor == 2) {
1659 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1660 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1661 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1662 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1663 }
e2a75f88
AD
1664 break;
1665 }
1666 default:
1667 dev_err(adev->dev,
1668 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1669 err = -EINVAL;
1670 goto out;
1671 }
1672out:
e2a75f88
AD
1673 return err;
1674}
1675
e3ecdffa
AD
1676/**
1677 * amdgpu_device_ip_early_init - run early init for hardware IPs
1678 *
1679 * @adev: amdgpu_device pointer
1680 *
1681 * Early initialization pass for hardware IPs. The hardware IPs that make
1682 * up each asic are discovered each IP's early_init callback is run. This
1683 * is the first stage in initializing the asic.
1684 * Returns 0 on success, negative error code on failure.
1685 */
06ec9070 1686static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 1687{
aaa36a97 1688 int i, r;
d38ceaf9 1689
483ef985 1690 amdgpu_device_enable_virtual_display(adev);
a6be7570 1691
d38ceaf9 1692 switch (adev->asic_type) {
33f34802
KW
1693#ifdef CONFIG_DRM_AMDGPU_SI
1694 case CHIP_VERDE:
1695 case CHIP_TAHITI:
1696 case CHIP_PITCAIRN:
1697 case CHIP_OLAND:
1698 case CHIP_HAINAN:
295d0daf 1699 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
1700 r = si_set_ip_blocks(adev);
1701 if (r)
1702 return r;
1703 break;
1704#endif
a2e73f56
AD
1705#ifdef CONFIG_DRM_AMDGPU_CIK
1706 case CHIP_BONAIRE:
1707 case CHIP_HAWAII:
1708 case CHIP_KAVERI:
1709 case CHIP_KABINI:
1710 case CHIP_MULLINS:
e1ad2d53 1711 if (adev->flags & AMD_IS_APU)
a2e73f56 1712 adev->family = AMDGPU_FAMILY_KV;
e1ad2d53
AD
1713 else
1714 adev->family = AMDGPU_FAMILY_CI;
a2e73f56
AD
1715
1716 r = cik_set_ip_blocks(adev);
1717 if (r)
1718 return r;
1719 break;
1720#endif
da87c30b
AD
1721 case CHIP_TOPAZ:
1722 case CHIP_TONGA:
1723 case CHIP_FIJI:
1724 case CHIP_POLARIS10:
1725 case CHIP_POLARIS11:
1726 case CHIP_POLARIS12:
1727 case CHIP_VEGAM:
1728 case CHIP_CARRIZO:
1729 case CHIP_STONEY:
1730 if (adev->flags & AMD_IS_APU)
1731 adev->family = AMDGPU_FAMILY_CZ;
1732 else
1733 adev->family = AMDGPU_FAMILY_VI;
1734
1735 r = vi_set_ip_blocks(adev);
1736 if (r)
1737 return r;
1738 break;
e48a3cd9
AD
1739 case CHIP_VEGA10:
1740 case CHIP_VEGA12:
e4bd8170 1741 case CHIP_VEGA20:
e48a3cd9 1742 case CHIP_RAVEN:
61cf44c1 1743 case CHIP_ARCTURUS:
b51a26a0 1744 case CHIP_RENOIR:
70534d1e 1745 if (adev->flags & AMD_IS_APU)
2ca8a5d2
CZ
1746 adev->family = AMDGPU_FAMILY_RV;
1747 else
1748 adev->family = AMDGPU_FAMILY_AI;
460826e6
KW
1749
1750 r = soc15_set_ip_blocks(adev);
1751 if (r)
1752 return r;
1753 break;
0a5b8c7b 1754 case CHIP_NAVI10:
7ecb5cd4 1755 case CHIP_NAVI14:
4808cf9c 1756 case CHIP_NAVI12:
0a5b8c7b
HR
1757 adev->family = AMDGPU_FAMILY_NV;
1758
1759 r = nv_set_ip_blocks(adev);
1760 if (r)
1761 return r;
1762 break;
d38ceaf9
AD
1763 default:
1764 /* FIXME: not supported yet */
1765 return -EINVAL;
1766 }
1767
1884734a 1768 amdgpu_amdkfd_device_probe(adev);
1769
3149d9da 1770 if (amdgpu_sriov_vf(adev)) {
122078de
ML
1771 /* handle vbios stuff prior full access mode for new handshake */
1772 if (adev->virt.req_init_data_ver == 1) {
1773 if (!amdgpu_get_bios(adev)) {
1774 DRM_ERROR("failed to get vbios\n");
1775 return -EINVAL;
1776 }
1777
1778 r = amdgpu_atombios_init(adev);
1779 if (r) {
1780 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
1781 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
1782 return r;
1783 }
1784 }
2f294132 1785 }
122078de 1786
2f294132
ML
1787 /* we need to send REQ_GPU here for legacy handshaker otherwise the vbios
1788 * will not be prepared by host for this VF */
1789 if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver < 1) {
3149d9da
XY
1790 r = amdgpu_virt_request_full_gpu(adev, true);
1791 if (r)
2f294132 1792 return r;
3149d9da
XY
1793 }
1794
3b94fb10 1795 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 1796 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 1797 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
00f54b97 1798
d38ceaf9
AD
1799 for (i = 0; i < adev->num_ip_blocks; i++) {
1800 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
ed8cf00c
HR
1801 DRM_ERROR("disabled ip block: %d <%s>\n",
1802 i, adev->ip_blocks[i].version->funcs->name);
a1255107 1803 adev->ip_blocks[i].status.valid = false;
d38ceaf9 1804 } else {
a1255107
AD
1805 if (adev->ip_blocks[i].version->funcs->early_init) {
1806 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 1807 if (r == -ENOENT) {
a1255107 1808 adev->ip_blocks[i].status.valid = false;
2c1a2784 1809 } else if (r) {
a1255107
AD
1810 DRM_ERROR("early_init of IP block <%s> failed %d\n",
1811 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 1812 return r;
2c1a2784 1813 } else {
a1255107 1814 adev->ip_blocks[i].status.valid = true;
2c1a2784 1815 }
974e6b64 1816 } else {
a1255107 1817 adev->ip_blocks[i].status.valid = true;
d38ceaf9 1818 }
d38ceaf9 1819 }
21a249ca
AD
1820 /* get the vbios after the asic_funcs are set up */
1821 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
6e29c227
AD
1822 r = amdgpu_device_parse_gpu_info_fw(adev);
1823 if (r)
1824 return r;
1825
122078de
ML
1826 /* skip vbios handling for new handshake */
1827 if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver == 1)
1828 continue;
1829
21a249ca
AD
1830 /* Read BIOS */
1831 if (!amdgpu_get_bios(adev))
1832 return -EINVAL;
1833
1834 r = amdgpu_atombios_init(adev);
1835 if (r) {
1836 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
1837 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
1838 return r;
1839 }
1840 }
d38ceaf9
AD
1841 }
1842
395d1fb9
NH
1843 adev->cg_flags &= amdgpu_cg_mask;
1844 adev->pg_flags &= amdgpu_pg_mask;
1845
d38ceaf9
AD
1846 return 0;
1847}
1848
0a4f2520
RZ
1849static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
1850{
1851 int i, r;
1852
1853 for (i = 0; i < adev->num_ip_blocks; i++) {
1854 if (!adev->ip_blocks[i].status.sw)
1855 continue;
1856 if (adev->ip_blocks[i].status.hw)
1857 continue;
1858 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 1859 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
1860 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
1861 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1862 if (r) {
1863 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1864 adev->ip_blocks[i].version->funcs->name, r);
1865 return r;
1866 }
1867 adev->ip_blocks[i].status.hw = true;
1868 }
1869 }
1870
1871 return 0;
1872}
1873
1874static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
1875{
1876 int i, r;
1877
1878 for (i = 0; i < adev->num_ip_blocks; i++) {
1879 if (!adev->ip_blocks[i].status.sw)
1880 continue;
1881 if (adev->ip_blocks[i].status.hw)
1882 continue;
1883 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1884 if (r) {
1885 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1886 adev->ip_blocks[i].version->funcs->name, r);
1887 return r;
1888 }
1889 adev->ip_blocks[i].status.hw = true;
1890 }
1891
1892 return 0;
1893}
1894
7a3e0bb2
RZ
1895static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
1896{
1897 int r = 0;
1898 int i;
80f41f84 1899 uint32_t smu_version;
7a3e0bb2
RZ
1900
1901 if (adev->asic_type >= CHIP_VEGA10) {
1902 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
1903 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
1904 continue;
1905
1906 /* no need to do the fw loading again if already done*/
1907 if (adev->ip_blocks[i].status.hw == true)
1908 break;
1909
1910 if (adev->in_gpu_reset || adev->in_suspend) {
1911 r = adev->ip_blocks[i].version->funcs->resume(adev);
1912 if (r) {
1913 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 1914 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
1915 return r;
1916 }
1917 } else {
1918 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1919 if (r) {
1920 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1921 adev->ip_blocks[i].version->funcs->name, r);
1922 return r;
7a3e0bb2 1923 }
7a3e0bb2 1924 }
482f0e53
ML
1925
1926 adev->ip_blocks[i].status.hw = true;
1927 break;
7a3e0bb2
RZ
1928 }
1929 }
482f0e53 1930
8973d9ec
ED
1931 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
1932 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 1933
80f41f84 1934 return r;
7a3e0bb2
RZ
1935}
1936
e3ecdffa
AD
1937/**
1938 * amdgpu_device_ip_init - run init for hardware IPs
1939 *
1940 * @adev: amdgpu_device pointer
1941 *
1942 * Main initialization pass for hardware IPs. The list of all the hardware
1943 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
1944 * are run. sw_init initializes the software state associated with each IP
1945 * and hw_init initializes the hardware associated with each IP.
1946 * Returns 0 on success, negative error code on failure.
1947 */
06ec9070 1948static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
1949{
1950 int i, r;
1951
c030f2e4 1952 r = amdgpu_ras_init(adev);
1953 if (r)
1954 return r;
1955
2f294132
ML
1956 if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver > 0) {
1957 r = amdgpu_virt_request_full_gpu(adev, true);
1958 if (r)
1959 return -EAGAIN;
1960 }
1961
d38ceaf9 1962 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1963 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 1964 continue;
a1255107 1965 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 1966 if (r) {
a1255107
AD
1967 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
1968 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 1969 goto init_failed;
2c1a2784 1970 }
a1255107 1971 adev->ip_blocks[i].status.sw = true;
bfca0289 1972
d38ceaf9 1973 /* need to do gmc hw init early so we can allocate gpu mem */
a1255107 1974 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
06ec9070 1975 r = amdgpu_device_vram_scratch_init(adev);
2c1a2784
AD
1976 if (r) {
1977 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
72d3f592 1978 goto init_failed;
2c1a2784 1979 }
a1255107 1980 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
1981 if (r) {
1982 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 1983 goto init_failed;
2c1a2784 1984 }
06ec9070 1985 r = amdgpu_device_wb_init(adev);
2c1a2784 1986 if (r) {
06ec9070 1987 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 1988 goto init_failed;
2c1a2784 1989 }
a1255107 1990 adev->ip_blocks[i].status.hw = true;
2493664f
ML
1991
1992 /* right after GMC hw init, we create CSA */
f92d5c61 1993 if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
1e256e27
RZ
1994 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
1995 AMDGPU_GEM_DOMAIN_VRAM,
1996 AMDGPU_CSA_SIZE);
2493664f
ML
1997 if (r) {
1998 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 1999 goto init_failed;
2493664f
ML
2000 }
2001 }
d38ceaf9
AD
2002 }
2003 }
2004
c9ffa427
YT
2005 if (amdgpu_sriov_vf(adev))
2006 amdgpu_virt_init_data_exchange(adev);
2007
533aed27
AG
2008 r = amdgpu_ib_pool_init(adev);
2009 if (r) {
2010 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2011 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2012 goto init_failed;
2013 }
2014
c8963ea4
RZ
2015 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2016 if (r)
72d3f592 2017 goto init_failed;
0a4f2520
RZ
2018
2019 r = amdgpu_device_ip_hw_init_phase1(adev);
2020 if (r)
72d3f592 2021 goto init_failed;
0a4f2520 2022
7a3e0bb2
RZ
2023 r = amdgpu_device_fw_loading(adev);
2024 if (r)
72d3f592 2025 goto init_failed;
7a3e0bb2 2026
0a4f2520
RZ
2027 r = amdgpu_device_ip_hw_init_phase2(adev);
2028 if (r)
72d3f592 2029 goto init_failed;
d38ceaf9 2030
121a2bc6
AG
2031 /*
2032 * retired pages will be loaded from eeprom and reserved here,
2033 * it should be called after amdgpu_device_ip_hw_init_phase2 since
2034 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2035 * for I2C communication which only true at this point.
2036 * recovery_init may fail, but it can free all resources allocated by
2037 * itself and its failure should not stop amdgpu init process.
2038 *
2039 * Note: theoretically, this should be called before all vram allocations
2040 * to protect retired page from abusing
2041 */
2042 amdgpu_ras_recovery_init(adev);
2043
3e2e2ab5
HZ
2044 if (adev->gmc.xgmi.num_physical_nodes > 1)
2045 amdgpu_xgmi_add_device(adev);
1884734a 2046 amdgpu_amdkfd_device_init(adev);
c6332b97 2047
bd607166
KR
2048 amdgpu_fru_get_product_info(adev);
2049
72d3f592 2050init_failed:
c9ffa427 2051 if (amdgpu_sriov_vf(adev))
c6332b97 2052 amdgpu_virt_release_full_gpu(adev, true);
2053
72d3f592 2054 return r;
d38ceaf9
AD
2055}
2056
e3ecdffa
AD
2057/**
2058 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2059 *
2060 * @adev: amdgpu_device pointer
2061 *
2062 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2063 * this function before a GPU reset. If the value is retained after a
2064 * GPU reset, VRAM has not been lost. Some GPU resets may destry VRAM contents.
2065 */
06ec9070 2066static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
2067{
2068 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2069}
2070
e3ecdffa
AD
2071/**
2072 * amdgpu_device_check_vram_lost - check if vram is valid
2073 *
2074 * @adev: amdgpu_device pointer
2075 *
2076 * Checks the reset magic value written to the gart pointer in VRAM.
2077 * The driver calls this after a GPU reset to see if the contents of
2078 * VRAM is lost or now.
2079 * returns true if vram is lost, false if not.
2080 */
06ec9070 2081static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8 2082{
dadce777
EQ
2083 if (memcmp(adev->gart.ptr, adev->reset_magic,
2084 AMDGPU_RESET_MAGIC_NUM))
2085 return true;
2086
2087 if (!adev->in_gpu_reset)
2088 return false;
2089
2090 /*
2091 * For all ASICs with baco/mode1 reset, the VRAM is
2092 * always assumed to be lost.
2093 */
2094 switch (amdgpu_asic_reset_method(adev)) {
2095 case AMD_RESET_METHOD_BACO:
2096 case AMD_RESET_METHOD_MODE1:
2097 return true;
2098 default:
2099 return false;
2100 }
0c49e0b8
CZ
2101}
2102
e3ecdffa 2103/**
1112a46b 2104 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
2105 *
2106 * @adev: amdgpu_device pointer
b8b72130 2107 * @state: clockgating state (gate or ungate)
e3ecdffa 2108 *
e3ecdffa 2109 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
2110 * set_clockgating_state callbacks are run.
2111 * Late initialization pass enabling clockgating for hardware IPs.
2112 * Fini or suspend, pass disabling clockgating for hardware IPs.
e3ecdffa
AD
2113 * Returns 0 on success, negative error code on failure.
2114 */
fdd34271 2115
1112a46b
RZ
2116static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2117 enum amd_clockgating_state state)
d38ceaf9 2118{
1112a46b 2119 int i, j, r;
d38ceaf9 2120
4a2ba394
SL
2121 if (amdgpu_emu_mode == 1)
2122 return 0;
2123
1112a46b
RZ
2124 for (j = 0; j < adev->num_ip_blocks; j++) {
2125 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2126 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2127 continue;
4a446d55 2128 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2129 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2130 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2131 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2132 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2133 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2134 /* enable clockgating to save power */
a1255107 2135 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2136 state);
4a446d55
AD
2137 if (r) {
2138 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2139 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2140 return r;
2141 }
b0b00ff1 2142 }
d38ceaf9 2143 }
06b18f61 2144
c9f96fd5
RZ
2145 return 0;
2146}
2147
1112a46b 2148static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
c9f96fd5 2149{
1112a46b 2150 int i, j, r;
06b18f61 2151
c9f96fd5
RZ
2152 if (amdgpu_emu_mode == 1)
2153 return 0;
2154
1112a46b
RZ
2155 for (j = 0; j < adev->num_ip_blocks; j++) {
2156 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2157 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5
RZ
2158 continue;
2159 /* skip CG for VCE/UVD, it's handled specially */
2160 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2161 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2162 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2163 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2164 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2165 /* enable powergating to save power */
2166 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2167 state);
c9f96fd5
RZ
2168 if (r) {
2169 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2170 adev->ip_blocks[i].version->funcs->name, r);
2171 return r;
2172 }
2173 }
2174 }
2dc80b00
S
2175 return 0;
2176}
2177
beff74bc
AD
2178static int amdgpu_device_enable_mgpu_fan_boost(void)
2179{
2180 struct amdgpu_gpu_instance *gpu_ins;
2181 struct amdgpu_device *adev;
2182 int i, ret = 0;
2183
2184 mutex_lock(&mgpu_info.mutex);
2185
2186 /*
2187 * MGPU fan boost feature should be enabled
2188 * only when there are two or more dGPUs in
2189 * the system
2190 */
2191 if (mgpu_info.num_dgpu < 2)
2192 goto out;
2193
2194 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2195 gpu_ins = &(mgpu_info.gpu_ins[i]);
2196 adev = gpu_ins->adev;
2197 if (!(adev->flags & AMD_IS_APU) &&
2198 !gpu_ins->mgpu_fan_enabled &&
2199 adev->powerplay.pp_funcs &&
2200 adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
2201 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2202 if (ret)
2203 break;
2204
2205 gpu_ins->mgpu_fan_enabled = 1;
2206 }
2207 }
2208
2209out:
2210 mutex_unlock(&mgpu_info.mutex);
2211
2212 return ret;
2213}
2214
e3ecdffa
AD
2215/**
2216 * amdgpu_device_ip_late_init - run late init for hardware IPs
2217 *
2218 * @adev: amdgpu_device pointer
2219 *
2220 * Late initialization pass for hardware IPs. The list of all the hardware
2221 * IPs that make up the asic is walked and the late_init callbacks are run.
2222 * late_init covers any special initialization that an IP requires
2223 * after all of the have been initialized or something that needs to happen
2224 * late in the init process.
2225 * Returns 0 on success, negative error code on failure.
2226 */
06ec9070 2227static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2228{
60599a03 2229 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2230 int i = 0, r;
2231
2232 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2233 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2234 continue;
2235 if (adev->ip_blocks[i].version->funcs->late_init) {
2236 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2237 if (r) {
2238 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2239 adev->ip_blocks[i].version->funcs->name, r);
2240 return r;
2241 }
2dc80b00 2242 }
73f847db 2243 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2244 }
2245
a891d239
DL
2246 amdgpu_ras_set_error_query_ready(adev, true);
2247
1112a46b
RZ
2248 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2249 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2250
06ec9070 2251 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2252
beff74bc
AD
2253 r = amdgpu_device_enable_mgpu_fan_boost();
2254 if (r)
2255 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2256
60599a03
EQ
2257
2258 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2259 mutex_lock(&mgpu_info.mutex);
2260
2261 /*
2262 * Reset device p-state to low as this was booted with high.
2263 *
2264 * This should be performed only after all devices from the same
2265 * hive get initialized.
2266 *
2267 * However, it's unknown how many device in the hive in advance.
2268 * As this is counted one by one during devices initializations.
2269 *
2270 * So, we wait for all XGMI interlinked devices initialized.
2271 * This may bring some delays as those devices may come from
2272 * different hives. But that should be OK.
2273 */
2274 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2275 for (i = 0; i < mgpu_info.num_gpu; i++) {
2276 gpu_instance = &(mgpu_info.gpu_ins[i]);
2277 if (gpu_instance->adev->flags & AMD_IS_APU)
2278 continue;
2279
d84a430d
JK
2280 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2281 AMDGPU_XGMI_PSTATE_MIN);
60599a03
EQ
2282 if (r) {
2283 DRM_ERROR("pstate setting failed (%d).\n", r);
2284 break;
2285 }
2286 }
2287 }
2288
2289 mutex_unlock(&mgpu_info.mutex);
2290 }
2291
d38ceaf9
AD
2292 return 0;
2293}
2294
e3ecdffa
AD
2295/**
2296 * amdgpu_device_ip_fini - run fini for hardware IPs
2297 *
2298 * @adev: amdgpu_device pointer
2299 *
2300 * Main teardown pass for hardware IPs. The list of all the hardware
2301 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2302 * are run. hw_fini tears down the hardware associated with each IP
2303 * and sw_fini tears down any software state associated with each IP.
2304 * Returns 0 on success, negative error code on failure.
2305 */
06ec9070 2306static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
d38ceaf9
AD
2307{
2308 int i, r;
2309
c030f2e4 2310 amdgpu_ras_pre_fini(adev);
2311
a82400b5
AG
2312 if (adev->gmc.xgmi.num_physical_nodes > 1)
2313 amdgpu_xgmi_remove_device(adev);
2314
1884734a 2315 amdgpu_amdkfd_device_fini(adev);
05df1f01
RZ
2316
2317 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2318 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2319
3e96dbfd
AD
2320 /* need to disable SMC first */
2321 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2322 if (!adev->ip_blocks[i].status.hw)
3e96dbfd 2323 continue;
fdd34271 2324 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
a1255107 2325 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3e96dbfd
AD
2326 /* XXX handle errors */
2327 if (r) {
2328 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
a1255107 2329 adev->ip_blocks[i].version->funcs->name, r);
3e96dbfd 2330 }
a1255107 2331 adev->ip_blocks[i].status.hw = false;
3e96dbfd
AD
2332 break;
2333 }
2334 }
2335
d38ceaf9 2336 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2337 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2338 continue;
8201a67a 2339
a1255107 2340 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2341 /* XXX handle errors */
2c1a2784 2342 if (r) {
a1255107
AD
2343 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2344 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2345 }
8201a67a 2346
a1255107 2347 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2348 }
2349
9950cda2 2350
d38ceaf9 2351 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2352 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2353 continue;
c12aba3a
ML
2354
2355 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2356 amdgpu_ucode_free_bo(adev);
1e256e27 2357 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a
ML
2358 amdgpu_device_wb_fini(adev);
2359 amdgpu_device_vram_scratch_fini(adev);
533aed27 2360 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2361 }
2362
a1255107 2363 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2364 /* XXX handle errors */
2c1a2784 2365 if (r) {
a1255107
AD
2366 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2367 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2368 }
a1255107
AD
2369 adev->ip_blocks[i].status.sw = false;
2370 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2371 }
2372
a6dcfd9c 2373 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2374 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2375 continue;
a1255107
AD
2376 if (adev->ip_blocks[i].version->funcs->late_fini)
2377 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2378 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2379 }
2380
c030f2e4 2381 amdgpu_ras_fini(adev);
2382
030308fc 2383 if (amdgpu_sriov_vf(adev))
24136135
ML
2384 if (amdgpu_virt_release_full_gpu(adev, false))
2385 DRM_ERROR("failed to release exclusive mode on fini\n");
2493664f 2386
d38ceaf9
AD
2387 return 0;
2388}
2389
e3ecdffa 2390/**
beff74bc 2391 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2392 *
1112a46b 2393 * @work: work_struct.
e3ecdffa 2394 */
beff74bc 2395static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2396{
2397 struct amdgpu_device *adev =
beff74bc 2398 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2399 int r;
2400
2401 r = amdgpu_ib_ring_tests(adev);
2402 if (r)
2403 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2404}
2405
1e317b99
RZ
2406static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2407{
2408 struct amdgpu_device *adev =
2409 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2410
2411 mutex_lock(&adev->gfx.gfx_off_mutex);
2412 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2413 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2414 adev->gfx.gfx_off_state = true;
2415 }
2416 mutex_unlock(&adev->gfx.gfx_off_mutex);
2417}
2418
e3ecdffa 2419/**
e7854a03 2420 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2421 *
2422 * @adev: amdgpu_device pointer
2423 *
2424 * Main suspend function for hardware IPs. The list of all the hardware
2425 * IPs that make up the asic is walked, clockgating is disabled and the
2426 * suspend callbacks are run. suspend puts the hardware and software state
2427 * in each IP into a state suitable for suspend.
2428 * Returns 0 on success, negative error code on failure.
2429 */
e7854a03
AD
2430static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2431{
2432 int i, r;
2433
ced1ba97
PL
2434 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2435 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2436
e7854a03
AD
2437 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2438 if (!adev->ip_blocks[i].status.valid)
2439 continue;
2440 /* displays are handled separately */
2441 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
e7854a03
AD
2442 /* XXX handle errors */
2443 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2444 /* XXX handle errors */
2445 if (r) {
2446 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2447 adev->ip_blocks[i].version->funcs->name, r);
482f0e53 2448 return r;
e7854a03 2449 }
482f0e53 2450 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
2451 }
2452 }
2453
e7854a03
AD
2454 return 0;
2455}
2456
2457/**
2458 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2459 *
2460 * @adev: amdgpu_device pointer
2461 *
2462 * Main suspend function for hardware IPs. The list of all the hardware
2463 * IPs that make up the asic is walked, clockgating is disabled and the
2464 * suspend callbacks are run. suspend puts the hardware and software state
2465 * in each IP into a state suitable for suspend.
2466 * Returns 0 on success, negative error code on failure.
2467 */
2468static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2469{
2470 int i, r;
2471
2472 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2473 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2474 continue;
e7854a03
AD
2475 /* displays are handled in phase1 */
2476 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2477 continue;
bff77e86
LM
2478 /* PSP lost connection when err_event_athub occurs */
2479 if (amdgpu_ras_intr_triggered() &&
2480 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2481 adev->ip_blocks[i].status.hw = false;
2482 continue;
2483 }
d38ceaf9 2484 /* XXX handle errors */
a1255107 2485 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 2486 /* XXX handle errors */
2c1a2784 2487 if (r) {
a1255107
AD
2488 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2489 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2490 }
876923fb 2491 adev->ip_blocks[i].status.hw = false;
a3a09142 2492 /* handle putting the SMC in the appropriate state */
86b93fd6
JZ
2493 if(!amdgpu_sriov_vf(adev)){
2494 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2495 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
2496 if (r) {
2497 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
2498 adev->mp1_state, r);
2499 return r;
2500 }
a3a09142
AD
2501 }
2502 }
b5507c7e 2503 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2504 }
2505
2506 return 0;
2507}
2508
e7854a03
AD
2509/**
2510 * amdgpu_device_ip_suspend - run suspend for hardware IPs
2511 *
2512 * @adev: amdgpu_device pointer
2513 *
2514 * Main suspend function for hardware IPs. The list of all the hardware
2515 * IPs that make up the asic is walked, clockgating is disabled and the
2516 * suspend callbacks are run. suspend puts the hardware and software state
2517 * in each IP into a state suitable for suspend.
2518 * Returns 0 on success, negative error code on failure.
2519 */
2520int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2521{
2522 int r;
2523
e7819644
YT
2524 if (amdgpu_sriov_vf(adev))
2525 amdgpu_virt_request_full_gpu(adev, false);
2526
e7854a03
AD
2527 r = amdgpu_device_ip_suspend_phase1(adev);
2528 if (r)
2529 return r;
2530 r = amdgpu_device_ip_suspend_phase2(adev);
2531
e7819644
YT
2532 if (amdgpu_sriov_vf(adev))
2533 amdgpu_virt_release_full_gpu(adev, false);
2534
e7854a03
AD
2535 return r;
2536}
2537
06ec9070 2538static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2539{
2540 int i, r;
2541
2cb681b6
ML
2542 static enum amd_ip_block_type ip_order[] = {
2543 AMD_IP_BLOCK_TYPE_GMC,
2544 AMD_IP_BLOCK_TYPE_COMMON,
39186aef 2545 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
2546 AMD_IP_BLOCK_TYPE_IH,
2547 };
a90ad3c2 2548
2cb681b6
ML
2549 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2550 int j;
2551 struct amdgpu_ip_block *block;
a90ad3c2 2552
2cb681b6
ML
2553 for (j = 0; j < adev->num_ip_blocks; j++) {
2554 block = &adev->ip_blocks[j];
2555
482f0e53 2556 block->status.hw = false;
2cb681b6
ML
2557 if (block->version->type != ip_order[i] ||
2558 !block->status.valid)
2559 continue;
2560
2561 r = block->version->funcs->hw_init(adev);
0aaeefcc 2562 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2563 if (r)
2564 return r;
482f0e53 2565 block->status.hw = true;
a90ad3c2
ML
2566 }
2567 }
2568
2569 return 0;
2570}
2571
06ec9070 2572static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2573{
2574 int i, r;
2575
2cb681b6
ML
2576 static enum amd_ip_block_type ip_order[] = {
2577 AMD_IP_BLOCK_TYPE_SMC,
2578 AMD_IP_BLOCK_TYPE_DCE,
2579 AMD_IP_BLOCK_TYPE_GFX,
2580 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c 2581 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07
JJ
2582 AMD_IP_BLOCK_TYPE_VCE,
2583 AMD_IP_BLOCK_TYPE_VCN
2cb681b6 2584 };
a90ad3c2 2585
2cb681b6
ML
2586 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2587 int j;
2588 struct amdgpu_ip_block *block;
a90ad3c2 2589
2cb681b6
ML
2590 for (j = 0; j < adev->num_ip_blocks; j++) {
2591 block = &adev->ip_blocks[j];
2592
2593 if (block->version->type != ip_order[i] ||
482f0e53
ML
2594 !block->status.valid ||
2595 block->status.hw)
2cb681b6
ML
2596 continue;
2597
895bd048
JZ
2598 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
2599 r = block->version->funcs->resume(adev);
2600 else
2601 r = block->version->funcs->hw_init(adev);
2602
0aaeefcc 2603 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2604 if (r)
2605 return r;
482f0e53 2606 block->status.hw = true;
a90ad3c2
ML
2607 }
2608 }
2609
2610 return 0;
2611}
2612
e3ecdffa
AD
2613/**
2614 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2615 *
2616 * @adev: amdgpu_device pointer
2617 *
2618 * First resume function for hardware IPs. The list of all the hardware
2619 * IPs that make up the asic is walked and the resume callbacks are run for
2620 * COMMON, GMC, and IH. resume puts the hardware into a functional state
2621 * after a suspend and updates the software state as necessary. This
2622 * function is also used for restoring the GPU after a GPU reset.
2623 * Returns 0 on success, negative error code on failure.
2624 */
06ec9070 2625static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
2626{
2627 int i, r;
2628
a90ad3c2 2629 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2630 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 2631 continue;
a90ad3c2 2632 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa
AD
2633 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2634 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
482f0e53 2635
fcf0649f
CZ
2636 r = adev->ip_blocks[i].version->funcs->resume(adev);
2637 if (r) {
2638 DRM_ERROR("resume of IP block <%s> failed %d\n",
2639 adev->ip_blocks[i].version->funcs->name, r);
2640 return r;
2641 }
482f0e53 2642 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
2643 }
2644 }
2645
2646 return 0;
2647}
2648
e3ecdffa
AD
2649/**
2650 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2651 *
2652 * @adev: amdgpu_device pointer
2653 *
2654 * First resume function for hardware IPs. The list of all the hardware
2655 * IPs that make up the asic is walked and the resume callbacks are run for
2656 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
2657 * functional state after a suspend and updates the software state as
2658 * necessary. This function is also used for restoring the GPU after a GPU
2659 * reset.
2660 * Returns 0 on success, negative error code on failure.
2661 */
06ec9070 2662static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2663{
2664 int i, r;
2665
2666 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2667 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 2668 continue;
fcf0649f 2669 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 2670 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
2671 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2672 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 2673 continue;
a1255107 2674 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 2675 if (r) {
a1255107
AD
2676 DRM_ERROR("resume of IP block <%s> failed %d\n",
2677 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2678 return r;
2c1a2784 2679 }
482f0e53 2680 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
2681 }
2682
2683 return 0;
2684}
2685
e3ecdffa
AD
2686/**
2687 * amdgpu_device_ip_resume - run resume for hardware IPs
2688 *
2689 * @adev: amdgpu_device pointer
2690 *
2691 * Main resume function for hardware IPs. The hardware IPs
2692 * are split into two resume functions because they are
2693 * are also used in in recovering from a GPU reset and some additional
2694 * steps need to be take between them. In this case (S3/S4) they are
2695 * run sequentially.
2696 * Returns 0 on success, negative error code on failure.
2697 */
06ec9070 2698static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
2699{
2700 int r;
2701
06ec9070 2702 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
2703 if (r)
2704 return r;
7a3e0bb2
RZ
2705
2706 r = amdgpu_device_fw_loading(adev);
2707 if (r)
2708 return r;
2709
06ec9070 2710 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
2711
2712 return r;
2713}
2714
e3ecdffa
AD
2715/**
2716 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2717 *
2718 * @adev: amdgpu_device pointer
2719 *
2720 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2721 */
4e99a44e 2722static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 2723{
6867e1b5
ML
2724 if (amdgpu_sriov_vf(adev)) {
2725 if (adev->is_atom_fw) {
2726 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2727 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2728 } else {
2729 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2730 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2731 }
2732
2733 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2734 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 2735 }
048765ad
AR
2736}
2737
e3ecdffa
AD
2738/**
2739 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2740 *
2741 * @asic_type: AMD asic type
2742 *
2743 * Check if there is DC (new modesetting infrastructre) support for an asic.
2744 * returns true if DC has support, false if not.
2745 */
4562236b
HW
2746bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2747{
2748 switch (asic_type) {
2749#if defined(CONFIG_DRM_AMD_DC)
2750 case CHIP_BONAIRE:
0d6fbccb 2751 case CHIP_KAVERI:
367e6687
AD
2752 case CHIP_KABINI:
2753 case CHIP_MULLINS:
d9fda248
HW
2754 /*
2755 * We have systems in the wild with these ASICs that require
2756 * LVDS and VGA support which is not supported with DC.
2757 *
2758 * Fallback to the non-DC driver here by default so as not to
2759 * cause regressions.
2760 */
2761 return amdgpu_dc > 0;
2762 case CHIP_HAWAII:
4562236b
HW
2763 case CHIP_CARRIZO:
2764 case CHIP_STONEY:
4562236b 2765 case CHIP_POLARIS10:
675fd32b 2766 case CHIP_POLARIS11:
2c8ad2d5 2767 case CHIP_POLARIS12:
675fd32b 2768 case CHIP_VEGAM:
4562236b
HW
2769 case CHIP_TONGA:
2770 case CHIP_FIJI:
42f8ffa1 2771 case CHIP_VEGA10:
dca7b401 2772 case CHIP_VEGA12:
c6034aa2 2773 case CHIP_VEGA20:
b86a1aa3 2774#if defined(CONFIG_DRM_AMD_DC_DCN)
fd187853 2775 case CHIP_RAVEN:
b4f199c7 2776 case CHIP_NAVI10:
8fceceb6 2777 case CHIP_NAVI14:
078655d9 2778 case CHIP_NAVI12:
e1c14c43 2779 case CHIP_RENOIR:
42f8ffa1 2780#endif
fd187853 2781 return amdgpu_dc != 0;
4562236b
HW
2782#endif
2783 default:
93b09a9a
SS
2784 if (amdgpu_dc > 0)
2785 DRM_INFO("Display Core has been requested via kernel parameter "
2786 "but isn't supported by ASIC, ignoring\n");
4562236b
HW
2787 return false;
2788 }
2789}
2790
2791/**
2792 * amdgpu_device_has_dc_support - check if dc is supported
2793 *
2794 * @adev: amdgpu_device_pointer
2795 *
2796 * Returns true for supported, false for not supported
2797 */
2798bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2799{
2555039d
XY
2800 if (amdgpu_sriov_vf(adev))
2801 return false;
2802
4562236b
HW
2803 return amdgpu_device_asic_has_dc_support(adev->asic_type);
2804}
2805
d4535e2c
AG
2806
2807static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
2808{
2809 struct amdgpu_device *adev =
2810 container_of(__work, struct amdgpu_device, xgmi_reset_work);
c6a6e2db 2811 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
d4535e2c 2812
c6a6e2db
AG
2813 /* It's a bug to not have a hive within this function */
2814 if (WARN_ON(!hive))
2815 return;
2816
2817 /*
2818 * Use task barrier to synchronize all xgmi reset works across the
2819 * hive. task_barrier_enter and task_barrier_exit will block
2820 * until all the threads running the xgmi reset works reach
2821 * those points. task_barrier_full will do both blocks.
2822 */
2823 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
2824
2825 task_barrier_enter(&hive->tb);
2826 adev->asic_reset_res = amdgpu_device_baco_enter(adev->ddev);
2827
2828 if (adev->asic_reset_res)
2829 goto fail;
2830
2831 task_barrier_exit(&hive->tb);
2832 adev->asic_reset_res = amdgpu_device_baco_exit(adev->ddev);
2833
2834 if (adev->asic_reset_res)
2835 goto fail;
43c4d576
JC
2836
2837 if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count)
2838 adev->mmhub.funcs->reset_ras_error_count(adev);
c6a6e2db
AG
2839 } else {
2840
2841 task_barrier_full(&hive->tb);
2842 adev->asic_reset_res = amdgpu_asic_reset(adev);
2843 }
ce316fa5 2844
c6a6e2db 2845fail:
d4535e2c 2846 if (adev->asic_reset_res)
fed184e9 2847 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
d4535e2c
AG
2848 adev->asic_reset_res, adev->ddev->unique);
2849}
2850
71f98027
AD
2851static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
2852{
2853 char *input = amdgpu_lockup_timeout;
2854 char *timeout_setting = NULL;
2855 int index = 0;
2856 long timeout;
2857 int ret = 0;
2858
2859 /*
2860 * By default timeout for non compute jobs is 10000.
2861 * And there is no timeout enforced on compute jobs.
2862 * In SR-IOV or passthrough mode, timeout for compute
b7b2a316 2863 * jobs are 60000 by default.
71f98027
AD
2864 */
2865 adev->gfx_timeout = msecs_to_jiffies(10000);
2866 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
2867 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
b7b2a316 2868 adev->compute_timeout = msecs_to_jiffies(60000);
71f98027
AD
2869 else
2870 adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
2871
f440ff44 2872 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 2873 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 2874 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
2875 ret = kstrtol(timeout_setting, 0, &timeout);
2876 if (ret)
2877 return ret;
2878
2879 if (timeout == 0) {
2880 index++;
2881 continue;
2882 } else if (timeout < 0) {
2883 timeout = MAX_SCHEDULE_TIMEOUT;
2884 } else {
2885 timeout = msecs_to_jiffies(timeout);
2886 }
2887
2888 switch (index++) {
2889 case 0:
2890 adev->gfx_timeout = timeout;
2891 break;
2892 case 1:
2893 adev->compute_timeout = timeout;
2894 break;
2895 case 2:
2896 adev->sdma_timeout = timeout;
2897 break;
2898 case 3:
2899 adev->video_timeout = timeout;
2900 break;
2901 default:
2902 break;
2903 }
2904 }
2905 /*
2906 * There is only one value specified and
2907 * it should apply to all non-compute jobs.
2908 */
bcccee89 2909 if (index == 1) {
71f98027 2910 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
2911 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
2912 adev->compute_timeout = adev->gfx_timeout;
2913 }
71f98027
AD
2914 }
2915
2916 return ret;
2917}
d4535e2c 2918
77f3a5cd
ND
2919static const struct attribute *amdgpu_dev_attributes[] = {
2920 &dev_attr_product_name.attr,
2921 &dev_attr_product_number.attr,
2922 &dev_attr_serial_number.attr,
2923 &dev_attr_pcie_replay_count.attr,
2924 NULL
2925};
2926
d38ceaf9
AD
2927/**
2928 * amdgpu_device_init - initialize the driver
2929 *
2930 * @adev: amdgpu_device pointer
87e3f136 2931 * @ddev: drm dev pointer
d38ceaf9
AD
2932 * @pdev: pci dev pointer
2933 * @flags: driver flags
2934 *
2935 * Initializes the driver info and hw (all asics).
2936 * Returns 0 for success or an error on failure.
2937 * Called at driver startup.
2938 */
2939int amdgpu_device_init(struct amdgpu_device *adev,
2940 struct drm_device *ddev,
2941 struct pci_dev *pdev,
2942 uint32_t flags)
2943{
2944 int r, i;
3840c5bc 2945 bool boco = false;
95844d20 2946 u32 max_MBps;
d38ceaf9
AD
2947
2948 adev->shutdown = false;
2949 adev->dev = &pdev->dev;
2950 adev->ddev = ddev;
2951 adev->pdev = pdev;
2952 adev->flags = flags;
4e66d7d2
YZ
2953
2954 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
2955 adev->asic_type = amdgpu_force_asic_type;
2956 else
2957 adev->asic_type = flags & AMD_ASIC_MASK;
2958
d38ceaf9 2959 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 2960 if (amdgpu_emu_mode == 1)
8bdab6bb 2961 adev->usec_timeout *= 10;
770d13b1 2962 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
2963 adev->accel_working = false;
2964 adev->num_rings = 0;
2965 adev->mman.buffer_funcs = NULL;
2966 adev->mman.buffer_funcs_ring = NULL;
2967 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 2968 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 2969 adev->gmc.gmc_funcs = NULL;
f54d1867 2970 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 2971 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
2972
2973 adev->smc_rreg = &amdgpu_invalid_rreg;
2974 adev->smc_wreg = &amdgpu_invalid_wreg;
2975 adev->pcie_rreg = &amdgpu_invalid_rreg;
2976 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
2977 adev->pciep_rreg = &amdgpu_invalid_rreg;
2978 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
2979 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
2980 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
2981 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
2982 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
2983 adev->didt_rreg = &amdgpu_invalid_rreg;
2984 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
2985 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
2986 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
2987 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
2988 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
2989
3e39ab90
AD
2990 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
2991 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
2992 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
2993
2994 /* mutex initialization are all done here so we
2995 * can recall function without having locking issues */
d38ceaf9 2996 atomic_set(&adev->irq.ih.lock, 0);
0e5ca0d1 2997 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
2998 mutex_init(&adev->pm.mutex);
2999 mutex_init(&adev->gfx.gpu_clock_mutex);
3000 mutex_init(&adev->srbm_mutex);
b8866c26 3001 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 3002 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 3003 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 3004 mutex_init(&adev->mn_lock);
e23b74aa 3005 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 3006 hash_init(adev->mn_hash);
13a752e3 3007 mutex_init(&adev->lock_reset);
32eaeae0 3008 mutex_init(&adev->psp.mutex);
bd052211 3009 mutex_init(&adev->notifier_lock);
d38ceaf9 3010
912dfc84
EQ
3011 r = amdgpu_device_check_arguments(adev);
3012 if (r)
3013 return r;
d38ceaf9 3014
d38ceaf9
AD
3015 spin_lock_init(&adev->mmio_idx_lock);
3016 spin_lock_init(&adev->smc_idx_lock);
3017 spin_lock_init(&adev->pcie_idx_lock);
3018 spin_lock_init(&adev->uvd_ctx_idx_lock);
3019 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 3020 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 3021 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 3022 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 3023 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 3024
0c4e7fa5
CZ
3025 INIT_LIST_HEAD(&adev->shadow_list);
3026 mutex_init(&adev->shadow_list_lock);
3027
beff74bc
AD
3028 INIT_DELAYED_WORK(&adev->delayed_init_work,
3029 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
3030 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3031 amdgpu_device_delay_enable_gfx_off);
2dc80b00 3032
d4535e2c
AG
3033 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3034
d23ee13f 3035 adev->gfx.gfx_off_req_count = 1;
b6e79d9a 3036 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
b1ddf548 3037
b265bdbd
EQ
3038 atomic_set(&adev->throttling_logging_enabled, 1);
3039 /*
3040 * If throttling continues, logging will be performed every minute
3041 * to avoid log flooding. "-1" is subtracted since the thermal
3042 * throttling interrupt comes every second. Thus, the total logging
3043 * interval is 59 seconds(retelimited printk interval) + 1(waiting
3044 * for throttling interrupt) = 60 seconds.
3045 */
3046 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3047 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3048
0fa49558
AX
3049 /* Registers mapping */
3050 /* TODO: block userspace mapping of io register */
da69c161
KW
3051 if (adev->asic_type >= CHIP_BONAIRE) {
3052 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3053 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3054 } else {
3055 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3056 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3057 }
d38ceaf9 3058
d38ceaf9
AD
3059 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3060 if (adev->rmmio == NULL) {
3061 return -ENOMEM;
3062 }
3063 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3064 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3065
d38ceaf9
AD
3066 /* io port mapping */
3067 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
3068 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
3069 adev->rio_mem_size = pci_resource_len(adev->pdev, i);
3070 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
3071 break;
3072 }
3073 }
3074 if (adev->rio_mem == NULL)
b64a18c5 3075 DRM_INFO("PCI I/O BAR is not found.\n");
d38ceaf9 3076
b2109d8e
JX
3077 /* enable PCIE atomic ops */
3078 r = pci_enable_atomic_ops_to_root(adev->pdev,
3079 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3080 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3081 if (r) {
3082 adev->have_atomics_support = false;
3083 DRM_INFO("PCIE atomic ops is not supported\n");
3084 } else {
3085 adev->have_atomics_support = true;
3086 }
3087
5494d864
AD
3088 amdgpu_device_get_pcie_info(adev);
3089
b239c017
JX
3090 if (amdgpu_mcbp)
3091 DRM_INFO("MCBP is enabled\n");
3092
5f84cc63
JX
3093 if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
3094 adev->enable_mes = true;
3095
3aa0115d
ML
3096 /* detect hw virtualization here */
3097 amdgpu_detect_virtualization(adev);
3098
dffa11b4
ML
3099 r = amdgpu_device_get_job_timeout_settings(adev);
3100 if (r) {
3101 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
3102 return r;
a190d1c7
XY
3103 }
3104
d38ceaf9 3105 /* early init functions */
06ec9070 3106 r = amdgpu_device_ip_early_init(adev);
d38ceaf9
AD
3107 if (r)
3108 return r;
3109
6585661d
OZ
3110 /* doorbell bar mapping and doorbell index init*/
3111 amdgpu_device_doorbell_init(adev);
3112
d38ceaf9
AD
3113 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
3114 /* this will fail for cards that aren't VGA class devices, just
3115 * ignore it */
06ec9070 3116 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
d38ceaf9 3117
31af062a 3118 if (amdgpu_device_supports_boco(ddev))
3840c5bc
AD
3119 boco = true;
3120 if (amdgpu_has_atpx() &&
3121 (amdgpu_is_atpx_hybrid() ||
3122 amdgpu_has_atpx_dgpu_power_cntl()) &&
3123 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 3124 vga_switcheroo_register_client(adev->pdev,
3840c5bc
AD
3125 &amdgpu_switcheroo_ops, boco);
3126 if (boco)
d38ceaf9
AD
3127 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
3128
9475a943
SL
3129 if (amdgpu_emu_mode == 1) {
3130 /* post the asic on emulation mode */
3131 emu_soc_asic_init(adev);
bfca0289 3132 goto fence_driver_init;
9475a943 3133 }
bfca0289 3134
4e99a44e
ML
3135 /* detect if we are with an SRIOV vbios */
3136 amdgpu_device_detect_sriov_bios(adev);
048765ad 3137
95e8e59e
AD
3138 /* check if we need to reset the asic
3139 * E.g., driver was not cleanly unloaded previously, etc.
3140 */
f14899fd 3141 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
95e8e59e
AD
3142 r = amdgpu_asic_reset(adev);
3143 if (r) {
3144 dev_err(adev->dev, "asic reset on init failed\n");
3145 goto failed;
3146 }
3147 }
3148
d38ceaf9 3149 /* Post card if necessary */
39c640c0 3150 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3151 if (!adev->bios) {
bec86378 3152 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3153 r = -EINVAL;
3154 goto failed;
d38ceaf9 3155 }
bec86378 3156 DRM_INFO("GPU posting now...\n");
4e99a44e
ML
3157 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3158 if (r) {
3159 dev_err(adev->dev, "gpu post error!\n");
3160 goto failed;
3161 }
d38ceaf9
AD
3162 }
3163
88b64e95
AD
3164 if (adev->is_atom_fw) {
3165 /* Initialize clocks */
3166 r = amdgpu_atomfirmware_get_clock_info(adev);
3167 if (r) {
3168 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 3169 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
3170 goto failed;
3171 }
3172 } else {
a5bde2f9
AD
3173 /* Initialize clocks */
3174 r = amdgpu_atombios_get_clock_info(adev);
3175 if (r) {
3176 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 3177 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 3178 goto failed;
a5bde2f9
AD
3179 }
3180 /* init i2c buses */
4562236b
HW
3181 if (!amdgpu_device_has_dc_support(adev))
3182 amdgpu_atombios_i2c_init(adev);
2c1a2784 3183 }
d38ceaf9 3184
bfca0289 3185fence_driver_init:
d38ceaf9
AD
3186 /* Fence driver */
3187 r = amdgpu_fence_driver_init(adev);
2c1a2784
AD
3188 if (r) {
3189 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
e23b74aa 3190 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3191 goto failed;
2c1a2784 3192 }
d38ceaf9
AD
3193
3194 /* init the mode config */
3195 drm_mode_config_init(adev->ddev);
3196
06ec9070 3197 r = amdgpu_device_ip_init(adev);
d38ceaf9 3198 if (r) {
8840a387 3199 /* failed in exclusive mode due to timeout */
3200 if (amdgpu_sriov_vf(adev) &&
3201 !amdgpu_sriov_runtime(adev) &&
3202 amdgpu_virt_mmio_blocked(adev) &&
3203 !amdgpu_virt_wait_reset(adev)) {
3204 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
3205 /* Don't send request since VF is inactive. */
3206 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3207 adev->virt.ops = NULL;
8840a387 3208 r = -EAGAIN;
3209 goto failed;
3210 }
06ec9070 3211 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3212 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
83ba126a 3213 goto failed;
d38ceaf9
AD
3214 }
3215
d69b8971
YZ
3216 dev_info(adev->dev,
3217 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
d7f72fe4
YZ
3218 adev->gfx.config.max_shader_engines,
3219 adev->gfx.config.max_sh_per_se,
3220 adev->gfx.config.max_cu_per_sh,
3221 adev->gfx.cu_info.number);
3222
d38ceaf9
AD
3223 adev->accel_working = true;
3224
e59c0205
AX
3225 amdgpu_vm_check_compute_bug(adev);
3226
95844d20
MO
3227 /* Initialize the buffer migration limit. */
3228 if (amdgpu_moverate >= 0)
3229 max_MBps = amdgpu_moverate;
3230 else
3231 max_MBps = 8; /* Allow 8 MB/s. */
3232 /* Get a log2 for easy divisions. */
3233 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3234
9bc92b9c
ML
3235 amdgpu_fbdev_init(adev);
3236
d2f52ac8 3237 r = amdgpu_pm_sysfs_init(adev);
7c868b59
YT
3238 if (r) {
3239 adev->pm_sysfs_en = false;
d2f52ac8 3240 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
7c868b59
YT
3241 } else
3242 adev->pm_sysfs_en = true;
d2f52ac8 3243
5bb23532 3244 r = amdgpu_ucode_sysfs_init(adev);
7c868b59
YT
3245 if (r) {
3246 adev->ucode_sysfs_en = false;
5bb23532 3247 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
7c868b59
YT
3248 } else
3249 adev->ucode_sysfs_en = true;
5bb23532 3250
d38ceaf9
AD
3251 if ((amdgpu_testing & 1)) {
3252 if (adev->accel_working)
3253 amdgpu_test_moves(adev);
3254 else
3255 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
3256 }
d38ceaf9
AD
3257 if (amdgpu_benchmarking) {
3258 if (adev->accel_working)
3259 amdgpu_benchmark(adev, amdgpu_benchmarking);
3260 else
3261 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
3262 }
3263
b0adca4d
EQ
3264 /*
3265 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3266 * Otherwise the mgpu fan boost feature will be skipped due to the
3267 * gpu instance is counted less.
3268 */
3269 amdgpu_register_gpu_instance(adev);
3270
d38ceaf9
AD
3271 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3272 * explicit gating rather than handling it automatically.
3273 */
06ec9070 3274 r = amdgpu_device_ip_late_init(adev);
2c1a2784 3275 if (r) {
06ec9070 3276 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
e23b74aa 3277 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
83ba126a 3278 goto failed;
2c1a2784 3279 }
d38ceaf9 3280
108c6a63 3281 /* must succeed. */
511fdbc3 3282 amdgpu_ras_resume(adev);
108c6a63 3283
beff74bc
AD
3284 queue_delayed_work(system_wq, &adev->delayed_init_work,
3285 msecs_to_jiffies(AMDGPU_RESUME_MS));
3286
77f3a5cd 3287 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
bd607166 3288 if (r) {
77f3a5cd 3289 dev_err(adev->dev, "Could not create amdgpu device attr\n");
bd607166
KR
3290 return r;
3291 }
3292
d155bef0
AB
3293 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3294 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
3295 if (r)
3296 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3297
d38ceaf9 3298 return 0;
83ba126a
AD
3299
3300failed:
89041940 3301 amdgpu_vf_error_trans_all(adev);
3840c5bc 3302 if (boco)
83ba126a 3303 vga_switcheroo_fini_domain_pm_ops(adev->dev);
8840a387 3304
83ba126a 3305 return r;
d38ceaf9
AD
3306}
3307
d38ceaf9
AD
3308/**
3309 * amdgpu_device_fini - tear down the driver
3310 *
3311 * @adev: amdgpu_device pointer
3312 *
3313 * Tear down the driver info (all asics).
3314 * Called at driver shutdown.
3315 */
3316void amdgpu_device_fini(struct amdgpu_device *adev)
3317{
3318 int r;
3319
3320 DRM_INFO("amdgpu: finishing device.\n");
9f875167 3321 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 3322 adev->shutdown = true;
9f875167 3323
752c683d
ML
3324 /* make sure IB test finished before entering exclusive mode
3325 * to avoid preemption on IB test
3326 * */
3327 if (amdgpu_sriov_vf(adev))
3328 amdgpu_virt_request_full_gpu(adev, false);
3329
e5b03032
ML
3330 /* disable all interrupts */
3331 amdgpu_irq_disable_all(adev);
ff97cba8
ML
3332 if (adev->mode_info.mode_config_initialized){
3333 if (!amdgpu_device_has_dc_support(adev))
c2d88e06 3334 drm_helper_force_disable_all(adev->ddev);
ff97cba8
ML
3335 else
3336 drm_atomic_helper_shutdown(adev->ddev);
3337 }
d38ceaf9 3338 amdgpu_fence_driver_fini(adev);
7c868b59
YT
3339 if (adev->pm_sysfs_en)
3340 amdgpu_pm_sysfs_fini(adev);
d38ceaf9 3341 amdgpu_fbdev_fini(adev);
06ec9070 3342 r = amdgpu_device_ip_fini(adev);
ab4fe3e1
HR
3343 if (adev->firmware.gpu_info_fw) {
3344 release_firmware(adev->firmware.gpu_info_fw);
3345 adev->firmware.gpu_info_fw = NULL;
3346 }
d38ceaf9
AD
3347 adev->accel_working = false;
3348 /* free i2c buses */
4562236b
HW
3349 if (!amdgpu_device_has_dc_support(adev))
3350 amdgpu_i2c_fini(adev);
bfca0289
SL
3351
3352 if (amdgpu_emu_mode != 1)
3353 amdgpu_atombios_fini(adev);
3354
d38ceaf9
AD
3355 kfree(adev->bios);
3356 adev->bios = NULL;
3840c5bc
AD
3357 if (amdgpu_has_atpx() &&
3358 (amdgpu_is_atpx_hybrid() ||
3359 amdgpu_has_atpx_dgpu_power_cntl()) &&
3360 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 3361 vga_switcheroo_unregister_client(adev->pdev);
3840c5bc 3362 if (amdgpu_device_supports_boco(adev->ddev))
83ba126a 3363 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d38ceaf9
AD
3364 vga_client_register(adev->pdev, NULL, NULL, NULL);
3365 if (adev->rio_mem)
3366 pci_iounmap(adev->pdev, adev->rio_mem);
3367 adev->rio_mem = NULL;
3368 iounmap(adev->rmmio);
3369 adev->rmmio = NULL;
06ec9070 3370 amdgpu_device_doorbell_fini(adev);
e9bc1bf7 3371
7c868b59
YT
3372 if (adev->ucode_sysfs_en)
3373 amdgpu_ucode_sysfs_fini(adev);
77f3a5cd
ND
3374
3375 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
d155bef0
AB
3376 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3377 amdgpu_pmu_fini(adev);
f54eeab4 3378 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
a190d1c7 3379 amdgpu_discovery_fini(adev);
d38ceaf9
AD
3380}
3381
3382
3383/*
3384 * Suspend & resume.
3385 */
3386/**
810ddc3a 3387 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 3388 *
87e3f136
DP
3389 * @dev: drm dev pointer
3390 * @suspend: suspend state
3391 * @fbcon : notify the fbdev of suspend
d38ceaf9
AD
3392 *
3393 * Puts the hw in the suspend state (all asics).
3394 * Returns 0 for success or an error on failure.
3395 * Called at driver suspend.
3396 */
de185019 3397int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3398{
3399 struct amdgpu_device *adev;
3400 struct drm_crtc *crtc;
3401 struct drm_connector *connector;
f8d2d39e 3402 struct drm_connector_list_iter iter;
5ceb54c6 3403 int r;
d38ceaf9
AD
3404
3405 if (dev == NULL || dev->dev_private == NULL) {
3406 return -ENODEV;
3407 }
3408
3409 adev = dev->dev_private;
3410
3411 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3412 return 0;
3413
44779b43 3414 adev->in_suspend = true;
d38ceaf9
AD
3415 drm_kms_helper_poll_disable(dev);
3416
5f818173
S
3417 if (fbcon)
3418 amdgpu_fbdev_set_suspend(adev, 1);
3419
beff74bc 3420 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 3421
4562236b
HW
3422 if (!amdgpu_device_has_dc_support(adev)) {
3423 /* turn off display hw */
3424 drm_modeset_lock_all(dev);
f8d2d39e
LP
3425 drm_connector_list_iter_begin(dev, &iter);
3426 drm_for_each_connector_iter(connector, &iter)
3427 drm_helper_connector_dpms(connector,
3428 DRM_MODE_DPMS_OFF);
3429 drm_connector_list_iter_end(&iter);
4562236b 3430 drm_modeset_unlock_all(dev);
fe1053b7
AD
3431 /* unpin the front buffers and cursors */
3432 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3433 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3434 struct drm_framebuffer *fb = crtc->primary->fb;
3435 struct amdgpu_bo *robj;
3436
91334223 3437 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3438 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3439 r = amdgpu_bo_reserve(aobj, true);
3440 if (r == 0) {
3441 amdgpu_bo_unpin(aobj);
3442 amdgpu_bo_unreserve(aobj);
3443 }
756e6880 3444 }
756e6880 3445
fe1053b7
AD
3446 if (fb == NULL || fb->obj[0] == NULL) {
3447 continue;
3448 }
3449 robj = gem_to_amdgpu_bo(fb->obj[0]);
3450 /* don't unpin kernel fb objects */
3451 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
3452 r = amdgpu_bo_reserve(robj, true);
3453 if (r == 0) {
3454 amdgpu_bo_unpin(robj);
3455 amdgpu_bo_unreserve(robj);
3456 }
d38ceaf9
AD
3457 }
3458 }
3459 }
fe1053b7 3460
5e6932fe 3461 amdgpu_ras_suspend(adev);
3462
fe1053b7
AD
3463 r = amdgpu_device_ip_suspend_phase1(adev);
3464
94fa5660
EQ
3465 amdgpu_amdkfd_suspend(adev, !fbcon);
3466
d38ceaf9
AD
3467 /* evict vram memory */
3468 amdgpu_bo_evict_vram(adev);
3469
5ceb54c6 3470 amdgpu_fence_driver_suspend(adev);
d38ceaf9 3471
fe1053b7 3472 r = amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 3473
a0a71e49
AD
3474 /* evict remaining vram memory
3475 * This second call to evict vram is to evict the gart page table
3476 * using the CPU.
3477 */
d38ceaf9
AD
3478 amdgpu_bo_evict_vram(adev);
3479
d38ceaf9
AD
3480 return 0;
3481}
3482
3483/**
810ddc3a 3484 * amdgpu_device_resume - initiate device resume
d38ceaf9 3485 *
87e3f136
DP
3486 * @dev: drm dev pointer
3487 * @resume: resume state
3488 * @fbcon : notify the fbdev of resume
d38ceaf9
AD
3489 *
3490 * Bring the hw back to operating state (all asics).
3491 * Returns 0 for success or an error on failure.
3492 * Called at driver resume.
3493 */
de185019 3494int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3495{
3496 struct drm_connector *connector;
f8d2d39e 3497 struct drm_connector_list_iter iter;
d38ceaf9 3498 struct amdgpu_device *adev = dev->dev_private;
756e6880 3499 struct drm_crtc *crtc;
03161a6e 3500 int r = 0;
d38ceaf9
AD
3501
3502 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3503 return 0;
3504
d38ceaf9 3505 /* post card */
39c640c0 3506 if (amdgpu_device_need_post(adev)) {
74b0b157 3507 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3508 if (r)
3509 DRM_ERROR("amdgpu asic init failed\n");
3510 }
d38ceaf9 3511
06ec9070 3512 r = amdgpu_device_ip_resume(adev);
e6707218 3513 if (r) {
06ec9070 3514 DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
4d3b9ae5 3515 return r;
e6707218 3516 }
5ceb54c6
AD
3517 amdgpu_fence_driver_resume(adev);
3518
d38ceaf9 3519
06ec9070 3520 r = amdgpu_device_ip_late_init(adev);
03161a6e 3521 if (r)
4d3b9ae5 3522 return r;
d38ceaf9 3523
beff74bc
AD
3524 queue_delayed_work(system_wq, &adev->delayed_init_work,
3525 msecs_to_jiffies(AMDGPU_RESUME_MS));
3526
fe1053b7
AD
3527 if (!amdgpu_device_has_dc_support(adev)) {
3528 /* pin cursors */
3529 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3530 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3531
91334223 3532 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3533 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3534 r = amdgpu_bo_reserve(aobj, true);
3535 if (r == 0) {
3536 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
3537 if (r != 0)
3538 DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
3539 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
3540 amdgpu_bo_unreserve(aobj);
3541 }
756e6880
AD
3542 }
3543 }
3544 }
9593f4d6 3545 r = amdgpu_amdkfd_resume(adev, !fbcon);
ba997709
YZ
3546 if (r)
3547 return r;
756e6880 3548
96a5d8d4 3549 /* Make sure IB tests flushed */
beff74bc 3550 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 3551
d38ceaf9
AD
3552 /* blat the mode back in */
3553 if (fbcon) {
4562236b
HW
3554 if (!amdgpu_device_has_dc_support(adev)) {
3555 /* pre DCE11 */
3556 drm_helper_resume_force_mode(dev);
3557
3558 /* turn on display hw */
3559 drm_modeset_lock_all(dev);
f8d2d39e
LP
3560
3561 drm_connector_list_iter_begin(dev, &iter);
3562 drm_for_each_connector_iter(connector, &iter)
3563 drm_helper_connector_dpms(connector,
3564 DRM_MODE_DPMS_ON);
3565 drm_connector_list_iter_end(&iter);
3566
4562236b 3567 drm_modeset_unlock_all(dev);
d38ceaf9 3568 }
4d3b9ae5 3569 amdgpu_fbdev_set_suspend(adev, 0);
d38ceaf9
AD
3570 }
3571
3572 drm_kms_helper_poll_enable(dev);
23a1a9e5 3573
5e6932fe 3574 amdgpu_ras_resume(adev);
3575
23a1a9e5
L
3576 /*
3577 * Most of the connector probing functions try to acquire runtime pm
3578 * refs to ensure that the GPU is powered on when connector polling is
3579 * performed. Since we're calling this from a runtime PM callback,
3580 * trying to acquire rpm refs will cause us to deadlock.
3581 *
3582 * Since we're guaranteed to be holding the rpm lock, it's safe to
3583 * temporarily disable the rpm helpers so this doesn't deadlock us.
3584 */
3585#ifdef CONFIG_PM
3586 dev->dev->power.disable_depth++;
3587#endif
4562236b
HW
3588 if (!amdgpu_device_has_dc_support(adev))
3589 drm_helper_hpd_irq_event(dev);
3590 else
3591 drm_kms_helper_hotplug_event(dev);
23a1a9e5
L
3592#ifdef CONFIG_PM
3593 dev->dev->power.disable_depth--;
3594#endif
44779b43
RZ
3595 adev->in_suspend = false;
3596
4d3b9ae5 3597 return 0;
d38ceaf9
AD
3598}
3599
e3ecdffa
AD
3600/**
3601 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
3602 *
3603 * @adev: amdgpu_device pointer
3604 *
3605 * The list of all the hardware IPs that make up the asic is walked and
3606 * the check_soft_reset callbacks are run. check_soft_reset determines
3607 * if the asic is still hung or not.
3608 * Returns true if any of the IPs are still in a hung state, false if not.
3609 */
06ec9070 3610static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
3611{
3612 int i;
3613 bool asic_hang = false;
3614
f993d628
ML
3615 if (amdgpu_sriov_vf(adev))
3616 return true;
3617
8bc04c29
AD
3618 if (amdgpu_asic_need_full_reset(adev))
3619 return true;
3620
63fbf42f 3621 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3622 if (!adev->ip_blocks[i].status.valid)
63fbf42f 3623 continue;
a1255107
AD
3624 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
3625 adev->ip_blocks[i].status.hang =
3626 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
3627 if (adev->ip_blocks[i].status.hang) {
3628 DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
3629 asic_hang = true;
3630 }
3631 }
3632 return asic_hang;
3633}
3634
e3ecdffa
AD
3635/**
3636 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3637 *
3638 * @adev: amdgpu_device pointer
3639 *
3640 * The list of all the hardware IPs that make up the asic is walked and the
3641 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
3642 * handles any IP specific hardware or software state changes that are
3643 * necessary for a soft reset to succeed.
3644 * Returns 0 on success, negative error code on failure.
3645 */
06ec9070 3646static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
3647{
3648 int i, r = 0;
3649
3650 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3651 if (!adev->ip_blocks[i].status.valid)
d31a501e 3652 continue;
a1255107
AD
3653 if (adev->ip_blocks[i].status.hang &&
3654 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3655 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
3656 if (r)
3657 return r;
3658 }
3659 }
3660
3661 return 0;
3662}
3663
e3ecdffa
AD
3664/**
3665 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3666 *
3667 * @adev: amdgpu_device pointer
3668 *
3669 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
3670 * reset is necessary to recover.
3671 * Returns true if a full asic reset is required, false if not.
3672 */
06ec9070 3673static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 3674{
da146d3b
AD
3675 int i;
3676
8bc04c29
AD
3677 if (amdgpu_asic_need_full_reset(adev))
3678 return true;
3679
da146d3b 3680 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3681 if (!adev->ip_blocks[i].status.valid)
da146d3b 3682 continue;
a1255107
AD
3683 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3684 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3685 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
3686 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3687 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 3688 if (adev->ip_blocks[i].status.hang) {
da146d3b
AD
3689 DRM_INFO("Some block need full reset!\n");
3690 return true;
3691 }
3692 }
35d782fe
CZ
3693 }
3694 return false;
3695}
3696
e3ecdffa
AD
3697/**
3698 * amdgpu_device_ip_soft_reset - do a soft reset
3699 *
3700 * @adev: amdgpu_device pointer
3701 *
3702 * The list of all the hardware IPs that make up the asic is walked and the
3703 * soft_reset callbacks are run if the block is hung. soft_reset handles any
3704 * IP specific hardware or software state changes that are necessary to soft
3705 * reset the IP.
3706 * Returns 0 on success, negative error code on failure.
3707 */
06ec9070 3708static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3709{
3710 int i, r = 0;
3711
3712 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3713 if (!adev->ip_blocks[i].status.valid)
35d782fe 3714 continue;
a1255107
AD
3715 if (adev->ip_blocks[i].status.hang &&
3716 adev->ip_blocks[i].version->funcs->soft_reset) {
3717 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
3718 if (r)
3719 return r;
3720 }
3721 }
3722
3723 return 0;
3724}
3725
e3ecdffa
AD
3726/**
3727 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
3728 *
3729 * @adev: amdgpu_device pointer
3730 *
3731 * The list of all the hardware IPs that make up the asic is walked and the
3732 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
3733 * handles any IP specific hardware or software state changes that are
3734 * necessary after the IP has been soft reset.
3735 * Returns 0 on success, negative error code on failure.
3736 */
06ec9070 3737static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3738{
3739 int i, r = 0;
3740
3741 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3742 if (!adev->ip_blocks[i].status.valid)
35d782fe 3743 continue;
a1255107
AD
3744 if (adev->ip_blocks[i].status.hang &&
3745 adev->ip_blocks[i].version->funcs->post_soft_reset)
3746 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
3747 if (r)
3748 return r;
3749 }
3750
3751 return 0;
3752}
3753
e3ecdffa 3754/**
c33adbc7 3755 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
3756 *
3757 * @adev: amdgpu_device pointer
3758 *
3759 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
3760 * restore things like GPUVM page tables after a GPU reset where
3761 * the contents of VRAM might be lost.
403009bf
CK
3762 *
3763 * Returns:
3764 * 0 on success, negative error code on failure.
e3ecdffa 3765 */
c33adbc7 3766static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 3767{
c41d1cf6 3768 struct dma_fence *fence = NULL, *next = NULL;
403009bf
CK
3769 struct amdgpu_bo *shadow;
3770 long r = 1, tmo;
c41d1cf6
ML
3771
3772 if (amdgpu_sriov_runtime(adev))
b045d3af 3773 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
3774 else
3775 tmo = msecs_to_jiffies(100);
3776
3777 DRM_INFO("recover vram bo from shadow start\n");
3778 mutex_lock(&adev->shadow_list_lock);
403009bf
CK
3779 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
3780
3781 /* No need to recover an evicted BO */
3782 if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
b575f10d 3783 shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
403009bf
CK
3784 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
3785 continue;
3786
3787 r = amdgpu_bo_restore_shadow(shadow, &next);
3788 if (r)
3789 break;
3790
c41d1cf6 3791 if (fence) {
1712fb1a 3792 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
3793 dma_fence_put(fence);
3794 fence = next;
1712fb1a 3795 if (tmo == 0) {
3796 r = -ETIMEDOUT;
c41d1cf6 3797 break;
1712fb1a 3798 } else if (tmo < 0) {
3799 r = tmo;
3800 break;
3801 }
403009bf
CK
3802 } else {
3803 fence = next;
c41d1cf6 3804 }
c41d1cf6
ML
3805 }
3806 mutex_unlock(&adev->shadow_list_lock);
3807
403009bf
CK
3808 if (fence)
3809 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
3810 dma_fence_put(fence);
3811
1712fb1a 3812 if (r < 0 || tmo <= 0) {
3813 DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
3814 return -EIO;
3815 }
c41d1cf6 3816
403009bf
CK
3817 DRM_INFO("recover vram bo from shadow done\n");
3818 return 0;
c41d1cf6
ML
3819}
3820
a90ad3c2 3821
e3ecdffa 3822/**
06ec9070 3823 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e
ML
3824 *
3825 * @adev: amdgpu device pointer
87e3f136 3826 * @from_hypervisor: request from hypervisor
5740682e
ML
3827 *
3828 * do VF FLR and reinitialize Asic
3f48c681 3829 * return 0 means succeeded otherwise failed
e3ecdffa
AD
3830 */
3831static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
3832 bool from_hypervisor)
5740682e
ML
3833{
3834 int r;
3835
3836 if (from_hypervisor)
3837 r = amdgpu_virt_request_full_gpu(adev, true);
3838 else
3839 r = amdgpu_virt_reset_gpu(adev);
3840 if (r)
3841 return r;
a90ad3c2 3842
b639c22c
JZ
3843 amdgpu_amdkfd_pre_reset(adev);
3844
a90ad3c2 3845 /* Resume IP prior to SMC */
06ec9070 3846 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
3847 if (r)
3848 goto error;
a90ad3c2 3849
c9ffa427 3850 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 3851 /* we need recover gart prior to run SMC/CP/SDMA resume */
c1c7ce8f 3852 amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
a90ad3c2 3853
7a3e0bb2
RZ
3854 r = amdgpu_device_fw_loading(adev);
3855 if (r)
3856 return r;
3857
a90ad3c2 3858 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 3859 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
3860 if (r)
3861 goto error;
a90ad3c2
ML
3862
3863 amdgpu_irq_gpu_reset_resume_helper(adev);
5740682e 3864 r = amdgpu_ib_ring_tests(adev);
f81e8d53 3865 amdgpu_amdkfd_post_reset(adev);
a90ad3c2 3866
abc34253
ED
3867error:
3868 amdgpu_virt_release_full_gpu(adev, true);
c41d1cf6 3869 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 3870 amdgpu_inc_vram_lost(adev);
c33adbc7 3871 r = amdgpu_device_recover_vram(adev);
a90ad3c2
ML
3872 }
3873
3874 return r;
3875}
3876
12938fad
CK
3877/**
3878 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
3879 *
3880 * @adev: amdgpu device pointer
3881 *
3882 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
3883 * a hung GPU.
3884 */
3885bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
3886{
3887 if (!amdgpu_device_ip_check_soft_reset(adev)) {
3888 DRM_INFO("Timeout, but no hardware hang detected.\n");
3889 return false;
3890 }
3891
3ba7b418
AG
3892 if (amdgpu_gpu_recovery == 0)
3893 goto disabled;
3894
3895 if (amdgpu_sriov_vf(adev))
3896 return true;
3897
3898 if (amdgpu_gpu_recovery == -1) {
3899 switch (adev->asic_type) {
fc42d47c
AG
3900 case CHIP_BONAIRE:
3901 case CHIP_HAWAII:
3ba7b418
AG
3902 case CHIP_TOPAZ:
3903 case CHIP_TONGA:
3904 case CHIP_FIJI:
3905 case CHIP_POLARIS10:
3906 case CHIP_POLARIS11:
3907 case CHIP_POLARIS12:
3908 case CHIP_VEGAM:
3909 case CHIP_VEGA20:
3910 case CHIP_VEGA10:
3911 case CHIP_VEGA12:
c43b849f 3912 case CHIP_RAVEN:
e9d4cf91 3913 case CHIP_ARCTURUS:
2cb44fb0 3914 case CHIP_RENOIR:
658c6639
AD
3915 case CHIP_NAVI10:
3916 case CHIP_NAVI14:
3917 case CHIP_NAVI12:
3ba7b418
AG
3918 break;
3919 default:
3920 goto disabled;
3921 }
12938fad
CK
3922 }
3923
3924 return true;
3ba7b418
AG
3925
3926disabled:
3927 DRM_INFO("GPU recovery disabled.\n");
3928 return false;
12938fad
CK
3929}
3930
5c6dd71e 3931
26bc5340
AG
3932static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
3933 struct amdgpu_job *job,
3934 bool *need_full_reset_arg)
3935{
3936 int i, r = 0;
3937 bool need_full_reset = *need_full_reset_arg;
71182665 3938
728e7e0c
JZ
3939 amdgpu_debugfs_wait_dump(adev);
3940
71182665 3941 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
3942 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3943 struct amdgpu_ring *ring = adev->rings[i];
3944
51687759 3945 if (!ring || !ring->sched.thread)
0875dc9e 3946 continue;
5740682e 3947
2f9d4084
ML
3948 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
3949 amdgpu_fence_driver_force_completion(ring);
0875dc9e 3950 }
d38ceaf9 3951
222b5f04
AG
3952 if(job)
3953 drm_sched_increase_karma(&job->base);
3954
1d721ed6 3955 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
3956 if (!amdgpu_sriov_vf(adev)) {
3957
3958 if (!need_full_reset)
3959 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
3960
3961 if (!need_full_reset) {
3962 amdgpu_device_ip_pre_soft_reset(adev);
3963 r = amdgpu_device_ip_soft_reset(adev);
3964 amdgpu_device_ip_post_soft_reset(adev);
3965 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
3966 DRM_INFO("soft reset failed, will fallback to full reset!\n");
3967 need_full_reset = true;
3968 }
3969 }
3970
3971 if (need_full_reset)
3972 r = amdgpu_device_ip_suspend(adev);
3973
3974 *need_full_reset_arg = need_full_reset;
3975 }
3976
3977 return r;
3978}
3979
041a62bc 3980static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
26bc5340
AG
3981 struct list_head *device_list_handle,
3982 bool *need_full_reset_arg)
3983{
3984 struct amdgpu_device *tmp_adev = NULL;
3985 bool need_full_reset = *need_full_reset_arg, vram_lost = false;
3986 int r = 0;
3987
3988 /*
3989 * ASIC reset has to be done on all HGMI hive nodes ASAP
3990 * to allow proper links negotiation in FW (within 1 sec)
3991 */
3992 if (need_full_reset) {
3993 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
041a62bc 3994 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 3995 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
c96cf282 3996 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
3997 r = -EALREADY;
3998 } else
3999 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 4000
041a62bc
AG
4001 if (r) {
4002 DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s",
4003 r, tmp_adev->ddev->unique);
4004 break;
ce316fa5
LM
4005 }
4006 }
4007
041a62bc
AG
4008 /* For XGMI wait for all resets to complete before proceed */
4009 if (!r) {
ce316fa5
LM
4010 list_for_each_entry(tmp_adev, device_list_handle,
4011 gmc.xgmi.head) {
4012 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4013 flush_work(&tmp_adev->xgmi_reset_work);
4014 r = tmp_adev->asic_reset_res;
4015 if (r)
4016 break;
ce316fa5
LM
4017 }
4018 }
4019 }
ce316fa5 4020 }
26bc5340 4021
43c4d576
JC
4022 if (!r && amdgpu_ras_intr_triggered()) {
4023 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4024 if (tmp_adev->mmhub.funcs &&
4025 tmp_adev->mmhub.funcs->reset_ras_error_count)
4026 tmp_adev->mmhub.funcs->reset_ras_error_count(tmp_adev);
4027 }
4028
00eaa571 4029 amdgpu_ras_intr_cleared();
43c4d576 4030 }
00eaa571 4031
26bc5340
AG
4032 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4033 if (need_full_reset) {
4034 /* post card */
4035 if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
4036 DRM_WARN("asic atom init failed!");
4037
4038 if (!r) {
4039 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
4040 r = amdgpu_device_ip_resume_phase1(tmp_adev);
4041 if (r)
4042 goto out;
4043
4044 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
4045 if (vram_lost) {
77e7f829 4046 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 4047 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
4048 }
4049
4050 r = amdgpu_gtt_mgr_recover(
4051 &tmp_adev->mman.bdev.man[TTM_PL_TT]);
4052 if (r)
4053 goto out;
4054
4055 r = amdgpu_device_fw_loading(tmp_adev);
4056 if (r)
4057 return r;
4058
4059 r = amdgpu_device_ip_resume_phase2(tmp_adev);
4060 if (r)
4061 goto out;
4062
4063 if (vram_lost)
4064 amdgpu_device_fill_reset_magic(tmp_adev);
4065
fdafb359
EQ
4066 /*
4067 * Add this ASIC as tracked as reset was already
4068 * complete successfully.
4069 */
4070 amdgpu_register_gpu_instance(tmp_adev);
4071
7c04ca50 4072 r = amdgpu_device_ip_late_init(tmp_adev);
4073 if (r)
4074 goto out;
4075
565d1941
EQ
4076 amdgpu_fbdev_set_suspend(tmp_adev, 0);
4077
e79a04d5 4078 /* must succeed. */
511fdbc3 4079 amdgpu_ras_resume(tmp_adev);
e79a04d5 4080
26bc5340
AG
4081 /* Update PSP FW topology after reset */
4082 if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
4083 r = amdgpu_xgmi_update_topology(hive, tmp_adev);
4084 }
4085 }
4086
4087
4088out:
4089 if (!r) {
4090 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
4091 r = amdgpu_ib_ring_tests(tmp_adev);
4092 if (r) {
4093 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
4094 r = amdgpu_device_ip_suspend(tmp_adev);
4095 need_full_reset = true;
4096 r = -EAGAIN;
4097 goto end;
4098 }
4099 }
4100
4101 if (!r)
4102 r = amdgpu_device_recover_vram(tmp_adev);
4103 else
4104 tmp_adev->asic_reset_res = r;
4105 }
4106
4107end:
4108 *need_full_reset_arg = need_full_reset;
4109 return r;
4110}
4111
1d721ed6 4112static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
26bc5340 4113{
1d721ed6
AG
4114 if (trylock) {
4115 if (!mutex_trylock(&adev->lock_reset))
4116 return false;
4117 } else
4118 mutex_lock(&adev->lock_reset);
5740682e 4119
26bc5340 4120 atomic_inc(&adev->gpu_reset_counter);
2a9b90ae 4121 adev->in_gpu_reset = true;
a3a09142
AD
4122 switch (amdgpu_asic_reset_method(adev)) {
4123 case AMD_RESET_METHOD_MODE1:
4124 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
4125 break;
4126 case AMD_RESET_METHOD_MODE2:
4127 adev->mp1_state = PP_MP1_STATE_RESET;
4128 break;
4129 default:
4130 adev->mp1_state = PP_MP1_STATE_NONE;
4131 break;
4132 }
1d721ed6
AG
4133
4134 return true;
26bc5340 4135}
d38ceaf9 4136
26bc5340
AG
4137static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
4138{
89041940 4139 amdgpu_vf_error_trans_all(adev);
a3a09142 4140 adev->mp1_state = PP_MP1_STATE_NONE;
2a9b90ae 4141 adev->in_gpu_reset = false;
13a752e3 4142 mutex_unlock(&adev->lock_reset);
26bc5340
AG
4143}
4144
3f12acc8
EQ
4145static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
4146{
4147 struct pci_dev *p = NULL;
4148
4149 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4150 adev->pdev->bus->number, 1);
4151 if (p) {
4152 pm_runtime_enable(&(p->dev));
4153 pm_runtime_resume(&(p->dev));
4154 }
4155}
4156
4157static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
4158{
4159 enum amd_reset_method reset_method;
4160 struct pci_dev *p = NULL;
4161 u64 expires;
4162
4163 /*
4164 * For now, only BACO and mode1 reset are confirmed
4165 * to suffer the audio issue without proper suspended.
4166 */
4167 reset_method = amdgpu_asic_reset_method(adev);
4168 if ((reset_method != AMD_RESET_METHOD_BACO) &&
4169 (reset_method != AMD_RESET_METHOD_MODE1))
4170 return -EINVAL;
4171
4172 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4173 adev->pdev->bus->number, 1);
4174 if (!p)
4175 return -ENODEV;
4176
4177 expires = pm_runtime_autosuspend_expiration(&(p->dev));
4178 if (!expires)
4179 /*
4180 * If we cannot get the audio device autosuspend delay,
4181 * a fixed 4S interval will be used. Considering 3S is
4182 * the audio controller default autosuspend delay setting.
4183 * 4S used here is guaranteed to cover that.
4184 */
54b7feb9 4185 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
3f12acc8
EQ
4186
4187 while (!pm_runtime_status_suspended(&(p->dev))) {
4188 if (!pm_runtime_suspend(&(p->dev)))
4189 break;
4190
4191 if (expires < ktime_get_mono_fast_ns()) {
4192 dev_warn(adev->dev, "failed to suspend display audio\n");
4193 /* TODO: abort the succeeding gpu reset? */
4194 return -ETIMEDOUT;
4195 }
4196 }
4197
4198 pm_runtime_disable(&(p->dev));
4199
4200 return 0;
4201}
4202
26bc5340
AG
4203/**
4204 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
4205 *
4206 * @adev: amdgpu device pointer
4207 * @job: which job trigger hang
4208 *
4209 * Attempt to reset the GPU if it has hung (all asics).
4210 * Attempt to do soft-reset or full-reset and reinitialize Asic
4211 * Returns 0 for success or an error on failure.
4212 */
4213
4214int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
4215 struct amdgpu_job *job)
4216{
1d721ed6 4217 struct list_head device_list, *device_list_handle = NULL;
7dd8c205
EQ
4218 bool need_full_reset = false;
4219 bool job_signaled = false;
26bc5340 4220 struct amdgpu_hive_info *hive = NULL;
26bc5340 4221 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 4222 int i, r = 0;
7c6e68c7 4223 bool in_ras_intr = amdgpu_ras_intr_triggered();
b823821f
LM
4224 bool use_baco =
4225 (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ?
4226 true : false;
3f12acc8 4227 bool audio_suspended = false;
26bc5340 4228
d5ea093e
AG
4229 /*
4230 * Flush RAM to disk so that after reboot
4231 * the user can read log and see why the system rebooted.
4232 */
b823821f 4233 if (in_ras_intr && !use_baco && amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
4234
4235 DRM_WARN("Emergency reboot.");
4236
4237 ksys_sync_helper();
4238 emergency_restart();
4239 }
4240
b823821f
LM
4241 dev_info(adev->dev, "GPU %s begin!\n",
4242 (in_ras_intr && !use_baco) ? "jobs stop":"reset");
26bc5340
AG
4243
4244 /*
1d721ed6
AG
4245 * Here we trylock to avoid chain of resets executing from
4246 * either trigger by jobs on different adevs in XGMI hive or jobs on
4247 * different schedulers for same device while this TO handler is running.
4248 * We always reset all schedulers for device and all devices for XGMI
4249 * hive so that should take care of them too.
26bc5340 4250 */
7dd8c205 4251 hive = amdgpu_get_xgmi_hive(adev, true);
1d721ed6
AG
4252 if (hive && !mutex_trylock(&hive->reset_lock)) {
4253 DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
0b2d2c2e 4254 job ? job->base.id : -1, hive->hive_id);
9e94d22c 4255 mutex_unlock(&hive->hive_lock);
26bc5340 4256 return 0;
1d721ed6 4257 }
26bc5340 4258
9e94d22c
EQ
4259 /*
4260 * Build list of devices to reset.
4261 * In case we are in XGMI hive mode, resort the device list
4262 * to put adev in the 1st position.
4263 */
4264 INIT_LIST_HEAD(&device_list);
4265 if (adev->gmc.xgmi.num_physical_nodes > 1) {
4266 if (!hive)
26bc5340 4267 return -ENODEV;
9e94d22c
EQ
4268 if (!list_is_first(&adev->gmc.xgmi.head, &hive->device_list))
4269 list_rotate_to_front(&adev->gmc.xgmi.head, &hive->device_list);
26bc5340
AG
4270 device_list_handle = &hive->device_list;
4271 } else {
4272 list_add_tail(&adev->gmc.xgmi.head, &device_list);
4273 device_list_handle = &device_list;
4274 }
4275
1d721ed6
AG
4276 /* block all schedulers and reset given job's ring */
4277 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
9e94d22c
EQ
4278 if (!amdgpu_device_lock_adev(tmp_adev, !hive)) {
4279 DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
4280 job ? job->base.id : -1);
4281 mutex_unlock(&hive->hive_lock);
4282 return 0;
7c6e68c7
AG
4283 }
4284
3f12acc8
EQ
4285 /*
4286 * Try to put the audio codec into suspend state
4287 * before gpu reset started.
4288 *
4289 * Due to the power domain of the graphics device
4290 * is shared with AZ power domain. Without this,
4291 * we may change the audio hardware from behind
4292 * the audio driver's back. That will trigger
4293 * some audio codec errors.
4294 */
4295 if (!amdgpu_device_suspend_display_audio(tmp_adev))
4296 audio_suspended = true;
4297
9e94d22c
EQ
4298 amdgpu_ras_set_error_query_ready(tmp_adev, false);
4299
52fb44cf
EQ
4300 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
4301
9e94d22c
EQ
4302 if (!amdgpu_sriov_vf(tmp_adev))
4303 amdgpu_amdkfd_pre_reset(tmp_adev);
4304
12ffa55d
AG
4305 /*
4306 * Mark these ASICs to be reseted as untracked first
4307 * And add them back after reset completed
4308 */
4309 amdgpu_unregister_gpu_instance(tmp_adev);
4310
a2f63ee8 4311 amdgpu_fbdev_set_suspend(tmp_adev, 1);
565d1941 4312
f1c1314b 4313 /* disable ras on ALL IPs */
b823821f
LM
4314 if (!(in_ras_intr && !use_baco) &&
4315 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 4316 amdgpu_ras_suspend(tmp_adev);
4317
1d721ed6
AG
4318 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4319 struct amdgpu_ring *ring = tmp_adev->rings[i];
4320
4321 if (!ring || !ring->sched.thread)
4322 continue;
4323
0b2d2c2e 4324 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 4325
b823821f 4326 if (in_ras_intr && !use_baco)
7c6e68c7 4327 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6
AG
4328 }
4329 }
4330
b823821f 4331 if (in_ras_intr && !use_baco)
7c6e68c7
AG
4332 goto skip_sched_resume;
4333
1d721ed6
AG
4334 /*
4335 * Must check guilty signal here since after this point all old
4336 * HW fences are force signaled.
4337 *
4338 * job->base holds a reference to parent fence
4339 */
4340 if (job && job->base.s_fence->parent &&
7dd8c205 4341 dma_fence_is_signaled(job->base.s_fence->parent)) {
1d721ed6 4342 job_signaled = true;
1d721ed6
AG
4343 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
4344 goto skip_hw_reset;
4345 }
4346
26bc5340
AG
4347retry: /* Rest of adevs pre asic reset from XGMI hive. */
4348 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
26bc5340
AG
4349 r = amdgpu_device_pre_asic_reset(tmp_adev,
4350 NULL,
4351 &need_full_reset);
4352 /*TODO Should we stop ?*/
4353 if (r) {
4354 DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
4355 r, tmp_adev->ddev->unique);
4356 tmp_adev->asic_reset_res = r;
4357 }
4358 }
4359
4360 /* Actual ASIC resets if needed.*/
4361 /* TODO Implement XGMI hive reset logic for SRIOV */
4362 if (amdgpu_sriov_vf(adev)) {
4363 r = amdgpu_device_reset_sriov(adev, job ? false : true);
4364 if (r)
4365 adev->asic_reset_res = r;
4366 } else {
041a62bc 4367 r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
26bc5340
AG
4368 if (r && r == -EAGAIN)
4369 goto retry;
4370 }
4371
1d721ed6
AG
4372skip_hw_reset:
4373
26bc5340
AG
4374 /* Post ASIC reset for all devs .*/
4375 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
7c6e68c7 4376
1d721ed6
AG
4377 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4378 struct amdgpu_ring *ring = tmp_adev->rings[i];
4379
4380 if (!ring || !ring->sched.thread)
4381 continue;
4382
4383 /* No point to resubmit jobs if we didn't HW reset*/
4384 if (!tmp_adev->asic_reset_res && !job_signaled)
4385 drm_sched_resubmit_jobs(&ring->sched);
4386
4387 drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
4388 }
4389
4390 if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
4391 drm_helper_resume_force_mode(tmp_adev->ddev);
4392 }
4393
4394 tmp_adev->asic_reset_res = 0;
26bc5340
AG
4395
4396 if (r) {
4397 /* bad news, how to tell it to userspace ? */
12ffa55d 4398 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
4399 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
4400 } else {
12ffa55d 4401 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340 4402 }
7c6e68c7 4403 }
26bc5340 4404
7c6e68c7
AG
4405skip_sched_resume:
4406 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4407 /*unlock kfd: SRIOV would do it separately */
b823821f 4408 if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev))
7c6e68c7 4409 amdgpu_amdkfd_post_reset(tmp_adev);
3f12acc8
EQ
4410 if (audio_suspended)
4411 amdgpu_device_resume_display_audio(tmp_adev);
26bc5340
AG
4412 amdgpu_device_unlock_adev(tmp_adev);
4413 }
4414
9e94d22c 4415 if (hive) {
22d6575b 4416 mutex_unlock(&hive->reset_lock);
9e94d22c
EQ
4417 mutex_unlock(&hive->hive_lock);
4418 }
26bc5340
AG
4419
4420 if (r)
4421 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
d38ceaf9
AD
4422 return r;
4423}
4424
e3ecdffa
AD
4425/**
4426 * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
4427 *
4428 * @adev: amdgpu_device pointer
4429 *
4430 * Fetchs and stores in the driver the PCIE capabilities (gen speed
4431 * and lanes) of the slot the device is in. Handles APUs and
4432 * virtualized environments where PCIE config space may not be available.
4433 */
5494d864 4434static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 4435{
5d9a6330 4436 struct pci_dev *pdev;
c5313457
HK
4437 enum pci_bus_speed speed_cap, platform_speed_cap;
4438 enum pcie_link_width platform_link_width;
d0dd7f0c 4439
cd474ba0
AD
4440 if (amdgpu_pcie_gen_cap)
4441 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 4442
cd474ba0
AD
4443 if (amdgpu_pcie_lane_cap)
4444 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 4445
cd474ba0
AD
4446 /* covers APUs as well */
4447 if (pci_is_root_bus(adev->pdev->bus)) {
4448 if (adev->pm.pcie_gen_mask == 0)
4449 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
4450 if (adev->pm.pcie_mlw_mask == 0)
4451 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 4452 return;
cd474ba0 4453 }
d0dd7f0c 4454
c5313457
HK
4455 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
4456 return;
4457
dbaa922b
AD
4458 pcie_bandwidth_available(adev->pdev, NULL,
4459 &platform_speed_cap, &platform_link_width);
c5313457 4460
cd474ba0 4461 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
4462 /* asic caps */
4463 pdev = adev->pdev;
4464 speed_cap = pcie_get_speed_cap(pdev);
4465 if (speed_cap == PCI_SPEED_UNKNOWN) {
4466 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
4467 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4468 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 4469 } else {
5d9a6330
AD
4470 if (speed_cap == PCIE_SPEED_16_0GT)
4471 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4472 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4473 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4474 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
4475 else if (speed_cap == PCIE_SPEED_8_0GT)
4476 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4477 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4478 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
4479 else if (speed_cap == PCIE_SPEED_5_0GT)
4480 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4481 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
4482 else
4483 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
4484 }
4485 /* platform caps */
c5313457 4486 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
4487 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4488 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4489 } else {
c5313457 4490 if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
4491 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4492 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4493 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4494 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 4495 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
4496 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4497 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4498 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 4499 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
4500 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4501 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4502 else
4503 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
4504
cd474ba0
AD
4505 }
4506 }
4507 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 4508 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
4509 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
4510 } else {
c5313457 4511 switch (platform_link_width) {
5d9a6330 4512 case PCIE_LNK_X32:
cd474ba0
AD
4513 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
4514 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4515 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4516 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4517 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4518 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4519 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4520 break;
5d9a6330 4521 case PCIE_LNK_X16:
cd474ba0
AD
4522 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4523 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4524 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4525 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4526 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4527 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4528 break;
5d9a6330 4529 case PCIE_LNK_X12:
cd474ba0
AD
4530 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4531 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4532 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4533 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4534 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4535 break;
5d9a6330 4536 case PCIE_LNK_X8:
cd474ba0
AD
4537 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4538 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4539 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4540 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4541 break;
5d9a6330 4542 case PCIE_LNK_X4:
cd474ba0
AD
4543 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4544 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4545 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4546 break;
5d9a6330 4547 case PCIE_LNK_X2:
cd474ba0
AD
4548 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4549 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4550 break;
5d9a6330 4551 case PCIE_LNK_X1:
cd474ba0
AD
4552 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
4553 break;
4554 default:
4555 break;
4556 }
d0dd7f0c
AD
4557 }
4558 }
4559}
d38ceaf9 4560
361dbd01
AD
4561int amdgpu_device_baco_enter(struct drm_device *dev)
4562{
4563 struct amdgpu_device *adev = dev->dev_private;
7a22677b 4564 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01
AD
4565
4566 if (!amdgpu_device_supports_baco(adev->ddev))
4567 return -ENOTSUPP;
4568
7a22677b
LM
4569 if (ras && ras->supported)
4570 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
4571
9530273e 4572 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
4573}
4574
4575int amdgpu_device_baco_exit(struct drm_device *dev)
4576{
4577 struct amdgpu_device *adev = dev->dev_private;
7a22677b 4578 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 4579 int ret = 0;
361dbd01
AD
4580
4581 if (!amdgpu_device_supports_baco(adev->ddev))
4582 return -ENOTSUPP;
4583
9530273e
EQ
4584 ret = amdgpu_dpm_baco_exit(adev);
4585 if (ret)
4586 return ret;
7a22677b
LM
4587
4588 if (ras && ras->supported)
4589 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
4590
4591 return 0;
361dbd01 4592}