drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/console.h>
#include <linux/slab.h>

#include <drm/drm_atomic_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "nv.h"
#include "bif/bif_4_1_d.h"
#include <linux/pci.h>
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
#include "amdgpu_pmu.h"

#include <linux/suspend.h>
#include <drm/task_barrier.h>

MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");

#define AMDGPU_RESUME_MS	2000

const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"ARCTURUS",
	"RENOIR",
	"NAVI10",
	"NAVI14",
	"NAVI12",
	"LAST",
};

/**
 * DOC: pcie_replay_count
 *
 * The amdgpu driver provides a sysfs API for reporting the total number
 * of PCIe replays (NAKs).
 * The file pcie_replay_count is used for this and returns the total
 * number of replays as a sum of the NAKs generated and NAKs received.
 */

static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;
	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);

	return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
}

static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
		amdgpu_device_get_pcie_replay_count, NULL);

static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);

/**
 * amdgpu_device_supports_boco - Is the device a dGPU with HG/PX power control
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with HG/PX power control,
 * otherwise return false.
 */
bool amdgpu_device_supports_boco(struct drm_device *dev)
{
	struct amdgpu_device *adev = dev->dev_private;

	if (adev->flags & AMD_IS_PX)
		return true;
	return false;
}

/**
 * amdgpu_device_supports_baco - Does the device support BACO
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device supports BACO,
 * otherwise return false.
 */
bool amdgpu_device_supports_baco(struct drm_device *dev)
{
	struct amdgpu_device *adev = dev->dev_private;

	return amdgpu_asic_supports_baco(adev);
}

/**
 * VRAM access helper functions.
 *
 * amdgpu_device_vram_access - read/write a buffer in vram
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size in bytes; the buffer at @buf must be at least @size bytes
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
			       uint32_t *buf, size_t size, bool write)
{
	unsigned long flags;
	uint32_t hi = ~0;
	uint64_t last;

#ifdef CONFIG_64BIT
	last = min(pos + size, adev->gmc.visible_vram_size);
	if (last > pos) {
		void __iomem *addr = adev->mman.aper_base_kaddr + pos;
		size_t count = last - pos;

		if (write) {
			memcpy_toio(addr, buf, count);
			mb();
			amdgpu_asic_flush_hdp(adev, NULL);
		} else {
			amdgpu_asic_invalidate_hdp(adev, NULL);
			mb();
			memcpy_fromio(buf, addr, count);
		}

		if (count == size)
			return;

		pos += count;
		buf += count / 4;
		size -= count;
	}
#endif

	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
	for (last = pos + size; pos < last; pos += 4) {
		uint32_t tmp = pos >> 31;

		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
		if (tmp != hi) {
			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
			hi = tmp;
		}
		if (write)
			WREG32_NO_KIQ(mmMM_DATA, *buf++);
		else
			*buf++ = RREG32_NO_KIQ(mmMM_DATA);
	}
	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
}

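/*
 * Illustrative usage sketch of the helper above (the buffer and offset names
 * are placeholders, not taken from this file): a caller that wants to copy a
 * few dwords out of VRAM could do roughly the following, assuming
 * "vram_offset" is a valid, dword-aligned offset into VRAM:
 *
 *	uint32_t data[4];
 *
 *	amdgpu_device_vram_access(adev, vram_offset, data, sizeof(data), false);
 *	// data[] now holds 16 bytes read from VRAM starting at vram_offset
 */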
/*
 * MMIO register access helper functions.
 */
/**
 * amdgpu_mm_rreg - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
			uint32_t acc_flags)
{
	uint32_t ret;

	if ((acc_flags & AMDGPU_REGS_KIQ) || (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)))
		return amdgpu_kiq_rreg(adev, reg);

	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
		ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
	else {
		unsigned long flags;

		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
		ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	}
	trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
	return ret;
}

/*
 * MMIO register read with bytes helper functions
 * @offset: byte offset from MMIO start
 */

/**
 * amdgpu_mm_rreg8 - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 *
 * Returns the 8 bit value from the offset specified.
 */
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) {
	if (offset < adev->rmmio_size)
		return (readb(adev->rmmio + offset));
	BUG();
}

/*
 * MMIO register write with bytes helper functions
 * @offset: byte offset from MMIO start
 * @value: the value to be written to the register
 */
/**
 * amdgpu_mm_wreg8 - write to a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) {
	if (offset < adev->rmmio_size)
		writeb(value, adev->rmmio + offset);
	else
		BUG();
}

/**
 * amdgpu_mm_wreg - write to a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
		    uint32_t acc_flags)
{
	trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);

	if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
		adev->last_mm_index = v;
	}

	if ((acc_flags & AMDGPU_REGS_KIQ) || (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)))
		return amdgpu_kiq_wreg(adev, reg, v);

	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
	else {
		unsigned long flags;

		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
		writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	}

	if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
		udelay(500);
	}
}

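/*
 * Note: most callers do not use amdgpu_mm_rreg()/amdgpu_mm_wreg() directly;
 * they typically go through the RREG32()/WREG32() style wrappers declared in
 * amdgpu.h. An illustrative read-modify-write looks roughly like this, where
 * "reg_offset" and "some_enable_bit" stand in for a real dword-aligned
 * register offset and bit mask:
 *
 *	uint32_t val = RREG32(reg_offset);
 *	WREG32(reg_offset, val | some_enable_bit);
 */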
/**
 * amdgpu_io_rreg - read an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 *
 * Returns the 32 bit value from the offset specified.
 */
u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
{
	if ((reg * 4) < adev->rio_mem_size)
		return ioread32(adev->rio_mem + (reg * 4));
	else {
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		return ioread32(adev->rio_mem + (mmMM_DATA * 4));
	}
}

/**
 * amdgpu_io_wreg - write to an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
	if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
		adev->last_mm_index = v;
	}

	if ((reg * 4) < adev->rio_mem_size)
		iowrite32(v, adev->rio_mem + (reg * 4));
	else {
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
	}

	if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
		udelay(500);
	}
}

/**
 * amdgpu_mm_rdoorbell - read a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
{
	if (index < adev->doorbell.num_doorbells) {
		return readl(adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell - write a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
{
	if (index < adev->doorbell.num_doorbells) {
		writel(v, adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

/**
 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
{
	if (index < adev->doorbell.num_doorbells) {
		return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
{
	if (index < adev->doorbell.num_doorbells) {
		atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

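/*
 * Illustrative sketch: a ring implementation that owns a doorbell slot would
 * usually publish its write pointer through one of the helpers above, along
 * the lines of the following (the index and wptr names are placeholders):
 *
 *	amdgpu_mm_wdoorbell(adev, ring_doorbell_index, lower_32_bits(ring_wptr));
 */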
/**
 * amdgpu_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg64 - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
{
	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_block_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
					  uint32_t block, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
		  reg, block);
	BUG();
	return 0;
}

/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
				      uint32_t block,
				      uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
		  reg, block, v);
	BUG();
}

/**
 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
 *
 * @adev: amdgpu device pointer
 *
 * Allocates a scratch page of VRAM for use by various things in the
 * driver.
 */
static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
{
	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
				       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
				       &adev->vram_scratch.robj,
				       &adev->vram_scratch.gpu_addr,
				       (void **)&adev->vram_scratch.ptr);
}

/**
 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
 *
 * @adev: amdgpu device pointer
 *
 * Frees the VRAM scratch page.
 */
static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
}

/**
 * amdgpu_device_program_register_sequence - program an array of registers.
 *
 * @adev: amdgpu_device pointer
 * @registers: pointer to the register array
 * @array_size: size of the register array
 *
 * Programs an array of registers with AND and OR masks.
 * This is a helper for setting golden registers.
 */
void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
					     const u32 *registers,
					     const u32 array_size)
{
	u32 tmp, reg, and_mask, or_mask;
	int i;

	if (array_size % 3)
		return;

	for (i = 0; i < array_size; i += 3) {
		reg = registers[i + 0];
		and_mask = registers[i + 1];
		or_mask = registers[i + 2];

		if (and_mask == 0xffffffff) {
			tmp = or_mask;
		} else {
			tmp = RREG32(reg);
			tmp &= ~and_mask;
			if (adev->family >= AMDGPU_FAMILY_AI)
				tmp |= (or_mask & and_mask);
			else
				tmp |= or_mask;
		}
		WREG32(reg, tmp);
	}
}

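/*
 * The register array is consumed as {offset, and_mask, or_mask} triples.
 * A hypothetical golden-settings table passed to this helper could look like
 * the following (mmSOME_REG/mmOTHER_REG are placeholder names, not real
 * register offsets):
 *
 *	static const u32 example_golden_settings[] = {
 *		mmSOME_REG,  0xffffffff, 0x00000001,  // full replace with 0x1
 *		mmOTHER_REG, 0x0000ff00, 0x00003400,  // rewrite bits 15:8 to 0x34
 *	};
 *
 *	amdgpu_device_program_register_sequence(adev, example_golden_settings,
 *						ARRAY_SIZE(example_golden_settings));
 */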
/**
 * amdgpu_device_pci_config_reset - reset the GPU
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using the pci config reset sequence.
 * Only applicable to asics prior to vega10.
 */
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
{
	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
}

/*
 * GPU doorbell aperture helper functions.
 */
/**
 * amdgpu_device_doorbell_init - Init doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Init doorbell driver information (CIK)
 * Returns 0 on success, error on failure.
 */
static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
{

	/* No doorbell on SI hardware generation */
	if (adev->asic_type < CHIP_BONAIRE) {
		adev->doorbell.base = 0;
		adev->doorbell.size = 0;
		adev->doorbell.num_doorbells = 0;
		adev->doorbell.ptr = NULL;
		return 0;
	}

	if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
		return -EINVAL;

	amdgpu_asic_init_doorbell_index(adev);

	/* doorbell bar mapping */
	adev->doorbell.base = pci_resource_start(adev->pdev, 2);
	adev->doorbell.size = pci_resource_len(adev->pdev, 2);

	adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
					     adev->doorbell_index.max_assignment+1);
	if (adev->doorbell.num_doorbells == 0)
		return -EINVAL;

	/* For Vega, reserve and map two pages on doorbell BAR since SDMA
	 * paging queue doorbell use the second page. The
	 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
	 * doorbells are in the first page. So with paging queue enabled,
	 * the max num_doorbells should be increased by one page (0x400 in dwords).
	 */
	if (adev->asic_type >= CHIP_VEGA10)
		adev->doorbell.num_doorbells += 0x400;

	adev->doorbell.ptr = ioremap(adev->doorbell.base,
				     adev->doorbell.num_doorbells *
				     sizeof(u32));
	if (adev->doorbell.ptr == NULL)
		return -ENOMEM;

	return 0;
}

/**
 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Tear down doorbell driver information (CIK)
 */
static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
{
	iounmap(adev->doorbell.ptr);
	adev->doorbell.ptr = NULL;
}


/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */

/**
 * amdgpu_device_wb_fini - Disable Writeback and free memory
 *
 * @adev: amdgpu_device pointer
 *
 * Disables Writeback and frees the Writeback memory (all asics).
 * Used at driver shutdown.
 */
static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
{
	if (adev->wb.wb_obj) {
		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
				      &adev->wb.gpu_addr,
				      (void **)&adev->wb.wb);
		adev->wb.wb_obj = NULL;
	}
}

/**
 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
 *
 * @adev: amdgpu_device pointer
 *
 * Initializes writeback and allocates writeback memory (all asics).
 * Used at driver startup.
 * Returns 0 on success or an -error on failure.
 */
static int amdgpu_device_wb_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->wb.wb_obj == NULL) {
		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
					    (void **)&adev->wb.wb);
		if (r) {
			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
			return r;
		}

		adev->wb.num_wb = AMDGPU_MAX_WB;
		memset(&adev->wb.used, 0, sizeof(adev->wb.used));

		/* clear wb memory */
		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
	}

	return 0;
}

/**
 * amdgpu_device_wb_get - Allocate a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Allocate a wb slot for use by the driver (all asics).
 * Returns 0 on success or -EINVAL on failure.
 */
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
{
	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);

	if (offset < adev->wb.num_wb) {
		__set_bit(offset, adev->wb.used);
		*wb = offset << 3; /* convert to dw offset */
		return 0;
	} else {
		return -EINVAL;
	}
}

/**
 * amdgpu_device_wb_free - Free a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Free a wb slot allocated for use by the driver (all asics)
 */
void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
{
	wb >>= 3;
	if (wb < adev->wb.num_wb)
		__clear_bit(wb, adev->wb.used);
}

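/*
 * Illustrative pairing of the two helpers above (error handling elided and
 * "wb_idx" is just a local placeholder): the value returned by
 * amdgpu_device_wb_get() is a dword offset into the writeback buffer, so a
 * typical caller would look roughly like:
 *
 *	u32 wb_idx;
 *
 *	if (!amdgpu_device_wb_get(adev, &wb_idx)) {
 *		// read/write the slot via adev->wb.wb[wb_idx] ...
 *		amdgpu_device_wb_free(adev, wb_idx);
 *	}
 */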
/**
 * amdgpu_device_resize_fb_bar - try to resize FB BAR
 *
 * @adev: amdgpu_device pointer
 *
 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 * to fail, but if any of the BARs is not accessible after the resize we abort
 * driver loading by returning -ENODEV.
 */
int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
{
	u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
	u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
	struct pci_bus *root;
	struct resource *res;
	unsigned i;
	u16 cmd;
	int r;

	/* Bypass for VF */
	if (amdgpu_sriov_vf(adev))
		return 0;

	/* Check if the root BUS has 64bit memory resources */
	root = adev->pdev->bus;
	while (root->parent)
		root = root->parent;

	pci_bus_for_each_resource(root, res, i) {
		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
		    res->start > 0x100000000ull)
			break;
	}

	/* Trying to resize is pointless without a root hub window above 4GB */
	if (!res)
		return 0;

	/* Disable memory decoding while we change the BAR addresses and size */
	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
	pci_write_config_word(adev->pdev, PCI_COMMAND,
			      cmd & ~PCI_COMMAND_MEMORY);

	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
	amdgpu_device_doorbell_fini(adev);
	if (adev->asic_type >= CHIP_BONAIRE)
		pci_release_resource(adev->pdev, 2);

	pci_release_resource(adev->pdev, 0);

	r = pci_resize_resource(adev->pdev, 0, rbar_size);
	if (r == -ENOSPC)
		DRM_INFO("Not enough PCI address space for a large BAR.");
	else if (r && r != -ENOTSUPP)
		DRM_ERROR("Problem resizing BAR0 (%d).", r);

	pci_assign_unassigned_bus_resources(adev->pdev->bus);

	/* When the doorbell or fb BAR isn't available we have no chance of
	 * using the device.
	 */
	r = amdgpu_device_doorbell_init(adev);
	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
		return -ENODEV;

	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);

	return 0;
}

/*
 * GPU helper functions.
 */
/**
 * amdgpu_device_need_post - check if the hw needs post or not
 *
 * @adev: amdgpu_device pointer
 *
 * Check if the asic has been initialized (all asics) at driver startup
 * or post is needed if hw reset is performed.
 * Returns true if post is needed, false if not.
 */
bool amdgpu_device_need_post(struct amdgpu_device *adev)
{
	uint32_t reg;

	if (amdgpu_sriov_vf(adev))
		return false;

	if (amdgpu_passthrough(adev)) {
		/* for FIJI: in the whole-GPU pass-through virtualization case, after a
		 * VM reboot some old SMC firmware still needs the driver to do a vPost,
		 * otherwise the GPU hangs. SMC firmware versions above 22.15 don't have
		 * this flaw, so we force vPost for SMC versions below 22.15.
		 */
		if (adev->asic_type == CHIP_FIJI) {
			int err;
			uint32_t fw_ver;
			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
			/* force vPost if error occurred */
			if (err)
				return true;

			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
			if (fw_ver < 0x00160e00)
				return true;
		}
	}

	if (adev->has_hw_reset) {
		adev->has_hw_reset = false;
		return true;
	}

	/* bios scratch used on CIK+ */
	if (adev->asic_type >= CHIP_BONAIRE)
		return amdgpu_atombios_scratch_need_asic_init(adev);

	/* check MEM_SIZE for older asics */
	reg = amdgpu_asic_get_config_memsize(adev);

	if ((reg != 0) && (reg != 0xffffffff))
		return false;

	return true;
}

/* if we get transitioned to only one device, take VGA back */
/**
 * amdgpu_device_vga_set_decode - enable/disable vga decode
 *
 * @cookie: amdgpu_device pointer
 * @state: enable/disable vga decode
 *
 * Enable/disable vga decode (all asics).
 * Returns VGA resource flags.
 */
static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
{
	struct amdgpu_device *adev = cookie;
	amdgpu_asic_set_vga_state(adev, state);
	if (state)
		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
	else
		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
}

/**
 * amdgpu_device_check_block_size - validate the vm block size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm block size specified via module parameter.
 * The vm block size defines number of bits in page table versus page directory,
 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
 * page table and the remaining bits are in the page directory.
 */
static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
{
	/* defines number of bits in page table versus page directory,
	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
	 * page table and the remaining bits are in the page directory */
	if (amdgpu_vm_block_size == -1)
		return;

	if (amdgpu_vm_block_size < 9) {
		dev_warn(adev->dev, "VM page table size (%d) too small\n",
			 amdgpu_vm_block_size);
		amdgpu_vm_block_size = -1;
	}
}

/**
 * amdgpu_device_check_vm_size - validate the vm size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm size in GB specified via module parameter.
 * The VM size is the size of the GPU virtual memory space in GB.
 */
static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
{
	/* no need to check the default value */
	if (amdgpu_vm_size == -1)
		return;

	if (amdgpu_vm_size < 1) {
		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
			 amdgpu_vm_size);
		amdgpu_vm_size = -1;
	}
}

static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
{
	struct sysinfo si;
	bool is_os_64 = (sizeof(void *) == 8);
	uint64_t total_memory;
	uint64_t dram_size_seven_GB = 0x1B8000000;
	uint64_t dram_size_three_GB = 0xB8000000;

	if (amdgpu_smu_memory_pool_size == 0)
		return;

	if (!is_os_64) {
		DRM_WARN("Not 64-bit OS, feature not supported\n");
		goto def_value;
	}
	si_meminfo(&si);
	total_memory = (uint64_t)si.totalram * si.mem_unit;

	if ((amdgpu_smu_memory_pool_size == 1) ||
	    (amdgpu_smu_memory_pool_size == 2)) {
		if (total_memory < dram_size_three_GB)
			goto def_value1;
	} else if ((amdgpu_smu_memory_pool_size == 4) ||
		   (amdgpu_smu_memory_pool_size == 8)) {
		if (total_memory < dram_size_seven_GB)
			goto def_value1;
	} else {
		DRM_WARN("Smu memory pool size not supported\n");
		goto def_value;
	}
	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;

	return;

def_value1:
	DRM_WARN("Not enough system memory\n");
def_value:
	adev->pm.smu_prv_buffer_size = 0;
}

/**
 * amdgpu_device_check_arguments - validate module params
 *
 * @adev: amdgpu_device pointer
 *
 * Validates certain module parameters and updates
 * the associated values used by the driver (all asics).
 */
static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
{
	if (amdgpu_sched_jobs < 4) {
		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = 4;
	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
	}

	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
		/* gart size must be greater or equal to 32M */
		dev_warn(adev->dev, "gart size (%d) too small\n",
			 amdgpu_gart_size);
		amdgpu_gart_size = -1;
	}

	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
		/* gtt size must be greater or equal to 32M */
		dev_warn(adev->dev, "gtt size (%d) too small\n",
			 amdgpu_gtt_size);
		amdgpu_gtt_size = -1;
	}

	/* valid range is between 4 and 9 inclusive */
	if (amdgpu_vm_fragment_size != -1 &&
	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
		dev_warn(adev->dev, "valid range is between 4 and 9\n");
		amdgpu_vm_fragment_size = -1;
	}

	amdgpu_device_check_smu_prv_buffer_size(adev);

	amdgpu_device_check_vm_size(adev);

	amdgpu_device_check_block_size(adev);

	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);

	return 0;
}

/**
 * amdgpu_switcheroo_set_state - set switcheroo state
 *
 * @pdev: pci dev pointer
 * @state: vga_switcheroo state
 *
 * Callback for the switcheroo driver. Suspends or resumes the
 * asics before or after it is powered up using ACPI methods.
 */
static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
{
	struct drm_device *dev = pci_get_drvdata(pdev);
	int r;

	if (amdgpu_device_supports_boco(dev) && state == VGA_SWITCHEROO_OFF)
		return;

	if (state == VGA_SWITCHEROO_ON) {
		pr_info("amdgpu: switched on\n");
		/* don't suspend or resume card normally */
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;

		pci_set_power_state(dev->pdev, PCI_D0);
		pci_restore_state(dev->pdev);
		r = pci_enable_device(dev->pdev);
		if (r)
			DRM_WARN("pci_enable_device failed (%d)\n", r);
		amdgpu_device_resume(dev, true);

		dev->switch_power_state = DRM_SWITCH_POWER_ON;
		drm_kms_helper_poll_enable(dev);
	} else {
		pr_info("amdgpu: switched off\n");
		drm_kms_helper_poll_disable(dev);
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
		amdgpu_device_suspend(dev, true);
		pci_save_state(dev->pdev);
		/* Shut down the device */
		pci_disable_device(dev->pdev);
		pci_set_power_state(dev->pdev, PCI_D3cold);
		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
	}
}

/**
 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
 *
 * @pdev: pci dev pointer
 *
 * Callback for the switcheroo driver. Check if the switcheroo
 * state can be changed.
 * Returns true if the state can be changed, false if not.
 */
static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);

	/*
	 * FIXME: open_count is protected by drm_global_mutex but that would lead to
	 * locking inversion with the driver load path. And the access here is
	 * completely racy anyway. So don't bother with locking for now.
	 */
	return atomic_read(&dev->open_count) == 0;
}

static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
	.set_gpu_state = amdgpu_switcheroo_set_state,
	.reprobe = NULL,
	.can_switch = amdgpu_switcheroo_can_switch,
};

/**
 * amdgpu_device_ip_set_clockgating_state - set the CG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: clockgating state (gate or ungate)
 *
 * Sets the requested clockgating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_clockgating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

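/*
 * Illustrative call of the helper above (the exact block type and state
 * depend on the caller), e.g. gating clocks for all UVD instances:
 *
 *	amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
 *					       AMD_CG_STATE_GATE);
 */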
/**
 * amdgpu_device_ip_set_powergating_state - set the PG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: powergating state (gate or ungate)
 *
 * Sets the requested powergating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_powergating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_powergating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

/**
 * amdgpu_device_ip_get_clockgating_state - get the CG state
 *
 * @adev: amdgpu_device pointer
 * @flags: clockgating feature flags
 *
 * Walks the list of IPs on the device and updates the clockgating
 * flags for each IP.
 * Updates @flags with the feature flags for each hardware IP where
 * clockgating is enabled.
 */
void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
					    u32 *flags)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
	}
}

/**
 * amdgpu_device_ip_wait_for_idle - wait for idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Waits for the requested hardware IP to be idle.
 * Returns 0 for success or a negative error code on failure.
 */
int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
				   enum amd_ip_block_type block_type)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type) {
			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
			if (r)
				return r;
			break;
		}
	}
	return 0;

}

/**
 * amdgpu_device_ip_is_idle - is the hardware IP idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Check if the hardware IP is idle or not.
 * Returns true if the IP is idle, false if not.
 */
bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
			      enum amd_ip_block_type block_type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type)
			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
	}
	return true;

}

/**
 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
 *
 * @adev: amdgpu_device pointer
 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Returns a pointer to the hardware IP block structure
 * if it exists for the asic, otherwise NULL.
 */
struct amdgpu_ip_block *
amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
			      enum amd_ip_block_type type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++)
		if (adev->ip_blocks[i].version->type == type)
			return &adev->ip_blocks[i];

	return NULL;
}

/**
 * amdgpu_device_ip_block_version_cmp
 *
 * @adev: amdgpu_device pointer
 * @type: enum amd_ip_block_type
 * @major: major version
 * @minor: minor version
 *
 * return 0 if equal or greater
 * return 1 if smaller or the ip_block doesn't exist
 */
int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
				       enum amd_ip_block_type type,
				       u32 major, u32 minor)
{
	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);

	if (ip_block && ((ip_block->version->major > major) ||
			((ip_block->version->major == major) &&
			(ip_block->version->minor >= minor))))
		return 0;

	return 1;
}

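/*
 * Illustrative use of the comparison helper above: gate a code path on a
 * minimum IP version, for example requiring the SMC (SMU) block to be at
 * least version 7.0 before touching a register that only exists there:
 *
 *	if (amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_SMC, 7, 0) == 0) {
 *		// SMC IP is version 7.0 or newer on this asic
 *	}
 */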
/**
 * amdgpu_device_ip_block_add
 *
 * @adev: amdgpu_device pointer
 * @ip_block_version: pointer to the IP to add
 *
 * Adds the IP block driver information to the collection of IPs
 * on the asic.
 */
int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
			       const struct amdgpu_ip_block_version *ip_block_version)
{
	if (!ip_block_version)
		return -EINVAL;

	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
		  ip_block_version->funcs->name);

	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;

	return 0;
}

/**
 * amdgpu_device_enable_virtual_display - enable virtual display feature
 *
 * @adev: amdgpu_device pointer
 *
 * Enables the virtual display feature if the user has enabled it via
 * the module parameter virtual_display. This feature provides a virtual
 * display hardware on headless boards or in virtualized environments.
 * This function parses and validates the configuration string specified by
 * the user and configures the virtual display configuration (number of
 * virtual connectors, crtcs, etc.) specified.
 */
static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
{
	adev->enable_virtual_display = false;

	if (amdgpu_virtual_display) {
		struct drm_device *ddev = adev->ddev;
		const char *pci_address_name = pci_name(ddev->pdev);
		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;

		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
		pciaddstr_tmp = pciaddstr;
		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
			pciaddname = strsep(&pciaddname_tmp, ",");
			if (!strcmp("all", pciaddname)
			    || !strcmp(pci_address_name, pciaddname)) {
				long num_crtc;
				int res = -1;

				adev->enable_virtual_display = true;

				if (pciaddname_tmp)
					res = kstrtol(pciaddname_tmp, 10,
						      &num_crtc);

				if (!res) {
					if (num_crtc < 1)
						num_crtc = 1;
					if (num_crtc > 6)
						num_crtc = 6;
					adev->mode_info.num_crtc = num_crtc;
				} else {
					adev->mode_info.num_crtc = 1;
				}
				break;
			}
		}

		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
			 amdgpu_virtual_display, pci_address_name,
			 adev->enable_virtual_display, adev->mode_info.num_crtc);

		kfree(pciaddstr);
	}
}

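/*
 * The virtual_display string parsed above is a semicolon separated list of
 * "<pci address>,<crtc count>" entries, or "all,<crtc count>" to match every
 * device. A hypothetical kernel command line setting (the PCI addresses are
 * placeholders) could look like:
 *
 *	amdgpu.virtual_display=0000:01:00.0,2;0000:02:00.0,1
 */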
/**
 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
 *
 * @adev: amdgpu_device pointer
 *
 * Parses the asic configuration parameters specified in the gpu info
 * firmware and makes them available to the driver for use in configuring
 * the asic.
 * Returns 0 on success, -EINVAL on failure.
 */
static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	const struct gpu_info_firmware_header_v1_0 *hdr;

	adev->firmware.gpu_info_fw = NULL;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_VERDE:
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_OLAND:
	case CHIP_HAINAN:
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
#endif
	case CHIP_VEGA20:
	default:
		return 0;
	case CHIP_VEGA10:
		chip_name = "vega10";
		break;
	case CHIP_VEGA12:
		chip_name = "vega12";
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 8)
			chip_name = "raven2";
		else if (adev->pdev->device == 0x15d8)
			chip_name = "picasso";
		else
			chip_name = "raven";
		break;
	case CHIP_ARCTURUS:
		chip_name = "arcturus";
		break;
	case CHIP_RENOIR:
		chip_name = "renoir";
		break;
	case CHIP_NAVI10:
		chip_name = "navi10";
		break;
	case CHIP_NAVI14:
		chip_name = "navi14";
		break;
	case CHIP_NAVI12:
		chip_name = "navi12";
		break;
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
	err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
	if (err) {
		dev_err(adev->dev,
			"Failed to load gpu_info firmware \"%s\"\n",
			fw_name);
		goto out;
	}
	err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
	if (err) {
		dev_err(adev->dev,
			"Failed to validate gpu_info firmware \"%s\"\n",
			fw_name);
		goto out;
	}

	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);

	switch (hdr->version_major) {
	case 1:
	{
		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
								le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
			goto parse_soc_bounding_box;

		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
		adev->gfx.config.max_texture_channel_caches =
			le32_to_cpu(gpu_info_fw->gc_num_tccs);
		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
		adev->gfx.config.double_offchip_lds_buf =
			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
		adev->gfx.cu_info.max_waves_per_simd =
			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
		adev->gfx.cu_info.max_scratch_slots_per_cu =
			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
		if (hdr->version_minor >= 1) {
			const struct gpu_info_firmware_v1_1 *gpu_info_fw =
				(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			adev->gfx.config.num_sc_per_sh =
				le32_to_cpu(gpu_info_fw->num_sc_per_sh);
			adev->gfx.config.num_packer_per_sc =
				le32_to_cpu(gpu_info_fw->num_packer_per_sc);
		}

parse_soc_bounding_box:
		/*
		 * soc bounding box info is not integrated in the discovery table,
		 * we always need to parse it from the gpu info firmware.
		 */
		if (hdr->version_minor == 2) {
			const struct gpu_info_firmware_v1_2 *gpu_info_fw =
				(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
		}
		break;
	}
	default:
		dev_err(adev->dev,
			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
		err = -EINVAL;
		goto out;
	}
out:
	return err;
}

/**
 * amdgpu_device_ip_early_init - run early init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Early initialization pass for hardware IPs. The hardware IPs that make
 * up each asic are discovered and each IP's early_init callback is run. This
 * is the first stage in initializing the asic.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
{
	int i, r;

	amdgpu_device_enable_virtual_display(adev);

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
			adev->family = AMDGPU_FAMILY_CZ;
		else
			adev->family = AMDGPU_FAMILY_VI;

		r = vi_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_VERDE:
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_OLAND:
	case CHIP_HAINAN:
		adev->family = AMDGPU_FAMILY_SI;
		r = si_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
		if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII))
			adev->family = AMDGPU_FAMILY_CI;
		else
			adev->family = AMDGPU_FAMILY_KV;

		r = cik_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#endif
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RAVEN:
	case CHIP_ARCTURUS:
	case CHIP_RENOIR:
		if (adev->asic_type == CHIP_RAVEN ||
		    adev->asic_type == CHIP_RENOIR)
			adev->family = AMDGPU_FAMILY_RV;
		else
			adev->family = AMDGPU_FAMILY_AI;

		r = soc15_set_ip_blocks(adev);
		if (r)
			return r;
		break;
	case CHIP_NAVI10:
	case CHIP_NAVI14:
	case CHIP_NAVI12:
		adev->family = AMDGPU_FAMILY_NV;

		r = nv_set_ip_blocks(adev);
		if (r)
			return r;
		break;
	default:
		/* FIXME: not supported yet */
		return -EINVAL;
	}

	r = amdgpu_device_parse_gpu_info_fw(adev);
	if (r)
		return r;

	if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
		amdgpu_discovery_get_gfx_info(adev);

	amdgpu_amdkfd_device_probe(adev);

	if (amdgpu_sriov_vf(adev)) {
		r = amdgpu_virt_request_full_gpu(adev, true);
		if (r)
			return -EAGAIN;
	}

	adev->pm.pp_feature = amdgpu_pp_feature_mask;
	if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
			DRM_ERROR("disabled ip block: %d <%s>\n",
				  i, adev->ip_blocks[i].version->funcs->name);
			adev->ip_blocks[i].status.valid = false;
		} else {
			if (adev->ip_blocks[i].version->funcs->early_init) {
				r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
				if (r == -ENOENT) {
					adev->ip_blocks[i].status.valid = false;
				} else if (r) {
					DRM_ERROR("early_init of IP block <%s> failed %d\n",
						  adev->ip_blocks[i].version->funcs->name, r);
					return r;
				} else {
					adev->ip_blocks[i].status.valid = true;
				}
			} else {
				adev->ip_blocks[i].status.valid = true;
			}
		}
		/* get the vbios after the asic_funcs are set up */
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
			/* Read BIOS */
			if (!amdgpu_get_bios(adev))
				return -EINVAL;

			r = amdgpu_atombios_init(adev);
			if (r) {
				dev_err(adev->dev, "amdgpu_atombios_init failed\n");
				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
				return r;
			}
		}
	}

	adev->cg_flags &= amdgpu_cg_mask;
	adev->pg_flags &= amdgpu_pg_mask;

	return 0;
}

0a4f2520
RZ
1761static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
1762{
1763 int i, r;
1764
1765 for (i = 0; i < adev->num_ip_blocks; i++) {
1766 if (!adev->ip_blocks[i].status.sw)
1767 continue;
1768 if (adev->ip_blocks[i].status.hw)
1769 continue;
1770 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 1771 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
1772 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
1773 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1774 if (r) {
1775 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1776 adev->ip_blocks[i].version->funcs->name, r);
1777 return r;
1778 }
1779 adev->ip_blocks[i].status.hw = true;
1780 }
1781 }
1782
1783 return 0;
1784}
1785
1786static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
1787{
1788 int i, r;
1789
1790 for (i = 0; i < adev->num_ip_blocks; i++) {
1791 if (!adev->ip_blocks[i].status.sw)
1792 continue;
1793 if (adev->ip_blocks[i].status.hw)
1794 continue;
1795 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1796 if (r) {
1797 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1798 adev->ip_blocks[i].version->funcs->name, r);
1799 return r;
1800 }
1801 adev->ip_blocks[i].status.hw = true;
1802 }
1803
1804 return 0;
1805}
1806
7a3e0bb2
RZ
1807static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
1808{
1809 int r = 0;
1810 int i;
80f41f84 1811 uint32_t smu_version;
7a3e0bb2
RZ
1812
1813 if (adev->asic_type >= CHIP_VEGA10) {
1814 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
1815 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
1816 continue;
1817
 1818	 /* no need to do the fw loading again if already done */
1819 if (adev->ip_blocks[i].status.hw == true)
1820 break;
1821
1822 if (adev->in_gpu_reset || adev->in_suspend) {
1823 r = adev->ip_blocks[i].version->funcs->resume(adev);
1824 if (r) {
1825 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 1826 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
1827 return r;
1828 }
1829 } else {
1830 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1831 if (r) {
1832 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1833 adev->ip_blocks[i].version->funcs->name, r);
1834 return r;
7a3e0bb2 1835 }
7a3e0bb2 1836 }
482f0e53
ML
1837
1838 adev->ip_blocks[i].status.hw = true;
1839 break;
7a3e0bb2
RZ
1840 }
1841 }
482f0e53 1842
8973d9ec
ED
1843 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
1844 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 1845
80f41f84 1846 return r;
7a3e0bb2
RZ
1847}
1848
e3ecdffa
AD
1849/**
1850 * amdgpu_device_ip_init - run init for hardware IPs
1851 *
1852 * @adev: amdgpu_device pointer
1853 *
1854 * Main initialization pass for hardware IPs. The list of all the hardware
1855 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
1856 * are run. sw_init initializes the software state associated with each IP
1857 * and hw_init initializes the hardware associated with each IP.
1858 * Returns 0 on success, negative error code on failure.
1859 */
06ec9070 1860static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
1861{
1862 int i, r;
1863
c030f2e4 1864 r = amdgpu_ras_init(adev);
1865 if (r)
1866 return r;
1867
d38ceaf9 1868 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1869 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 1870 continue;
a1255107 1871 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 1872 if (r) {
a1255107
AD
1873 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
1874 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 1875 goto init_failed;
2c1a2784 1876 }
a1255107 1877 adev->ip_blocks[i].status.sw = true;
bfca0289 1878
d38ceaf9 1879 /* need to do gmc hw init early so we can allocate gpu mem */
a1255107 1880 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
06ec9070 1881 r = amdgpu_device_vram_scratch_init(adev);
2c1a2784
AD
1882 if (r) {
1883 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
72d3f592 1884 goto init_failed;
2c1a2784 1885 }
a1255107 1886 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
1887 if (r) {
1888 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 1889 goto init_failed;
2c1a2784 1890 }
06ec9070 1891 r = amdgpu_device_wb_init(adev);
2c1a2784 1892 if (r) {
06ec9070 1893 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 1894 goto init_failed;
2c1a2784 1895 }
a1255107 1896 adev->ip_blocks[i].status.hw = true;
2493664f
ML
1897
1898 /* right after GMC hw init, we create CSA */
f92d5c61 1899 if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
1e256e27
RZ
1900 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
1901 AMDGPU_GEM_DOMAIN_VRAM,
1902 AMDGPU_CSA_SIZE);
2493664f
ML
1903 if (r) {
1904 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 1905 goto init_failed;
2493664f
ML
1906 }
1907 }
d38ceaf9
AD
1908 }
1909 }
1910
c9ffa427
YT
1911 if (amdgpu_sriov_vf(adev))
1912 amdgpu_virt_init_data_exchange(adev);
1913
533aed27
AG
1914 r = amdgpu_ib_pool_init(adev);
1915 if (r) {
1916 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
1917 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
1918 goto init_failed;
1919 }
1920
c8963ea4
RZ
1921 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
1922 if (r)
72d3f592 1923 goto init_failed;
0a4f2520
RZ
1924
1925 r = amdgpu_device_ip_hw_init_phase1(adev);
1926 if (r)
72d3f592 1927 goto init_failed;
0a4f2520 1928
7a3e0bb2
RZ
1929 r = amdgpu_device_fw_loading(adev);
1930 if (r)
72d3f592 1931 goto init_failed;
7a3e0bb2 1932
0a4f2520
RZ
1933 r = amdgpu_device_ip_hw_init_phase2(adev);
1934 if (r)
72d3f592 1935 goto init_failed;
d38ceaf9 1936
121a2bc6
AG
1937 /*
 1938	 * Retired pages will be loaded from eeprom and reserved here;
 1939	 * this should be called after amdgpu_device_ip_hw_init_phase2, since
 1940	 * for some ASICs the RAS EEPROM code relies on the SMU being fully
 1941	 * functional for I2C communication, which is only true at this point.
 1942	 * recovery_init may fail, but it can free all resources allocated by
 1943	 * itself and its failure should not stop the amdgpu init process.
 1944	 *
 1945	 * Note: theoretically, this should be called before all vram allocations
 1946	 * to protect retired pages from being abused.
1947 */
1948 amdgpu_ras_recovery_init(adev);
1949
3e2e2ab5
HZ
1950 if (adev->gmc.xgmi.num_physical_nodes > 1)
1951 amdgpu_xgmi_add_device(adev);
1884734a 1952 amdgpu_amdkfd_device_init(adev);
c6332b97 1953
72d3f592 1954init_failed:
c9ffa427 1955 if (amdgpu_sriov_vf(adev))
c6332b97 1956 amdgpu_virt_release_full_gpu(adev, true);
1957
72d3f592 1958 return r;
d38ceaf9
AD
1959}
1960
e3ecdffa
AD
1961/**
1962 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
1963 *
1964 * @adev: amdgpu_device pointer
1965 *
1966 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
1967 * this function before a GPU reset. If the value is retained after a
 1968	 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
1969 */
06ec9070 1970static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
1971{
1972 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
1973}
1974
e3ecdffa
AD
1975/**
1976 * amdgpu_device_check_vram_lost - check if vram is valid
1977 *
1978 * @adev: amdgpu_device pointer
1979 *
1980 * Checks the reset magic value written to the gart pointer in VRAM.
1981 * The driver calls this after a GPU reset to see if the contents of
 1982	 * VRAM were lost or not.
 1983	 * Returns true if vram is lost, false if not.
1984 */
06ec9070 1985static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8
CZ
1986{
1987 return !!memcmp(adev->gart.ptr, adev->reset_magic,
1988 AMDGPU_RESET_MAGIC_NUM);
1989}
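The two helpers above are a pair: the magic is written while VRAM contents are known good and compared again after a reset to decide whether resident buffers must be treated as lost. A minimal sketch of how a reset path could use them follows; the wrapper name is illustrative only and is not the driver's actual reset entry point (which lives elsewhere in this file).

static int amdgpu_device_example_reset(struct amdgpu_device *adev)
{
	bool vram_lost;
	int r;

	/* record the magic while VRAM contents are still trusted */
	amdgpu_device_fill_reset_magic(adev);

	r = amdgpu_asic_reset(adev);
	if (r)
		return r;

	/* if the magic did not survive, VRAM was wiped and any resident
	 * buffers have to be revalidated or re-uploaded by the caller */
	vram_lost = amdgpu_device_check_vram_lost(adev);
	if (vram_lost)
		DRM_INFO("VRAM is lost due to GPU reset!\n");

	return 0;
}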
1990
e3ecdffa 1991/**
1112a46b 1992 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
1993 *
1994 * @adev: amdgpu_device pointer
b8b72130 1995 * @state: clockgating state (gate or ungate)
e3ecdffa 1996 *
e3ecdffa 1997 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
1998 * set_clockgating_state callbacks are run.
 1999	 * During late init this pass enables clockgating for hardware IPs;
 2000	 * during fini or suspend it disables clockgating for hardware IPs.
e3ecdffa
AD
2001 * Returns 0 on success, negative error code on failure.
2002 */
fdd34271 2003
1112a46b
RZ
2004static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2005 enum amd_clockgating_state state)
d38ceaf9 2006{
1112a46b 2007 int i, j, r;
d38ceaf9 2008
4a2ba394
SL
2009 if (amdgpu_emu_mode == 1)
2010 return 0;
2011
1112a46b
RZ
2012 for (j = 0; j < adev->num_ip_blocks; j++) {
2013 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2014 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 2015 continue;
4a446d55 2016 /* skip CG for VCE/UVD, it's handled specially */
a1255107 2017 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 2018 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 2019 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2020 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 2021 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 2022 /* enable clockgating to save power */
a1255107 2023 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 2024 state);
4a446d55
AD
2025 if (r) {
2026 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 2027 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
2028 return r;
2029 }
b0b00ff1 2030 }
d38ceaf9 2031 }
06b18f61 2032
c9f96fd5
RZ
2033 return 0;
2034}
2035
1112a46b 2036static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
c9f96fd5 2037{
1112a46b 2038 int i, j, r;
06b18f61 2039
c9f96fd5
RZ
2040 if (amdgpu_emu_mode == 1)
2041 return 0;
2042
1112a46b
RZ
2043 for (j = 0; j < adev->num_ip_blocks; j++) {
2044 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2045 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5
RZ
2046 continue;
2047 /* skip CG for VCE/UVD, it's handled specially */
2048 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2049 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2050 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2051 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2052 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2053 /* enable powergating to save power */
2054 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2055 state);
c9f96fd5
RZ
2056 if (r) {
2057 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2058 adev->ip_blocks[i].version->funcs->name, r);
2059 return r;
2060 }
2061 }
2062 }
2dc80b00
S
2063 return 0;
2064}
2065
beff74bc
AD
2066static int amdgpu_device_enable_mgpu_fan_boost(void)
2067{
2068 struct amdgpu_gpu_instance *gpu_ins;
2069 struct amdgpu_device *adev;
2070 int i, ret = 0;
2071
2072 mutex_lock(&mgpu_info.mutex);
2073
2074 /*
2075 * MGPU fan boost feature should be enabled
2076 * only when there are two or more dGPUs in
2077 * the system
2078 */
2079 if (mgpu_info.num_dgpu < 2)
2080 goto out;
2081
2082 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2083 gpu_ins = &(mgpu_info.gpu_ins[i]);
2084 adev = gpu_ins->adev;
2085 if (!(adev->flags & AMD_IS_APU) &&
2086 !gpu_ins->mgpu_fan_enabled &&
2087 adev->powerplay.pp_funcs &&
2088 adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
2089 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2090 if (ret)
2091 break;
2092
2093 gpu_ins->mgpu_fan_enabled = 1;
2094 }
2095 }
2096
2097out:
2098 mutex_unlock(&mgpu_info.mutex);
2099
2100 return ret;
2101}
2102
e3ecdffa
AD
2103/**
2104 * amdgpu_device_ip_late_init - run late init for hardware IPs
2105 *
2106 * @adev: amdgpu_device pointer
2107 *
2108 * Late initialization pass for hardware IPs. The list of all the hardware
2109 * IPs that make up the asic is walked and the late_init callbacks are run.
2110 * late_init covers any special initialization that an IP requires
 2111	 * after all of them have been initialized or something that needs to happen
2112 * late in the init process.
2113 * Returns 0 on success, negative error code on failure.
2114 */
06ec9070 2115static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2116{
60599a03 2117 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2118 int i = 0, r;
2119
2120 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2121 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2122 continue;
2123 if (adev->ip_blocks[i].version->funcs->late_init) {
2124 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2125 if (r) {
2126 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2127 adev->ip_blocks[i].version->funcs->name, r);
2128 return r;
2129 }
2dc80b00 2130 }
73f847db 2131 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2132 }
2133
1112a46b
RZ
2134 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2135 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2136
06ec9070 2137 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2138
beff74bc
AD
2139 r = amdgpu_device_enable_mgpu_fan_boost();
2140 if (r)
2141 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2142
60599a03
EQ
2143
2144 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2145 mutex_lock(&mgpu_info.mutex);
2146
2147 /*
 2148	 * Reset the device's p-state to low, as it was booted with it high.
 2149	 *
 2150	 * This should be performed only after all devices from the same
 2151	 * hive have been initialized.
 2152	 *
 2153	 * However, the number of devices in the hive is not known in advance;
 2154	 * they are counted one by one as each device initializes.
 2155	 *
 2156	 * So we wait until all XGMI interlinked devices are initialized.
 2157	 * This may introduce some delay, as those devices may come from
 2158	 * different hives. But that should be OK.
2159 */
2160 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2161 for (i = 0; i < mgpu_info.num_gpu; i++) {
2162 gpu_instance = &(mgpu_info.gpu_ins[i]);
2163 if (gpu_instance->adev->flags & AMD_IS_APU)
2164 continue;
2165
2166 r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 0);
2167 if (r) {
2168 DRM_ERROR("pstate setting failed (%d).\n", r);
2169 break;
2170 }
2171 }
2172 }
2173
2174 mutex_unlock(&mgpu_info.mutex);
2175 }
2176
d38ceaf9
AD
2177 return 0;
2178}
2179
e3ecdffa
AD
2180/**
2181 * amdgpu_device_ip_fini - run fini for hardware IPs
2182 *
2183 * @adev: amdgpu_device pointer
2184 *
2185 * Main teardown pass for hardware IPs. The list of all the hardware
2186 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2187 * are run. hw_fini tears down the hardware associated with each IP
2188 * and sw_fini tears down any software state associated with each IP.
2189 * Returns 0 on success, negative error code on failure.
2190 */
06ec9070 2191static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
d38ceaf9
AD
2192{
2193 int i, r;
2194
c030f2e4 2195 amdgpu_ras_pre_fini(adev);
2196
a82400b5
AG
2197 if (adev->gmc.xgmi.num_physical_nodes > 1)
2198 amdgpu_xgmi_remove_device(adev);
2199
1884734a 2200 amdgpu_amdkfd_device_fini(adev);
05df1f01
RZ
2201
2202 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2203 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2204
3e96dbfd
AD
2205 /* need to disable SMC first */
2206 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2207 if (!adev->ip_blocks[i].status.hw)
3e96dbfd 2208 continue;
fdd34271 2209 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
a1255107 2210 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3e96dbfd
AD
2211 /* XXX handle errors */
2212 if (r) {
2213 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
a1255107 2214 adev->ip_blocks[i].version->funcs->name, r);
3e96dbfd 2215 }
a1255107 2216 adev->ip_blocks[i].status.hw = false;
3e96dbfd
AD
2217 break;
2218 }
2219 }
2220
d38ceaf9 2221 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2222 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2223 continue;
8201a67a 2224
a1255107 2225 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2226 /* XXX handle errors */
2c1a2784 2227 if (r) {
a1255107
AD
2228 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2229 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2230 }
8201a67a 2231
a1255107 2232 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2233 }
2234
9950cda2 2235
d38ceaf9 2236 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2237 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2238 continue;
c12aba3a
ML
2239
2240 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2241 amdgpu_ucode_free_bo(adev);
1e256e27 2242 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a
ML
2243 amdgpu_device_wb_fini(adev);
2244 amdgpu_device_vram_scratch_fini(adev);
533aed27 2245 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2246 }
2247
a1255107 2248 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2249 /* XXX handle errors */
2c1a2784 2250 if (r) {
a1255107
AD
2251 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2252 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2253 }
a1255107
AD
2254 adev->ip_blocks[i].status.sw = false;
2255 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2256 }
2257
a6dcfd9c 2258 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2259 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2260 continue;
a1255107
AD
2261 if (adev->ip_blocks[i].version->funcs->late_fini)
2262 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2263 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2264 }
2265
c030f2e4 2266 amdgpu_ras_fini(adev);
2267
030308fc 2268 if (amdgpu_sriov_vf(adev))
24136135
ML
2269 if (amdgpu_virt_release_full_gpu(adev, false))
2270 DRM_ERROR("failed to release exclusive mode on fini\n");
2493664f 2271
d38ceaf9
AD
2272 return 0;
2273}
2274
e3ecdffa 2275/**
beff74bc 2276 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2277 *
1112a46b 2278 * @work: work_struct.
e3ecdffa 2279 */
beff74bc 2280static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2281{
2282 struct amdgpu_device *adev =
beff74bc 2283 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2284 int r;
2285
2286 r = amdgpu_ib_ring_tests(adev);
2287 if (r)
2288 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2289}
2290
1e317b99
RZ
2291static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2292{
2293 struct amdgpu_device *adev =
2294 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2295
2296 mutex_lock(&adev->gfx.gfx_off_mutex);
2297 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2298 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2299 adev->gfx.gfx_off_state = true;
2300 }
2301 mutex_unlock(&adev->gfx.gfx_off_mutex);
2302}
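This delayed work only gates GFX off once gfx_off_req_count has dropped back to zero, and it re-checks that under gfx_off_mutex so a late request cannot race the power-down. The request count itself is maintained by a helper in amdgpu_gfx.c; the sketch below only illustrates that ref-count pattern (the function name and the 100 ms delay are assumptions for the example, not the driver's exact values).

static void example_gfx_off_allow(struct amdgpu_device *adev, bool allow)
{
	mutex_lock(&adev->gfx.gfx_off_mutex);

	if (!allow) {
		/* someone needs the GFX block powered: block GFXOFF and,
		 * if it is already engaged, bring the block back up */
		adev->gfx.gfx_off_req_count++;
		if (adev->gfx.gfx_off_state &&
		    !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false))
			adev->gfx.gfx_off_state = false;
	} else if (adev->gfx.gfx_off_req_count > 0) {
		/* last user gone: arm the delayed work above so GFXOFF is
		 * re-entered a little later, avoiding rapid on/off cycles */
		if (!--adev->gfx.gfx_off_req_count)
			schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
					      msecs_to_jiffies(100));
	}

	mutex_unlock(&adev->gfx.gfx_off_mutex);
}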
2303
e3ecdffa 2304/**
e7854a03 2305 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2306 *
2307 * @adev: amdgpu_device pointer
2308 *
2309 * Main suspend function for hardware IPs. The list of all the hardware
2310 * IPs that make up the asic is walked, clockgating is disabled and the
2311 * suspend callbacks are run. suspend puts the hardware and software state
2312 * in each IP into a state suitable for suspend.
2313 * Returns 0 on success, negative error code on failure.
2314 */
e7854a03
AD
2315static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2316{
2317 int i, r;
2318
05df1f01 2319 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271 2320 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2321
e7854a03
AD
2322 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2323 if (!adev->ip_blocks[i].status.valid)
2324 continue;
2325 /* displays are handled separately */
2326 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
e7854a03
AD
2327 /* XXX handle errors */
2328 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2329 /* XXX handle errors */
2330 if (r) {
2331 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2332 adev->ip_blocks[i].version->funcs->name, r);
482f0e53 2333 return r;
e7854a03 2334 }
482f0e53 2335 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
2336 }
2337 }
2338
e7854a03
AD
2339 return 0;
2340}
2341
2342/**
2343 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2344 *
2345 * @adev: amdgpu_device pointer
2346 *
2347 * Main suspend function for hardware IPs. The list of all the hardware
2348 * IPs that make up the asic is walked, clockgating is disabled and the
2349 * suspend callbacks are run. suspend puts the hardware and software state
2350 * in each IP into a state suitable for suspend.
2351 * Returns 0 on success, negative error code on failure.
2352 */
2353static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2354{
2355 int i, r;
2356
2357 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2358 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2359 continue;
e7854a03
AD
2360 /* displays are handled in phase1 */
2361 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2362 continue;
bff77e86
LM
2363 /* PSP lost connection when err_event_athub occurs */
2364 if (amdgpu_ras_intr_triggered() &&
2365 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2366 adev->ip_blocks[i].status.hw = false;
2367 continue;
2368 }
d38ceaf9 2369 /* XXX handle errors */
a1255107 2370 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 2371 /* XXX handle errors */
2c1a2784 2372 if (r) {
a1255107
AD
2373 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2374 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2375 }
876923fb 2376 adev->ip_blocks[i].status.hw = false;
a3a09142 2377 /* handle putting the SMC in the appropriate state */
86b93fd6
JZ
 2378	 if (!amdgpu_sriov_vf(adev)) {
2379 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2380 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
2381 if (r) {
2382 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
2383 adev->mp1_state, r);
2384 return r;
2385 }
a3a09142
AD
2386 }
2387 }
b5507c7e 2388 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2389 }
2390
2391 return 0;
2392}
2393
e7854a03
AD
2394/**
2395 * amdgpu_device_ip_suspend - run suspend for hardware IPs
2396 *
2397 * @adev: amdgpu_device pointer
2398 *
2399 * Main suspend function for hardware IPs. The list of all the hardware
2400 * IPs that make up the asic is walked, clockgating is disabled and the
2401 * suspend callbacks are run. suspend puts the hardware and software state
2402 * in each IP into a state suitable for suspend.
2403 * Returns 0 on success, negative error code on failure.
2404 */
2405int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2406{
2407 int r;
2408
e7819644
YT
2409 if (amdgpu_sriov_vf(adev))
2410 amdgpu_virt_request_full_gpu(adev, false);
2411
e7854a03
AD
2412 r = amdgpu_device_ip_suspend_phase1(adev);
2413 if (r)
2414 return r;
2415 r = amdgpu_device_ip_suspend_phase2(adev);
2416
e7819644
YT
2417 if (amdgpu_sriov_vf(adev))
2418 amdgpu_virt_release_full_gpu(adev, false);
2419
e7854a03
AD
2420 return r;
2421}
2422
06ec9070 2423static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2424{
2425 int i, r;
2426
2cb681b6
ML
2427 static enum amd_ip_block_type ip_order[] = {
2428 AMD_IP_BLOCK_TYPE_GMC,
2429 AMD_IP_BLOCK_TYPE_COMMON,
39186aef 2430 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
2431 AMD_IP_BLOCK_TYPE_IH,
2432 };
a90ad3c2 2433
2cb681b6
ML
2434 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2435 int j;
2436 struct amdgpu_ip_block *block;
a90ad3c2 2437
2cb681b6
ML
2438 for (j = 0; j < adev->num_ip_blocks; j++) {
2439 block = &adev->ip_blocks[j];
2440
482f0e53 2441 block->status.hw = false;
2cb681b6
ML
2442 if (block->version->type != ip_order[i] ||
2443 !block->status.valid)
2444 continue;
2445
2446 r = block->version->funcs->hw_init(adev);
0aaeefcc 2447 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2448 if (r)
2449 return r;
482f0e53 2450 block->status.hw = true;
a90ad3c2
ML
2451 }
2452 }
2453
2454 return 0;
2455}
2456
06ec9070 2457static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2458{
2459 int i, r;
2460
2cb681b6
ML
2461 static enum amd_ip_block_type ip_order[] = {
2462 AMD_IP_BLOCK_TYPE_SMC,
2463 AMD_IP_BLOCK_TYPE_DCE,
2464 AMD_IP_BLOCK_TYPE_GFX,
2465 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c 2466 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07
JJ
2467 AMD_IP_BLOCK_TYPE_VCE,
2468 AMD_IP_BLOCK_TYPE_VCN
2cb681b6 2469 };
a90ad3c2 2470
2cb681b6
ML
2471 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2472 int j;
2473 struct amdgpu_ip_block *block;
a90ad3c2 2474
2cb681b6
ML
2475 for (j = 0; j < adev->num_ip_blocks; j++) {
2476 block = &adev->ip_blocks[j];
2477
2478 if (block->version->type != ip_order[i] ||
482f0e53
ML
2479 !block->status.valid ||
2480 block->status.hw)
2cb681b6
ML
2481 continue;
2482
895bd048
JZ
2483 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
2484 r = block->version->funcs->resume(adev);
2485 else
2486 r = block->version->funcs->hw_init(adev);
2487
0aaeefcc 2488 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2489 if (r)
2490 return r;
482f0e53 2491 block->status.hw = true;
a90ad3c2
ML
2492 }
2493 }
2494
2495 return 0;
2496}
2497
e3ecdffa
AD
2498/**
2499 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2500 *
2501 * @adev: amdgpu_device pointer
2502 *
2503 * First resume function for hardware IPs. The list of all the hardware
2504 * IPs that make up the asic is walked and the resume callbacks are run for
2505 * COMMON, GMC, and IH. resume puts the hardware into a functional state
2506 * after a suspend and updates the software state as necessary. This
2507 * function is also used for restoring the GPU after a GPU reset.
2508 * Returns 0 on success, negative error code on failure.
2509 */
06ec9070 2510static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
2511{
2512 int i, r;
2513
a90ad3c2 2514 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2515 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 2516 continue;
a90ad3c2 2517 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa
AD
2518 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2519 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
482f0e53 2520
fcf0649f
CZ
2521 r = adev->ip_blocks[i].version->funcs->resume(adev);
2522 if (r) {
2523 DRM_ERROR("resume of IP block <%s> failed %d\n",
2524 adev->ip_blocks[i].version->funcs->name, r);
2525 return r;
2526 }
482f0e53 2527 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
2528 }
2529 }
2530
2531 return 0;
2532}
2533
e3ecdffa
AD
2534/**
2535 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2536 *
2537 * @adev: amdgpu_device pointer
2538 *
 2539	 * Second resume function for hardware IPs. The list of all the hardware
2540 * IPs that make up the asic is walked and the resume callbacks are run for
2541 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
2542 * functional state after a suspend and updates the software state as
2543 * necessary. This function is also used for restoring the GPU after a GPU
2544 * reset.
2545 * Returns 0 on success, negative error code on failure.
2546 */
06ec9070 2547static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2548{
2549 int i, r;
2550
2551 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2552 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 2553 continue;
fcf0649f 2554 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 2555 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
2556 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2557 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 2558 continue;
a1255107 2559 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 2560 if (r) {
a1255107
AD
2561 DRM_ERROR("resume of IP block <%s> failed %d\n",
2562 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2563 return r;
2c1a2784 2564 }
482f0e53 2565 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
2566 }
2567
2568 return 0;
2569}
2570
e3ecdffa
AD
2571/**
2572 * amdgpu_device_ip_resume - run resume for hardware IPs
2573 *
2574 * @adev: amdgpu_device pointer
2575 *
2576 * Main resume function for hardware IPs. The hardware IPs
 2577	 * are split into two resume functions because they are
 2578	 * also used in recovering from a GPU reset and some additional
 2579	 * steps need to be taken between them. In this case (S3/S4) they are
2580 * run sequentially.
2581 * Returns 0 on success, negative error code on failure.
2582 */
06ec9070 2583static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
2584{
2585 int r;
2586
06ec9070 2587 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
2588 if (r)
2589 return r;
7a3e0bb2
RZ
2590
2591 r = amdgpu_device_fw_loading(adev);
2592 if (r)
2593 return r;
2594
06ec9070 2595 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
2596
2597 return r;
2598}
2599
e3ecdffa
AD
2600/**
2601 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2602 *
2603 * @adev: amdgpu_device pointer
2604 *
2605 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2606 */
4e99a44e 2607static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 2608{
6867e1b5
ML
2609 if (amdgpu_sriov_vf(adev)) {
2610 if (adev->is_atom_fw) {
2611 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2612 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2613 } else {
2614 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2615 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2616 }
2617
2618 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2619 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 2620 }
048765ad
AR
2621}
2622
e3ecdffa
AD
2623/**
2624 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2625 *
2626 * @asic_type: AMD asic type
2627 *
 2628	 * Check if there is DC (new modesetting infrastructure) support for an asic.
 2629	 * Returns true if DC has support, false if not.
2630 */
4562236b
HW
2631bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2632{
2633 switch (asic_type) {
2634#if defined(CONFIG_DRM_AMD_DC)
2635 case CHIP_BONAIRE:
0d6fbccb 2636 case CHIP_KAVERI:
367e6687
AD
2637 case CHIP_KABINI:
2638 case CHIP_MULLINS:
d9fda248
HW
2639 /*
2640 * We have systems in the wild with these ASICs that require
2641 * LVDS and VGA support which is not supported with DC.
2642 *
2643 * Fallback to the non-DC driver here by default so as not to
2644 * cause regressions.
2645 */
2646 return amdgpu_dc > 0;
2647 case CHIP_HAWAII:
4562236b
HW
2648 case CHIP_CARRIZO:
2649 case CHIP_STONEY:
4562236b 2650 case CHIP_POLARIS10:
675fd32b 2651 case CHIP_POLARIS11:
2c8ad2d5 2652 case CHIP_POLARIS12:
675fd32b 2653 case CHIP_VEGAM:
4562236b
HW
2654 case CHIP_TONGA:
2655 case CHIP_FIJI:
42f8ffa1 2656 case CHIP_VEGA10:
dca7b401 2657 case CHIP_VEGA12:
c6034aa2 2658 case CHIP_VEGA20:
b86a1aa3 2659#if defined(CONFIG_DRM_AMD_DC_DCN)
fd187853 2660 case CHIP_RAVEN:
b4f199c7 2661 case CHIP_NAVI10:
8fceceb6 2662 case CHIP_NAVI14:
078655d9 2663 case CHIP_NAVI12:
e1c14c43 2664 case CHIP_RENOIR:
42f8ffa1 2665#endif
fd187853 2666 return amdgpu_dc != 0;
4562236b
HW
2667#endif
2668 default:
93b09a9a
SS
2669 if (amdgpu_dc > 0)
2670 DRM_INFO("Display Core has been requested via kernel parameter "
2671 "but isn't supported by ASIC, ignoring\n");
4562236b
HW
2672 return false;
2673 }
2674}
2675
2676/**
2677 * amdgpu_device_has_dc_support - check if dc is supported
2678 *
 2679	 * @adev: amdgpu_device pointer
2680 *
2681 * Returns true for supported, false for not supported
2682 */
2683bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2684{
2555039d
XY
2685 if (amdgpu_sriov_vf(adev))
2686 return false;
2687
4562236b
HW
2688 return amdgpu_device_asic_has_dc_support(adev->asic_type);
2689}
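Call sites use this check to pick between the legacy helper-based display path and the DC/atomic one; the resume code later in this section follows exactly this pattern:

	if (!amdgpu_device_has_dc_support(adev))
		drm_helper_hpd_irq_event(dev);
	else
		drm_kms_helper_hotplug_event(dev);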
2690
d4535e2c
AG
2691
2692static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
2693{
2694 struct amdgpu_device *adev =
2695 container_of(__work, struct amdgpu_device, xgmi_reset_work);
c6a6e2db 2696 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
d4535e2c 2697
c6a6e2db
AG
2698 /* It's a bug to not have a hive within this function */
2699 if (WARN_ON(!hive))
2700 return;
2701
2702 /*
2703 * Use task barrier to synchronize all xgmi reset works across the
2704 * hive. task_barrier_enter and task_barrier_exit will block
2705 * until all the threads running the xgmi reset works reach
2706 * those points. task_barrier_full will do both blocks.
2707 */
2708 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
2709
2710 task_barrier_enter(&hive->tb);
2711 adev->asic_reset_res = amdgpu_device_baco_enter(adev->ddev);
2712
2713 if (adev->asic_reset_res)
2714 goto fail;
2715
2716 task_barrier_exit(&hive->tb);
2717 adev->asic_reset_res = amdgpu_device_baco_exit(adev->ddev);
2718
2719 if (adev->asic_reset_res)
2720 goto fail;
2721 } else {
2722
2723 task_barrier_full(&hive->tb);
2724 adev->asic_reset_res = amdgpu_asic_reset(adev);
2725 }
ce316fa5 2726
c6a6e2db 2727fail:
d4535e2c 2728 if (adev->asic_reset_res)
fed184e9 2729 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
d4535e2c
AG
2730 adev->asic_reset_res, adev->ddev->unique);
2731}
2732
71f98027
AD
2733static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
2734{
2735 char *input = amdgpu_lockup_timeout;
2736 char *timeout_setting = NULL;
2737 int index = 0;
2738 long timeout;
2739 int ret = 0;
2740
2741 /*
 2742	 * By default the timeout for non-compute jobs is 10000 ms,
 2743	 * and there is no timeout enforced on compute jobs.
 2744	 * In SR-IOV or passthrough mode, the timeout for compute
 2745	 * jobs is 10000 ms by default.
2746 */
2747 adev->gfx_timeout = msecs_to_jiffies(10000);
2748 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
2749 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
2750 adev->compute_timeout = adev->gfx_timeout;
2751 else
2752 adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
2753
f440ff44 2754 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 2755 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 2756 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
2757 ret = kstrtol(timeout_setting, 0, &timeout);
2758 if (ret)
2759 return ret;
2760
2761 if (timeout == 0) {
2762 index++;
2763 continue;
2764 } else if (timeout < 0) {
2765 timeout = MAX_SCHEDULE_TIMEOUT;
2766 } else {
2767 timeout = msecs_to_jiffies(timeout);
2768 }
2769
2770 switch (index++) {
2771 case 0:
2772 adev->gfx_timeout = timeout;
2773 break;
2774 case 1:
2775 adev->compute_timeout = timeout;
2776 break;
2777 case 2:
2778 adev->sdma_timeout = timeout;
2779 break;
2780 case 3:
2781 adev->video_timeout = timeout;
2782 break;
2783 default:
2784 break;
2785 }
2786 }
2787 /*
2788 * There is only one value specified and
2789 * it should apply to all non-compute jobs.
2790 */
bcccee89 2791 if (index == 1) {
71f98027 2792 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
2793 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
2794 adev->compute_timeout = adev->gfx_timeout;
2795 }
71f98027
AD
2796 }
2797
2798 return ret;
2799}
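The parser walks the comma-separated amdgpu.lockup_timeout string in a fixed order (gfx, compute, sdma, video), where 0 keeps the current default and a negative value disables the timeout; a single value applies to all non-compute queues. Below is a small userspace sketch of that index walk, with purely illustrative defaults and field names (it is not driver code):

#define _DEFAULT_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	/* e.g. booting with amdgpu.lockup_timeout=5000,0,8000,-1 */
	char input[] = "5000,0,8000,-1";
	const char * const name[] = { "gfx", "compute", "sdma", "video" };
	long timeout[] = { 10000, -1, 10000, 10000 }; /* assumed defaults, ms; -1 = none */
	char *s = input, *tok;
	int index = 0;

	while ((tok = strsep(&s, ",")) && index < 4) {
		long t = strtol(tok, NULL, 0);

		if (t == 0) {		/* 0 keeps the existing default */
			index++;
			continue;
		}
		timeout[index++] = t;	/* negative selects "no timeout" */
	}

	for (index = 0; index < 4; index++)
		printf("%s_timeout = %ld\n", name[index], timeout[index]);
	return 0;
}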
d4535e2c 2800
d38ceaf9
AD
2801/**
2802 * amdgpu_device_init - initialize the driver
2803 *
2804 * @adev: amdgpu_device pointer
87e3f136 2805 * @ddev: drm dev pointer
d38ceaf9
AD
2806 * @pdev: pci dev pointer
2807 * @flags: driver flags
2808 *
2809 * Initializes the driver info and hw (all asics).
2810 * Returns 0 for success or an error on failure.
2811 * Called at driver startup.
2812 */
2813int amdgpu_device_init(struct amdgpu_device *adev,
2814 struct drm_device *ddev,
2815 struct pci_dev *pdev,
2816 uint32_t flags)
2817{
2818 int r, i;
3840c5bc 2819 bool boco = false;
95844d20 2820 u32 max_MBps;
d38ceaf9
AD
2821
2822 adev->shutdown = false;
2823 adev->dev = &pdev->dev;
2824 adev->ddev = ddev;
2825 adev->pdev = pdev;
2826 adev->flags = flags;
4e66d7d2
YZ
2827
2828 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
2829 adev->asic_type = amdgpu_force_asic_type;
2830 else
2831 adev->asic_type = flags & AMD_ASIC_MASK;
2832
d38ceaf9 2833 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2 2834 if (amdgpu_emu_mode == 1)
8bdab6bb 2835 adev->usec_timeout *= 10;
770d13b1 2836 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
2837 adev->accel_working = false;
2838 adev->num_rings = 0;
2839 adev->mman.buffer_funcs = NULL;
2840 adev->mman.buffer_funcs_ring = NULL;
2841 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 2842 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 2843 adev->gmc.gmc_funcs = NULL;
f54d1867 2844 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 2845 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
2846
2847 adev->smc_rreg = &amdgpu_invalid_rreg;
2848 adev->smc_wreg = &amdgpu_invalid_wreg;
2849 adev->pcie_rreg = &amdgpu_invalid_rreg;
2850 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
2851 adev->pciep_rreg = &amdgpu_invalid_rreg;
2852 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
2853 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
2854 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
2855 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
2856 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
2857 adev->didt_rreg = &amdgpu_invalid_rreg;
2858 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
2859 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
2860 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
2861 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
2862 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
2863
3e39ab90
AD
2864 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
2865 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
2866 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
2867
 2868	 /* mutex initializations are all done here so we
 2869	 * can recall functions without locking issues */
d38ceaf9 2870 atomic_set(&adev->irq.ih.lock, 0);
0e5ca0d1 2871 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
2872 mutex_init(&adev->pm.mutex);
2873 mutex_init(&adev->gfx.gpu_clock_mutex);
2874 mutex_init(&adev->srbm_mutex);
b8866c26 2875 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 2876 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 2877 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 2878 mutex_init(&adev->mn_lock);
e23b74aa 2879 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 2880 hash_init(adev->mn_hash);
13a752e3 2881 mutex_init(&adev->lock_reset);
32eaeae0 2882 mutex_init(&adev->psp.mutex);
bd052211 2883 mutex_init(&adev->notifier_lock);
d38ceaf9 2884
912dfc84
EQ
2885 r = amdgpu_device_check_arguments(adev);
2886 if (r)
2887 return r;
d38ceaf9 2888
d38ceaf9
AD
2889 spin_lock_init(&adev->mmio_idx_lock);
2890 spin_lock_init(&adev->smc_idx_lock);
2891 spin_lock_init(&adev->pcie_idx_lock);
2892 spin_lock_init(&adev->uvd_ctx_idx_lock);
2893 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 2894 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 2895 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 2896 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 2897 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 2898
0c4e7fa5
CZ
2899 INIT_LIST_HEAD(&adev->shadow_list);
2900 mutex_init(&adev->shadow_list_lock);
2901
795f2813
AR
2902 INIT_LIST_HEAD(&adev->ring_lru_list);
2903 spin_lock_init(&adev->ring_lru_list_lock);
2904
beff74bc
AD
2905 INIT_DELAYED_WORK(&adev->delayed_init_work,
2906 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
2907 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
2908 amdgpu_device_delay_enable_gfx_off);
2dc80b00 2909
d4535e2c
AG
2910 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
2911
d23ee13f 2912 adev->gfx.gfx_off_req_count = 1;
b1ddf548
RZ
2913 adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false;
2914
0fa49558
AX
2915 /* Registers mapping */
2916 /* TODO: block userspace mapping of io register */
da69c161
KW
2917 if (adev->asic_type >= CHIP_BONAIRE) {
2918 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
2919 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
2920 } else {
2921 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
2922 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
2923 }
d38ceaf9 2924
d38ceaf9
AD
2925 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
2926 if (adev->rmmio == NULL) {
2927 return -ENOMEM;
2928 }
2929 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
2930 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
2931
d38ceaf9
AD
2932 /* io port mapping */
2933 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
2934 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
2935 adev->rio_mem_size = pci_resource_len(adev->pdev, i);
2936 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
2937 break;
2938 }
2939 }
2940 if (adev->rio_mem == NULL)
b64a18c5 2941 DRM_INFO("PCI I/O BAR is not found.\n");
d38ceaf9 2942
b2109d8e
JX
2943 /* enable PCIE atomic ops */
2944 r = pci_enable_atomic_ops_to_root(adev->pdev,
2945 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
2946 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
2947 if (r) {
2948 adev->have_atomics_support = false;
2949 DRM_INFO("PCIE atomic ops is not supported\n");
2950 } else {
2951 adev->have_atomics_support = true;
2952 }
2953
5494d864
AD
2954 amdgpu_device_get_pcie_info(adev);
2955
b239c017
JX
2956 if (amdgpu_mcbp)
2957 DRM_INFO("MCBP is enabled\n");
2958
5f84cc63
JX
2959 if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
2960 adev->enable_mes = true;
2961
f54eeab4 2962 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) {
a190d1c7
XY
2963 r = amdgpu_discovery_init(adev);
2964 if (r) {
2965 dev_err(adev->dev, "amdgpu_discovery_init failed\n");
2966 return r;
2967 }
2968 }
2969
d38ceaf9 2970 /* early init functions */
06ec9070 2971 r = amdgpu_device_ip_early_init(adev);
d38ceaf9
AD
2972 if (r)
2973 return r;
2974
df99ac0f
JZ
2975 r = amdgpu_device_get_job_timeout_settings(adev);
2976 if (r) {
2977 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
2978 return r;
2979 }
2980
6585661d
OZ
2981 /* doorbell bar mapping and doorbell index init*/
2982 amdgpu_device_doorbell_init(adev);
2983
d38ceaf9
AD
 2984	 /* if we have more than one VGA card, then disable the amdgpu VGA resources */
2985 /* this will fail for cards that aren't VGA class devices, just
2986 * ignore it */
06ec9070 2987 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
d38ceaf9 2988
31af062a 2989 if (amdgpu_device_supports_boco(ddev))
3840c5bc
AD
2990 boco = true;
2991 if (amdgpu_has_atpx() &&
2992 (amdgpu_is_atpx_hybrid() ||
2993 amdgpu_has_atpx_dgpu_power_cntl()) &&
2994 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 2995 vga_switcheroo_register_client(adev->pdev,
3840c5bc
AD
2996 &amdgpu_switcheroo_ops, boco);
2997 if (boco)
d38ceaf9
AD
2998 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
2999
9475a943
SL
3000 if (amdgpu_emu_mode == 1) {
3001 /* post the asic on emulation mode */
3002 emu_soc_asic_init(adev);
bfca0289 3003 goto fence_driver_init;
9475a943 3004 }
bfca0289 3005
4e99a44e
ML
3006 /* detect if we are with an SRIOV vbios */
3007 amdgpu_device_detect_sriov_bios(adev);
048765ad 3008
95e8e59e
AD
3009 /* check if we need to reset the asic
3010 * E.g., driver was not cleanly unloaded previously, etc.
3011 */
f14899fd 3012 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
95e8e59e
AD
3013 r = amdgpu_asic_reset(adev);
3014 if (r) {
3015 dev_err(adev->dev, "asic reset on init failed\n");
3016 goto failed;
3017 }
3018 }
3019
d38ceaf9 3020 /* Post card if necessary */
39c640c0 3021 if (amdgpu_device_need_post(adev)) {
d38ceaf9 3022 if (!adev->bios) {
bec86378 3023 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
3024 r = -EINVAL;
3025 goto failed;
d38ceaf9 3026 }
bec86378 3027 DRM_INFO("GPU posting now...\n");
4e99a44e
ML
3028 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3029 if (r) {
3030 dev_err(adev->dev, "gpu post error!\n");
3031 goto failed;
3032 }
d38ceaf9
AD
3033 }
3034
88b64e95
AD
3035 if (adev->is_atom_fw) {
3036 /* Initialize clocks */
3037 r = amdgpu_atomfirmware_get_clock_info(adev);
3038 if (r) {
3039 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 3040 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
3041 goto failed;
3042 }
3043 } else {
a5bde2f9
AD
3044 /* Initialize clocks */
3045 r = amdgpu_atombios_get_clock_info(adev);
3046 if (r) {
3047 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 3048 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 3049 goto failed;
a5bde2f9
AD
3050 }
3051 /* init i2c buses */
4562236b
HW
3052 if (!amdgpu_device_has_dc_support(adev))
3053 amdgpu_atombios_i2c_init(adev);
2c1a2784 3054 }
d38ceaf9 3055
bfca0289 3056fence_driver_init:
d38ceaf9
AD
3057 /* Fence driver */
3058 r = amdgpu_fence_driver_init(adev);
2c1a2784
AD
3059 if (r) {
3060 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
e23b74aa 3061 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3062 goto failed;
2c1a2784 3063 }
d38ceaf9
AD
3064
3065 /* init the mode config */
3066 drm_mode_config_init(adev->ddev);
3067
06ec9070 3068 r = amdgpu_device_ip_init(adev);
d38ceaf9 3069 if (r) {
8840a387 3070 /* failed in exclusive mode due to timeout */
3071 if (amdgpu_sriov_vf(adev) &&
3072 !amdgpu_sriov_runtime(adev) &&
3073 amdgpu_virt_mmio_blocked(adev) &&
3074 !amdgpu_virt_wait_reset(adev)) {
3075 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
3076 /* Don't send request since VF is inactive. */
3077 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3078 adev->virt.ops = NULL;
8840a387 3079 r = -EAGAIN;
3080 goto failed;
3081 }
06ec9070 3082 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3083 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
83ba126a 3084 goto failed;
d38ceaf9
AD
3085 }
3086
d7f72fe4
YZ
3087 DRM_DEBUG("SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
3088 adev->gfx.config.max_shader_engines,
3089 adev->gfx.config.max_sh_per_se,
3090 adev->gfx.config.max_cu_per_sh,
3091 adev->gfx.cu_info.number);
3092
f880799d
ND
3093 amdgpu_ctx_init_sched(adev);
3094
d38ceaf9
AD
3095 adev->accel_working = true;
3096
e59c0205
AX
3097 amdgpu_vm_check_compute_bug(adev);
3098
95844d20
MO
3099 /* Initialize the buffer migration limit. */
3100 if (amdgpu_moverate >= 0)
3101 max_MBps = amdgpu_moverate;
3102 else
3103 max_MBps = 8; /* Allow 8 MB/s. */
3104 /* Get a log2 for easy divisions. */
3105 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3106
9bc92b9c
ML
3107 amdgpu_fbdev_init(adev);
3108
d2f52ac8 3109 r = amdgpu_pm_sysfs_init(adev);
7c868b59
YT
3110 if (r) {
3111 adev->pm_sysfs_en = false;
d2f52ac8 3112 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
7c868b59
YT
3113 } else
3114 adev->pm_sysfs_en = true;
d2f52ac8 3115
5bb23532 3116 r = amdgpu_ucode_sysfs_init(adev);
7c868b59
YT
3117 if (r) {
3118 adev->ucode_sysfs_en = false;
5bb23532 3119 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
7c868b59
YT
3120 } else
3121 adev->ucode_sysfs_en = true;
5bb23532 3122
d38ceaf9
AD
3123 if ((amdgpu_testing & 1)) {
3124 if (adev->accel_working)
3125 amdgpu_test_moves(adev);
3126 else
3127 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
3128 }
d38ceaf9
AD
3129 if (amdgpu_benchmarking) {
3130 if (adev->accel_working)
3131 amdgpu_benchmark(adev, amdgpu_benchmarking);
3132 else
3133 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
3134 }
3135
b0adca4d
EQ
3136 /*
3137 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3138 * Otherwise the mgpu fan boost feature will be skipped due to the
3139 * gpu instance is counted less.
3140 */
3141 amdgpu_register_gpu_instance(adev);
3142
d38ceaf9
AD
3143 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3144 * explicit gating rather than handling it automatically.
3145 */
06ec9070 3146 r = amdgpu_device_ip_late_init(adev);
2c1a2784 3147 if (r) {
06ec9070 3148 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
e23b74aa 3149 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
83ba126a 3150 goto failed;
2c1a2784 3151 }
d38ceaf9 3152
108c6a63 3153 /* must succeed. */
511fdbc3 3154 amdgpu_ras_resume(adev);
108c6a63 3155
beff74bc
AD
3156 queue_delayed_work(system_wq, &adev->delayed_init_work,
3157 msecs_to_jiffies(AMDGPU_RESUME_MS));
3158
dcea6e65
KR
3159 r = device_create_file(adev->dev, &dev_attr_pcie_replay_count);
3160 if (r) {
3161 dev_err(adev->dev, "Could not create pcie_replay_count");
3162 return r;
3163 }
108c6a63 3164
d155bef0
AB
3165 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3166 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
3167 if (r)
3168 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3169
d38ceaf9 3170 return 0;
83ba126a
AD
3171
3172failed:
89041940 3173 amdgpu_vf_error_trans_all(adev);
3840c5bc 3174 if (boco)
83ba126a 3175 vga_switcheroo_fini_domain_pm_ops(adev->dev);
8840a387 3176
83ba126a 3177 return r;
d38ceaf9
AD
3178}
3179
d38ceaf9
AD
3180/**
3181 * amdgpu_device_fini - tear down the driver
3182 *
3183 * @adev: amdgpu_device pointer
3184 *
3185 * Tear down the driver info (all asics).
3186 * Called at driver shutdown.
3187 */
3188void amdgpu_device_fini(struct amdgpu_device *adev)
3189{
3190 int r;
3191
3192 DRM_INFO("amdgpu: finishing device.\n");
9f875167 3193 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 3194 adev->shutdown = true;
9f875167 3195
752c683d
ML
 3196	 /* make sure IB tests have finished before entering exclusive mode
 3197	 * to avoid preemption on an IB test
 3198	 */
3199 if (amdgpu_sriov_vf(adev))
3200 amdgpu_virt_request_full_gpu(adev, false);
3201
e5b03032
ML
3202 /* disable all interrupts */
3203 amdgpu_irq_disable_all(adev);
ff97cba8
ML
3204 if (adev->mode_info.mode_config_initialized){
3205 if (!amdgpu_device_has_dc_support(adev))
c2d88e06 3206 drm_helper_force_disable_all(adev->ddev);
ff97cba8
ML
3207 else
3208 drm_atomic_helper_shutdown(adev->ddev);
3209 }
d38ceaf9 3210 amdgpu_fence_driver_fini(adev);
7c868b59
YT
3211 if (adev->pm_sysfs_en)
3212 amdgpu_pm_sysfs_fini(adev);
d38ceaf9 3213 amdgpu_fbdev_fini(adev);
06ec9070 3214 r = amdgpu_device_ip_fini(adev);
ab4fe3e1
HR
3215 if (adev->firmware.gpu_info_fw) {
3216 release_firmware(adev->firmware.gpu_info_fw);
3217 adev->firmware.gpu_info_fw = NULL;
3218 }
d38ceaf9
AD
3219 adev->accel_working = false;
3220 /* free i2c buses */
4562236b
HW
3221 if (!amdgpu_device_has_dc_support(adev))
3222 amdgpu_i2c_fini(adev);
bfca0289
SL
3223
3224 if (amdgpu_emu_mode != 1)
3225 amdgpu_atombios_fini(adev);
3226
d38ceaf9
AD
3227 kfree(adev->bios);
3228 adev->bios = NULL;
3840c5bc
AD
3229 if (amdgpu_has_atpx() &&
3230 (amdgpu_is_atpx_hybrid() ||
3231 amdgpu_has_atpx_dgpu_power_cntl()) &&
3232 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 3233 vga_switcheroo_unregister_client(adev->pdev);
3840c5bc 3234 if (amdgpu_device_supports_boco(adev->ddev))
83ba126a 3235 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d38ceaf9
AD
3236 vga_client_register(adev->pdev, NULL, NULL, NULL);
3237 if (adev->rio_mem)
3238 pci_iounmap(adev->pdev, adev->rio_mem);
3239 adev->rio_mem = NULL;
3240 iounmap(adev->rmmio);
3241 adev->rmmio = NULL;
06ec9070 3242 amdgpu_device_doorbell_fini(adev);
e9bc1bf7 3243
dcea6e65 3244 device_remove_file(adev->dev, &dev_attr_pcie_replay_count);
7c868b59
YT
3245 if (adev->ucode_sysfs_en)
3246 amdgpu_ucode_sysfs_fini(adev);
d155bef0
AB
3247 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3248 amdgpu_pmu_fini(adev);
f54eeab4 3249 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
a190d1c7 3250 amdgpu_discovery_fini(adev);
d38ceaf9
AD
3251}
3252
3253
3254/*
3255 * Suspend & resume.
3256 */
3257/**
810ddc3a 3258 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 3259 *
87e3f136
DP
3260 * @dev: drm dev pointer
 3261	 *
 3262	 * @fbcon: notify the fbdev of suspend
d38ceaf9
AD
3263 *
3264 * Puts the hw in the suspend state (all asics).
3265 * Returns 0 for success or an error on failure.
3266 * Called at driver suspend.
3267 */
de185019 3268int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3269{
3270 struct amdgpu_device *adev;
3271 struct drm_crtc *crtc;
3272 struct drm_connector *connector;
f8d2d39e 3273 struct drm_connector_list_iter iter;
5ceb54c6 3274 int r;
d38ceaf9
AD
3275
3276 if (dev == NULL || dev->dev_private == NULL) {
3277 return -ENODEV;
3278 }
3279
3280 adev = dev->dev_private;
3281
3282 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3283 return 0;
3284
44779b43 3285 adev->in_suspend = true;
d38ceaf9
AD
3286 drm_kms_helper_poll_disable(dev);
3287
5f818173
S
3288 if (fbcon)
3289 amdgpu_fbdev_set_suspend(adev, 1);
3290
beff74bc 3291 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 3292
4562236b
HW
3293 if (!amdgpu_device_has_dc_support(adev)) {
3294 /* turn off display hw */
3295 drm_modeset_lock_all(dev);
f8d2d39e
LP
3296 drm_connector_list_iter_begin(dev, &iter);
3297 drm_for_each_connector_iter(connector, &iter)
3298 drm_helper_connector_dpms(connector,
3299 DRM_MODE_DPMS_OFF);
3300 drm_connector_list_iter_end(&iter);
4562236b 3301 drm_modeset_unlock_all(dev);
fe1053b7
AD
3302 /* unpin the front buffers and cursors */
3303 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3304 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3305 struct drm_framebuffer *fb = crtc->primary->fb;
3306 struct amdgpu_bo *robj;
3307
91334223 3308 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3309 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3310 r = amdgpu_bo_reserve(aobj, true);
3311 if (r == 0) {
3312 amdgpu_bo_unpin(aobj);
3313 amdgpu_bo_unreserve(aobj);
3314 }
756e6880 3315 }
756e6880 3316
fe1053b7
AD
3317 if (fb == NULL || fb->obj[0] == NULL) {
3318 continue;
3319 }
3320 robj = gem_to_amdgpu_bo(fb->obj[0]);
3321 /* don't unpin kernel fb objects */
3322 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
3323 r = amdgpu_bo_reserve(robj, true);
3324 if (r == 0) {
3325 amdgpu_bo_unpin(robj);
3326 amdgpu_bo_unreserve(robj);
3327 }
d38ceaf9
AD
3328 }
3329 }
3330 }
fe1053b7 3331
9593f4d6 3332 amdgpu_amdkfd_suspend(adev, !fbcon);
fe1053b7 3333
5e6932fe 3334 amdgpu_ras_suspend(adev);
3335
fe1053b7
AD
3336 r = amdgpu_device_ip_suspend_phase1(adev);
3337
d38ceaf9
AD
3338 /* evict vram memory */
3339 amdgpu_bo_evict_vram(adev);
3340
5ceb54c6 3341 amdgpu_fence_driver_suspend(adev);
d38ceaf9 3342
fe1053b7 3343 r = amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 3344
a0a71e49
AD
3345 /* evict remaining vram memory
3346 * This second call to evict vram is to evict the gart page table
3347 * using the CPU.
3348 */
d38ceaf9
AD
3349 amdgpu_bo_evict_vram(adev);
3350
d38ceaf9
AD
3351 return 0;
3352}
3353
3354/**
810ddc3a 3355 * amdgpu_device_resume - initiate device resume
d38ceaf9 3356 *
87e3f136
DP
3357 * @dev: drm dev pointer
 3358	 *
 3359	 * @fbcon: notify the fbdev of resume
d38ceaf9
AD
3360 *
3361 * Bring the hw back to operating state (all asics).
3362 * Returns 0 for success or an error on failure.
3363 * Called at driver resume.
3364 */
de185019 3365int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3366{
3367 struct drm_connector *connector;
f8d2d39e 3368 struct drm_connector_list_iter iter;
d38ceaf9 3369 struct amdgpu_device *adev = dev->dev_private;
756e6880 3370 struct drm_crtc *crtc;
03161a6e 3371 int r = 0;
d38ceaf9
AD
3372
3373 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3374 return 0;
3375
d38ceaf9 3376 /* post card */
39c640c0 3377 if (amdgpu_device_need_post(adev)) {
74b0b157 3378 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3379 if (r)
3380 DRM_ERROR("amdgpu asic init failed\n");
3381 }
d38ceaf9 3382
06ec9070 3383 r = amdgpu_device_ip_resume(adev);
e6707218 3384 if (r) {
06ec9070 3385 DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
4d3b9ae5 3386 return r;
e6707218 3387 }
5ceb54c6
AD
3388 amdgpu_fence_driver_resume(adev);
3389
d38ceaf9 3390
06ec9070 3391 r = amdgpu_device_ip_late_init(adev);
03161a6e 3392 if (r)
4d3b9ae5 3393 return r;
d38ceaf9 3394
beff74bc
AD
3395 queue_delayed_work(system_wq, &adev->delayed_init_work,
3396 msecs_to_jiffies(AMDGPU_RESUME_MS));
3397
fe1053b7
AD
3398 if (!amdgpu_device_has_dc_support(adev)) {
3399 /* pin cursors */
3400 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3401 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3402
91334223 3403 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3404 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3405 r = amdgpu_bo_reserve(aobj, true);
3406 if (r == 0) {
3407 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
3408 if (r != 0)
3409 DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
3410 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
3411 amdgpu_bo_unreserve(aobj);
3412 }
756e6880
AD
3413 }
3414 }
3415 }
9593f4d6 3416 r = amdgpu_amdkfd_resume(adev, !fbcon);
ba997709
YZ
3417 if (r)
3418 return r;
756e6880 3419
96a5d8d4 3420 /* Make sure IB tests flushed */
beff74bc 3421 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 3422
d38ceaf9
AD
3423 /* blat the mode back in */
3424 if (fbcon) {
4562236b
HW
3425 if (!amdgpu_device_has_dc_support(adev)) {
3426 /* pre DCE11 */
3427 drm_helper_resume_force_mode(dev);
3428
3429 /* turn on display hw */
3430 drm_modeset_lock_all(dev);
f8d2d39e
LP
3431
3432 drm_connector_list_iter_begin(dev, &iter);
3433 drm_for_each_connector_iter(connector, &iter)
3434 drm_helper_connector_dpms(connector,
3435 DRM_MODE_DPMS_ON);
3436 drm_connector_list_iter_end(&iter);
3437
4562236b 3438 drm_modeset_unlock_all(dev);
d38ceaf9 3439 }
4d3b9ae5 3440 amdgpu_fbdev_set_suspend(adev, 0);
d38ceaf9
AD
3441 }
3442
3443 drm_kms_helper_poll_enable(dev);
23a1a9e5 3444
5e6932fe 3445 amdgpu_ras_resume(adev);
3446
23a1a9e5
L
3447 /*
3448 * Most of the connector probing functions try to acquire runtime pm
3449 * refs to ensure that the GPU is powered on when connector polling is
3450 * performed. Since we're calling this from a runtime PM callback,
3451 * trying to acquire rpm refs will cause us to deadlock.
3452 *
3453 * Since we're guaranteed to be holding the rpm lock, it's safe to
3454 * temporarily disable the rpm helpers so this doesn't deadlock us.
3455 */
3456#ifdef CONFIG_PM
3457 dev->dev->power.disable_depth++;
3458#endif
4562236b
HW
3459 if (!amdgpu_device_has_dc_support(adev))
3460 drm_helper_hpd_irq_event(dev);
3461 else
3462 drm_kms_helper_hotplug_event(dev);
23a1a9e5
L
3463#ifdef CONFIG_PM
3464 dev->dev->power.disable_depth--;
3465#endif
44779b43
RZ
3466 adev->in_suspend = false;
3467
4d3b9ae5 3468 return 0;
d38ceaf9
AD
3469}
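/*
 * Matching illustrative sketch for the resume direction (hypothetical
 * name, simplified error handling): the driver glue resumes the device
 * and lets the fbdev console repaint by passing fbcon = true.
 */
static int example_pmops_resume(struct device *dev)
{
	struct drm_device *drm_dev = dev_get_drvdata(dev);

	return amdgpu_device_resume(drm_dev, true);
}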
3470
e3ecdffa
AD
3471/**
3472 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
3473 *
3474 * @adev: amdgpu_device pointer
3475 *
3476 * The list of all the hardware IPs that make up the asic is walked and
3477 * the check_soft_reset callbacks are run. check_soft_reset determines
3478 * if the asic is still hung or not.
3479 * Returns true if any of the IPs are still in a hung state, false if not.
3480 */
06ec9070 3481static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
3482{
3483 int i;
3484 bool asic_hang = false;
3485
f993d628
ML
3486 if (amdgpu_sriov_vf(adev))
3487 return true;
3488
8bc04c29
AD
3489 if (amdgpu_asic_need_full_reset(adev))
3490 return true;
3491
63fbf42f 3492 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3493 if (!adev->ip_blocks[i].status.valid)
63fbf42f 3494 continue;
a1255107
AD
3495 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
3496 adev->ip_blocks[i].status.hang =
3497 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
3498 if (adev->ip_blocks[i].status.hang) {
3499 DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
3500 asic_hang = true;
3501 }
3502 }
3503 return asic_hang;
3504}
3505
e3ecdffa
AD
3506/**
3507 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3508 *
3509 * @adev: amdgpu_device pointer
3510 *
3511 * The list of all the hardware IPs that make up the asic is walked and the
3512 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
3513 * handles any IP specific hardware or software state changes that are
3514 * necessary for a soft reset to succeed.
3515 * Returns 0 on success, negative error code on failure.
3516 */
06ec9070 3517static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
3518{
3519 int i, r = 0;
3520
3521 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3522 if (!adev->ip_blocks[i].status.valid)
d31a501e 3523 continue;
a1255107
AD
3524 if (adev->ip_blocks[i].status.hang &&
3525 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3526 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
3527 if (r)
3528 return r;
3529 }
3530 }
3531
3532 return 0;
3533}
3534
e3ecdffa
AD
3535/**
3536 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3537 *
3538 * @adev: amdgpu_device pointer
3539 *
3540 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
3541 * reset is necessary to recover.
3542 * Returns true if a full asic reset is required, false if not.
3543 */
06ec9070 3544static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 3545{
da146d3b
AD
3546 int i;
3547
8bc04c29
AD
3548 if (amdgpu_asic_need_full_reset(adev))
3549 return true;
3550
da146d3b 3551 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3552 if (!adev->ip_blocks[i].status.valid)
da146d3b 3553 continue;
a1255107
AD
3554 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3555 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3556 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
3557 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3558 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 3559 if (adev->ip_blocks[i].status.hang) {
da146d3b
AD
 3560 DRM_INFO("Some blocks need full reset!\n");
3561 return true;
3562 }
3563 }
35d782fe
CZ
3564 }
3565 return false;
3566}
3567
e3ecdffa
AD
3568/**
3569 * amdgpu_device_ip_soft_reset - do a soft reset
3570 *
3571 * @adev: amdgpu_device pointer
3572 *
3573 * The list of all the hardware IPs that make up the asic is walked and the
3574 * soft_reset callbacks are run if the block is hung. soft_reset handles any
3575 * IP specific hardware or software state changes that are necessary to soft
3576 * reset the IP.
3577 * Returns 0 on success, negative error code on failure.
3578 */
06ec9070 3579static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3580{
3581 int i, r = 0;
3582
3583 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3584 if (!adev->ip_blocks[i].status.valid)
35d782fe 3585 continue;
a1255107
AD
3586 if (adev->ip_blocks[i].status.hang &&
3587 adev->ip_blocks[i].version->funcs->soft_reset) {
3588 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
3589 if (r)
3590 return r;
3591 }
3592 }
3593
3594 return 0;
3595}
3596
e3ecdffa
AD
3597/**
3598 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
3599 *
3600 * @adev: amdgpu_device pointer
3601 *
3602 * The list of all the hardware IPs that make up the asic is walked and the
3603 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
3604 * handles any IP specific hardware or software state changes that are
3605 * necessary after the IP has been soft reset.
3606 * Returns 0 on success, negative error code on failure.
3607 */
06ec9070 3608static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3609{
3610 int i, r = 0;
3611
3612 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3613 if (!adev->ip_blocks[i].status.valid)
35d782fe 3614 continue;
a1255107
AD
3615 if (adev->ip_blocks[i].status.hang &&
3616 adev->ip_blocks[i].version->funcs->post_soft_reset)
3617 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
3618 if (r)
3619 return r;
3620 }
3621
3622 return 0;
3623}
3624
e3ecdffa 3625/**
c33adbc7 3626 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
3627 *
3628 * @adev: amdgpu_device pointer
3629 *
3630 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
3631 * restore things like GPUVM page tables after a GPU reset where
3632 * the contents of VRAM might be lost.
403009bf
CK
3633 *
3634 * Returns:
3635 * 0 on success, negative error code on failure.
e3ecdffa 3636 */
c33adbc7 3637static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 3638{
c41d1cf6 3639 struct dma_fence *fence = NULL, *next = NULL;
403009bf
CK
3640 struct amdgpu_bo *shadow;
3641 long r = 1, tmo;
c41d1cf6
ML
3642
3643 if (amdgpu_sriov_runtime(adev))
b045d3af 3644 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
3645 else
3646 tmo = msecs_to_jiffies(100);
3647
3648 DRM_INFO("recover vram bo from shadow start\n");
3649 mutex_lock(&adev->shadow_list_lock);
403009bf
CK
3650 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
3651
3652 /* No need to recover an evicted BO */
3653 if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
b575f10d 3654 shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
403009bf
CK
3655 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
3656 continue;
3657
3658 r = amdgpu_bo_restore_shadow(shadow, &next);
3659 if (r)
3660 break;
3661
c41d1cf6 3662 if (fence) {
1712fb1a 3663 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
3664 dma_fence_put(fence);
3665 fence = next;
1712fb1a 3666 if (tmo == 0) {
3667 r = -ETIMEDOUT;
c41d1cf6 3668 break;
1712fb1a 3669 } else if (tmo < 0) {
3670 r = tmo;
3671 break;
3672 }
403009bf
CK
3673 } else {
3674 fence = next;
c41d1cf6 3675 }
c41d1cf6
ML
3676 }
3677 mutex_unlock(&adev->shadow_list_lock);
3678
403009bf
CK
3679 if (fence)
3680 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
3681 dma_fence_put(fence);
3682
1712fb1a 3683 if (r < 0 || tmo <= 0) {
3684 DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
3685 return -EIO;
3686 }
c41d1cf6 3687
403009bf
CK
3688 DRM_INFO("recover vram bo from shadow done\n");
3689 return 0;
c41d1cf6
ML
3690}
3691
a90ad3c2 3692
e3ecdffa 3693/**
06ec9070 3694 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e
ML
3695 *
3696 * @adev: amdgpu device pointer
87e3f136 3697 * @from_hypervisor: request from hypervisor
5740682e
ML
3698 *
3699 * Do a VF FLR and reinitialize the ASIC.
3700 * Returns 0 on success, negative error code on failure.
e3ecdffa
AD
3701 */
3702static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
3703 bool from_hypervisor)
5740682e
ML
3704{
3705 int r;
3706
3707 if (from_hypervisor)
3708 r = amdgpu_virt_request_full_gpu(adev, true);
3709 else
3710 r = amdgpu_virt_reset_gpu(adev);
3711 if (r)
3712 return r;
a90ad3c2
ML
3713
3714 /* Resume IP prior to SMC */
06ec9070 3715 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
3716 if (r)
3717 goto error;
a90ad3c2 3718
c9ffa427 3719 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 3720 /* we need recover gart prior to run SMC/CP/SDMA resume */
c1c7ce8f 3721 amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
a90ad3c2 3722
7a3e0bb2
RZ
3723 r = amdgpu_device_fw_loading(adev);
3724 if (r)
3725 return r;
3726
a90ad3c2 3727 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 3728 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
3729 if (r)
3730 goto error;
a90ad3c2
ML
3731
3732 amdgpu_irq_gpu_reset_resume_helper(adev);
5740682e 3733 r = amdgpu_ib_ring_tests(adev);
f81e8d53 3734 amdgpu_amdkfd_post_reset(adev);
a90ad3c2 3735
abc34253
ED
3736error:
3737 amdgpu_virt_release_full_gpu(adev, true);
c41d1cf6 3738 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 3739 amdgpu_inc_vram_lost(adev);
c33adbc7 3740 r = amdgpu_device_recover_vram(adev);
a90ad3c2
ML
3741 }
3742
3743 return r;
3744}
3745
12938fad
CK
3746/**
3747 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
3748 *
3749 * @adev: amdgpu device pointer
3750 *
3751 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
3752 * a hung GPU.
3753 */
3754bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
3755{
3756 if (!amdgpu_device_ip_check_soft_reset(adev)) {
3757 DRM_INFO("Timeout, but no hardware hang detected.\n");
3758 return false;
3759 }
3760
3ba7b418
AG
3761 if (amdgpu_gpu_recovery == 0)
3762 goto disabled;
3763
3764 if (amdgpu_sriov_vf(adev))
3765 return true;
3766
3767 if (amdgpu_gpu_recovery == -1) {
3768 switch (adev->asic_type) {
fc42d47c
AG
3769 case CHIP_BONAIRE:
3770 case CHIP_HAWAII:
3ba7b418
AG
3771 case CHIP_TOPAZ:
3772 case CHIP_TONGA:
3773 case CHIP_FIJI:
3774 case CHIP_POLARIS10:
3775 case CHIP_POLARIS11:
3776 case CHIP_POLARIS12:
3777 case CHIP_VEGAM:
3778 case CHIP_VEGA20:
3779 case CHIP_VEGA10:
3780 case CHIP_VEGA12:
c43b849f 3781 case CHIP_RAVEN:
e9d4cf91 3782 case CHIP_ARCTURUS:
2cb44fb0 3783 case CHIP_RENOIR:
658c6639
AD
3784 case CHIP_NAVI10:
3785 case CHIP_NAVI14:
3786 case CHIP_NAVI12:
3ba7b418
AG
3787 break;
3788 default:
3789 goto disabled;
3790 }
12938fad
CK
3791 }
3792
3793 return true;
3ba7b418
AG
3794
3795disabled:
3796 DRM_INFO("GPU recovery disabled.\n");
3797 return false;
12938fad
CK
3798}
3799
5c6dd71e 3800
26bc5340
AG
3801static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
3802 struct amdgpu_job *job,
3803 bool *need_full_reset_arg)
3804{
3805 int i, r = 0;
3806 bool need_full_reset = *need_full_reset_arg;
71182665 3807
71182665 3808 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
3809 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3810 struct amdgpu_ring *ring = adev->rings[i];
3811
51687759 3812 if (!ring || !ring->sched.thread)
0875dc9e 3813 continue;
5740682e 3814
2f9d4084
ML
3815 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
3816 amdgpu_fence_driver_force_completion(ring);
0875dc9e 3817 }
d38ceaf9 3818
222b5f04
AG
3819 if(job)
3820 drm_sched_increase_karma(&job->base);
3821
1d721ed6 3822 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
3823 if (!amdgpu_sriov_vf(adev)) {
3824
3825 if (!need_full_reset)
3826 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
3827
3828 if (!need_full_reset) {
3829 amdgpu_device_ip_pre_soft_reset(adev);
3830 r = amdgpu_device_ip_soft_reset(adev);
3831 amdgpu_device_ip_post_soft_reset(adev);
3832 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
3833 DRM_INFO("soft reset failed, will fallback to full reset!\n");
3834 need_full_reset = true;
3835 }
3836 }
3837
3838 if (need_full_reset)
3839 r = amdgpu_device_ip_suspend(adev);
3840
3841 *need_full_reset_arg = need_full_reset;
3842 }
3843
3844 return r;
3845}
3846
041a62bc 3847static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
26bc5340
AG
3848 struct list_head *device_list_handle,
3849 bool *need_full_reset_arg)
3850{
3851 struct amdgpu_device *tmp_adev = NULL;
3852 bool need_full_reset = *need_full_reset_arg, vram_lost = false;
3853 int r = 0;
3854
3855 /*
3856 * ASIC reset has to be done on all XGMI hive nodes ASAP
3857 * to allow proper links negotiation in FW (within 1 sec)
3858 */
3859 if (need_full_reset) {
3860 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
041a62bc 3861 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 3862 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
c96cf282 3863 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
3864 r = -EALREADY;
3865 } else
3866 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 3867
041a62bc
AG
3868 if (r) {
3869 DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s",
3870 r, tmp_adev->ddev->unique);
3871 break;
ce316fa5
LM
3872 }
3873 }
3874
041a62bc
AG
3875 /* For XGMI wait for all resets to complete before proceed */
3876 if (!r) {
ce316fa5
LM
3877 list_for_each_entry(tmp_adev, device_list_handle,
3878 gmc.xgmi.head) {
3879 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
3880 flush_work(&tmp_adev->xgmi_reset_work);
3881 r = tmp_adev->asic_reset_res;
3882 if (r)
3883 break;
ce316fa5
LM
3884 }
3885 }
3886 }
ce316fa5 3887 }
26bc5340 3888
00eaa571
LM
3889 if (!r && amdgpu_ras_intr_triggered())
3890 amdgpu_ras_intr_cleared();
3891
26bc5340
AG
3892 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3893 if (need_full_reset) {
3894 /* post card */
3895 if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
3896 DRM_WARN("asic atom init failed!");
3897
3898 if (!r) {
3899 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
3900 r = amdgpu_device_ip_resume_phase1(tmp_adev);
3901 if (r)
3902 goto out;
3903
3904 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
3905 if (vram_lost) {
77e7f829 3906 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 3907 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
3908 }
3909
3910 r = amdgpu_gtt_mgr_recover(
3911 &tmp_adev->mman.bdev.man[TTM_PL_TT]);
3912 if (r)
3913 goto out;
3914
3915 r = amdgpu_device_fw_loading(tmp_adev);
3916 if (r)
3917 return r;
3918
3919 r = amdgpu_device_ip_resume_phase2(tmp_adev);
3920 if (r)
3921 goto out;
3922
3923 if (vram_lost)
3924 amdgpu_device_fill_reset_magic(tmp_adev);
3925
fdafb359
EQ
3926 /*
3927 * Add this ASIC back as tracked since the reset already
3928 * completed successfully.
3929 */
3930 amdgpu_register_gpu_instance(tmp_adev);
3931
7c04ca50 3932 r = amdgpu_device_ip_late_init(tmp_adev);
3933 if (r)
3934 goto out;
3935
565d1941
EQ
3936 amdgpu_fbdev_set_suspend(tmp_adev, 0);
3937
e79a04d5 3938 /* must succeed. */
511fdbc3 3939 amdgpu_ras_resume(tmp_adev);
e79a04d5 3940
26bc5340
AG
3941 /* Update PSP FW topology after reset */
3942 if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
3943 r = amdgpu_xgmi_update_topology(hive, tmp_adev);
3944 }
3945 }
3946
3947
3948out:
3949 if (!r) {
3950 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
3951 r = amdgpu_ib_ring_tests(tmp_adev);
3952 if (r) {
3953 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
3954 r = amdgpu_device_ip_suspend(tmp_adev);
3955 need_full_reset = true;
3956 r = -EAGAIN;
3957 goto end;
3958 }
3959 }
3960
3961 if (!r)
3962 r = amdgpu_device_recover_vram(tmp_adev);
3963 else
3964 tmp_adev->asic_reset_res = r;
3965 }
3966
3967end:
3968 *need_full_reset_arg = need_full_reset;
3969 return r;
3970}
3971
1d721ed6 3972static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
26bc5340 3973{
1d721ed6
AG
3974 if (trylock) {
3975 if (!mutex_trylock(&adev->lock_reset))
3976 return false;
3977 } else
3978 mutex_lock(&adev->lock_reset);
5740682e 3979
26bc5340 3980 atomic_inc(&adev->gpu_reset_counter);
2a9b90ae 3981 adev->in_gpu_reset = true;
a3a09142
AD
3982 switch (amdgpu_asic_reset_method(adev)) {
3983 case AMD_RESET_METHOD_MODE1:
3984 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
3985 break;
3986 case AMD_RESET_METHOD_MODE2:
3987 adev->mp1_state = PP_MP1_STATE_RESET;
3988 break;
3989 default:
3990 adev->mp1_state = PP_MP1_STATE_NONE;
3991 break;
3992 }
1d721ed6
AG
3993
3994 return true;
26bc5340 3995}
d38ceaf9 3996
26bc5340
AG
3997static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
3998{
89041940 3999 amdgpu_vf_error_trans_all(adev);
a3a09142 4000 adev->mp1_state = PP_MP1_STATE_NONE;
2a9b90ae 4001 adev->in_gpu_reset = false;
13a752e3 4002 mutex_unlock(&adev->lock_reset);
26bc5340
AG
4003}
4004
26bc5340
AG
4005/**
4006 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
4007 *
4008 * @adev: amdgpu device pointer
4009 * @job: which job triggered the hang
4010 *
4011 * Attempt to reset the GPU if it has hung (all asics).
4012 * Attempt to do a soft reset or full reset and reinitialize the ASIC.
4013 * Returns 0 for success or an error on failure.
4014 */
4015
4016int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
4017 struct amdgpu_job *job)
4018{
1d721ed6
AG
4019 struct list_head device_list, *device_list_handle = NULL;
4020 bool need_full_reset, job_signaled;
26bc5340 4021 struct amdgpu_hive_info *hive = NULL;
26bc5340 4022 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 4023 int i, r = 0;
7c6e68c7 4024 bool in_ras_intr = amdgpu_ras_intr_triggered();
b823821f
LM
4025 bool use_baco =
4026 (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ?
4027 true : false;
26bc5340 4028
d5ea093e
AG
4029 /*
4030 * Flush RAM to disk so that after reboot
4031 * the user can read the log and see why the system rebooted.
4032 */
b823821f 4033 if (in_ras_intr && !use_baco && amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
4034
4035 DRM_WARN("Emergency reboot.");
4036
4037 ksys_sync_helper();
4038 emergency_restart();
4039 }
4040
1d721ed6 4041 need_full_reset = job_signaled = false;
26bc5340
AG
4042 INIT_LIST_HEAD(&device_list);
4043
b823821f
LM
4044 dev_info(adev->dev, "GPU %s begin!\n",
4045 (in_ras_intr && !use_baco) ? "jobs stop":"reset");
26bc5340 4046
beff74bc 4047 cancel_delayed_work_sync(&adev->delayed_init_work);
c53e4db7 4048
1d721ed6
AG
4049 hive = amdgpu_get_xgmi_hive(adev, false);
4050
26bc5340 4051 /*
1d721ed6
AG
4052 * Here we trylock to avoid a chain of resets executing, either
4053 * triggered by jobs on different adevs in the XGMI hive or by jobs on
4054 * different schedulers for the same device, while this TO handler is
4055 * running. We always reset all schedulers for the device and all
4056 * devices in the XGMI hive, so that should take care of them too.
26bc5340 4057 */
1d721ed6
AG
4058
4059 if (hive && !mutex_trylock(&hive->reset_lock)) {
4060 DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
0b2d2c2e 4061 job ? job->base.id : -1, hive->hive_id);
26bc5340 4062 return 0;
1d721ed6 4063 }
26bc5340
AG
4064
4065 /* Start with adev pre asic reset first for soft reset check.*/
1d721ed6
AG
4066 if (!amdgpu_device_lock_adev(adev, !hive)) {
4067 DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
0b2d2c2e 4068 job ? job->base.id : -1);
1d721ed6 4069 return 0;
26bc5340
AG
4070 }
4071
7c6e68c7
AG
4072 /* Block kfd: SRIOV would do it separately */
4073 if (!amdgpu_sriov_vf(adev))
4074 amdgpu_amdkfd_pre_reset(adev);
4075
26bc5340 4076 /* Build list of devices to reset */
1d721ed6 4077 if (adev->gmc.xgmi.num_physical_nodes > 1) {
26bc5340 4078 if (!hive) {
7c6e68c7
AG
4079 /*unlock kfd: SRIOV would do it separately */
4080 if (!amdgpu_sriov_vf(adev))
4081 amdgpu_amdkfd_post_reset(adev);
26bc5340
AG
4082 amdgpu_device_unlock_adev(adev);
4083 return -ENODEV;
4084 }
4085
4086 /*
4087 * In case we are in XGMI hive mode device reset is done for all the
4088 * nodes in the hive to retrain all XGMI links and hence the reset
4089 * sequence is executed in loop on all nodes.
4090 */
4091 device_list_handle = &hive->device_list;
4092 } else {
4093 list_add_tail(&adev->gmc.xgmi.head, &device_list);
4094 device_list_handle = &device_list;
4095 }
4096
1d721ed6
AG
4097 /* block all schedulers and reset given job's ring */
4098 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
7c6e68c7 4099 if (tmp_adev != adev) {
12ffa55d 4100 amdgpu_device_lock_adev(tmp_adev, false);
7c6e68c7
AG
4101 if (!amdgpu_sriov_vf(tmp_adev))
4102 amdgpu_amdkfd_pre_reset(tmp_adev);
4103 }
4104
12ffa55d
AG
4105 /*
4106 * Mark these ASICs to be reset as untracked first,
4107 * and add them back after the reset completes.
4108 */
4109 amdgpu_unregister_gpu_instance(tmp_adev);
4110
565d1941
EQ
4111 amdgpu_fbdev_set_suspend(adev, 1);
4112
f1c1314b 4113 /* disable ras on ALL IPs */
b823821f
LM
4114 if (!(in_ras_intr && !use_baco) &&
4115 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 4116 amdgpu_ras_suspend(tmp_adev);
4117
1d721ed6
AG
4118 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4119 struct amdgpu_ring *ring = tmp_adev->rings[i];
4120
4121 if (!ring || !ring->sched.thread)
4122 continue;
4123
0b2d2c2e 4124 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 4125
b823821f 4126 if (in_ras_intr && !use_baco)
7c6e68c7 4127 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6
AG
4128 }
4129 }
4130
4131
b823821f 4132 if (in_ras_intr && !use_baco)
7c6e68c7
AG
4133 goto skip_sched_resume;
4134
1d721ed6
AG
4135 /*
4136 * Must check guilty signal here since after this point all old
4137 * HW fences are force signaled.
4138 *
4139 * job->base holds a reference to parent fence
4140 */
4141 if (job && job->base.s_fence->parent &&
4142 dma_fence_is_signaled(job->base.s_fence->parent))
4143 job_signaled = true;
4144
1d721ed6
AG
4145 if (job_signaled) {
4146 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
4147 goto skip_hw_reset;
4148 }
4149
4150
4151 /* Guilty job will be freed after this*/
0b2d2c2e 4152 r = amdgpu_device_pre_asic_reset(adev, job, &need_full_reset);
1d721ed6
AG
4153 if (r) {
4154 /*TODO Should we stop ?*/
4155 DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
4156 r, adev->ddev->unique);
4157 adev->asic_reset_res = r;
4158 }
4159
26bc5340
AG
4160retry: /* Rest of adevs pre asic reset from XGMI hive. */
4161 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4162
4163 if (tmp_adev == adev)
4164 continue;
4165
26bc5340
AG
4166 r = amdgpu_device_pre_asic_reset(tmp_adev,
4167 NULL,
4168 &need_full_reset);
4169 /*TODO Should we stop ?*/
4170 if (r) {
4171 DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
4172 r, tmp_adev->ddev->unique);
4173 tmp_adev->asic_reset_res = r;
4174 }
4175 }
4176
4177 /* Actual ASIC resets if needed.*/
4178 /* TODO Implement XGMI hive reset logic for SRIOV */
4179 if (amdgpu_sriov_vf(adev)) {
4180 r = amdgpu_device_reset_sriov(adev, job ? false : true);
4181 if (r)
4182 adev->asic_reset_res = r;
4183 } else {
041a62bc 4184 r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
26bc5340
AG
4185 if (r && r == -EAGAIN)
4186 goto retry;
4187 }
4188
1d721ed6
AG
4189skip_hw_reset:
4190
26bc5340
AG
4191 /* Post ASIC reset for all devs. */
4192 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
7c6e68c7 4193
1d721ed6
AG
4194 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4195 struct amdgpu_ring *ring = tmp_adev->rings[i];
4196
4197 if (!ring || !ring->sched.thread)
4198 continue;
4199
 4200 /* No point in resubmitting jobs if we didn't do a HW reset */
4201 if (!tmp_adev->asic_reset_res && !job_signaled)
4202 drm_sched_resubmit_jobs(&ring->sched);
4203
4204 drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
4205 }
4206
4207 if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
4208 drm_helper_resume_force_mode(tmp_adev->ddev);
4209 }
4210
4211 tmp_adev->asic_reset_res = 0;
26bc5340
AG
4212
4213 if (r) {
4214 /* bad news, how to tell it to userspace ? */
12ffa55d 4215 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
4216 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
4217 } else {
12ffa55d 4218 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340 4219 }
7c6e68c7 4220 }
26bc5340 4221
7c6e68c7
AG
4222skip_sched_resume:
4223 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4224 /*unlock kfd: SRIOV would do it separately */
b823821f 4225 if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev))
7c6e68c7 4226 amdgpu_amdkfd_post_reset(tmp_adev);
26bc5340
AG
4227 amdgpu_device_unlock_adev(tmp_adev);
4228 }
4229
1d721ed6 4230 if (hive)
22d6575b 4231 mutex_unlock(&hive->reset_lock);
26bc5340
AG
4232
4233 if (r)
4234 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
d38ceaf9
AD
4235 return r;
4236}
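/*
 * Illustrative sketch of the caller side (hypothetical function name,
 * bookkeeping omitted): the drm_sched timeout handler recovers the ring's
 * adev only when amdgpu_device_should_recover_gpu() allows it.
 */
static void example_job_timedout(struct drm_sched_job *s_job)
{
	struct amdgpu_ring *ring = container_of(s_job->sched,
						struct amdgpu_ring, sched);
	struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base);

	if (amdgpu_device_should_recover_gpu(ring->adev))
		amdgpu_device_gpu_recover(ring->adev, job);
}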
4237
e3ecdffa
AD
4238/**
4239 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
4240 *
4241 * @adev: amdgpu_device pointer
4242 *
4243 * Fetches and stores in the driver the PCIE capabilities (gen speed
4244 * and lanes) of the slot the device is in. Handles APUs and
4245 * virtualized environments where PCIE config space may not be available.
4246 */
5494d864 4247static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 4248{
5d9a6330 4249 struct pci_dev *pdev;
c5313457
HK
4250 enum pci_bus_speed speed_cap, platform_speed_cap;
4251 enum pcie_link_width platform_link_width;
d0dd7f0c 4252
cd474ba0
AD
4253 if (amdgpu_pcie_gen_cap)
4254 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 4255
cd474ba0
AD
4256 if (amdgpu_pcie_lane_cap)
4257 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 4258
cd474ba0
AD
4259 /* covers APUs as well */
4260 if (pci_is_root_bus(adev->pdev->bus)) {
4261 if (adev->pm.pcie_gen_mask == 0)
4262 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
4263 if (adev->pm.pcie_mlw_mask == 0)
4264 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 4265 return;
cd474ba0 4266 }
d0dd7f0c 4267
c5313457
HK
4268 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
4269 return;
4270
dbaa922b
AD
4271 pcie_bandwidth_available(adev->pdev, NULL,
4272 &platform_speed_cap, &platform_link_width);
c5313457 4273
cd474ba0 4274 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
4275 /* asic caps */
4276 pdev = adev->pdev;
4277 speed_cap = pcie_get_speed_cap(pdev);
4278 if (speed_cap == PCI_SPEED_UNKNOWN) {
4279 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
4280 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4281 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 4282 } else {
5d9a6330
AD
4283 if (speed_cap == PCIE_SPEED_16_0GT)
4284 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4285 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4286 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4287 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
4288 else if (speed_cap == PCIE_SPEED_8_0GT)
4289 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4290 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4291 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
4292 else if (speed_cap == PCIE_SPEED_5_0GT)
4293 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4294 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
4295 else
4296 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
4297 }
4298 /* platform caps */
c5313457 4299 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
4300 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4301 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4302 } else {
c5313457 4303 if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
4304 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4305 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4306 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4307 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 4308 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
4309 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4310 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4311 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 4312 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
4313 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4314 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4315 else
4316 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
4317
cd474ba0
AD
4318 }
4319 }
4320 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 4321 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
4322 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
4323 } else {
c5313457 4324 switch (platform_link_width) {
5d9a6330 4325 case PCIE_LNK_X32:
cd474ba0
AD
4326 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
4327 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4328 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4329 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4330 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4331 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4332 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4333 break;
5d9a6330 4334 case PCIE_LNK_X16:
cd474ba0
AD
4335 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4336 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4337 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4338 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4339 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4340 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4341 break;
5d9a6330 4342 case PCIE_LNK_X12:
cd474ba0
AD
4343 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4344 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4345 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4346 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4347 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4348 break;
5d9a6330 4349 case PCIE_LNK_X8:
cd474ba0
AD
4350 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4351 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4352 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4353 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4354 break;
5d9a6330 4355 case PCIE_LNK_X4:
cd474ba0
AD
4356 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4357 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4358 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4359 break;
5d9a6330 4360 case PCIE_LNK_X2:
cd474ba0
AD
4361 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4362 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4363 break;
5d9a6330 4364 case PCIE_LNK_X1:
cd474ba0
AD
4365 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
4366 break;
4367 default:
4368 break;
4369 }
d0dd7f0c
AD
4370 }
4371 }
4372}
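/*
 * Illustrative only (hypothetical helper): power-management code consumes
 * the masks filled in above by testing individual speed/width bits, for
 * example checking whether the platform link supports gen3.
 */
static bool example_platform_supports_pcie_gen3(struct amdgpu_device *adev)
{
	return !!(adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
}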
d38ceaf9 4373
361dbd01
AD
4374int amdgpu_device_baco_enter(struct drm_device *dev)
4375{
4376 struct amdgpu_device *adev = dev->dev_private;
7a22677b 4377 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01
AD
4378
4379 if (!amdgpu_device_supports_baco(adev->ddev))
4380 return -ENOTSUPP;
4381
7a22677b
LM
4382 if (ras && ras->supported)
4383 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
4384
9530273e 4385 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
4386}
4387
4388int amdgpu_device_baco_exit(struct drm_device *dev)
4389{
4390 struct amdgpu_device *adev = dev->dev_private;
7a22677b 4391 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 4392 int ret = 0;
361dbd01
AD
4393
4394 if (!amdgpu_device_supports_baco(adev->ddev))
4395 return -ENOTSUPP;
4396
9530273e
EQ
4397 ret = amdgpu_dpm_baco_exit(adev);
4398 if (ret)
4399 return ret;
7a22677b
LM
4400
4401 if (ras && ras->supported)
4402 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
4403
4404 return 0;
361dbd01 4405}
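/*
 * Sketch of the intended pairing (hypothetical helper, simplified): a
 * runtime-PM style user brackets the low-power interval with the two BACO
 * helpers above, entering before idling the device and exiting on wake.
 */
static int example_baco_idle_cycle(struct drm_device *dev)
{
	int r;

	r = amdgpu_device_baco_enter(dev);
	if (r)
		return r;

	/* ... device sits in BACO until a wake event ... */

	return amdgpu_device_baco_exit(dev);
}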