drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
d38ceaf9
AD
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
b1ddf548 28#include <linux/power_supply.h>
0875dc9e 29#include <linux/kthread.h>
fdf2f6c5 30#include <linux/module.h>
d38ceaf9
AD
31#include <linux/console.h>
32#include <linux/slab.h>
fdf2f6c5 33
4562236b 34#include <drm/drm_atomic_helper.h>
fcd70cd3 35#include <drm/drm_probe_helper.h>
d38ceaf9
AD
36#include <drm/amdgpu_drm.h>
37#include <linux/vgaarb.h>
38#include <linux/vga_switcheroo.h>
39#include <linux/efi.h>
40#include "amdgpu.h"
f4b373f4 41#include "amdgpu_trace.h"
d38ceaf9
AD
42#include "amdgpu_i2c.h"
43#include "atom.h"
44#include "amdgpu_atombios.h"
a5bde2f9 45#include "amdgpu_atomfirmware.h"
d0dd7f0c 46#include "amd_pcie.h"
33f34802
KW
47#ifdef CONFIG_DRM_AMDGPU_SI
48#include "si.h"
49#endif
a2e73f56
AD
50#ifdef CONFIG_DRM_AMDGPU_CIK
51#include "cik.h"
52#endif
aaa36a97 53#include "vi.h"
460826e6 54#include "soc15.h"
0a5b8c7b 55#include "nv.h"
d38ceaf9 56#include "bif/bif_4_1_d.h"
9accf2fd 57#include <linux/pci.h>
bec86378 58#include <linux/firmware.h>
89041940 59#include "amdgpu_vf_error.h"
d38ceaf9 60
ba997709 61#include "amdgpu_amdkfd.h"
d2f52ac8 62#include "amdgpu_pm.h"
d38ceaf9 63
5183411b 64#include "amdgpu_xgmi.h"
c030f2e4 65#include "amdgpu_ras.h"
9c7c85f7 66#include "amdgpu_pmu.h"
5183411b 67
d5ea093e 68#include <linux/suspend.h>
c6a6e2db 69#include <drm/task_barrier.h>
d5ea093e 70
e2a75f88 71MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
3f76dced 72MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
2d2e5e7e 73MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
ad5a67a7 74MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
54c4d17e 75MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
65e60f6e 76MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
b51a26a0 77MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
23c6268e 78MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
ed42cfe1 79MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
42b325e5 80MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
e2a75f88 81
2dc80b00
S
82#define AMDGPU_RESUME_MS 2000
83
050091ab 84const char *amdgpu_asic_name[] = {
da69c161
KW
85 "TAHITI",
86 "PITCAIRN",
87 "VERDE",
88 "OLAND",
89 "HAINAN",
d38ceaf9
AD
90 "BONAIRE",
91 "KAVERI",
92 "KABINI",
93 "HAWAII",
94 "MULLINS",
95 "TOPAZ",
96 "TONGA",
48299f95 97 "FIJI",
d38ceaf9 98 "CARRIZO",
139f4917 99 "STONEY",
2cc0c0b5
FC
100 "POLARIS10",
101 "POLARIS11",
c4642a47 102 "POLARIS12",
48ff108d 103 "VEGAM",
d4196f01 104 "VEGA10",
8fab806a 105 "VEGA12",
956fcddc 106 "VEGA20",
2ca8a5d2 107 "RAVEN",
d6c3b24e 108 "ARCTURUS",
1eee4228 109 "RENOIR",
852a6626 110 "NAVI10",
87dbad02 111 "NAVI14",
9802f5d7 112 "NAVI12",
d38ceaf9
AD
113 "LAST",
114};
115
dcea6e65
KR
116/**
117 * DOC: pcie_replay_count
118 *
119 * The amdgpu driver provides a sysfs API for reporting the total number
120 * of PCIe replays (NAKs)
121 * The file pcie_replay_count is used for this and returns the total
122 * number of replays as a sum of the NAKs generated and NAKs received
123 */
124
125static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
126 struct device_attribute *attr, char *buf)
127{
128 struct drm_device *ddev = dev_get_drvdata(dev);
129 struct amdgpu_device *adev = ddev->dev_private;
130 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
131
132 return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
133}
134
135static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
136 amdgpu_device_get_pcie_replay_count, NULL);
137
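/*
 * Usage sketch (example only, not part of the driver): the counter can be
 * read from userspace through sysfs. The card0 path below is an assumption
 * for a single-GPU system; the attribute is created on the drm device's
 * parent PCI device.
 *
 *	FILE *f = fopen("/sys/class/drm/card0/device/pcie_replay_count", "r");
 *	unsigned long long cnt;
 *
 *	if (f && fscanf(f, "%llu", &cnt) == 1)
 *		printf("PCIe replay count: %llu\n", cnt);
 *	if (f)
 *		fclose(f);
 */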
5494d864
AD
138static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
139
e3ecdffa 140/**
31af062a 141 * amdgpu_device_supports_boco - Is the device a dGPU with HG/PX power control
e3ecdffa
AD
142 *
143 * @dev: drm_device pointer
144 *
145 * Returns true if the device is a dGPU with HG/PX power control,
146 * otherwise returns false.
147 */
31af062a 148bool amdgpu_device_supports_boco(struct drm_device *dev)
d38ceaf9
AD
149{
150 struct amdgpu_device *adev = dev->dev_private;
151
2f7d10b3 152 if (adev->flags & AMD_IS_PX)
d38ceaf9
AD
153 return true;
154 return false;
155}
156
a69cba42
AD
157/**
158 * amdgpu_device_supports_baco - Does the device support BACO
159 *
160 * @dev: drm_device pointer
161 *
162 * Returns true if the device supports BACO,
163 * otherwise returns false.
164 */
165bool amdgpu_device_supports_baco(struct drm_device *dev)
166{
167 struct amdgpu_device *adev = dev->dev_private;
168
169 return amdgpu_asic_supports_baco(adev);
170}
171
e35e2b11
TY
172/**
173 * VRAM access helper functions.
174 *
175 * amdgpu_device_vram_access - read/write a buffer in vram
176 *
177 * @adev: amdgpu_device pointer
178 * @pos: offset of the buffer in vram
179 * @buf: virtual address of the buffer in system memory
180 * @size: read/write size in bytes; the buffer at @buf must hold at least @size bytes
181 * @write: true - write to vram, otherwise - read from vram
182 */
183void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
184 uint32_t *buf, size_t size, bool write)
185{
186 uint64_t last;
187 unsigned long flags;
188
189 last = size - 4;
190 for (last += pos; pos <= last; pos += 4) {
191 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
192 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
193 WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31);
194 if (write)
195 WREG32_NO_KIQ(mmMM_DATA, *buf++);
196 else
197 *buf++ = RREG32_NO_KIQ(mmMM_DATA);
198 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
199 }
200}
201
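/*
 * Usage sketch (example only): read a single dword back from VRAM through
 * the MM_INDEX/MM_DATA window. Assumes a valid adev and a dword-aligned
 * VRAM offset; pass write = true to copy from @buf into VRAM instead.
 *
 *	uint32_t value;
 *
 *	amdgpu_device_vram_access(adev, vram_offset, &value,
 *				  sizeof(value), false);
 */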
d38ceaf9
AD
202/*
203 * MMIO register access helper functions.
204 */
e3ecdffa
AD
205/**
206 * amdgpu_mm_rreg - read a memory mapped IO register
207 *
208 * @adev: amdgpu_device pointer
209 * @reg: dword aligned register offset
210 * @acc_flags: access flags which require special behavior
211 *
212 * Returns the 32 bit value from the offset specified.
213 */
d38ceaf9 214uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
15d72fd7 215 uint32_t acc_flags)
d38ceaf9 216{
f4b373f4
TSD
217 uint32_t ret;
218
c68dbcd8 219 if ((acc_flags & AMDGPU_REGS_KIQ) || (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)))
d33a99c4 220 return amdgpu_kiq_rreg(adev, reg);
bc992ba5 221
15d72fd7 222 if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
f4b373f4 223 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
d38ceaf9
AD
224 else {
225 unsigned long flags;
d38ceaf9
AD
226
227 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
228 writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
229 ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
230 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
d38ceaf9 231 }
f4b373f4
TSD
232 trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
233 return ret;
d38ceaf9
AD
234}
235
421a2a30
ML
236/*
237 * MMIO register read with byte offset helper function
238 * @offset: byte offset from MMIO start
239 *
240*/
241
e3ecdffa
AD
242/**
243 * amdgpu_mm_rreg8 - read a memory mapped IO register
244 *
245 * @adev: amdgpu_device pointer
246 * @offset: byte aligned register offset
247 *
248 * Returns the 8 bit value from the offset specified.
249 */
421a2a30
ML
250uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) {
251 if (offset < adev->rmmio_size)
252 return (readb(adev->rmmio + offset));
253 BUG();
254}
255
256/*
257 * MMIO register write with byte offset helper function
258 * @offset: byte offset from MMIO start
259 * @value: the value to be written to the register
260 *
261*/
e3ecdffa
AD
262/**
263 * amdgpu_mm_wreg8 - write a memory mapped IO register
264 *
265 * @adev: amdgpu_device pointer
266 * @offset: byte aligned register offset
267 * @value: 8 bit value to write
268 *
269 * Writes the value specified to the offset specified.
270 */
421a2a30
ML
271void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) {
272 if (offset < adev->rmmio_size)
273 writeb(value, adev->rmmio + offset);
274 else
275 BUG();
276}
277
e3ecdffa
AD
278/**
279 * amdgpu_mm_wreg - write to a memory mapped IO register
280 *
281 * @adev: amdgpu_device pointer
282 * @reg: dword aligned register offset
283 * @v: 32 bit value to write to the register
284 * @acc_flags: access flags which require special behavior
285 *
286 * Writes the value specified to the offset specified.
287 */
d38ceaf9 288void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
15d72fd7 289 uint32_t acc_flags)
d38ceaf9 290{
f4b373f4 291 trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);
4e99a44e 292
47ed4e1c
KW
293 if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
294 adev->last_mm_index = v;
295 }
296
c68dbcd8 297 if ((acc_flags & AMDGPU_REGS_KIQ) || (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)))
d33a99c4 298 return amdgpu_kiq_wreg(adev, reg, v);
bc992ba5 299
15d72fd7 300 if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
d38ceaf9
AD
301 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
302 else {
303 unsigned long flags;
304
305 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
306 writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
307 writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
308 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
309 }
47ed4e1c
KW
310
311 if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
312 udelay(500);
313 }
d38ceaf9
AD
314}
315
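/*
 * Usage sketch (example only): a read-modify-write through these helpers.
 * Driver code normally goes through the RREG32/WREG32 macros, which wrap
 * these functions; the register offset and masks here are placeholders.
 *
 *	uint32_t v = amdgpu_mm_rreg(adev, reg, 0);
 *
 *	v &= ~clear_mask;
 *	v |= set_mask;
 *	amdgpu_mm_wreg(adev, reg, v, 0);
 */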
e3ecdffa
AD
316/**
317 * amdgpu_io_rreg - read an IO register
318 *
319 * @adev: amdgpu_device pointer
320 * @reg: dword aligned register offset
321 *
322 * Returns the 32 bit value from the offset specified.
323 */
d38ceaf9
AD
324u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
325{
326 if ((reg * 4) < adev->rio_mem_size)
327 return ioread32(adev->rio_mem + (reg * 4));
328 else {
329 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
330 return ioread32(adev->rio_mem + (mmMM_DATA * 4));
331 }
332}
333
e3ecdffa
AD
334/**
335 * amdgpu_io_wreg - write to an IO register
336 *
337 * @adev: amdgpu_device pointer
338 * @reg: dword aligned register offset
339 * @v: 32 bit value to write to the register
340 *
341 * Writes the value specified to the offset specified.
342 */
d38ceaf9
AD
343void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
344{
47ed4e1c
KW
345 if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
346 adev->last_mm_index = v;
347 }
d38ceaf9
AD
348
349 if ((reg * 4) < adev->rio_mem_size)
350 iowrite32(v, adev->rio_mem + (reg * 4));
351 else {
352 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
353 iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
354 }
47ed4e1c
KW
355
356 if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
357 udelay(500);
358 }
d38ceaf9
AD
359}
360
361/**
362 * amdgpu_mm_rdoorbell - read a doorbell dword
363 *
364 * @adev: amdgpu_device pointer
365 * @index: doorbell index
366 *
367 * Returns the value in the doorbell aperture at the
368 * requested doorbell index (CIK).
369 */
370u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
371{
372 if (index < adev->doorbell.num_doorbells) {
373 return readl(adev->doorbell.ptr + index);
374 } else {
375 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
376 return 0;
377 }
378}
379
380/**
381 * amdgpu_mm_wdoorbell - write a doorbell dword
382 *
383 * @adev: amdgpu_device pointer
384 * @index: doorbell index
385 * @v: value to write
386 *
387 * Writes @v to the doorbell aperture at the
388 * requested doorbell index (CIK).
389 */
390void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
391{
392 if (index < adev->doorbell.num_doorbells) {
393 writel(v, adev->doorbell.ptr + index);
394 } else {
395 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
396 }
397}
398
832be404
KW
399/**
400 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
401 *
402 * @adev: amdgpu_device pointer
403 * @index: doorbell index
404 *
405 * Returns the value in the doorbell aperture at the
406 * requested doorbell index (VEGA10+).
407 */
408u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
409{
410 if (index < adev->doorbell.num_doorbells) {
411 return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
412 } else {
413 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
414 return 0;
415 }
416}
417
418/**
419 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
420 *
421 * @adev: amdgpu_device pointer
422 * @index: doorbell index
423 * @v: value to write
424 *
425 * Writes @v to the doorbell aperture at the
426 * requested doorbell index (VEGA10+).
427 */
428void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
429{
430 if (index < adev->doorbell.num_doorbells) {
431 atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
432 } else {
433 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
434 }
435}
436
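/*
 * Usage sketch (example only): ring code kicks the GPU by writing the new
 * write pointer into the ring's doorbell slot, e.g.:
 *
 *	amdgpu_mm_wdoorbell(adev, ring_doorbell_index, lower_32_bits(wptr));
 *
 * or, on VEGA10 and newer, with the 64-bit variant:
 *
 *	amdgpu_mm_wdoorbell64(adev, ring_doorbell_index, wptr);
 *
 * ring_doorbell_index and wptr are placeholders for the ring's assigned
 * doorbell slot and its current write pointer.
 */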
d38ceaf9
AD
437/**
438 * amdgpu_invalid_rreg - dummy reg read function
439 *
440 * @adev: amdgpu device pointer
441 * @reg: offset of register
442 *
443 * Dummy register read function. Used for register blocks
444 * that certain asics don't have (all asics).
445 * Returns the value in the register.
446 */
447static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
448{
449 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
450 BUG();
451 return 0;
452}
453
454/**
455 * amdgpu_invalid_wreg - dummy reg write function
456 *
457 * @adev: amdgpu device pointer
458 * @reg: offset of register
459 * @v: value to write to the register
460 *
461 * Dummy register write function. Used for register blocks
462 * that certain asics don't have (all asics).
463 */
464static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
465{
466 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
467 reg, v);
468 BUG();
469}
470
4fa1c6a6
TZ
471/**
472 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
473 *
474 * @adev: amdgpu device pointer
475 * @reg: offset of register
476 *
477 * Dummy register read function. Used for register blocks
478 * that certain asics don't have (all asics).
479 * Returns the value in the register.
480 */
481static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
482{
483 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
484 BUG();
485 return 0;
486}
487
488/**
489 * amdgpu_invalid_wreg64 - dummy reg write function
490 *
491 * @adev: amdgpu device pointer
492 * @reg: offset of register
493 * @v: value to write to the register
494 *
495 * Dummy register write function. Used for register blocks
496 * that certain asics don't have (all asics).
497 */
498static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
499{
500 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
501 reg, v);
502 BUG();
503}
504
d38ceaf9
AD
505/**
506 * amdgpu_block_invalid_rreg - dummy reg read function
507 *
508 * @adev: amdgpu device pointer
509 * @block: offset of instance
510 * @reg: offset of register
511 *
512 * Dummy register read function. Used for register blocks
513 * that certain asics don't have (all asics).
514 * Returns the value in the register.
515 */
516static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
517 uint32_t block, uint32_t reg)
518{
519 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
520 reg, block);
521 BUG();
522 return 0;
523}
524
525/**
526 * amdgpu_block_invalid_wreg - dummy reg write function
527 *
528 * @adev: amdgpu device pointer
529 * @block: offset of instance
530 * @reg: offset of register
531 * @v: value to write to the register
532 *
533 * Dummy register write function. Used for register blocks
534 * that certain asics don't have (all asics).
535 */
536static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
537 uint32_t block,
538 uint32_t reg, uint32_t v)
539{
540 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
541 reg, block, v);
542 BUG();
543}
544
e3ecdffa
AD
545/**
546 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
547 *
548 * @adev: amdgpu device pointer
549 *
550 * Allocates a scratch page of VRAM for use by various things in the
551 * driver.
552 */
06ec9070 553static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
d38ceaf9 554{
a4a02777
CK
555 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
556 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
557 &adev->vram_scratch.robj,
558 &adev->vram_scratch.gpu_addr,
559 (void **)&adev->vram_scratch.ptr);
d38ceaf9
AD
560}
561
e3ecdffa
AD
562/**
563 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
564 *
565 * @adev: amdgpu device pointer
566 *
567 * Frees the VRAM scratch page.
568 */
06ec9070 569static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 570{
078af1a3 571 amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
d38ceaf9
AD
572}
573
574/**
9c3f2b54 575 * amdgpu_device_program_register_sequence - program an array of registers.
d38ceaf9
AD
576 *
577 * @adev: amdgpu_device pointer
578 * @registers: pointer to the register array
579 * @array_size: size of the register array
580 *
581 * Programs an array of registers with AND and OR masks.
582 * This is a helper for setting golden registers.
583 */
9c3f2b54
AD
584void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
585 const u32 *registers,
586 const u32 array_size)
d38ceaf9
AD
587{
588 u32 tmp, reg, and_mask, or_mask;
589 int i;
590
591 if (array_size % 3)
592 return;
593
594 for (i = 0; i < array_size; i += 3) {
595 reg = registers[i + 0];
596 and_mask = registers[i + 1];
597 or_mask = registers[i + 2];
598
599 if (and_mask == 0xffffffff) {
600 tmp = or_mask;
601 } else {
602 tmp = RREG32(reg);
603 tmp &= ~and_mask;
e0d07657
HZ
604 if (adev->family >= AMDGPU_FAMILY_AI)
605 tmp |= (or_mask & and_mask);
606 else
607 tmp |= or_mask;
d38ceaf9
AD
608 }
609 WREG32(reg, tmp);
610 }
611}
612
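/*
 * Usage sketch (example only): golden-register tables are flat arrays of
 * {offset, AND mask, OR mask} triplets. An and_mask of 0xffffffff writes
 * or_mask verbatim; otherwise only the bits covered by and_mask are cleared
 * and or_mask is ORed in (restricted to and_mask on AMDGPU_FAMILY_AI and
 * newer). The offsets below are placeholders, not real registers.
 *
 *	static const u32 example_golden_settings[] = {
 *		0x1234, 0xffffffff, 0x00000001,
 *		0x5678, 0x0000ff00, 0x00001100,
 *	};
 *
 *	amdgpu_device_program_register_sequence(adev, example_golden_settings,
 *						ARRAY_SIZE(example_golden_settings));
 */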
e3ecdffa
AD
613/**
614 * amdgpu_device_pci_config_reset - reset the GPU
615 *
616 * @adev: amdgpu_device pointer
617 *
618 * Resets the GPU using the pci config reset sequence.
619 * Only applicable to asics prior to vega10.
620 */
8111c387 621void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
622{
623 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
624}
625
626/*
627 * GPU doorbell aperture helpers function.
628 */
629/**
06ec9070 630 * amdgpu_device_doorbell_init - Init doorbell driver information.
d38ceaf9
AD
631 *
632 * @adev: amdgpu_device pointer
633 *
634 * Init doorbell driver information (CIK)
635 * Returns 0 on success, error on failure.
636 */
06ec9070 637static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
d38ceaf9 638{
6585661d 639
705e519e
CK
640 /* No doorbell on SI hardware generation */
641 if (adev->asic_type < CHIP_BONAIRE) {
642 adev->doorbell.base = 0;
643 adev->doorbell.size = 0;
644 adev->doorbell.num_doorbells = 0;
645 adev->doorbell.ptr = NULL;
646 return 0;
647 }
648
d6895ad3
CK
649 if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
650 return -EINVAL;
651
22357775
AD
652 amdgpu_asic_init_doorbell_index(adev);
653
d38ceaf9
AD
654 /* doorbell bar mapping */
655 adev->doorbell.base = pci_resource_start(adev->pdev, 2);
656 adev->doorbell.size = pci_resource_len(adev->pdev, 2);
657
edf600da 658 adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
9564f192 659 adev->doorbell_index.max_assignment+1);
d38ceaf9
AD
660 if (adev->doorbell.num_doorbells == 0)
661 return -EINVAL;
662
ec3db8a6 663 /* For Vega, reserve and map two pages on doorbell BAR since SDMA
88dc26e4
OZ
664 * paging queue doorbell uses the second page. The
665 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
666 * doorbells are in the first page. So with paging queue enabled,
667 * max num_doorbells should be increased by one page (0x400 in dwords)
ec3db8a6
PY
668 */
669 if (adev->asic_type >= CHIP_VEGA10)
88dc26e4 670 adev->doorbell.num_doorbells += 0x400;
ec3db8a6 671
8972e5d2
CK
672 adev->doorbell.ptr = ioremap(adev->doorbell.base,
673 adev->doorbell.num_doorbells *
674 sizeof(u32));
675 if (adev->doorbell.ptr == NULL)
d38ceaf9 676 return -ENOMEM;
d38ceaf9
AD
677
678 return 0;
679}
680
681/**
06ec9070 682 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
d38ceaf9
AD
683 *
684 * @adev: amdgpu_device pointer
685 *
686 * Tear down doorbell driver information (CIK)
687 */
06ec9070 688static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
d38ceaf9
AD
689{
690 iounmap(adev->doorbell.ptr);
691 adev->doorbell.ptr = NULL;
692}
693
22cb0164 694
d38ceaf9
AD
695
696/*
06ec9070 697 * amdgpu_device_wb_*()
455a7bc2 698 * Writeback is the method by which the GPU updates special pages in memory
ea81a173 699 * with the status of certain GPU events (fences, ring pointers,etc.).
d38ceaf9
AD
700 */
701
702/**
06ec9070 703 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
704 *
705 * @adev: amdgpu_device pointer
706 *
707 * Disables Writeback and frees the Writeback memory (all asics).
708 * Used at driver shutdown.
709 */
06ec9070 710static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
d38ceaf9
AD
711{
712 if (adev->wb.wb_obj) {
a76ed485
AD
713 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
714 &adev->wb.gpu_addr,
715 (void **)&adev->wb.wb);
d38ceaf9
AD
716 adev->wb.wb_obj = NULL;
717 }
718}
719
720/**
06ec9070 721 * amdgpu_device_wb_init- Init Writeback driver info and allocate memory
d38ceaf9
AD
722 *
723 * @adev: amdgpu_device pointer
724 *
455a7bc2 725 * Initializes writeback and allocates writeback memory (all asics).
d38ceaf9
AD
726 * Used at driver startup.
728 * Returns 0 on success or a negative error code on failure.
728 */
06ec9070 729static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
730{
731 int r;
732
733 if (adev->wb.wb_obj == NULL) {
97407b63
AD
734 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
735 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
736 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
737 &adev->wb.wb_obj, &adev->wb.gpu_addr,
738 (void **)&adev->wb.wb);
d38ceaf9
AD
739 if (r) {
740 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
741 return r;
742 }
d38ceaf9
AD
743
744 adev->wb.num_wb = AMDGPU_MAX_WB;
745 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
746
747 /* clear wb memory */
73469585 748 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
749 }
750
751 return 0;
752}
753
754/**
131b4b36 755 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
756 *
757 * @adev: amdgpu_device pointer
758 * @wb: wb index
759 *
760 * Allocate a wb slot for use by the driver (all asics).
761 * Returns 0 on success or -EINVAL on failure.
762 */
131b4b36 763int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9
AD
764{
765 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 766
97407b63 767 if (offset < adev->wb.num_wb) {
7014285a 768 __set_bit(offset, adev->wb.used);
63ae07ca 769 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
770 return 0;
771 } else {
772 return -EINVAL;
773 }
774}
775
d38ceaf9 776/**
131b4b36 777 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
778 *
779 * @adev: amdgpu_device pointer
780 * @wb: wb index
781 *
782 * Free a wb slot allocated for use by the driver (all asics)
783 */
131b4b36 784void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 785{
73469585 786 wb >>= 3;
d38ceaf9 787 if (wb < adev->wb.num_wb)
73469585 788 __clear_bit(wb, adev->wb.used);
d38ceaf9
AD
789}
790
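/*
 * Usage sketch (example only): allocate a writeback slot, derive its CPU
 * and GPU addresses (dword offset into adev->wb.wb / adev->wb.gpu_addr,
 * roughly the way the ring code uses these helpers), and release it again.
 *
 *	u32 wb;
 *
 *	if (!amdgpu_device_wb_get(adev, &wb)) {
 *		volatile uint32_t *cpu_addr = &adev->wb.wb[wb];
 *		u64 gpu_addr = adev->wb.gpu_addr + (wb * 4);
 *
 *		... let the GPU write status to gpu_addr, poll *cpu_addr ...
 *
 *		amdgpu_device_wb_free(adev, wb);
 *	}
 */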
d6895ad3
CK
791/**
792 * amdgpu_device_resize_fb_bar - try to resize FB BAR
793 *
794 * @adev: amdgpu_device pointer
795 *
796 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
797 * to fail, but if any of the BARs is not accessible after the resize we abort
798 * driver loading by returning -ENODEV.
799 */
800int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
801{
770d13b1 802 u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
d6895ad3 803 u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
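	/* rbar_size is the PCI resizable-BAR size encoding (2^rbar_size MB,
	 * assuming the spec's encoding). Worked example for 8 GiB of VRAM:
	 * space_needed = 8 GiB, (space_needed >> 20) | 1 = 8193,
	 * order_base_2(8193) = 14, minus 1 gives 13, i.e. a 2^13 MB = 8 GiB
	 * BAR is requested below.
	 */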
31b8adab
CK
804 struct pci_bus *root;
805 struct resource *res;
806 unsigned i;
d6895ad3
CK
807 u16 cmd;
808 int r;
809
0c03b912 810 /* Bypass for VF */
811 if (amdgpu_sriov_vf(adev))
812 return 0;
813
31b8adab
CK
814 /* Check if the root BUS has 64bit memory resources */
815 root = adev->pdev->bus;
816 while (root->parent)
817 root = root->parent;
818
819 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 820 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
821 res->start > 0x100000000ull)
822 break;
823 }
824
825 /* Trying to resize is pointless without a root hub window above 4GB */
826 if (!res)
827 return 0;
828
d6895ad3
CK
829 /* Disable memory decoding while we change the BAR addresses and size */
830 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
831 pci_write_config_word(adev->pdev, PCI_COMMAND,
832 cmd & ~PCI_COMMAND_MEMORY);
833
834 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
06ec9070 835 amdgpu_device_doorbell_fini(adev);
d6895ad3
CK
836 if (adev->asic_type >= CHIP_BONAIRE)
837 pci_release_resource(adev->pdev, 2);
838
839 pci_release_resource(adev->pdev, 0);
840
841 r = pci_resize_resource(adev->pdev, 0, rbar_size);
842 if (r == -ENOSPC)
843 DRM_INFO("Not enough PCI address space for a large BAR.");
844 else if (r && r != -ENOTSUPP)
845 DRM_ERROR("Problem resizing BAR0 (%d).", r);
846
847 pci_assign_unassigned_bus_resources(adev->pdev->bus);
848
849 /* When the doorbell or fb BAR isn't available we have no chance of
850 * using the device.
851 */
06ec9070 852 r = amdgpu_device_doorbell_init(adev);
d6895ad3
CK
853 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
854 return -ENODEV;
855
856 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
857
858 return 0;
859}
a05502e5 860
d38ceaf9
AD
861/*
862 * GPU helpers function.
863 */
864/**
39c640c0 865 * amdgpu_device_need_post - check if the hw need post or not
d38ceaf9
AD
866 *
867 * @adev: amdgpu_device pointer
868 *
c836fec5
JQ
869 * Check if the asic has been initialized (all asics) at driver startup,
870 * or if post is needed because a hw reset was performed.
871 * Returns true if post is needed, false if not.
d38ceaf9 872 */
39c640c0 873bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
874{
875 uint32_t reg;
876
bec86378
ML
877 if (amdgpu_sriov_vf(adev))
878 return false;
879
880 if (amdgpu_passthrough(adev)) {
1da2c326
ML
881 /* for FIJI: In the whole-GPU pass-through virtualization case, after a VM reboot
882 * some old SMC firmware still needs the driver to do vPost, otherwise the GPU hangs.
883 * SMC firmware versions above 22.15 don't have this flaw, so we force
884 * vPost to be executed for SMC versions below 22.15
bec86378
ML
885 */
886 if (adev->asic_type == CHIP_FIJI) {
887 int err;
888 uint32_t fw_ver;
889 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
890 /* force vPost if error occurred */
891 if (err)
892 return true;
893
894 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1da2c326
ML
895 if (fw_ver < 0x00160e00)
896 return true;
bec86378 897 }
bec86378 898 }
91fe77eb 899
900 if (adev->has_hw_reset) {
901 adev->has_hw_reset = false;
902 return true;
903 }
904
905 /* bios scratch used on CIK+ */
906 if (adev->asic_type >= CHIP_BONAIRE)
907 return amdgpu_atombios_scratch_need_asic_init(adev);
908
909 /* check MEM_SIZE for older asics */
910 reg = amdgpu_asic_get_config_memsize(adev);
911
912 if ((reg != 0) && (reg != 0xffffffff))
913 return false;
914
915 return true;
bec86378
ML
916}
917
d38ceaf9
AD
918/* if we get transitioned to only one device, take VGA back */
919/**
06ec9070 920 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9
AD
921 *
922 * @cookie: amdgpu_device pointer
923 * @state: enable/disable vga decode
924 *
925 * Enable/disable vga decode (all asics).
926 * Returns VGA resource flags.
927 */
06ec9070 928static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
d38ceaf9
AD
929{
930 struct amdgpu_device *adev = cookie;
931 amdgpu_asic_set_vga_state(adev, state);
932 if (state)
933 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
934 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
935 else
936 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
937}
938
e3ecdffa
AD
939/**
940 * amdgpu_device_check_block_size - validate the vm block size
941 *
942 * @adev: amdgpu_device pointer
943 *
944 * Validates the vm block size specified via module parameter.
945 * The vm block size defines number of bits in page table versus page directory,
946 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
947 * page table and the remaining bits are in the page directory.
948 */
06ec9070 949static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
950{
951 /* defines number of bits in page table versus page directory,
952 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
953 * page table and the remaining bits are in the page directory */
bab4fee7
JZ
954 if (amdgpu_vm_block_size == -1)
955 return;
a1adf8be 956
bab4fee7 957 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
958 dev_warn(adev->dev, "VM page table size (%d) too small\n",
959 amdgpu_vm_block_size);
97489129 960 amdgpu_vm_block_size = -1;
a1adf8be 961 }
a1adf8be
CZ
962}
963
e3ecdffa
AD
964/**
965 * amdgpu_device_check_vm_size - validate the vm size
966 *
967 * @adev: amdgpu_device pointer
968 *
969 * Validates the vm size in GB specified via module parameter.
970 * The VM size is the size of the GPU virtual memory space in GB.
971 */
06ec9070 972static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 973{
64dab074
AD
974 /* no need to check the default value */
975 if (amdgpu_vm_size == -1)
976 return;
977
83ca145d
ZJ
978 if (amdgpu_vm_size < 1) {
979 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
980 amdgpu_vm_size);
f3368128 981 amdgpu_vm_size = -1;
83ca145d 982 }
83ca145d
ZJ
983}
984
7951e376
RZ
985static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
986{
987 struct sysinfo si;
a9d4fe2f 988 bool is_os_64 = (sizeof(void *) == 8);
7951e376
RZ
989 uint64_t total_memory;
990 uint64_t dram_size_seven_GB = 0x1B8000000;
991 uint64_t dram_size_three_GB = 0xB8000000;
992
993 if (amdgpu_smu_memory_pool_size == 0)
994 return;
995
996 if (!is_os_64) {
997 DRM_WARN("Not 64-bit OS, feature not supported\n");
998 goto def_value;
999 }
1000 si_meminfo(&si);
1001 total_memory = (uint64_t)si.totalram * si.mem_unit;
1002
1003 if ((amdgpu_smu_memory_pool_size == 1) ||
1004 (amdgpu_smu_memory_pool_size == 2)) {
1005 if (total_memory < dram_size_three_GB)
1006 goto def_value1;
1007 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1008 (amdgpu_smu_memory_pool_size == 8)) {
1009 if (total_memory < dram_size_seven_GB)
1010 goto def_value1;
1011 } else {
1012 DRM_WARN("Smu memory pool size not supported\n");
1013 goto def_value;
1014 }
1015 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1016
1017 return;
1018
1019def_value1:
1020 DRM_WARN("Not enough system memory\n");
1021def_value:
1022 adev->pm.smu_prv_buffer_size = 0;
1023}
1024
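/*
 * Quick reference (derived from the scaling above): amdgpu_smu_memory_pool_size
 * values of 1, 2, 4 and 8 request a 256 MB, 512 MB, 1 GB and 2 GB SMU pool
 * respectively (value << 28 bytes); 1 and 2 additionally require roughly 3 GB
 * of system RAM, 4 and 8 roughly 7 GB, otherwise the pool is disabled.
 */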
d38ceaf9 1025/**
06ec9070 1026 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
1027 *
1028 * @adev: amdgpu_device pointer
1029 *
1030 * Validates certain module parameters and updates
1031 * the associated values used by the driver (all asics).
1032 */
912dfc84 1033static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1034{
5b011235
CZ
1035 if (amdgpu_sched_jobs < 4) {
1036 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1037 amdgpu_sched_jobs);
1038 amdgpu_sched_jobs = 4;
76117507 1039 } else if (!is_power_of_2(amdgpu_sched_jobs)) {
5b011235
CZ
1040 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1041 amdgpu_sched_jobs);
1042 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1043 }
d38ceaf9 1044
83e74db6 1045 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
1046 /* gart size must be greater or equal to 32M */
1047 dev_warn(adev->dev, "gart size (%d) too small\n",
1048 amdgpu_gart_size);
83e74db6 1049 amdgpu_gart_size = -1;
d38ceaf9
AD
1050 }
1051
36d38372 1052 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1053 /* gtt size must be greater or equal to 32M */
36d38372
CK
1054 dev_warn(adev->dev, "gtt size (%d) too small\n",
1055 amdgpu_gtt_size);
1056 amdgpu_gtt_size = -1;
d38ceaf9
AD
1057 }
1058
d07f14be
RH
1059 /* valid range is between 4 and 9 inclusive */
1060 if (amdgpu_vm_fragment_size != -1 &&
1061 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1062 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1063 amdgpu_vm_fragment_size = -1;
1064 }
1065
7951e376
RZ
1066 amdgpu_device_check_smu_prv_buffer_size(adev);
1067
06ec9070 1068 amdgpu_device_check_vm_size(adev);
d38ceaf9 1069
06ec9070 1070 amdgpu_device_check_block_size(adev);
6a7f76e7 1071
19aede77 1072 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84 1073
e3c00faa 1074 return 0;
d38ceaf9
AD
1075}
1076
1077/**
1078 * amdgpu_switcheroo_set_state - set switcheroo state
1079 *
1080 * @pdev: pci dev pointer
1694467b 1081 * @state: vga_switcheroo state
d38ceaf9
AD
1082 *
1083 * Callback for the switcheroo driver. Suspends or resumes
1084 * the asics before or after they are powered up using ACPI methods.
1085 */
1086static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
1087{
1088 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 1089 int r;
d38ceaf9 1090
31af062a 1091 if (amdgpu_device_supports_boco(dev) && state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
1092 return;
1093
1094 if (state == VGA_SWITCHEROO_ON) {
7ca85295 1095 pr_info("amdgpu: switched on\n");
d38ceaf9
AD
1096 /* don't suspend or resume card normally */
1097 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1098
de185019
AD
1099 pci_set_power_state(dev->pdev, PCI_D0);
1100 pci_restore_state(dev->pdev);
1101 r = pci_enable_device(dev->pdev);
1102 if (r)
1103 DRM_WARN("pci_enable_device failed (%d)\n", r);
1104 amdgpu_device_resume(dev, true);
d38ceaf9 1105
d38ceaf9
AD
1106 dev->switch_power_state = DRM_SWITCH_POWER_ON;
1107 drm_kms_helper_poll_enable(dev);
1108 } else {
7ca85295 1109 pr_info("amdgpu: switched off\n");
d38ceaf9
AD
1110 drm_kms_helper_poll_disable(dev);
1111 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
de185019
AD
1112 amdgpu_device_suspend(dev, true);
1113 pci_save_state(dev->pdev);
1114 /* Shut down the device */
1115 pci_disable_device(dev->pdev);
1116 pci_set_power_state(dev->pdev, PCI_D3cold);
d38ceaf9
AD
1117 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1118 }
1119}
1120
1121/**
1122 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1123 *
1124 * @pdev: pci dev pointer
1125 *
1126 * Callback for the switcheroo driver. Check if the switcheroo
1127 * state can be changed.
1128 * Returns true if the state can be changed, false if not.
1129 */
1130static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1131{
1132 struct drm_device *dev = pci_get_drvdata(pdev);
1133
1134 /*
1135 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1136 * locking inversion with the driver load path. And the access here is
1137 * completely racy anyway. So don't bother with locking for now.
1138 */
1139 return dev->open_count == 0;
1140}
1141
1142static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1143 .set_gpu_state = amdgpu_switcheroo_set_state,
1144 .reprobe = NULL,
1145 .can_switch = amdgpu_switcheroo_can_switch,
1146};
1147
e3ecdffa
AD
1148/**
1149 * amdgpu_device_ip_set_clockgating_state - set the CG state
1150 *
87e3f136 1151 * @dev: amdgpu_device pointer
e3ecdffa
AD
1152 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1153 * @state: clockgating state (gate or ungate)
1154 *
1155 * Sets the requested clockgating state for all instances of
1156 * the hardware IP specified.
1157 * Returns the error code from the last instance.
1158 */
43fa561f 1159int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1160 enum amd_ip_block_type block_type,
1161 enum amd_clockgating_state state)
d38ceaf9 1162{
43fa561f 1163 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1164 int i, r = 0;
1165
1166 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1167 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1168 continue;
c722865a
RZ
1169 if (adev->ip_blocks[i].version->type != block_type)
1170 continue;
1171 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1172 continue;
1173 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1174 (void *)adev, state);
1175 if (r)
1176 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1177 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1178 }
1179 return r;
1180}
1181
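/*
 * Usage sketch (example only): gate the clocks of all GFX IP instances.
 * AMD_IP_BLOCK_TYPE_GFX and AMD_CG_STATE_GATE are assumed here from
 * amd_shared.h; any block type / clockgating state pair is passed the same way.
 *
 *	r = amdgpu_device_ip_set_clockgating_state(adev,
 *						   AMD_IP_BLOCK_TYPE_GFX,
 *						   AMD_CG_STATE_GATE);
 */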
e3ecdffa
AD
1182/**
1183 * amdgpu_device_ip_set_powergating_state - set the PG state
1184 *
87e3f136 1185 * @dev: amdgpu_device pointer
e3ecdffa
AD
1186 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1187 * @state: powergating state (gate or ungate)
1188 *
1189 * Sets the requested powergating state for all instances of
1190 * the hardware IP specified.
1191 * Returns the error code from the last instance.
1192 */
43fa561f 1193int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1194 enum amd_ip_block_type block_type,
1195 enum amd_powergating_state state)
d38ceaf9 1196{
43fa561f 1197 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1198 int i, r = 0;
1199
1200 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1201 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1202 continue;
c722865a
RZ
1203 if (adev->ip_blocks[i].version->type != block_type)
1204 continue;
1205 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1206 continue;
1207 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1208 (void *)adev, state);
1209 if (r)
1210 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1211 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1212 }
1213 return r;
1214}
1215
e3ecdffa
AD
1216/**
1217 * amdgpu_device_ip_get_clockgating_state - get the CG state
1218 *
1219 * @adev: amdgpu_device pointer
1220 * @flags: clockgating feature flags
1221 *
1222 * Walks the list of IPs on the device and updates the clockgating
1223 * flags for each IP.
1224 * Updates @flags with the feature flags for each hardware IP where
1225 * clockgating is enabled.
1226 */
2990a1fc
AD
1227void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1228 u32 *flags)
6cb2d4e4
HR
1229{
1230 int i;
1231
1232 for (i = 0; i < adev->num_ip_blocks; i++) {
1233 if (!adev->ip_blocks[i].status.valid)
1234 continue;
1235 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1236 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1237 }
1238}
1239
e3ecdffa
AD
1240/**
1241 * amdgpu_device_ip_wait_for_idle - wait for idle
1242 *
1243 * @adev: amdgpu_device pointer
1244 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1245 *
1246 * Waits for the requested hardware IP to be idle.
1247 * Returns 0 for success or a negative error code on failure.
1248 */
2990a1fc
AD
1249int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1250 enum amd_ip_block_type block_type)
5dbbb60b
AD
1251{
1252 int i, r;
1253
1254 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1255 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1256 continue;
a1255107
AD
1257 if (adev->ip_blocks[i].version->type == block_type) {
1258 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1259 if (r)
1260 return r;
1261 break;
1262 }
1263 }
1264 return 0;
1265
1266}
1267
e3ecdffa
AD
1268/**
1269 * amdgpu_device_ip_is_idle - is the hardware IP idle
1270 *
1271 * @adev: amdgpu_device pointer
1272 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1273 *
1274 * Check if the hardware IP is idle or not.
1275 * Returns true if the IP is idle, false if not.
1276 */
2990a1fc
AD
1277bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1278 enum amd_ip_block_type block_type)
5dbbb60b
AD
1279{
1280 int i;
1281
1282 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1283 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1284 continue;
a1255107
AD
1285 if (adev->ip_blocks[i].version->type == block_type)
1286 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1287 }
1288 return true;
1289
1290}
1291
e3ecdffa
AD
1292/**
1293 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1294 *
1295 * @adev: amdgpu_device pointer
87e3f136 1296 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1297 *
1298 * Returns a pointer to the hardware IP block structure
1299 * if it exists for the asic, otherwise NULL.
1300 */
2990a1fc
AD
1301struct amdgpu_ip_block *
1302amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1303 enum amd_ip_block_type type)
d38ceaf9
AD
1304{
1305 int i;
1306
1307 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1308 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1309 return &adev->ip_blocks[i];
1310
1311 return NULL;
1312}
1313
1314/**
2990a1fc 1315 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1316 *
1317 * @adev: amdgpu_device pointer
5fc3aeeb 1318 * @type: enum amd_ip_block_type
d38ceaf9
AD
1319 * @major: major version
1320 * @minor: minor version
1321 *
1322 * return 0 if equal or greater
1323 * return 1 if smaller or the ip_block doesn't exist
1324 */
2990a1fc
AD
1325int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1326 enum amd_ip_block_type type,
1327 u32 major, u32 minor)
d38ceaf9 1328{
2990a1fc 1329 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1330
a1255107
AD
1331 if (ip_block && ((ip_block->version->major > major) ||
1332 ((ip_block->version->major == major) &&
1333 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1334 return 0;
1335
1336 return 1;
1337}
1338
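/*
 * Usage sketch (example only): look up an IP block and compare its version.
 * AMD_IP_BLOCK_TYPE_GFX is assumed from amd_shared.h.
 *
 *	struct amdgpu_ip_block *ip =
 *		amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
 *
 *	if (ip && !amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GFX,
 *						      9, 0))
 *		DRM_INFO("GFX IP is v%d.%d (>= 9.0)\n",
 *			 ip->version->major, ip->version->minor);
 */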
a1255107 1339/**
2990a1fc 1340 * amdgpu_device_ip_block_add
a1255107
AD
1341 *
1342 * @adev: amdgpu_device pointer
1343 * @ip_block_version: pointer to the IP to add
1344 *
1345 * Adds the IP block driver information to the collection of IPs
1346 * on the asic.
1347 */
2990a1fc
AD
1348int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1349 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1350{
1351 if (!ip_block_version)
1352 return -EINVAL;
1353
e966a725 1354 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1355 ip_block_version->funcs->name);
1356
a1255107
AD
1357 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1358
1359 return 0;
1360}
1361
e3ecdffa
AD
1362/**
1363 * amdgpu_device_enable_virtual_display - enable virtual display feature
1364 *
1365 * @adev: amdgpu_device pointer
1366 *
1367 * Enables the virtual display feature if the user has enabled it via
1368 * the module parameter virtual_display. This feature provides a virtual
1369 * display hardware on headless boards or in virtualized environments.
1370 * This function parses and validates the configuration string specified by
1371 * the user and configures the virtual display configuration (number of
1372 * virtual connectors, crtcs, etc.) specified.
1373 */
483ef985 1374static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1375{
1376 adev->enable_virtual_display = false;
1377
1378 if (amdgpu_virtual_display) {
1379 struct drm_device *ddev = adev->ddev;
1380 const char *pci_address_name = pci_name(ddev->pdev);
0f66356d 1381 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1382
1383 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1384 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1385 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1386 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1387 if (!strcmp("all", pciaddname)
1388 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1389 long num_crtc;
1390 int res = -1;
1391
9accf2fd 1392 adev->enable_virtual_display = true;
0f66356d
ED
1393
1394 if (pciaddname_tmp)
1395 res = kstrtol(pciaddname_tmp, 10,
1396 &num_crtc);
1397
1398 if (!res) {
1399 if (num_crtc < 1)
1400 num_crtc = 1;
1401 if (num_crtc > 6)
1402 num_crtc = 6;
1403 adev->mode_info.num_crtc = num_crtc;
1404 } else {
1405 adev->mode_info.num_crtc = 1;
1406 }
9accf2fd
ED
1407 break;
1408 }
1409 }
1410
0f66356d
ED
1411 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1412 amdgpu_virtual_display, pci_address_name,
1413 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
1414
1415 kfree(pciaddstr);
1416 }
1417}
1418
e3ecdffa
AD
1419/**
1420 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1421 *
1422 * @adev: amdgpu_device pointer
1423 *
1424 * Parses the asic configuration parameters specified in the gpu info
1425 * firmware and makes them availale to the driver for use in configuring
1426 * the asic.
1427 * Returns 0 on success, -EINVAL on failure.
1428 */
e2a75f88
AD
1429static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1430{
e2a75f88
AD
1431 const char *chip_name;
1432 char fw_name[30];
1433 int err;
1434 const struct gpu_info_firmware_header_v1_0 *hdr;
1435
ab4fe3e1
HR
1436 adev->firmware.gpu_info_fw = NULL;
1437
e2a75f88
AD
1438 switch (adev->asic_type) {
1439 case CHIP_TOPAZ:
1440 case CHIP_TONGA:
1441 case CHIP_FIJI:
e2a75f88 1442 case CHIP_POLARIS10:
cc07f18d 1443 case CHIP_POLARIS11:
e2a75f88 1444 case CHIP_POLARIS12:
cc07f18d 1445 case CHIP_VEGAM:
e2a75f88
AD
1446 case CHIP_CARRIZO:
1447 case CHIP_STONEY:
1448#ifdef CONFIG_DRM_AMDGPU_SI
1449 case CHIP_VERDE:
1450 case CHIP_TAHITI:
1451 case CHIP_PITCAIRN:
1452 case CHIP_OLAND:
1453 case CHIP_HAINAN:
1454#endif
1455#ifdef CONFIG_DRM_AMDGPU_CIK
1456 case CHIP_BONAIRE:
1457 case CHIP_HAWAII:
1458 case CHIP_KAVERI:
1459 case CHIP_KABINI:
1460 case CHIP_MULLINS:
1461#endif
27c0bc71 1462 case CHIP_VEGA20:
e2a75f88
AD
1463 default:
1464 return 0;
1465 case CHIP_VEGA10:
1466 chip_name = "vega10";
1467 break;
3f76dced
AD
1468 case CHIP_VEGA12:
1469 chip_name = "vega12";
1470 break;
2d2e5e7e 1471 case CHIP_RAVEN:
54c4d17e
FX
1472 if (adev->rev_id >= 8)
1473 chip_name = "raven2";
741deade
AD
1474 else if (adev->pdev->device == 0x15d8)
1475 chip_name = "picasso";
54c4d17e
FX
1476 else
1477 chip_name = "raven";
2d2e5e7e 1478 break;
65e60f6e
LM
1479 case CHIP_ARCTURUS:
1480 chip_name = "arcturus";
1481 break;
b51a26a0
HR
1482 case CHIP_RENOIR:
1483 chip_name = "renoir";
1484 break;
23c6268e
HR
1485 case CHIP_NAVI10:
1486 chip_name = "navi10";
1487 break;
ed42cfe1
XY
1488 case CHIP_NAVI14:
1489 chip_name = "navi14";
1490 break;
42b325e5
XY
1491 case CHIP_NAVI12:
1492 chip_name = "navi12";
1493 break;
e2a75f88
AD
1494 }
1495
1496 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
ab4fe3e1 1497 err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
e2a75f88
AD
1498 if (err) {
1499 dev_err(adev->dev,
1500 "Failed to load gpu_info firmware \"%s\"\n",
1501 fw_name);
1502 goto out;
1503 }
ab4fe3e1 1504 err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
e2a75f88
AD
1505 if (err) {
1506 dev_err(adev->dev,
1507 "Failed to validate gpu_info firmware \"%s\"\n",
1508 fw_name);
1509 goto out;
1510 }
1511
ab4fe3e1 1512 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
1513 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1514
1515 switch (hdr->version_major) {
1516 case 1:
1517 {
1518 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 1519 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
1520 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1521
ec51d3fa
XY
1522 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
1523 goto parse_soc_bounding_box;
1524
b5ab16bf
AD
1525 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1526 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1527 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1528 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 1529 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
1530 le32_to_cpu(gpu_info_fw->gc_num_tccs);
1531 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1532 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1533 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1534 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 1535 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
1536 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1537 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
1538 adev->gfx.cu_info.max_waves_per_simd =
1539 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1540 adev->gfx.cu_info.max_scratch_slots_per_cu =
1541 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1542 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 1543 if (hdr->version_minor >= 1) {
35c2e910
HZ
1544 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1545 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1546 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1547 adev->gfx.config.num_sc_per_sh =
1548 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1549 adev->gfx.config.num_packer_per_sc =
1550 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1551 }
ec51d3fa
XY
1552
1553parse_soc_bounding_box:
ec51d3fa
XY
1554 /*
1555 * soc bounding box info is not integrated in the discovery table,
1556 * we always need to parse it from gpu info firmware.
1557 */
48321c3d
HW
1558 if (hdr->version_minor == 2) {
1559 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1560 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1561 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1562 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1563 }
e2a75f88
AD
1564 break;
1565 }
1566 default:
1567 dev_err(adev->dev,
1568 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1569 err = -EINVAL;
1570 goto out;
1571 }
1572out:
e2a75f88
AD
1573 return err;
1574}
1575
e3ecdffa
AD
1576/**
1577 * amdgpu_device_ip_early_init - run early init for hardware IPs
1578 *
1579 * @adev: amdgpu_device pointer
1580 *
1581 * Early initialization pass for hardware IPs. The hardware IPs that make
1582 * up each asic are discovered and each IP's early_init callback is run. This
1583 * is the first stage in initializing the asic.
1584 * Returns 0 on success, negative error code on failure.
1585 */
06ec9070 1586static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 1587{
aaa36a97 1588 int i, r;
d38ceaf9 1589
483ef985 1590 amdgpu_device_enable_virtual_display(adev);
a6be7570 1591
d38ceaf9 1592 switch (adev->asic_type) {
aaa36a97
AD
1593 case CHIP_TOPAZ:
1594 case CHIP_TONGA:
48299f95 1595 case CHIP_FIJI:
2cc0c0b5 1596 case CHIP_POLARIS10:
32cc7e53 1597 case CHIP_POLARIS11:
c4642a47 1598 case CHIP_POLARIS12:
32cc7e53 1599 case CHIP_VEGAM:
aaa36a97 1600 case CHIP_CARRIZO:
39bb0c92
SL
1601 case CHIP_STONEY:
1602 if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
aaa36a97
AD
1603 adev->family = AMDGPU_FAMILY_CZ;
1604 else
1605 adev->family = AMDGPU_FAMILY_VI;
1606
1607 r = vi_set_ip_blocks(adev);
1608 if (r)
1609 return r;
1610 break;
33f34802
KW
1611#ifdef CONFIG_DRM_AMDGPU_SI
1612 case CHIP_VERDE:
1613 case CHIP_TAHITI:
1614 case CHIP_PITCAIRN:
1615 case CHIP_OLAND:
1616 case CHIP_HAINAN:
295d0daf 1617 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
1618 r = si_set_ip_blocks(adev);
1619 if (r)
1620 return r;
1621 break;
1622#endif
a2e73f56
AD
1623#ifdef CONFIG_DRM_AMDGPU_CIK
1624 case CHIP_BONAIRE:
1625 case CHIP_HAWAII:
1626 case CHIP_KAVERI:
1627 case CHIP_KABINI:
1628 case CHIP_MULLINS:
1629 if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII))
1630 adev->family = AMDGPU_FAMILY_CI;
1631 else
1632 adev->family = AMDGPU_FAMILY_KV;
1633
1634 r = cik_set_ip_blocks(adev);
1635 if (r)
1636 return r;
1637 break;
1638#endif
e48a3cd9
AD
1639 case CHIP_VEGA10:
1640 case CHIP_VEGA12:
e4bd8170 1641 case CHIP_VEGA20:
e48a3cd9 1642 case CHIP_RAVEN:
61cf44c1 1643 case CHIP_ARCTURUS:
b51a26a0
HR
1644 case CHIP_RENOIR:
1645 if (adev->asic_type == CHIP_RAVEN ||
1646 adev->asic_type == CHIP_RENOIR)
2ca8a5d2
CZ
1647 adev->family = AMDGPU_FAMILY_RV;
1648 else
1649 adev->family = AMDGPU_FAMILY_AI;
460826e6
KW
1650
1651 r = soc15_set_ip_blocks(adev);
1652 if (r)
1653 return r;
1654 break;
0a5b8c7b 1655 case CHIP_NAVI10:
7ecb5cd4 1656 case CHIP_NAVI14:
4808cf9c 1657 case CHIP_NAVI12:
0a5b8c7b
HR
1658 adev->family = AMDGPU_FAMILY_NV;
1659
1660 r = nv_set_ip_blocks(adev);
1661 if (r)
1662 return r;
1663 break;
d38ceaf9
AD
1664 default:
1665 /* FIXME: not supported yet */
1666 return -EINVAL;
1667 }
1668
e2a75f88
AD
1669 r = amdgpu_device_parse_gpu_info_fw(adev);
1670 if (r)
1671 return r;
1672
ec51d3fa
XY
1673 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
1674 amdgpu_discovery_get_gfx_info(adev);
1675
1884734a 1676 amdgpu_amdkfd_device_probe(adev);
1677
3149d9da
XY
1678 if (amdgpu_sriov_vf(adev)) {
1679 r = amdgpu_virt_request_full_gpu(adev, true);
1680 if (r)
5ffa61c1 1681 return -EAGAIN;
3149d9da
XY
1682 }
1683
3b94fb10 1684 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 1685 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 1686 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
00f54b97 1687
d38ceaf9
AD
1688 for (i = 0; i < adev->num_ip_blocks; i++) {
1689 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
ed8cf00c
HR
1690 DRM_ERROR("disabled ip block: %d <%s>\n",
1691 i, adev->ip_blocks[i].version->funcs->name);
a1255107 1692 adev->ip_blocks[i].status.valid = false;
d38ceaf9 1693 } else {
a1255107
AD
1694 if (adev->ip_blocks[i].version->funcs->early_init) {
1695 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 1696 if (r == -ENOENT) {
a1255107 1697 adev->ip_blocks[i].status.valid = false;
2c1a2784 1698 } else if (r) {
a1255107
AD
1699 DRM_ERROR("early_init of IP block <%s> failed %d\n",
1700 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 1701 return r;
2c1a2784 1702 } else {
a1255107 1703 adev->ip_blocks[i].status.valid = true;
2c1a2784 1704 }
974e6b64 1705 } else {
a1255107 1706 adev->ip_blocks[i].status.valid = true;
d38ceaf9 1707 }
d38ceaf9 1708 }
21a249ca
AD
1709 /* get the vbios after the asic_funcs are set up */
1710 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
1711 /* Read BIOS */
1712 if (!amdgpu_get_bios(adev))
1713 return -EINVAL;
1714
1715 r = amdgpu_atombios_init(adev);
1716 if (r) {
1717 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
1718 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
1719 return r;
1720 }
1721 }
d38ceaf9
AD
1722 }
1723
395d1fb9
NH
1724 adev->cg_flags &= amdgpu_cg_mask;
1725 adev->pg_flags &= amdgpu_pg_mask;
1726
d38ceaf9
AD
1727 return 0;
1728}
1729
0a4f2520
RZ
1730static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
1731{
1732 int i, r;
1733
1734 for (i = 0; i < adev->num_ip_blocks; i++) {
1735 if (!adev->ip_blocks[i].status.sw)
1736 continue;
1737 if (adev->ip_blocks[i].status.hw)
1738 continue;
1739 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 1740 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
1741 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
1742 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1743 if (r) {
1744 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1745 adev->ip_blocks[i].version->funcs->name, r);
1746 return r;
1747 }
1748 adev->ip_blocks[i].status.hw = true;
1749 }
1750 }
1751
1752 return 0;
1753}
1754
1755static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
1756{
1757 int i, r;
1758
1759 for (i = 0; i < adev->num_ip_blocks; i++) {
1760 if (!adev->ip_blocks[i].status.sw)
1761 continue;
1762 if (adev->ip_blocks[i].status.hw)
1763 continue;
1764 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1765 if (r) {
1766 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1767 adev->ip_blocks[i].version->funcs->name, r);
1768 return r;
1769 }
1770 adev->ip_blocks[i].status.hw = true;
1771 }
1772
1773 return 0;
1774}
1775
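/*
 * Note (not in the original file): hardware init is deliberately split so
 * amdgpu_device_ip_init() below can interleave other work between the phases.
 * Phase 1 brings up COMMON and IH (plus PSP when running as an SR-IOV VF),
 * amdgpu_device_fw_loading() then loads firmware through PSP/SMU, and phase 2
 * initializes every remaining block whose status.hw bit is still clear.
 */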
7a3e0bb2
RZ
1776static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
1777{
1778 int r = 0;
1779 int i;
80f41f84 1780 uint32_t smu_version;
7a3e0bb2
RZ
1781
1782 if (adev->asic_type >= CHIP_VEGA10) {
1783 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
1784 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
1785 continue;
1786
 1787 /* no need to do the fw loading again if already done */
1788 if (adev->ip_blocks[i].status.hw == true)
1789 break;
1790
1791 if (adev->in_gpu_reset || adev->in_suspend) {
1792 r = adev->ip_blocks[i].version->funcs->resume(adev);
1793 if (r) {
1794 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 1795 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
1796 return r;
1797 }
1798 } else {
1799 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1800 if (r) {
1801 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1802 adev->ip_blocks[i].version->funcs->name, r);
1803 return r;
7a3e0bb2 1804 }
7a3e0bb2 1805 }
482f0e53
ML
1806
1807 adev->ip_blocks[i].status.hw = true;
1808 break;
7a3e0bb2
RZ
1809 }
1810 }
482f0e53 1811
8973d9ec
ED
1812 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
1813 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 1814
80f41f84 1815 return r;
7a3e0bb2
RZ
1816}
1817
e3ecdffa
AD
1818/**
1819 * amdgpu_device_ip_init - run init for hardware IPs
1820 *
1821 * @adev: amdgpu_device pointer
1822 *
1823 * Main initialization pass for hardware IPs. The list of all the hardware
1824 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
1825 * are run. sw_init initializes the software state associated with each IP
1826 * and hw_init initializes the hardware associated with each IP.
1827 * Returns 0 on success, negative error code on failure.
1828 */
06ec9070 1829static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
1830{
1831 int i, r;
1832
c030f2e4 1833 r = amdgpu_ras_init(adev);
1834 if (r)
1835 return r;
1836
d38ceaf9 1837 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1838 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 1839 continue;
a1255107 1840 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 1841 if (r) {
a1255107
AD
1842 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
1843 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 1844 goto init_failed;
2c1a2784 1845 }
a1255107 1846 adev->ip_blocks[i].status.sw = true;
bfca0289 1847
d38ceaf9 1848 /* need to do gmc hw init early so we can allocate gpu mem */
a1255107 1849 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
06ec9070 1850 r = amdgpu_device_vram_scratch_init(adev);
2c1a2784
AD
1851 if (r) {
1852 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
72d3f592 1853 goto init_failed;
2c1a2784 1854 }
a1255107 1855 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
1856 if (r) {
1857 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 1858 goto init_failed;
2c1a2784 1859 }
06ec9070 1860 r = amdgpu_device_wb_init(adev);
2c1a2784 1861 if (r) {
06ec9070 1862 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 1863 goto init_failed;
2c1a2784 1864 }
a1255107 1865 adev->ip_blocks[i].status.hw = true;
2493664f
ML
1866
1867 /* right after GMC hw init, we create CSA */
f92d5c61 1868 if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
1e256e27
RZ
1869 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
1870 AMDGPU_GEM_DOMAIN_VRAM,
1871 AMDGPU_CSA_SIZE);
2493664f
ML
1872 if (r) {
1873 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 1874 goto init_failed;
2493664f
ML
1875 }
1876 }
d38ceaf9
AD
1877 }
1878 }
1879
c9ffa427
YT
1880 if (amdgpu_sriov_vf(adev))
1881 amdgpu_virt_init_data_exchange(adev);
1882
533aed27
AG
1883 r = amdgpu_ib_pool_init(adev);
1884 if (r) {
1885 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
1886 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
1887 goto init_failed;
1888 }
1889
c8963ea4
RZ
1890 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
1891 if (r)
72d3f592 1892 goto init_failed;
0a4f2520
RZ
1893
1894 r = amdgpu_device_ip_hw_init_phase1(adev);
1895 if (r)
72d3f592 1896 goto init_failed;
0a4f2520 1897
7a3e0bb2
RZ
1898 r = amdgpu_device_fw_loading(adev);
1899 if (r)
72d3f592 1900 goto init_failed;
7a3e0bb2 1901
0a4f2520
RZ
1902 r = amdgpu_device_ip_hw_init_phase2(adev);
1903 if (r)
72d3f592 1904 goto init_failed;
d38ceaf9 1905
121a2bc6
AG
1906 /*
1907 * retired pages will be loaded from eeprom and reserved here,
1908 * it should be called after amdgpu_device_ip_hw_init_phase2 since
1909 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
 1910 * for I2C communication, which is only true at this point.
 1911 * recovery_init may fail, but it can free all resources allocated by
 1912 * itself and its failure should not stop the amdgpu init process.
1913 *
 1914 * Note: theoretically, this should be called before all vram allocations
 1915 * to keep retired pages from being handed out to new allocations
1916 */
1917 amdgpu_ras_recovery_init(adev);
1918
3e2e2ab5
HZ
1919 if (adev->gmc.xgmi.num_physical_nodes > 1)
1920 amdgpu_xgmi_add_device(adev);
1884734a 1921 amdgpu_amdkfd_device_init(adev);
c6332b97 1922
72d3f592 1923init_failed:
c9ffa427 1924 if (amdgpu_sriov_vf(adev))
c6332b97 1925 amdgpu_virt_release_full_gpu(adev, true);
1926
72d3f592 1927 return r;
d38ceaf9
AD
1928}
1929
e3ecdffa
AD
1930/**
1931 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
1932 *
1933 * @adev: amdgpu_device pointer
1934 *
1935 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
1936 * this function before a GPU reset. If the value is retained after a
 1937 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
1938 */
06ec9070 1939static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
1940{
1941 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
1942}
1943
e3ecdffa
AD
1944/**
1945 * amdgpu_device_check_vram_lost - check if vram is valid
1946 *
1947 * @adev: amdgpu_device pointer
1948 *
1949 * Checks the reset magic value written to the gart pointer in VRAM.
 1950 * The driver calls this after a GPU reset to see whether the contents of
 1951 * VRAM have been lost.
 1952 * Returns true if vram is lost, false if not.
1953 */
06ec9070 1954static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8
CZ
1955{
1956 return !!memcmp(adev->gart.ptr, adev->reset_magic,
1957 AMDGPU_RESET_MAGIC_NUM);
1958}
1959
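/*
 * Illustrative sketch (hypothetical, for documentation only): the two helpers
 * above are used as a pair. amdgpu_device_fill_reset_magic() snapshots the
 * start of GART-mapped VRAM once the hardware is up (see
 * amdgpu_device_ip_late_init() below), and amdgpu_device_check_vram_lost() is
 * consulted after a reset to decide whether buffer contents must be recovered:
 */
#if 0
	vram_lost = amdgpu_device_check_vram_lost(adev);
	if (vram_lost) {
		DRM_INFO("VRAM is lost due to GPU reset!\n");
		/* re-validate evicted BOs, restore shadow copies, etc. */
	}
#endif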
e3ecdffa 1960/**
1112a46b 1961 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
1962 *
1963 * @adev: amdgpu_device pointer
b8b72130 1964 * @state: clockgating state (gate or ungate)
e3ecdffa 1965 *
e3ecdffa 1966 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
1967 * set_clockgating_state callbacks are run.
 1968 * During the late-init pass this enables clockgating for hardware IPs;
 1969 * during fini or suspend it is used to disable clockgating.
e3ecdffa
AD
1970 * Returns 0 on success, negative error code on failure.
1971 */
fdd34271 1972
1112a46b
RZ
1973static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
1974 enum amd_clockgating_state state)
d38ceaf9 1975{
1112a46b 1976 int i, j, r;
d38ceaf9 1977
4a2ba394
SL
1978 if (amdgpu_emu_mode == 1)
1979 return 0;
1980
1112a46b
RZ
1981 for (j = 0; j < adev->num_ip_blocks; j++) {
1982 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 1983 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 1984 continue;
4a446d55 1985 /* skip CG for VCE/UVD, it's handled specially */
a1255107 1986 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 1987 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 1988 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 1989 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 1990 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 1991 /* enable clockgating to save power */
a1255107 1992 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 1993 state);
4a446d55
AD
1994 if (r) {
1995 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 1996 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
1997 return r;
1998 }
b0b00ff1 1999 }
d38ceaf9 2000 }
06b18f61 2001
c9f96fd5
RZ
2002 return 0;
2003}
2004
1112a46b 2005static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
c9f96fd5 2006{
1112a46b 2007 int i, j, r;
06b18f61 2008
c9f96fd5
RZ
2009 if (amdgpu_emu_mode == 1)
2010 return 0;
2011
1112a46b
RZ
2012 for (j = 0; j < adev->num_ip_blocks; j++) {
2013 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2014 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5
RZ
2015 continue;
2016 /* skip CG for VCE/UVD, it's handled specially */
2017 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2018 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2019 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2020 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2021 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2022 /* enable powergating to save power */
2023 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2024 state);
c9f96fd5
RZ
2025 if (r) {
2026 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2027 adev->ip_blocks[i].version->funcs->name, r);
2028 return r;
2029 }
2030 }
2031 }
2dc80b00
S
2032 return 0;
2033}
2034
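/*
 * Worked example (not in the original file) of the index trick shared by
 * amdgpu_device_set_cg_state() and amdgpu_device_set_pg_state() above:
 *
 *	i = (state == GATE) ? j : adev->num_ip_blocks - j - 1;
 *
 * With num_ip_blocks == 4, gating visits blocks 0, 1, 2, 3 while ungating
 * visits 3, 2, 1, 0, so blocks are ungated in the reverse order in which
 * they were gated.
 */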
beff74bc
AD
2035static int amdgpu_device_enable_mgpu_fan_boost(void)
2036{
2037 struct amdgpu_gpu_instance *gpu_ins;
2038 struct amdgpu_device *adev;
2039 int i, ret = 0;
2040
2041 mutex_lock(&mgpu_info.mutex);
2042
2043 /*
2044 * MGPU fan boost feature should be enabled
2045 * only when there are two or more dGPUs in
2046 * the system
2047 */
2048 if (mgpu_info.num_dgpu < 2)
2049 goto out;
2050
2051 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2052 gpu_ins = &(mgpu_info.gpu_ins[i]);
2053 adev = gpu_ins->adev;
2054 if (!(adev->flags & AMD_IS_APU) &&
2055 !gpu_ins->mgpu_fan_enabled &&
2056 adev->powerplay.pp_funcs &&
2057 adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
2058 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2059 if (ret)
2060 break;
2061
2062 gpu_ins->mgpu_fan_enabled = 1;
2063 }
2064 }
2065
2066out:
2067 mutex_unlock(&mgpu_info.mutex);
2068
2069 return ret;
2070}
2071
e3ecdffa
AD
2072/**
2073 * amdgpu_device_ip_late_init - run late init for hardware IPs
2074 *
2075 * @adev: amdgpu_device pointer
2076 *
2077 * Late initialization pass for hardware IPs. The list of all the hardware
2078 * IPs that make up the asic is walked and the late_init callbacks are run.
2079 * late_init covers any special initialization that an IP requires
 2080 * after all of them have been initialized or something that needs to happen
2081 * late in the init process.
2082 * Returns 0 on success, negative error code on failure.
2083 */
06ec9070 2084static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2085{
60599a03 2086 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2087 int i = 0, r;
2088
2089 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2090 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2091 continue;
2092 if (adev->ip_blocks[i].version->funcs->late_init) {
2093 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2094 if (r) {
2095 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2096 adev->ip_blocks[i].version->funcs->name, r);
2097 return r;
2098 }
2dc80b00 2099 }
73f847db 2100 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2101 }
2102
1112a46b
RZ
2103 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2104 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2105
06ec9070 2106 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2107
beff74bc
AD
2108 r = amdgpu_device_enable_mgpu_fan_boost();
2109 if (r)
2110 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2111
60599a03
EQ
2112
2113 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2114 mutex_lock(&mgpu_info.mutex);
2115
2116 /*
2117 * Reset device p-state to low as this was booted with high.
2118 *
2119 * This should be performed only after all devices from the same
2120 * hive get initialized.
2121 *
 2122 * However, the number of devices in a hive is not known in advance;
 2123 * it is counted one device at a time as each one initializes.
 2124 *
 2125 * So, we wait until all XGMI-interlinked devices are initialized.
2126 * This may bring some delays as those devices may come from
2127 * different hives. But that should be OK.
2128 */
2129 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2130 for (i = 0; i < mgpu_info.num_gpu; i++) {
2131 gpu_instance = &(mgpu_info.gpu_ins[i]);
2132 if (gpu_instance->adev->flags & AMD_IS_APU)
2133 continue;
2134
2135 r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 0);
2136 if (r) {
2137 DRM_ERROR("pstate setting failed (%d).\n", r);
2138 break;
2139 }
2140 }
2141 }
2142
2143 mutex_unlock(&mgpu_info.mutex);
2144 }
2145
d38ceaf9
AD
2146 return 0;
2147}
2148
e3ecdffa
AD
2149/**
2150 * amdgpu_device_ip_fini - run fini for hardware IPs
2151 *
2152 * @adev: amdgpu_device pointer
2153 *
2154 * Main teardown pass for hardware IPs. The list of all the hardware
2155 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2156 * are run. hw_fini tears down the hardware associated with each IP
2157 * and sw_fini tears down any software state associated with each IP.
2158 * Returns 0 on success, negative error code on failure.
2159 */
06ec9070 2160static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
d38ceaf9
AD
2161{
2162 int i, r;
2163
c030f2e4 2164 amdgpu_ras_pre_fini(adev);
2165
a82400b5
AG
2166 if (adev->gmc.xgmi.num_physical_nodes > 1)
2167 amdgpu_xgmi_remove_device(adev);
2168
1884734a 2169 amdgpu_amdkfd_device_fini(adev);
05df1f01
RZ
2170
2171 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2172 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2173
3e96dbfd
AD
2174 /* need to disable SMC first */
2175 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2176 if (!adev->ip_blocks[i].status.hw)
3e96dbfd 2177 continue;
fdd34271 2178 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
a1255107 2179 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3e96dbfd
AD
2180 /* XXX handle errors */
2181 if (r) {
2182 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
a1255107 2183 adev->ip_blocks[i].version->funcs->name, r);
3e96dbfd 2184 }
a1255107 2185 adev->ip_blocks[i].status.hw = false;
3e96dbfd
AD
2186 break;
2187 }
2188 }
2189
d38ceaf9 2190 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2191 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2192 continue;
8201a67a 2193
a1255107 2194 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2195 /* XXX handle errors */
2c1a2784 2196 if (r) {
a1255107
AD
2197 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2198 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2199 }
8201a67a 2200
a1255107 2201 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2202 }
2203
9950cda2 2204
d38ceaf9 2205 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2206 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2207 continue;
c12aba3a
ML
2208
2209 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2210 amdgpu_ucode_free_bo(adev);
1e256e27 2211 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a
ML
2212 amdgpu_device_wb_fini(adev);
2213 amdgpu_device_vram_scratch_fini(adev);
533aed27 2214 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2215 }
2216
a1255107 2217 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2218 /* XXX handle errors */
2c1a2784 2219 if (r) {
a1255107
AD
2220 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2221 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2222 }
a1255107
AD
2223 adev->ip_blocks[i].status.sw = false;
2224 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2225 }
2226
a6dcfd9c 2227 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2228 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2229 continue;
a1255107
AD
2230 if (adev->ip_blocks[i].version->funcs->late_fini)
2231 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2232 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2233 }
2234
c030f2e4 2235 amdgpu_ras_fini(adev);
2236
030308fc 2237 if (amdgpu_sriov_vf(adev))
24136135
ML
2238 if (amdgpu_virt_release_full_gpu(adev, false))
2239 DRM_ERROR("failed to release exclusive mode on fini\n");
2493664f 2240
d38ceaf9
AD
2241 return 0;
2242}
2243
e3ecdffa 2244/**
beff74bc 2245 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2246 *
1112a46b 2247 * @work: work_struct.
e3ecdffa 2248 */
beff74bc 2249static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2250{
2251 struct amdgpu_device *adev =
beff74bc 2252 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2253 int r;
2254
2255 r = amdgpu_ib_ring_tests(adev);
2256 if (r)
2257 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2258}
2259
1e317b99
RZ
2260static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2261{
2262 struct amdgpu_device *adev =
2263 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2264
2265 mutex_lock(&adev->gfx.gfx_off_mutex);
2266 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2267 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2268 adev->gfx.gfx_off_state = true;
2269 }
2270 mutex_unlock(&adev->gfx.gfx_off_mutex);
2271}
2272
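/*
 * Illustrative sketch (hypothetical, for documentation only): GFXOFF is
 * reference counted. adev->gfx.gfx_off_req_count starts at 1 (see
 * amdgpu_device_init() below) and the delayed work above only asks the SMU to
 * power-gate GFX once every request has been dropped. Assuming the
 * amdgpu_gfx_off_ctrl() helper declared in amdgpu_gfx.h, a caller that needs
 * GFX awake would look roughly like this:
 */
#if 0
	amdgpu_gfx_off_ctrl(adev, false);	/* bump req_count: keep GFX on */
	/* ... access GFX registers safely ... */
	amdgpu_gfx_off_ctrl(adev, true);	/* drop req_count: re-arm GFXOFF */
#endif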
e3ecdffa 2273/**
e7854a03 2274 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2275 *
2276 * @adev: amdgpu_device pointer
2277 *
2278 * Main suspend function for hardware IPs. The list of all the hardware
2279 * IPs that make up the asic is walked, clockgating is disabled and the
2280 * suspend callbacks are run. suspend puts the hardware and software state
2281 * in each IP into a state suitable for suspend.
2282 * Returns 0 on success, negative error code on failure.
2283 */
e7854a03
AD
2284static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2285{
2286 int i, r;
2287
05df1f01 2288 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271 2289 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2290
e7854a03
AD
2291 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2292 if (!adev->ip_blocks[i].status.valid)
2293 continue;
2294 /* displays are handled separately */
2295 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
e7854a03
AD
2296 /* XXX handle errors */
2297 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2298 /* XXX handle errors */
2299 if (r) {
2300 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2301 adev->ip_blocks[i].version->funcs->name, r);
482f0e53 2302 return r;
e7854a03 2303 }
482f0e53 2304 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
2305 }
2306 }
2307
e7854a03
AD
2308 return 0;
2309}
2310
2311/**
2312 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2313 *
2314 * @adev: amdgpu_device pointer
2315 *
2316 * Main suspend function for hardware IPs. The list of all the hardware
2317 * IPs that make up the asic is walked, clockgating is disabled and the
2318 * suspend callbacks are run. suspend puts the hardware and software state
2319 * in each IP into a state suitable for suspend.
2320 * Returns 0 on success, negative error code on failure.
2321 */
2322static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2323{
2324 int i, r;
2325
2326 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2327 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2328 continue;
e7854a03
AD
2329 /* displays are handled in phase1 */
2330 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2331 continue;
bff77e86
LM
2332 /* PSP lost connection when err_event_athub occurs */
2333 if (amdgpu_ras_intr_triggered() &&
2334 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2335 adev->ip_blocks[i].status.hw = false;
2336 continue;
2337 }
d38ceaf9 2338 /* XXX handle errors */
a1255107 2339 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 2340 /* XXX handle errors */
2c1a2784 2341 if (r) {
a1255107
AD
2342 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2343 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2344 }
876923fb 2345 adev->ip_blocks[i].status.hw = false;
a3a09142
AD
2346 /* handle putting the SMC in the appropriate state */
2347 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
9530273e 2348 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
0e0b89c0
EQ
2349 if (r) {
2350 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
2351 adev->mp1_state, r);
2352 return r;
a3a09142
AD
2353 }
2354 }
b5507c7e
AG
2355
2356 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2357 }
2358
2359 return 0;
2360}
2361
e7854a03
AD
2362/**
2363 * amdgpu_device_ip_suspend - run suspend for hardware IPs
2364 *
2365 * @adev: amdgpu_device pointer
2366 *
2367 * Main suspend function for hardware IPs. The list of all the hardware
2368 * IPs that make up the asic is walked, clockgating is disabled and the
2369 * suspend callbacks are run. suspend puts the hardware and software state
2370 * in each IP into a state suitable for suspend.
2371 * Returns 0 on success, negative error code on failure.
2372 */
2373int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2374{
2375 int r;
2376
e7819644
YT
2377 if (amdgpu_sriov_vf(adev))
2378 amdgpu_virt_request_full_gpu(adev, false);
2379
e7854a03
AD
2380 r = amdgpu_device_ip_suspend_phase1(adev);
2381 if (r)
2382 return r;
2383 r = amdgpu_device_ip_suspend_phase2(adev);
2384
e7819644
YT
2385 if (amdgpu_sriov_vf(adev))
2386 amdgpu_virt_release_full_gpu(adev, false);
2387
e7854a03
AD
2388 return r;
2389}
2390
06ec9070 2391static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2392{
2393 int i, r;
2394
2cb681b6
ML
2395 static enum amd_ip_block_type ip_order[] = {
2396 AMD_IP_BLOCK_TYPE_GMC,
2397 AMD_IP_BLOCK_TYPE_COMMON,
39186aef 2398 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
2399 AMD_IP_BLOCK_TYPE_IH,
2400 };
a90ad3c2 2401
2cb681b6
ML
2402 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2403 int j;
2404 struct amdgpu_ip_block *block;
a90ad3c2 2405
2cb681b6
ML
2406 for (j = 0; j < adev->num_ip_blocks; j++) {
2407 block = &adev->ip_blocks[j];
2408
482f0e53 2409 block->status.hw = false;
2cb681b6
ML
2410 if (block->version->type != ip_order[i] ||
2411 !block->status.valid)
2412 continue;
2413
2414 r = block->version->funcs->hw_init(adev);
0aaeefcc 2415 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2416 if (r)
2417 return r;
482f0e53 2418 block->status.hw = true;
a90ad3c2
ML
2419 }
2420 }
2421
2422 return 0;
2423}
2424
06ec9070 2425static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2426{
2427 int i, r;
2428
2cb681b6
ML
2429 static enum amd_ip_block_type ip_order[] = {
2430 AMD_IP_BLOCK_TYPE_SMC,
2431 AMD_IP_BLOCK_TYPE_DCE,
2432 AMD_IP_BLOCK_TYPE_GFX,
2433 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c 2434 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07
JJ
2435 AMD_IP_BLOCK_TYPE_VCE,
2436 AMD_IP_BLOCK_TYPE_VCN
2cb681b6 2437 };
a90ad3c2 2438
2cb681b6
ML
2439 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2440 int j;
2441 struct amdgpu_ip_block *block;
a90ad3c2 2442
2cb681b6
ML
2443 for (j = 0; j < adev->num_ip_blocks; j++) {
2444 block = &adev->ip_blocks[j];
2445
2446 if (block->version->type != ip_order[i] ||
482f0e53
ML
2447 !block->status.valid ||
2448 block->status.hw)
2cb681b6
ML
2449 continue;
2450
895bd048
JZ
2451 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
2452 r = block->version->funcs->resume(adev);
2453 else
2454 r = block->version->funcs->hw_init(adev);
2455
0aaeefcc 2456 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2457 if (r)
2458 return r;
482f0e53 2459 block->status.hw = true;
a90ad3c2
ML
2460 }
2461 }
2462
2463 return 0;
2464}
2465
e3ecdffa
AD
2466/**
2467 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2468 *
2469 * @adev: amdgpu_device pointer
2470 *
2471 * First resume function for hardware IPs. The list of all the hardware
2472 * IPs that make up the asic is walked and the resume callbacks are run for
2473 * COMMON, GMC, and IH. resume puts the hardware into a functional state
2474 * after a suspend and updates the software state as necessary. This
2475 * function is also used for restoring the GPU after a GPU reset.
2476 * Returns 0 on success, negative error code on failure.
2477 */
06ec9070 2478static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
2479{
2480 int i, r;
2481
a90ad3c2 2482 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2483 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 2484 continue;
a90ad3c2 2485 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa
AD
2486 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2487 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
482f0e53 2488
fcf0649f
CZ
2489 r = adev->ip_blocks[i].version->funcs->resume(adev);
2490 if (r) {
2491 DRM_ERROR("resume of IP block <%s> failed %d\n",
2492 adev->ip_blocks[i].version->funcs->name, r);
2493 return r;
2494 }
482f0e53 2495 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
2496 }
2497 }
2498
2499 return 0;
2500}
2501
e3ecdffa
AD
2502/**
2503 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2504 *
2505 * @adev: amdgpu_device pointer
2506 *
 2507 * Second resume function for hardware IPs. The list of all the hardware
2508 * IPs that make up the asic is walked and the resume callbacks are run for
2509 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
2510 * functional state after a suspend and updates the software state as
2511 * necessary. This function is also used for restoring the GPU after a GPU
2512 * reset.
2513 * Returns 0 on success, negative error code on failure.
2514 */
06ec9070 2515static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2516{
2517 int i, r;
2518
2519 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2520 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 2521 continue;
fcf0649f 2522 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 2523 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
2524 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2525 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 2526 continue;
a1255107 2527 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 2528 if (r) {
a1255107
AD
2529 DRM_ERROR("resume of IP block <%s> failed %d\n",
2530 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2531 return r;
2c1a2784 2532 }
482f0e53 2533 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
2534 }
2535
2536 return 0;
2537}
2538
e3ecdffa
AD
2539/**
2540 * amdgpu_device_ip_resume - run resume for hardware IPs
2541 *
2542 * @adev: amdgpu_device pointer
2543 *
2544 * Main resume function for hardware IPs. The hardware IPs
 2545 * are split into two resume functions because they are
 2546 * also used in recovering from a GPU reset and some additional
 2547 * steps need to be taken between them. In this case (S3/S4) they are
2548 * run sequentially.
2549 * Returns 0 on success, negative error code on failure.
2550 */
06ec9070 2551static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
2552{
2553 int r;
2554
06ec9070 2555 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
2556 if (r)
2557 return r;
7a3e0bb2
RZ
2558
2559 r = amdgpu_device_fw_loading(adev);
2560 if (r)
2561 return r;
2562
06ec9070 2563 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
2564
2565 return r;
2566}
2567
e3ecdffa
AD
2568/**
2569 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2570 *
2571 * @adev: amdgpu_device pointer
2572 *
2573 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2574 */
4e99a44e 2575static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 2576{
6867e1b5
ML
2577 if (amdgpu_sriov_vf(adev)) {
2578 if (adev->is_atom_fw) {
2579 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2580 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2581 } else {
2582 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2583 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2584 }
2585
2586 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2587 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 2588 }
048765ad
AR
2589}
2590
e3ecdffa
AD
2591/**
2592 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2593 *
2594 * @asic_type: AMD asic type
2595 *
 2596 * Check if there is DC (new modesetting infrastructure) support for an asic.
2597 * returns true if DC has support, false if not.
2598 */
4562236b
HW
2599bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2600{
2601 switch (asic_type) {
2602#if defined(CONFIG_DRM_AMD_DC)
2603 case CHIP_BONAIRE:
0d6fbccb 2604 case CHIP_KAVERI:
367e6687
AD
2605 case CHIP_KABINI:
2606 case CHIP_MULLINS:
d9fda248
HW
2607 /*
2608 * We have systems in the wild with these ASICs that require
2609 * LVDS and VGA support which is not supported with DC.
2610 *
2611 * Fallback to the non-DC driver here by default so as not to
2612 * cause regressions.
2613 */
2614 return amdgpu_dc > 0;
2615 case CHIP_HAWAII:
4562236b
HW
2616 case CHIP_CARRIZO:
2617 case CHIP_STONEY:
4562236b 2618 case CHIP_POLARIS10:
675fd32b 2619 case CHIP_POLARIS11:
2c8ad2d5 2620 case CHIP_POLARIS12:
675fd32b 2621 case CHIP_VEGAM:
4562236b
HW
2622 case CHIP_TONGA:
2623 case CHIP_FIJI:
42f8ffa1 2624 case CHIP_VEGA10:
dca7b401 2625 case CHIP_VEGA12:
c6034aa2 2626 case CHIP_VEGA20:
b86a1aa3 2627#if defined(CONFIG_DRM_AMD_DC_DCN)
fd187853 2628 case CHIP_RAVEN:
b4f199c7 2629 case CHIP_NAVI10:
8fceceb6 2630 case CHIP_NAVI14:
078655d9 2631 case CHIP_NAVI12:
e1c14c43 2632 case CHIP_RENOIR:
42f8ffa1 2633#endif
fd187853 2634 return amdgpu_dc != 0;
4562236b
HW
2635#endif
2636 default:
93b09a9a
SS
2637 if (amdgpu_dc > 0)
2638 DRM_INFO("Display Core has been requested via kernel parameter "
2639 "but isn't supported by ASIC, ignoring\n");
4562236b
HW
2640 return false;
2641 }
2642}
2643
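/*
 * Note (not in the original file): amdgpu.dc is a tristate module parameter
 * (1 = enable, 0 = disable, -1 = auto, the default). With the checks above,
 * the LVDS/VGA-era chips (Bonaire, Kaveri, Kabini, Mullins) get DC only when
 * it is explicitly forced (amdgpu_dc > 0), while the other listed ASICs use
 * DC unless it is explicitly disabled (amdgpu_dc != 0).
 */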
2644/**
2645 * amdgpu_device_has_dc_support - check if dc is supported
2646 *
 2647 * @adev: amdgpu_device pointer
2648 *
2649 * Returns true for supported, false for not supported
2650 */
2651bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2652{
2555039d
XY
2653 if (amdgpu_sriov_vf(adev))
2654 return false;
2655
4562236b
HW
2656 return amdgpu_device_asic_has_dc_support(adev->asic_type);
2657}
2658
d4535e2c
AG
2659
2660static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
2661{
2662 struct amdgpu_device *adev =
2663 container_of(__work, struct amdgpu_device, xgmi_reset_work);
c6a6e2db 2664 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
d4535e2c 2665
c6a6e2db
AG
2666 /* It's a bug to not have a hive within this function */
2667 if (WARN_ON(!hive))
2668 return;
2669
2670 /*
2671 * Use task barrier to synchronize all xgmi reset works across the
2672 * hive. task_barrier_enter and task_barrier_exit will block
2673 * until all the threads running the xgmi reset works reach
2674 * those points. task_barrier_full will do both blocks.
2675 */
2676 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
2677
2678 task_barrier_enter(&hive->tb);
2679 adev->asic_reset_res = amdgpu_device_baco_enter(adev->ddev);
2680
2681 if (adev->asic_reset_res)
2682 goto fail;
2683
2684 task_barrier_exit(&hive->tb);
2685 adev->asic_reset_res = amdgpu_device_baco_exit(adev->ddev);
2686
2687 if (adev->asic_reset_res)
2688 goto fail;
2689 } else {
2690
2691 task_barrier_full(&hive->tb);
2692 adev->asic_reset_res = amdgpu_asic_reset(adev);
2693 }
ce316fa5 2694
c6a6e2db 2695fail:
d4535e2c 2696 if (adev->asic_reset_res)
fed184e9 2697 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
d4535e2c
AG
2698 adev->asic_reset_res, adev->ddev->unique);
2699}
2700
71f98027
AD
2701static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
2702{
2703 char *input = amdgpu_lockup_timeout;
2704 char *timeout_setting = NULL;
2705 int index = 0;
2706 long timeout;
2707 int ret = 0;
2708
2709 /*
 2710 * By default, the timeout for non-compute jobs is 10000 ms
 2711 * and no timeout is enforced on compute jobs.
 2712 * In SR-IOV or passthrough mode, the timeout for compute
 2713 * jobs is also 10000 ms by default.
2714 */
2715 adev->gfx_timeout = msecs_to_jiffies(10000);
2716 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
2717 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
2718 adev->compute_timeout = adev->gfx_timeout;
2719 else
2720 adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
2721
f440ff44 2722 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 2723 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 2724 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
2725 ret = kstrtol(timeout_setting, 0, &timeout);
2726 if (ret)
2727 return ret;
2728
2729 if (timeout == 0) {
2730 index++;
2731 continue;
2732 } else if (timeout < 0) {
2733 timeout = MAX_SCHEDULE_TIMEOUT;
2734 } else {
2735 timeout = msecs_to_jiffies(timeout);
2736 }
2737
2738 switch (index++) {
2739 case 0:
2740 adev->gfx_timeout = timeout;
2741 break;
2742 case 1:
2743 adev->compute_timeout = timeout;
2744 break;
2745 case 2:
2746 adev->sdma_timeout = timeout;
2747 break;
2748 case 3:
2749 adev->video_timeout = timeout;
2750 break;
2751 default:
2752 break;
2753 }
2754 }
2755 /*
2756 * There is only one value specified and
2757 * it should apply to all non-compute jobs.
2758 */
bcccee89 2759 if (index == 1) {
71f98027 2760 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
2761 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
2762 adev->compute_timeout = adev->gfx_timeout;
2763 }
71f98027
AD
2764 }
2765
2766 return ret;
2767}
d4535e2c 2768
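/*
 * Illustrative examples (not in the original file) of the amdgpu.lockup_timeout
 * format parsed above. Values are in milliseconds; 0 keeps the default and a
 * negative value means no timeout (MAX_SCHEDULE_TIMEOUT):
 *
 *	amdgpu.lockup_timeout=10000
 *		a single value applies to gfx, sdma and video, and to compute
 *		only in SR-IOV or passthrough mode
 *	amdgpu.lockup_timeout=10000,60000,10000,10000
 *		per-ring-type values in the order gfx,compute,sdma,video
 */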
d38ceaf9
AD
2769/**
2770 * amdgpu_device_init - initialize the driver
2771 *
2772 * @adev: amdgpu_device pointer
87e3f136 2773 * @ddev: drm dev pointer
d38ceaf9
AD
2774 * @pdev: pci dev pointer
2775 * @flags: driver flags
2776 *
2777 * Initializes the driver info and hw (all asics).
2778 * Returns 0 for success or an error on failure.
2779 * Called at driver startup.
2780 */
2781int amdgpu_device_init(struct amdgpu_device *adev,
2782 struct drm_device *ddev,
2783 struct pci_dev *pdev,
2784 uint32_t flags)
2785{
2786 int r, i;
3840c5bc 2787 bool boco = false;
95844d20 2788 u32 max_MBps;
d38ceaf9
AD
2789
2790 adev->shutdown = false;
2791 adev->dev = &pdev->dev;
2792 adev->ddev = ddev;
2793 adev->pdev = pdev;
2794 adev->flags = flags;
4e66d7d2
YZ
2795
2796 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
2797 adev->asic_type = amdgpu_force_asic_type;
2798 else
2799 adev->asic_type = flags & AMD_ASIC_MASK;
2800
d38ceaf9 2801 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2
SL
2802 if (amdgpu_emu_mode == 1)
2803 adev->usec_timeout *= 2;
770d13b1 2804 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
2805 adev->accel_working = false;
2806 adev->num_rings = 0;
2807 adev->mman.buffer_funcs = NULL;
2808 adev->mman.buffer_funcs_ring = NULL;
2809 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 2810 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 2811 adev->gmc.gmc_funcs = NULL;
f54d1867 2812 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 2813 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
2814
2815 adev->smc_rreg = &amdgpu_invalid_rreg;
2816 adev->smc_wreg = &amdgpu_invalid_wreg;
2817 adev->pcie_rreg = &amdgpu_invalid_rreg;
2818 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
2819 adev->pciep_rreg = &amdgpu_invalid_rreg;
2820 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
2821 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
2822 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
2823 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
2824 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
2825 adev->didt_rreg = &amdgpu_invalid_rreg;
2826 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
2827 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
2828 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
2829 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
2830 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
2831
3e39ab90
AD
2832 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
2833 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
2834 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
2835
 2836 /* mutex initializations are all done here so we
 2837 * can recall functions without having locking issues */
d38ceaf9 2838 atomic_set(&adev->irq.ih.lock, 0);
0e5ca0d1 2839 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
2840 mutex_init(&adev->pm.mutex);
2841 mutex_init(&adev->gfx.gpu_clock_mutex);
2842 mutex_init(&adev->srbm_mutex);
b8866c26 2843 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 2844 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 2845 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 2846 mutex_init(&adev->mn_lock);
e23b74aa 2847 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 2848 hash_init(adev->mn_hash);
13a752e3 2849 mutex_init(&adev->lock_reset);
32eaeae0 2850 mutex_init(&adev->psp.mutex);
bd052211 2851 mutex_init(&adev->notifier_lock);
d38ceaf9 2852
912dfc84
EQ
2853 r = amdgpu_device_check_arguments(adev);
2854 if (r)
2855 return r;
d38ceaf9 2856
d38ceaf9
AD
2857 spin_lock_init(&adev->mmio_idx_lock);
2858 spin_lock_init(&adev->smc_idx_lock);
2859 spin_lock_init(&adev->pcie_idx_lock);
2860 spin_lock_init(&adev->uvd_ctx_idx_lock);
2861 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 2862 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 2863 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 2864 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 2865 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 2866
0c4e7fa5
CZ
2867 INIT_LIST_HEAD(&adev->shadow_list);
2868 mutex_init(&adev->shadow_list_lock);
2869
795f2813
AR
2870 INIT_LIST_HEAD(&adev->ring_lru_list);
2871 spin_lock_init(&adev->ring_lru_list_lock);
2872
beff74bc
AD
2873 INIT_DELAYED_WORK(&adev->delayed_init_work,
2874 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
2875 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
2876 amdgpu_device_delay_enable_gfx_off);
2dc80b00 2877
d4535e2c
AG
2878 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
2879
d23ee13f 2880 adev->gfx.gfx_off_req_count = 1;
b1ddf548
RZ
2881 adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false;
2882
0fa49558
AX
2883 /* Registers mapping */
2884 /* TODO: block userspace mapping of io register */
da69c161
KW
2885 if (adev->asic_type >= CHIP_BONAIRE) {
2886 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
2887 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
2888 } else {
2889 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
2890 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
2891 }
d38ceaf9 2892
d38ceaf9
AD
2893 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
2894 if (adev->rmmio == NULL) {
2895 return -ENOMEM;
2896 }
2897 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
2898 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
2899
d38ceaf9
AD
2900 /* io port mapping */
2901 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
2902 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
2903 adev->rio_mem_size = pci_resource_len(adev->pdev, i);
2904 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
2905 break;
2906 }
2907 }
2908 if (adev->rio_mem == NULL)
b64a18c5 2909 DRM_INFO("PCI I/O BAR is not found.\n");
d38ceaf9 2910
b2109d8e
JX
2911 /* enable PCIE atomic ops */
2912 r = pci_enable_atomic_ops_to_root(adev->pdev,
2913 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
2914 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
2915 if (r) {
2916 adev->have_atomics_support = false;
2917 DRM_INFO("PCIE atomic ops is not supported\n");
2918 } else {
2919 adev->have_atomics_support = true;
2920 }
2921
5494d864
AD
2922 amdgpu_device_get_pcie_info(adev);
2923
b239c017
JX
2924 if (amdgpu_mcbp)
2925 DRM_INFO("MCBP is enabled\n");
2926
5f84cc63
JX
2927 if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
2928 adev->enable_mes = true;
2929
f54eeab4 2930 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) {
a190d1c7
XY
2931 r = amdgpu_discovery_init(adev);
2932 if (r) {
2933 dev_err(adev->dev, "amdgpu_discovery_init failed\n");
2934 return r;
2935 }
2936 }
2937
d38ceaf9 2938 /* early init functions */
06ec9070 2939 r = amdgpu_device_ip_early_init(adev);
d38ceaf9
AD
2940 if (r)
2941 return r;
2942
df99ac0f
JZ
2943 r = amdgpu_device_get_job_timeout_settings(adev);
2944 if (r) {
2945 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
2946 return r;
2947 }
2948
6585661d
OZ
2949 /* doorbell bar mapping and doorbell index init*/
2950 amdgpu_device_doorbell_init(adev);
2951
d38ceaf9
AD
2952 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
2953 /* this will fail for cards that aren't VGA class devices, just
2954 * ignore it */
06ec9070 2955 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
d38ceaf9 2956
31af062a 2957 if (amdgpu_device_supports_boco(ddev))
3840c5bc
AD
2958 boco = true;
2959 if (amdgpu_has_atpx() &&
2960 (amdgpu_is_atpx_hybrid() ||
2961 amdgpu_has_atpx_dgpu_power_cntl()) &&
2962 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 2963 vga_switcheroo_register_client(adev->pdev,
3840c5bc
AD
2964 &amdgpu_switcheroo_ops, boco);
2965 if (boco)
d38ceaf9
AD
2966 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
2967
9475a943
SL
2968 if (amdgpu_emu_mode == 1) {
2969 /* post the asic on emulation mode */
2970 emu_soc_asic_init(adev);
bfca0289 2971 goto fence_driver_init;
9475a943 2972 }
bfca0289 2973
4e99a44e
ML
2974 /* detect if we are with an SRIOV vbios */
2975 amdgpu_device_detect_sriov_bios(adev);
048765ad 2976
95e8e59e
AD
2977 /* check if we need to reset the asic
2978 * E.g., driver was not cleanly unloaded previously, etc.
2979 */
f14899fd 2980 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
95e8e59e
AD
2981 r = amdgpu_asic_reset(adev);
2982 if (r) {
2983 dev_err(adev->dev, "asic reset on init failed\n");
2984 goto failed;
2985 }
2986 }
2987
d38ceaf9 2988 /* Post card if necessary */
39c640c0 2989 if (amdgpu_device_need_post(adev)) {
d38ceaf9 2990 if (!adev->bios) {
bec86378 2991 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
2992 r = -EINVAL;
2993 goto failed;
d38ceaf9 2994 }
bec86378 2995 DRM_INFO("GPU posting now...\n");
4e99a44e
ML
2996 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
2997 if (r) {
2998 dev_err(adev->dev, "gpu post error!\n");
2999 goto failed;
3000 }
d38ceaf9
AD
3001 }
3002
88b64e95
AD
3003 if (adev->is_atom_fw) {
3004 /* Initialize clocks */
3005 r = amdgpu_atomfirmware_get_clock_info(adev);
3006 if (r) {
3007 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 3008 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
3009 goto failed;
3010 }
3011 } else {
a5bde2f9
AD
3012 /* Initialize clocks */
3013 r = amdgpu_atombios_get_clock_info(adev);
3014 if (r) {
3015 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 3016 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 3017 goto failed;
a5bde2f9
AD
3018 }
3019 /* init i2c buses */
4562236b
HW
3020 if (!amdgpu_device_has_dc_support(adev))
3021 amdgpu_atombios_i2c_init(adev);
2c1a2784 3022 }
d38ceaf9 3023
bfca0289 3024fence_driver_init:
d38ceaf9
AD
3025 /* Fence driver */
3026 r = amdgpu_fence_driver_init(adev);
2c1a2784
AD
3027 if (r) {
3028 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
e23b74aa 3029 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3030 goto failed;
2c1a2784 3031 }
d38ceaf9
AD
3032
3033 /* init the mode config */
3034 drm_mode_config_init(adev->ddev);
3035
06ec9070 3036 r = amdgpu_device_ip_init(adev);
d38ceaf9 3037 if (r) {
8840a387 3038 /* failed in exclusive mode due to timeout */
3039 if (amdgpu_sriov_vf(adev) &&
3040 !amdgpu_sriov_runtime(adev) &&
3041 amdgpu_virt_mmio_blocked(adev) &&
3042 !amdgpu_virt_wait_reset(adev)) {
3043 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
3044 /* Don't send request since VF is inactive. */
3045 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3046 adev->virt.ops = NULL;
8840a387 3047 r = -EAGAIN;
3048 goto failed;
3049 }
06ec9070 3050 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3051 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
83ba126a 3052 goto failed;
d38ceaf9
AD
3053 }
3054
d7f72fe4
YZ
3055 DRM_DEBUG("SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
3056 adev->gfx.config.max_shader_engines,
3057 adev->gfx.config.max_sh_per_se,
3058 adev->gfx.config.max_cu_per_sh,
3059 adev->gfx.cu_info.number);
3060
f880799d
ND
3061 amdgpu_ctx_init_sched(adev);
3062
d38ceaf9
AD
3063 adev->accel_working = true;
3064
e59c0205
AX
3065 amdgpu_vm_check_compute_bug(adev);
3066
95844d20
MO
3067 /* Initialize the buffer migration limit. */
3068 if (amdgpu_moverate >= 0)
3069 max_MBps = amdgpu_moverate;
3070 else
3071 max_MBps = 8; /* Allow 8 MB/s. */
3072 /* Get a log2 for easy divisions. */
3073 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3074
9bc92b9c
ML
3075 amdgpu_fbdev_init(adev);
3076
d2f52ac8 3077 r = amdgpu_pm_sysfs_init(adev);
7c868b59
YT
3078 if (r) {
3079 adev->pm_sysfs_en = false;
d2f52ac8 3080 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
7c868b59
YT
3081 } else
3082 adev->pm_sysfs_en = true;
d2f52ac8 3083
5bb23532 3084 r = amdgpu_ucode_sysfs_init(adev);
7c868b59
YT
3085 if (r) {
3086 adev->ucode_sysfs_en = false;
5bb23532 3087 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
7c868b59
YT
3088 } else
3089 adev->ucode_sysfs_en = true;
5bb23532 3090
75758255 3091 r = amdgpu_debugfs_gem_init(adev);
3f14e623 3092 if (r)
d38ceaf9 3093 DRM_ERROR("registering gem debugfs failed (%d).\n", r);
d38ceaf9
AD
3094
3095 r = amdgpu_debugfs_regs_init(adev);
3f14e623 3096 if (r)
d38ceaf9 3097 DRM_ERROR("registering register debugfs failed (%d).\n", r);
d38ceaf9 3098
50ab2533 3099 r = amdgpu_debugfs_firmware_init(adev);
3f14e623 3100 if (r)
50ab2533 3101 DRM_ERROR("registering firmware debugfs failed (%d).\n", r);
50ab2533 3102
763efb6c 3103 r = amdgpu_debugfs_init(adev);
db95e218 3104 if (r)
763efb6c 3105 DRM_ERROR("Creating debugfs files failed (%d).\n", r);
db95e218 3106
d38ceaf9
AD
3107 if ((amdgpu_testing & 1)) {
3108 if (adev->accel_working)
3109 amdgpu_test_moves(adev);
3110 else
3111 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
3112 }
d38ceaf9
AD
3113 if (amdgpu_benchmarking) {
3114 if (adev->accel_working)
3115 amdgpu_benchmark(adev, amdgpu_benchmarking);
3116 else
3117 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
3118 }
3119
b0adca4d
EQ
3120 /*
3121 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3122 * Otherwise the mgpu fan boost feature will be skipped due to the
3123 * gpu instance is counted less.
3124 */
3125 amdgpu_register_gpu_instance(adev);
3126
d38ceaf9
AD
3127 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3128 * explicit gating rather than handling it automatically.
3129 */
06ec9070 3130 r = amdgpu_device_ip_late_init(adev);
2c1a2784 3131 if (r) {
06ec9070 3132 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
e23b74aa 3133 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
83ba126a 3134 goto failed;
2c1a2784 3135 }
d38ceaf9 3136
108c6a63 3137 /* must succeed. */
511fdbc3 3138 amdgpu_ras_resume(adev);
108c6a63 3139
beff74bc
AD
3140 queue_delayed_work(system_wq, &adev->delayed_init_work,
3141 msecs_to_jiffies(AMDGPU_RESUME_MS));
3142
dcea6e65
KR
3143 r = device_create_file(adev->dev, &dev_attr_pcie_replay_count);
3144 if (r) {
3145 dev_err(adev->dev, "Could not create pcie_replay_count");
3146 return r;
3147 }
108c6a63 3148
d155bef0
AB
3149 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3150 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
3151 if (r)
3152 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3153
d38ceaf9 3154 return 0;
83ba126a
AD
3155
3156failed:
89041940 3157 amdgpu_vf_error_trans_all(adev);
3840c5bc 3158 if (boco)
83ba126a 3159 vga_switcheroo_fini_domain_pm_ops(adev->dev);
8840a387 3160
83ba126a 3161 return r;
d38ceaf9
AD
3162}
3163
d38ceaf9
AD
3164/**
3165 * amdgpu_device_fini - tear down the driver
3166 *
3167 * @adev: amdgpu_device pointer
3168 *
3169 * Tear down the driver info (all asics).
3170 * Called at driver shutdown.
3171 */
3172void amdgpu_device_fini(struct amdgpu_device *adev)
3173{
3174 int r;
3175
3176 DRM_INFO("amdgpu: finishing device.\n");
9f875167 3177 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 3178 adev->shutdown = true;
9f875167 3179
e5b03032
ML
3180 /* disable all interrupts */
3181 amdgpu_irq_disable_all(adev);
ff97cba8
ML
3182 if (adev->mode_info.mode_config_initialized){
3183 if (!amdgpu_device_has_dc_support(adev))
c2d88e06 3184 drm_helper_force_disable_all(adev->ddev);
ff97cba8
ML
3185 else
3186 drm_atomic_helper_shutdown(adev->ddev);
3187 }
d38ceaf9 3188 amdgpu_fence_driver_fini(adev);
7c868b59
YT
3189 if (adev->pm_sysfs_en)
3190 amdgpu_pm_sysfs_fini(adev);
d38ceaf9 3191 amdgpu_fbdev_fini(adev);
06ec9070 3192 r = amdgpu_device_ip_fini(adev);
ab4fe3e1
HR
3193 if (adev->firmware.gpu_info_fw) {
3194 release_firmware(adev->firmware.gpu_info_fw);
3195 adev->firmware.gpu_info_fw = NULL;
3196 }
d38ceaf9
AD
3197 adev->accel_working = false;
3198 /* free i2c buses */
4562236b
HW
3199 if (!amdgpu_device_has_dc_support(adev))
3200 amdgpu_i2c_fini(adev);
bfca0289
SL
3201
3202 if (amdgpu_emu_mode != 1)
3203 amdgpu_atombios_fini(adev);
3204
d38ceaf9
AD
3205 kfree(adev->bios);
3206 adev->bios = NULL;
3840c5bc
AD
3207 if (amdgpu_has_atpx() &&
3208 (amdgpu_is_atpx_hybrid() ||
3209 amdgpu_has_atpx_dgpu_power_cntl()) &&
3210 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 3211 vga_switcheroo_unregister_client(adev->pdev);
3840c5bc 3212 if (amdgpu_device_supports_boco(adev->ddev))
83ba126a 3213 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d38ceaf9
AD
3214 vga_client_register(adev->pdev, NULL, NULL, NULL);
3215 if (adev->rio_mem)
3216 pci_iounmap(adev->pdev, adev->rio_mem);
3217 adev->rio_mem = NULL;
3218 iounmap(adev->rmmio);
3219 adev->rmmio = NULL;
06ec9070 3220 amdgpu_device_doorbell_fini(adev);
e9bc1bf7 3221
d38ceaf9 3222 amdgpu_debugfs_regs_cleanup(adev);
dcea6e65 3223 device_remove_file(adev->dev, &dev_attr_pcie_replay_count);
7c868b59
YT
3224 if (adev->ucode_sysfs_en)
3225 amdgpu_ucode_sysfs_fini(adev);
d155bef0
AB
3226 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3227 amdgpu_pmu_fini(adev);
6698a3d0 3228 amdgpu_debugfs_preempt_cleanup(adev);
f54eeab4 3229 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
a190d1c7 3230 amdgpu_discovery_fini(adev);
d38ceaf9
AD
3231}
3232
3233
3234/*
3235 * Suspend & resume.
3236 */
3237/**
810ddc3a 3238 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 3239 *
87e3f136
DP
 3240 * @dev: drm dev pointer
 3241 * @fbcon: notify the fbdev of suspend
d38ceaf9
AD
3243 *
3244 * Puts the hw in the suspend state (all asics).
3245 * Returns 0 for success or an error on failure.
3246 * Called at driver suspend.
3247 */
de185019 3248int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3249{
3250 struct amdgpu_device *adev;
3251 struct drm_crtc *crtc;
3252 struct drm_connector *connector;
f8d2d39e 3253 struct drm_connector_list_iter iter;
5ceb54c6 3254 int r;
d38ceaf9
AD
3255
3256 if (dev == NULL || dev->dev_private == NULL) {
3257 return -ENODEV;
3258 }
3259
3260 adev = dev->dev_private;
3261
3262 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3263 return 0;
3264
44779b43 3265 adev->in_suspend = true;
d38ceaf9
AD
3266 drm_kms_helper_poll_disable(dev);
3267
5f818173
S
3268 if (fbcon)
3269 amdgpu_fbdev_set_suspend(adev, 1);
3270
beff74bc 3271 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 3272
4562236b
HW
3273 if (!amdgpu_device_has_dc_support(adev)) {
3274 /* turn off display hw */
3275 drm_modeset_lock_all(dev);
f8d2d39e
LP
3276 drm_connector_list_iter_begin(dev, &iter);
3277 drm_for_each_connector_iter(connector, &iter)
3278 drm_helper_connector_dpms(connector,
3279 DRM_MODE_DPMS_OFF);
3280 drm_connector_list_iter_end(&iter);
4562236b 3281 drm_modeset_unlock_all(dev);
fe1053b7
AD
3282 /* unpin the front buffers and cursors */
3283 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3284 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3285 struct drm_framebuffer *fb = crtc->primary->fb;
3286 struct amdgpu_bo *robj;
3287
91334223 3288 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3289 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3290 r = amdgpu_bo_reserve(aobj, true);
3291 if (r == 0) {
3292 amdgpu_bo_unpin(aobj);
3293 amdgpu_bo_unreserve(aobj);
3294 }
756e6880 3295 }
756e6880 3296
fe1053b7
AD
3297 if (fb == NULL || fb->obj[0] == NULL) {
3298 continue;
3299 }
3300 robj = gem_to_amdgpu_bo(fb->obj[0]);
3301 /* don't unpin kernel fb objects */
3302 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
3303 r = amdgpu_bo_reserve(robj, true);
3304 if (r == 0) {
3305 amdgpu_bo_unpin(robj);
3306 amdgpu_bo_unreserve(robj);
3307 }
d38ceaf9
AD
3308 }
3309 }
3310 }
fe1053b7
AD
3311
3312 amdgpu_amdkfd_suspend(adev);
3313
5e6932fe 3314 amdgpu_ras_suspend(adev);
3315
fe1053b7
AD
3316 r = amdgpu_device_ip_suspend_phase1(adev);
3317
d38ceaf9
AD
3318 /* evict vram memory */
3319 amdgpu_bo_evict_vram(adev);
3320
5ceb54c6 3321 amdgpu_fence_driver_suspend(adev);
d38ceaf9 3322
fe1053b7 3323 r = amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 3324
a0a71e49
AD
3325 /* evict remaining vram memory
3326 * This second call to evict vram is to evict the gart page table
3327 * using the CPU.
3328 */
d38ceaf9
AD
3329 amdgpu_bo_evict_vram(adev);
3330
d38ceaf9
AD
3331 return 0;
3332}
3333
3334/**
810ddc3a 3335 * amdgpu_device_resume - initiate device resume
d38ceaf9 3336 *
87e3f136
DP
 3337 * @dev: drm dev pointer
 3338 * @fbcon: notify the fbdev of resume
d38ceaf9
AD
3340 *
3341 * Bring the hw back to operating state (all asics).
3342 * Returns 0 for success or an error on failure.
3343 * Called at driver resume.
3344 */
de185019 3345int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3346{
3347 struct drm_connector *connector;
f8d2d39e 3348 struct drm_connector_list_iter iter;
d38ceaf9 3349 struct amdgpu_device *adev = dev->dev_private;
756e6880 3350 struct drm_crtc *crtc;
03161a6e 3351 int r = 0;
d38ceaf9
AD
3352
3353 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3354 return 0;
3355
d38ceaf9 3356 /* post card */
39c640c0 3357 if (amdgpu_device_need_post(adev)) {
74b0b157 3358 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3359 if (r)
3360 DRM_ERROR("amdgpu asic init failed\n");
3361 }
d38ceaf9 3362
06ec9070 3363 r = amdgpu_device_ip_resume(adev);
e6707218 3364 if (r) {
06ec9070 3365 DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
4d3b9ae5 3366 return r;
e6707218 3367 }
5ceb54c6
AD
3368 amdgpu_fence_driver_resume(adev);
3369
d38ceaf9 3370
06ec9070 3371 r = amdgpu_device_ip_late_init(adev);
03161a6e 3372 if (r)
4d3b9ae5 3373 return r;
d38ceaf9 3374
beff74bc
AD
3375 queue_delayed_work(system_wq, &adev->delayed_init_work,
3376 msecs_to_jiffies(AMDGPU_RESUME_MS));
3377
fe1053b7
AD
3378 if (!amdgpu_device_has_dc_support(adev)) {
3379 /* pin cursors */
3380 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3381 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3382
91334223 3383 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3384 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3385 r = amdgpu_bo_reserve(aobj, true);
3386 if (r == 0) {
3387 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
3388 if (r != 0)
3389 DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
3390 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
3391 amdgpu_bo_unreserve(aobj);
3392 }
756e6880
AD
3393 }
3394 }
3395 }
ba997709
YZ
3396 r = amdgpu_amdkfd_resume(adev);
3397 if (r)
3398 return r;
756e6880 3399
96a5d8d4 3400 /* Make sure IB tests flushed */
beff74bc 3401 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 3402
d38ceaf9
AD
3403 /* blat the mode back in */
3404 if (fbcon) {
4562236b
HW
3405 if (!amdgpu_device_has_dc_support(adev)) {
3406 /* pre DCE11 */
3407 drm_helper_resume_force_mode(dev);
3408
3409 /* turn on display hw */
3410 drm_modeset_lock_all(dev);
f8d2d39e
LP
3411
3412 drm_connector_list_iter_begin(dev, &iter);
3413 drm_for_each_connector_iter(connector, &iter)
3414 drm_helper_connector_dpms(connector,
3415 DRM_MODE_DPMS_ON);
3416 drm_connector_list_iter_end(&iter);
3417
4562236b 3418 drm_modeset_unlock_all(dev);
d38ceaf9 3419 }
4d3b9ae5 3420 amdgpu_fbdev_set_suspend(adev, 0);
d38ceaf9
AD
3421 }
3422
3423 drm_kms_helper_poll_enable(dev);
23a1a9e5 3424
5e6932fe 3425 amdgpu_ras_resume(adev);
3426
23a1a9e5
L
3427 /*
3428 * Most of the connector probing functions try to acquire runtime pm
3429 * refs to ensure that the GPU is powered on when connector polling is
3430 * performed. Since we're calling this from a runtime PM callback,
3431 * trying to acquire rpm refs will cause us to deadlock.
3432 *
3433 * Since we're guaranteed to be holding the rpm lock, it's safe to
3434 * temporarily disable the rpm helpers so this doesn't deadlock us.
3435 */
3436#ifdef CONFIG_PM
3437 dev->dev->power.disable_depth++;
3438#endif
4562236b
HW
3439 if (!amdgpu_device_has_dc_support(adev))
3440 drm_helper_hpd_irq_event(dev);
3441 else
3442 drm_kms_helper_hotplug_event(dev);
23a1a9e5
L
3443#ifdef CONFIG_PM
3444 dev->dev->power.disable_depth--;
3445#endif
44779b43
RZ
3446 adev->in_suspend = false;
3447
4d3b9ae5 3448 return 0;
d38ceaf9
AD
3449}
3450
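/*
 * The next four helpers implement the per-IP soft-reset path used by GPU
 * recovery: check_soft_reset() marks which IP blocks are hung,
 * pre_soft_reset() prepares them, soft_reset() performs the actual per-IP
 * reset and post_soft_reset() cleans up afterwards.
 * amdgpu_device_pre_asic_reset() below runs them in exactly that order and
 * only falls back to a full ASIC reset when the soft reset fails or is not
 * sufficient.
 */
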
/**
 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
 *
 * @adev: amdgpu_device pointer
 *
 * The list of all the hardware IPs that make up the asic is walked and
 * the check_soft_reset callbacks are run. check_soft_reset determines
 * if the asic is still hung or not.
 * Returns true if any of the IPs are still in a hung state, false if not.
 */
static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
{
        int i;
        bool asic_hang = false;

        if (amdgpu_sriov_vf(adev))
                return true;

        if (amdgpu_asic_need_full_reset(adev))
                return true;

        for (i = 0; i < adev->num_ip_blocks; i++) {
                if (!adev->ip_blocks[i].status.valid)
                        continue;
                if (adev->ip_blocks[i].version->funcs->check_soft_reset)
                        adev->ip_blocks[i].status.hang =
                                adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
                if (adev->ip_blocks[i].status.hang) {
                        DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
                        asic_hang = true;
                }
        }
        return asic_hang;
}

/**
 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
 *
 * @adev: amdgpu_device pointer
 *
 * The list of all the hardware IPs that make up the asic is walked and the
 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
 * handles any IP specific hardware or software state changes that are
 * necessary for a soft reset to succeed.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
{
        int i, r = 0;

        for (i = 0; i < adev->num_ip_blocks; i++) {
                if (!adev->ip_blocks[i].status.valid)
                        continue;
                if (adev->ip_blocks[i].status.hang &&
                    adev->ip_blocks[i].version->funcs->pre_soft_reset) {
                        r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
                        if (r)
                                return r;
                }
        }

        return 0;
}

/**
 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
 *
 * @adev: amdgpu_device pointer
 *
 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
 * reset is necessary to recover.
 * Returns true if a full asic reset is required, false if not.
 */
static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
{
        int i;

        if (amdgpu_asic_need_full_reset(adev))
                return true;

        for (i = 0; i < adev->num_ip_blocks; i++) {
                if (!adev->ip_blocks[i].status.valid)
                        continue;
                if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
                    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
                    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
                    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
                        if (adev->ip_blocks[i].status.hang) {
                                DRM_INFO("Some block need full reset!\n");
                                return true;
                        }
                }
        }
        return false;
}

/**
 * amdgpu_device_ip_soft_reset - do a soft reset
 *
 * @adev: amdgpu_device pointer
 *
 * The list of all the hardware IPs that make up the asic is walked and the
 * soft_reset callbacks are run if the block is hung. soft_reset handles any
 * IP specific hardware or software state changes that are necessary to soft
 * reset the IP.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
{
        int i, r = 0;

        for (i = 0; i < adev->num_ip_blocks; i++) {
                if (!adev->ip_blocks[i].status.valid)
                        continue;
                if (adev->ip_blocks[i].status.hang &&
                    adev->ip_blocks[i].version->funcs->soft_reset) {
                        r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
                        if (r)
                                return r;
                }
        }

        return 0;
}

/**
 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
 *
 * @adev: amdgpu_device pointer
 *
 * The list of all the hardware IPs that make up the asic is walked and the
 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
 * handles any IP specific hardware or software state changes that are
 * necessary after the IP has been soft reset.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
{
        int i, r = 0;

        for (i = 0; i < adev->num_ip_blocks; i++) {
                if (!adev->ip_blocks[i].status.valid)
                        continue;
                if (adev->ip_blocks[i].status.hang &&
                    adev->ip_blocks[i].version->funcs->post_soft_reset)
                        r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
                if (r)
                        return r;
        }

        return 0;
}

/**
 * amdgpu_device_recover_vram - Recover some VRAM contents
 *
 * @adev: amdgpu_device pointer
 *
 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
 * restore things like GPUVM page tables after a GPU reset where
 * the contents of VRAM might be lost.
 *
 * Returns:
 * 0 on success, negative error code on failure.
 */
static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
{
        struct dma_fence *fence = NULL, *next = NULL;
        struct amdgpu_bo *shadow;
        long r = 1, tmo;

        if (amdgpu_sriov_runtime(adev))
                tmo = msecs_to_jiffies(8000);
        else
                tmo = msecs_to_jiffies(100);

        DRM_INFO("recover vram bo from shadow start\n");
        mutex_lock(&adev->shadow_list_lock);
        list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {

                /* No need to recover an evicted BO */
                if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
                    shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
                    shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
                        continue;

                r = amdgpu_bo_restore_shadow(shadow, &next);
                if (r)
                        break;

                if (fence) {
                        tmo = dma_fence_wait_timeout(fence, false, tmo);
                        dma_fence_put(fence);
                        fence = next;
                        if (tmo == 0) {
                                r = -ETIMEDOUT;
                                break;
                        } else if (tmo < 0) {
                                r = tmo;
                                break;
                        }
                } else {
                        fence = next;
                }
        }
        mutex_unlock(&adev->shadow_list_lock);

        if (fence)
                tmo = dma_fence_wait_timeout(fence, false, tmo);
        dma_fence_put(fence);

        if (r < 0 || tmo <= 0) {
                DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
                return -EIO;
        }

        DRM_INFO("recover vram bo from shadow done\n");
        return 0;
}

/**
 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
 *
 * @adev: amdgpu device pointer
 * @from_hypervisor: request from hypervisor
 *
 * Perform a VF FLR and reinitialize the ASIC.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
                                     bool from_hypervisor)
{
        int r;

        if (from_hypervisor)
                r = amdgpu_virt_request_full_gpu(adev, true);
        else
                r = amdgpu_virt_reset_gpu(adev);
        if (r)
                return r;

        /* Resume IP prior to SMC */
        r = amdgpu_device_ip_reinit_early_sriov(adev);
        if (r)
                goto error;

        amdgpu_virt_init_data_exchange(adev);
        /* we need to recover gart prior to running SMC/CP/SDMA resume */
        amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);

        r = amdgpu_device_fw_loading(adev);
        if (r)
                return r;

        /* now we are okay to resume SMC/CP/SDMA */
        r = amdgpu_device_ip_reinit_late_sriov(adev);
        if (r)
                goto error;

        amdgpu_irq_gpu_reset_resume_helper(adev);
        r = amdgpu_ib_ring_tests(adev);
        amdgpu_amdkfd_post_reset(adev);

error:
        amdgpu_virt_release_full_gpu(adev, true);
        if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
                amdgpu_inc_vram_lost(adev);
                r = amdgpu_device_recover_vram(adev);
        }

        return r;
}

/**
 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
 *
 * @adev: amdgpu device pointer
 *
 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
 * a hung GPU.
 */
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
{
        if (!amdgpu_device_ip_check_soft_reset(adev)) {
                DRM_INFO("Timeout, but no hardware hang detected.\n");
                return false;
        }

        if (amdgpu_gpu_recovery == 0)
                goto disabled;

        if (amdgpu_sriov_vf(adev))
                return true;

        if (amdgpu_gpu_recovery == -1) {
                switch (adev->asic_type) {
                case CHIP_BONAIRE:
                case CHIP_HAWAII:
                case CHIP_TOPAZ:
                case CHIP_TONGA:
                case CHIP_FIJI:
                case CHIP_POLARIS10:
                case CHIP_POLARIS11:
                case CHIP_POLARIS12:
                case CHIP_VEGAM:
                case CHIP_VEGA20:
                case CHIP_VEGA10:
                case CHIP_VEGA12:
                case CHIP_RAVEN:
                case CHIP_ARCTURUS:
                case CHIP_RENOIR:
                case CHIP_NAVI10:
                case CHIP_NAVI14:
                case CHIP_NAVI12:
                        break;
                default:
                        goto disabled;
                }
        }

        return true;

disabled:
        DRM_INFO("GPU recovery disabled.\n");
        return false;
}

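/*
 * Illustrative sketch (not part of this file): the usual caller of the check
 * above is a job timeout (TDR) handler, which bails out when recovery is
 * disabled or no real hang was detected, e.g. roughly:
 *
 *	if (amdgpu_device_should_recover_gpu(ring->adev))
 *		amdgpu_device_gpu_recover(ring->adev, job);
 */
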
static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
                                        struct amdgpu_job *job,
                                        bool *need_full_reset_arg)
{
        int i, r = 0;
        bool need_full_reset = *need_full_reset_arg;

        /* block all schedulers and reset given job's ring */
        for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
                struct amdgpu_ring *ring = adev->rings[i];

                if (!ring || !ring->sched.thread)
                        continue;

                /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
                amdgpu_fence_driver_force_completion(ring);
        }

        if (job)
                drm_sched_increase_karma(&job->base);

        /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
        if (!amdgpu_sriov_vf(adev)) {

                if (!need_full_reset)
                        need_full_reset = amdgpu_device_ip_need_full_reset(adev);

                if (!need_full_reset) {
                        amdgpu_device_ip_pre_soft_reset(adev);
                        r = amdgpu_device_ip_soft_reset(adev);
                        amdgpu_device_ip_post_soft_reset(adev);
                        if (r || amdgpu_device_ip_check_soft_reset(adev)) {
                                DRM_INFO("soft reset failed, will fallback to full reset!\n");
                                need_full_reset = true;
                        }
                }

                if (need_full_reset)
                        r = amdgpu_device_ip_suspend(adev);

                *need_full_reset_arg = need_full_reset;
        }

        return r;
}

static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
                                struct list_head *device_list_handle,
                                bool *need_full_reset_arg)
{
        struct amdgpu_device *tmp_adev = NULL;
        bool need_full_reset = *need_full_reset_arg, vram_lost = false;
        int r = 0;

        /*
         * ASIC reset has to be done on all XGMI hive nodes ASAP
         * to allow proper links negotiation in FW (within 1 sec)
         */
        if (need_full_reset) {
                list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
                        /* For XGMI run all resets in parallel to speed up the process */
                        if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
                                if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
                                        r = -EALREADY;
                        } else
                                r = amdgpu_asic_reset(tmp_adev);

                        if (r) {
                                DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s",
                                          r, tmp_adev->ddev->unique);
                                break;
                        }
                }

                /* For XGMI wait for all resets to complete before proceeding */
                if (!r) {
                        list_for_each_entry(tmp_adev, device_list_handle,
                                            gmc.xgmi.head) {
                                if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
                                        flush_work(&tmp_adev->xgmi_reset_work);
                                        r = tmp_adev->asic_reset_res;
                                        if (r)
                                                break;
                                }
                        }
                }
        }

        if (!r && amdgpu_ras_intr_triggered())
                amdgpu_ras_intr_cleared();

        list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
                if (need_full_reset) {
                        /* post card */
                        if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
                                DRM_WARN("asic atom init failed!");

                        if (!r) {
                                dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
                                r = amdgpu_device_ip_resume_phase1(tmp_adev);
                                if (r)
                                        goto out;

                                vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
                                if (vram_lost) {
                                        DRM_INFO("VRAM is lost due to GPU reset!\n");
                                        amdgpu_inc_vram_lost(tmp_adev);
                                }

                                r = amdgpu_gtt_mgr_recover(
                                        &tmp_adev->mman.bdev.man[TTM_PL_TT]);
                                if (r)
                                        goto out;

                                r = amdgpu_device_fw_loading(tmp_adev);
                                if (r)
                                        return r;

                                r = amdgpu_device_ip_resume_phase2(tmp_adev);
                                if (r)
                                        goto out;

                                if (vram_lost)
                                        amdgpu_device_fill_reset_magic(tmp_adev);

                                /*
                                 * Add this ASIC back as tracked now that the
                                 * reset has completed successfully.
                                 */
                                amdgpu_register_gpu_instance(tmp_adev);

                                r = amdgpu_device_ip_late_init(tmp_adev);
                                if (r)
                                        goto out;

                                /* must succeed. */
                                amdgpu_ras_resume(tmp_adev);

                                /* Update PSP FW topology after reset */
                                if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
                                        r = amdgpu_xgmi_update_topology(hive, tmp_adev);
                        }
                }

out:
                if (!r) {
                        amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
                        r = amdgpu_ib_ring_tests(tmp_adev);
                        if (r) {
                                dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
                                r = amdgpu_device_ip_suspend(tmp_adev);
                                need_full_reset = true;
                                r = -EAGAIN;
                                goto end;
                        }
                }

                if (!r)
                        r = amdgpu_device_recover_vram(tmp_adev);
                else
                        tmp_adev->asic_reset_res = r;
        }

end:
        *need_full_reset_arg = need_full_reset;
        return r;
}

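/*
 * Note the -EAGAIN contract above: if the post-reset IB tests fail, the IP
 * blocks are suspended again, *need_full_reset_arg is forced to true and
 * -EAGAIN is returned so that amdgpu_device_gpu_recover() below can retry
 * the whole sequence with a full ASIC reset.
 */
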
static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
{
        if (trylock) {
                if (!mutex_trylock(&adev->lock_reset))
                        return false;
        } else
                mutex_lock(&adev->lock_reset);

        atomic_inc(&adev->gpu_reset_counter);
        adev->in_gpu_reset = true;
        switch (amdgpu_asic_reset_method(adev)) {
        case AMD_RESET_METHOD_MODE1:
                adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
                break;
        case AMD_RESET_METHOD_MODE2:
                adev->mp1_state = PP_MP1_STATE_RESET;
                break;
        default:
                adev->mp1_state = PP_MP1_STATE_NONE;
                break;
        }

        return true;
}

static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
{
        amdgpu_vf_error_trans_all(adev);
        adev->mp1_state = PP_MP1_STATE_NONE;
        adev->in_gpu_reset = false;
        mutex_unlock(&adev->lock_reset);
}

/**
 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
 *
 * @adev: amdgpu device pointer
 * @job: which job triggered the hang
 *
 * Attempt to reset the GPU if it has hung (all asics).
 * Attempts a soft reset or a full reset and reinitializes the ASIC.
 * Returns 0 for success or an error on failure.
 */

int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
                              struct amdgpu_job *job)
{
        struct list_head device_list, *device_list_handle = NULL;
        bool need_full_reset, job_signaled;
        struct amdgpu_hive_info *hive = NULL;
        struct amdgpu_device *tmp_adev = NULL;
        int i, r = 0;
        bool in_ras_intr = amdgpu_ras_intr_triggered();
        bool use_baco =
                (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ?
                true : false;

        /*
         * Flush RAM to disk so that after reboot
         * the user can read log and see why the system rebooted.
         */
        if (in_ras_intr && !use_baco && amdgpu_ras_get_context(adev)->reboot) {

                DRM_WARN("Emergency reboot.");

                ksys_sync_helper();
                emergency_restart();
        }

        need_full_reset = job_signaled = false;
        INIT_LIST_HEAD(&device_list);

        dev_info(adev->dev, "GPU %s begin!\n",
                (in_ras_intr && !use_baco) ? "jobs stop":"reset");

        cancel_delayed_work_sync(&adev->delayed_init_work);

        hive = amdgpu_get_xgmi_hive(adev, false);

        /*
         * Here we trylock to avoid a chain of resets executing from either
         * jobs triggered on different adevs in an XGMI hive or jobs on
         * different schedulers for the same device while this TO handler is
         * running. We always reset all schedulers for a device and all
         * devices for an XGMI hive, so that should take care of them too.
         */

        if (hive && !mutex_trylock(&hive->reset_lock)) {
                DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
                          job ? job->base.id : -1, hive->hive_id);
                return 0;
        }

        /* Start with adev pre asic reset first for soft reset check.*/
        if (!amdgpu_device_lock_adev(adev, !hive)) {
                DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
                          job ? job->base.id : -1);
                return 0;
        }

        /* Block kfd: SRIOV would do it separately */
        if (!amdgpu_sriov_vf(adev))
                amdgpu_amdkfd_pre_reset(adev);

        /* Build list of devices to reset */
        if (adev->gmc.xgmi.num_physical_nodes > 1) {
                if (!hive) {
                        /* unlock kfd: SRIOV would do it separately */
                        if (!amdgpu_sriov_vf(adev))
                                amdgpu_amdkfd_post_reset(adev);
                        amdgpu_device_unlock_adev(adev);
                        return -ENODEV;
                }

                /*
                 * In case we are in XGMI hive mode device reset is done for all the
                 * nodes in the hive to retrain all XGMI links and hence the reset
                 * sequence is executed in loop on all nodes.
                 */
                device_list_handle = &hive->device_list;
        } else {
                list_add_tail(&adev->gmc.xgmi.head, &device_list);
                device_list_handle = &device_list;
        }

        /* block all schedulers and reset given job's ring */
        list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
                if (tmp_adev != adev) {
                        amdgpu_device_lock_adev(tmp_adev, false);
                        if (!amdgpu_sriov_vf(tmp_adev))
                                amdgpu_amdkfd_pre_reset(tmp_adev);
                }

                /*
                 * Mark these ASICs to be reset as untracked first,
                 * and add them back after the reset has completed.
                 */
                amdgpu_unregister_gpu_instance(tmp_adev);

                /* disable ras on ALL IPs */
                if (!(in_ras_intr && !use_baco) &&
                      amdgpu_device_ip_need_full_reset(tmp_adev))
                        amdgpu_ras_suspend(tmp_adev);

                for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
                        struct amdgpu_ring *ring = tmp_adev->rings[i];

                        if (!ring || !ring->sched.thread)
                                continue;

                        drm_sched_stop(&ring->sched, job ? &job->base : NULL);

                        if (in_ras_intr && !use_baco)
                                amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
                }
        }

        if (in_ras_intr && !use_baco)
                goto skip_sched_resume;

        /*
         * Must check guilty signal here since after this point all old
         * HW fences are force signaled.
         *
         * job->base holds a reference to parent fence
         */
        if (job && job->base.s_fence->parent &&
            dma_fence_is_signaled(job->base.s_fence->parent))
                job_signaled = true;

        if (job_signaled) {
                dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
                goto skip_hw_reset;
        }

        /* Guilty job will be freed after this*/
        r = amdgpu_device_pre_asic_reset(adev, job, &need_full_reset);
        if (r) {
                /*TODO Should we stop ?*/
                DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
                          r, adev->ddev->unique);
                adev->asic_reset_res = r;
        }

retry:  /* Rest of adevs pre asic reset from XGMI hive. */
        list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {

                if (tmp_adev == adev)
                        continue;

                r = amdgpu_device_pre_asic_reset(tmp_adev,
                                                 NULL,
                                                 &need_full_reset);
                /*TODO Should we stop ?*/
                if (r) {
                        DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
                                  r, tmp_adev->ddev->unique);
                        tmp_adev->asic_reset_res = r;
                }
        }

        /* Actual ASIC resets if needed.*/
        /* TODO Implement XGMI hive reset logic for SRIOV */
        if (amdgpu_sriov_vf(adev)) {
                r = amdgpu_device_reset_sriov(adev, job ? false : true);
                if (r)
                        adev->asic_reset_res = r;
        } else {
                r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
                if (r && r == -EAGAIN)
                        goto retry;
        }

skip_hw_reset:

        /* Post ASIC reset for all devs .*/
        list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {

                for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
                        struct amdgpu_ring *ring = tmp_adev->rings[i];

                        if (!ring || !ring->sched.thread)
                                continue;

                        /* No point to resubmit jobs if we didn't HW reset*/
                        if (!tmp_adev->asic_reset_res && !job_signaled)
                                drm_sched_resubmit_jobs(&ring->sched);

                        drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
                }

                if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
                        drm_helper_resume_force_mode(tmp_adev->ddev);
                }

                tmp_adev->asic_reset_res = 0;

                if (r) {
                        /* bad news, how to tell it to userspace ? */
                        dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
                        amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
                } else {
                        dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
                }
        }

skip_sched_resume:
        list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
                /* unlock kfd: SRIOV would do it separately */
                if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev))
                        amdgpu_amdkfd_post_reset(tmp_adev);
                amdgpu_device_unlock_adev(tmp_adev);
        }

        if (hive)
                mutex_unlock(&hive->reset_lock);

        if (r)
                dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
        return r;
}

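/*
 * To summarize the recovery flow implemented above: lock every affected
 * device (the whole hive for XGMI), stop all schedulers and force-complete
 * the stale hardware fences, try a soft reset and fall back to a full ASIC
 * reset if needed, then resume the IP blocks, restart the schedulers and
 * unlock the devices again.
 */
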
/**
 * amdgpu_device_get_pcie_info - fetch PCIE info about the PCIE slot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches and stores in the driver the PCIE capabilities (gen speed
 * and lanes) of the slot the device is in. Handles APUs and
 * virtualized environments where PCIE config space may not be available.
 */
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
{
        struct pci_dev *pdev;
        enum pci_bus_speed speed_cap, platform_speed_cap;
        enum pcie_link_width platform_link_width;

        if (amdgpu_pcie_gen_cap)
                adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;

        if (amdgpu_pcie_lane_cap)
                adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;

        /* covers APUs as well */
        if (pci_is_root_bus(adev->pdev->bus)) {
                if (adev->pm.pcie_gen_mask == 0)
                        adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
                if (adev->pm.pcie_mlw_mask == 0)
                        adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
                return;
        }

        if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
                return;

        pcie_bandwidth_available(adev->pdev, NULL,
                                 &platform_speed_cap, &platform_link_width);

        if (adev->pm.pcie_gen_mask == 0) {
                /* asic caps */
                pdev = adev->pdev;
                speed_cap = pcie_get_speed_cap(pdev);
                if (speed_cap == PCI_SPEED_UNKNOWN) {
                        adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
                                                   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
                                                   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
                } else {
                        if (speed_cap == PCIE_SPEED_16_0GT)
                                adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
                        else if (speed_cap == PCIE_SPEED_8_0GT)
                                adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
                        else if (speed_cap == PCIE_SPEED_5_0GT)
                                adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
                        else
                                adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
                }
                /* platform caps */
                if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
                        adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
                                                   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
                } else {
                        if (platform_speed_cap == PCIE_SPEED_16_0GT)
                                adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
                                                           CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
                                                           CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
                                                           CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
                        else if (platform_speed_cap == PCIE_SPEED_8_0GT)
                                adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
                                                           CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
                                                           CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
                        else if (platform_speed_cap == PCIE_SPEED_5_0GT)
                                adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
                                                           CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
                        else
                                adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
                }
        }
        if (adev->pm.pcie_mlw_mask == 0) {
                if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
                        adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
                } else {
                        switch (platform_link_width) {
                        case PCIE_LNK_X32:
                                adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
                                break;
                        case PCIE_LNK_X16:
                                adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
                                break;
                        case PCIE_LNK_X12:
                                adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
                                break;
                        case PCIE_LNK_X8:
                                adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
                                break;
                        case PCIE_LNK_X4:
                                adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
                                break;
                        case PCIE_LNK_X2:
                                adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
                                                          CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
                                break;
                        case PCIE_LNK_X1:
                                adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
                                break;
                        default:
                                break;
                        }
                }
        }
}

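/*
 * Worked example: for a dGPU that caps out at PCIe gen3 sitting in a
 * gen3-capable x16 slot, the code above ends up setting pcie_gen_mask to
 * the GEN1|GEN2|GEN3 bits (both the CAIL_ASIC_* and CAIL_* variants) and
 * pcie_mlw_mask to the X1..X16 width bits; the power management code then
 * only selects PCIe DPM speeds and widths from within those masks.
 */
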
int amdgpu_device_baco_enter(struct drm_device *dev)
{
        struct amdgpu_device *adev = dev->dev_private;
        struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);

        if (!amdgpu_device_supports_baco(adev->ddev))
                return -ENOTSUPP;

        if (ras && ras->supported)
                adev->nbio.funcs->enable_doorbell_interrupt(adev, false);

        return amdgpu_dpm_baco_enter(adev);
}

int amdgpu_device_baco_exit(struct drm_device *dev)
{
        struct amdgpu_device *adev = dev->dev_private;
        struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
        int ret = 0;

        if (!amdgpu_device_supports_baco(adev->ddev))
                return -ENOTSUPP;

        ret = amdgpu_dpm_baco_exit(adev);
        if (ret)
                return ret;

        if (ras && ras->supported)
                adev->nbio.funcs->enable_doorbell_interrupt(adev, true);

        return 0;
}
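
/*
 * BACO ("Bus Active, Chip Off") keeps the PCIe interface of the board alive
 * while the rest of the chip is powered down.  The two entry points above
 * are typically used by the runtime power-management path on boards that
 * report BACO support via amdgpu_device_supports_baco(); note that doorbell
 * interrupts are disabled across BACO entry/exit when RAS is supported.
 */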