drm/amdgpu: fix amdgpu pmu to use hwc->config instead of hwc->conf
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/console.h>
#include <linux/slab.h>

#include <drm/drm_atomic_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "nv.h"
#include "bif/bif_4_1_d.h"
#include <linux/pci.h>
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
#include "amdgpu_pmu.h"

#include <linux/suspend.h>
#include <drm/task_barrier.h>

MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");

#define AMDGPU_RESUME_MS		2000

const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"ARCTURUS",
	"RENOIR",
	"NAVI10",
	"NAVI14",
	"NAVI12",
	"LAST",
};
115
/**
 * DOC: pcie_replay_count
 *
 * The amdgpu driver provides a sysfs API for reporting the total number
 * of PCIe replays (NAKs).
 * The file pcie_replay_count is used for this and returns the total
 * number of replays as a sum of the NAKs generated and NAKs received.
 */

static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;
	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);

	return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
}

static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
		amdgpu_device_get_pcie_replay_count, NULL);

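/*
 * Example (illustrative): once registered, the attribute is typically exposed
 * through the PCI device's sysfs directory, e.g.
 *
 *	cat /sys/class/drm/card0/device/pcie_replay_count
 *
 * The exact path depends on how the device is enumerated on a given system.
 */
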
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);

/**
 * amdgpu_device_supports_boco - Is the device a dGPU with HG/PX power control
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with HG/PX power control,
 * otherwise returns false.
 */
bool amdgpu_device_supports_boco(struct drm_device *dev)
{
	struct amdgpu_device *adev = dev->dev_private;

	if (adev->flags & AMD_IS_PX)
		return true;
	return false;
}

/**
 * amdgpu_device_supports_baco - Does the device support BACO
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device supports BACO,
 * otherwise returns false.
 */
bool amdgpu_device_supports_baco(struct drm_device *dev)
{
	struct amdgpu_device *adev = dev->dev_private;

	return amdgpu_asic_supports_baco(adev);
}

/**
 * VRAM access helper functions.
 *
 * amdgpu_device_vram_access - read/write a buffer in vram
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size in bytes; @buf must be at least @size bytes
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
			       uint32_t *buf, size_t size, bool write)
{
	uint64_t last;
	unsigned long flags;

	last = size - 4;
	for (last += pos; pos <= last; pos += 4) {
		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
		WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31);
		if (write)
			WREG32_NO_KIQ(mmMM_DATA, *buf++);
		else
			*buf++ = RREG32_NO_KIQ(mmMM_DATA);
		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	}
}

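/*
 * Example (illustrative only): dump the first 256 bytes of VRAM into a
 * local buffer; 'vram_dump' is a hypothetical caller-side buffer.
 *
 *	uint32_t vram_dump[64];
 *
 *	amdgpu_device_vram_access(adev, 0, vram_dump, sizeof(vram_dump), false);
 */
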
202/*
203 * MMIO register access helper functions.
204 */
205/**
206 * amdgpu_mm_rreg - read a memory mapped IO register
207 *
208 * @adev: amdgpu_device pointer
209 * @reg: dword aligned register offset
210 * @acc_flags: access flags which require special behavior
211 *
212 * Returns the 32 bit value from the offset specified.
213 */
d38ceaf9 214uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
15d72fd7 215 uint32_t acc_flags)
d38ceaf9 216{
217 uint32_t ret;
218
c68dbcd8 219 if ((acc_flags & AMDGPU_REGS_KIQ) || (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)))
d33a99c4 220 return amdgpu_kiq_rreg(adev, reg);
bc992ba5 221
15d72fd7 222 if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
f4b373f4 223 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
224 else {
225 unsigned long flags;
226
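		/* registers beyond the directly mapped MMIO range are reached
		 * through the MM_INDEX/MM_DATA window, serialized by mmio_idx_lock
		 */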
227 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
228 writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
229 ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
230 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
d38ceaf9 231 }
232 trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
233 return ret;
234}
235
/*
 * MMIO register read with bytes helper functions
 * @offset: bytes offset from MMIO start
 *
 */
241
242/**
243 * amdgpu_mm_rreg8 - read a memory mapped IO register
244 *
245 * @adev: amdgpu_device pointer
246 * @offset: byte aligned register offset
247 *
248 * Returns the 8 bit value from the offset specified.
249 */
250uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) {
251 if (offset < adev->rmmio_size)
252 return (readb(adev->rmmio + offset));
253 BUG();
254}
255
/*
 * MMIO register write with bytes helper functions
 * @offset: bytes offset from MMIO start
 * @value: the value to be written to the register
 *
 */
/**
 * amdgpu_mm_wreg8 - write a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
271void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) {
272 if (offset < adev->rmmio_size)
273 writeb(value, adev->rmmio + offset);
274 else
275 BUG();
276}
277
278/**
279 * amdgpu_mm_wreg - write to a memory mapped IO register
280 *
281 * @adev: amdgpu_device pointer
282 * @reg: dword aligned register offset
283 * @v: 32 bit value to write to the register
284 * @acc_flags: access flags which require special behavior
285 *
286 * Writes the value specified to the offset specified.
287 */
d38ceaf9 288void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
15d72fd7 289 uint32_t acc_flags)
d38ceaf9 290{
f4b373f4 291 trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);
4e99a44e 292
293 if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
294 adev->last_mm_index = v;
295 }
296
c68dbcd8 297 if ((acc_flags & AMDGPU_REGS_KIQ) || (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)))
d33a99c4 298 return amdgpu_kiq_wreg(adev, reg, v);
bc992ba5 299
15d72fd7 300 if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
301 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
302 else {
303 unsigned long flags;
304
305 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
306 writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
307 writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
308 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
309 }
310
311 if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
312 udelay(500);
313 }
314}
315
316/**
317 * amdgpu_io_rreg - read an IO register
318 *
319 * @adev: amdgpu_device pointer
320 * @reg: dword aligned register offset
321 *
322 * Returns the 32 bit value from the offset specified.
323 */
324u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
325{
326 if ((reg * 4) < adev->rio_mem_size)
327 return ioread32(adev->rio_mem + (reg * 4));
328 else {
329 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
330 return ioread32(adev->rio_mem + (mmMM_DATA * 4));
331 }
332}
333
334/**
335 * amdgpu_io_wreg - write to an IO register
336 *
337 * @adev: amdgpu_device pointer
338 * @reg: dword aligned register offset
339 * @v: 32 bit value to write to the register
340 *
341 * Writes the value specified to the offset specified.
342 */
343void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
344{
345 if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
346 adev->last_mm_index = v;
347 }
348
349 if ((reg * 4) < adev->rio_mem_size)
350 iowrite32(v, adev->rio_mem + (reg * 4));
351 else {
352 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
353 iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
354 }
355
356 if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
357 udelay(500);
358 }
359}
360
361/**
362 * amdgpu_mm_rdoorbell - read a doorbell dword
363 *
364 * @adev: amdgpu_device pointer
365 * @index: doorbell index
366 *
367 * Returns the value in the doorbell aperture at the
368 * requested doorbell index (CIK).
369 */
370u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
371{
372 if (index < adev->doorbell.num_doorbells) {
373 return readl(adev->doorbell.ptr + index);
374 } else {
375 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
376 return 0;
377 }
378}
379
380/**
381 * amdgpu_mm_wdoorbell - write a doorbell dword
382 *
383 * @adev: amdgpu_device pointer
384 * @index: doorbell index
385 * @v: value to write
386 *
387 * Writes @v to the doorbell aperture at the
388 * requested doorbell index (CIK).
389 */
390void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
391{
392 if (index < adev->doorbell.num_doorbells) {
393 writel(v, adev->doorbell.ptr + index);
394 } else {
395 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
396 }
397}
398
832be404
KW
399/**
400 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
401 *
402 * @adev: amdgpu_device pointer
403 * @index: doorbell index
404 *
405 * Returns the value in the doorbell aperture at the
406 * requested doorbell index (VEGA10+).
407 */
408u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
409{
410 if (index < adev->doorbell.num_doorbells) {
411 return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
412 } else {
413 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
414 return 0;
415 }
416}
417
418/**
419 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
420 *
421 * @adev: amdgpu_device pointer
422 * @index: doorbell index
423 * @v: value to write
424 *
425 * Writes @v to the doorbell aperture at the
426 * requested doorbell index (VEGA10+).
427 */
428void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
429{
430 if (index < adev->doorbell.num_doorbells) {
431 atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
432 } else {
433 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
434 }
435}
436
d38ceaf9
AD
437/**
438 * amdgpu_invalid_rreg - dummy reg read function
439 *
440 * @adev: amdgpu device pointer
441 * @reg: offset of register
442 *
443 * Dummy register read function. Used for register blocks
444 * that certain asics don't have (all asics).
445 * Returns the value in the register.
446 */
447static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
448{
449 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
450 BUG();
451 return 0;
452}
453
/**
 * amdgpu_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
464static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
465{
466 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
467 reg, v);
468 BUG();
469}
470
4fa1c6a6
TZ
471/**
472 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
473 *
474 * @adev: amdgpu device pointer
475 * @reg: offset of register
476 *
477 * Dummy register read function. Used for register blocks
478 * that certain asics don't have (all asics).
479 * Returns the value in the register.
480 */
481static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
482{
483 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
484 BUG();
485 return 0;
486}
487
/**
 * amdgpu_invalid_wreg64 - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
498static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
499{
500 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
501 reg, v);
502 BUG();
503}
504
d38ceaf9
AD
505/**
506 * amdgpu_block_invalid_rreg - dummy reg read function
507 *
508 * @adev: amdgpu device pointer
509 * @block: offset of instance
510 * @reg: offset of register
511 *
512 * Dummy register read function. Used for register blocks
513 * that certain asics don't have (all asics).
514 * Returns the value in the register.
515 */
516static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
517 uint32_t block, uint32_t reg)
518{
519 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
520 reg, block);
521 BUG();
522 return 0;
523}
524
/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
536static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
537 uint32_t block,
538 uint32_t reg, uint32_t v)
539{
540 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
541 reg, block, v);
542 BUG();
543}
544
e3ecdffa
AD
545/**
546 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
547 *
548 * @adev: amdgpu device pointer
549 *
550 * Allocates a scratch page of VRAM for use by various things in the
551 * driver.
552 */
06ec9070 553static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
d38ceaf9 554{
a4a02777
CK
555 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
556 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
557 &adev->vram_scratch.robj,
558 &adev->vram_scratch.gpu_addr,
559 (void **)&adev->vram_scratch.ptr);
d38ceaf9
AD
560}
561
e3ecdffa
AD
562/**
563 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
564 *
565 * @adev: amdgpu device pointer
566 *
567 * Frees the VRAM scratch page.
568 */
06ec9070 569static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 570{
078af1a3 571 amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
d38ceaf9
AD
572}
573
/**
 * amdgpu_device_program_register_sequence - program an array of registers.
 *
 * @adev: amdgpu_device pointer
 * @registers: pointer to the register array
 * @array_size: size of the register array
 *
 * Programs an array of registers with AND and OR masks.
 * This is a helper for setting golden registers.
 */
9c3f2b54
AD
584void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
585 const u32 *registers,
586 const u32 array_size)
d38ceaf9
AD
587{
588 u32 tmp, reg, and_mask, or_mask;
589 int i;
590
591 if (array_size % 3)
592 return;
593
594 for (i = 0; i < array_size; i +=3) {
595 reg = registers[i + 0];
596 and_mask = registers[i + 1];
597 or_mask = registers[i + 2];
598
599 if (and_mask == 0xffffffff) {
600 tmp = or_mask;
601 } else {
602 tmp = RREG32(reg);
603 tmp &= ~and_mask;
e0d07657
HZ
604 if (adev->family >= AMDGPU_FAMILY_AI)
605 tmp |= (or_mask & and_mask);
606 else
607 tmp |= or_mask;
d38ceaf9
AD
608 }
609 WREG32(reg, tmp);
610 }
611}
612
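/*
 * Example (illustrative, register offsets are made up): the array is a flat
 * list of {register, AND mask, OR mask} triplets; an AND mask of 0xffffffff
 * writes the OR mask value verbatim.
 *
 *	static const u32 golden_settings_example[] = {
 *		0x1234, 0xffffffff, 0x00000001,
 *		0x5678, 0x0000ff00, 0x00003400,
 *	};
 *
 *	amdgpu_device_program_register_sequence(adev, golden_settings_example,
 *						ARRAY_SIZE(golden_settings_example));
 */
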
e3ecdffa
AD
613/**
614 * amdgpu_device_pci_config_reset - reset the GPU
615 *
616 * @adev: amdgpu_device pointer
617 *
618 * Resets the GPU using the pci config reset sequence.
619 * Only applicable to asics prior to vega10.
620 */
8111c387 621void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
622{
623 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
624}
625
626/*
627 * GPU doorbell aperture helpers function.
628 */
629/**
06ec9070 630 * amdgpu_device_doorbell_init - Init doorbell driver information.
d38ceaf9
AD
631 *
632 * @adev: amdgpu_device pointer
633 *
634 * Init doorbell driver information (CIK)
635 * Returns 0 on success, error on failure.
636 */
06ec9070 637static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
d38ceaf9 638{
6585661d 639
705e519e
CK
640 /* No doorbell on SI hardware generation */
641 if (adev->asic_type < CHIP_BONAIRE) {
642 adev->doorbell.base = 0;
643 adev->doorbell.size = 0;
644 adev->doorbell.num_doorbells = 0;
645 adev->doorbell.ptr = NULL;
646 return 0;
647 }
648
d6895ad3
CK
649 if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
650 return -EINVAL;
651
22357775
AD
652 amdgpu_asic_init_doorbell_index(adev);
653
d38ceaf9
AD
654 /* doorbell bar mapping */
655 adev->doorbell.base = pci_resource_start(adev->pdev, 2);
656 adev->doorbell.size = pci_resource_len(adev->pdev, 2);
657
edf600da 658 adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
9564f192 659 adev->doorbell_index.max_assignment+1);
d38ceaf9
AD
660 if (adev->doorbell.num_doorbells == 0)
661 return -EINVAL;
662
	/* For Vega, reserve and map two pages on the doorbell BAR since the SDMA
	 * paging queue doorbell uses the second page. The
	 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
	 * doorbells are in the first page. So with the paging queue enabled,
	 * the max num_doorbells should be increased by one page (0x400 in dwords).
	 */
669 if (adev->asic_type >= CHIP_VEGA10)
88dc26e4 670 adev->doorbell.num_doorbells += 0x400;
ec3db8a6 671
8972e5d2
CK
672 adev->doorbell.ptr = ioremap(adev->doorbell.base,
673 adev->doorbell.num_doorbells *
674 sizeof(u32));
675 if (adev->doorbell.ptr == NULL)
d38ceaf9 676 return -ENOMEM;
d38ceaf9
AD
677
678 return 0;
679}
680
681/**
06ec9070 682 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
d38ceaf9
AD
683 *
684 * @adev: amdgpu_device pointer
685 *
686 * Tear down doorbell driver information (CIK)
687 */
06ec9070 688static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
d38ceaf9
AD
689{
690 iounmap(adev->doorbell.ptr);
691 adev->doorbell.ptr = NULL;
692}
693
22cb0164 694
d38ceaf9
AD
695
/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */
701
702/**
06ec9070 703 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
704 *
705 * @adev: amdgpu_device pointer
706 *
707 * Disables Writeback and frees the Writeback memory (all asics).
708 * Used at driver shutdown.
709 */
06ec9070 710static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
d38ceaf9
AD
711{
712 if (adev->wb.wb_obj) {
a76ed485
AD
713 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
714 &adev->wb.gpu_addr,
715 (void **)&adev->wb.wb);
d38ceaf9
AD
716 adev->wb.wb_obj = NULL;
717 }
718}
719
720/**
06ec9070 721 * amdgpu_device_wb_init- Init Writeback driver info and allocate memory
d38ceaf9
AD
722 *
723 * @adev: amdgpu_device pointer
724 *
455a7bc2 725 * Initializes writeback and allocates writeback memory (all asics).
d38ceaf9
AD
726 * Used at driver startup.
727 * Returns 0 on success or an -error on failure.
728 */
06ec9070 729static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
730{
731 int r;
732
733 if (adev->wb.wb_obj == NULL) {
97407b63
AD
734 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
735 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
736 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
737 &adev->wb.wb_obj, &adev->wb.gpu_addr,
738 (void **)&adev->wb.wb);
d38ceaf9
AD
739 if (r) {
740 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
741 return r;
742 }
d38ceaf9
AD
743
744 adev->wb.num_wb = AMDGPU_MAX_WB;
745 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
746
747 /* clear wb memory */
73469585 748 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
749 }
750
751 return 0;
752}
753
754/**
131b4b36 755 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
756 *
757 * @adev: amdgpu_device pointer
758 * @wb: wb index
759 *
760 * Allocate a wb slot for use by the driver (all asics).
761 * Returns 0 on success or -EINVAL on failure.
762 */
131b4b36 763int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9
AD
764{
765 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 766
97407b63 767 if (offset < adev->wb.num_wb) {
7014285a 768 __set_bit(offset, adev->wb.used);
63ae07ca 769 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
770 return 0;
771 } else {
772 return -EINVAL;
773 }
774}
775
d38ceaf9 776/**
131b4b36 777 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
778 *
779 * @adev: amdgpu_device pointer
780 * @wb: wb index
781 *
782 * Free a wb slot allocated for use by the driver (all asics)
783 */
131b4b36 784void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 785{
73469585 786 wb >>= 3;
d38ceaf9 787 if (wb < adev->wb.num_wb)
73469585 788 __clear_bit(wb, adev->wb.used);
d38ceaf9
AD
789}
790
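/*
 * Typical (illustrative) usage: reserve a writeback slot, let a ring or IP
 * block report status into it, then release it. The CPU view of the slot is
 * adev->wb.wb[wb_slot] and its GPU address is adev->wb.gpu_addr + wb_slot * 4.
 *
 *	u32 wb_slot;
 *
 *	if (!amdgpu_device_wb_get(adev, &wb_slot)) {
 *		...
 *		amdgpu_device_wb_free(adev, wb_slot);
 *	}
 */
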
d6895ad3
CK
/**
 * amdgpu_device_resize_fb_bar - try to resize FB BAR
 *
 * @adev: amdgpu_device pointer
 *
 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 * to fail, but if any of the BARs is not accessible after the resize we abort
 * driver loading by returning -ENODEV.
 */
800int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
801{
770d13b1 802 u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
d6895ad3 803 u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
31b8adab
CK
804 struct pci_bus *root;
805 struct resource *res;
806 unsigned i;
d6895ad3
CK
807 u16 cmd;
808 int r;
809
0c03b912 810 /* Bypass for VF */
811 if (amdgpu_sriov_vf(adev))
812 return 0;
813
31b8adab
CK
814 /* Check if the root BUS has 64bit memory resources */
815 root = adev->pdev->bus;
816 while (root->parent)
817 root = root->parent;
818
819 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 820 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
821 res->start > 0x100000000ull)
822 break;
823 }
824
825 /* Trying to resize is pointless without a root hub window above 4GB */
826 if (!res)
827 return 0;
828
d6895ad3
CK
829 /* Disable memory decoding while we change the BAR addresses and size */
830 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
831 pci_write_config_word(adev->pdev, PCI_COMMAND,
832 cmd & ~PCI_COMMAND_MEMORY);
833
834 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
06ec9070 835 amdgpu_device_doorbell_fini(adev);
d6895ad3
CK
836 if (adev->asic_type >= CHIP_BONAIRE)
837 pci_release_resource(adev->pdev, 2);
838
839 pci_release_resource(adev->pdev, 0);
840
841 r = pci_resize_resource(adev->pdev, 0, rbar_size);
842 if (r == -ENOSPC)
843 DRM_INFO("Not enough PCI address space for a large BAR.");
844 else if (r && r != -ENOTSUPP)
845 DRM_ERROR("Problem resizing BAR0 (%d).", r);
846
847 pci_assign_unassigned_bus_resources(adev->pdev->bus);
848
849 /* When the doorbell or fb BAR isn't available we have no chance of
850 * using the device.
851 */
06ec9070 852 r = amdgpu_device_doorbell_init(adev);
d6895ad3
CK
853 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
854 return -ENODEV;
855
856 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
857
858 return 0;
859}
a05502e5 860
d38ceaf9
AD
/*
 * GPU helper functions.
 */
/**
 * amdgpu_device_need_post - check if the hw needs post or not
 *
 * @adev: amdgpu_device pointer
 *
 * Check if the asic has been initialized (all asics) at driver startup
 * or if post is needed after a hw reset is performed.
 * Returns true if post is needed or false if not.
 */
39c640c0 873bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
874{
875 uint32_t reg;
876
bec86378
ML
877 if (amdgpu_sriov_vf(adev))
878 return false;
879
880 if (amdgpu_passthrough(adev)) {
		/* for FIJI: in the whole-GPU pass-through virtualization case, after a
		 * VM reboot some old SMC firmware still needs the driver to do vPost,
		 * otherwise the GPU hangs. SMC firmware above version 22.15 doesn't
		 * have this flaw, so force vPost for SMC versions below 22.15.
		 */
886 if (adev->asic_type == CHIP_FIJI) {
887 int err;
888 uint32_t fw_ver;
889 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
			/* force vPost if error occurred */
891 if (err)
892 return true;
893
894 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1da2c326
ML
895 if (fw_ver < 0x00160e00)
896 return true;
bec86378 897 }
bec86378 898 }
91fe77eb 899
900 if (adev->has_hw_reset) {
901 adev->has_hw_reset = false;
902 return true;
903 }
904
905 /* bios scratch used on CIK+ */
906 if (adev->asic_type >= CHIP_BONAIRE)
907 return amdgpu_atombios_scratch_need_asic_init(adev);
908
909 /* check MEM_SIZE for older asics */
910 reg = amdgpu_asic_get_config_memsize(adev);
911
912 if ((reg != 0) && (reg != 0xffffffff))
913 return false;
914
915 return true;
bec86378
ML
916}
917
d38ceaf9
AD
918/* if we get transitioned to only one device, take VGA back */
919/**
06ec9070 920 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9
AD
921 *
922 * @cookie: amdgpu_device pointer
923 * @state: enable/disable vga decode
924 *
925 * Enable/disable vga decode (all asics).
926 * Returns VGA resource flags.
927 */
06ec9070 928static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
d38ceaf9
AD
929{
930 struct amdgpu_device *adev = cookie;
931 amdgpu_asic_set_vga_state(adev, state);
932 if (state)
933 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
934 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
935 else
936 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
937}
938
e3ecdffa
AD
939/**
940 * amdgpu_device_check_block_size - validate the vm block size
941 *
942 * @adev: amdgpu_device pointer
943 *
944 * Validates the vm block size specified via module parameter.
945 * The vm block size defines number of bits in page table versus page directory,
946 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
947 * page table and the remaining bits are in the page directory.
948 */
06ec9070 949static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
950{
951 /* defines number of bits in page table versus page directory,
952 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
953 * page table and the remaining bits are in the page directory */
bab4fee7
JZ
954 if (amdgpu_vm_block_size == -1)
955 return;
a1adf8be 956
bab4fee7 957 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
958 dev_warn(adev->dev, "VM page table size (%d) too small\n",
959 amdgpu_vm_block_size);
97489129 960 amdgpu_vm_block_size = -1;
a1adf8be 961 }
a1adf8be
CZ
962}
963
e3ecdffa
AD
964/**
965 * amdgpu_device_check_vm_size - validate the vm size
966 *
967 * @adev: amdgpu_device pointer
968 *
969 * Validates the vm size in GB specified via module parameter.
970 * The VM size is the size of the GPU virtual memory space in GB.
971 */
06ec9070 972static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 973{
64dab074
AD
974 /* no need to check the default value */
975 if (amdgpu_vm_size == -1)
976 return;
977
83ca145d
ZJ
978 if (amdgpu_vm_size < 1) {
979 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
980 amdgpu_vm_size);
f3368128 981 amdgpu_vm_size = -1;
83ca145d 982 }
83ca145d
ZJ
983}
984
7951e376
RZ
985static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
986{
987 struct sysinfo si;
a9d4fe2f 988 bool is_os_64 = (sizeof(void *) == 8);
7951e376
RZ
989 uint64_t total_memory;
990 uint64_t dram_size_seven_GB = 0x1B8000000;
991 uint64_t dram_size_three_GB = 0xB8000000;
992
993 if (amdgpu_smu_memory_pool_size == 0)
994 return;
995
996 if (!is_os_64) {
997 DRM_WARN("Not 64-bit OS, feature not supported\n");
998 goto def_value;
999 }
1000 si_meminfo(&si);
1001 total_memory = (uint64_t)si.totalram * si.mem_unit;
1002
1003 if ((amdgpu_smu_memory_pool_size == 1) ||
1004 (amdgpu_smu_memory_pool_size == 2)) {
1005 if (total_memory < dram_size_three_GB)
1006 goto def_value1;
1007 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1008 (amdgpu_smu_memory_pool_size == 8)) {
1009 if (total_memory < dram_size_seven_GB)
1010 goto def_value1;
1011 } else {
1012 DRM_WARN("Smu memory pool size not supported\n");
1013 goto def_value;
1014 }
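	/* amdgpu_smu_memory_pool_size is in units of 256 MB (1 << 28 bytes) */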
1015 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1016
1017 return;
1018
1019def_value1:
	DRM_WARN("Not enough system memory\n");
1021def_value:
1022 adev->pm.smu_prv_buffer_size = 0;
1023}
1024
d38ceaf9 1025/**
06ec9070 1026 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
1027 *
1028 * @adev: amdgpu_device pointer
1029 *
1030 * Validates certain module parameters and updates
1031 * the associated values used by the driver (all asics).
1032 */
912dfc84 1033static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1034{
5b011235
CZ
1035 if (amdgpu_sched_jobs < 4) {
1036 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1037 amdgpu_sched_jobs);
1038 amdgpu_sched_jobs = 4;
76117507 1039 } else if (!is_power_of_2(amdgpu_sched_jobs)){
5b011235
CZ
1040 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1041 amdgpu_sched_jobs);
1042 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1043 }
d38ceaf9 1044
83e74db6 1045 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
1046 /* gart size must be greater or equal to 32M */
1047 dev_warn(adev->dev, "gart size (%d) too small\n",
1048 amdgpu_gart_size);
83e74db6 1049 amdgpu_gart_size = -1;
d38ceaf9
AD
1050 }
1051
36d38372 1052 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1053 /* gtt size must be greater or equal to 32M */
36d38372
CK
1054 dev_warn(adev->dev, "gtt size (%d) too small\n",
1055 amdgpu_gtt_size);
1056 amdgpu_gtt_size = -1;
d38ceaf9
AD
1057 }
1058
d07f14be
RH
1059 /* valid range is between 4 and 9 inclusive */
1060 if (amdgpu_vm_fragment_size != -1 &&
1061 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1062 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1063 amdgpu_vm_fragment_size = -1;
1064 }
1065
7951e376
RZ
1066 amdgpu_device_check_smu_prv_buffer_size(adev);
1067
06ec9070 1068 amdgpu_device_check_vm_size(adev);
d38ceaf9 1069
06ec9070 1070 amdgpu_device_check_block_size(adev);
6a7f76e7 1071
19aede77 1072 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84 1073
e3c00faa 1074 return 0;
d38ceaf9
AD
1075}
1076
1077/**
1078 * amdgpu_switcheroo_set_state - set switcheroo state
1079 *
1080 * @pdev: pci dev pointer
1694467b 1081 * @state: vga_switcheroo state
d38ceaf9
AD
1082 *
 * Callback for the switcheroo driver. Suspends or resumes
 * the asics before or after it is powered up using ACPI methods.
1085 */
1086static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
1087{
1088 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 1089 int r;
d38ceaf9 1090
31af062a 1091 if (amdgpu_device_supports_boco(dev) && state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
1092 return;
1093
1094 if (state == VGA_SWITCHEROO_ON) {
7ca85295 1095 pr_info("amdgpu: switched on\n");
d38ceaf9
AD
1096 /* don't suspend or resume card normally */
1097 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1098
de185019
AD
1099 pci_set_power_state(dev->pdev, PCI_D0);
1100 pci_restore_state(dev->pdev);
1101 r = pci_enable_device(dev->pdev);
1102 if (r)
1103 DRM_WARN("pci_enable_device failed (%d)\n", r);
1104 amdgpu_device_resume(dev, true);
d38ceaf9 1105
d38ceaf9
AD
1106 dev->switch_power_state = DRM_SWITCH_POWER_ON;
1107 drm_kms_helper_poll_enable(dev);
1108 } else {
7ca85295 1109 pr_info("amdgpu: switched off\n");
d38ceaf9
AD
1110 drm_kms_helper_poll_disable(dev);
1111 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
de185019
AD
1112 amdgpu_device_suspend(dev, true);
1113 pci_save_state(dev->pdev);
1114 /* Shut down the device */
1115 pci_disable_device(dev->pdev);
1116 pci_set_power_state(dev->pdev, PCI_D3cold);
d38ceaf9
AD
1117 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1118 }
1119}
1120
1121/**
1122 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1123 *
1124 * @pdev: pci dev pointer
1125 *
 * Callback for the switcheroo driver. Check if the switcheroo
 * state can be changed.
1128 * Returns true if the state can be changed, false if not.
1129 */
1130static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1131{
1132 struct drm_device *dev = pci_get_drvdata(pdev);
1133
1134 /*
1135 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1136 * locking inversion with the driver load path. And the access here is
1137 * completely racy anyway. So don't bother with locking for now.
1138 */
1139 return dev->open_count == 0;
1140}
1141
1142static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1143 .set_gpu_state = amdgpu_switcheroo_set_state,
1144 .reprobe = NULL,
1145 .can_switch = amdgpu_switcheroo_can_switch,
1146};
1147
e3ecdffa
AD
1148/**
1149 * amdgpu_device_ip_set_clockgating_state - set the CG state
1150 *
87e3f136 1151 * @dev: amdgpu_device pointer
e3ecdffa
AD
1152 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1153 * @state: clockgating state (gate or ungate)
1154 *
1155 * Sets the requested clockgating state for all instances of
1156 * the hardware IP specified.
1157 * Returns the error code from the last instance.
1158 */
43fa561f 1159int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1160 enum amd_ip_block_type block_type,
1161 enum amd_clockgating_state state)
d38ceaf9 1162{
43fa561f 1163 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1164 int i, r = 0;
1165
1166 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1167 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1168 continue;
c722865a
RZ
1169 if (adev->ip_blocks[i].version->type != block_type)
1170 continue;
1171 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1172 continue;
1173 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1174 (void *)adev, state);
1175 if (r)
1176 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1177 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1178 }
1179 return r;
1180}
1181
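/*
 * Example (illustrative): gate the clocks of all GFX instances on the asic.
 *
 *	amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
 *					       AMD_CG_STATE_GATE);
 */
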
e3ecdffa
AD
1182/**
1183 * amdgpu_device_ip_set_powergating_state - set the PG state
1184 *
87e3f136 1185 * @dev: amdgpu_device pointer
e3ecdffa
AD
1186 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1187 * @state: powergating state (gate or ungate)
1188 *
1189 * Sets the requested powergating state for all instances of
1190 * the hardware IP specified.
1191 * Returns the error code from the last instance.
1192 */
43fa561f 1193int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1194 enum amd_ip_block_type block_type,
1195 enum amd_powergating_state state)
d38ceaf9 1196{
43fa561f 1197 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1198 int i, r = 0;
1199
1200 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1201 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1202 continue;
c722865a
RZ
1203 if (adev->ip_blocks[i].version->type != block_type)
1204 continue;
1205 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1206 continue;
1207 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1208 (void *)adev, state);
1209 if (r)
1210 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1211 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1212 }
1213 return r;
1214}
1215
e3ecdffa
AD
1216/**
1217 * amdgpu_device_ip_get_clockgating_state - get the CG state
1218 *
1219 * @adev: amdgpu_device pointer
1220 * @flags: clockgating feature flags
1221 *
1222 * Walks the list of IPs on the device and updates the clockgating
1223 * flags for each IP.
1224 * Updates @flags with the feature flags for each hardware IP where
1225 * clockgating is enabled.
1226 */
2990a1fc
AD
1227void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1228 u32 *flags)
6cb2d4e4
HR
1229{
1230 int i;
1231
1232 for (i = 0; i < adev->num_ip_blocks; i++) {
1233 if (!adev->ip_blocks[i].status.valid)
1234 continue;
1235 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1236 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1237 }
1238}
1239
e3ecdffa
AD
1240/**
1241 * amdgpu_device_ip_wait_for_idle - wait for idle
1242 *
1243 * @adev: amdgpu_device pointer
1244 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1245 *
 * Waits for the requested hardware IP to be idle.
1247 * Returns 0 for success or a negative error code on failure.
1248 */
2990a1fc
AD
1249int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1250 enum amd_ip_block_type block_type)
5dbbb60b
AD
1251{
1252 int i, r;
1253
1254 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1255 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1256 continue;
a1255107
AD
1257 if (adev->ip_blocks[i].version->type == block_type) {
1258 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1259 if (r)
1260 return r;
1261 break;
1262 }
1263 }
1264 return 0;
1265
1266}
1267
e3ecdffa
AD
1268/**
1269 * amdgpu_device_ip_is_idle - is the hardware IP idle
1270 *
1271 * @adev: amdgpu_device pointer
1272 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1273 *
1274 * Check if the hardware IP is idle or not.
 * Returns true if the IP is idle, false if not.
1276 */
2990a1fc
AD
1277bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1278 enum amd_ip_block_type block_type)
5dbbb60b
AD
1279{
1280 int i;
1281
1282 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1283 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1284 continue;
a1255107
AD
1285 if (adev->ip_blocks[i].version->type == block_type)
1286 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1287 }
1288 return true;
1289
1290}
1291
e3ecdffa
AD
1292/**
1293 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1294 *
1295 * @adev: amdgpu_device pointer
87e3f136 1296 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1297 *
1298 * Returns a pointer to the hardware IP block structure
1299 * if it exists for the asic, otherwise NULL.
1300 */
2990a1fc
AD
1301struct amdgpu_ip_block *
1302amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1303 enum amd_ip_block_type type)
d38ceaf9
AD
1304{
1305 int i;
1306
1307 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1308 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1309 return &adev->ip_blocks[i];
1310
1311 return NULL;
1312}
1313
1314/**
2990a1fc 1315 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1316 *
1317 * @adev: amdgpu_device pointer
5fc3aeeb 1318 * @type: enum amd_ip_block_type
d38ceaf9
AD
1319 * @major: major version
1320 * @minor: minor version
1321 *
1322 * return 0 if equal or greater
1323 * return 1 if smaller or the ip_block doesn't exist
1324 */
2990a1fc
AD
1325int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1326 enum amd_ip_block_type type,
1327 u32 major, u32 minor)
d38ceaf9 1328{
2990a1fc 1329 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1330
a1255107
AD
1331 if (ip_block && ((ip_block->version->major > major) ||
1332 ((ip_block->version->major == major) &&
1333 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1334 return 0;
1335
1336 return 1;
1337}
1338
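/*
 * Example (illustrative): take a code path only when the GFX IP on this asic
 * is at least version 8.1.
 *
 *	if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GFX, 8, 1))
 *		... GFX 8.1 or newer ...
 */
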
a1255107 1339/**
2990a1fc 1340 * amdgpu_device_ip_block_add
a1255107
AD
1341 *
1342 * @adev: amdgpu_device pointer
1343 * @ip_block_version: pointer to the IP to add
1344 *
1345 * Adds the IP block driver information to the collection of IPs
1346 * on the asic.
1347 */
2990a1fc
AD
1348int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1349 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1350{
1351 if (!ip_block_version)
1352 return -EINVAL;
1353
e966a725 1354 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1355 ip_block_version->funcs->name);
1356
a1255107
AD
1357 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1358
1359 return 0;
1360}
1361
/**
 * amdgpu_device_enable_virtual_display - enable virtual display feature
 *
 * @adev: amdgpu_device pointer
 *
 * Enables the virtual display feature if the user has enabled it via
 * the module parameter virtual_display. This feature provides a virtual
 * display hardware on headless boards or in virtualized environments.
 * This function parses and validates the configuration string specified by
 * the user and configures the virtual display configuration (number of
 * virtual connectors, crtcs, etc.) specified.
 */
483ef985 1374static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1375{
1376 adev->enable_virtual_display = false;
1377
1378 if (amdgpu_virtual_display) {
1379 struct drm_device *ddev = adev->ddev;
1380 const char *pci_address_name = pci_name(ddev->pdev);
0f66356d 1381 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1382
1383 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1384 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1385 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1386 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1387 if (!strcmp("all", pciaddname)
1388 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1389 long num_crtc;
1390 int res = -1;
1391
9accf2fd 1392 adev->enable_virtual_display = true;
0f66356d
ED
1393
1394 if (pciaddname_tmp)
1395 res = kstrtol(pciaddname_tmp, 10,
1396 &num_crtc);
1397
1398 if (!res) {
1399 if (num_crtc < 1)
1400 num_crtc = 1;
1401 if (num_crtc > 6)
1402 num_crtc = 6;
1403 adev->mode_info.num_crtc = num_crtc;
1404 } else {
1405 adev->mode_info.num_crtc = 1;
1406 }
9accf2fd
ED
1407 break;
1408 }
1409 }
1410
0f66356d
ED
1411 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1412 amdgpu_virtual_display, pci_address_name,
1413 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
1414
1415 kfree(pciaddstr);
1416 }
1417}
1418
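/*
 * Example (illustrative) module parameter values parsed above: enable one
 * virtual display with two CRTCs on a specific device, or on every device:
 *
 *	amdgpu.virtual_display=0000:03:00.0,2
 *	amdgpu.virtual_display=all
 */
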
/**
 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
 *
 * @adev: amdgpu_device pointer
 *
 * Parses the asic configuration parameters specified in the gpu info
 * firmware and makes them available to the driver for use in configuring
 * the asic.
 * Returns 0 on success, -EINVAL on failure.
 */
e2a75f88
AD
1429static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1430{
e2a75f88
AD
1431 const char *chip_name;
1432 char fw_name[30];
1433 int err;
1434 const struct gpu_info_firmware_header_v1_0 *hdr;
1435
ab4fe3e1
HR
1436 adev->firmware.gpu_info_fw = NULL;
1437
e2a75f88
AD
1438 switch (adev->asic_type) {
1439 case CHIP_TOPAZ:
1440 case CHIP_TONGA:
1441 case CHIP_FIJI:
e2a75f88 1442 case CHIP_POLARIS10:
cc07f18d 1443 case CHIP_POLARIS11:
e2a75f88 1444 case CHIP_POLARIS12:
cc07f18d 1445 case CHIP_VEGAM:
e2a75f88
AD
1446 case CHIP_CARRIZO:
1447 case CHIP_STONEY:
1448#ifdef CONFIG_DRM_AMDGPU_SI
1449 case CHIP_VERDE:
1450 case CHIP_TAHITI:
1451 case CHIP_PITCAIRN:
1452 case CHIP_OLAND:
1453 case CHIP_HAINAN:
1454#endif
1455#ifdef CONFIG_DRM_AMDGPU_CIK
1456 case CHIP_BONAIRE:
1457 case CHIP_HAWAII:
1458 case CHIP_KAVERI:
1459 case CHIP_KABINI:
1460 case CHIP_MULLINS:
1461#endif
27c0bc71 1462 case CHIP_VEGA20:
e2a75f88
AD
1463 default:
1464 return 0;
1465 case CHIP_VEGA10:
1466 chip_name = "vega10";
1467 break;
3f76dced
AD
1468 case CHIP_VEGA12:
1469 chip_name = "vega12";
1470 break;
2d2e5e7e 1471 case CHIP_RAVEN:
54c4d17e
FX
1472 if (adev->rev_id >= 8)
1473 chip_name = "raven2";
741deade
AD
1474 else if (adev->pdev->device == 0x15d8)
1475 chip_name = "picasso";
54c4d17e
FX
1476 else
1477 chip_name = "raven";
2d2e5e7e 1478 break;
65e60f6e
LM
1479 case CHIP_ARCTURUS:
1480 chip_name = "arcturus";
1481 break;
b51a26a0
HR
1482 case CHIP_RENOIR:
1483 chip_name = "renoir";
1484 break;
23c6268e
HR
1485 case CHIP_NAVI10:
1486 chip_name = "navi10";
1487 break;
ed42cfe1
XY
1488 case CHIP_NAVI14:
1489 chip_name = "navi14";
1490 break;
42b325e5
XY
1491 case CHIP_NAVI12:
1492 chip_name = "navi12";
1493 break;
e2a75f88
AD
1494 }
1495
1496 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
ab4fe3e1 1497 err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
e2a75f88
AD
1498 if (err) {
1499 dev_err(adev->dev,
1500 "Failed to load gpu_info firmware \"%s\"\n",
1501 fw_name);
1502 goto out;
1503 }
ab4fe3e1 1504 err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
e2a75f88
AD
1505 if (err) {
1506 dev_err(adev->dev,
1507 "Failed to validate gpu_info firmware \"%s\"\n",
1508 fw_name);
1509 goto out;
1510 }
1511
ab4fe3e1 1512 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
1513 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1514
1515 switch (hdr->version_major) {
1516 case 1:
1517 {
1518 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 1519 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
1520 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1521
ec51d3fa
XY
1522 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
1523 goto parse_soc_bounding_box;
1524
b5ab16bf
AD
1525 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1526 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1527 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1528 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 1529 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
1530 le32_to_cpu(gpu_info_fw->gc_num_tccs);
1531 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1532 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1533 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1534 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 1535 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
1536 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1537 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
1538 adev->gfx.cu_info.max_waves_per_simd =
1539 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1540 adev->gfx.cu_info.max_scratch_slots_per_cu =
1541 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1542 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 1543 if (hdr->version_minor >= 1) {
35c2e910
HZ
1544 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1545 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1546 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1547 adev->gfx.config.num_sc_per_sh =
1548 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1549 adev->gfx.config.num_packer_per_sc =
1550 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1551 }
ec51d3fa
XY
1552
1553parse_soc_bounding_box:
ec51d3fa
XY
1554 /*
	 * soc bounding box info is not integrated in the discovery table,
1556 * we always need to parse it from gpu info firmware.
1557 */
48321c3d
HW
1558 if (hdr->version_minor == 2) {
1559 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1560 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1561 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1562 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1563 }
e2a75f88
AD
1564 break;
1565 }
1566 default:
1567 dev_err(adev->dev,
1568 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1569 err = -EINVAL;
1570 goto out;
1571 }
1572out:
e2a75f88
AD
1573 return err;
1574}
1575
e3ecdffa
AD
1576/**
1577 * amdgpu_device_ip_early_init - run early init for hardware IPs
1578 *
1579 * @adev: amdgpu_device pointer
1580 *
1581 * Early initialization pass for hardware IPs. The hardware IPs that make
 * up each asic are discovered and each IP's early_init callback is run. This
1583 * is the first stage in initializing the asic.
1584 * Returns 0 on success, negative error code on failure.
1585 */
06ec9070 1586static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 1587{
aaa36a97 1588 int i, r;
d38ceaf9 1589
483ef985 1590 amdgpu_device_enable_virtual_display(adev);
a6be7570 1591
d38ceaf9 1592 switch (adev->asic_type) {
aaa36a97
AD
1593 case CHIP_TOPAZ:
1594 case CHIP_TONGA:
48299f95 1595 case CHIP_FIJI:
2cc0c0b5 1596 case CHIP_POLARIS10:
32cc7e53 1597 case CHIP_POLARIS11:
c4642a47 1598 case CHIP_POLARIS12:
32cc7e53 1599 case CHIP_VEGAM:
aaa36a97 1600 case CHIP_CARRIZO:
39bb0c92
SL
1601 case CHIP_STONEY:
1602 if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
aaa36a97
AD
1603 adev->family = AMDGPU_FAMILY_CZ;
1604 else
1605 adev->family = AMDGPU_FAMILY_VI;
1606
1607 r = vi_set_ip_blocks(adev);
1608 if (r)
1609 return r;
1610 break;
33f34802
KW
1611#ifdef CONFIG_DRM_AMDGPU_SI
1612 case CHIP_VERDE:
1613 case CHIP_TAHITI:
1614 case CHIP_PITCAIRN:
1615 case CHIP_OLAND:
1616 case CHIP_HAINAN:
295d0daf 1617 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
1618 r = si_set_ip_blocks(adev);
1619 if (r)
1620 return r;
1621 break;
1622#endif
a2e73f56
AD
1623#ifdef CONFIG_DRM_AMDGPU_CIK
1624 case CHIP_BONAIRE:
1625 case CHIP_HAWAII:
1626 case CHIP_KAVERI:
1627 case CHIP_KABINI:
1628 case CHIP_MULLINS:
1629 if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII))
1630 adev->family = AMDGPU_FAMILY_CI;
1631 else
1632 adev->family = AMDGPU_FAMILY_KV;
1633
1634 r = cik_set_ip_blocks(adev);
1635 if (r)
1636 return r;
1637 break;
1638#endif
e48a3cd9
AD
1639 case CHIP_VEGA10:
1640 case CHIP_VEGA12:
e4bd8170 1641 case CHIP_VEGA20:
e48a3cd9 1642 case CHIP_RAVEN:
61cf44c1 1643 case CHIP_ARCTURUS:
b51a26a0
HR
1644 case CHIP_RENOIR:
1645 if (adev->asic_type == CHIP_RAVEN ||
1646 adev->asic_type == CHIP_RENOIR)
2ca8a5d2
CZ
1647 adev->family = AMDGPU_FAMILY_RV;
1648 else
1649 adev->family = AMDGPU_FAMILY_AI;
460826e6
KW
1650
1651 r = soc15_set_ip_blocks(adev);
1652 if (r)
1653 return r;
1654 break;
0a5b8c7b 1655 case CHIP_NAVI10:
7ecb5cd4 1656 case CHIP_NAVI14:
4808cf9c 1657 case CHIP_NAVI12:
0a5b8c7b
HR
1658 adev->family = AMDGPU_FAMILY_NV;
1659
1660 r = nv_set_ip_blocks(adev);
1661 if (r)
1662 return r;
1663 break;
d38ceaf9
AD
1664 default:
1665 /* FIXME: not supported yet */
1666 return -EINVAL;
1667 }
1668
e2a75f88
AD
1669 r = amdgpu_device_parse_gpu_info_fw(adev);
1670 if (r)
1671 return r;
1672
ec51d3fa
XY
1673 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
1674 amdgpu_discovery_get_gfx_info(adev);
1675
1884734a 1676 amdgpu_amdkfd_device_probe(adev);
1677
3149d9da
XY
1678 if (amdgpu_sriov_vf(adev)) {
1679 r = amdgpu_virt_request_full_gpu(adev, true);
1680 if (r)
5ffa61c1 1681 return -EAGAIN;
3149d9da
XY
1682 }
1683
3b94fb10 1684 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 1685 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 1686 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
00f54b97 1687
d38ceaf9
AD
1688 for (i = 0; i < adev->num_ip_blocks; i++) {
1689 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
ed8cf00c
HR
1690 DRM_ERROR("disabled ip block: %d <%s>\n",
1691 i, adev->ip_blocks[i].version->funcs->name);
a1255107 1692 adev->ip_blocks[i].status.valid = false;
d38ceaf9 1693 } else {
a1255107
AD
1694 if (adev->ip_blocks[i].version->funcs->early_init) {
1695 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 1696 if (r == -ENOENT) {
a1255107 1697 adev->ip_blocks[i].status.valid = false;
2c1a2784 1698 } else if (r) {
a1255107
AD
1699 DRM_ERROR("early_init of IP block <%s> failed %d\n",
1700 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 1701 return r;
2c1a2784 1702 } else {
a1255107 1703 adev->ip_blocks[i].status.valid = true;
2c1a2784 1704 }
974e6b64 1705 } else {
a1255107 1706 adev->ip_blocks[i].status.valid = true;
d38ceaf9 1707 }
d38ceaf9 1708 }
21a249ca
AD
1709 /* get the vbios after the asic_funcs are set up */
1710 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
1711 /* Read BIOS */
1712 if (!amdgpu_get_bios(adev))
1713 return -EINVAL;
1714
1715 r = amdgpu_atombios_init(adev);
1716 if (r) {
1717 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
1718 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
1719 return r;
1720 }
1721 }
d38ceaf9
AD
1722 }
1723
395d1fb9
NH
1724 adev->cg_flags &= amdgpu_cg_mask;
1725 adev->pg_flags &= amdgpu_pg_mask;
1726
d38ceaf9
AD
1727 return 0;
1728}
1729
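/*
 * Phase 1 brings up only the blocks everything else depends on: the common
 * block, the interrupt handler (IH) and, when running under SR-IOV, PSP.
 */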
1730static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
1731{
1732 int i, r;
1733
1734 for (i = 0; i < adev->num_ip_blocks; i++) {
1735 if (!adev->ip_blocks[i].status.sw)
1736 continue;
1737 if (adev->ip_blocks[i].status.hw)
1738 continue;
1739 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 1740 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
1741 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
1742 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1743 if (r) {
1744 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1745 adev->ip_blocks[i].version->funcs->name, r);
1746 return r;
1747 }
1748 adev->ip_blocks[i].status.hw = true;
1749 }
1750 }
1751
1752 return 0;
1753}
1754
1755static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
1756{
1757 int i, r;
1758
1759 for (i = 0; i < adev->num_ip_blocks; i++) {
1760 if (!adev->ip_blocks[i].status.sw)
1761 continue;
1762 if (adev->ip_blocks[i].status.hw)
1763 continue;
1764 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1765 if (r) {
1766 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1767 adev->ip_blocks[i].version->funcs->name, r);
1768 return r;
1769 }
1770 adev->ip_blocks[i].status.hw = true;
1771 }
1772
1773 return 0;
1774}
1775
7a3e0bb2
RZ
1776static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
1777{
1778 int r = 0;
1779 int i;
80f41f84 1780 uint32_t smu_version;
7a3e0bb2
RZ
1781
1782 if (adev->asic_type >= CHIP_VEGA10) {
1783 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
1784 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
1785 continue;
1786
1787 /* no need to do the fw loading again if already done */
1788 if (adev->ip_blocks[i].status.hw == true)
1789 break;
1790
1791 if (adev->in_gpu_reset || adev->in_suspend) {
1792 r = adev->ip_blocks[i].version->funcs->resume(adev);
1793 if (r) {
1794 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 1795 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
1796 return r;
1797 }
1798 } else {
1799 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1800 if (r) {
1801 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1802 adev->ip_blocks[i].version->funcs->name, r);
1803 return r;
7a3e0bb2 1804 }
7a3e0bb2 1805 }
482f0e53
ML
1806
1807 adev->ip_blocks[i].status.hw = true;
1808 break;
7a3e0bb2
RZ
1809 }
1810 }
482f0e53 1811
8973d9ec
ED
1812 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
1813 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 1814
80f41f84 1815 return r;
7a3e0bb2
RZ
1816}
1817
e3ecdffa
AD
1818/**
1819 * amdgpu_device_ip_init - run init for hardware IPs
1820 *
1821 * @adev: amdgpu_device pointer
1822 *
1823 * Main initialization pass for hardware IPs. The list of all the hardware
1824 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
1825 * are run. sw_init initializes the software state associated with each IP
1826 * and hw_init initializes the hardware associated with each IP.
1827 * Returns 0 on success, negative error code on failure.
1828 */
06ec9070 1829static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
1830{
1831 int i, r;
1832
c030f2e4 1833 r = amdgpu_ras_init(adev);
1834 if (r)
1835 return r;
1836
d38ceaf9 1837 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1838 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 1839 continue;
a1255107 1840 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 1841 if (r) {
a1255107
AD
1842 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
1843 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 1844 goto init_failed;
2c1a2784 1845 }
a1255107 1846 adev->ip_blocks[i].status.sw = true;
bfca0289 1847
d38ceaf9 1848 /* need to do gmc hw init early so we can allocate gpu mem */
a1255107 1849 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
06ec9070 1850 r = amdgpu_device_vram_scratch_init(adev);
2c1a2784
AD
1851 if (r) {
1852 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
72d3f592 1853 goto init_failed;
2c1a2784 1854 }
a1255107 1855 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
1856 if (r) {
1857 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 1858 goto init_failed;
2c1a2784 1859 }
06ec9070 1860 r = amdgpu_device_wb_init(adev);
2c1a2784 1861 if (r) {
06ec9070 1862 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 1863 goto init_failed;
2c1a2784 1864 }
a1255107 1865 adev->ip_blocks[i].status.hw = true;
2493664f
ML
1866
1867 /* right after GMC hw init, we create CSA */
f92d5c61 1868 if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
1e256e27
RZ
1869 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
1870 AMDGPU_GEM_DOMAIN_VRAM,
1871 AMDGPU_CSA_SIZE);
2493664f
ML
1872 if (r) {
1873 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 1874 goto init_failed;
2493664f
ML
1875 }
1876 }
d38ceaf9
AD
1877 }
1878 }
1879
c9ffa427
YT
1880 if (amdgpu_sriov_vf(adev))
1881 amdgpu_virt_init_data_exchange(adev);
1882
533aed27
AG
1883 r = amdgpu_ib_pool_init(adev);
1884 if (r) {
1885 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
1886 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
1887 goto init_failed;
1888 }
1889
c8963ea4
RZ
1890 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
1891 if (r)
72d3f592 1892 goto init_failed;
0a4f2520
RZ
1893
1894 r = amdgpu_device_ip_hw_init_phase1(adev);
1895 if (r)
72d3f592 1896 goto init_failed;
0a4f2520 1897
7a3e0bb2
RZ
1898 r = amdgpu_device_fw_loading(adev);
1899 if (r)
72d3f592 1900 goto init_failed;
7a3e0bb2 1901
0a4f2520
RZ
1902 r = amdgpu_device_ip_hw_init_phase2(adev);
1903 if (r)
72d3f592 1904 goto init_failed;
d38ceaf9 1905
121a2bc6
AG
1906 /*
1907 * Retired pages will be loaded from eeprom and reserved here;
1908 * this must be called after amdgpu_device_ip_hw_init_phase2 since,
1909 * for some ASICs, the RAS EEPROM code relies on the SMU being fully
1910 * functional for I2C communication, which is only true at this point.
1911 * recovery_init may fail, but it can free all resources allocated by
1912 * itself and its failure should not stop the amdgpu init process.
1913 *
1914 * Note: theoretically, this should be called before all vram allocations
1915 * to protect retired pages from being used again.
1916 */
1917 amdgpu_ras_recovery_init(adev);
1918
3e2e2ab5
HZ
1919 if (adev->gmc.xgmi.num_physical_nodes > 1)
1920 amdgpu_xgmi_add_device(adev);
1884734a 1921 amdgpu_amdkfd_device_init(adev);
c6332b97 1922
72d3f592 1923init_failed:
c9ffa427 1924 if (amdgpu_sriov_vf(adev))
c6332b97 1925 amdgpu_virt_release_full_gpu(adev, true);
1926
72d3f592 1927 return r;
d38ceaf9
AD
1928}
1929
e3ecdffa
AD
1930/**
1931 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
1932 *
1933 * @adev: amdgpu_device pointer
1934 *
1935 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
1936 * this function before a GPU reset. If the value is retained after a
1937 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
1938 */
06ec9070 1939static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
1940{
1941 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
1942}
1943
e3ecdffa
AD
1944/**
1945 * amdgpu_device_check_vram_lost - check if vram is valid
1946 *
1947 * @adev: amdgpu_device pointer
1948 *
1949 * Checks the reset magic value written to the gart pointer in VRAM.
1950 * The driver calls this after a GPU reset to see if the contents of
1951 * VRAM are lost or not.
1952 * Returns true if vram is lost, false if not.
1953 */
06ec9070 1954static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8
CZ
1955{
1956 return !!memcmp(adev->gart.ptr, adev->reset_magic,
1957 AMDGPU_RESET_MAGIC_NUM);
1958}
1959
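
The two helpers above are a matched pair: the magic is written just before a reset and compared just after. A minimal sketch of that usage, assuming amdgpu_asic_reset() as the reset entry point (illustrative only; the real reset paths in this driver are more involved):

static int example_reset_with_vram_check(struct amdgpu_device *adev)
{
	int r;

	/* snapshot the gart pointer contents before resetting */
	amdgpu_device_fill_reset_magic(adev);

	r = amdgpu_asic_reset(adev);	/* assumed reset entry point */
	if (r)
		return r;

	/* if the magic no longer matches, VRAM contents were lost */
	if (amdgpu_device_check_vram_lost(adev))
		DRM_INFO("VRAM contents lost across reset\n");

	return 0;
}
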
e3ecdffa 1960/**
1112a46b 1961 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
1962 *
1963 * @adev: amdgpu_device pointer
b8b72130 1964 * @state: clockgating state (gate or ungate)
e3ecdffa 1965 *
e3ecdffa 1966 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
1967 * set_clockgating_state callbacks are run.
1968 * During late init this pass enables clockgating for the hardware IPs;
1969 * during fini or suspend it disables clockgating.
e3ecdffa
AD
1970 * Returns 0 on success, negative error code on failure.
1971 */
fdd34271 1972
1112a46b
RZ
1973static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
1974 enum amd_clockgating_state state)
d38ceaf9 1975{
1112a46b 1976 int i, j, r;
d38ceaf9 1977
4a2ba394
SL
1978 if (amdgpu_emu_mode == 1)
1979 return 0;
1980
1112a46b
RZ
1981 for (j = 0; j < adev->num_ip_blocks; j++) {
1982 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 1983 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 1984 continue;
4a446d55 1985 /* skip CG for VCE/UVD, it's handled specially */
a1255107 1986 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 1987 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 1988 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 1989 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 1990 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 1991 /* enable clockgating to save power */
a1255107 1992 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 1993 state);
4a446d55
AD
1994 if (r) {
1995 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 1996 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
1997 return r;
1998 }
b0b00ff1 1999 }
d38ceaf9 2000 }
06b18f61 2001
c9f96fd5
RZ
2002 return 0;
2003}
2004
1112a46b 2005static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
c9f96fd5 2006{
1112a46b 2007 int i, j, r;
06b18f61 2008
c9f96fd5
RZ
2009 if (amdgpu_emu_mode == 1)
2010 return 0;
2011
1112a46b
RZ
2012 for (j = 0; j < adev->num_ip_blocks; j++) {
2013 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2014 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5
RZ
2015 continue;
2016 /* skip PG for VCE/UVD, it's handled specially */
2017 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2018 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2019 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2020 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2021 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2022 /* enable powergating to save power */
2023 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2024 state);
c9f96fd5
RZ
2025 if (r) {
2026 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2027 adev->ip_blocks[i].version->funcs->name, r);
2028 return r;
2029 }
2030 }
2031 }
2dc80b00
S
2032 return 0;
2033}
2034
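
Taken together, the two setters above are always driven in opposite directions depending on the phase; a compact sketch of the call pattern used later in this file (gate on late init, ungate on fini and suspend):

static void example_cg_pg_transitions(struct amdgpu_device *adev)
{
	/* late init: enable gating to save power */
	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);

	/* fini or suspend: disable gating before tearing down or suspending */
	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
}
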
beff74bc
AD
2035static int amdgpu_device_enable_mgpu_fan_boost(void)
2036{
2037 struct amdgpu_gpu_instance *gpu_ins;
2038 struct amdgpu_device *adev;
2039 int i, ret = 0;
2040
2041 mutex_lock(&mgpu_info.mutex);
2042
2043 /*
2044 * MGPU fan boost feature should be enabled
2045 * only when there are two or more dGPUs in
2046 * the system
2047 */
2048 if (mgpu_info.num_dgpu < 2)
2049 goto out;
2050
2051 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2052 gpu_ins = &(mgpu_info.gpu_ins[i]);
2053 adev = gpu_ins->adev;
2054 if (!(adev->flags & AMD_IS_APU) &&
2055 !gpu_ins->mgpu_fan_enabled &&
2056 adev->powerplay.pp_funcs &&
2057 adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
2058 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2059 if (ret)
2060 break;
2061
2062 gpu_ins->mgpu_fan_enabled = 1;
2063 }
2064 }
2065
2066out:
2067 mutex_unlock(&mgpu_info.mutex);
2068
2069 return ret;
2070}
2071
e3ecdffa
AD
2072/**
2073 * amdgpu_device_ip_late_init - run late init for hardware IPs
2074 *
2075 * @adev: amdgpu_device pointer
2076 *
2077 * Late initialization pass for hardware IPs. The list of all the hardware
2078 * IPs that make up the asic is walked and the late_init callbacks are run.
2079 * late_init covers any special initialization that an IP requires
2080 * after all of them have been initialized or something that needs to happen
2081 * late in the init process.
2082 * Returns 0 on success, negative error code on failure.
2083 */
06ec9070 2084static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2085{
60599a03 2086 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2087 int i = 0, r;
2088
2089 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2090 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2091 continue;
2092 if (adev->ip_blocks[i].version->funcs->late_init) {
2093 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2094 if (r) {
2095 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2096 adev->ip_blocks[i].version->funcs->name, r);
2097 return r;
2098 }
2dc80b00 2099 }
73f847db 2100 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2101 }
2102
1112a46b
RZ
2103 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2104 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2105
06ec9070 2106 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2107
beff74bc
AD
2108 r = amdgpu_device_enable_mgpu_fan_boost();
2109 if (r)
2110 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2111
60599a03
EQ
2112
2113 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2114 mutex_lock(&mgpu_info.mutex);
2115
2116 /*
2117 * Reset the device p-state to low, as it was booted in the high p-state.
2118 *
2119 * This should be performed only after all devices from the same
2120 * hive get initialized.
2121 *
2122 * However, the number of devices in the hive is not known in advance;
2123 * it is counted one by one as the devices initialize.
2124 *
2125 * So we wait until all XGMI interlinked devices are initialized.
2126 * This may introduce some delay as those devices may come from
2127 * different hives. But that should be OK.
2128 */
2129 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2130 for (i = 0; i < mgpu_info.num_gpu; i++) {
2131 gpu_instance = &(mgpu_info.gpu_ins[i]);
2132 if (gpu_instance->adev->flags & AMD_IS_APU)
2133 continue;
2134
2135 r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 0);
2136 if (r) {
2137 DRM_ERROR("pstate setting failed (%d).\n", r);
2138 break;
2139 }
2140 }
2141 }
2142
2143 mutex_unlock(&mgpu_info.mutex);
2144 }
2145
d38ceaf9
AD
2146 return 0;
2147}
2148
e3ecdffa
AD
2149/**
2150 * amdgpu_device_ip_fini - run fini for hardware IPs
2151 *
2152 * @adev: amdgpu_device pointer
2153 *
2154 * Main teardown pass for hardware IPs. The list of all the hardware
2155 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2156 * are run. hw_fini tears down the hardware associated with each IP
2157 * and sw_fini tears down any software state associated with each IP.
2158 * Returns 0 on success, negative error code on failure.
2159 */
06ec9070 2160static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
d38ceaf9
AD
2161{
2162 int i, r;
2163
c030f2e4 2164 amdgpu_ras_pre_fini(adev);
2165
a82400b5
AG
2166 if (adev->gmc.xgmi.num_physical_nodes > 1)
2167 amdgpu_xgmi_remove_device(adev);
2168
1884734a 2169 amdgpu_amdkfd_device_fini(adev);
05df1f01
RZ
2170
2171 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2172 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2173
3e96dbfd
AD
2174 /* need to disable SMC first */
2175 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2176 if (!adev->ip_blocks[i].status.hw)
3e96dbfd 2177 continue;
fdd34271 2178 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
a1255107 2179 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3e96dbfd
AD
2180 /* XXX handle errors */
2181 if (r) {
2182 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
a1255107 2183 adev->ip_blocks[i].version->funcs->name, r);
3e96dbfd 2184 }
a1255107 2185 adev->ip_blocks[i].status.hw = false;
3e96dbfd
AD
2186 break;
2187 }
2188 }
2189
d38ceaf9 2190 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2191 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2192 continue;
8201a67a 2193
a1255107 2194 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2195 /* XXX handle errors */
2c1a2784 2196 if (r) {
a1255107
AD
2197 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2198 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2199 }
8201a67a 2200
a1255107 2201 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2202 }
2203
9950cda2 2204
d38ceaf9 2205 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2206 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2207 continue;
c12aba3a
ML
2208
2209 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2210 amdgpu_ucode_free_bo(adev);
1e256e27 2211 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a
ML
2212 amdgpu_device_wb_fini(adev);
2213 amdgpu_device_vram_scratch_fini(adev);
533aed27 2214 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2215 }
2216
a1255107 2217 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2218 /* XXX handle errors */
2c1a2784 2219 if (r) {
a1255107
AD
2220 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2221 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2222 }
a1255107
AD
2223 adev->ip_blocks[i].status.sw = false;
2224 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2225 }
2226
a6dcfd9c 2227 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2228 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2229 continue;
a1255107
AD
2230 if (adev->ip_blocks[i].version->funcs->late_fini)
2231 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2232 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2233 }
2234
c030f2e4 2235 amdgpu_ras_fini(adev);
2236
030308fc 2237 if (amdgpu_sriov_vf(adev))
24136135
ML
2238 if (amdgpu_virt_release_full_gpu(adev, false))
2239 DRM_ERROR("failed to release exclusive mode on fini\n");
2493664f 2240
d38ceaf9
AD
2241 return 0;
2242}
2243
e3ecdffa 2244/**
beff74bc 2245 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2246 *
1112a46b 2247 * @work: work_struct.
e3ecdffa 2248 */
beff74bc 2249static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2250{
2251 struct amdgpu_device *adev =
beff74bc 2252 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2253 int r;
2254
2255 r = amdgpu_ib_ring_tests(adev);
2256 if (r)
2257 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2258}
2259
1e317b99
RZ
2260static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2261{
2262 struct amdgpu_device *adev =
2263 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2264
2265 mutex_lock(&adev->gfx.gfx_off_mutex);
2266 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2267 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2268 adev->gfx.gfx_off_state = true;
2269 }
2270 mutex_unlock(&adev->gfx.gfx_off_mutex);
2271}
2272
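
The delayed work above is the back half of a request-count protocol; a hedged sketch of the front half, assuming amdgpu_gfx_off_ctrl() from amdgpu_gfx.c behaves as described here (a disable request raises gfx_off_req_count and takes effect immediately, an enable request drops the count and re-arms gfx_off_delay_work):

static void example_gfxoff_critical_section(struct amdgpu_device *adev)
{
	/* raise the request count so GFXOFF is exited before register access */
	amdgpu_gfx_off_ctrl(adev, false);

	/* ... program GFX registers safely here ... */

	/* drop the count; re-entering GFXOFF is deferred via gfx_off_delay_work */
	amdgpu_gfx_off_ctrl(adev, true);
}
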
e3ecdffa 2273/**
e7854a03 2274 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2275 *
2276 * @adev: amdgpu_device pointer
2277 *
2278 * Main suspend function for hardware IPs. The list of all the hardware
2279 * IPs that make up the asic is walked, clockgating is disabled and the
2280 * suspend callbacks are run. suspend puts the hardware and software state
2281 * in each IP into a state suitable for suspend.
2282 * Returns 0 on success, negative error code on failure.
2283 */
e7854a03
AD
2284static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2285{
2286 int i, r;
2287
05df1f01 2288 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271 2289 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2290
e7854a03
AD
2291 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2292 if (!adev->ip_blocks[i].status.valid)
2293 continue;
2294 /* displays are handled separately */
2295 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
e7854a03
AD
2296 /* XXX handle errors */
2297 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2298 /* XXX handle errors */
2299 if (r) {
2300 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2301 adev->ip_blocks[i].version->funcs->name, r);
482f0e53 2302 return r;
e7854a03 2303 }
482f0e53 2304 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
2305 }
2306 }
2307
e7854a03
AD
2308 return 0;
2309}
2310
2311/**
2312 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2313 *
2314 * @adev: amdgpu_device pointer
2315 *
2316 * Main suspend function for hardware IPs. The list of all the hardware
2317 * IPs that make up the asic is walked, clockgating is disabled and the
2318 * suspend callbacks are run. suspend puts the hardware and software state
2319 * in each IP into a state suitable for suspend.
2320 * Returns 0 on success, negative error code on failure.
2321 */
2322static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2323{
2324 int i, r;
2325
2326 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2327 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2328 continue;
e7854a03
AD
2329 /* displays are handled in phase1 */
2330 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2331 continue;
bff77e86
LM
2332 /* PSP lost connection when err_event_athub occurs */
2333 if (amdgpu_ras_intr_triggered() &&
2334 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2335 adev->ip_blocks[i].status.hw = false;
2336 continue;
2337 }
d38ceaf9 2338 /* XXX handle errors */
a1255107 2339 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 2340 /* XXX handle errors */
2c1a2784 2341 if (r) {
a1255107
AD
2342 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2343 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2344 }
876923fb 2345 adev->ip_blocks[i].status.hw = false;
a3a09142 2346 /* handle putting the SMC in the appropriate state */
86b93fd6
JZ
2347 if (!amdgpu_sriov_vf(adev)) {
2348 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2349 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
2350 if (r) {
2351 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
2352 adev->mp1_state, r);
2353 return r;
2354 }
a3a09142
AD
2355 }
2356 }
b5507c7e 2357 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2358 }
2359
2360 return 0;
2361}
2362
e7854a03
AD
2363/**
2364 * amdgpu_device_ip_suspend - run suspend for hardware IPs
2365 *
2366 * @adev: amdgpu_device pointer
2367 *
2368 * Main suspend function for hardware IPs. The list of all the hardware
2369 * IPs that make up the asic is walked, clockgating is disabled and the
2370 * suspend callbacks are run. suspend puts the hardware and software state
2371 * in each IP into a state suitable for suspend.
2372 * Returns 0 on success, negative error code on failure.
2373 */
2374int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2375{
2376 int r;
2377
e7819644
YT
2378 if (amdgpu_sriov_vf(adev))
2379 amdgpu_virt_request_full_gpu(adev, false);
2380
e7854a03
AD
2381 r = amdgpu_device_ip_suspend_phase1(adev);
2382 if (r)
2383 return r;
2384 r = amdgpu_device_ip_suspend_phase2(adev);
2385
e7819644
YT
2386 if (amdgpu_sriov_vf(adev))
2387 amdgpu_virt_release_full_gpu(adev, false);
2388
e7854a03
AD
2389 return r;
2390}
2391
06ec9070 2392static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2393{
2394 int i, r;
2395
2cb681b6
ML
2396 static enum amd_ip_block_type ip_order[] = {
2397 AMD_IP_BLOCK_TYPE_GMC,
2398 AMD_IP_BLOCK_TYPE_COMMON,
39186aef 2399 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
2400 AMD_IP_BLOCK_TYPE_IH,
2401 };
a90ad3c2 2402
2cb681b6
ML
2403 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2404 int j;
2405 struct amdgpu_ip_block *block;
a90ad3c2 2406
2cb681b6
ML
2407 for (j = 0; j < adev->num_ip_blocks; j++) {
2408 block = &adev->ip_blocks[j];
2409
482f0e53 2410 block->status.hw = false;
2cb681b6
ML
2411 if (block->version->type != ip_order[i] ||
2412 !block->status.valid)
2413 continue;
2414
2415 r = block->version->funcs->hw_init(adev);
0aaeefcc 2416 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
c41d1cf6
ML
2417 if (r)
2418 return r;
482f0e53 2419 block->status.hw = true;
a90ad3c2
ML
2420 }
2421 }
2422
2423 return 0;
2424}
2425
06ec9070 2426static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2427{
2428 int i, r;
2429
2cb681b6
ML
2430 static enum amd_ip_block_type ip_order[] = {
2431 AMD_IP_BLOCK_TYPE_SMC,
2432 AMD_IP_BLOCK_TYPE_DCE,
2433 AMD_IP_BLOCK_TYPE_GFX,
2434 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c 2435 AMD_IP_BLOCK_TYPE_UVD,
d83c7a07
JJ
2436 AMD_IP_BLOCK_TYPE_VCE,
2437 AMD_IP_BLOCK_TYPE_VCN
2cb681b6 2438 };
a90ad3c2 2439
2cb681b6
ML
2440 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2441 int j;
2442 struct amdgpu_ip_block *block;
a90ad3c2 2443
2cb681b6
ML
2444 for (j = 0; j < adev->num_ip_blocks; j++) {
2445 block = &adev->ip_blocks[j];
2446
2447 if (block->version->type != ip_order[i] ||
482f0e53
ML
2448 !block->status.valid ||
2449 block->status.hw)
2cb681b6
ML
2450 continue;
2451
895bd048
JZ
2452 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
2453 r = block->version->funcs->resume(adev);
2454 else
2455 r = block->version->funcs->hw_init(adev);
2456
0aaeefcc 2457 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
c41d1cf6
ML
2458 if (r)
2459 return r;
482f0e53 2460 block->status.hw = true;
a90ad3c2
ML
2461 }
2462 }
2463
2464 return 0;
2465}
2466
e3ecdffa
AD
2467/**
2468 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2469 *
2470 * @adev: amdgpu_device pointer
2471 *
2472 * First resume function for hardware IPs. The list of all the hardware
2473 * IPs that make up the asic is walked and the resume callbacks are run for
2474 * COMMON, GMC, and IH. resume puts the hardware into a functional state
2475 * after a suspend and updates the software state as necessary. This
2476 * function is also used for restoring the GPU after a GPU reset.
2477 * Returns 0 on success, negative error code on failure.
2478 */
06ec9070 2479static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
2480{
2481 int i, r;
2482
a90ad3c2 2483 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2484 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 2485 continue;
a90ad3c2 2486 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa
AD
2487 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2488 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
482f0e53 2489
fcf0649f
CZ
2490 r = adev->ip_blocks[i].version->funcs->resume(adev);
2491 if (r) {
2492 DRM_ERROR("resume of IP block <%s> failed %d\n",
2493 adev->ip_blocks[i].version->funcs->name, r);
2494 return r;
2495 }
482f0e53 2496 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
2497 }
2498 }
2499
2500 return 0;
2501}
2502
e3ecdffa
AD
2503/**
2504 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2505 *
2506 * @adev: amdgpu_device pointer
2507 *
2508 * Second resume function for hardware IPs. The list of all the hardware
2509 * IPs that make up the asic is walked and the resume callbacks are run for
2510 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
2511 * functional state after a suspend and updates the software state as
2512 * necessary. This function is also used for restoring the GPU after a GPU
2513 * reset.
2514 * Returns 0 on success, negative error code on failure.
2515 */
06ec9070 2516static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2517{
2518 int i, r;
2519
2520 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2521 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 2522 continue;
fcf0649f 2523 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 2524 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
2525 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2526 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 2527 continue;
a1255107 2528 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 2529 if (r) {
a1255107
AD
2530 DRM_ERROR("resume of IP block <%s> failed %d\n",
2531 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2532 return r;
2c1a2784 2533 }
482f0e53 2534 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
2535 }
2536
2537 return 0;
2538}
2539
e3ecdffa
AD
2540/**
2541 * amdgpu_device_ip_resume - run resume for hardware IPs
2542 *
2543 * @adev: amdgpu_device pointer
2544 *
2545 * Main resume function for hardware IPs. The hardware IPs
2546 * are split into two resume functions because they are
2547 * also used in recovering from a GPU reset and some additional
2548 * steps need to be taken between them. In this case (S3/S4) they are
2549 * run sequentially.
2550 * Returns 0 on success, negative error code on failure.
2551 */
06ec9070 2552static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
2553{
2554 int r;
2555
06ec9070 2556 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
2557 if (r)
2558 return r;
7a3e0bb2
RZ
2559
2560 r = amdgpu_device_fw_loading(adev);
2561 if (r)
2562 return r;
2563
06ec9070 2564 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
2565
2566 return r;
2567}
2568
e3ecdffa
AD
2569/**
2570 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2571 *
2572 * @adev: amdgpu_device pointer
2573 *
2574 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2575 */
4e99a44e 2576static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 2577{
6867e1b5
ML
2578 if (amdgpu_sriov_vf(adev)) {
2579 if (adev->is_atom_fw) {
2580 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2581 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2582 } else {
2583 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2584 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2585 }
2586
2587 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2588 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 2589 }
048765ad
AR
2590}
2591
e3ecdffa
AD
2592/**
2593 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2594 *
2595 * @asic_type: AMD asic type
2596 *
2597 * Check if there is DC (new modesetting infrastructure) support for an asic.
2598 * returns true if DC has support, false if not.
2599 */
4562236b
HW
2600bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2601{
2602 switch (asic_type) {
2603#if defined(CONFIG_DRM_AMD_DC)
2604 case CHIP_BONAIRE:
0d6fbccb 2605 case CHIP_KAVERI:
367e6687
AD
2606 case CHIP_KABINI:
2607 case CHIP_MULLINS:
d9fda248
HW
2608 /*
2609 * We have systems in the wild with these ASICs that require
2610 * LVDS and VGA support which is not supported with DC.
2611 *
2612 * Fallback to the non-DC driver here by default so as not to
2613 * cause regressions.
2614 */
2615 return amdgpu_dc > 0;
2616 case CHIP_HAWAII:
4562236b
HW
2617 case CHIP_CARRIZO:
2618 case CHIP_STONEY:
4562236b 2619 case CHIP_POLARIS10:
675fd32b 2620 case CHIP_POLARIS11:
2c8ad2d5 2621 case CHIP_POLARIS12:
675fd32b 2622 case CHIP_VEGAM:
4562236b
HW
2623 case CHIP_TONGA:
2624 case CHIP_FIJI:
42f8ffa1 2625 case CHIP_VEGA10:
dca7b401 2626 case CHIP_VEGA12:
c6034aa2 2627 case CHIP_VEGA20:
b86a1aa3 2628#if defined(CONFIG_DRM_AMD_DC_DCN)
fd187853 2629 case CHIP_RAVEN:
b4f199c7 2630 case CHIP_NAVI10:
8fceceb6 2631 case CHIP_NAVI14:
078655d9 2632 case CHIP_NAVI12:
e1c14c43 2633 case CHIP_RENOIR:
42f8ffa1 2634#endif
fd187853 2635 return amdgpu_dc != 0;
4562236b
HW
2636#endif
2637 default:
93b09a9a
SS
2638 if (amdgpu_dc > 0)
2639 DRM_INFO("Display Core has been requested via kernel parameter "
2640 "but isn't supported by ASIC, ignoring\n");
4562236b
HW
2641 return false;
2642 }
2643}
2644
2645/**
2646 * amdgpu_device_has_dc_support - check if dc is supported
2647 *
2648 * @adev: amdgpu_device pointer
2649 *
2650 * Returns true for supported, false for not supported
2651 */
2652bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2653{
2555039d
XY
2654 if (amdgpu_sriov_vf(adev))
2655 return false;
2656
4562236b
HW
2657 return amdgpu_device_asic_has_dc_support(adev->asic_type);
2658}
2659
d4535e2c
AG
2660
2661static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
2662{
2663 struct amdgpu_device *adev =
2664 container_of(__work, struct amdgpu_device, xgmi_reset_work);
c6a6e2db 2665 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
d4535e2c 2666
c6a6e2db
AG
2667 /* It's a bug to not have a hive within this function */
2668 if (WARN_ON(!hive))
2669 return;
2670
2671 /*
2672 * Use task barrier to synchronize all xgmi reset works across the
2673 * hive. task_barrier_enter and task_barrier_exit will block
2674 * until all the threads running the xgmi reset works reach
2675 * those points. task_barrier_full will do both blocks.
2676 */
2677 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
2678
2679 task_barrier_enter(&hive->tb);
2680 adev->asic_reset_res = amdgpu_device_baco_enter(adev->ddev);
2681
2682 if (adev->asic_reset_res)
2683 goto fail;
2684
2685 task_barrier_exit(&hive->tb);
2686 adev->asic_reset_res = amdgpu_device_baco_exit(adev->ddev);
2687
2688 if (adev->asic_reset_res)
2689 goto fail;
2690 } else {
2691
2692 task_barrier_full(&hive->tb);
2693 adev->asic_reset_res = amdgpu_asic_reset(adev);
2694 }
ce316fa5 2695
c6a6e2db 2696fail:
d4535e2c 2697 if (adev->asic_reset_res)
fed184e9 2698 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
d4535e2c
AG
2699 adev->asic_reset_res, adev->ddev->unique);
2700}
2701
71f98027
AD
2702static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
2703{
2704 char *input = amdgpu_lockup_timeout;
2705 char *timeout_setting = NULL;
2706 int index = 0;
2707 long timeout;
2708 int ret = 0;
2709
2710 /*
2711 * By default the timeout for non-compute jobs is 10000 ms,
2712 * and there is no timeout enforced on compute jobs.
2713 * In SR-IOV or passthrough mode, the timeout for compute
2714 * jobs is also 10000 ms by default.
2715 */
2716 adev->gfx_timeout = msecs_to_jiffies(10000);
2717 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
2718 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
2719 adev->compute_timeout = adev->gfx_timeout;
2720 else
2721 adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
2722
f440ff44 2723 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 2724 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 2725 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
2726 ret = kstrtol(timeout_setting, 0, &timeout);
2727 if (ret)
2728 return ret;
2729
2730 if (timeout == 0) {
2731 index++;
2732 continue;
2733 } else if (timeout < 0) {
2734 timeout = MAX_SCHEDULE_TIMEOUT;
2735 } else {
2736 timeout = msecs_to_jiffies(timeout);
2737 }
2738
2739 switch (index++) {
2740 case 0:
2741 adev->gfx_timeout = timeout;
2742 break;
2743 case 1:
2744 adev->compute_timeout = timeout;
2745 break;
2746 case 2:
2747 adev->sdma_timeout = timeout;
2748 break;
2749 case 3:
2750 adev->video_timeout = timeout;
2751 break;
2752 default:
2753 break;
2754 }
2755 }
2756 /*
2757 * There is only one value specified and
2758 * it should apply to all non-compute jobs.
2759 */
bcccee89 2760 if (index == 1) {
71f98027 2761 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
2762 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
2763 adev->compute_timeout = adev->gfx_timeout;
2764 }
71f98027
AD
2765 }
2766
2767 return ret;
2768}
d4535e2c 2769
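
For illustration, a boot parameter such as amdgpu.lockup_timeout=10000,-1,10000,10000 would leave the parsing loop above with roughly the assignments below (values in milliseconds; a negative field means no enforced timeout). This is a hypothetical walk-through of the field mapping, not additional driver code:

static void example_timeout_mapping(struct amdgpu_device *adev)
{
	adev->gfx_timeout     = msecs_to_jiffies(10000);	/* field 0: gfx */
	adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;		/* field 1: compute (-1) */
	adev->sdma_timeout    = msecs_to_jiffies(10000);	/* field 2: sdma */
	adev->video_timeout   = msecs_to_jiffies(10000);	/* field 3: video */
}
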
d38ceaf9
AD
2770/**
2771 * amdgpu_device_init - initialize the driver
2772 *
2773 * @adev: amdgpu_device pointer
87e3f136 2774 * @ddev: drm dev pointer
d38ceaf9
AD
2775 * @pdev: pci dev pointer
2776 * @flags: driver flags
2777 *
2778 * Initializes the driver info and hw (all asics).
2779 * Returns 0 for success or an error on failure.
2780 * Called at driver startup.
2781 */
2782int amdgpu_device_init(struct amdgpu_device *adev,
2783 struct drm_device *ddev,
2784 struct pci_dev *pdev,
2785 uint32_t flags)
2786{
2787 int r, i;
3840c5bc 2788 bool boco = false;
95844d20 2789 u32 max_MBps;
d38ceaf9
AD
2790
2791 adev->shutdown = false;
2792 adev->dev = &pdev->dev;
2793 adev->ddev = ddev;
2794 adev->pdev = pdev;
2795 adev->flags = flags;
4e66d7d2
YZ
2796
2797 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
2798 adev->asic_type = amdgpu_force_asic_type;
2799 else
2800 adev->asic_type = flags & AMD_ASIC_MASK;
2801
d38ceaf9 2802 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2
SL
2803 if (amdgpu_emu_mode == 1)
2804 adev->usec_timeout *= 2;
770d13b1 2805 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
2806 adev->accel_working = false;
2807 adev->num_rings = 0;
2808 adev->mman.buffer_funcs = NULL;
2809 adev->mman.buffer_funcs_ring = NULL;
2810 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 2811 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 2812 adev->gmc.gmc_funcs = NULL;
f54d1867 2813 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 2814 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
2815
2816 adev->smc_rreg = &amdgpu_invalid_rreg;
2817 adev->smc_wreg = &amdgpu_invalid_wreg;
2818 adev->pcie_rreg = &amdgpu_invalid_rreg;
2819 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
2820 adev->pciep_rreg = &amdgpu_invalid_rreg;
2821 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
2822 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
2823 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
2824 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
2825 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
2826 adev->didt_rreg = &amdgpu_invalid_rreg;
2827 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
2828 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
2829 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
2830 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
2831 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
2832
3e39ab90
AD
2833 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
2834 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
2835 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
2836
2837 /* mutex initializations are all done here so we
2838 * can call functions without locking issues */
d38ceaf9 2839 atomic_set(&adev->irq.ih.lock, 0);
0e5ca0d1 2840 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
2841 mutex_init(&adev->pm.mutex);
2842 mutex_init(&adev->gfx.gpu_clock_mutex);
2843 mutex_init(&adev->srbm_mutex);
b8866c26 2844 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 2845 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 2846 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 2847 mutex_init(&adev->mn_lock);
e23b74aa 2848 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 2849 hash_init(adev->mn_hash);
13a752e3 2850 mutex_init(&adev->lock_reset);
32eaeae0 2851 mutex_init(&adev->psp.mutex);
bd052211 2852 mutex_init(&adev->notifier_lock);
d38ceaf9 2853
912dfc84
EQ
2854 r = amdgpu_device_check_arguments(adev);
2855 if (r)
2856 return r;
d38ceaf9 2857
d38ceaf9
AD
2858 spin_lock_init(&adev->mmio_idx_lock);
2859 spin_lock_init(&adev->smc_idx_lock);
2860 spin_lock_init(&adev->pcie_idx_lock);
2861 spin_lock_init(&adev->uvd_ctx_idx_lock);
2862 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 2863 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 2864 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 2865 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 2866 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 2867
0c4e7fa5
CZ
2868 INIT_LIST_HEAD(&adev->shadow_list);
2869 mutex_init(&adev->shadow_list_lock);
2870
795f2813
AR
2871 INIT_LIST_HEAD(&adev->ring_lru_list);
2872 spin_lock_init(&adev->ring_lru_list_lock);
2873
beff74bc
AD
2874 INIT_DELAYED_WORK(&adev->delayed_init_work,
2875 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
2876 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
2877 amdgpu_device_delay_enable_gfx_off);
2dc80b00 2878
d4535e2c
AG
2879 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
2880
d23ee13f 2881 adev->gfx.gfx_off_req_count = 1;
b1ddf548
RZ
2882 adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false;
2883
0fa49558
AX
2884 /* Registers mapping */
2885 /* TODO: block userspace mapping of io register */
da69c161
KW
2886 if (adev->asic_type >= CHIP_BONAIRE) {
2887 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
2888 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
2889 } else {
2890 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
2891 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
2892 }
d38ceaf9 2893
d38ceaf9
AD
2894 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
2895 if (adev->rmmio == NULL) {
2896 return -ENOMEM;
2897 }
2898 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
2899 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
2900
d38ceaf9
AD
2901 /* io port mapping */
2902 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
2903 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
2904 adev->rio_mem_size = pci_resource_len(adev->pdev, i);
2905 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
2906 break;
2907 }
2908 }
2909 if (adev->rio_mem == NULL)
b64a18c5 2910 DRM_INFO("PCI I/O BAR is not found.\n");
d38ceaf9 2911
b2109d8e
JX
2912 /* enable PCIE atomic ops */
2913 r = pci_enable_atomic_ops_to_root(adev->pdev,
2914 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
2915 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
2916 if (r) {
2917 adev->have_atomics_support = false;
2918 DRM_INFO("PCIE atomic ops is not supported\n");
2919 } else {
2920 adev->have_atomics_support = true;
2921 }
2922
5494d864
AD
2923 amdgpu_device_get_pcie_info(adev);
2924
b239c017
JX
2925 if (amdgpu_mcbp)
2926 DRM_INFO("MCBP is enabled\n");
2927
5f84cc63
JX
2928 if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
2929 adev->enable_mes = true;
2930
f54eeab4 2931 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) {
a190d1c7
XY
2932 r = amdgpu_discovery_init(adev);
2933 if (r) {
2934 dev_err(adev->dev, "amdgpu_discovery_init failed\n");
2935 return r;
2936 }
2937 }
2938
d38ceaf9 2939 /* early init functions */
06ec9070 2940 r = amdgpu_device_ip_early_init(adev);
d38ceaf9
AD
2941 if (r)
2942 return r;
2943
df99ac0f
JZ
2944 r = amdgpu_device_get_job_timeout_settings(adev);
2945 if (r) {
2946 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
2947 return r;
2948 }
2949
6585661d
OZ
2950 /* doorbell bar mapping and doorbell index init*/
2951 amdgpu_device_doorbell_init(adev);
2952
d38ceaf9
AD
2953 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
2954 /* this will fail for cards that aren't VGA class devices, just
2955 * ignore it */
06ec9070 2956 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
d38ceaf9 2957
31af062a 2958 if (amdgpu_device_supports_boco(ddev))
3840c5bc
AD
2959 boco = true;
2960 if (amdgpu_has_atpx() &&
2961 (amdgpu_is_atpx_hybrid() ||
2962 amdgpu_has_atpx_dgpu_power_cntl()) &&
2963 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 2964 vga_switcheroo_register_client(adev->pdev,
3840c5bc
AD
2965 &amdgpu_switcheroo_ops, boco);
2966 if (boco)
d38ceaf9
AD
2967 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
2968
9475a943
SL
2969 if (amdgpu_emu_mode == 1) {
2970 /* post the asic on emulation mode */
2971 emu_soc_asic_init(adev);
bfca0289 2972 goto fence_driver_init;
9475a943 2973 }
bfca0289 2974
4e99a44e
ML
2975 /* detect if we are running with an SR-IOV vBIOS */
2976 amdgpu_device_detect_sriov_bios(adev);
048765ad 2977
95e8e59e
AD
2978 /* check if we need to reset the asic
2979 * E.g., driver was not cleanly unloaded previously, etc.
2980 */
f14899fd 2981 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
95e8e59e
AD
2982 r = amdgpu_asic_reset(adev);
2983 if (r) {
2984 dev_err(adev->dev, "asic reset on init failed\n");
2985 goto failed;
2986 }
2987 }
2988
d38ceaf9 2989 /* Post card if necessary */
39c640c0 2990 if (amdgpu_device_need_post(adev)) {
d38ceaf9 2991 if (!adev->bios) {
bec86378 2992 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
2993 r = -EINVAL;
2994 goto failed;
d38ceaf9 2995 }
bec86378 2996 DRM_INFO("GPU posting now...\n");
4e99a44e
ML
2997 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
2998 if (r) {
2999 dev_err(adev->dev, "gpu post error!\n");
3000 goto failed;
3001 }
d38ceaf9
AD
3002 }
3003
88b64e95
AD
3004 if (adev->is_atom_fw) {
3005 /* Initialize clocks */
3006 r = amdgpu_atomfirmware_get_clock_info(adev);
3007 if (r) {
3008 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 3009 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
3010 goto failed;
3011 }
3012 } else {
a5bde2f9
AD
3013 /* Initialize clocks */
3014 r = amdgpu_atombios_get_clock_info(adev);
3015 if (r) {
3016 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 3017 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 3018 goto failed;
a5bde2f9
AD
3019 }
3020 /* init i2c buses */
4562236b
HW
3021 if (!amdgpu_device_has_dc_support(adev))
3022 amdgpu_atombios_i2c_init(adev);
2c1a2784 3023 }
d38ceaf9 3024
bfca0289 3025fence_driver_init:
d38ceaf9
AD
3026 /* Fence driver */
3027 r = amdgpu_fence_driver_init(adev);
2c1a2784
AD
3028 if (r) {
3029 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
e23b74aa 3030 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3031 goto failed;
2c1a2784 3032 }
d38ceaf9
AD
3033
3034 /* init the mode config */
3035 drm_mode_config_init(adev->ddev);
3036
06ec9070 3037 r = amdgpu_device_ip_init(adev);
d38ceaf9 3038 if (r) {
8840a387 3039 /* failed in exclusive mode due to timeout */
3040 if (amdgpu_sriov_vf(adev) &&
3041 !amdgpu_sriov_runtime(adev) &&
3042 amdgpu_virt_mmio_blocked(adev) &&
3043 !amdgpu_virt_wait_reset(adev)) {
3044 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
3045 /* Don't send request since VF is inactive. */
3046 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3047 adev->virt.ops = NULL;
8840a387 3048 r = -EAGAIN;
3049 goto failed;
3050 }
06ec9070 3051 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3052 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
83ba126a 3053 goto failed;
d38ceaf9
AD
3054 }
3055
d7f72fe4
YZ
3056 DRM_DEBUG("SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
3057 adev->gfx.config.max_shader_engines,
3058 adev->gfx.config.max_sh_per_se,
3059 adev->gfx.config.max_cu_per_sh,
3060 adev->gfx.cu_info.number);
3061
f880799d
ND
3062 amdgpu_ctx_init_sched(adev);
3063
d38ceaf9
AD
3064 adev->accel_working = true;
3065
e59c0205
AX
3066 amdgpu_vm_check_compute_bug(adev);
3067
95844d20
MO
3068 /* Initialize the buffer migration limit. */
3069 if (amdgpu_moverate >= 0)
3070 max_MBps = amdgpu_moverate;
3071 else
3072 max_MBps = 8; /* Allow 8 MB/s. */
3073 /* Get a log2 for easy divisions. */
3074 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3075
9bc92b9c
ML
3076 amdgpu_fbdev_init(adev);
3077
d2f52ac8 3078 r = amdgpu_pm_sysfs_init(adev);
7c868b59
YT
3079 if (r) {
3080 adev->pm_sysfs_en = false;
d2f52ac8 3081 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
7c868b59
YT
3082 } else
3083 adev->pm_sysfs_en = true;
d2f52ac8 3084
5bb23532 3085 r = amdgpu_ucode_sysfs_init(adev);
7c868b59
YT
3086 if (r) {
3087 adev->ucode_sysfs_en = false;
5bb23532 3088 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
7c868b59
YT
3089 } else
3090 adev->ucode_sysfs_en = true;
5bb23532 3091
75758255 3092 r = amdgpu_debugfs_gem_init(adev);
3f14e623 3093 if (r)
d38ceaf9 3094 DRM_ERROR("registering gem debugfs failed (%d).\n", r);
d38ceaf9
AD
3095
3096 r = amdgpu_debugfs_regs_init(adev);
3f14e623 3097 if (r)
d38ceaf9 3098 DRM_ERROR("registering register debugfs failed (%d).\n", r);
d38ceaf9 3099
50ab2533 3100 r = amdgpu_debugfs_firmware_init(adev);
3f14e623 3101 if (r)
50ab2533 3102 DRM_ERROR("registering firmware debugfs failed (%d).\n", r);
50ab2533 3103
763efb6c 3104 r = amdgpu_debugfs_init(adev);
db95e218 3105 if (r)
763efb6c 3106 DRM_ERROR("Creating debugfs files failed (%d).\n", r);
db95e218 3107
d38ceaf9
AD
3108 if ((amdgpu_testing & 1)) {
3109 if (adev->accel_working)
3110 amdgpu_test_moves(adev);
3111 else
3112 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
3113 }
d38ceaf9
AD
3114 if (amdgpu_benchmarking) {
3115 if (adev->accel_working)
3116 amdgpu_benchmark(adev, amdgpu_benchmarking);
3117 else
3118 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
3119 }
3120
b0adca4d
EQ
3121 /*
3122 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3123 * Otherwise the mgpu fan boost feature will be skipped because the
3124 * gpu instance count would be too low.
3125 */
3126 amdgpu_register_gpu_instance(adev);
3127
d38ceaf9
AD
3128 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3129 * explicit gating rather than handling it automatically.
3130 */
06ec9070 3131 r = amdgpu_device_ip_late_init(adev);
2c1a2784 3132 if (r) {
06ec9070 3133 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
e23b74aa 3134 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
83ba126a 3135 goto failed;
2c1a2784 3136 }
d38ceaf9 3137
108c6a63 3138 /* must succeed. */
511fdbc3 3139 amdgpu_ras_resume(adev);
108c6a63 3140
beff74bc
AD
3141 queue_delayed_work(system_wq, &adev->delayed_init_work,
3142 msecs_to_jiffies(AMDGPU_RESUME_MS));
3143
dcea6e65
KR
3144 r = device_create_file(adev->dev, &dev_attr_pcie_replay_count);
3145 if (r) {
3146 dev_err(adev->dev, "Could not create pcie_replay_count");
3147 return r;
3148 }
108c6a63 3149
d155bef0
AB
3150 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3151 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
3152 if (r)
3153 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3154
d38ceaf9 3155 return 0;
83ba126a
AD
3156
3157failed:
89041940 3158 amdgpu_vf_error_trans_all(adev);
3840c5bc 3159 if (boco)
83ba126a 3160 vga_switcheroo_fini_domain_pm_ops(adev->dev);
8840a387 3161
83ba126a 3162 return r;
d38ceaf9
AD
3163}
3164
d38ceaf9
AD
3165/**
3166 * amdgpu_device_fini - tear down the driver
3167 *
3168 * @adev: amdgpu_device pointer
3169 *
3170 * Tear down the driver info (all asics).
3171 * Called at driver shutdown.
3172 */
3173void amdgpu_device_fini(struct amdgpu_device *adev)
3174{
3175 int r;
3176
3177 DRM_INFO("amdgpu: finishing device.\n");
9f875167 3178 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 3179 adev->shutdown = true;
9f875167 3180
e5b03032
ML
3181 /* disable all interrupts */
3182 amdgpu_irq_disable_all(adev);
ff97cba8
ML
3183 if (adev->mode_info.mode_config_initialized){
3184 if (!amdgpu_device_has_dc_support(adev))
c2d88e06 3185 drm_helper_force_disable_all(adev->ddev);
ff97cba8
ML
3186 else
3187 drm_atomic_helper_shutdown(adev->ddev);
3188 }
d38ceaf9 3189 amdgpu_fence_driver_fini(adev);
7c868b59
YT
3190 if (adev->pm_sysfs_en)
3191 amdgpu_pm_sysfs_fini(adev);
d38ceaf9 3192 amdgpu_fbdev_fini(adev);
06ec9070 3193 r = amdgpu_device_ip_fini(adev);
ab4fe3e1
HR
3194 if (adev->firmware.gpu_info_fw) {
3195 release_firmware(adev->firmware.gpu_info_fw);
3196 adev->firmware.gpu_info_fw = NULL;
3197 }
d38ceaf9
AD
3198 adev->accel_working = false;
3199 /* free i2c buses */
4562236b
HW
3200 if (!amdgpu_device_has_dc_support(adev))
3201 amdgpu_i2c_fini(adev);
bfca0289
SL
3202
3203 if (amdgpu_emu_mode != 1)
3204 amdgpu_atombios_fini(adev);
3205
d38ceaf9
AD
3206 kfree(adev->bios);
3207 adev->bios = NULL;
3840c5bc
AD
3208 if (amdgpu_has_atpx() &&
3209 (amdgpu_is_atpx_hybrid() ||
3210 amdgpu_has_atpx_dgpu_power_cntl()) &&
3211 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 3212 vga_switcheroo_unregister_client(adev->pdev);
3840c5bc 3213 if (amdgpu_device_supports_boco(adev->ddev))
83ba126a 3214 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d38ceaf9
AD
3215 vga_client_register(adev->pdev, NULL, NULL, NULL);
3216 if (adev->rio_mem)
3217 pci_iounmap(adev->pdev, adev->rio_mem);
3218 adev->rio_mem = NULL;
3219 iounmap(adev->rmmio);
3220 adev->rmmio = NULL;
06ec9070 3221 amdgpu_device_doorbell_fini(adev);
e9bc1bf7 3222
d38ceaf9 3223 amdgpu_debugfs_regs_cleanup(adev);
dcea6e65 3224 device_remove_file(adev->dev, &dev_attr_pcie_replay_count);
7c868b59
YT
3225 if (adev->ucode_sysfs_en)
3226 amdgpu_ucode_sysfs_fini(adev);
d155bef0
AB
3227 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3228 amdgpu_pmu_fini(adev);
6698a3d0 3229 amdgpu_debugfs_preempt_cleanup(adev);
f54eeab4 3230 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
a190d1c7 3231 amdgpu_discovery_fini(adev);
d38ceaf9
AD
3232}
3233
3234
3235/*
3236 * Suspend & resume.
3237 */
3238/**
810ddc3a 3239 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 3240 *
87e3f136
DP
3241 * @dev: drm dev pointer
3242 * @fbcon: notify the fbdev of suspend
d38ceaf9
AD
3244 *
3245 * Puts the hw in the suspend state (all asics).
3246 * Returns 0 for success or an error on failure.
3247 * Called at driver suspend.
3248 */
de185019 3249int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3250{
3251 struct amdgpu_device *adev;
3252 struct drm_crtc *crtc;
3253 struct drm_connector *connector;
f8d2d39e 3254 struct drm_connector_list_iter iter;
5ceb54c6 3255 int r;
d38ceaf9
AD
3256
3257 if (dev == NULL || dev->dev_private == NULL) {
3258 return -ENODEV;
3259 }
3260
3261 adev = dev->dev_private;
3262
3263 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3264 return 0;
3265
44779b43 3266 adev->in_suspend = true;
d38ceaf9
AD
3267 drm_kms_helper_poll_disable(dev);
3268
5f818173
S
3269 if (fbcon)
3270 amdgpu_fbdev_set_suspend(adev, 1);
3271
beff74bc 3272 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 3273
4562236b
HW
3274 if (!amdgpu_device_has_dc_support(adev)) {
3275 /* turn off display hw */
3276 drm_modeset_lock_all(dev);
f8d2d39e
LP
3277 drm_connector_list_iter_begin(dev, &iter);
3278 drm_for_each_connector_iter(connector, &iter)
3279 drm_helper_connector_dpms(connector,
3280 DRM_MODE_DPMS_OFF);
3281 drm_connector_list_iter_end(&iter);
4562236b 3282 drm_modeset_unlock_all(dev);
fe1053b7
AD
3283 /* unpin the front buffers and cursors */
3284 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3285 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3286 struct drm_framebuffer *fb = crtc->primary->fb;
3287 struct amdgpu_bo *robj;
3288
91334223 3289 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3290 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3291 r = amdgpu_bo_reserve(aobj, true);
3292 if (r == 0) {
3293 amdgpu_bo_unpin(aobj);
3294 amdgpu_bo_unreserve(aobj);
3295 }
756e6880 3296 }
756e6880 3297
fe1053b7
AD
3298 if (fb == NULL || fb->obj[0] == NULL) {
3299 continue;
3300 }
3301 robj = gem_to_amdgpu_bo(fb->obj[0]);
3302 /* don't unpin kernel fb objects */
3303 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
3304 r = amdgpu_bo_reserve(robj, true);
3305 if (r == 0) {
3306 amdgpu_bo_unpin(robj);
3307 amdgpu_bo_unreserve(robj);
3308 }
d38ceaf9
AD
3309 }
3310 }
3311 }
fe1053b7
AD
3312
3313 amdgpu_amdkfd_suspend(adev);
3314
5e6932fe 3315 amdgpu_ras_suspend(adev);
3316
fe1053b7
AD
3317 r = amdgpu_device_ip_suspend_phase1(adev);
3318
d38ceaf9
AD
3319 /* evict vram memory */
3320 amdgpu_bo_evict_vram(adev);
3321
5ceb54c6 3322 amdgpu_fence_driver_suspend(adev);
d38ceaf9 3323
fe1053b7 3324 r = amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 3325
a0a71e49
AD
3326 /* evict remaining vram memory
3327 * This second call to evict vram is to evict the gart page table
3328 * using the CPU.
3329 */
d38ceaf9
AD
3330 amdgpu_bo_evict_vram(adev);
3331
d38ceaf9
AD
3332 return 0;
3333}
3334
3335/**
810ddc3a 3336 * amdgpu_device_resume - initiate device resume
d38ceaf9 3337 *
87e3f136
DP
3338 * @dev: drm dev pointer
3339 * @fbcon: notify the fbdev of resume
d38ceaf9
AD
3341 *
3342 * Bring the hw back to operating state (all asics).
3343 * Returns 0 for success or an error on failure.
3344 * Called at driver resume.
3345 */
de185019 3346int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3347{
3348 struct drm_connector *connector;
f8d2d39e 3349 struct drm_connector_list_iter iter;
d38ceaf9 3350 struct amdgpu_device *adev = dev->dev_private;
756e6880 3351 struct drm_crtc *crtc;
03161a6e 3352 int r = 0;
d38ceaf9
AD
3353
3354 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3355 return 0;
3356
d38ceaf9 3357 /* post card */
39c640c0 3358 if (amdgpu_device_need_post(adev)) {
74b0b157 3359 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3360 if (r)
3361 DRM_ERROR("amdgpu asic init failed\n");
3362 }
d38ceaf9 3363
06ec9070 3364 r = amdgpu_device_ip_resume(adev);
e6707218 3365 if (r) {
06ec9070 3366 DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
4d3b9ae5 3367 return r;
e6707218 3368 }
5ceb54c6
AD
3369 amdgpu_fence_driver_resume(adev);
3370
d38ceaf9 3371
06ec9070 3372 r = amdgpu_device_ip_late_init(adev);
03161a6e 3373 if (r)
4d3b9ae5 3374 return r;
d38ceaf9 3375
beff74bc
AD
3376 queue_delayed_work(system_wq, &adev->delayed_init_work,
3377 msecs_to_jiffies(AMDGPU_RESUME_MS));
3378
fe1053b7
AD
3379 if (!amdgpu_device_has_dc_support(adev)) {
3380 /* pin cursors */
3381 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3382 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3383
91334223 3384 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3385 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3386 r = amdgpu_bo_reserve(aobj, true);
3387 if (r == 0) {
3388 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
3389 if (r != 0)
3390 DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
3391 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
3392 amdgpu_bo_unreserve(aobj);
3393 }
756e6880
AD
3394 }
3395 }
3396 }
ba997709
YZ
3397 r = amdgpu_amdkfd_resume(adev);
3398 if (r)
3399 return r;
756e6880 3400
96a5d8d4 3402 /* Make sure IB tests are flushed */
beff74bc 3402 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 3403
d38ceaf9
AD
3404 /* blat the mode back in */
3405 if (fbcon) {
4562236b
HW
3406 if (!amdgpu_device_has_dc_support(adev)) {
3407 /* pre DCE11 */
3408 drm_helper_resume_force_mode(dev);
3409
3410 /* turn on display hw */
3411 drm_modeset_lock_all(dev);
f8d2d39e
LP
3412
3413 drm_connector_list_iter_begin(dev, &iter);
3414 drm_for_each_connector_iter(connector, &iter)
3415 drm_helper_connector_dpms(connector,
3416 DRM_MODE_DPMS_ON);
3417 drm_connector_list_iter_end(&iter);
3418
4562236b 3419 drm_modeset_unlock_all(dev);
d38ceaf9 3420 }
4d3b9ae5 3421 amdgpu_fbdev_set_suspend(adev, 0);
d38ceaf9
AD
3422 }
3423
3424 drm_kms_helper_poll_enable(dev);
23a1a9e5 3425
5e6932fe 3426 amdgpu_ras_resume(adev);
3427
23a1a9e5
L
3428 /*
3429 * Most of the connector probing functions try to acquire runtime pm
3430 * refs to ensure that the GPU is powered on when connector polling is
3431 * performed. Since we're calling this from a runtime PM callback,
3432 * trying to acquire rpm refs will cause us to deadlock.
3433 *
3434 * Since we're guaranteed to be holding the rpm lock, it's safe to
3435 * temporarily disable the rpm helpers so this doesn't deadlock us.
3436 */
3437#ifdef CONFIG_PM
3438 dev->dev->power.disable_depth++;
3439#endif
4562236b
HW
3440 if (!amdgpu_device_has_dc_support(adev))
3441 drm_helper_hpd_irq_event(dev);
3442 else
3443 drm_kms_helper_hotplug_event(dev);
23a1a9e5
L
3444#ifdef CONFIG_PM
3445 dev->dev->power.disable_depth--;
3446#endif
44779b43
RZ
3447 adev->in_suspend = false;
3448
4d3b9ae5 3449 return 0;
d38ceaf9
AD
3450}
3451
e3ecdffa
AD
3452/**
3453 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
3454 *
3455 * @adev: amdgpu_device pointer
3456 *
3457 * The list of all the hardware IPs that make up the asic is walked and
3458 * the check_soft_reset callbacks are run. check_soft_reset determines
3459 * if the asic is still hung or not.
3460 * Returns true if any of the IPs are still in a hung state, false if not.
3461 */
06ec9070 3462static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
3463{
3464 int i;
3465 bool asic_hang = false;
3466
f993d628
ML
3467 if (amdgpu_sriov_vf(adev))
3468 return true;
3469
8bc04c29
AD
3470 if (amdgpu_asic_need_full_reset(adev))
3471 return true;
3472
63fbf42f 3473 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3474 if (!adev->ip_blocks[i].status.valid)
63fbf42f 3475 continue;
a1255107
AD
3476 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
3477 adev->ip_blocks[i].status.hang =
3478 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
3479 if (adev->ip_blocks[i].status.hang) {
3480 DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
3481 asic_hang = true;
3482 }
3483 }
3484 return asic_hang;
3485}
3486
e3ecdffa
AD
3487/**
3488 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3489 *
3490 * @adev: amdgpu_device pointer
3491 *
3492 * The list of all the hardware IPs that make up the asic is walked and the
3493 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
3494 * handles any IP specific hardware or software state changes that are
3495 * necessary for a soft reset to succeed.
3496 * Returns 0 on success, negative error code on failure.
3497 */
06ec9070 3498static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
3499{
3500 int i, r = 0;
3501
3502 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3503 if (!adev->ip_blocks[i].status.valid)
d31a501e 3504 continue;
a1255107
AD
3505 if (adev->ip_blocks[i].status.hang &&
3506 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3507 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
3508 if (r)
3509 return r;
3510 }
3511 }
3512
3513 return 0;
3514}
3515
e3ecdffa
AD
3516/**
3517 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3518 *
3519 * @adev: amdgpu_device pointer
3520 *
3521 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
3522 * reset is necessary to recover.
3523 * Returns true if a full asic reset is required, false if not.
3524 */
06ec9070 3525static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 3526{
da146d3b
AD
3527 int i;
3528
8bc04c29
AD
3529 if (amdgpu_asic_need_full_reset(adev))
3530 return true;
3531
da146d3b 3532 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3533 if (!adev->ip_blocks[i].status.valid)
da146d3b 3534 continue;
a1255107
AD
3535 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3536 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3537 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
3538 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3539 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 3540 if (adev->ip_blocks[i].status.hang) {
da146d3b
AD
 3541 DRM_INFO("Some blocks need a full reset!\n");
3542 return true;
3543 }
3544 }
35d782fe
CZ
3545 }
3546 return false;
3547}
3548
e3ecdffa
AD
3549/**
3550 * amdgpu_device_ip_soft_reset - do a soft reset
3551 *
3552 * @adev: amdgpu_device pointer
3553 *
3554 * The list of all the hardware IPs that make up the asic is walked and the
3555 * soft_reset callbacks are run if the block is hung. soft_reset handles any
3556 * IP specific hardware or software state changes that are necessary to soft
3557 * reset the IP.
3558 * Returns 0 on success, negative error code on failure.
3559 */
06ec9070 3560static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3561{
3562 int i, r = 0;
3563
3564 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3565 if (!adev->ip_blocks[i].status.valid)
35d782fe 3566 continue;
a1255107
AD
3567 if (adev->ip_blocks[i].status.hang &&
3568 adev->ip_blocks[i].version->funcs->soft_reset) {
3569 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
3570 if (r)
3571 return r;
3572 }
3573 }
3574
3575 return 0;
3576}
3577
e3ecdffa
AD
3578/**
3579 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
3580 *
3581 * @adev: amdgpu_device pointer
3582 *
3583 * The list of all the hardware IPs that make up the asic is walked and the
3584 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
3585 * handles any IP specific hardware or software state changes that are
3586 * necessary after the IP has been soft reset.
3587 * Returns 0 on success, negative error code on failure.
3588 */
06ec9070 3589static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3590{
3591 int i, r = 0;
3592
3593 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3594 if (!adev->ip_blocks[i].status.valid)
35d782fe 3595 continue;
a1255107
AD
3596 if (adev->ip_blocks[i].status.hang &&
3597 adev->ip_blocks[i].version->funcs->post_soft_reset)
3598 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
3599 if (r)
3600 return r;
3601 }
3602
3603 return 0;
3604}
3605
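/*
 * The soft reset helpers above are driven from
 * amdgpu_device_pre_asic_reset() further below, roughly in the order
 * check_soft_reset -> pre_soft_reset -> soft_reset -> post_soft_reset.
 * If any step fails, or check_soft_reset still reports a hang afterwards,
 * the caller falls back to a full ASIC reset.
 */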
e3ecdffa 3606/**
c33adbc7 3607 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
3608 *
3609 * @adev: amdgpu_device pointer
3610 *
3611 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
3612 * restore things like GPUVM page tables after a GPU reset where
3613 * the contents of VRAM might be lost.
403009bf
CK
3614 *
3615 * Returns:
3616 * 0 on success, negative error code on failure.
e3ecdffa 3617 */
c33adbc7 3618static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 3619{
c41d1cf6 3620 struct dma_fence *fence = NULL, *next = NULL;
403009bf
CK
3621 struct amdgpu_bo *shadow;
3622 long r = 1, tmo;
c41d1cf6
ML
3623
3624 if (amdgpu_sriov_runtime(adev))
b045d3af 3625 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
3626 else
3627 tmo = msecs_to_jiffies(100);
3628
3629 DRM_INFO("recover vram bo from shadow start\n");
3630 mutex_lock(&adev->shadow_list_lock);
403009bf
CK
3631 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
3632
3633 /* No need to recover an evicted BO */
3634 if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
b575f10d 3635 shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
403009bf
CK
3636 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
3637 continue;
3638
3639 r = amdgpu_bo_restore_shadow(shadow, &next);
3640 if (r)
3641 break;
3642
c41d1cf6 3643 if (fence) {
1712fb1a 3644 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
3645 dma_fence_put(fence);
3646 fence = next;
1712fb1a 3647 if (tmo == 0) {
3648 r = -ETIMEDOUT;
c41d1cf6 3649 break;
1712fb1a 3650 } else if (tmo < 0) {
3651 r = tmo;
3652 break;
3653 }
403009bf
CK
3654 } else {
3655 fence = next;
c41d1cf6 3656 }
c41d1cf6
ML
3657 }
3658 mutex_unlock(&adev->shadow_list_lock);
3659
403009bf
CK
3660 if (fence)
3661 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
3662 dma_fence_put(fence);
3663
1712fb1a 3664 if (r < 0 || tmo <= 0) {
3665 DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
3666 return -EIO;
3667 }
c41d1cf6 3668
403009bf
CK
3669 DRM_INFO("recover vram bo from shadow done\n");
3670 return 0;
c41d1cf6
ML
3671}
3672
a90ad3c2 3673
e3ecdffa 3674/**
06ec9070 3675 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e
ML
3676 *
3677 * @adev: amdgpu device pointer
87e3f136 3678 * @from_hypervisor: request from hypervisor
5740682e
ML
3679 *
 3680 * Do a VF FLR and reinitialize the ASIC.
3f48c681 3681 * Returns 0 on success, negative error code on failure.
e3ecdffa
AD
3682 */
3683static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
3684 bool from_hypervisor)
5740682e
ML
3685{
3686 int r;
3687
3688 if (from_hypervisor)
3689 r = amdgpu_virt_request_full_gpu(adev, true);
3690 else
3691 r = amdgpu_virt_reset_gpu(adev);
3692 if (r)
3693 return r;
a90ad3c2
ML
3694
3695 /* Resume IP prior to SMC */
06ec9070 3696 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
3697 if (r)
3698 goto error;
a90ad3c2 3699
c9ffa427 3700 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 3701 /* we need recover gart prior to run SMC/CP/SDMA resume */
c1c7ce8f 3702 amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
a90ad3c2 3703
7a3e0bb2
RZ
3704 r = amdgpu_device_fw_loading(adev);
3705 if (r)
3706 return r;
3707
a90ad3c2 3708 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 3709 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
3710 if (r)
3711 goto error;
a90ad3c2
ML
3712
3713 amdgpu_irq_gpu_reset_resume_helper(adev);
5740682e 3714 r = amdgpu_ib_ring_tests(adev);
f81e8d53 3715 amdgpu_amdkfd_post_reset(adev);
a90ad3c2 3716
abc34253
ED
3717error:
3718 amdgpu_virt_release_full_gpu(adev, true);
c41d1cf6 3719 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 3720 amdgpu_inc_vram_lost(adev);
c33adbc7 3721 r = amdgpu_device_recover_vram(adev);
a90ad3c2
ML
3722 }
3723
3724 return r;
3725}
3726
12938fad
CK
3727/**
3728 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
3729 *
3730 * @adev: amdgpu device pointer
3731 *
3732 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
3733 * a hung GPU.
3734 */
3735bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
3736{
3737 if (!amdgpu_device_ip_check_soft_reset(adev)) {
3738 DRM_INFO("Timeout, but no hardware hang detected.\n");
3739 return false;
3740 }
3741
3ba7b418
AG
3742 if (amdgpu_gpu_recovery == 0)
3743 goto disabled;
3744
3745 if (amdgpu_sriov_vf(adev))
3746 return true;
3747
3748 if (amdgpu_gpu_recovery == -1) {
3749 switch (adev->asic_type) {
fc42d47c
AG
3750 case CHIP_BONAIRE:
3751 case CHIP_HAWAII:
3ba7b418
AG
3752 case CHIP_TOPAZ:
3753 case CHIP_TONGA:
3754 case CHIP_FIJI:
3755 case CHIP_POLARIS10:
3756 case CHIP_POLARIS11:
3757 case CHIP_POLARIS12:
3758 case CHIP_VEGAM:
3759 case CHIP_VEGA20:
3760 case CHIP_VEGA10:
3761 case CHIP_VEGA12:
c43b849f 3762 case CHIP_RAVEN:
e9d4cf91 3763 case CHIP_ARCTURUS:
2cb44fb0 3764 case CHIP_RENOIR:
658c6639
AD
3765 case CHIP_NAVI10:
3766 case CHIP_NAVI14:
3767 case CHIP_NAVI12:
3ba7b418
AG
3768 break;
3769 default:
3770 goto disabled;
3771 }
12938fad
CK
3772 }
3773
3774 return true;
3ba7b418
AG
3775
3776disabled:
3777 DRM_INFO("GPU recovery disabled.\n");
3778 return false;
12938fad
CK
3779}
3780
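/*
 * Note: this is typically consulted by the job timeout handler before it
 * calls amdgpu_device_gpu_recover().  The amdgpu_gpu_recovery module
 * parameter selects the policy: 0 disables recovery, -1 (auto) enables it
 * only for the ASICs listed above, and SR-IOV VFs always attempt recovery.
 */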
5c6dd71e 3781
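/**
 * amdgpu_device_pre_asic_reset - prepare a single device for ASIC reset
 *
 * @adev: amdgpu_device pointer
 * @job: the job that triggered the hang, or NULL
 * @need_full_reset_arg: in/out flag, set when a full ASIC reset is required
 *
 * Force-completes the hardware fences on all rings, raises the karma of the
 * guilty job (if any) and, on bare metal, first attempts an IP soft reset.
 * If the soft reset fails or the hang persists, the IPs are suspended and
 * *need_full_reset_arg is set so the caller performs a full ASIC reset.
 * Returns 0 on success, negative error code on failure.
 */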
26bc5340
AG
3782static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
3783 struct amdgpu_job *job,
3784 bool *need_full_reset_arg)
3785{
3786 int i, r = 0;
3787 bool need_full_reset = *need_full_reset_arg;
71182665 3788
71182665 3789 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
3790 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3791 struct amdgpu_ring *ring = adev->rings[i];
3792
51687759 3793 if (!ring || !ring->sched.thread)
0875dc9e 3794 continue;
5740682e 3795
2f9d4084
ML
3796 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
3797 amdgpu_fence_driver_force_completion(ring);
0875dc9e 3798 }
d38ceaf9 3799
222b5f04
AG
 3800 if (job)
3801 drm_sched_increase_karma(&job->base);
3802
1d721ed6 3803 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
3804 if (!amdgpu_sriov_vf(adev)) {
3805
3806 if (!need_full_reset)
3807 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
3808
3809 if (!need_full_reset) {
3810 amdgpu_device_ip_pre_soft_reset(adev);
3811 r = amdgpu_device_ip_soft_reset(adev);
3812 amdgpu_device_ip_post_soft_reset(adev);
3813 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
3814 DRM_INFO("soft reset failed, will fallback to full reset!\n");
3815 need_full_reset = true;
3816 }
3817 }
3818
3819 if (need_full_reset)
3820 r = amdgpu_device_ip_suspend(adev);
3821
3822 *need_full_reset_arg = need_full_reset;
3823 }
3824
3825 return r;
3826}
3827
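/**
 * amdgpu_do_asic_reset - perform the actual ASIC reset and bring-up
 *
 * @hive: XGMI hive the devices belong to, or NULL
 * @device_list_handle: list of devices to reset
 * @need_full_reset_arg: in/out flag, true when a full ASIC reset is required
 *
 * If a full reset is required, resets every ASIC in the list (in parallel
 * for XGMI hives), re-posts the cards and resumes the IP blocks, recovering
 * the GTT manager and updating the PSP XGMI topology along the way.  IB ring
 * tests and VRAM recovery are then run for every device in the list.
 * Returns 0 on success, -EAGAIN if the caller should retry with a full
 * reset, or another negative error code on failure.
 */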
041a62bc 3828static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
26bc5340
AG
3829 struct list_head *device_list_handle,
3830 bool *need_full_reset_arg)
3831{
3832 struct amdgpu_device *tmp_adev = NULL;
3833 bool need_full_reset = *need_full_reset_arg, vram_lost = false;
3834 int r = 0;
3835
3836 /*
 3837 * ASIC reset has to be done on all XGMI hive nodes ASAP
 3838 * to allow proper link negotiation in FW (within 1 sec)
3839 */
3840 if (need_full_reset) {
3841 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
041a62bc 3842 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 3843 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
c96cf282 3844 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
3845 r = -EALREADY;
3846 } else
3847 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 3848
041a62bc
AG
3849 if (r) {
3850 DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s",
3851 r, tmp_adev->ddev->unique);
3852 break;
ce316fa5
LM
3853 }
3854 }
3855
041a62bc
AG
3856 /* For XGMI wait for all resets to complete before proceed */
3857 if (!r) {
ce316fa5
LM
3858 list_for_each_entry(tmp_adev, device_list_handle,
3859 gmc.xgmi.head) {
3860 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
3861 flush_work(&tmp_adev->xgmi_reset_work);
3862 r = tmp_adev->asic_reset_res;
3863 if (r)
3864 break;
ce316fa5
LM
3865 }
3866 }
3867 }
ce316fa5 3868 }
26bc5340 3869
00eaa571
LM
3870 if (!r && amdgpu_ras_intr_triggered())
3871 amdgpu_ras_intr_cleared();
3872
26bc5340
AG
3873 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3874 if (need_full_reset) {
3875 /* post card */
3876 if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
3877 DRM_WARN("asic atom init failed!");
3878
3879 if (!r) {
3880 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
3881 r = amdgpu_device_ip_resume_phase1(tmp_adev);
3882 if (r)
3883 goto out;
3884
3885 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
3886 if (vram_lost) {
77e7f829 3887 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 3888 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
3889 }
3890
3891 r = amdgpu_gtt_mgr_recover(
3892 &tmp_adev->mman.bdev.man[TTM_PL_TT]);
3893 if (r)
3894 goto out;
3895
3896 r = amdgpu_device_fw_loading(tmp_adev);
3897 if (r)
3898 return r;
3899
3900 r = amdgpu_device_ip_resume_phase2(tmp_adev);
3901 if (r)
3902 goto out;
3903
3904 if (vram_lost)
3905 amdgpu_device_fill_reset_magic(tmp_adev);
3906
fdafb359
EQ
3907 /*
 3908 * Add this ASIC back as tracked now that the reset
 3909 * has completed successfully.
3910 */
3911 amdgpu_register_gpu_instance(tmp_adev);
3912
7c04ca50 3913 r = amdgpu_device_ip_late_init(tmp_adev);
3914 if (r)
3915 goto out;
3916
e79a04d5 3917 /* must succeed. */
511fdbc3 3918 amdgpu_ras_resume(tmp_adev);
e79a04d5 3919
26bc5340
AG
3920 /* Update PSP FW topology after reset */
3921 if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
3922 r = amdgpu_xgmi_update_topology(hive, tmp_adev);
3923 }
3924 }
3925
3926
3927out:
3928 if (!r) {
3929 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
3930 r = amdgpu_ib_ring_tests(tmp_adev);
3931 if (r) {
3932 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
3933 r = amdgpu_device_ip_suspend(tmp_adev);
3934 need_full_reset = true;
3935 r = -EAGAIN;
3936 goto end;
3937 }
3938 }
3939
3940 if (!r)
3941 r = amdgpu_device_recover_vram(tmp_adev);
3942 else
3943 tmp_adev->asic_reset_res = r;
3944 }
3945
3946end:
3947 *need_full_reset_arg = need_full_reset;
3948 return r;
3949}
3950
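/**
 * amdgpu_device_lock_adev - take the per-device reset lock
 *
 * @adev: amdgpu_device pointer
 * @trylock: use mutex_trylock() instead of blocking on the lock
 *
 * Marks the device as being in GPU reset, bumps the reset counter and sets
 * the MP1 state expected by the chosen reset method.  Returns false when
 * @trylock is set and the lock is already held, true otherwise.
 * amdgpu_device_unlock_adev() below undoes this.
 */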
1d721ed6 3951static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
26bc5340 3952{
1d721ed6
AG
3953 if (trylock) {
3954 if (!mutex_trylock(&adev->lock_reset))
3955 return false;
3956 } else
3957 mutex_lock(&adev->lock_reset);
5740682e 3958
26bc5340 3959 atomic_inc(&adev->gpu_reset_counter);
2a9b90ae 3960 adev->in_gpu_reset = true;
a3a09142
AD
3961 switch (amdgpu_asic_reset_method(adev)) {
3962 case AMD_RESET_METHOD_MODE1:
3963 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
3964 break;
3965 case AMD_RESET_METHOD_MODE2:
3966 adev->mp1_state = PP_MP1_STATE_RESET;
3967 break;
3968 default:
3969 adev->mp1_state = PP_MP1_STATE_NONE;
3970 break;
3971 }
1d721ed6
AG
3972
3973 return true;
26bc5340 3974}
d38ceaf9 3975
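/**
 * amdgpu_device_unlock_adev - release the per-device reset lock
 *
 * @adev: amdgpu_device pointer
 *
 * Flushes any collected VF errors, restores the MP1 state, clears the
 * in_gpu_reset flag and drops the reset lock taken by
 * amdgpu_device_lock_adev().
 */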
26bc5340
AG
3976static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
3977{
89041940 3978 amdgpu_vf_error_trans_all(adev);
a3a09142 3979 adev->mp1_state = PP_MP1_STATE_NONE;
2a9b90ae 3980 adev->in_gpu_reset = false;
13a752e3 3981 mutex_unlock(&adev->lock_reset);
26bc5340
AG
3982}
3983
26bc5340
AG
3984/**
3985 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
3986 *
3987 * @adev: amdgpu device pointer
 3988 * @job: the job that triggered the hang, or NULL
3989 *
3990 * Attempt to reset the GPU if it has hung (all asics).
 3991 * Attempt to do a soft reset or full reset and reinitialize the ASIC.
3992 * Returns 0 for success or an error on failure.
3993 */
3994
3995int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
3996 struct amdgpu_job *job)
3997{
1d721ed6
AG
3998 struct list_head device_list, *device_list_handle = NULL;
3999 bool need_full_reset, job_signaled;
26bc5340 4000 struct amdgpu_hive_info *hive = NULL;
26bc5340 4001 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 4002 int i, r = 0;
7c6e68c7 4003 bool in_ras_intr = amdgpu_ras_intr_triggered();
b823821f
LM
4004 bool use_baco =
4005 (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ?
4006 true : false;
26bc5340 4007
d5ea093e
AG
4008 /*
4009 * Flush RAM to disk so that after reboot
 4010 * the user can read the log and see why the system rebooted.
4011 */
b823821f 4012 if (in_ras_intr && !use_baco && amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
4013
4014 DRM_WARN("Emergency reboot.");
4015
4016 ksys_sync_helper();
4017 emergency_restart();
4018 }
4019
1d721ed6 4020 need_full_reset = job_signaled = false;
26bc5340
AG
4021 INIT_LIST_HEAD(&device_list);
4022
b823821f
LM
4023 dev_info(adev->dev, "GPU %s begin!\n",
4024 (in_ras_intr && !use_baco) ? "jobs stop":"reset");
26bc5340 4025
beff74bc 4026 cancel_delayed_work_sync(&adev->delayed_init_work);
c53e4db7 4027
1d721ed6
AG
4028 hive = amdgpu_get_xgmi_hive(adev, false);
4029
26bc5340 4030 /*
1d721ed6
AG
 4031 * Here we trylock to avoid a chain of resets executing from
 4032 * either a trigger by jobs on different adevs in an XGMI hive or jobs on
 4033 * different schedulers for the same device while this TO handler is running.
 4034 * We always reset all schedulers for a device and all devices in an XGMI
 4035 * hive, so that should take care of them too.
26bc5340 4036 */
1d721ed6
AG
4037
4038 if (hive && !mutex_trylock(&hive->reset_lock)) {
4039 DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
0b2d2c2e 4040 job ? job->base.id : -1, hive->hive_id);
26bc5340 4041 return 0;
1d721ed6 4042 }
26bc5340
AG
4043
4044 /* Start with adev pre asic reset first for soft reset check.*/
1d721ed6
AG
4045 if (!amdgpu_device_lock_adev(adev, !hive)) {
4046 DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
0b2d2c2e 4047 job ? job->base.id : -1);
1d721ed6 4048 return 0;
26bc5340
AG
4049 }
4050
7c6e68c7
AG
4051 /* Block kfd: SRIOV would do it separately */
4052 if (!amdgpu_sriov_vf(adev))
4053 amdgpu_amdkfd_pre_reset(adev);
4054
26bc5340 4055 /* Build list of devices to reset */
1d721ed6 4056 if (adev->gmc.xgmi.num_physical_nodes > 1) {
26bc5340 4057 if (!hive) {
7c6e68c7
AG
 4058 /* unlock kfd: SRIOV would do it separately */
4059 if (!amdgpu_sriov_vf(adev))
4060 amdgpu_amdkfd_post_reset(adev);
26bc5340
AG
4061 amdgpu_device_unlock_adev(adev);
4062 return -ENODEV;
4063 }
4064
4065 /*
4066 * In case we are in XGMI hive mode device reset is done for all the
4067 * nodes in the hive to retrain all XGMI links and hence the reset
4068 * sequence is executed in loop on all nodes.
4069 */
4070 device_list_handle = &hive->device_list;
4071 } else {
4072 list_add_tail(&adev->gmc.xgmi.head, &device_list);
4073 device_list_handle = &device_list;
4074 }
4075
1d721ed6
AG
4076 /* block all schedulers and reset given job's ring */
4077 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
7c6e68c7 4078 if (tmp_adev != adev) {
12ffa55d 4079 amdgpu_device_lock_adev(tmp_adev, false);
7c6e68c7
AG
4080 if (!amdgpu_sriov_vf(tmp_adev))
4081 amdgpu_amdkfd_pre_reset(tmp_adev);
4082 }
4083
12ffa55d
AG
4084 /*
 4085 * Mark the ASICs to be reset as untracked first,
 4086 * and add them back after the reset completes.
4087 */
4088 amdgpu_unregister_gpu_instance(tmp_adev);
4089
f1c1314b 4090 /* disable ras on ALL IPs */
b823821f
LM
4091 if (!(in_ras_intr && !use_baco) &&
4092 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 4093 amdgpu_ras_suspend(tmp_adev);
4094
1d721ed6
AG
4095 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4096 struct amdgpu_ring *ring = tmp_adev->rings[i];
4097
4098 if (!ring || !ring->sched.thread)
4099 continue;
4100
0b2d2c2e 4101 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 4102
b823821f 4103 if (in_ras_intr && !use_baco)
7c6e68c7 4104 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6
AG
4105 }
4106 }
4107
4108
b823821f 4109 if (in_ras_intr && !use_baco)
7c6e68c7
AG
4110 goto skip_sched_resume;
4111
1d721ed6
AG
4112 /*
4113 * Must check guilty signal here since after this point all old
4114 * HW fences are force signaled.
4115 *
4116 * job->base holds a reference to parent fence
4117 */
4118 if (job && job->base.s_fence->parent &&
4119 dma_fence_is_signaled(job->base.s_fence->parent))
4120 job_signaled = true;
4121
1d721ed6
AG
4122 if (job_signaled) {
4123 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
4124 goto skip_hw_reset;
4125 }
4126
4127
4128 /* Guilty job will be freed after this*/
0b2d2c2e 4129 r = amdgpu_device_pre_asic_reset(adev, job, &need_full_reset);
1d721ed6
AG
4130 if (r) {
4131 /*TODO Should we stop ?*/
4132 DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
4133 r, adev->ddev->unique);
4134 adev->asic_reset_res = r;
4135 }
4136
26bc5340
AG
4137retry: /* Rest of adevs pre asic reset from XGMI hive. */
4138 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4139
4140 if (tmp_adev == adev)
4141 continue;
4142
26bc5340
AG
4143 r = amdgpu_device_pre_asic_reset(tmp_adev,
4144 NULL,
4145 &need_full_reset);
4146 /*TODO Should we stop ?*/
4147 if (r) {
4148 DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
4149 r, tmp_adev->ddev->unique);
4150 tmp_adev->asic_reset_res = r;
4151 }
4152 }
4153
4154 /* Actual ASIC resets if needed.*/
4155 /* TODO Implement XGMI hive reset logic for SRIOV */
4156 if (amdgpu_sriov_vf(adev)) {
4157 r = amdgpu_device_reset_sriov(adev, job ? false : true);
4158 if (r)
4159 adev->asic_reset_res = r;
4160 } else {
041a62bc 4161 r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
26bc5340
AG
4162 if (r && r == -EAGAIN)
4163 goto retry;
4164 }
4165
1d721ed6
AG
4166skip_hw_reset:
4167
26bc5340
AG
 4168 /* Post ASIC reset for all devs. */
4169 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
7c6e68c7 4170
1d721ed6
AG
4171 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4172 struct amdgpu_ring *ring = tmp_adev->rings[i];
4173
4174 if (!ring || !ring->sched.thread)
4175 continue;
4176
 4177 /* No point in resubmitting jobs if we didn't HW reset */
4178 if (!tmp_adev->asic_reset_res && !job_signaled)
4179 drm_sched_resubmit_jobs(&ring->sched);
4180
4181 drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
4182 }
4183
4184 if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
4185 drm_helper_resume_force_mode(tmp_adev->ddev);
4186 }
4187
4188 tmp_adev->asic_reset_res = 0;
26bc5340
AG
4189
4190 if (r) {
4191 /* bad news, how to tell it to userspace ? */
12ffa55d 4192 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
4193 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
4194 } else {
12ffa55d 4195 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340 4196 }
7c6e68c7 4197 }
26bc5340 4198
7c6e68c7
AG
4199skip_sched_resume:
4200 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
 4201 /* unlock kfd: SRIOV would do it separately */
b823821f 4202 if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev))
7c6e68c7 4203 amdgpu_amdkfd_post_reset(tmp_adev);
26bc5340
AG
4204 amdgpu_device_unlock_adev(tmp_adev);
4205 }
4206
1d721ed6 4207 if (hive)
22d6575b 4208 mutex_unlock(&hive->reset_lock);
26bc5340
AG
4209
4210 if (r)
4211 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
d38ceaf9
AD
4212 return r;
4213}
4214
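/*
 * Note on locking: recovery is serialized per hive via hive->reset_lock and
 * per device via the lock_reset mutex taken in amdgpu_device_lock_adev().
 * A timeout handler that fails the trylock bails out above and relies on
 * the handler that owns the lock to reset all schedulers and, for XGMI,
 * all devices in the hive.
 */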
e3ecdffa
AD
4215/**
 4216 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
4217 *
4218 * @adev: amdgpu_device pointer
4219 *
 4220 * Fetches and stores in the driver the PCIE capabilities (gen speed
4221 * and lanes) of the slot the device is in. Handles APUs and
4222 * virtualized environments where PCIE config space may not be available.
4223 */
5494d864 4224static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 4225{
5d9a6330 4226 struct pci_dev *pdev;
c5313457
HK
4227 enum pci_bus_speed speed_cap, platform_speed_cap;
4228 enum pcie_link_width platform_link_width;
d0dd7f0c 4229
cd474ba0
AD
4230 if (amdgpu_pcie_gen_cap)
4231 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 4232
cd474ba0
AD
4233 if (amdgpu_pcie_lane_cap)
4234 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 4235
cd474ba0
AD
4236 /* covers APUs as well */
4237 if (pci_is_root_bus(adev->pdev->bus)) {
4238 if (adev->pm.pcie_gen_mask == 0)
4239 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
4240 if (adev->pm.pcie_mlw_mask == 0)
4241 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 4242 return;
cd474ba0 4243 }
d0dd7f0c 4244
c5313457
HK
4245 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
4246 return;
4247
dbaa922b
AD
4248 pcie_bandwidth_available(adev->pdev, NULL,
4249 &platform_speed_cap, &platform_link_width);
c5313457 4250
cd474ba0 4251 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
4252 /* asic caps */
4253 pdev = adev->pdev;
4254 speed_cap = pcie_get_speed_cap(pdev);
4255 if (speed_cap == PCI_SPEED_UNKNOWN) {
4256 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
4257 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4258 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 4259 } else {
5d9a6330
AD
4260 if (speed_cap == PCIE_SPEED_16_0GT)
4261 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4262 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4263 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4264 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
4265 else if (speed_cap == PCIE_SPEED_8_0GT)
4266 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4267 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4268 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
4269 else if (speed_cap == PCIE_SPEED_5_0GT)
4270 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4271 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
4272 else
4273 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
4274 }
4275 /* platform caps */
c5313457 4276 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
4277 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4278 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4279 } else {
c5313457 4280 if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
4281 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4282 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4283 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4284 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 4285 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
4286 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4287 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4288 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 4289 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
4290 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4291 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4292 else
4293 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
4294
cd474ba0
AD
4295 }
4296 }
4297 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 4298 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
4299 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
4300 } else {
c5313457 4301 switch (platform_link_width) {
5d9a6330 4302 case PCIE_LNK_X32:
cd474ba0
AD
4303 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
4304 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4305 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4306 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4307 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4308 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4309 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4310 break;
5d9a6330 4311 case PCIE_LNK_X16:
cd474ba0
AD
4312 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4313 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4314 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4315 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4316 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4317 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4318 break;
5d9a6330 4319 case PCIE_LNK_X12:
cd474ba0
AD
4320 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4321 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4322 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4323 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4324 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4325 break;
5d9a6330 4326 case PCIE_LNK_X8:
cd474ba0
AD
4327 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4328 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4329 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4330 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4331 break;
5d9a6330 4332 case PCIE_LNK_X4:
cd474ba0
AD
4333 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4334 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4335 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4336 break;
5d9a6330 4337 case PCIE_LNK_X2:
cd474ba0
AD
4338 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4339 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4340 break;
5d9a6330 4341 case PCIE_LNK_X1:
cd474ba0
AD
4342 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
4343 break;
4344 default:
4345 break;
4346 }
d0dd7f0c
AD
4347 }
4348 }
4349}
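/*
 * Worked example: for a discrete GPU in a Gen3 x16 slot whose own link also
 * caps at 8.0 GT/s, the code above sets pcie_gen_mask to the GEN1|GEN2|GEN3
 * support bits (for both the ASIC and platform halves of the mask) and
 * pcie_mlw_mask to X16|X12|X8|X4|X2|X1, i.e. every speed and width up to
 * the common maximum is advertised to the power management code.
 */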
d38ceaf9 4350
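/*
 * BACO (Bus Active, Chip Off) entry/exit helpers.  When RAS is supported,
 * the doorbell interrupt is disabled across the BACO window so a powered-off
 * chip cannot raise spurious doorbell interrupts; the actual state change is
 * delegated to the DPM code.  Both helpers return -ENOTSUPP on devices that
 * do not support BACO.  Callers are assumed to be the runtime PM and reset
 * paths (not visible in this excerpt).
 */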
361dbd01
AD
4351int amdgpu_device_baco_enter(struct drm_device *dev)
4352{
4353 struct amdgpu_device *adev = dev->dev_private;
7a22677b 4354 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01
AD
4355
4356 if (!amdgpu_device_supports_baco(adev->ddev))
4357 return -ENOTSUPP;
4358
7a22677b
LM
4359 if (ras && ras->supported)
4360 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
4361
9530273e 4362 return amdgpu_dpm_baco_enter(adev);
361dbd01
AD
4363}
4364
4365int amdgpu_device_baco_exit(struct drm_device *dev)
4366{
4367 struct amdgpu_device *adev = dev->dev_private;
7a22677b 4368 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
9530273e 4369 int ret = 0;
361dbd01
AD
4370
4371 if (!amdgpu_device_supports_baco(adev->ddev))
4372 return -ENOTSUPP;
4373
9530273e
EQ
4374 ret = amdgpu_dpm_baco_exit(adev);
4375 if (ret)
4376 return ret;
7a22677b
LM
4377
4378 if (ras && ras->supported)
4379 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
4380
4381 return 0;
361dbd01 4382}