drm/amdgpu: add VCN2.5 sriov start for Arcturus
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/console.h>
#include <linux/slab.h>

#include <drm/drm_atomic_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "nv.h"
#include "bif/bif_4_1_d.h"
#include <linux/pci.h>
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
#include "amdgpu_pmu.h"

#include <linux/suspend.h>
#include <drm/task_barrier.h>

MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");

#define AMDGPU_RESUME_MS		2000

const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"ARCTURUS",
	"RENOIR",
	"NAVI10",
	"NAVI14",
	"NAVI12",
	"LAST",
};

/**
 * DOC: pcie_replay_count
 *
 * The amdgpu driver provides a sysfs API for reporting the total number
 * of PCIe replays (NAKs).
 * The file pcie_replay_count is used for this and returns the total
 * number of replays as a sum of the NAKs generated and NAKs received.
 */

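/*
 * Example (hypothetical card index shown; the attribute lives under the
 * device's sysfs node):
 *   cat /sys/class/drm/card0/device/pcie_replay_count
 */
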
static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;
	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);

	return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
}

static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
		amdgpu_device_get_pcie_replay_count, NULL);

static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);

/**
 * amdgpu_device_supports_boco - Is the device a dGPU with HG/PX power control
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with HG/PX power control,
 * otherwise returns false.
 */
bool amdgpu_device_supports_boco(struct drm_device *dev)
{
	struct amdgpu_device *adev = dev->dev_private;

	if (adev->flags & AMD_IS_PX)
		return true;
	return false;
}

/**
 * amdgpu_device_supports_baco - Does the device support BACO
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device supports BACO,
 * otherwise returns false.
 */
bool amdgpu_device_supports_baco(struct drm_device *dev)
{
	struct amdgpu_device *adev = dev->dev_private;

	return amdgpu_asic_supports_baco(adev);
}

/**
 * VRAM access helper functions.
 *
 * amdgpu_device_vram_access - read/write a buffer in vram
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size in bytes; the buffer at @buf must be at least @size bytes
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
			       uint32_t *buf, size_t size, bool write)
{
	uint64_t last;
	unsigned long flags;

	last = size - 4;
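	/*
	 * Each dword goes through the MM_INDEX/MM_INDEX_HI/MM_DATA indirect
	 * window: the VRAM address is programmed into the index registers and
	 * the data register then reads or writes that location.
	 */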
	for (last += pos; pos <= last; pos += 4) {
		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
		WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31);
		if (write)
			WREG32_NO_KIQ(mmMM_DATA, *buf++);
		else
			*buf++ = RREG32_NO_KIQ(mmMM_DATA);
		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	}
}

/*
 * MMIO register access helper functions.
 */
/**
 * amdgpu_mm_rreg - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
			uint32_t acc_flags)
{
	uint32_t ret;

	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
		return amdgpu_virt_kiq_rreg(adev, reg);

	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
		ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
	else {
		unsigned long flags;

		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
		ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	}
	trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
	return ret;
}

/*
 * MMIO register read with bytes helper functions
 * @offset: byte offset from MMIO start
 */

/**
 * amdgpu_mm_rreg8 - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 *
 * Returns the 8 bit value from the offset specified.
 */
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
{
	if (offset < adev->rmmio_size)
		return (readb(adev->rmmio + offset));
	BUG();
}

/*
 * MMIO register write with bytes helper functions
 * @offset: byte offset from MMIO start
 * @value: the value to be written to the register
 */
/**
 * amdgpu_mm_wreg8 - write to a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
{
	if (offset < adev->rmmio_size)
		writeb(value, adev->rmmio + offset);
	else
		BUG();
}

/**
 * amdgpu_mm_wreg - write to a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
		    uint32_t acc_flags)
{
	trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);

	if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
		adev->last_mm_index = v;
	}

	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
		return amdgpu_virt_kiq_wreg(adev, reg, v);

	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
	else {
		unsigned long flags;

		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
		writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	}

	if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
		udelay(500);
	}
}

/**
 * amdgpu_io_rreg - read an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 *
 * Returns the 32 bit value from the offset specified.
 */
u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
{
	if ((reg * 4) < adev->rio_mem_size)
		return ioread32(adev->rio_mem + (reg * 4));
	else {
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		return ioread32(adev->rio_mem + (mmMM_DATA * 4));
	}
}

/**
 * amdgpu_io_wreg - write to an IO register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
	if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
		adev->last_mm_index = v;
	}

	if ((reg * 4) < adev->rio_mem_size)
		iowrite32(v, adev->rio_mem + (reg * 4));
	else {
		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
		iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
	}

	if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
		udelay(500);
	}
}

/**
 * amdgpu_mm_rdoorbell - read a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
{
	if (index < adev->doorbell.num_doorbells) {
		return readl(adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell - write a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
{
	if (index < adev->doorbell.num_doorbells) {
		writel(v, adev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

/**
 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
{
	if (index < adev->doorbell.num_doorbells) {
		return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (VEGA10+).
 */
void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
{
	if (index < adev->doorbell.num_doorbells) {
		atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

/**
 * amdgpu_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg64 - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
{
	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_block_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
					  uint32_t block, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
		  reg, block);
	BUG();
	return 0;
}

/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
				      uint32_t block,
				      uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
		  reg, block, v);
	BUG();
}

/**
 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
 *
 * @adev: amdgpu device pointer
 *
 * Allocates a scratch page of VRAM for use by various things in the
 * driver.
 */
static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
{
	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
				       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
				       &adev->vram_scratch.robj,
				       &adev->vram_scratch.gpu_addr,
				       (void **)&adev->vram_scratch.ptr);
}

/**
 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
 *
 * @adev: amdgpu device pointer
 *
 * Frees the VRAM scratch page.
 */
static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
}

/**
 * amdgpu_device_program_register_sequence - program an array of registers.
 *
 * @adev: amdgpu_device pointer
 * @registers: pointer to the register array
 * @array_size: size of the register array
 *
 * Programs an array of registers with AND and OR masks.
 * This is a helper for setting golden registers.
 */
void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
					     const u32 *registers,
					     const u32 array_size)
{
	u32 tmp, reg, and_mask, or_mask;
	int i;

	if (array_size % 3)
		return;

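	/*
	 * The array is consumed as {register offset, and_mask, or_mask}
	 * triplets; an and_mask of 0xffffffff means the or_mask is written
	 * verbatim instead of read-modify-write.
	 */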
	for (i = 0; i < array_size; i += 3) {
		reg = registers[i + 0];
		and_mask = registers[i + 1];
		or_mask = registers[i + 2];

		if (and_mask == 0xffffffff) {
			tmp = or_mask;
		} else {
			tmp = RREG32(reg);
			tmp &= ~and_mask;
			if (adev->family >= AMDGPU_FAMILY_AI)
				tmp |= (or_mask & and_mask);
			else
				tmp |= or_mask;
		}
		WREG32(reg, tmp);
	}
}

/**
 * amdgpu_device_pci_config_reset - reset the GPU
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using the pci config reset sequence.
 * Only applicable to asics prior to vega10.
 */
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
{
	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
}

/*
 * GPU doorbell aperture helper functions.
 */
/**
 * amdgpu_device_doorbell_init - Init doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Init doorbell driver information (CIK)
 * Returns 0 on success, error on failure.
 */
static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
{

	/* No doorbell on SI hardware generation */
	if (adev->asic_type < CHIP_BONAIRE) {
		adev->doorbell.base = 0;
		adev->doorbell.size = 0;
		adev->doorbell.num_doorbells = 0;
		adev->doorbell.ptr = NULL;
		return 0;
	}

	if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
		return -EINVAL;

	amdgpu_asic_init_doorbell_index(adev);

	/* doorbell bar mapping */
	adev->doorbell.base = pci_resource_start(adev->pdev, 2);
	adev->doorbell.size = pci_resource_len(adev->pdev, 2);

	adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
					     adev->doorbell_index.max_assignment+1);
	if (adev->doorbell.num_doorbells == 0)
		return -EINVAL;

	/* For Vega, reserve and map two pages on the doorbell BAR since the SDMA
	 * paging queue doorbell uses the second page. The
	 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
	 * doorbells are in the first page. So with the paging queue enabled,
	 * num_doorbells needs one extra page (0x400 in dwords).
	 */
	if (adev->asic_type >= CHIP_VEGA10)
		adev->doorbell.num_doorbells += 0x400;

	adev->doorbell.ptr = ioremap(adev->doorbell.base,
				     adev->doorbell.num_doorbells *
				     sizeof(u32));
	if (adev->doorbell.ptr == NULL)
		return -ENOMEM;

	return 0;
}

/**
 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Tear down doorbell driver information (CIK)
 */
static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
{
	iounmap(adev->doorbell.ptr);
	adev->doorbell.ptr = NULL;
}


/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */

/**
 * amdgpu_device_wb_fini - Disable Writeback and free memory
 *
 * @adev: amdgpu_device pointer
 *
 * Disables Writeback and frees the Writeback memory (all asics).
 * Used at driver shutdown.
 */
static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
{
	if (adev->wb.wb_obj) {
		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
				      &adev->wb.gpu_addr,
				      (void **)&adev->wb.wb);
		adev->wb.wb_obj = NULL;
	}
}

/**
 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
 *
 * @adev: amdgpu_device pointer
 *
 * Initializes writeback and allocates writeback memory (all asics).
 * Used at driver startup.
 * Returns 0 on success or an -error on failure.
 */
static int amdgpu_device_wb_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->wb.wb_obj == NULL) {
		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
					    (void **)&adev->wb.wb);
		if (r) {
			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
			return r;
		}

		adev->wb.num_wb = AMDGPU_MAX_WB;
		memset(&adev->wb.used, 0, sizeof(adev->wb.used));

		/* clear wb memory */
		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
	}

	return 0;
}

/**
 * amdgpu_device_wb_get - Allocate a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Allocate a wb slot for use by the driver (all asics).
 * Returns 0 on success or -EINVAL on failure.
 */
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
{
	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);

	if (offset < adev->wb.num_wb) {
		__set_bit(offset, adev->wb.used);
		*wb = offset << 3; /* convert to dw offset */
		return 0;
	} else {
		return -EINVAL;
	}
}

/**
 * amdgpu_device_wb_free - Free a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Free a wb slot allocated for use by the driver (all asics)
 */
void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
{
	wb >>= 3;
	if (wb < adev->wb.num_wb)
		__clear_bit(wb, adev->wb.used);
}

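/*
 * Typical usage (sketch, not taken from this file): a ring allocates a
 * writeback slot at init time, e.g. amdgpu_device_wb_get(adev, &rptr_offs),
 * reads the value the GPU wrote via adev->wb.wb[rptr_offs], and releases the
 * slot again with amdgpu_device_wb_free() at teardown.
 */
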
/**
 * amdgpu_device_resize_fb_bar - try to resize FB BAR
 *
 * @adev: amdgpu_device pointer
 *
 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 * to fail, but if any of the BARs is not accessible after the size we abort
 * driver loading by returning -ENODEV.
 */
int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
{
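	/*
	 * rbar_size is the resizable-BAR encoding expected by
	 * pci_resize_resource(): log2 of the BAR size in MB (0 = 1 MB,
	 * 1 = 2 MB, ...), rounded up to cover the real VRAM size.
	 */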
	u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
	u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
	struct pci_bus *root;
	struct resource *res;
	unsigned i;
	u16 cmd;
	int r;

	/* Bypass for VF */
	if (amdgpu_sriov_vf(adev))
		return 0;

	/* Check if the root BUS has 64bit memory resources */
	root = adev->pdev->bus;
	while (root->parent)
		root = root->parent;

	pci_bus_for_each_resource(root, res, i) {
		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
		    res->start > 0x100000000ull)
			break;
	}

	/* Trying to resize is pointless without a root hub window above 4GB */
	if (!res)
		return 0;

	/* Disable memory decoding while we change the BAR addresses and size */
	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
	pci_write_config_word(adev->pdev, PCI_COMMAND,
			      cmd & ~PCI_COMMAND_MEMORY);

	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
	amdgpu_device_doorbell_fini(adev);
	if (adev->asic_type >= CHIP_BONAIRE)
		pci_release_resource(adev->pdev, 2);

	pci_release_resource(adev->pdev, 0);

	r = pci_resize_resource(adev->pdev, 0, rbar_size);
	if (r == -ENOSPC)
		DRM_INFO("Not enough PCI address space for a large BAR.");
	else if (r && r != -ENOTSUPP)
		DRM_ERROR("Problem resizing BAR0 (%d).", r);

	pci_assign_unassigned_bus_resources(adev->pdev->bus);

	/* When the doorbell or fb BAR isn't available we have no chance of
	 * using the device.
	 */
	r = amdgpu_device_doorbell_init(adev);
	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
		return -ENODEV;

	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);

	return 0;
}

/*
 * GPU helper functions.
 */
/**
 * amdgpu_device_need_post - check if the hw need post or not
 *
 * @adev: amdgpu_device pointer
 *
 * Check if the asic has been initialized (all asics) at driver startup,
 * or whether a post is needed because a hw reset was performed.
 * Returns true if a post is needed, false if not.
 */
bool amdgpu_device_need_post(struct amdgpu_device *adev)
{
	uint32_t reg;

	if (amdgpu_sriov_vf(adev))
		return false;

	if (amdgpu_passthrough(adev)) {
		/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
		 * some old smc fw still need driver do vPost otherwise gpu hang, while
		 * those smc fw version above 22.15 doesn't have this flaw, so we force
		 * vpost executed for smc version below 22.15
		 */
		if (adev->asic_type == CHIP_FIJI) {
			int err;
			uint32_t fw_ver;
			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
			/* force vPost if error occurred */
			if (err)
				return true;

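			/* the SMC firmware version sits at dword offset 69 of the image */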
			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
			if (fw_ver < 0x00160e00)
				return true;
		}
	}

	if (adev->has_hw_reset) {
		adev->has_hw_reset = false;
		return true;
	}

	/* bios scratch used on CIK+ */
	if (adev->asic_type >= CHIP_BONAIRE)
		return amdgpu_atombios_scratch_need_asic_init(adev);

	/* check MEM_SIZE for older asics */
	reg = amdgpu_asic_get_config_memsize(adev);

	if ((reg != 0) && (reg != 0xffffffff))
		return false;

	return true;
}

/* if we get transitioned to only one device, take VGA back */
/**
 * amdgpu_device_vga_set_decode - enable/disable vga decode
 *
 * @cookie: amdgpu_device pointer
 * @state: enable/disable vga decode
 *
 * Enable/disable vga decode (all asics).
 * Returns VGA resource flags.
 */
static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
{
	struct amdgpu_device *adev = cookie;
	amdgpu_asic_set_vga_state(adev, state);
	if (state)
		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
	else
		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
}

/**
 * amdgpu_device_check_block_size - validate the vm block size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm block size specified via module parameter.
 * The vm block size defines number of bits in page table versus page directory,
 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
 * page table and the remaining bits are in the page directory.
 */
static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
{
	/* defines number of bits in page table versus page directory,
	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
	 * page table and the remaining bits are in the page directory */
	if (amdgpu_vm_block_size == -1)
		return;

	if (amdgpu_vm_block_size < 9) {
		dev_warn(adev->dev, "VM page table size (%d) too small\n",
			 amdgpu_vm_block_size);
		amdgpu_vm_block_size = -1;
	}
}

/**
 * amdgpu_device_check_vm_size - validate the vm size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm size in GB specified via module parameter.
 * The VM size is the size of the GPU virtual memory space in GB.
 */
static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
{
	/* no need to check the default value */
	if (amdgpu_vm_size == -1)
		return;

	if (amdgpu_vm_size < 1) {
		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
			 amdgpu_vm_size);
		amdgpu_vm_size = -1;
	}
}

static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
{
	struct sysinfo si;
	bool is_os_64 = (sizeof(void *) == 8) ? true : false;
	uint64_t total_memory;
	uint64_t dram_size_seven_GB = 0x1B8000000;
	uint64_t dram_size_three_GB = 0xB8000000;

	if (amdgpu_smu_memory_pool_size == 0)
		return;

	if (!is_os_64) {
		DRM_WARN("Not 64-bit OS, feature not supported\n");
		goto def_value;
	}
	si_meminfo(&si);
	total_memory = (uint64_t)si.totalram * si.mem_unit;

	if ((amdgpu_smu_memory_pool_size == 1) ||
	    (amdgpu_smu_memory_pool_size == 2)) {
		if (total_memory < dram_size_three_GB)
			goto def_value1;
	} else if ((amdgpu_smu_memory_pool_size == 4) ||
		   (amdgpu_smu_memory_pool_size == 8)) {
		if (total_memory < dram_size_seven_GB)
			goto def_value1;
	} else {
		DRM_WARN("Smu memory pool size not supported\n");
		goto def_value;
	}
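	/* the pool-size parameter counts 256 MB units, hence the shift by 28 */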
	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;

	return;

def_value1:
	DRM_WARN("Not enough system memory\n");
def_value:
	adev->pm.smu_prv_buffer_size = 0;
}

/**
 * amdgpu_device_check_arguments - validate module params
 *
 * @adev: amdgpu_device pointer
 *
 * Validates certain module parameters and updates
 * the associated values used by the driver (all asics).
 */
static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
{
	int ret = 0;

	if (amdgpu_sched_jobs < 4) {
		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = 4;
	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
	}

	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
		/* gart size must be greater or equal to 32M */
		dev_warn(adev->dev, "gart size (%d) too small\n",
			 amdgpu_gart_size);
		amdgpu_gart_size = -1;
	}

	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
		/* gtt size must be greater or equal to 32M */
		dev_warn(adev->dev, "gtt size (%d) too small\n",
			 amdgpu_gtt_size);
		amdgpu_gtt_size = -1;
	}

	/* valid range is between 4 and 9 inclusive */
	if (amdgpu_vm_fragment_size != -1 &&
	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
		dev_warn(adev->dev, "valid range is between 4 and 9\n");
		amdgpu_vm_fragment_size = -1;
	}

	amdgpu_device_check_smu_prv_buffer_size(adev);

	amdgpu_device_check_vm_size(adev);

	amdgpu_device_check_block_size(adev);

	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);

	return ret;
}

/**
 * amdgpu_switcheroo_set_state - set switcheroo state
 *
 * @pdev: pci dev pointer
 * @state: vga_switcheroo state
 *
 * Callback for the switcheroo driver. Suspends or resumes the
 * asics before or after it is powered up using ACPI methods.
 */
static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
{
	struct drm_device *dev = pci_get_drvdata(pdev);
	int r;

	if (amdgpu_device_supports_boco(dev) && state == VGA_SWITCHEROO_OFF)
		return;

	if (state == VGA_SWITCHEROO_ON) {
		pr_info("amdgpu: switched on\n");
		/* don't suspend or resume card normally */
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;

		pci_set_power_state(dev->pdev, PCI_D0);
		pci_restore_state(dev->pdev);
		r = pci_enable_device(dev->pdev);
		if (r)
			DRM_WARN("pci_enable_device failed (%d)\n", r);
		amdgpu_device_resume(dev, true);

		dev->switch_power_state = DRM_SWITCH_POWER_ON;
		drm_kms_helper_poll_enable(dev);
	} else {
		pr_info("amdgpu: switched off\n");
		drm_kms_helper_poll_disable(dev);
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
		amdgpu_device_suspend(dev, true);
		pci_save_state(dev->pdev);
		/* Shut down the device */
		pci_disable_device(dev->pdev);
		pci_set_power_state(dev->pdev, PCI_D3cold);
		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
	}
}

/**
 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
 *
 * @pdev: pci dev pointer
 *
 * Callback for the switcheroo driver. Check if the switcheroo
 * state can be changed.
 * Returns true if the state can be changed, false if not.
 */
static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);

	/*
	 * FIXME: open_count is protected by drm_global_mutex but that would lead to
	 * locking inversion with the driver load path. And the access here is
	 * completely racy anyway. So don't bother with locking for now.
	 */
	return dev->open_count == 0;
}

static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
	.set_gpu_state = amdgpu_switcheroo_set_state,
	.reprobe = NULL,
	.can_switch = amdgpu_switcheroo_can_switch,
};

/**
 * amdgpu_device_ip_set_clockgating_state - set the CG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: clockgating state (gate or ungate)
 *
 * Sets the requested clockgating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_clockgating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

/**
 * amdgpu_device_ip_set_powergating_state - set the PG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: powergating state (gate or ungate)
 *
 * Sets the requested powergating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_powergating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_powergating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

/**
 * amdgpu_device_ip_get_clockgating_state - get the CG state
 *
 * @adev: amdgpu_device pointer
 * @flags: clockgating feature flags
 *
 * Walks the list of IPs on the device and updates the clockgating
 * flags for each IP.
 * Updates @flags with the feature flags for each hardware IP where
 * clockgating is enabled.
 */
void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
					    u32 *flags)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
	}
}

/**
 * amdgpu_device_ip_wait_for_idle - wait for idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Waits for the requested hardware IP to be idle.
 * Returns 0 for success or a negative error code on failure.
 */
int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
				   enum amd_ip_block_type block_type)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type) {
			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
			if (r)
				return r;
			break;
		}
	}
	return 0;

}

/**
 * amdgpu_device_ip_is_idle - is the hardware IP idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Check if the hardware IP is idle or not.
 * Returns true if the IP is idle, false if not.
 */
bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
			      enum amd_ip_block_type block_type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type)
			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
	}
	return true;

}

/**
 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
 *
 * @adev: amdgpu_device pointer
 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Returns a pointer to the hardware IP block structure
 * if it exists for the asic, otherwise NULL.
 */
struct amdgpu_ip_block *
amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
			      enum amd_ip_block_type type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++)
		if (adev->ip_blocks[i].version->type == type)
			return &adev->ip_blocks[i];

	return NULL;
}

/**
 * amdgpu_device_ip_block_version_cmp
 *
 * @adev: amdgpu_device pointer
 * @type: enum amd_ip_block_type
 * @major: major version
 * @minor: minor version
 *
 * return 0 if equal or greater
 * return 1 if smaller or the ip_block doesn't exist
 */
int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
				       enum amd_ip_block_type type,
				       u32 major, u32 minor)
{
	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);

	if (ip_block && ((ip_block->version->major > major) ||
			((ip_block->version->major == major) &&
			(ip_block->version->minor >= minor))))
		return 0;

	return 1;
}

/**
 * amdgpu_device_ip_block_add
 *
 * @adev: amdgpu_device pointer
 * @ip_block_version: pointer to the IP to add
 *
 * Adds the IP block driver information to the collection of IPs
 * on the asic.
 */
int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
			       const struct amdgpu_ip_block_version *ip_block_version)
{
	if (!ip_block_version)
		return -EINVAL;

	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
		 ip_block_version->funcs->name);

	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;

	return 0;
}

/**
 * amdgpu_device_enable_virtual_display - enable virtual display feature
 *
 * @adev: amdgpu_device pointer
 *
 * Enables the virtual display feature if the user has enabled it via
 * the module parameter virtual_display. This feature provides a virtual
 * display hardware on headless boards or in virtualized environments.
 * This function parses and validates the configuration string specified by
 * the user and configures the virtual display configuration (number of
 * virtual connectors, crtcs, etc.) specified.
 */
static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
{
	adev->enable_virtual_display = false;

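	/*
	 * The parameter is a ';'-separated list of "<pci address>[,<crtcs>]"
	 * entries, e.g. "0000:26:00.0,4" (the address shown is only an
	 * example); the string "all" matches every device.
	 */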
	if (amdgpu_virtual_display) {
		struct drm_device *ddev = adev->ddev;
		const char *pci_address_name = pci_name(ddev->pdev);
		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;

		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
		pciaddstr_tmp = pciaddstr;
		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
			pciaddname = strsep(&pciaddname_tmp, ",");
			if (!strcmp("all", pciaddname)
			    || !strcmp(pci_address_name, pciaddname)) {
				long num_crtc;
				int res = -1;

				adev->enable_virtual_display = true;

				if (pciaddname_tmp)
					res = kstrtol(pciaddname_tmp, 10,
						      &num_crtc);

				if (!res) {
					if (num_crtc < 1)
						num_crtc = 1;
					if (num_crtc > 6)
						num_crtc = 6;
					adev->mode_info.num_crtc = num_crtc;
				} else {
					adev->mode_info.num_crtc = 1;
				}
				break;
			}
		}

		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
			 amdgpu_virtual_display, pci_address_name,
			 adev->enable_virtual_display, adev->mode_info.num_crtc);

		kfree(pciaddstr);
	}
}

/**
 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
 *
 * @adev: amdgpu_device pointer
 *
 * Parses the asic configuration parameters specified in the gpu info
 * firmware and makes them available to the driver for use in configuring
 * the asic.
 * Returns 0 on success, -EINVAL on failure.
 */
static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	const struct gpu_info_firmware_header_v1_0 *hdr;

	adev->firmware.gpu_info_fw = NULL;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_VERDE:
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_OLAND:
	case CHIP_HAINAN:
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
#endif
	case CHIP_VEGA20:
	default:
		return 0;
	case CHIP_VEGA10:
		chip_name = "vega10";
		break;
	case CHIP_VEGA12:
		chip_name = "vega12";
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 8)
			chip_name = "raven2";
		else if (adev->pdev->device == 0x15d8)
			chip_name = "picasso";
		else
			chip_name = "raven";
		break;
	case CHIP_ARCTURUS:
		chip_name = "arcturus";
		break;
	case CHIP_RENOIR:
		chip_name = "renoir";
		break;
	case CHIP_NAVI10:
		chip_name = "navi10";
		break;
	case CHIP_NAVI14:
		chip_name = "navi14";
		break;
	case CHIP_NAVI12:
		chip_name = "navi12";
		break;
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
	err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
	if (err) {
		dev_err(adev->dev,
			"Failed to load gpu_info firmware \"%s\"\n",
			fw_name);
		goto out;
	}
	err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
	if (err) {
		dev_err(adev->dev,
			"Failed to validate gpu_info firmware \"%s\"\n",
			fw_name);
		goto out;
	}

	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);

	switch (hdr->version_major) {
	case 1:
	{
		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
								le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
			goto parse_soc_bounding_box;

		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
		adev->gfx.config.max_texture_channel_caches =
			le32_to_cpu(gpu_info_fw->gc_num_tccs);
		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
		adev->gfx.config.double_offchip_lds_buf =
			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
		adev->gfx.cu_info.max_waves_per_simd =
			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
		adev->gfx.cu_info.max_scratch_slots_per_cu =
			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
		if (hdr->version_minor >= 1) {
			const struct gpu_info_firmware_v1_1 *gpu_info_fw =
				(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			adev->gfx.config.num_sc_per_sh =
				le32_to_cpu(gpu_info_fw->num_sc_per_sh);
			adev->gfx.config.num_packer_per_sc =
				le32_to_cpu(gpu_info_fw->num_packer_per_sc);
		}

parse_soc_bounding_box:
		/*
		 * soc bounding box info is not integrated in discovery table,
		 * we always need to parse it from gpu info firmware.
		 */
		if (hdr->version_minor == 2) {
			const struct gpu_info_firmware_v1_2 *gpu_info_fw =
				(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
		}
		break;
	}
	default:
		dev_err(adev->dev,
			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
		err = -EINVAL;
		goto out;
	}
out:
	return err;
}

/**
 * amdgpu_device_ip_early_init - run early init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Early initialization pass for hardware IPs. The hardware IPs that make
 * up each asic are discovered and each IP's early_init callback is run. This
 * is the first stage in initializing the asic.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
{
	int i, r;

	amdgpu_device_enable_virtual_display(adev);

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
			adev->family = AMDGPU_FAMILY_CZ;
		else
			adev->family = AMDGPU_FAMILY_VI;

		r = vi_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_VERDE:
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_OLAND:
	case CHIP_HAINAN:
		adev->family = AMDGPU_FAMILY_SI;
		r = si_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
		if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII))
			adev->family = AMDGPU_FAMILY_CI;
		else
			adev->family = AMDGPU_FAMILY_KV;

		r = cik_set_ip_blocks(adev);
		if (r)
			return r;
		break;
#endif
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RAVEN:
	case CHIP_ARCTURUS:
	case CHIP_RENOIR:
		if (adev->asic_type == CHIP_RAVEN ||
		    adev->asic_type == CHIP_RENOIR)
			adev->family = AMDGPU_FAMILY_RV;
		else
			adev->family = AMDGPU_FAMILY_AI;

		r = soc15_set_ip_blocks(adev);
		if (r)
			return r;
		break;
	case CHIP_NAVI10:
	case CHIP_NAVI14:
	case CHIP_NAVI12:
		adev->family = AMDGPU_FAMILY_NV;

		r = nv_set_ip_blocks(adev);
		if (r)
			return r;
		break;
	default:
		/* FIXME: not supported yet */
		return -EINVAL;
	}

	r = amdgpu_device_parse_gpu_info_fw(adev);
	if (r)
		return r;

	if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
		amdgpu_discovery_get_gfx_info(adev);

	amdgpu_amdkfd_device_probe(adev);

	if (amdgpu_sriov_vf(adev)) {
		r = amdgpu_virt_request_full_gpu(adev, true);
		if (r)
			return -EAGAIN;
	}

	adev->pm.pp_feature = amdgpu_pp_feature_mask;
	if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
			DRM_ERROR("disabled ip block: %d <%s>\n",
				  i, adev->ip_blocks[i].version->funcs->name);
			adev->ip_blocks[i].status.valid = false;
		} else {
			if (adev->ip_blocks[i].version->funcs->early_init) {
				r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
				if (r == -ENOENT) {
					adev->ip_blocks[i].status.valid = false;
				} else if (r) {
					DRM_ERROR("early_init of IP block <%s> failed %d\n",
						  adev->ip_blocks[i].version->funcs->name, r);
					return r;
				} else {
					adev->ip_blocks[i].status.valid = true;
				}
			} else {
				adev->ip_blocks[i].status.valid = true;
			}
		}
		/* get the vbios after the asic_funcs are set up */
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
			/* Read BIOS */
			if (!amdgpu_get_bios(adev))
				return -EINVAL;

			r = amdgpu_atombios_init(adev);
			if (r) {
				dev_err(adev->dev, "amdgpu_atombios_init failed\n");
				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
				return r;
			}
		}
	}

	adev->cg_flags &= amdgpu_cg_mask;
	adev->pg_flags &= amdgpu_pg_mask;

	return 0;
}

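/*
 * Phase 1 only brings up the COMMON and IH blocks (plus PSP when running
 * under SR-IOV); every other block's hw_init is deferred to phase 2.
 */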
0a4f2520
RZ
1732static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
1733{
1734 int i, r;
1735
1736 for (i = 0; i < adev->num_ip_blocks; i++) {
1737 if (!adev->ip_blocks[i].status.sw)
1738 continue;
1739 if (adev->ip_blocks[i].status.hw)
1740 continue;
1741 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 1742 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
1743 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
1744 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1745 if (r) {
1746 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1747 adev->ip_blocks[i].version->funcs->name, r);
1748 return r;
1749 }
1750 adev->ip_blocks[i].status.hw = true;
1751 }
1752 }
1753
1754 return 0;
1755}
1756
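/**
 * amdgpu_device_ip_hw_init_phase2 - run hw_init for remaining hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Second hardware init pass: hw_init is run for every block with software
 * state that was not already brought up in phase 1 or by the firmware
 * loading step in between.
 * Returns 0 on success, negative error code on failure.
 */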
1757static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
1758{
1759 int i, r;
1760
1761 for (i = 0; i < adev->num_ip_blocks; i++) {
1762 if (!adev->ip_blocks[i].status.sw)
1763 continue;
1764 if (adev->ip_blocks[i].status.hw)
1765 continue;
1766 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1767 if (r) {
1768 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1769 adev->ip_blocks[i].version->funcs->name, r);
1770 return r;
1771 }
1772 adev->ip_blocks[i].status.hw = true;
1773 }
1774
1775 return 0;
1776}
1777
1778static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
1779{
1780 int r = 0;
1781 int i;
80f41f84 1782 uint32_t smu_version;
1783
1784 if (adev->asic_type >= CHIP_VEGA10) {
1785 for (i = 0; i < adev->num_ip_blocks; i++) {
1786 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
1787 continue;
1788
1789 /* no need to do the fw loading again if already done*/
1790 if (adev->ip_blocks[i].status.hw == true)
1791 break;
1792
1793 if (adev->in_gpu_reset || adev->in_suspend) {
1794 r = adev->ip_blocks[i].version->funcs->resume(adev);
1795 if (r) {
1796 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 1797 adev->ip_blocks[i].version->funcs->name, r);
1798 return r;
1799 }
1800 } else {
1801 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1802 if (r) {
1803 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1804 adev->ip_blocks[i].version->funcs->name, r);
1805 return r;
7a3e0bb2 1806 }
7a3e0bb2 1807 }
1808
1809 adev->ip_blocks[i].status.hw = true;
1810 break;
1811 }
1812 }
482f0e53 1813
1814 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
1815 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 1816
80f41f84 1817 return r;
1818}
1819
1820/**
1821 * amdgpu_device_ip_init - run init for hardware IPs
1822 *
1823 * @adev: amdgpu_device pointer
1824 *
1825 * Main initialization pass for hardware IPs. The list of all the hardware
1826 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
1827 * are run. sw_init initializes the software state associated with each IP
1828 * and hw_init initializes the hardware associated with each IP.
1829 * Returns 0 on success, negative error code on failure.
1830 */
06ec9070 1831static int amdgpu_device_ip_init(struct amdgpu_device *adev)
1832{
1833 int i, r;
1834
c030f2e4 1835 r = amdgpu_ras_init(adev);
1836 if (r)
1837 return r;
1838
d38ceaf9 1839 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1840 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 1841 continue;
a1255107 1842 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 1843 if (r) {
1844 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
1845 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 1846 goto init_failed;
2c1a2784 1847 }
a1255107 1848 adev->ip_blocks[i].status.sw = true;
bfca0289 1849
d38ceaf9 1850 /* need to do gmc hw init early so we can allocate gpu mem */
a1255107 1851 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
06ec9070 1852 r = amdgpu_device_vram_scratch_init(adev);
1853 if (r) {
1854 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
72d3f592 1855 goto init_failed;
2c1a2784 1856 }
a1255107 1857 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
1858 if (r) {
1859 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 1860 goto init_failed;
2c1a2784 1861 }
06ec9070 1862 r = amdgpu_device_wb_init(adev);
2c1a2784 1863 if (r) {
06ec9070 1864 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 1865 goto init_failed;
2c1a2784 1866 }
a1255107 1867 adev->ip_blocks[i].status.hw = true;
1868
1869 /* right after GMC hw init, we create CSA */
f92d5c61 1870 if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
1871 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
1872 AMDGPU_GEM_DOMAIN_VRAM,
1873 AMDGPU_CSA_SIZE);
1874 if (r) {
1875 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 1876 goto init_failed;
1877 }
1878 }
1879 }
1880 }
1881
1882 if (amdgpu_sriov_vf(adev))
1883 amdgpu_virt_init_data_exchange(adev);
1884
1885 r = amdgpu_ib_pool_init(adev);
1886 if (r) {
1887 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
1888 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
1889 goto init_failed;
1890 }
1891
1892 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
1893 if (r)
72d3f592 1894 goto init_failed;
1895
1896 r = amdgpu_device_ip_hw_init_phase1(adev);
1897 if (r)
72d3f592 1898 goto init_failed;
0a4f2520 1899
1900 r = amdgpu_device_fw_loading(adev);
1901 if (r)
72d3f592 1902 goto init_failed;
7a3e0bb2 1903
1904 r = amdgpu_device_ip_hw_init_phase2(adev);
1905 if (r)
72d3f592 1906 goto init_failed;
d38ceaf9 1907
1908 /*
1909	 * Retired pages will be loaded from eeprom and reserved here;
1910	 * this must be called after amdgpu_device_ip_hw_init_phase2, since
1911	 * for some ASICs the RAS EEPROM code relies on the SMU being fully
1912	 * functional for I2C communication, which is only true at this point.
1913	 * recovery_init may fail, but it can free all resources allocated by
1914	 * itself and its failure should not stop the amdgpu init process.
1915	 *
1916	 * Note: theoretically, this should be called before all vram allocations
1917	 * to protect retired pages from being abused.
1918 */
1919 amdgpu_ras_recovery_init(adev);
1920
1921 if (adev->gmc.xgmi.num_physical_nodes > 1)
1922 amdgpu_xgmi_add_device(adev);
1884734a 1923 amdgpu_amdkfd_device_init(adev);
c6332b97 1924
72d3f592 1925init_failed:
c9ffa427 1926 if (amdgpu_sriov_vf(adev))
c6332b97 1927 amdgpu_virt_release_full_gpu(adev, true);
1928
72d3f592 1929 return r;
1930}
1931
1932/**
1933 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
1934 *
1935 * @adev: amdgpu_device pointer
1936 *
1937 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
1938 * this function before a GPU reset. If the value is retained after a
1939 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
1940 */
06ec9070 1941static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
1942{
1943 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
1944}
1945
1946/**
1947 * amdgpu_device_check_vram_lost - check if vram is valid
1948 *
1949 * @adev: amdgpu_device pointer
1950 *
1951 * Checks the reset magic value written to the gart pointer in VRAM.
1952 * The driver calls this after a GPU reset to see if the contents of
1953 * VRAM are lost or not.
1954 * returns true if vram is lost, false if not.
1955 */
06ec9070 1956static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
1957{
1958 return !!memcmp(adev->gart.ptr, adev->reset_magic,
1959 AMDGPU_RESET_MAGIC_NUM);
1960}
1961
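/*
 * Editor's note: a minimal, self-contained sketch of the reset-magic idea
 * used by the two helpers above. It assumes nothing about amdgpu internals;
 * the 64-byte size and the buffers below are illustrative stand-ins for
 * AMDGPU_RESET_MAGIC_NUM and the GART-backed pointer, not driver API.
 */
#include <stdbool.h>
#include <string.h>

#define RESET_MAGIC_NUM 64

/* Saved copy of the magic, analogous to adev->reset_magic. */
static unsigned char saved_magic[RESET_MAGIC_NUM];

/* Record the current VRAM-backed magic before a reset. */
static void example_fill_reset_magic(const void *vram_ptr)
{
	memcpy(saved_magic, vram_ptr, RESET_MAGIC_NUM);
}

/* After the reset: if the bytes changed, VRAM contents were lost. */
static bool example_check_vram_lost(const void *vram_ptr)
{
	return memcmp(vram_ptr, saved_magic, RESET_MAGIC_NUM) != 0;
}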
e3ecdffa 1962/**
1112a46b 1963 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
1964 *
1965 * @adev: amdgpu_device pointer
b8b72130 1966 * @state: clockgating state (gate or ungate)
e3ecdffa 1967 *
e3ecdffa 1968 * The list of all the hardware IPs that make up the asic is walked and the
1969 * set_clockgating_state callbacks are run.
1970 * The late init pass enables clockgating for hardware IPs; the fini or
1971 * suspend pass disables it.
1972 * Returns 0 on success, negative error code on failure.
1973 */
fdd34271 1974
1975static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
1976 enum amd_clockgating_state state)
d38ceaf9 1977{
1112a46b 1978 int i, j, r;
d38ceaf9 1979
1980 if (amdgpu_emu_mode == 1)
1981 return 0;
1982
1983 for (j = 0; j < adev->num_ip_blocks; j++) {
1984 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 1985 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 1986 continue;
4a446d55 1987 /* skip CG for VCE/UVD, it's handled specially */
a1255107 1988 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 1989 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 1990 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 1991 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 1992 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 1993 /* enable clockgating to save power */
a1255107 1994 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 1995 state);
1996 if (r) {
1997 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 1998 adev->ip_blocks[i].version->funcs->name, r);
1999 return r;
2000 }
b0b00ff1 2001 }
d38ceaf9 2002 }
06b18f61 2003
2004 return 0;
2005}
2006
1112a46b 2007static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
c9f96fd5 2008{
1112a46b 2009 int i, j, r;
06b18f61 2010
2011 if (amdgpu_emu_mode == 1)
2012 return 0;
2013
2014 for (j = 0; j < adev->num_ip_blocks; j++) {
2015 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2016 if (!adev->ip_blocks[i].status.late_initialized)
2017 continue;
2018 /* skip CG for VCE/UVD, it's handled specially */
2019 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2020 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2021 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2022 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2023 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2024 /* enable powergating to save power */
2025 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2026 state);
2027 if (r) {
2028 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2029 adev->ip_blocks[i].version->funcs->name, r);
2030 return r;
2031 }
2032 }
2033 }
2034 return 0;
2035}
2036
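/*
 * Editor's note: an illustrative, self-contained sketch of the ordering used
 * by the two helpers above: blocks are walked front-to-back when gating and
 * back-to-front when ungating, mirroring the index calculation in the loops.
 * The apply() callback and the enum below are hypothetical stand-ins, not
 * driver API.
 */
enum example_cg_state { EXAMPLE_CG_GATE, EXAMPLE_CG_UNGATE };

static int example_walk_blocks(int num_blocks,
			       int (*apply)(int idx, enum example_cg_state st),
			       enum example_cg_state state)
{
	int i, j, r;

	for (j = 0; j < num_blocks; j++) {
		/* forward order for gating, reverse order for ungating */
		i = (state == EXAMPLE_CG_GATE) ? j : num_blocks - j - 1;
		r = apply(i, state);
		if (r)
			return r;
	}
	return 0;
}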
2037static int amdgpu_device_enable_mgpu_fan_boost(void)
2038{
2039 struct amdgpu_gpu_instance *gpu_ins;
2040 struct amdgpu_device *adev;
2041 int i, ret = 0;
2042
2043 mutex_lock(&mgpu_info.mutex);
2044
2045 /*
2046 * MGPU fan boost feature should be enabled
2047 * only when there are two or more dGPUs in
2048 * the system
2049 */
2050 if (mgpu_info.num_dgpu < 2)
2051 goto out;
2052
2053 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2054 gpu_ins = &(mgpu_info.gpu_ins[i]);
2055 adev = gpu_ins->adev;
2056 if (!(adev->flags & AMD_IS_APU) &&
2057 !gpu_ins->mgpu_fan_enabled &&
2058 adev->powerplay.pp_funcs &&
2059 adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
2060 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2061 if (ret)
2062 break;
2063
2064 gpu_ins->mgpu_fan_enabled = 1;
2065 }
2066 }
2067
2068out:
2069 mutex_unlock(&mgpu_info.mutex);
2070
2071 return ret;
2072}
2073
2074/**
2075 * amdgpu_device_ip_late_init - run late init for hardware IPs
2076 *
2077 * @adev: amdgpu_device pointer
2078 *
2079 * Late initialization pass for hardware IPs. The list of all the hardware
2080 * IPs that make up the asic is walked and the late_init callbacks are run.
2081 * late_init covers any special initialization that an IP requires
2082 * after all of them have been initialized or something that needs to happen
2083 * late in the init process.
2084 * Returns 0 on success, negative error code on failure.
2085 */
06ec9070 2086static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2087{
60599a03 2088 struct amdgpu_gpu_instance *gpu_instance;
2089 int i = 0, r;
2090
2091 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2092 if (!adev->ip_blocks[i].status.hw)
2093 continue;
2094 if (adev->ip_blocks[i].version->funcs->late_init) {
2095 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2096 if (r) {
2097 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2098 adev->ip_blocks[i].version->funcs->name, r);
2099 return r;
2100 }
2dc80b00 2101 }
73f847db 2102 adev->ip_blocks[i].status.late_initialized = true;
2103 }
2104
2105 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2106 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2107
06ec9070 2108 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2109
2110 r = amdgpu_device_enable_mgpu_fan_boost();
2111 if (r)
2112 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2113
2114
2115 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2116 mutex_lock(&mgpu_info.mutex);
2117
2118 /*
2119 * Reset device p-state to low as this was booted with high.
2120 *
2121 * This should be performed only after all devices from the same
2122 * hive get initialized.
2123 *
2124 * However, the number of devices in a hive is not known in advance;
2125 * it is counted one by one as the devices initialize.
2126 *
2127 * So we wait until all XGMI-interlinked devices have initialized.
2128 * This may bring some delays as those devices may come from
2129 * different hives. But that should be OK.
2130 */
2131 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2132 for (i = 0; i < mgpu_info.num_gpu; i++) {
2133 gpu_instance = &(mgpu_info.gpu_ins[i]);
2134 if (gpu_instance->adev->flags & AMD_IS_APU)
2135 continue;
2136
2137 r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 0);
2138 if (r) {
2139 DRM_ERROR("pstate setting failed (%d).\n", r);
2140 break;
2141 }
2142 }
2143 }
2144
2145 mutex_unlock(&mgpu_info.mutex);
2146 }
2147
2148 return 0;
2149}
2150
2151/**
2152 * amdgpu_device_ip_fini - run fini for hardware IPs
2153 *
2154 * @adev: amdgpu_device pointer
2155 *
2156 * Main teardown pass for hardware IPs. The list of all the hardware
2157 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2158 * are run. hw_fini tears down the hardware associated with each IP
2159 * and sw_fini tears down any software state associated with each IP.
2160 * Returns 0 on success, negative error code on failure.
2161 */
06ec9070 2162static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2163{
2164 int i, r;
2165
c030f2e4 2166 amdgpu_ras_pre_fini(adev);
2167
2168 if (adev->gmc.xgmi.num_physical_nodes > 1)
2169 amdgpu_xgmi_remove_device(adev);
2170
1884734a 2171 amdgpu_amdkfd_device_fini(adev);
2172
2173 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2174 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2175
2176 /* need to disable SMC first */
2177 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2178 if (!adev->ip_blocks[i].status.hw)
3e96dbfd 2179 continue;
fdd34271 2180 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
a1255107 2181 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2182 /* XXX handle errors */
2183 if (r) {
2184 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
a1255107 2185 adev->ip_blocks[i].version->funcs->name, r);
3e96dbfd 2186 }
a1255107 2187 adev->ip_blocks[i].status.hw = false;
2188 break;
2189 }
2190 }
2191
d38ceaf9 2192 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2193 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2194 continue;
8201a67a 2195
a1255107 2196 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2197 /* XXX handle errors */
2c1a2784 2198 if (r) {
2199 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2200 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2201 }
8201a67a 2202
a1255107 2203 adev->ip_blocks[i].status.hw = false;
2204 }
2205
9950cda2 2206
d38ceaf9 2207 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2208 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2209 continue;
2210
2211 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2212 amdgpu_ucode_free_bo(adev);
1e256e27 2213 amdgpu_free_static_csa(&adev->virt.csa_obj);
2214 amdgpu_device_wb_fini(adev);
2215 amdgpu_device_vram_scratch_fini(adev);
533aed27 2216 amdgpu_ib_pool_fini(adev);
2217 }
2218
a1255107 2219 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2220 /* XXX handle errors */
2c1a2784 2221 if (r) {
2222 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2223 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2224 }
2225 adev->ip_blocks[i].status.sw = false;
2226 adev->ip_blocks[i].status.valid = false;
2227 }
2228
a6dcfd9c 2229 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2230 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2231 continue;
2232 if (adev->ip_blocks[i].version->funcs->late_fini)
2233 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2234 adev->ip_blocks[i].status.late_initialized = false;
2235 }
2236
c030f2e4 2237 amdgpu_ras_fini(adev);
2238
030308fc 2239 if (amdgpu_sriov_vf(adev))
2240 if (amdgpu_virt_release_full_gpu(adev, false))
2241 DRM_ERROR("failed to release exclusive mode on fini\n");
2493664f 2242
2243 return 0;
2244}
2245
e3ecdffa 2246/**
beff74bc 2247 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2248 *
1112a46b 2249 * @work: work_struct.
e3ecdffa 2250 */
beff74bc 2251static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2252{
2253 struct amdgpu_device *adev =
beff74bc 2254 container_of(work, struct amdgpu_device, delayed_init_work.work);
2255 int r;
2256
2257 r = amdgpu_ib_ring_tests(adev);
2258 if (r)
2259 DRM_ERROR("ib ring test failed (%d).\n", r);
2260}
2261
2262static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2263{
2264 struct amdgpu_device *adev =
2265 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2266
2267 mutex_lock(&adev->gfx.gfx_off_mutex);
2268 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2269 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2270 adev->gfx.gfx_off_state = true;
2271 }
2272 mutex_unlock(&adev->gfx.gfx_off_mutex);
2273}
2274
e3ecdffa 2275/**
e7854a03 2276 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
2277 *
2278 * @adev: amdgpu_device pointer
2279 *
2280 * Main suspend function for hardware IPs. The list of all the hardware
2281 * IPs that make up the asic is walked, clockgating is disabled and the
2282 * suspend callbacks are run. suspend puts the hardware and software state
2283 * in each IP into a state suitable for suspend.
2284 * Returns 0 on success, negative error code on failure.
2285 */
2286static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2287{
2288 int i, r;
2289
05df1f01 2290 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271 2291 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2292
2293 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2294 if (!adev->ip_blocks[i].status.valid)
2295 continue;
2296 /* displays are handled separately */
2297 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
2298 /* XXX handle errors */
2299 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2300 /* XXX handle errors */
2301 if (r) {
2302 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2303 adev->ip_blocks[i].version->funcs->name, r);
482f0e53 2304 return r;
e7854a03 2305 }
482f0e53 2306 adev->ip_blocks[i].status.hw = false;
2307 }
2308 }
2309
2310 return 0;
2311}
2312
2313/**
2314 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2315 *
2316 * @adev: amdgpu_device pointer
2317 *
2318 * Main suspend function for hardware IPs. The list of all the hardware
2319 * IPs that make up the asic is walked, clockgating is disabled and the
2320 * suspend callbacks are run. suspend puts the hardware and software state
2321 * in each IP into a state suitable for suspend.
2322 * Returns 0 on success, negative error code on failure.
2323 */
2324static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
2325{
2326 int i, r;
2327
2328 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2329 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2330 continue;
2331 /* displays are handled in phase1 */
2332 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2333 continue;
2334 /* PSP lost connection when err_event_athub occurs */
2335 if (amdgpu_ras_intr_triggered() &&
2336 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2337 adev->ip_blocks[i].status.hw = false;
2338 continue;
2339 }
d38ceaf9 2340 /* XXX handle errors */
a1255107 2341 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 2342 /* XXX handle errors */
2c1a2784 2343 if (r) {
2344 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2345 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2346 }
876923fb 2347 adev->ip_blocks[i].status.hw = false;
2348 /* handle putting the SMC in the appropriate state */
2349 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2350 if (is_support_sw_smu(adev)) {
0e0b89c0 2351 r = smu_set_mp1_state(&adev->smu, adev->mp1_state);
a3a09142 2352 } else if (adev->powerplay.pp_funcs &&
482f0e53 2353 adev->powerplay.pp_funcs->set_mp1_state) {
2354 r = adev->powerplay.pp_funcs->set_mp1_state(
2355 adev->powerplay.pp_handle,
2356 adev->mp1_state);
2357 }
2358 if (r) {
2359 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
2360 adev->mp1_state, r);
2361 return r;
2362 }
2363 }
2364
2365 adev->ip_blocks[i].status.hw = false;
2366 }
2367
2368 return 0;
2369}
2370
2371/**
2372 * amdgpu_device_ip_suspend - run suspend for hardware IPs
2373 *
2374 * @adev: amdgpu_device pointer
2375 *
2376 * Main suspend function for hardware IPs. The list of all the hardware
2377 * IPs that make up the asic is walked, clockgating is disabled and the
2378 * suspend callbacks are run. suspend puts the hardware and software state
2379 * in each IP into a state suitable for suspend.
2380 * Returns 0 on success, negative error code on failure.
2381 */
2382int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2383{
2384 int r;
2385
2386 if (amdgpu_sriov_vf(adev))
2387 amdgpu_virt_request_full_gpu(adev, false);
2388
2389 r = amdgpu_device_ip_suspend_phase1(adev);
2390 if (r)
2391 return r;
2392 r = amdgpu_device_ip_suspend_phase2(adev);
2393
2394 if (amdgpu_sriov_vf(adev))
2395 amdgpu_virt_release_full_gpu(adev, false);
2396
2397 return r;
2398}
2399
06ec9070 2400static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
2401{
2402 int i, r;
2403
2404 static enum amd_ip_block_type ip_order[] = {
2405 AMD_IP_BLOCK_TYPE_GMC,
2406 AMD_IP_BLOCK_TYPE_COMMON,
39186aef 2407 AMD_IP_BLOCK_TYPE_PSP,
2408 AMD_IP_BLOCK_TYPE_IH,
2409 };
a90ad3c2 2410
2411 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2412 int j;
2413 struct amdgpu_ip_block *block;
a90ad3c2 2414
2415 for (j = 0; j < adev->num_ip_blocks; j++) {
2416 block = &adev->ip_blocks[j];
2417
482f0e53 2418 block->status.hw = false;
2419 if (block->version->type != ip_order[i] ||
2420 !block->status.valid)
2421 continue;
2422
2423 r = block->version->funcs->hw_init(adev);
0aaeefcc 2424 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
2425 if (r)
2426 return r;
482f0e53 2427 block->status.hw = true;
2428 }
2429 }
2430
2431 return 0;
2432}
2433
06ec9070 2434static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
2435{
2436 int i, r;
2437
2438 static enum amd_ip_block_type ip_order[] = {
2439 AMD_IP_BLOCK_TYPE_SMC,
2440 AMD_IP_BLOCK_TYPE_DCE,
2441 AMD_IP_BLOCK_TYPE_GFX,
2442 AMD_IP_BLOCK_TYPE_SDMA,
2443 AMD_IP_BLOCK_TYPE_UVD,
2444 AMD_IP_BLOCK_TYPE_VCE
2cb681b6 2445 };
a90ad3c2 2446
2447 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2448 int j;
2449 struct amdgpu_ip_block *block;
a90ad3c2 2450
2451 for (j = 0; j < adev->num_ip_blocks; j++) {
2452 block = &adev->ip_blocks[j];
2453
2454 if (block->version->type != ip_order[i] ||
2455 !block->status.valid ||
2456 block->status.hw)
2457 continue;
2458
2459 r = block->version->funcs->hw_init(adev);
0aaeefcc 2460 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
2461 if (r)
2462 return r;
482f0e53 2463 block->status.hw = true;
2464 }
2465 }
2466
2467 return 0;
2468}
2469
2470/**
2471 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2472 *
2473 * @adev: amdgpu_device pointer
2474 *
2475 * First resume function for hardware IPs. The list of all the hardware
2476 * IPs that make up the asic is walked and the resume callbacks are run for
2477 * COMMON, GMC, and IH. resume puts the hardware into a functional state
2478 * after a suspend and updates the software state as necessary. This
2479 * function is also used for restoring the GPU after a GPU reset.
2480 * Returns 0 on success, negative error code on failure.
2481 */
06ec9070 2482static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
2483{
2484 int i, r;
2485
a90ad3c2 2486 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2487 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 2488 continue;
a90ad3c2 2489 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2490 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2491 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
482f0e53 2492
2493 r = adev->ip_blocks[i].version->funcs->resume(adev);
2494 if (r) {
2495 DRM_ERROR("resume of IP block <%s> failed %d\n",
2496 adev->ip_blocks[i].version->funcs->name, r);
2497 return r;
2498 }
482f0e53 2499 adev->ip_blocks[i].status.hw = true;
2500 }
2501 }
2502
2503 return 0;
2504}
2505
2506/**
2507 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2508 *
2509 * @adev: amdgpu_device pointer
2510 *
2511 * Second resume function for hardware IPs. The list of all the hardware
2512 * IPs that make up the asic is walked and the resume callbacks are run for
2513 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
2514 * functional state after a suspend and updates the software state as
2515 * necessary. This function is also used for restoring the GPU after a GPU
2516 * reset.
2517 * Returns 0 on success, negative error code on failure.
2518 */
06ec9070 2519static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
2520{
2521 int i, r;
2522
2523 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2524 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 2525 continue;
fcf0649f 2526 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 2527 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2528 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2529 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 2530 continue;
a1255107 2531 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 2532 if (r) {
2533 DRM_ERROR("resume of IP block <%s> failed %d\n",
2534 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2535 return r;
2c1a2784 2536 }
482f0e53 2537 adev->ip_blocks[i].status.hw = true;
2538 }
2539
2540 return 0;
2541}
2542
2543/**
2544 * amdgpu_device_ip_resume - run resume for hardware IPs
2545 *
2546 * @adev: amdgpu_device pointer
2547 *
2548 * Main resume function for hardware IPs. The hardware IPs
2549 * are split into two resume functions because they are
2550 * also used in recovering from a GPU reset and some additional
2551 * steps need to be taken between them. In this case (S3/S4) they are
2552 * run sequentially.
2553 * Returns 0 on success, negative error code on failure.
2554 */
06ec9070 2555static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
2556{
2557 int r;
2558
06ec9070 2559 r = amdgpu_device_ip_resume_phase1(adev);
2560 if (r)
2561 return r;
2562
2563 r = amdgpu_device_fw_loading(adev);
2564 if (r)
2565 return r;
2566
06ec9070 2567 r = amdgpu_device_ip_resume_phase2(adev);
2568
2569 return r;
2570}
2571
2572/**
2573 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2574 *
2575 * @adev: amdgpu_device pointer
2576 *
2577 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2578 */
4e99a44e 2579static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 2580{
2581 if (amdgpu_sriov_vf(adev)) {
2582 if (adev->is_atom_fw) {
2583 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2584 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2585 } else {
2586 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2587 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2588 }
2589
2590 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2591 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 2592 }
2593}
2594
2595/**
2596 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2597 *
2598 * @asic_type: AMD asic type
2599 *
2600 * Check if there is DC (new modesetting infrastructure) support for an asic.
2601 * returns true if DC has support, false if not.
2602 */
2603bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2604{
2605 switch (asic_type) {
2606#if defined(CONFIG_DRM_AMD_DC)
2607 case CHIP_BONAIRE:
0d6fbccb 2608 case CHIP_KAVERI:
2609 case CHIP_KABINI:
2610 case CHIP_MULLINS:
2611 /*
2612 * We have systems in the wild with these ASICs that require
2613 * LVDS and VGA support which is not supported with DC.
2614 *
2615 * Fallback to the non-DC driver here by default so as not to
2616 * cause regressions.
2617 */
2618 return amdgpu_dc > 0;
2619 case CHIP_HAWAII:
2620 case CHIP_CARRIZO:
2621 case CHIP_STONEY:
4562236b 2622 case CHIP_POLARIS10:
675fd32b 2623 case CHIP_POLARIS11:
2c8ad2d5 2624 case CHIP_POLARIS12:
675fd32b 2625 case CHIP_VEGAM:
2626 case CHIP_TONGA:
2627 case CHIP_FIJI:
42f8ffa1 2628 case CHIP_VEGA10:
dca7b401 2629 case CHIP_VEGA12:
c6034aa2 2630 case CHIP_VEGA20:
b86a1aa3 2631#if defined(CONFIG_DRM_AMD_DC_DCN)
fd187853 2632 case CHIP_RAVEN:
b4f199c7 2633 case CHIP_NAVI10:
8fceceb6 2634 case CHIP_NAVI14:
078655d9 2635 case CHIP_NAVI12:
e1c14c43 2636 case CHIP_RENOIR:
42f8ffa1 2637#endif
fd187853 2638 return amdgpu_dc != 0;
2639#endif
2640 default:
2641 if (amdgpu_dc > 0)
2642 DRM_INFO("Display Core has been requested via kernel parameter "
2643 "but isn't supported by ASIC, ignoring\n");
2644 return false;
2645 }
2646}
2647
2648/**
2649 * amdgpu_device_has_dc_support - check if dc is supported
2650 *
2651 * @adev: amdgpu_device pointer
2652 *
2653 * Returns true for supported, false for not supported
2654 */
2655bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2656{
2657 if (amdgpu_sriov_vf(adev))
2658 return false;
2659
2660 return amdgpu_device_asic_has_dc_support(adev->asic_type);
2661}
2662
2663
2664static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
2665{
2666 struct amdgpu_device *adev =
2667 container_of(__work, struct amdgpu_device, xgmi_reset_work);
c6a6e2db 2668 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
d4535e2c 2669
2670 /* It's a bug to not have a hive within this function */
2671 if (WARN_ON(!hive))
2672 return;
2673
2674 /*
2675 * Use task barrier to synchronize all xgmi reset works across the
2676 * hive. task_barrier_enter and task_barrier_exit will block
2677 * until all the threads running the xgmi reset works reach
2678 * those points. task_barrier_full will do both blocks.
2679 */
2680 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
2681
2682 task_barrier_enter(&hive->tb);
2683 adev->asic_reset_res = amdgpu_device_baco_enter(adev->ddev);
2684
2685 if (adev->asic_reset_res)
2686 goto fail;
2687
2688 task_barrier_exit(&hive->tb);
2689 adev->asic_reset_res = amdgpu_device_baco_exit(adev->ddev);
2690
2691 if (adev->asic_reset_res)
2692 goto fail;
2693 } else {
2694
2695 task_barrier_full(&hive->tb);
2696 adev->asic_reset_res = amdgpu_asic_reset(adev);
2697 }
ce316fa5 2698
c6a6e2db 2699fail:
d4535e2c 2700 if (adev->asic_reset_res)
fed184e9 2701 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
2702 adev->asic_reset_res, adev->ddev->unique);
2703}
2704
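/*
 * Editor's note: an illustrative userspace analogy for the task_barrier
 * usage above, written with a POSIX barrier. It only demonstrates the
 * two-rendezvous pattern (every device in the hive enters BACO before any
 * device exits it); it is not the driver's task_barrier API, and HIVE_SIZE
 * below is a stand-in for the number of XGMI-linked devices.
 */
#include <pthread.h>
#include <stdio.h>

#define HIVE_SIZE 2

static pthread_barrier_t hive_barrier;

static void *xgmi_reset_analogy(void *arg)
{
	long id = (long)arg;

	pthread_barrier_wait(&hive_barrier);	/* ~ task_barrier_enter() */
	printf("device %ld: entered baco\n", id);

	pthread_barrier_wait(&hive_barrier);	/* ~ task_barrier_exit() */
	printf("device %ld: exited baco\n", id);
	return NULL;
}

int main(void)
{
	pthread_t threads[HIVE_SIZE];
	long i;

	pthread_barrier_init(&hive_barrier, NULL, HIVE_SIZE);
	for (i = 0; i < HIVE_SIZE; i++)
		pthread_create(&threads[i], NULL, xgmi_reset_analogy, (void *)i);
	for (i = 0; i < HIVE_SIZE; i++)
		pthread_join(&threads[i], NULL);
	pthread_barrier_destroy(&hive_barrier);
	return 0;
}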
2705static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
2706{
2707 char *input = amdgpu_lockup_timeout;
2708 char *timeout_setting = NULL;
2709 int index = 0;
2710 long timeout;
2711 int ret = 0;
2712
2713 /*
2714 * By default timeout for non compute jobs is 10000.
2715 * And there is no timeout enforced on compute jobs.
2716 * In SR-IOV or passthrough mode, timeout for compute
2717 * jobs are 10000 by default.
2718 */
2719 adev->gfx_timeout = msecs_to_jiffies(10000);
2720 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
2721 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
2722 adev->compute_timeout = adev->gfx_timeout;
2723 else
2724 adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
2725
f440ff44 2726 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 2727 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 2728 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
2729 ret = kstrtol(timeout_setting, 0, &timeout);
2730 if (ret)
2731 return ret;
2732
2733 if (timeout == 0) {
2734 index++;
2735 continue;
2736 } else if (timeout < 0) {
2737 timeout = MAX_SCHEDULE_TIMEOUT;
2738 } else {
2739 timeout = msecs_to_jiffies(timeout);
2740 }
2741
2742 switch (index++) {
2743 case 0:
2744 adev->gfx_timeout = timeout;
2745 break;
2746 case 1:
2747 adev->compute_timeout = timeout;
2748 break;
2749 case 2:
2750 adev->sdma_timeout = timeout;
2751 break;
2752 case 3:
2753 adev->video_timeout = timeout;
2754 break;
2755 default:
2756 break;
2757 }
2758 }
2759 /*
2760 * There is only one value specified and
2761 * it should apply to all non-compute jobs.
2762 */
bcccee89 2763 if (index == 1) {
71f98027 2764 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
2765 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
2766 adev->compute_timeout = adev->gfx_timeout;
2767 }
2768 }
2769
2770 return ret;
2771}
d4535e2c 2772
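/*
 * Editor's note: a standalone userspace sketch of how a lockup_timeout
 * parameter string like the one parsed above is interpreted. The ordering
 * is gfx,compute,sdma,video; 0 keeps the default and a negative value means
 * "no timeout". It uses the BSD/GNU strsep() as the kernel code does, and
 * is an illustration only, not the kernel parser.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	char param[] = "10000,0,-1,20000";	/* e.g. amdgpu.lockup_timeout=... */
	const char *names[] = { "gfx", "compute", "sdma", "video" };
	char *input = param, *tok;
	int index = 0;

	while ((tok = strsep(&input, ",")) && index < 4) {
		long timeout = strtol(tok, NULL, 0);

		if (timeout == 0)
			printf("%s: keep default\n", names[index]);
		else if (timeout < 0)
			printf("%s: no timeout\n", names[index]);
		else
			printf("%s: %ld ms\n", names[index], timeout);
		index++;
	}
	return 0;
}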
2773/**
2774 * amdgpu_device_init - initialize the driver
2775 *
2776 * @adev: amdgpu_device pointer
87e3f136 2777 * @ddev: drm dev pointer
2778 * @pdev: pci dev pointer
2779 * @flags: driver flags
2780 *
2781 * Initializes the driver info and hw (all asics).
2782 * Returns 0 for success or an error on failure.
2783 * Called at driver startup.
2784 */
2785int amdgpu_device_init(struct amdgpu_device *adev,
2786 struct drm_device *ddev,
2787 struct pci_dev *pdev,
2788 uint32_t flags)
2789{
2790 int r, i;
3840c5bc 2791 bool boco = false;
95844d20 2792 u32 max_MBps;
2793
2794 adev->shutdown = false;
2795 adev->dev = &pdev->dev;
2796 adev->ddev = ddev;
2797 adev->pdev = pdev;
2798 adev->flags = flags;
2799
2800 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
2801 adev->asic_type = amdgpu_force_asic_type;
2802 else
2803 adev->asic_type = flags & AMD_ASIC_MASK;
2804
d38ceaf9 2805 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
2806 if (amdgpu_emu_mode == 1)
2807 adev->usec_timeout *= 2;
770d13b1 2808 adev->gmc.gart_size = 512 * 1024 * 1024;
2809 adev->accel_working = false;
2810 adev->num_rings = 0;
2811 adev->mman.buffer_funcs = NULL;
2812 adev->mman.buffer_funcs_ring = NULL;
2813 adev->vm_manager.vm_pte_funcs = NULL;
0c88b430 2814 adev->vm_manager.vm_pte_num_scheds = 0;
132f34e4 2815 adev->gmc.gmc_funcs = NULL;
f54d1867 2816 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 2817 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
2818
2819 adev->smc_rreg = &amdgpu_invalid_rreg;
2820 adev->smc_wreg = &amdgpu_invalid_wreg;
2821 adev->pcie_rreg = &amdgpu_invalid_rreg;
2822 adev->pcie_wreg = &amdgpu_invalid_wreg;
2823 adev->pciep_rreg = &amdgpu_invalid_rreg;
2824 adev->pciep_wreg = &amdgpu_invalid_wreg;
2825 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
2826 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
2827 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
2828 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
2829 adev->didt_rreg = &amdgpu_invalid_rreg;
2830 adev->didt_wreg = &amdgpu_invalid_wreg;
2831 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
2832 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
2833 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
2834 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
2835
2836 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
2837 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
2838 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
2839
2840 /* mutex initialization are all done here so we
2841 * can recall function without having locking issues */
d38ceaf9 2842 atomic_set(&adev->irq.ih.lock, 0);
0e5ca0d1 2843 mutex_init(&adev->firmware.mutex);
2844 mutex_init(&adev->pm.mutex);
2845 mutex_init(&adev->gfx.gpu_clock_mutex);
2846 mutex_init(&adev->srbm_mutex);
b8866c26 2847 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 2848 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 2849 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 2850 mutex_init(&adev->mn_lock);
e23b74aa 2851 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 2852 hash_init(adev->mn_hash);
13a752e3 2853 mutex_init(&adev->lock_reset);
32eaeae0 2854 mutex_init(&adev->psp.mutex);
d38ceaf9 2855
2856 r = amdgpu_device_check_arguments(adev);
2857 if (r)
2858 return r;
d38ceaf9 2859
2860 spin_lock_init(&adev->mmio_idx_lock);
2861 spin_lock_init(&adev->smc_idx_lock);
2862 spin_lock_init(&adev->pcie_idx_lock);
2863 spin_lock_init(&adev->uvd_ctx_idx_lock);
2864 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 2865 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 2866 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 2867 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 2868 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 2869
2870 INIT_LIST_HEAD(&adev->shadow_list);
2871 mutex_init(&adev->shadow_list_lock);
2872
2873 INIT_LIST_HEAD(&adev->ring_lru_list);
2874 spin_lock_init(&adev->ring_lru_list_lock);
2875
2876 INIT_DELAYED_WORK(&adev->delayed_init_work,
2877 amdgpu_device_delayed_init_work_handler);
2878 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
2879 amdgpu_device_delay_enable_gfx_off);
2dc80b00 2880
2881 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
2882
d23ee13f 2883 adev->gfx.gfx_off_req_count = 1;
2884 adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false;
2885
2886 /* Registers mapping */
2887 /* TODO: block userspace mapping of io register */
2888 if (adev->asic_type >= CHIP_BONAIRE) {
2889 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
2890 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
2891 } else {
2892 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
2893 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
2894 }
d38ceaf9 2895
2896 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
2897 if (adev->rmmio == NULL) {
2898 return -ENOMEM;
2899 }
2900 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
2901 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
2902
2903 /* io port mapping */
2904 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
2905 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
2906 adev->rio_mem_size = pci_resource_len(adev->pdev, i);
2907 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
2908 break;
2909 }
2910 }
2911 if (adev->rio_mem == NULL)
b64a18c5 2912 DRM_INFO("PCI I/O BAR is not found.\n");
d38ceaf9 2913
2914 /* enable PCIE atomic ops */
2915 r = pci_enable_atomic_ops_to_root(adev->pdev,
2916 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
2917 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
2918 if (r) {
2919 adev->have_atomics_support = false;
2920 DRM_INFO("PCIE atomic ops is not supported\n");
2921 } else {
2922 adev->have_atomics_support = true;
2923 }
2924
2925 amdgpu_device_get_pcie_info(adev);
2926
2927 if (amdgpu_mcbp)
2928 DRM_INFO("MCBP is enabled\n");
2929
2930 if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
2931 adev->enable_mes = true;
2932
f54eeab4 2933 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) {
2934 r = amdgpu_discovery_init(adev);
2935 if (r) {
2936 dev_err(adev->dev, "amdgpu_discovery_init failed\n");
2937 return r;
2938 }
2939 }
2940
d38ceaf9 2941 /* early init functions */
06ec9070 2942 r = amdgpu_device_ip_early_init(adev);
2943 if (r)
2944 return r;
2945
2946 r = amdgpu_device_get_job_timeout_settings(adev);
2947 if (r) {
2948 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
2949 return r;
2950 }
2951
2952 /* doorbell bar mapping and doorbell index init*/
2953 amdgpu_device_doorbell_init(adev);
2954
2955 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
2956 /* this will fail for cards that aren't VGA class devices, just
2957 * ignore it */
06ec9070 2958 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
d38ceaf9 2959
31af062a 2960 if (amdgpu_device_supports_boco(ddev))
2961 boco = true;
2962 if (amdgpu_has_atpx() &&
2963 (amdgpu_is_atpx_hybrid() ||
2964 amdgpu_has_atpx_dgpu_power_cntl()) &&
2965 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 2966 vga_switcheroo_register_client(adev->pdev,
2967 &amdgpu_switcheroo_ops, boco);
2968 if (boco)
2969 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
2970
2971 if (amdgpu_emu_mode == 1) {
2972 /* post the asic on emulation mode */
2973 emu_soc_asic_init(adev);
bfca0289 2974 goto fence_driver_init;
9475a943 2975 }
bfca0289 2976
2977 /* detect if we are with an SRIOV vbios */
2978 amdgpu_device_detect_sriov_bios(adev);
048765ad 2979
2980 /* check if we need to reset the asic
2981 * E.g., driver was not cleanly unloaded previously, etc.
2982 */
f14899fd 2983 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
2984 r = amdgpu_asic_reset(adev);
2985 if (r) {
2986 dev_err(adev->dev, "asic reset on init failed\n");
2987 goto failed;
2988 }
2989 }
2990
d38ceaf9 2991 /* Post card if necessary */
39c640c0 2992 if (amdgpu_device_need_post(adev)) {
d38ceaf9 2993 if (!adev->bios) {
bec86378 2994 dev_err(adev->dev, "no vBIOS found\n");
2995 r = -EINVAL;
2996 goto failed;
d38ceaf9 2997 }
bec86378 2998 DRM_INFO("GPU posting now...\n");
2999 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3000 if (r) {
3001 dev_err(adev->dev, "gpu post error!\n");
3002 goto failed;
3003 }
3004 }
3005
3006 if (adev->is_atom_fw) {
3007 /* Initialize clocks */
3008 r = amdgpu_atomfirmware_get_clock_info(adev);
3009 if (r) {
3010 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 3011 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3012 goto failed;
3013 }
3014 } else {
a5bde2f9
AD
3015 /* Initialize clocks */
3016 r = amdgpu_atombios_get_clock_info(adev);
3017 if (r) {
3018 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 3019 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 3020 goto failed;
3021 }
3022 /* init i2c buses */
3023 if (!amdgpu_device_has_dc_support(adev))
3024 amdgpu_atombios_i2c_init(adev);
2c1a2784 3025 }
d38ceaf9 3026
bfca0289 3027fence_driver_init:
3028 /* Fence driver */
3029 r = amdgpu_fence_driver_init(adev);
3030 if (r) {
3031 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
e23b74aa 3032 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3033 goto failed;
2c1a2784 3034 }
3035
3036 /* init the mode config */
3037 drm_mode_config_init(adev->ddev);
3038
06ec9070 3039 r = amdgpu_device_ip_init(adev);
d38ceaf9 3040 if (r) {
8840a387 3041 /* failed in exclusive mode due to timeout */
3042 if (amdgpu_sriov_vf(adev) &&
3043 !amdgpu_sriov_runtime(adev) &&
3044 amdgpu_virt_mmio_blocked(adev) &&
3045 !amdgpu_virt_wait_reset(adev)) {
3046 dev_err(adev->dev, "VF exclusive mode timeout\n");
3047 /* Don't send request since VF is inactive. */
3048 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3049 adev->virt.ops = NULL;
8840a387 3050 r = -EAGAIN;
3051 goto failed;
3052 }
06ec9070 3053 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3054 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
83ba126a 3055 goto failed;
3056 }
3057
3058 DRM_DEBUG("SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
3059 adev->gfx.config.max_shader_engines,
3060 adev->gfx.config.max_sh_per_se,
3061 adev->gfx.config.max_cu_per_sh,
3062 adev->gfx.cu_info.number);
3063
3064 amdgpu_ctx_init_sched(adev);
3065
3066 adev->accel_working = true;
3067
3068 amdgpu_vm_check_compute_bug(adev);
3069
3070 /* Initialize the buffer migration limit. */
3071 if (amdgpu_moverate >= 0)
3072 max_MBps = amdgpu_moverate;
3073 else
3074 max_MBps = 8; /* Allow 8 MB/s. */
3075 /* Get a log2 for easy divisions. */
3076 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3077
3078 amdgpu_fbdev_init(adev);
3079
d2f52ac8 3080 r = amdgpu_pm_sysfs_init(adev);
3081 if (r) {
3082 adev->pm_sysfs_en = false;
d2f52ac8 3083 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
3084 } else
3085 adev->pm_sysfs_en = true;
d2f52ac8 3086
5bb23532 3087 r = amdgpu_ucode_sysfs_init(adev);
3088 if (r) {
3089 adev->ucode_sysfs_en = false;
5bb23532 3090 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
3091 } else
3092 adev->ucode_sysfs_en = true;
5bb23532 3093
75758255 3094 r = amdgpu_debugfs_gem_init(adev);
3f14e623 3095 if (r)
d38ceaf9 3096 DRM_ERROR("registering gem debugfs failed (%d).\n", r);
3097
3098 r = amdgpu_debugfs_regs_init(adev);
3f14e623 3099 if (r)
d38ceaf9 3100 DRM_ERROR("registering register debugfs failed (%d).\n", r);
d38ceaf9 3101
50ab2533 3102 r = amdgpu_debugfs_firmware_init(adev);
3f14e623 3103 if (r)
50ab2533 3104 DRM_ERROR("registering firmware debugfs failed (%d).\n", r);
50ab2533 3105
763efb6c 3106 r = amdgpu_debugfs_init(adev);
db95e218 3107 if (r)
763efb6c 3108 DRM_ERROR("Creating debugfs files failed (%d).\n", r);
db95e218 3109
3110 if ((amdgpu_testing & 1)) {
3111 if (adev->accel_working)
3112 amdgpu_test_moves(adev);
3113 else
3114 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
3115 }
3116 if (amdgpu_benchmarking) {
3117 if (adev->accel_working)
3118 amdgpu_benchmark(adev, amdgpu_benchmarking);
3119 else
3120 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
3121 }
3122
3123 /*
3124 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3125 * Otherwise the mgpu fan boost feature will be skipped due to the
3126 * gpu instance is counted less.
3127 */
3128 amdgpu_register_gpu_instance(adev);
3129
3130 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3131 * explicit gating rather than handling it automatically.
3132 */
06ec9070 3133 r = amdgpu_device_ip_late_init(adev);
2c1a2784 3134 if (r) {
06ec9070 3135 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
e23b74aa 3136 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
83ba126a 3137 goto failed;
2c1a2784 3138 }
d38ceaf9 3139
108c6a63 3140 /* must succeed. */
511fdbc3 3141 amdgpu_ras_resume(adev);
108c6a63 3142
3143 queue_delayed_work(system_wq, &adev->delayed_init_work,
3144 msecs_to_jiffies(AMDGPU_RESUME_MS));
3145
3146 r = device_create_file(adev->dev, &dev_attr_pcie_replay_count);
3147 if (r) {
3148 dev_err(adev->dev, "Could not create pcie_replay_count");
3149 return r;
3150 }
108c6a63 3151
3152 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3153 r = amdgpu_pmu_init(adev);
3154 if (r)
3155 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3156
d38ceaf9 3157 return 0;
3158
3159failed:
89041940 3160 amdgpu_vf_error_trans_all(adev);
3840c5bc 3161 if (boco)
83ba126a 3162 vga_switcheroo_fini_domain_pm_ops(adev->dev);
8840a387 3163
83ba126a 3164 return r;
3165}
3166
3167/**
3168 * amdgpu_device_fini - tear down the driver
3169 *
3170 * @adev: amdgpu_device pointer
3171 *
3172 * Tear down the driver info (all asics).
3173 * Called at driver shutdown.
3174 */
3175void amdgpu_device_fini(struct amdgpu_device *adev)
3176{
3177 int r;
3178
3179 DRM_INFO("amdgpu: finishing device.\n");
9f875167 3180 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 3181 adev->shutdown = true;
9f875167 3182
3183 /* disable all interrupts */
3184 amdgpu_irq_disable_all(adev);
3185 if (adev->mode_info.mode_config_initialized){
3186 if (!amdgpu_device_has_dc_support(adev))
c2d88e06 3187 drm_helper_force_disable_all(adev->ddev);
3188 else
3189 drm_atomic_helper_shutdown(adev->ddev);
3190 }
d38ceaf9 3191 amdgpu_fence_driver_fini(adev);
3192 if (adev->pm_sysfs_en)
3193 amdgpu_pm_sysfs_fini(adev);
d38ceaf9 3194 amdgpu_fbdev_fini(adev);
06ec9070 3195 r = amdgpu_device_ip_fini(adev);
3196 if (adev->firmware.gpu_info_fw) {
3197 release_firmware(adev->firmware.gpu_info_fw);
3198 adev->firmware.gpu_info_fw = NULL;
3199 }
3200 adev->accel_working = false;
3201 /* free i2c buses */
3202 if (!amdgpu_device_has_dc_support(adev))
3203 amdgpu_i2c_fini(adev);
3204
3205 if (amdgpu_emu_mode != 1)
3206 amdgpu_atombios_fini(adev);
3207
3208 kfree(adev->bios);
3209 adev->bios = NULL;
3210 if (amdgpu_has_atpx() &&
3211 (amdgpu_is_atpx_hybrid() ||
3212 amdgpu_has_atpx_dgpu_power_cntl()) &&
3213 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 3214 vga_switcheroo_unregister_client(adev->pdev);
3840c5bc 3215 if (amdgpu_device_supports_boco(adev->ddev))
83ba126a 3216 vga_switcheroo_fini_domain_pm_ops(adev->dev);
3217 vga_client_register(adev->pdev, NULL, NULL, NULL);
3218 if (adev->rio_mem)
3219 pci_iounmap(adev->pdev, adev->rio_mem);
3220 adev->rio_mem = NULL;
3221 iounmap(adev->rmmio);
3222 adev->rmmio = NULL;
06ec9070 3223 amdgpu_device_doorbell_fini(adev);
e9bc1bf7 3224
d38ceaf9 3225 amdgpu_debugfs_regs_cleanup(adev);
dcea6e65 3226 device_remove_file(adev->dev, &dev_attr_pcie_replay_count);
3227 if (adev->ucode_sysfs_en)
3228 amdgpu_ucode_sysfs_fini(adev);
3229 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3230 amdgpu_pmu_fini(adev);
6698a3d0 3231 amdgpu_debugfs_preempt_cleanup(adev);
f54eeab4 3232 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
a190d1c7 3233 amdgpu_discovery_fini(adev);
3234}
3235
3236
3237/*
3238 * Suspend & resume.
3239 */
3240/**
810ddc3a 3241 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 3242 *
3243 * @dev: drm dev pointer
3244 * @suspend: suspend state
3245 * @fbcon : notify the fbdev of suspend
3246 *
3247 * Puts the hw in the suspend state (all asics).
3248 * Returns 0 for success or an error on failure.
3249 * Called at driver suspend.
3250 */
de185019 3251int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
3252{
3253 struct amdgpu_device *adev;
3254 struct drm_crtc *crtc;
3255 struct drm_connector *connector;
f8d2d39e 3256 struct drm_connector_list_iter iter;
5ceb54c6 3257 int r;
3258
3259 if (dev == NULL || dev->dev_private == NULL) {
3260 return -ENODEV;
3261 }
3262
3263 adev = dev->dev_private;
3264
3265 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3266 return 0;
3267
44779b43 3268 adev->in_suspend = true;
3269 drm_kms_helper_poll_disable(dev);
3270
3271 if (fbcon)
3272 amdgpu_fbdev_set_suspend(adev, 1);
3273
beff74bc 3274 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 3275
3276 if (!amdgpu_device_has_dc_support(adev)) {
3277 /* turn off display hw */
3278 drm_modeset_lock_all(dev);
3279 drm_connector_list_iter_begin(dev, &iter);
3280 drm_for_each_connector_iter(connector, &iter)
3281 drm_helper_connector_dpms(connector,
3282 DRM_MODE_DPMS_OFF);
3283 drm_connector_list_iter_end(&iter);
4562236b 3284 drm_modeset_unlock_all(dev);
3285 /* unpin the front buffers and cursors */
3286 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3287 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3288 struct drm_framebuffer *fb = crtc->primary->fb;
3289 struct amdgpu_bo *robj;
3290
91334223 3291 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
3292 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3293 r = amdgpu_bo_reserve(aobj, true);
3294 if (r == 0) {
3295 amdgpu_bo_unpin(aobj);
3296 amdgpu_bo_unreserve(aobj);
3297 }
756e6880 3298 }
756e6880 3299
3300 if (fb == NULL || fb->obj[0] == NULL) {
3301 continue;
3302 }
3303 robj = gem_to_amdgpu_bo(fb->obj[0]);
3304 /* don't unpin kernel fb objects */
3305 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
3306 r = amdgpu_bo_reserve(robj, true);
3307 if (r == 0) {
3308 amdgpu_bo_unpin(robj);
3309 amdgpu_bo_unreserve(robj);
3310 }
3311 }
3312 }
3313 }
3314
3315 amdgpu_amdkfd_suspend(adev);
3316
5e6932fe 3317 amdgpu_ras_suspend(adev);
3318
3319 r = amdgpu_device_ip_suspend_phase1(adev);
3320
3321 /* evict vram memory */
3322 amdgpu_bo_evict_vram(adev);
3323
5ceb54c6 3324 amdgpu_fence_driver_suspend(adev);
d38ceaf9 3325
fe1053b7 3326 r = amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 3327
3328 /* evict remaining vram memory
3329 * This second call to evict vram is to evict the gart page table
3330 * using the CPU.
3331 */
3332 amdgpu_bo_evict_vram(adev);
3333
3334 return 0;
3335}
3336
3337/**
810ddc3a 3338 * amdgpu_device_resume - initiate device resume
d38ceaf9 3339 *
3340 * @dev: drm dev pointer
3341 * @resume: resume state
3342 * @fbcon : notify the fbdev of resume
3343 *
3344 * Bring the hw back to operating state (all asics).
3345 * Returns 0 for success or an error on failure.
3346 * Called at driver resume.
3347 */
de185019 3348int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
3349{
3350 struct drm_connector *connector;
f8d2d39e 3351 struct drm_connector_list_iter iter;
d38ceaf9 3352 struct amdgpu_device *adev = dev->dev_private;
756e6880 3353 struct drm_crtc *crtc;
03161a6e 3354 int r = 0;
3355
3356 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3357 return 0;
3358
d38ceaf9 3359 /* post card */
39c640c0 3360 if (amdgpu_device_need_post(adev)) {
74b0b157 3361 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3362 if (r)
3363 DRM_ERROR("amdgpu asic init failed\n");
3364 }
d38ceaf9 3365
06ec9070 3366 r = amdgpu_device_ip_resume(adev);
e6707218 3367 if (r) {
06ec9070 3368 DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
4d3b9ae5 3369 return r;
e6707218 3370 }
5ceb54c6
AD
3371 amdgpu_fence_driver_resume(adev);
3372
d38ceaf9 3373
06ec9070 3374 r = amdgpu_device_ip_late_init(adev);
03161a6e 3375 if (r)
4d3b9ae5 3376 return r;
d38ceaf9 3377
beff74bc
AD
3378 queue_delayed_work(system_wq, &adev->delayed_init_work,
3379 msecs_to_jiffies(AMDGPU_RESUME_MS));
3380
fe1053b7
AD
3381 if (!amdgpu_device_has_dc_support(adev)) {
3382 /* pin cursors */
3383 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3384 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3385
91334223 3386 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3387 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3388 r = amdgpu_bo_reserve(aobj, true);
3389 if (r == 0) {
3390 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
3391 if (r != 0)
3392 DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
3393 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
3394 amdgpu_bo_unreserve(aobj);
3395 }
756e6880
AD
3396 }
3397 }
3398 }
ba997709
YZ
3399 r = amdgpu_amdkfd_resume(adev);
3400 if (r)
3401 return r;
756e6880 3402
96a5d8d4 3403 /* Make sure IB tests flushed */
beff74bc 3404 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 3405
d38ceaf9
AD
3406 /* blat the mode back in */
3407 if (fbcon) {
4562236b
HW
3408 if (!amdgpu_device_has_dc_support(adev)) {
3409 /* pre DCE11 */
3410 drm_helper_resume_force_mode(dev);
3411
3412 /* turn on display hw */
3413 drm_modeset_lock_all(dev);
f8d2d39e
LP
3414
3415 drm_connector_list_iter_begin(dev, &iter);
3416 drm_for_each_connector_iter(connector, &iter)
3417 drm_helper_connector_dpms(connector,
3418 DRM_MODE_DPMS_ON);
3419 drm_connector_list_iter_end(&iter);
3420
4562236b 3421 drm_modeset_unlock_all(dev);
d38ceaf9 3422 }
4d3b9ae5 3423 amdgpu_fbdev_set_suspend(adev, 0);
d38ceaf9
AD
3424 }
3425
3426 drm_kms_helper_poll_enable(dev);
23a1a9e5 3427
5e6932fe 3428 amdgpu_ras_resume(adev);
3429
23a1a9e5
L
3430 /*
3431 * Most of the connector probing functions try to acquire runtime pm
3432 * refs to ensure that the GPU is powered on when connector polling is
3433 * performed. Since we're calling this from a runtime PM callback,
3434 * trying to acquire rpm refs will cause us to deadlock.
3435 *
3436 * Since we're guaranteed to be holding the rpm lock, it's safe to
3437 * temporarily disable the rpm helpers so this doesn't deadlock us.
3438 */
3439#ifdef CONFIG_PM
3440 dev->dev->power.disable_depth++;
3441#endif
4562236b
HW
3442 if (!amdgpu_device_has_dc_support(adev))
3443 drm_helper_hpd_irq_event(dev);
3444 else
3445 drm_kms_helper_hotplug_event(dev);
23a1a9e5
L
3446#ifdef CONFIG_PM
3447 dev->dev->power.disable_depth--;
3448#endif
44779b43
RZ
3449 adev->in_suspend = false;
3450
4d3b9ae5 3451 return 0;
d38ceaf9
AD
3452}
3453
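/*
 * Illustrative sketch (editor addition, not part of the original file):
 * the matching resume hook hands the drm_device back to
 * amdgpu_device_resume(). Names are assumptions for the example only.
 */
static int example_pmops_resume(struct device *dev)
{
	struct drm_device *drm_dev = dev_get_drvdata(dev);

	return amdgpu_device_resume(drm_dev, true);
}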
e3ecdffa
AD
3454/**
3455 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
3456 *
3457 * @adev: amdgpu_device pointer
3458 *
3459 * The list of all the hardware IPs that make up the asic is walked and
3460 * the check_soft_reset callbacks are run. check_soft_reset determines
3461 * if the asic is still hung or not.
3462 * Returns true if any of the IPs are still in a hung state, false if not.
3463 */
06ec9070 3464static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
3465{
3466 int i;
3467 bool asic_hang = false;
3468
f993d628
ML
3469 if (amdgpu_sriov_vf(adev))
3470 return true;
3471
8bc04c29
AD
3472 if (amdgpu_asic_need_full_reset(adev))
3473 return true;
3474
63fbf42f 3475 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3476 if (!adev->ip_blocks[i].status.valid)
63fbf42f 3477 continue;
a1255107
AD
3478 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
3479 adev->ip_blocks[i].status.hang =
3480 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
3481 if (adev->ip_blocks[i].status.hang) {
3482 DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
3483 asic_hang = true;
3484 }
3485 }
3486 return asic_hang;
3487}
3488
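/*
 * Illustrative sketch (editor addition, not part of the original file):
 * a minimal check_soft_reset callback as an IP block could provide it
 * through amd_ip_funcs. The stubbed status check is an assumption; real
 * implementations read their hardware status registers to decide whether
 * the block is hung.
 */
static bool example_ip_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool hung = false;

	/* real code would inspect adev's block status registers here */
	(void)adev;

	return hung;
}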
e3ecdffa
AD
3489/**
3490 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3491 *
3492 * @adev: amdgpu_device pointer
3493 *
3494 * The list of all the hardware IPs that make up the asic is walked and the
3495 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
3496 * handles any IP specific hardware or software state changes that are
3497 * necessary for a soft reset to succeed.
3498 * Returns 0 on success, negative error code on failure.
3499 */
06ec9070 3500static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
3501{
3502 int i, r = 0;
3503
3504 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3505 if (!adev->ip_blocks[i].status.valid)
d31a501e 3506 continue;
a1255107
AD
3507 if (adev->ip_blocks[i].status.hang &&
3508 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3509 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
3510 if (r)
3511 return r;
3512 }
3513 }
3514
3515 return 0;
3516}
3517
e3ecdffa
AD
3518/**
3519 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3520 *
3521 * @adev: amdgpu_device pointer
3522 *
3523 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
3524 * reset is necessary to recover.
3525 * Returns true if a full asic reset is required, false if not.
3526 */
06ec9070 3527static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 3528{
da146d3b
AD
3529 int i;
3530
8bc04c29
AD
3531 if (amdgpu_asic_need_full_reset(adev))
3532 return true;
3533
da146d3b 3534 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3535 if (!adev->ip_blocks[i].status.valid)
da146d3b 3536 continue;
a1255107
AD
3537 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3538 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3539 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
3540 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3541 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 3542 if (adev->ip_blocks[i].status.hang) {
da146d3b
AD
3543 				DRM_INFO("Some blocks need a full reset!\n");
3544 return true;
3545 }
3546 }
35d782fe
CZ
3547 }
3548 return false;
3549}
3550
e3ecdffa
AD
3551/**
3552 * amdgpu_device_ip_soft_reset - do a soft reset
3553 *
3554 * @adev: amdgpu_device pointer
3555 *
3556 * The list of all the hardware IPs that make up the asic is walked and the
3557 * soft_reset callbacks are run if the block is hung. soft_reset handles any
3558 * IP specific hardware or software state changes that are necessary to soft
3559 * reset the IP.
3560 * Returns 0 on success, negative error code on failure.
3561 */
06ec9070 3562static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3563{
3564 int i, r = 0;
3565
3566 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3567 if (!adev->ip_blocks[i].status.valid)
35d782fe 3568 continue;
a1255107
AD
3569 if (adev->ip_blocks[i].status.hang &&
3570 adev->ip_blocks[i].version->funcs->soft_reset) {
3571 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
3572 if (r)
3573 return r;
3574 }
3575 }
3576
3577 return 0;
3578}
3579
e3ecdffa
AD
3580/**
3581 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
3582 *
3583 * @adev: amdgpu_device pointer
3584 *
3585 * The list of all the hardware IPs that make up the asic is walked and the
3586 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
3587 * handles any IP specific hardware or software state changes that are
3588 * necessary after the IP has been soft reset.
3589 * Returns 0 on success, negative error code on failure.
3590 */
06ec9070 3591static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3592{
3593 int i, r = 0;
3594
3595 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3596 if (!adev->ip_blocks[i].status.valid)
35d782fe 3597 continue;
a1255107
AD
3598 if (adev->ip_blocks[i].status.hang &&
3599 adev->ip_blocks[i].version->funcs->post_soft_reset)
3600 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
3601 if (r)
3602 return r;
3603 }
3604
3605 return 0;
3606}
3607
e3ecdffa 3608/**
c33adbc7 3609 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
3610 *
3611 * @adev: amdgpu_device pointer
3612 *
3613 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
3614 * restore things like GPUVM page tables after a GPU reset where
3615 * the contents of VRAM might be lost.
403009bf
CK
3616 *
3617 * Returns:
3618 * 0 on success, negative error code on failure.
e3ecdffa 3619 */
c33adbc7 3620static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 3621{
c41d1cf6 3622 struct dma_fence *fence = NULL, *next = NULL;
403009bf
CK
3623 struct amdgpu_bo *shadow;
3624 long r = 1, tmo;
c41d1cf6
ML
3625
3626 if (amdgpu_sriov_runtime(adev))
b045d3af 3627 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
3628 else
3629 tmo = msecs_to_jiffies(100);
3630
3631 DRM_INFO("recover vram bo from shadow start\n");
3632 mutex_lock(&adev->shadow_list_lock);
403009bf
CK
3633 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
3634
3635 /* No need to recover an evicted BO */
3636 if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
b575f10d 3637 shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
403009bf
CK
3638 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
3639 continue;
3640
3641 r = amdgpu_bo_restore_shadow(shadow, &next);
3642 if (r)
3643 break;
3644
c41d1cf6 3645 if (fence) {
1712fb1a 3646 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
3647 dma_fence_put(fence);
3648 fence = next;
1712fb1a 3649 if (tmo == 0) {
3650 r = -ETIMEDOUT;
c41d1cf6 3651 break;
1712fb1a 3652 } else if (tmo < 0) {
3653 r = tmo;
3654 break;
3655 }
403009bf
CK
3656 } else {
3657 fence = next;
c41d1cf6 3658 }
c41d1cf6
ML
3659 }
3660 mutex_unlock(&adev->shadow_list_lock);
3661
403009bf
CK
3662 if (fence)
3663 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
3664 dma_fence_put(fence);
3665
1712fb1a 3666 if (r < 0 || tmo <= 0) {
3667 DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
3668 return -EIO;
3669 }
c41d1cf6 3670
403009bf
CK
3671 DRM_INFO("recover vram bo from shadow done\n");
3672 return 0;
c41d1cf6
ML
3673}
3674
a90ad3c2 3675
e3ecdffa 3676/**
06ec9070 3677 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e
ML
3678 *
3679 * @adev: amdgpu device pointer
87e3f136 3680 * @from_hypervisor: request from hypervisor
5740682e
ML
3681 *
3682 * Do a VF FLR and reinitialize the ASIC.
3683 * Returns 0 on success, otherwise an error code.
e3ecdffa
AD
3684 */
3685static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
3686 bool from_hypervisor)
5740682e
ML
3687{
3688 int r;
3689
3690 if (from_hypervisor)
3691 r = amdgpu_virt_request_full_gpu(adev, true);
3692 else
3693 r = amdgpu_virt_reset_gpu(adev);
3694 if (r)
3695 return r;
a90ad3c2
ML
3696
3697 /* Resume IP prior to SMC */
06ec9070 3698 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
3699 if (r)
3700 goto error;
a90ad3c2 3701
c9ffa427 3702 amdgpu_virt_init_data_exchange(adev);
a90ad3c2 3703 /* we need recover gart prior to run SMC/CP/SDMA resume */
c1c7ce8f 3704 amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
a90ad3c2 3705
7a3e0bb2
RZ
3706 r = amdgpu_device_fw_loading(adev);
3707 if (r)
3708 return r;
3709
a90ad3c2 3710 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 3711 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
3712 if (r)
3713 goto error;
a90ad3c2
ML
3714
3715 amdgpu_irq_gpu_reset_resume_helper(adev);
5740682e 3716 r = amdgpu_ib_ring_tests(adev);
f81e8d53 3717 amdgpu_amdkfd_post_reset(adev);
a90ad3c2 3718
abc34253
ED
3719error:
3720 amdgpu_virt_release_full_gpu(adev, true);
c41d1cf6 3721 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 3722 amdgpu_inc_vram_lost(adev);
c33adbc7 3723 r = amdgpu_device_recover_vram(adev);
a90ad3c2
ML
3724 }
3725
3726 return r;
3727}
3728
12938fad
CK
3729/**
3730 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
3731 *
3732 * @adev: amdgpu device pointer
3733 *
3734 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
3735 * a hung GPU.
3736 */
3737bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
3738{
3739 if (!amdgpu_device_ip_check_soft_reset(adev)) {
3740 DRM_INFO("Timeout, but no hardware hang detected.\n");
3741 return false;
3742 }
3743
3ba7b418
AG
3744 if (amdgpu_gpu_recovery == 0)
3745 goto disabled;
3746
3747 if (amdgpu_sriov_vf(adev))
3748 return true;
3749
3750 if (amdgpu_gpu_recovery == -1) {
3751 switch (adev->asic_type) {
fc42d47c
AG
3752 case CHIP_BONAIRE:
3753 case CHIP_HAWAII:
3ba7b418
AG
3754 case CHIP_TOPAZ:
3755 case CHIP_TONGA:
3756 case CHIP_FIJI:
3757 case CHIP_POLARIS10:
3758 case CHIP_POLARIS11:
3759 case CHIP_POLARIS12:
3760 case CHIP_VEGAM:
3761 case CHIP_VEGA20:
3762 case CHIP_VEGA10:
3763 case CHIP_VEGA12:
c43b849f 3764 case CHIP_RAVEN:
3ba7b418
AG
3765 break;
3766 default:
3767 goto disabled;
3768 }
12938fad
CK
3769 }
3770
3771 return true;
3ba7b418
AG
3772
3773disabled:
3774 DRM_INFO("GPU recovery disabled.\n");
3775 return false;
12938fad
CK
3776}
3777
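/*
 * Illustrative sketch (editor addition, not part of the original file):
 * a ring timeout handler typically gates the heavy recovery path on
 * amdgpu_device_should_recover_gpu(), similar to amdgpu_job_timedout().
 * The function name below is an assumption for the example.
 */
static void example_ring_timeout(struct amdgpu_ring *ring, struct amdgpu_job *job)
{
	struct amdgpu_device *adev = ring->adev;

	if (amdgpu_device_should_recover_gpu(adev))
		amdgpu_device_gpu_recover(adev, job);
	else
		DRM_INFO("Skipping GPU recovery\n");
}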
5c6dd71e 3778
26bc5340
AG
3779static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
3780 struct amdgpu_job *job,
3781 bool *need_full_reset_arg)
3782{
3783 int i, r = 0;
3784 bool need_full_reset = *need_full_reset_arg;
71182665 3785
71182665 3786 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
3787 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3788 struct amdgpu_ring *ring = adev->rings[i];
3789
51687759 3790 if (!ring || !ring->sched.thread)
0875dc9e 3791 continue;
5740682e 3792
2f9d4084
ML
3793 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
3794 amdgpu_fence_driver_force_completion(ring);
0875dc9e 3795 }
d38ceaf9 3796
222b5f04
AG
3797 	if (job)
3798 drm_sched_increase_karma(&job->base);
3799
1d721ed6 3800 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
3801 if (!amdgpu_sriov_vf(adev)) {
3802
3803 if (!need_full_reset)
3804 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
3805
3806 if (!need_full_reset) {
3807 amdgpu_device_ip_pre_soft_reset(adev);
3808 r = amdgpu_device_ip_soft_reset(adev);
3809 amdgpu_device_ip_post_soft_reset(adev);
3810 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
3811 DRM_INFO("soft reset failed, will fallback to full reset!\n");
3812 need_full_reset = true;
3813 }
3814 }
3815
3816 if (need_full_reset)
3817 r = amdgpu_device_ip_suspend(adev);
3818
3819 *need_full_reset_arg = need_full_reset;
3820 }
3821
3822 return r;
3823}
3824
041a62bc 3825static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
26bc5340
AG
3826 struct list_head *device_list_handle,
3827 bool *need_full_reset_arg)
3828{
3829 struct amdgpu_device *tmp_adev = NULL;
3830 bool need_full_reset = *need_full_reset_arg, vram_lost = false;
3831 int r = 0;
3832
3833 /*
3834 * ASIC reset has to be done on all XGMI hive nodes ASAP
3835 * to allow proper link negotiation in FW (within 1 sec)
3836 */
3837 if (need_full_reset) {
3838 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
041a62bc 3839 /* For XGMI run all resets in parallel to speed up the process */
d4535e2c 3840 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
c96cf282 3841 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
d4535e2c
AG
3842 r = -EALREADY;
3843 } else
3844 r = amdgpu_asic_reset(tmp_adev);
d4535e2c 3845
041a62bc
AG
3846 if (r) {
3847 DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s",
3848 r, tmp_adev->ddev->unique);
3849 break;
ce316fa5
LM
3850 }
3851 }
3852
041a62bc
AG
3853 /* For XGMI wait for all resets to complete before proceed */
3854 if (!r) {
ce316fa5
LM
3855 list_for_each_entry(tmp_adev, device_list_handle,
3856 gmc.xgmi.head) {
3857 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
3858 flush_work(&tmp_adev->xgmi_reset_work);
3859 r = tmp_adev->asic_reset_res;
3860 if (r)
3861 break;
ce316fa5
LM
3862 }
3863 }
3864 }
ce316fa5 3865 }
26bc5340 3866
00eaa571
LM
3867 if (!r && amdgpu_ras_intr_triggered())
3868 amdgpu_ras_intr_cleared();
3869
26bc5340
AG
3870 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3871 if (need_full_reset) {
3872 /* post card */
3873 if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
3874 DRM_WARN("asic atom init failed!");
3875
3876 if (!r) {
3877 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
3878 r = amdgpu_device_ip_resume_phase1(tmp_adev);
3879 if (r)
3880 goto out;
3881
3882 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
3883 if (vram_lost) {
77e7f829 3884 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 3885 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
3886 }
3887
3888 r = amdgpu_gtt_mgr_recover(
3889 &tmp_adev->mman.bdev.man[TTM_PL_TT]);
3890 if (r)
3891 goto out;
3892
3893 r = amdgpu_device_fw_loading(tmp_adev);
3894 if (r)
3895 return r;
3896
3897 r = amdgpu_device_ip_resume_phase2(tmp_adev);
3898 if (r)
3899 goto out;
3900
3901 if (vram_lost)
3902 amdgpu_device_fill_reset_magic(tmp_adev);
3903
fdafb359
EQ
3904 /*
3905 * Add this ASIC back as tracked since the reset has already
3906 * completed successfully.
3907 */
3908 amdgpu_register_gpu_instance(tmp_adev);
3909
7c04ca50 3910 r = amdgpu_device_ip_late_init(tmp_adev);
3911 if (r)
3912 goto out;
3913
e79a04d5 3914 /* must succeed. */
511fdbc3 3915 amdgpu_ras_resume(tmp_adev);
e79a04d5 3916
26bc5340
AG
3917 /* Update PSP FW topology after reset */
3918 if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
3919 r = amdgpu_xgmi_update_topology(hive, tmp_adev);
3920 }
3921 }
3922
3923
3924out:
3925 if (!r) {
3926 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
3927 r = amdgpu_ib_ring_tests(tmp_adev);
3928 if (r) {
3929 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
3930 r = amdgpu_device_ip_suspend(tmp_adev);
3931 need_full_reset = true;
3932 r = -EAGAIN;
3933 goto end;
3934 }
3935 }
3936
3937 if (!r)
3938 r = amdgpu_device_recover_vram(tmp_adev);
3939 else
3940 tmp_adev->asic_reset_res = r;
3941 }
3942
3943end:
3944 *need_full_reset_arg = need_full_reset;
3945 return r;
3946}
3947
1d721ed6 3948static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
26bc5340 3949{
1d721ed6
AG
3950 if (trylock) {
3951 if (!mutex_trylock(&adev->lock_reset))
3952 return false;
3953 } else
3954 mutex_lock(&adev->lock_reset);
5740682e 3955
26bc5340
AG
3956 atomic_inc(&adev->gpu_reset_counter);
3957 adev->in_gpu_reset = 1;
a3a09142
AD
3958 switch (amdgpu_asic_reset_method(adev)) {
3959 case AMD_RESET_METHOD_MODE1:
3960 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
3961 break;
3962 case AMD_RESET_METHOD_MODE2:
3963 adev->mp1_state = PP_MP1_STATE_RESET;
3964 break;
3965 default:
3966 adev->mp1_state = PP_MP1_STATE_NONE;
3967 break;
3968 }
1d721ed6
AG
3969
3970 return true;
26bc5340 3971}
d38ceaf9 3972
26bc5340
AG
3973static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
3974{
89041940 3975 amdgpu_vf_error_trans_all(adev);
a3a09142 3976 adev->mp1_state = PP_MP1_STATE_NONE;
13a752e3
ML
3977 adev->in_gpu_reset = 0;
3978 mutex_unlock(&adev->lock_reset);
26bc5340
AG
3979}
3980
26bc5340
AG
3981/**
3982 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
3983 *
3984 * @adev: amdgpu device pointer
3985 * @job: which job trigger hang
3986 *
3987 * Attempt to reset the GPU if it has hung (all asics).
3988 * Attempt a soft reset or a full reset and reinitialize the ASIC.
3989 * Returns 0 for success or an error on failure.
3990 */
3991
3992int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
3993 struct amdgpu_job *job)
3994{
1d721ed6
AG
3995 struct list_head device_list, *device_list_handle = NULL;
3996 bool need_full_reset, job_signaled;
26bc5340 3997 struct amdgpu_hive_info *hive = NULL;
26bc5340 3998 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 3999 int i, r = 0;
7c6e68c7 4000 bool in_ras_intr = amdgpu_ras_intr_triggered();
b823821f
LM
4001 bool use_baco =
4002 (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ?
4003 true : false;
26bc5340 4004
d5ea093e
AG
4005 /*
4006 * Flush RAM to disk so that after reboot
4007 * the user can read log and see why the system rebooted.
4008 */
b823821f 4009 if (in_ras_intr && !use_baco && amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
4010
4011 DRM_WARN("Emergency reboot.");
4012
4013 ksys_sync_helper();
4014 emergency_restart();
4015 }
4016
1d721ed6 4017 need_full_reset = job_signaled = false;
26bc5340
AG
4018 INIT_LIST_HEAD(&device_list);
4019
b823821f
LM
4020 dev_info(adev->dev, "GPU %s begin!\n",
4021 (in_ras_intr && !use_baco) ? "jobs stop":"reset");
26bc5340 4022
beff74bc 4023 cancel_delayed_work_sync(&adev->delayed_init_work);
c53e4db7 4024
1d721ed6
AG
4025 hive = amdgpu_get_xgmi_hive(adev, false);
4026
26bc5340 4027 /*
1d721ed6
AG
4028 * Here we trylock to avoid a chain of resets executing from
4029 * either a trigger by jobs on different adevs in an XGMI hive or jobs on
4030 * different schedulers for the same device while this TO handler is running.
4031 * We always reset all schedulers for a device and all devices in an XGMI
4032 * hive, so that should take care of them too.
26bc5340 4033 */
1d721ed6
AG
4034
4035 if (hive && !mutex_trylock(&hive->reset_lock)) {
4036 DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
0b2d2c2e 4037 job ? job->base.id : -1, hive->hive_id);
26bc5340 4038 return 0;
1d721ed6 4039 }
26bc5340
AG
4040
4041 /* Start with adev pre asic reset first for soft reset check.*/
1d721ed6
AG
4042 if (!amdgpu_device_lock_adev(adev, !hive)) {
4043 DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
0b2d2c2e 4044 job ? job->base.id : -1);
1d721ed6 4045 return 0;
26bc5340
AG
4046 }
4047
7c6e68c7
AG
4048 /* Block kfd: SRIOV would do it separately */
4049 if (!amdgpu_sriov_vf(adev))
4050 amdgpu_amdkfd_pre_reset(adev);
4051
26bc5340 4052 /* Build list of devices to reset */
1d721ed6 4053 if (adev->gmc.xgmi.num_physical_nodes > 1) {
26bc5340 4054 if (!hive) {
7c6e68c7
AG
4055 /*unlock kfd: SRIOV would do it separately */
4056 if (!amdgpu_sriov_vf(adev))
4057 amdgpu_amdkfd_post_reset(adev);
26bc5340
AG
4058 amdgpu_device_unlock_adev(adev);
4059 return -ENODEV;
4060 }
4061
4062 /*
4063 * In case we are in XGMI hive mode device reset is done for all the
4064 * nodes in the hive to retrain all XGMI links and hence the reset
4065 * sequence is executed in loop on all nodes.
4066 */
4067 device_list_handle = &hive->device_list;
4068 } else {
4069 list_add_tail(&adev->gmc.xgmi.head, &device_list);
4070 device_list_handle = &device_list;
4071 }
4072
1d721ed6
AG
4073 /* block all schedulers and reset given job's ring */
4074 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
7c6e68c7 4075 if (tmp_adev != adev) {
12ffa55d 4076 amdgpu_device_lock_adev(tmp_adev, false);
7c6e68c7
AG
4077 if (!amdgpu_sriov_vf(tmp_adev))
4078 amdgpu_amdkfd_pre_reset(tmp_adev);
4079 }
4080
12ffa55d
AG
4081 /*
4082 * Mark these ASICs to be reset as untracked first,
4083 * and add them back after the reset completes.
4084 */
4085 amdgpu_unregister_gpu_instance(tmp_adev);
4086
f1c1314b 4087 /* disable ras on ALL IPs */
b823821f
LM
4088 if (!(in_ras_intr && !use_baco) &&
4089 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 4090 amdgpu_ras_suspend(tmp_adev);
4091
1d721ed6
AG
4092 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4093 struct amdgpu_ring *ring = tmp_adev->rings[i];
4094
4095 if (!ring || !ring->sched.thread)
4096 continue;
4097
0b2d2c2e 4098 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 4099
b823821f 4100 if (in_ras_intr && !use_baco)
7c6e68c7 4101 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6
AG
4102 }
4103 }
4104
4105
b823821f 4106 if (in_ras_intr && !use_baco)
7c6e68c7
AG
4107 goto skip_sched_resume;
4108
1d721ed6
AG
4109 /*
4110 * Must check guilty signal here since after this point all old
4111 * HW fences are force signaled.
4112 *
4113 * job->base holds a reference to parent fence
4114 */
4115 if (job && job->base.s_fence->parent &&
4116 dma_fence_is_signaled(job->base.s_fence->parent))
4117 job_signaled = true;
4118
1d721ed6
AG
4119 if (job_signaled) {
4120 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
4121 goto skip_hw_reset;
4122 }
4123
4124
4125 /* Guilty job will be freed after this*/
0b2d2c2e 4126 r = amdgpu_device_pre_asic_reset(adev, job, &need_full_reset);
1d721ed6
AG
4127 if (r) {
4128 /*TODO Should we stop ?*/
4129 DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
4130 r, adev->ddev->unique);
4131 adev->asic_reset_res = r;
4132 }
4133
26bc5340
AG
4134retry: /* Rest of adevs pre asic reset from XGMI hive. */
4135 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4136
4137 if (tmp_adev == adev)
4138 continue;
4139
26bc5340
AG
4140 r = amdgpu_device_pre_asic_reset(tmp_adev,
4141 NULL,
4142 &need_full_reset);
4143 /*TODO Should we stop ?*/
4144 if (r) {
4145 DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
4146 r, tmp_adev->ddev->unique);
4147 tmp_adev->asic_reset_res = r;
4148 }
4149 }
4150
4151 /* Actual ASIC resets if needed.*/
4152 /* TODO Implement XGMI hive reset logic for SRIOV */
4153 if (amdgpu_sriov_vf(adev)) {
4154 r = amdgpu_device_reset_sriov(adev, job ? false : true);
4155 if (r)
4156 adev->asic_reset_res = r;
4157 } else {
041a62bc 4158 r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
26bc5340
AG
4159 if (r && r == -EAGAIN)
4160 goto retry;
4161 }
4162
1d721ed6
AG
4163skip_hw_reset:
4164
26bc5340
AG
4165 /* Post ASIC reset for all devs .*/
4166 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
7c6e68c7 4167
1d721ed6
AG
4168 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4169 struct amdgpu_ring *ring = tmp_adev->rings[i];
4170
4171 if (!ring || !ring->sched.thread)
4172 continue;
4173
4175 			/* No point in resubmitting jobs if we didn't HW reset */
4175 if (!tmp_adev->asic_reset_res && !job_signaled)
4176 drm_sched_resubmit_jobs(&ring->sched);
4177
4178 drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
4179 }
4180
4181 if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
4182 drm_helper_resume_force_mode(tmp_adev->ddev);
4183 }
4184
4185 tmp_adev->asic_reset_res = 0;
26bc5340
AG
4186
4187 if (r) {
4188 			/* bad news, how do we tell userspace? */
12ffa55d 4189 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
4190 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
4191 } else {
12ffa55d 4192 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340 4193 }
7c6e68c7 4194 }
26bc5340 4195
7c6e68c7
AG
4196skip_sched_resume:
4197 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4198 /*unlock kfd: SRIOV would do it separately */
b823821f 4199 if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev))
7c6e68c7 4200 amdgpu_amdkfd_post_reset(tmp_adev);
26bc5340
AG
4201 amdgpu_device_unlock_adev(tmp_adev);
4202 }
4203
1d721ed6 4204 if (hive)
22d6575b 4205 mutex_unlock(&hive->reset_lock);
26bc5340
AG
4206
4207 if (r)
4208 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
d38ceaf9
AD
4209 return r;
4210}
4211
e3ecdffa
AD
4212/**
4213 * amdgpu_device_get_pcie_info - fetch PCIe info about the PCIe slot
4214 *
4215 * @adev: amdgpu_device pointer
4216 *
4217 * Fetches and stores in the driver the PCIe capabilities (gen speed
4218 * and lanes) of the slot the device is in. Handles APUs and
4219 * virtualized environments where PCIE config space may not be available.
4220 */
5494d864 4221static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 4222{
5d9a6330 4223 struct pci_dev *pdev;
c5313457
HK
4224 enum pci_bus_speed speed_cap, platform_speed_cap;
4225 enum pcie_link_width platform_link_width;
d0dd7f0c 4226
cd474ba0
AD
4227 if (amdgpu_pcie_gen_cap)
4228 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 4229
cd474ba0
AD
4230 if (amdgpu_pcie_lane_cap)
4231 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 4232
cd474ba0
AD
4233 /* covers APUs as well */
4234 if (pci_is_root_bus(adev->pdev->bus)) {
4235 if (adev->pm.pcie_gen_mask == 0)
4236 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
4237 if (adev->pm.pcie_mlw_mask == 0)
4238 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 4239 return;
cd474ba0 4240 }
d0dd7f0c 4241
c5313457
HK
4242 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
4243 return;
4244
dbaa922b
AD
4245 pcie_bandwidth_available(adev->pdev, NULL,
4246 &platform_speed_cap, &platform_link_width);
c5313457 4247
cd474ba0 4248 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
4249 /* asic caps */
4250 pdev = adev->pdev;
4251 speed_cap = pcie_get_speed_cap(pdev);
4252 if (speed_cap == PCI_SPEED_UNKNOWN) {
4253 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
4254 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4255 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 4256 } else {
5d9a6330
AD
4257 if (speed_cap == PCIE_SPEED_16_0GT)
4258 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4259 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4260 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4261 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
4262 else if (speed_cap == PCIE_SPEED_8_0GT)
4263 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4264 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4265 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
4266 else if (speed_cap == PCIE_SPEED_5_0GT)
4267 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4268 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
4269 else
4270 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
4271 }
4272 /* platform caps */
c5313457 4273 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
4274 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4275 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4276 } else {
c5313457 4277 if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
4278 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4279 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4280 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4281 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 4282 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
4283 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4284 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4285 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 4286 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
4287 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4288 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4289 else
4290 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
4291
cd474ba0
AD
4292 }
4293 }
4294 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 4295 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
4296 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
4297 } else {
c5313457 4298 switch (platform_link_width) {
5d9a6330 4299 case PCIE_LNK_X32:
cd474ba0
AD
4300 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
4301 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4302 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4303 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4304 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4305 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4306 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4307 break;
5d9a6330 4308 case PCIE_LNK_X16:
cd474ba0
AD
4309 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4310 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4311 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4312 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4313 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4314 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4315 break;
5d9a6330 4316 case PCIE_LNK_X12:
cd474ba0
AD
4317 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4318 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4319 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4320 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4321 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4322 break;
5d9a6330 4323 case PCIE_LNK_X8:
cd474ba0
AD
4324 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4325 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4326 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4327 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4328 break;
5d9a6330 4329 case PCIE_LNK_X4:
cd474ba0
AD
4330 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4331 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4332 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4333 break;
5d9a6330 4334 case PCIE_LNK_X2:
cd474ba0
AD
4335 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4336 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4337 break;
5d9a6330 4338 case PCIE_LNK_X1:
cd474ba0
AD
4339 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
4340 break;
4341 default:
4342 break;
4343 }
d0dd7f0c
AD
4344 }
4345 }
4346}
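/*
 * Illustrative sketch (editor addition, not part of the original file):
 * consumers test individual CAIL_* bits in the masks filled in above, e.g.
 * requiring both the platform and the ASIC to support a GEN3 link. The
 * helper name is an assumption for the example.
 */
static bool example_pcie_gen3_usable(struct amdgpu_device *adev)
{
	return (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) &&
	       (adev->pm.pcie_gen_mask & CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
}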
d38ceaf9 4347
361dbd01
AD
4348int amdgpu_device_baco_enter(struct drm_device *dev)
4349{
4350 struct amdgpu_device *adev = dev->dev_private;
7a22677b 4351 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01
AD
4352
4353 if (!amdgpu_device_supports_baco(adev->ddev))
4354 return -ENOTSUPP;
4355
7a22677b
LM
4356 if (ras && ras->supported)
4357 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
4358
361dbd01
AD
4359 if (is_support_sw_smu(adev)) {
4360 struct smu_context *smu = &adev->smu;
4361 int ret;
4362
4363 ret = smu_baco_enter(smu);
4364 if (ret)
4365 return ret;
361dbd01
AD
4366 } else {
4367 void *pp_handle = adev->powerplay.pp_handle;
4368 const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
4369
4370 if (!pp_funcs ||!pp_funcs->get_asic_baco_state ||!pp_funcs->set_asic_baco_state)
4371 return -ENOENT;
4372
4373 /* enter BACO state */
4374 if (pp_funcs->set_asic_baco_state(pp_handle, 1))
4375 return -EIO;
361dbd01 4376 }
7a22677b
LM
4377
4378 return 0;
361dbd01
AD
4379}
4380
4381int amdgpu_device_baco_exit(struct drm_device *dev)
4382{
4383 struct amdgpu_device *adev = dev->dev_private;
7a22677b 4384 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01
AD
4385
4386 if (!amdgpu_device_supports_baco(adev->ddev))
4387 return -ENOTSUPP;
4388
4389 if (is_support_sw_smu(adev)) {
4390 struct smu_context *smu = &adev->smu;
4391 int ret;
4392
4393 ret = smu_baco_exit(smu);
4394 if (ret)
4395 return ret;
4396
361dbd01
AD
4397 } else {
4398 void *pp_handle = adev->powerplay.pp_handle;
4399 const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
4400
4401 if (!pp_funcs ||!pp_funcs->get_asic_baco_state ||!pp_funcs->set_asic_baco_state)
4402 return -ENOENT;
4403
4404 /* exit BACO state */
4405 if (pp_funcs->set_asic_baco_state(pp_handle, 0))
4406 return -EIO;
361dbd01 4407 }
7a22677b
LM
4408
4409 if (ras && ras->supported)
4410 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
4411
4412 return 0;
361dbd01 4413}
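/*
 * Illustrative sketch (editor addition, not part of the original file):
 * runtime PM can park the ASIC in BACO with the two helpers above instead
 * of cutting power completely. The callback name is an assumption; the
 * real callers live in the amdgpu_drv.c runtime suspend/resume paths.
 */
static int example_runtime_baco_cycle(struct drm_device *dev)
{
	int r;

	r = amdgpu_device_baco_enter(dev);
	if (r)
		return r;

	/* ... device sits in BACO until the next wakeup ... */

	return amdgpu_device_baco_exit(dev);
}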