drm/amdgpu: clear uncorrectable parity error status bit
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c (linux-2.6-block.git)
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
b1ddf548 28#include <linux/power_supply.h>
0875dc9e 29#include <linux/kthread.h>
fdf2f6c5 30#include <linux/module.h>
31#include <linux/console.h>
32#include <linux/slab.h>
fdf2f6c5 33
4562236b 34#include <drm/drm_atomic_helper.h>
fcd70cd3 35#include <drm/drm_probe_helper.h>
36#include <drm/amdgpu_drm.h>
37#include <linux/vgaarb.h>
38#include <linux/vga_switcheroo.h>
39#include <linux/efi.h>
40#include "amdgpu.h"
f4b373f4 41#include "amdgpu_trace.h"
42#include "amdgpu_i2c.h"
43#include "atom.h"
44#include "amdgpu_atombios.h"
a5bde2f9 45#include "amdgpu_atomfirmware.h"
d0dd7f0c 46#include "amd_pcie.h"
47#ifdef CONFIG_DRM_AMDGPU_SI
48#include "si.h"
49#endif
50#ifdef CONFIG_DRM_AMDGPU_CIK
51#include "cik.h"
52#endif
aaa36a97 53#include "vi.h"
460826e6 54#include "soc15.h"
0a5b8c7b 55#include "nv.h"
d38ceaf9 56#include "bif/bif_4_1_d.h"
9accf2fd 57#include <linux/pci.h>
bec86378 58#include <linux/firmware.h>
89041940 59#include "amdgpu_vf_error.h"
d38ceaf9 60
ba997709 61#include "amdgpu_amdkfd.h"
d2f52ac8 62#include "amdgpu_pm.h"
d38ceaf9 63
5183411b 64#include "amdgpu_xgmi.h"
c030f2e4 65#include "amdgpu_ras.h"
9c7c85f7 66#include "amdgpu_pmu.h"
5183411b 67
68#include <linux/suspend.h>
69
e2a75f88 70MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
3f76dced 71MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
2d2e5e7e 72MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
ad5a67a7 73MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
54c4d17e 74MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
65e60f6e 75MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
b51a26a0 76MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
23c6268e 77MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
ed42cfe1 78MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
42b325e5 79MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
e2a75f88 80
81#define AMDGPU_RESUME_MS 2000
82
050091ab 83const char *amdgpu_asic_name[] = {
84 "TAHITI",
85 "PITCAIRN",
86 "VERDE",
87 "OLAND",
88 "HAINAN",
89 "BONAIRE",
90 "KAVERI",
91 "KABINI",
92 "HAWAII",
93 "MULLINS",
94 "TOPAZ",
95 "TONGA",
48299f95 96 "FIJI",
d38ceaf9 97 "CARRIZO",
139f4917 98 "STONEY",
99 "POLARIS10",
100 "POLARIS11",
c4642a47 101 "POLARIS12",
48ff108d 102 "VEGAM",
d4196f01 103 "VEGA10",
8fab806a 104 "VEGA12",
956fcddc 105 "VEGA20",
2ca8a5d2 106 "RAVEN",
d6c3b24e 107 "ARCTURUS",
1eee4228 108 "RENOIR",
852a6626 109 "NAVI10",
87dbad02 110 "NAVI14",
9802f5d7 111 "NAVI12",
112 "LAST",
113};
114
115/**
116 * DOC: pcie_replay_count
117 *
118 * The amdgpu driver provides a sysfs API for reporting the total number
 119 * of PCIe replays (NAKs).
120 * The file pcie_replay_count is used for this and returns the total
121 * number of replays as a sum of the NAKs generated and NAKs received
122 */
123
124static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
125 struct device_attribute *attr, char *buf)
126{
127 struct drm_device *ddev = dev_get_drvdata(dev);
128 struct amdgpu_device *adev = ddev->dev_private;
129 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
130
131 return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
132}
133
134static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
135 amdgpu_device_get_pcie_replay_count, NULL);
136
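/*
 * Illustrative userspace sketch of reading the attribute defined above; the
 * sysfs path below is an assumption and varies with the card index:
 *
 *	FILE *f = fopen("/sys/class/drm/card0/device/pcie_replay_count", "r");
 *	unsigned long long replays = 0;
 *
 *	if (f) {
 *		fscanf(f, "%llu", &replays);
 *		fclose(f);
 *	}
 */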
137static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
138
e3ecdffa 139/**
31af062a 140 * amdgpu_device_supports_boco - Is the device a dGPU with HG/PX power control
141 *
142 * @dev: drm_device pointer
143 *
144 * Returns true if the device is a dGPU with HG/PX power control,
145 * otherwise return false.
146 */
31af062a 147bool amdgpu_device_supports_boco(struct drm_device *dev)
148{
149 struct amdgpu_device *adev = dev->dev_private;
150
2f7d10b3 151 if (adev->flags & AMD_IS_PX)
152 return true;
153 return false;
154}
155
156/**
157 * amdgpu_device_supports_baco - Does the device support BACO
158 *
159 * @dev: drm_device pointer
160 *
 161 * Returns true if the device supports BACO,
162 * otherwise return false.
163 */
164bool amdgpu_device_supports_baco(struct drm_device *dev)
165{
166 struct amdgpu_device *adev = dev->dev_private;
167
168 return amdgpu_asic_supports_baco(adev);
169}
170
171/**
172 * VRAM access helper functions.
173 *
174 * amdgpu_device_vram_access - read/write a buffer in vram
175 *
176 * @adev: amdgpu_device pointer
177 * @pos: offset of the buffer in vram
178 * @buf: virtual address of the buffer in system memory
 179 * @size: read/write size in bytes; sizeof(@buf) must be >= @size
180 * @write: true - write to vram, otherwise - read from vram
181 */
182void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
183 uint32_t *buf, size_t size, bool write)
184{
185 uint64_t last;
186 unsigned long flags;
187
188 last = size - 4;
189 for (last += pos; pos <= last; pos += 4) {
190 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
191 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
192 WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31);
193 if (write)
194 WREG32_NO_KIQ(mmMM_DATA, *buf++);
195 else
196 *buf++ = RREG32_NO_KIQ(mmMM_DATA);
197 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
198 }
199}
200
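/*
 * Illustrative in-driver sketch, assuming @adev is a fully initialized
 * amdgpu_device: copy the first 16 bytes of VRAM into a stack buffer with
 * the helper above:
 *
 *	uint32_t buf[4];
 *
 *	amdgpu_device_vram_access(adev, 0, buf, sizeof(buf), false);
 */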
201/*
202 * MMIO register access helper functions.
203 */
204/**
205 * amdgpu_mm_rreg - read a memory mapped IO register
206 *
207 * @adev: amdgpu_device pointer
208 * @reg: dword aligned register offset
209 * @acc_flags: access flags which require special behavior
210 *
211 * Returns the 32 bit value from the offset specified.
212 */
d38ceaf9 213uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
15d72fd7 214 uint32_t acc_flags)
d38ceaf9 215{
216 uint32_t ret;
217
43ca8efa 218 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
bc992ba5 219 return amdgpu_virt_kiq_rreg(adev, reg);
bc992ba5 220
15d72fd7 221 if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
f4b373f4 222 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
223 else {
224 unsigned long flags;
225
226 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
227 writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
228 ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
229 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
d38ceaf9 230 }
231 trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
232 return ret;
233}
234
235/*
 236 * MMIO register byte read helper function
 237 * @offset: byte offset from MMIO start
238 *
239*/
240
241/**
242 * amdgpu_mm_rreg8 - read a memory mapped IO register
243 *
244 * @adev: amdgpu_device pointer
245 * @offset: byte aligned register offset
246 *
247 * Returns the 8 bit value from the offset specified.
248 */
249uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) {
250 if (offset < adev->rmmio_size)
251 return (readb(adev->rmmio + offset));
252 BUG();
253}
254
255/*
 256 * MMIO register byte write helper function
 257 * @offset: byte offset from MMIO start
 258 * @value: the value to be written to the register
259 *
260*/
261/**
 262 * amdgpu_mm_wreg8 - write to a memory mapped IO register
263 *
264 * @adev: amdgpu_device pointer
265 * @offset: byte aligned register offset
266 * @value: 8 bit value to write
267 *
268 * Writes the value specified to the offset specified.
269 */
270void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) {
271 if (offset < adev->rmmio_size)
272 writeb(value, adev->rmmio + offset);
273 else
274 BUG();
275}
276
277/**
278 * amdgpu_mm_wreg - write to a memory mapped IO register
279 *
280 * @adev: amdgpu_device pointer
281 * @reg: dword aligned register offset
282 * @v: 32 bit value to write to the register
283 * @acc_flags: access flags which require special behavior
284 *
285 * Writes the value specified to the offset specified.
286 */
d38ceaf9 287void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
15d72fd7 288 uint32_t acc_flags)
d38ceaf9 289{
f4b373f4 290 trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);
4e99a44e 291
292 if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
293 adev->last_mm_index = v;
294 }
295
43ca8efa 296 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
bc992ba5 297 return amdgpu_virt_kiq_wreg(adev, reg, v);
bc992ba5 298
15d72fd7 299 if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
300 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
301 else {
302 unsigned long flags;
303
304 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
305 writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
306 writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
307 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
308 }
309
310 if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
311 udelay(500);
312 }
313}
314
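/*
 * Illustrative sketch of a typical read-modify-write through the helpers
 * above; mmEXAMPLE_REG is a placeholder for a dword-aligned register offset,
 * not a real register name:
 *
 *	uint32_t tmp = amdgpu_mm_rreg(adev, mmEXAMPLE_REG, 0);
 *
 *	tmp |= 0x1;
 *	amdgpu_mm_wreg(adev, mmEXAMPLE_REG, tmp, 0);
 */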
315/**
316 * amdgpu_io_rreg - read an IO register
317 *
318 * @adev: amdgpu_device pointer
319 * @reg: dword aligned register offset
320 *
321 * Returns the 32 bit value from the offset specified.
322 */
323u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
324{
325 if ((reg * 4) < adev->rio_mem_size)
326 return ioread32(adev->rio_mem + (reg * 4));
327 else {
328 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
329 return ioread32(adev->rio_mem + (mmMM_DATA * 4));
330 }
331}
332
333/**
334 * amdgpu_io_wreg - write to an IO register
335 *
336 * @adev: amdgpu_device pointer
337 * @reg: dword aligned register offset
338 * @v: 32 bit value to write to the register
339 *
340 * Writes the value specified to the offset specified.
341 */
342void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
343{
344 if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
345 adev->last_mm_index = v;
346 }
347
348 if ((reg * 4) < adev->rio_mem_size)
349 iowrite32(v, adev->rio_mem + (reg * 4));
350 else {
351 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
352 iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
353 }
354
355 if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
356 udelay(500);
357 }
358}
359
360/**
361 * amdgpu_mm_rdoorbell - read a doorbell dword
362 *
363 * @adev: amdgpu_device pointer
364 * @index: doorbell index
365 *
366 * Returns the value in the doorbell aperture at the
367 * requested doorbell index (CIK).
368 */
369u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
370{
371 if (index < adev->doorbell.num_doorbells) {
372 return readl(adev->doorbell.ptr + index);
373 } else {
374 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
375 return 0;
376 }
377}
378
379/**
380 * amdgpu_mm_wdoorbell - write a doorbell dword
381 *
382 * @adev: amdgpu_device pointer
383 * @index: doorbell index
384 * @v: value to write
385 *
386 * Writes @v to the doorbell aperture at the
387 * requested doorbell index (CIK).
388 */
389void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
390{
391 if (index < adev->doorbell.num_doorbells) {
392 writel(v, adev->doorbell.ptr + index);
393 } else {
394 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
395 }
396}
397
398/**
399 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
400 *
401 * @adev: amdgpu_device pointer
402 * @index: doorbell index
403 *
404 * Returns the value in the doorbell aperture at the
405 * requested doorbell index (VEGA10+).
406 */
407u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
408{
409 if (index < adev->doorbell.num_doorbells) {
410 return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
411 } else {
412 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
413 return 0;
414 }
415}
416
417/**
418 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
419 *
420 * @adev: amdgpu_device pointer
421 * @index: doorbell index
422 * @v: value to write
423 *
424 * Writes @v to the doorbell aperture at the
425 * requested doorbell index (VEGA10+).
426 */
427void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
428{
429 if (index < adev->doorbell.num_doorbells) {
430 atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
431 } else {
432 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
433 }
434}
435
436/**
437 * amdgpu_invalid_rreg - dummy reg read function
438 *
439 * @adev: amdgpu device pointer
440 * @reg: offset of register
441 *
442 * Dummy register read function. Used for register blocks
443 * that certain asics don't have (all asics).
444 * Returns the value in the register.
445 */
446static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
447{
448 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
449 BUG();
450 return 0;
451}
452
453/**
454 * amdgpu_invalid_wreg - dummy reg write function
455 *
456 * @adev: amdgpu device pointer
457 * @reg: offset of register
458 * @v: value to write to the register
459 *
 460 * Dummy register write function. Used for register blocks
461 * that certain asics don't have (all asics).
462 */
463static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
464{
465 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
466 reg, v);
467 BUG();
468}
469
470/**
471 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
472 *
473 * @adev: amdgpu device pointer
474 * @reg: offset of register
475 *
476 * Dummy register read function. Used for register blocks
477 * that certain asics don't have (all asics).
478 * Returns the value in the register.
479 */
480static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
481{
482 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
483 BUG();
484 return 0;
485}
486
487/**
488 * amdgpu_invalid_wreg64 - dummy reg write function
489 *
490 * @adev: amdgpu device pointer
491 * @reg: offset of register
492 * @v: value to write to the register
493 *
 494 * Dummy register write function. Used for register blocks
495 * that certain asics don't have (all asics).
496 */
497static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
498{
499 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
500 reg, v);
501 BUG();
502}
503
504/**
505 * amdgpu_block_invalid_rreg - dummy reg read function
506 *
507 * @adev: amdgpu device pointer
508 * @block: offset of instance
509 * @reg: offset of register
510 *
511 * Dummy register read function. Used for register blocks
512 * that certain asics don't have (all asics).
513 * Returns the value in the register.
514 */
515static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
516 uint32_t block, uint32_t reg)
517{
518 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
519 reg, block);
520 BUG();
521 return 0;
522}
523
524/**
525 * amdgpu_block_invalid_wreg - dummy reg write function
526 *
527 * @adev: amdgpu device pointer
528 * @block: offset of instance
529 * @reg: offset of register
530 * @v: value to write to the register
531 *
 532 * Dummy block register write function. Used for register blocks
533 * that certain asics don't have (all asics).
534 */
535static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
536 uint32_t block,
537 uint32_t reg, uint32_t v)
538{
539 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
540 reg, block, v);
541 BUG();
542}
543
544/**
545 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
546 *
547 * @adev: amdgpu device pointer
548 *
549 * Allocates a scratch page of VRAM for use by various things in the
550 * driver.
551 */
06ec9070 552static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
d38ceaf9 553{
554 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
555 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
556 &adev->vram_scratch.robj,
557 &adev->vram_scratch.gpu_addr,
558 (void **)&adev->vram_scratch.ptr);
559}
560
561/**
562 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
563 *
564 * @adev: amdgpu device pointer
565 *
566 * Frees the VRAM scratch page.
567 */
06ec9070 568static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 569{
078af1a3 570 amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
571}
572
573/**
9c3f2b54 574 * amdgpu_device_program_register_sequence - program an array of registers.
575 *
576 * @adev: amdgpu_device pointer
577 * @registers: pointer to the register array
578 * @array_size: size of the register array
579 *
 580 * Programs an array of registers with AND and OR masks.
581 * This is a helper for setting golden registers.
582 */
583void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
584 const u32 *registers,
585 const u32 array_size)
586{
587 u32 tmp, reg, and_mask, or_mask;
588 int i;
589
590 if (array_size % 3)
591 return;
592
593 for (i = 0; i < array_size; i +=3) {
594 reg = registers[i + 0];
595 and_mask = registers[i + 1];
596 or_mask = registers[i + 2];
597
598 if (and_mask == 0xffffffff) {
599 tmp = or_mask;
600 } else {
601 tmp = RREG32(reg);
602 tmp &= ~and_mask;
603 if (adev->family >= AMDGPU_FAMILY_AI)
604 tmp |= (or_mask & and_mask);
605 else
606 tmp |= or_mask;
607 }
608 WREG32(reg, tmp);
609 }
610}
611
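/*
 * Illustrative sketch of how the helper above is fed: golden register tables
 * are flat {register, AND mask, OR mask} triples. The register names below
 * are placeholders, not real registers:
 *
 *	static const u32 example_golden_settings[] = {
 *		mmEXAMPLE_REG_A, 0xffffffff, 0x00000001,
 *		mmEXAMPLE_REG_B, 0x0000ff00, 0x00001200,
 *	};
 *
 *	amdgpu_device_program_register_sequence(adev, example_golden_settings,
 *						ARRAY_SIZE(example_golden_settings));
 *
 * An AND mask of 0xffffffff overwrites the whole register with the OR mask;
 * otherwise the AND-masked bits are cleared before the OR mask is applied.
 */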
612/**
613 * amdgpu_device_pci_config_reset - reset the GPU
614 *
615 * @adev: amdgpu_device pointer
616 *
617 * Resets the GPU using the pci config reset sequence.
618 * Only applicable to asics prior to vega10.
619 */
8111c387 620void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
621{
622 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
623}
624
625/*
626 * GPU doorbell aperture helpers function.
627 */
628/**
06ec9070 629 * amdgpu_device_doorbell_init - Init doorbell driver information.
630 *
631 * @adev: amdgpu_device pointer
632 *
633 * Init doorbell driver information (CIK)
634 * Returns 0 on success, error on failure.
635 */
06ec9070 636static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
d38ceaf9 637{
6585661d 638
639 /* No doorbell on SI hardware generation */
640 if (adev->asic_type < CHIP_BONAIRE) {
641 adev->doorbell.base = 0;
642 adev->doorbell.size = 0;
643 adev->doorbell.num_doorbells = 0;
644 adev->doorbell.ptr = NULL;
645 return 0;
646 }
647
648 if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
649 return -EINVAL;
650
651 amdgpu_asic_init_doorbell_index(adev);
652
653 /* doorbell bar mapping */
654 adev->doorbell.base = pci_resource_start(adev->pdev, 2);
655 adev->doorbell.size = pci_resource_len(adev->pdev, 2);
656
edf600da 657 adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
9564f192 658 adev->doorbell_index.max_assignment+1);
659 if (adev->doorbell.num_doorbells == 0)
660 return -EINVAL;
661
ec3db8a6 662 /* For Vega, reserve and map two pages on doorbell BAR since SDMA
 663 * paging queue doorbell uses the second page. The
 664 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
 665 * doorbells are in the first page, so with the paging queue enabled
 666 * num_doorbells must be extended by one page (0x400 dwords).
667 */
668 if (adev->asic_type >= CHIP_VEGA10)
88dc26e4 669 adev->doorbell.num_doorbells += 0x400;
ec3db8a6 670
671 adev->doorbell.ptr = ioremap(adev->doorbell.base,
672 adev->doorbell.num_doorbells *
673 sizeof(u32));
674 if (adev->doorbell.ptr == NULL)
d38ceaf9 675 return -ENOMEM;
676
677 return 0;
678}
679
680/**
06ec9070 681 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
682 *
683 * @adev: amdgpu_device pointer
684 *
685 * Tear down doorbell driver information (CIK)
686 */
06ec9070 687static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
d38ceaf9
AD
688{
689 iounmap(adev->doorbell.ptr);
690 adev->doorbell.ptr = NULL;
691}
692
22cb0164 693
694
695/*
06ec9070 696 * amdgpu_device_wb_*()
455a7bc2 697 * Writeback is the method by which the GPU updates special pages in memory
ea81a173 698 * with the status of certain GPU events (fences, ring pointers,etc.).
699 */
700
701/**
06ec9070 702 * amdgpu_device_wb_fini - Disable Writeback and free memory
703 *
704 * @adev: amdgpu_device pointer
705 *
706 * Disables Writeback and frees the Writeback memory (all asics).
707 * Used at driver shutdown.
708 */
06ec9070 709static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
710{
711 if (adev->wb.wb_obj) {
712 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
713 &adev->wb.gpu_addr,
714 (void **)&adev->wb.wb);
715 adev->wb.wb_obj = NULL;
716 }
717}
718
719/**
06ec9070 720 * amdgpu_device_wb_init- Init Writeback driver info and allocate memory
721 *
722 * @adev: amdgpu_device pointer
723 *
455a7bc2 724 * Initializes writeback and allocates writeback memory (all asics).
725 * Used at driver startup.
 726 * Returns 0 on success or a negative error code on failure.
727 */
06ec9070 728static int amdgpu_device_wb_init(struct amdgpu_device *adev)
729{
730 int r;
731
732 if (adev->wb.wb_obj == NULL) {
733 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
734 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
735 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
736 &adev->wb.wb_obj, &adev->wb.gpu_addr,
737 (void **)&adev->wb.wb);
738 if (r) {
739 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
740 return r;
741 }
742
743 adev->wb.num_wb = AMDGPU_MAX_WB;
744 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
745
746 /* clear wb memory */
73469585 747 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
748 }
749
750 return 0;
751}
752
753/**
131b4b36 754 * amdgpu_device_wb_get - Allocate a wb entry
755 *
756 * @adev: amdgpu_device pointer
757 * @wb: wb index
758 *
759 * Allocate a wb slot for use by the driver (all asics).
760 * Returns 0 on success or -EINVAL on failure.
761 */
131b4b36 762int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
763{
764 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 765
97407b63 766 if (offset < adev->wb.num_wb) {
7014285a 767 __set_bit(offset, adev->wb.used);
63ae07ca 768 *wb = offset << 3; /* convert to dw offset */
769 return 0;
770 } else {
771 return -EINVAL;
772 }
773}
774
d38ceaf9 775/**
131b4b36 776 * amdgpu_device_wb_free - Free a wb entry
777 *
778 * @adev: amdgpu_device pointer
779 * @wb: wb index
780 *
781 * Free a wb slot allocated for use by the driver (all asics)
782 */
131b4b36 783void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 784{
73469585 785 wb >>= 3;
d38ceaf9 786 if (wb < adev->wb.num_wb)
73469585 787 __clear_bit(wb, adev->wb.used);
788}
789
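/*
 * Illustrative sketch of the writeback slot API above; wb is returned as a
 * dword offset into the writeback page, so the GPU address of the slot is
 * adev->wb.gpu_addr + wb * 4 and the CPU reads it via adev->wb.wb[wb]
 * (error handling trimmed):
 *
 *	u32 wb;
 *	u64 wb_gpu_addr;
 *
 *	if (!amdgpu_device_wb_get(adev, &wb)) {
 *		wb_gpu_addr = adev->wb.gpu_addr + (wb * 4);
 *		(point a ring rptr/fence at wb_gpu_addr, read adev->wb.wb[wb])
 *		amdgpu_device_wb_free(adev, wb);
 *	}
 */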
790/**
791 * amdgpu_device_resize_fb_bar - try to resize FB BAR
792 *
793 * @adev: amdgpu_device pointer
794 *
795 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
796 * to fail, but if any of the BARs is not accessible after the size we abort
797 * driver loading by returning -ENODEV.
798 */
799int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
800{
770d13b1 801 u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
d6895ad3 802 u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
31b8adab
CK
803 struct pci_bus *root;
804 struct resource *res;
805 unsigned i;
d6895ad3
CK
806 u16 cmd;
807 int r;
808
0c03b912 809 /* Bypass for VF */
810 if (amdgpu_sriov_vf(adev))
811 return 0;
812
31b8adab
CK
813 /* Check if the root BUS has 64bit memory resources */
814 root = adev->pdev->bus;
815 while (root->parent)
816 root = root->parent;
817
818 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 819 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
820 res->start > 0x100000000ull)
821 break;
822 }
823
824 /* Trying to resize is pointless without a root hub window above 4GB */
825 if (!res)
826 return 0;
827
d6895ad3
CK
828 /* Disable memory decoding while we change the BAR addresses and size */
829 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
830 pci_write_config_word(adev->pdev, PCI_COMMAND,
831 cmd & ~PCI_COMMAND_MEMORY);
832
833 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
06ec9070 834 amdgpu_device_doorbell_fini(adev);
d6895ad3
CK
835 if (adev->asic_type >= CHIP_BONAIRE)
836 pci_release_resource(adev->pdev, 2);
837
838 pci_release_resource(adev->pdev, 0);
839
840 r = pci_resize_resource(adev->pdev, 0, rbar_size);
841 if (r == -ENOSPC)
842 DRM_INFO("Not enough PCI address space for a large BAR.");
843 else if (r && r != -ENOTSUPP)
844 DRM_ERROR("Problem resizing BAR0 (%d).", r);
845
846 pci_assign_unassigned_bus_resources(adev->pdev->bus);
847
848 /* When the doorbell or fb BAR isn't available we have no chance of
849 * using the device.
850 */
06ec9070 851 r = amdgpu_device_doorbell_init(adev);
d6895ad3
CK
852 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
853 return -ENODEV;
854
855 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
856
857 return 0;
858}
a05502e5 859
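/*
 * Worked example of the rbar_size computation above (illustrative): with
 * 8 GB of VRAM, space_needed is 0x200000000, (space_needed >> 20) | 1 is
 * 0x2001, and order_base_2(0x2001) - 1 = 13, i.e. a request for a
 * 2^13 MB = 8 GB BAR in the PCI resizable-BAR size encoding.
 */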
860/*
861 * GPU helpers function.
862 */
863/**
39c640c0 864 * amdgpu_device_need_post - check if the hw need post or not
d38ceaf9
AD
865 *
866 * @adev: amdgpu_device pointer
867 *
 868 * Check whether the asic still needs to be posted: either it has not been
 869 * initialized yet at driver startup (all asics), or a hw reset was performed.
 870 * Returns true if post is needed, false if not.
d38ceaf9 871 */
39c640c0 872bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
873{
874 uint32_t reg;
875
876 if (amdgpu_sriov_vf(adev))
877 return false;
878
879 if (amdgpu_passthrough(adev)) {
 880 /* for FIJI: in the whole-GPU pass-through virtualization case, after a VM
 881 * reboot some old SMC firmware still needs the driver to do a vPost or the
 882 * GPU hangs; SMC firmware versions above 22.15 don't have this flaw, so
 883 * force vPost for SMC versions below 22.15
884 */
885 if (adev->asic_type == CHIP_FIJI) {
886 int err;
887 uint32_t fw_ver;
888 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
 889 /* force vPost if an error occurred */
890 if (err)
891 return true;
892
893 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
894 if (fw_ver < 0x00160e00)
895 return true;
bec86378 896 }
bec86378 897 }
91fe77eb 898
899 if (adev->has_hw_reset) {
900 adev->has_hw_reset = false;
901 return true;
902 }
903
904 /* bios scratch used on CIK+ */
905 if (adev->asic_type >= CHIP_BONAIRE)
906 return amdgpu_atombios_scratch_need_asic_init(adev);
907
908 /* check MEM_SIZE for older asics */
909 reg = amdgpu_asic_get_config_memsize(adev);
910
911 if ((reg != 0) && (reg != 0xffffffff))
912 return false;
913
914 return true;
bec86378
ML
915}
916
d38ceaf9
AD
917/* if we get transitioned to only one device, take VGA back */
918/**
06ec9070 919 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9
AD
920 *
921 * @cookie: amdgpu_device pointer
922 * @state: enable/disable vga decode
923 *
924 * Enable/disable vga decode (all asics).
925 * Returns VGA resource flags.
926 */
06ec9070 927static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
d38ceaf9
AD
928{
929 struct amdgpu_device *adev = cookie;
930 amdgpu_asic_set_vga_state(adev, state);
931 if (state)
932 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
933 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
934 else
935 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
936}
937
e3ecdffa
AD
938/**
939 * amdgpu_device_check_block_size - validate the vm block size
940 *
941 * @adev: amdgpu_device pointer
942 *
943 * Validates the vm block size specified via module parameter.
944 * The vm block size defines number of bits in page table versus page directory,
945 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
946 * page table and the remaining bits are in the page directory.
947 */
06ec9070 948static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
949{
950 /* defines number of bits in page table versus page directory,
951 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
952 * page table and the remaining bits are in the page directory */
bab4fee7
JZ
953 if (amdgpu_vm_block_size == -1)
954 return;
a1adf8be 955
bab4fee7 956 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
957 dev_warn(adev->dev, "VM page table size (%d) too small\n",
958 amdgpu_vm_block_size);
97489129 959 amdgpu_vm_block_size = -1;
a1adf8be 960 }
a1adf8be
CZ
961}
962
e3ecdffa
AD
963/**
964 * amdgpu_device_check_vm_size - validate the vm size
965 *
966 * @adev: amdgpu_device pointer
967 *
968 * Validates the vm size in GB specified via module parameter.
969 * The VM size is the size of the GPU virtual memory space in GB.
970 */
06ec9070 971static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 972{
64dab074
AD
973 /* no need to check the default value */
974 if (amdgpu_vm_size == -1)
975 return;
976
83ca145d
ZJ
977 if (amdgpu_vm_size < 1) {
978 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
979 amdgpu_vm_size);
f3368128 980 amdgpu_vm_size = -1;
83ca145d 981 }
83ca145d
ZJ
982}
983
7951e376
RZ
984static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
985{
986 struct sysinfo si;
987 bool is_os_64 = (sizeof(void *) == 8) ? true : false;
988 uint64_t total_memory;
989 uint64_t dram_size_seven_GB = 0x1B8000000;
990 uint64_t dram_size_three_GB = 0xB8000000;
991
992 if (amdgpu_smu_memory_pool_size == 0)
993 return;
994
995 if (!is_os_64) {
996 DRM_WARN("Not 64-bit OS, feature not supported\n");
997 goto def_value;
998 }
999 si_meminfo(&si);
1000 total_memory = (uint64_t)si.totalram * si.mem_unit;
1001
1002 if ((amdgpu_smu_memory_pool_size == 1) ||
1003 (amdgpu_smu_memory_pool_size == 2)) {
1004 if (total_memory < dram_size_three_GB)
1005 goto def_value1;
1006 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1007 (amdgpu_smu_memory_pool_size == 8)) {
1008 if (total_memory < dram_size_seven_GB)
1009 goto def_value1;
1010 } else {
1011 DRM_WARN("Smu memory pool size not supported\n");
1012 goto def_value;
1013 }
1014 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1015
1016 return;
1017
1018def_value1:
 1019 DRM_WARN("Not enough system memory\n");
1020def_value:
1021 adev->pm.smu_prv_buffer_size = 0;
1022}
1023
d38ceaf9 1024/**
06ec9070 1025 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
1026 *
1027 * @adev: amdgpu_device pointer
1028 *
1029 * Validates certain module parameters and updates
1030 * the associated values used by the driver (all asics).
1031 */
912dfc84 1032static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1033{
912dfc84
EQ
1034 int ret = 0;
1035
5b011235
CZ
1036 if (amdgpu_sched_jobs < 4) {
1037 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1038 amdgpu_sched_jobs);
1039 amdgpu_sched_jobs = 4;
76117507 1040 } else if (!is_power_of_2(amdgpu_sched_jobs)){
5b011235
CZ
1041 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1042 amdgpu_sched_jobs);
1043 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1044 }
d38ceaf9 1045
83e74db6 1046 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
1047 /* gart size must be greater or equal to 32M */
1048 dev_warn(adev->dev, "gart size (%d) too small\n",
1049 amdgpu_gart_size);
83e74db6 1050 amdgpu_gart_size = -1;
d38ceaf9
AD
1051 }
1052
36d38372 1053 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1054 /* gtt size must be greater or equal to 32M */
36d38372
CK
1055 dev_warn(adev->dev, "gtt size (%d) too small\n",
1056 amdgpu_gtt_size);
1057 amdgpu_gtt_size = -1;
d38ceaf9
AD
1058 }
1059
d07f14be
RH
1060 /* valid range is between 4 and 9 inclusive */
1061 if (amdgpu_vm_fragment_size != -1 &&
1062 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1063 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1064 amdgpu_vm_fragment_size = -1;
1065 }
1066
7951e376
RZ
1067 amdgpu_device_check_smu_prv_buffer_size(adev);
1068
06ec9070 1069 amdgpu_device_check_vm_size(adev);
d38ceaf9 1070
06ec9070 1071 amdgpu_device_check_block_size(adev);
6a7f76e7 1072
19aede77 1073 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84
EQ
1074
1075 return ret;
d38ceaf9
AD
1076}
1077
1078/**
1079 * amdgpu_switcheroo_set_state - set switcheroo state
1080 *
1081 * @pdev: pci dev pointer
1694467b 1082 * @state: vga_switcheroo state
1083 *
 1084 * Callback for the switcheroo driver. Suspends or resumes
 1085 * the asic before or after it is powered up using ACPI methods.
1086 */
1087static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
1088{
1089 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 1090 int r;
d38ceaf9 1091
31af062a 1092 if (amdgpu_device_supports_boco(dev) && state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
1093 return;
1094
1095 if (state == VGA_SWITCHEROO_ON) {
7ca85295 1096 pr_info("amdgpu: switched on\n");
d38ceaf9
AD
1097 /* don't suspend or resume card normally */
1098 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1099
de185019
AD
1100 pci_set_power_state(dev->pdev, PCI_D0);
1101 pci_restore_state(dev->pdev);
1102 r = pci_enable_device(dev->pdev);
1103 if (r)
1104 DRM_WARN("pci_enable_device failed (%d)\n", r);
1105 amdgpu_device_resume(dev, true);
d38ceaf9 1106
d38ceaf9
AD
1107 dev->switch_power_state = DRM_SWITCH_POWER_ON;
1108 drm_kms_helper_poll_enable(dev);
1109 } else {
7ca85295 1110 pr_info("amdgpu: switched off\n");
d38ceaf9
AD
1111 drm_kms_helper_poll_disable(dev);
1112 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
de185019
AD
1113 amdgpu_device_suspend(dev, true);
1114 pci_save_state(dev->pdev);
1115 /* Shut down the device */
1116 pci_disable_device(dev->pdev);
1117 pci_set_power_state(dev->pdev, PCI_D3cold);
d38ceaf9
AD
1118 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1119 }
1120}
1121
1122/**
1123 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1124 *
1125 * @pdev: pci dev pointer
1126 *
 1127 * Callback for the switcheroo driver. Checks whether the switcheroo
 1128 * state can be changed.
1129 * Returns true if the state can be changed, false if not.
1130 */
1131static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1132{
1133 struct drm_device *dev = pci_get_drvdata(pdev);
1134
1135 /*
1136 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1137 * locking inversion with the driver load path. And the access here is
1138 * completely racy anyway. So don't bother with locking for now.
1139 */
1140 return dev->open_count == 0;
1141}
1142
1143static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1144 .set_gpu_state = amdgpu_switcheroo_set_state,
1145 .reprobe = NULL,
1146 .can_switch = amdgpu_switcheroo_can_switch,
1147};
1148
e3ecdffa
AD
1149/**
1150 * amdgpu_device_ip_set_clockgating_state - set the CG state
1151 *
87e3f136 1152 * @dev: amdgpu_device pointer
e3ecdffa
AD
1153 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1154 * @state: clockgating state (gate or ungate)
1155 *
1156 * Sets the requested clockgating state for all instances of
1157 * the hardware IP specified.
1158 * Returns the error code from the last instance.
1159 */
43fa561f 1160int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1161 enum amd_ip_block_type block_type,
1162 enum amd_clockgating_state state)
d38ceaf9 1163{
43fa561f 1164 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1165 int i, r = 0;
1166
1167 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1168 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1169 continue;
c722865a
RZ
1170 if (adev->ip_blocks[i].version->type != block_type)
1171 continue;
1172 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1173 continue;
1174 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1175 (void *)adev, state);
1176 if (r)
1177 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1178 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1179 }
1180 return r;
1181}
1182
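/*
 * Illustrative call sketch for the helper above: gate clocks on all GFX IP
 * instances (return value checking omitted):
 *
 *	amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
 *					       AMD_CG_STATE_GATE);
 */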
1183/**
1184 * amdgpu_device_ip_set_powergating_state - set the PG state
1185 *
87e3f136 1186 * @dev: amdgpu_device pointer
e3ecdffa
AD
1187 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1188 * @state: powergating state (gate or ungate)
1189 *
1190 * Sets the requested powergating state for all instances of
1191 * the hardware IP specified.
1192 * Returns the error code from the last instance.
1193 */
43fa561f 1194int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1195 enum amd_ip_block_type block_type,
1196 enum amd_powergating_state state)
d38ceaf9 1197{
43fa561f 1198 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1199 int i, r = 0;
1200
1201 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1202 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1203 continue;
c722865a
RZ
1204 if (adev->ip_blocks[i].version->type != block_type)
1205 continue;
1206 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1207 continue;
1208 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1209 (void *)adev, state);
1210 if (r)
1211 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1212 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1213 }
1214 return r;
1215}
1216
e3ecdffa
AD
1217/**
1218 * amdgpu_device_ip_get_clockgating_state - get the CG state
1219 *
1220 * @adev: amdgpu_device pointer
1221 * @flags: clockgating feature flags
1222 *
1223 * Walks the list of IPs on the device and updates the clockgating
1224 * flags for each IP.
1225 * Updates @flags with the feature flags for each hardware IP where
1226 * clockgating is enabled.
1227 */
2990a1fc
AD
1228void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1229 u32 *flags)
6cb2d4e4
HR
1230{
1231 int i;
1232
1233 for (i = 0; i < adev->num_ip_blocks; i++) {
1234 if (!adev->ip_blocks[i].status.valid)
1235 continue;
1236 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1237 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1238 }
1239}
1240
e3ecdffa
AD
1241/**
1242 * amdgpu_device_ip_wait_for_idle - wait for idle
1243 *
1244 * @adev: amdgpu_device pointer
1245 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1246 *
1247 * Waits for the request hardware IP to be idle.
1248 * Returns 0 for success or a negative error code on failure.
1249 */
2990a1fc
AD
1250int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1251 enum amd_ip_block_type block_type)
5dbbb60b
AD
1252{
1253 int i, r;
1254
1255 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1256 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1257 continue;
a1255107
AD
1258 if (adev->ip_blocks[i].version->type == block_type) {
1259 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1260 if (r)
1261 return r;
1262 break;
1263 }
1264 }
1265 return 0;
1266
1267}
1268
e3ecdffa
AD
1269/**
1270 * amdgpu_device_ip_is_idle - is the hardware IP idle
1271 *
1272 * @adev: amdgpu_device pointer
1273 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1274 *
1275 * Check if the hardware IP is idle or not.
 1276 * Returns true if the IP is idle, false if not.
1277 */
2990a1fc
AD
1278bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1279 enum amd_ip_block_type block_type)
5dbbb60b
AD
1280{
1281 int i;
1282
1283 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1284 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1285 continue;
a1255107
AD
1286 if (adev->ip_blocks[i].version->type == block_type)
1287 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1288 }
1289 return true;
1290
1291}
1292
e3ecdffa
AD
1293/**
1294 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1295 *
1296 * @adev: amdgpu_device pointer
87e3f136 1297 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1298 *
1299 * Returns a pointer to the hardware IP block structure
1300 * if it exists for the asic, otherwise NULL.
1301 */
2990a1fc
AD
1302struct amdgpu_ip_block *
1303amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1304 enum amd_ip_block_type type)
d38ceaf9
AD
1305{
1306 int i;
1307
1308 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1309 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1310 return &adev->ip_blocks[i];
1311
1312 return NULL;
1313}
1314
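/*
 * Illustrative sketch: look up the GFX IP block with the helper above and
 * print its version; the lookup may return NULL if the asic has no such
 * block:
 *
 *	struct amdgpu_ip_block *ip_block =
 *		amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
 *
 *	if (ip_block)
 *		DRM_INFO("GFX IP v%u.%u\n", ip_block->version->major,
 *			 ip_block->version->minor);
 */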
1315/**
2990a1fc 1316 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1317 *
1318 * @adev: amdgpu_device pointer
5fc3aeeb 1319 * @type: enum amd_ip_block_type
d38ceaf9
AD
1320 * @major: major version
1321 * @minor: minor version
1322 *
 1323 * Returns 0 if the IP block version is equal to or greater than (@major, @minor),
 1324 * 1 if it is smaller or the ip_block doesn't exist
1325 */
2990a1fc
AD
1326int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1327 enum amd_ip_block_type type,
1328 u32 major, u32 minor)
d38ceaf9 1329{
2990a1fc 1330 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1331
a1255107
AD
1332 if (ip_block && ((ip_block->version->major > major) ||
1333 ((ip_block->version->major == major) &&
1334 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1335 return 0;
1336
1337 return 1;
1338}
1339
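/*
 * Illustrative sketch: use the comparison helper above to check that the
 * GFX IP is at least version 9.0:
 *
 *	if (amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GFX, 9, 0) == 0)
 *		DRM_INFO("GFX 9.0 or newer\n");
 */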
a1255107 1340/**
2990a1fc 1341 * amdgpu_device_ip_block_add
a1255107
AD
1342 *
1343 * @adev: amdgpu_device pointer
1344 * @ip_block_version: pointer to the IP to add
1345 *
1346 * Adds the IP block driver information to the collection of IPs
1347 * on the asic.
1348 */
2990a1fc
AD
1349int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1350 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1351{
1352 if (!ip_block_version)
1353 return -EINVAL;
1354
e966a725 1355 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1356 ip_block_version->funcs->name);
1357
a1255107
AD
1358 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1359
1360 return 0;
1361}
1362
e3ecdffa
AD
1363/**
1364 * amdgpu_device_enable_virtual_display - enable virtual display feature
1365 *
1366 * @adev: amdgpu_device pointer
1367 *
1368 * Enabled the virtual display feature if the user has enabled it via
1369 * the module parameter virtual_display. This feature provides a virtual
1370 * display hardware on headless boards or in virtualized environments.
1371 * This function parses and validates the configuration string specified by
 1373 * the user and configures the virtual display configuration (number of
1373 * virtual connectors, crtcs, etc.) specified.
1374 */
483ef985 1375static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1376{
1377 adev->enable_virtual_display = false;
1378
1379 if (amdgpu_virtual_display) {
1380 struct drm_device *ddev = adev->ddev;
1381 const char *pci_address_name = pci_name(ddev->pdev);
0f66356d 1382 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1383
1384 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1385 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1386 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1387 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1388 if (!strcmp("all", pciaddname)
1389 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1390 long num_crtc;
1391 int res = -1;
1392
9accf2fd 1393 adev->enable_virtual_display = true;
0f66356d
ED
1394
1395 if (pciaddname_tmp)
1396 res = kstrtol(pciaddname_tmp, 10,
1397 &num_crtc);
1398
1399 if (!res) {
1400 if (num_crtc < 1)
1401 num_crtc = 1;
1402 if (num_crtc > 6)
1403 num_crtc = 6;
1404 adev->mode_info.num_crtc = num_crtc;
1405 } else {
1406 adev->mode_info.num_crtc = 1;
1407 }
9accf2fd
ED
1408 break;
1409 }
1410 }
1411
0f66356d
ED
1412 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1413 amdgpu_virtual_display, pci_address_name,
1414 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
1415
1416 kfree(pciaddstr);
1417 }
1418}
1419
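/*
 * Illustrative example of the module parameter string parsed above, with a
 * hypothetical PCI address: amdgpu.virtual_display=0000:03:00.0,2 enables a
 * virtual display with two CRTCs on that device, while
 * amdgpu.virtual_display=all enables it on every amdgpu device.
 */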
1420/**
1421 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1422 *
1423 * @adev: amdgpu_device pointer
1424 *
1425 * Parses the asic configuration parameters specified in the gpu info
 1426 * firmware and makes them available to the driver for use in configuring
1427 * the asic.
1428 * Returns 0 on success, -EINVAL on failure.
1429 */
e2a75f88
AD
1430static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1431{
e2a75f88
AD
1432 const char *chip_name;
1433 char fw_name[30];
1434 int err;
1435 const struct gpu_info_firmware_header_v1_0 *hdr;
1436
ab4fe3e1
HR
1437 adev->firmware.gpu_info_fw = NULL;
1438
e2a75f88
AD
1439 switch (adev->asic_type) {
1440 case CHIP_TOPAZ:
1441 case CHIP_TONGA:
1442 case CHIP_FIJI:
e2a75f88 1443 case CHIP_POLARIS10:
cc07f18d 1444 case CHIP_POLARIS11:
e2a75f88 1445 case CHIP_POLARIS12:
cc07f18d 1446 case CHIP_VEGAM:
e2a75f88
AD
1447 case CHIP_CARRIZO:
1448 case CHIP_STONEY:
1449#ifdef CONFIG_DRM_AMDGPU_SI
1450 case CHIP_VERDE:
1451 case CHIP_TAHITI:
1452 case CHIP_PITCAIRN:
1453 case CHIP_OLAND:
1454 case CHIP_HAINAN:
1455#endif
1456#ifdef CONFIG_DRM_AMDGPU_CIK
1457 case CHIP_BONAIRE:
1458 case CHIP_HAWAII:
1459 case CHIP_KAVERI:
1460 case CHIP_KABINI:
1461 case CHIP_MULLINS:
1462#endif
27c0bc71 1463 case CHIP_VEGA20:
e2a75f88
AD
1464 default:
1465 return 0;
1466 case CHIP_VEGA10:
1467 chip_name = "vega10";
1468 break;
3f76dced
AD
1469 case CHIP_VEGA12:
1470 chip_name = "vega12";
1471 break;
2d2e5e7e 1472 case CHIP_RAVEN:
54c4d17e
FX
1473 if (adev->rev_id >= 8)
1474 chip_name = "raven2";
741deade
AD
1475 else if (adev->pdev->device == 0x15d8)
1476 chip_name = "picasso";
54c4d17e
FX
1477 else
1478 chip_name = "raven";
2d2e5e7e 1479 break;
65e60f6e
LM
1480 case CHIP_ARCTURUS:
1481 chip_name = "arcturus";
1482 break;
b51a26a0
HR
1483 case CHIP_RENOIR:
1484 chip_name = "renoir";
1485 break;
23c6268e
HR
1486 case CHIP_NAVI10:
1487 chip_name = "navi10";
1488 break;
ed42cfe1
XY
1489 case CHIP_NAVI14:
1490 chip_name = "navi14";
1491 break;
42b325e5
XY
1492 case CHIP_NAVI12:
1493 chip_name = "navi12";
1494 break;
e2a75f88
AD
1495 }
1496
1497 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
ab4fe3e1 1498 err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
e2a75f88
AD
1499 if (err) {
1500 dev_err(adev->dev,
1501 "Failed to load gpu_info firmware \"%s\"\n",
1502 fw_name);
1503 goto out;
1504 }
ab4fe3e1 1505 err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
e2a75f88
AD
1506 if (err) {
1507 dev_err(adev->dev,
1508 "Failed to validate gpu_info firmware \"%s\"\n",
1509 fw_name);
1510 goto out;
1511 }
1512
ab4fe3e1 1513 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
1514 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1515
1516 switch (hdr->version_major) {
1517 case 1:
1518 {
1519 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 1520 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
1521 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1522
ec51d3fa
XY
1523 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
1524 goto parse_soc_bounding_box;
1525
b5ab16bf
AD
1526 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1527 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1528 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1529 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 1530 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
1531 le32_to_cpu(gpu_info_fw->gc_num_tccs);
1532 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1533 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1534 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1535 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 1536 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
1537 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1538 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
1539 adev->gfx.cu_info.max_waves_per_simd =
1540 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1541 adev->gfx.cu_info.max_scratch_slots_per_cu =
1542 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1543 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 1544 if (hdr->version_minor >= 1) {
35c2e910
HZ
1545 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1546 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1547 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1548 adev->gfx.config.num_sc_per_sh =
1549 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1550 adev->gfx.config.num_packer_per_sc =
1551 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1552 }
ec51d3fa
XY
1553
1554parse_soc_bounding_box:
1555 /*
 1556 * SOC bounding box info is not integrated in the discovery table,
 1557 * so we always need to parse it from the gpu info firmware.
1558 */
48321c3d
HW
1559 if (hdr->version_minor == 2) {
1560 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1561 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1562 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1563 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1564 }
e2a75f88
AD
1565 break;
1566 }
1567 default:
1568 dev_err(adev->dev,
1569 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1570 err = -EINVAL;
1571 goto out;
1572 }
1573out:
e2a75f88
AD
1574 return err;
1575}
1576
1577/**
1578 * amdgpu_device_ip_early_init - run early init for hardware IPs
1579 *
1580 * @adev: amdgpu_device pointer
1581 *
1582 * Early initialization pass for hardware IPs. The hardware IPs that make
 1583 * up each asic are discovered and each IP's early_init callback is run. This
1584 * is the first stage in initializing the asic.
1585 * Returns 0 on success, negative error code on failure.
1586 */
06ec9070 1587static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 1588{
aaa36a97 1589 int i, r;
d38ceaf9 1590
483ef985 1591 amdgpu_device_enable_virtual_display(adev);
a6be7570 1592
d38ceaf9 1593 switch (adev->asic_type) {
aaa36a97
AD
1594 case CHIP_TOPAZ:
1595 case CHIP_TONGA:
48299f95 1596 case CHIP_FIJI:
2cc0c0b5 1597 case CHIP_POLARIS10:
32cc7e53 1598 case CHIP_POLARIS11:
c4642a47 1599 case CHIP_POLARIS12:
32cc7e53 1600 case CHIP_VEGAM:
aaa36a97 1601 case CHIP_CARRIZO:
39bb0c92
SL
1602 case CHIP_STONEY:
1603 if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
aaa36a97
AD
1604 adev->family = AMDGPU_FAMILY_CZ;
1605 else
1606 adev->family = AMDGPU_FAMILY_VI;
1607
1608 r = vi_set_ip_blocks(adev);
1609 if (r)
1610 return r;
1611 break;
33f34802
KW
1612#ifdef CONFIG_DRM_AMDGPU_SI
1613 case CHIP_VERDE:
1614 case CHIP_TAHITI:
1615 case CHIP_PITCAIRN:
1616 case CHIP_OLAND:
1617 case CHIP_HAINAN:
295d0daf 1618 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
1619 r = si_set_ip_blocks(adev);
1620 if (r)
1621 return r;
1622 break;
1623#endif
a2e73f56
AD
1624#ifdef CONFIG_DRM_AMDGPU_CIK
1625 case CHIP_BONAIRE:
1626 case CHIP_HAWAII:
1627 case CHIP_KAVERI:
1628 case CHIP_KABINI:
1629 case CHIP_MULLINS:
1630 if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII))
1631 adev->family = AMDGPU_FAMILY_CI;
1632 else
1633 adev->family = AMDGPU_FAMILY_KV;
1634
1635 r = cik_set_ip_blocks(adev);
1636 if (r)
1637 return r;
1638 break;
1639#endif
e48a3cd9
AD
1640 case CHIP_VEGA10:
1641 case CHIP_VEGA12:
e4bd8170 1642 case CHIP_VEGA20:
e48a3cd9 1643 case CHIP_RAVEN:
61cf44c1 1644 case CHIP_ARCTURUS:
b51a26a0
HR
1645 case CHIP_RENOIR:
1646 if (adev->asic_type == CHIP_RAVEN ||
1647 adev->asic_type == CHIP_RENOIR)
2ca8a5d2
CZ
1648 adev->family = AMDGPU_FAMILY_RV;
1649 else
1650 adev->family = AMDGPU_FAMILY_AI;
460826e6
KW
1651
1652 r = soc15_set_ip_blocks(adev);
1653 if (r)
1654 return r;
1655 break;
0a5b8c7b 1656 case CHIP_NAVI10:
7ecb5cd4 1657 case CHIP_NAVI14:
4808cf9c 1658 case CHIP_NAVI12:
0a5b8c7b
HR
1659 adev->family = AMDGPU_FAMILY_NV;
1660
1661 r = nv_set_ip_blocks(adev);
1662 if (r)
1663 return r;
1664 break;
d38ceaf9
AD
1665 default:
1666 /* FIXME: not supported yet */
1667 return -EINVAL;
1668 }
1669
e2a75f88
AD
1670 r = amdgpu_device_parse_gpu_info_fw(adev);
1671 if (r)
1672 return r;
1673
ec51d3fa
XY
1674 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
1675 amdgpu_discovery_get_gfx_info(adev);
1676
1884734a 1677 amdgpu_amdkfd_device_probe(adev);
1678
3149d9da
XY
1679 if (amdgpu_sriov_vf(adev)) {
1680 r = amdgpu_virt_request_full_gpu(adev, true);
1681 if (r)
5ffa61c1 1682 return -EAGAIN;
3149d9da
XY
1683 }
1684
3b94fb10 1685 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 1686 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 1687 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
00f54b97 1688
d38ceaf9
AD
1689 for (i = 0; i < adev->num_ip_blocks; i++) {
1690 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
ed8cf00c
HR
1691 DRM_ERROR("disabled ip block: %d <%s>\n",
1692 i, adev->ip_blocks[i].version->funcs->name);
a1255107 1693 adev->ip_blocks[i].status.valid = false;
d38ceaf9 1694 } else {
a1255107
AD
1695 if (adev->ip_blocks[i].version->funcs->early_init) {
1696 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 1697 if (r == -ENOENT) {
a1255107 1698 adev->ip_blocks[i].status.valid = false;
2c1a2784 1699 } else if (r) {
a1255107
AD
1700 DRM_ERROR("early_init of IP block <%s> failed %d\n",
1701 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 1702 return r;
2c1a2784 1703 } else {
a1255107 1704 adev->ip_blocks[i].status.valid = true;
2c1a2784 1705 }
974e6b64 1706 } else {
a1255107 1707 adev->ip_blocks[i].status.valid = true;
d38ceaf9 1708 }
d38ceaf9 1709 }
21a249ca
AD
1710 /* get the vbios after the asic_funcs are set up */
1711 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
1712 /* Read BIOS */
1713 if (!amdgpu_get_bios(adev))
1714 return -EINVAL;
1715
1716 r = amdgpu_atombios_init(adev);
1717 if (r) {
1718 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
1719 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
1720 return r;
1721 }
1722 }
d38ceaf9
AD
1723 }
1724
395d1fb9
NH
1725 adev->cg_flags &= amdgpu_cg_mask;
1726 adev->pg_flags &= amdgpu_pg_mask;
1727
d38ceaf9
AD
1728 return 0;
1729}
1730
0a4f2520
RZ
1731static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
1732{
1733 int i, r;
1734
1735 for (i = 0; i < adev->num_ip_blocks; i++) {
1736 if (!adev->ip_blocks[i].status.sw)
1737 continue;
1738 if (adev->ip_blocks[i].status.hw)
1739 continue;
1740 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 1741 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
1742 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
1743 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1744 if (r) {
1745 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1746 adev->ip_blocks[i].version->funcs->name, r);
1747 return r;
1748 }
1749 adev->ip_blocks[i].status.hw = true;
1750 }
1751 }
1752
1753 return 0;
1754}
1755
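/* Phase 2 of hardware init: bring up every remaining IP block that has
 * completed sw_init but has not yet been hw-initialized. */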
1756static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
1757{
1758 int i, r;
1759
1760 for (i = 0; i < adev->num_ip_blocks; i++) {
1761 if (!adev->ip_blocks[i].status.sw)
1762 continue;
1763 if (adev->ip_blocks[i].status.hw)
1764 continue;
1765 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1766 if (r) {
1767 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1768 adev->ip_blocks[i].version->funcs->name, r);
1769 return r;
1770 }
1771 adev->ip_blocks[i].status.hw = true;
1772 }
1773
1774 return 0;
1775}
1776
7a3e0bb2
RZ
1777static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
1778{
1779 int r = 0;
1780 int i;
80f41f84 1781 uint32_t smu_version;
7a3e0bb2
RZ
1782
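	/* On SOC15 and newer ASICs firmware loading goes through the PSP, so
	 * bring the PSP block up (hw_init, or resume on reset/suspend paths)
	 * before the phase-2 hardware init of the remaining IP blocks. */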
1783 if (adev->asic_type >= CHIP_VEGA10) {
1784 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
1785 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
1786 continue;
1787
1789 /* no need to do the fw loading again if already done */
1789 if (adev->ip_blocks[i].status.hw == true)
1790 break;
1791
1792 if (adev->in_gpu_reset || adev->in_suspend) {
1793 r = adev->ip_blocks[i].version->funcs->resume(adev);
1794 if (r) {
1795 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 1796 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
1797 return r;
1798 }
1799 } else {
1800 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1801 if (r) {
1802 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1803 adev->ip_blocks[i].version->funcs->name, r);
1804 return r;
7a3e0bb2 1805 }
7a3e0bb2 1806 }
482f0e53
ML
1807
1808 adev->ip_blocks[i].status.hw = true;
1809 break;
7a3e0bb2
RZ
1810 }
1811 }
482f0e53 1812
80f41f84 1813 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 1814
80f41f84 1815 return r;
7a3e0bb2
RZ
1816}
1817
e3ecdffa
AD
1818/**
1819 * amdgpu_device_ip_init - run init for hardware IPs
1820 *
1821 * @adev: amdgpu_device pointer
1822 *
1823 * Main initialization pass for hardware IPs. The list of all the hardware
1824 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
1825 * are run. sw_init initializes the software state associated with each IP
1826 * and hw_init initializes the hardware associated with each IP.
1827 * Returns 0 on success, negative error code on failure.
1828 */
06ec9070 1829static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
1830{
1831 int i, r;
1832
c030f2e4 1833 r = amdgpu_ras_init(adev);
1834 if (r)
1835 return r;
1836
d38ceaf9 1837 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1838 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 1839 continue;
a1255107 1840 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 1841 if (r) {
a1255107
AD
1842 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
1843 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 1844 goto init_failed;
2c1a2784 1845 }
a1255107 1846 adev->ip_blocks[i].status.sw = true;
bfca0289 1847
d38ceaf9 1848 /* need to do gmc hw init early so we can allocate gpu mem */
a1255107 1849 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
06ec9070 1850 r = amdgpu_device_vram_scratch_init(adev);
2c1a2784
AD
1851 if (r) {
1852 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
72d3f592 1853 goto init_failed;
2c1a2784 1854 }
a1255107 1855 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
1856 if (r) {
1857 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 1858 goto init_failed;
2c1a2784 1859 }
06ec9070 1860 r = amdgpu_device_wb_init(adev);
2c1a2784 1861 if (r) {
06ec9070 1862 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 1863 goto init_failed;
2c1a2784 1864 }
a1255107 1865 adev->ip_blocks[i].status.hw = true;
2493664f
ML
1866
1867 /* right after GMC hw init, we create CSA */
f92d5c61 1868 if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
1e256e27
RZ
1869 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
1870 AMDGPU_GEM_DOMAIN_VRAM,
1871 AMDGPU_CSA_SIZE);
2493664f
ML
1872 if (r) {
1873 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 1874 goto init_failed;
2493664f
ML
1875 }
1876 }
d38ceaf9
AD
1877 }
1878 }
1879
533aed27
AG
1880 r = amdgpu_ib_pool_init(adev);
1881 if (r) {
1882 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
1883 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
1884 goto init_failed;
1885 }
1886
c8963ea4
RZ
1887 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
1888 if (r)
72d3f592 1889 goto init_failed;
0a4f2520
RZ
1890
1891 r = amdgpu_device_ip_hw_init_phase1(adev);
1892 if (r)
72d3f592 1893 goto init_failed;
0a4f2520 1894
7a3e0bb2
RZ
1895 r = amdgpu_device_fw_loading(adev);
1896 if (r)
72d3f592 1897 goto init_failed;
7a3e0bb2 1898
0a4f2520
RZ
1899 r = amdgpu_device_ip_hw_init_phase2(adev);
1900 if (r)
72d3f592 1901 goto init_failed;
d38ceaf9 1902
121a2bc6
AG
1903 /*
1904 * retired pages will be loaded from eeprom and reserved here;
1905 * it should be called after amdgpu_device_ip_hw_init_phase2, since
1906 * for some ASICs the RAS EEPROM code relies on the SMU being fully
1907 * functional for I2C communication, which is only true at this point.
1908 * recovery_init may fail, but it can free all resources allocated by
1909 * itself and its failure should not stop the amdgpu init process.
1910 *
1911 * Note: theoretically, this should be called before all vram allocations
1912 * to protect retired pages from being abused.
1913 */
1914 amdgpu_ras_recovery_init(adev);
1915
3e2e2ab5
HZ
1916 if (adev->gmc.xgmi.num_physical_nodes > 1)
1917 amdgpu_xgmi_add_device(adev);
1884734a 1918 amdgpu_amdkfd_device_init(adev);
c6332b97 1919
72d3f592 1920init_failed:
d3c117e5 1921 if (amdgpu_sriov_vf(adev)) {
72d3f592
ED
1922 if (!r)
1923 amdgpu_virt_init_data_exchange(adev);
c6332b97 1924 amdgpu_virt_release_full_gpu(adev, true);
d3c117e5 1925 }
c6332b97 1926
72d3f592 1927 return r;
d38ceaf9
AD
1928}
1929
e3ecdffa
AD
1930/**
1931 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
1932 *
1933 * @adev: amdgpu_device pointer
1934 *
1935 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
1936 * this function before a GPU reset. If the value is retained after a
1937 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
1938 */
06ec9070 1939static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
1940{
1941 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
1942}
1943
e3ecdffa
AD
1944/**
1945 * amdgpu_device_check_vram_lost - check if vram is valid
1946 *
1947 * @adev: amdgpu_device pointer
1948 *
1949 * Checks the reset magic value written to the gart pointer in VRAM.
1950 * The driver calls this after a GPU reset to see if the contents of
1951 * VRAM are lost or not.
1952 * Returns true if vram is lost, false if not.
1953 */
06ec9070 1954static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8
CZ
1955{
1956 return !!memcmp(adev->gart.ptr, adev->reset_magic,
1957 AMDGPU_RESET_MAGIC_NUM);
1958}
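/*
 * Typical usage (a sketch based on the two helpers above): the driver
 * snapshots the first AMDGPU_RESET_MAGIC_NUM bytes of the GART page with
 * amdgpu_device_fill_reset_magic() at late init (and thus before any GPU
 * reset), and after a reset amdgpu_device_check_vram_lost() compares that
 * snapshot against VRAM to decide whether its contents were lost.
 */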
1959
e3ecdffa 1960/**
1112a46b 1961 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
1962 *
1963 * @adev: amdgpu_device pointer
b8b72130 1964 * @state: clockgating state (gate or ungate)
e3ecdffa 1965 *
e3ecdffa 1966 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
1967 * set_clockgating_state callbacks are run.
1968 * During late init this pass enables clockgating for hardware IPs;
1969 * during fini or suspend it disables clockgating instead.
e3ecdffa
AD
1970 * Returns 0 on success, negative error code on failure.
1971 */
fdd34271 1972
1112a46b
RZ
1973static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
1974 enum amd_clockgating_state state)
d38ceaf9 1975{
1112a46b 1976 int i, j, r;
d38ceaf9 1977
4a2ba394
SL
1978 if (amdgpu_emu_mode == 1)
1979 return 0;
1980
1112a46b
RZ
1981 for (j = 0; j < adev->num_ip_blocks; j++) {
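		/* Walk the IP blocks in init order when gating and in reverse
		 * (fini) order when ungating. */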
1982 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 1983 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 1984 continue;
4a446d55 1985 /* skip CG for VCE/UVD, it's handled specially */
a1255107 1986 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 1987 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 1988 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 1989 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 1990 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 1991 /* enable clockgating to save power */
a1255107 1992 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 1993 state);
4a446d55
AD
1994 if (r) {
1995 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 1996 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
1997 return r;
1998 }
b0b00ff1 1999 }
d38ceaf9 2000 }
06b18f61 2001
c9f96fd5
RZ
2002 return 0;
2003}
2004
1112a46b 2005static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
c9f96fd5 2006{
1112a46b 2007 int i, j, r;
06b18f61 2008
c9f96fd5
RZ
2009 if (amdgpu_emu_mode == 1)
2010 return 0;
2011
1112a46b
RZ
2012 for (j = 0; j < adev->num_ip_blocks; j++) {
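		/* Same ordering rule as for clockgating: forward when gating,
		 * reverse when ungating. */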
2013 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2014 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5
RZ
2015 continue;
2016 /* skip PG for VCE/UVD, it's handled specially */
2017 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2018 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2019 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2020 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2021 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2022 /* enable powergating to save power */
2023 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2024 state);
c9f96fd5
RZ
2025 if (r) {
2026 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2027 adev->ip_blocks[i].version->funcs->name, r);
2028 return r;
2029 }
2030 }
2031 }
2dc80b00
S
2032 return 0;
2033}
2034
beff74bc
AD
2035static int amdgpu_device_enable_mgpu_fan_boost(void)
2036{
2037 struct amdgpu_gpu_instance *gpu_ins;
2038 struct amdgpu_device *adev;
2039 int i, ret = 0;
2040
2041 mutex_lock(&mgpu_info.mutex);
2042
2043 /*
2044 * MGPU fan boost feature should be enabled
2045 * only when there are two or more dGPUs in
2046 * the system
2047 */
2048 if (mgpu_info.num_dgpu < 2)
2049 goto out;
2050
2051 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2052 gpu_ins = &(mgpu_info.gpu_ins[i]);
2053 adev = gpu_ins->adev;
2054 if (!(adev->flags & AMD_IS_APU) &&
2055 !gpu_ins->mgpu_fan_enabled &&
2056 adev->powerplay.pp_funcs &&
2057 adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
2058 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2059 if (ret)
2060 break;
2061
2062 gpu_ins->mgpu_fan_enabled = 1;
2063 }
2064 }
2065
2066out:
2067 mutex_unlock(&mgpu_info.mutex);
2068
2069 return ret;
2070}
2071
e3ecdffa
AD
2072/**
2073 * amdgpu_device_ip_late_init - run late init for hardware IPs
2074 *
2075 * @adev: amdgpu_device pointer
2076 *
2077 * Late initialization pass for hardware IPs. The list of all the hardware
2078 * IPs that make up the asic is walked and the late_init callbacks are run.
2079 * late_init covers any special initialization that an IP requires
2080 * after all of them have been initialized or something that needs to happen
2081 * late in the init process.
2082 * Returns 0 on success, negative error code on failure.
2083 */
06ec9070 2084static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2085{
60599a03 2086 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2087 int i = 0, r;
2088
2089 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2090 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2091 continue;
2092 if (adev->ip_blocks[i].version->funcs->late_init) {
2093 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2094 if (r) {
2095 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2096 adev->ip_blocks[i].version->funcs->name, r);
2097 return r;
2098 }
2dc80b00 2099 }
73f847db 2100 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2101 }
2102
1112a46b
RZ
2103 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2104 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2105
06ec9070 2106 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2107
beff74bc
AD
2108 r = amdgpu_device_enable_mgpu_fan_boost();
2109 if (r)
2110 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2111
60599a03
EQ
2112
2113 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2114 mutex_lock(&mgpu_info.mutex);
2115
2116 /*
2117 * Reset the device p-state to low, as it was booted at high.
2118 *
2119 * This should be performed only after all devices from the same
2120 * hive get initialized.
2121 *
2122 * However, the number of devices in a hive is not known in advance;
2123 * it is only counted up one by one as the devices are initialized.
2124 *
2125 * So, we wait for all XGMI interlinked devices to be initialized.
2126 * This may bring some delays as those devices may come from
2127 * different hives. But that should be OK.
2128 */
2129 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2130 for (i = 0; i < mgpu_info.num_gpu; i++) {
2131 gpu_instance = &(mgpu_info.gpu_ins[i]);
2132 if (gpu_instance->adev->flags & AMD_IS_APU)
2133 continue;
2134
2135 r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 0);
2136 if (r) {
2137 DRM_ERROR("pstate setting failed (%d).\n", r);
2138 break;
2139 }
2140 }
2141 }
2142
2143 mutex_unlock(&mgpu_info.mutex);
2144 }
2145
d38ceaf9
AD
2146 return 0;
2147}
2148
e3ecdffa
AD
2149/**
2150 * amdgpu_device_ip_fini - run fini for hardware IPs
2151 *
2152 * @adev: amdgpu_device pointer
2153 *
2154 * Main teardown pass for hardware IPs. The list of all the hardware
2155 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2156 * are run. hw_fini tears down the hardware associated with each IP
2157 * and sw_fini tears down any software state associated with each IP.
2158 * Returns 0 on success, negative error code on failure.
2159 */
06ec9070 2160static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
d38ceaf9
AD
2161{
2162 int i, r;
2163
c030f2e4 2164 amdgpu_ras_pre_fini(adev);
2165
a82400b5
AG
2166 if (adev->gmc.xgmi.num_physical_nodes > 1)
2167 amdgpu_xgmi_remove_device(adev);
2168
1884734a 2169 amdgpu_amdkfd_device_fini(adev);
05df1f01
RZ
2170
2171 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2172 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2173
3e96dbfd
AD
2174 /* need to disable SMC first */
2175 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2176 if (!adev->ip_blocks[i].status.hw)
3e96dbfd 2177 continue;
fdd34271 2178 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
a1255107 2179 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3e96dbfd
AD
2180 /* XXX handle errors */
2181 if (r) {
2182 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
a1255107 2183 adev->ip_blocks[i].version->funcs->name, r);
3e96dbfd 2184 }
a1255107 2185 adev->ip_blocks[i].status.hw = false;
3e96dbfd
AD
2186 break;
2187 }
2188 }
2189
d38ceaf9 2190 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2191 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2192 continue;
8201a67a 2193
a1255107 2194 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2195 /* XXX handle errors */
2c1a2784 2196 if (r) {
a1255107
AD
2197 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2198 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2199 }
8201a67a 2200
a1255107 2201 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2202 }
2203
9950cda2 2204
d38ceaf9 2205 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2206 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2207 continue;
c12aba3a
ML
2208
2209 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2210 amdgpu_ucode_free_bo(adev);
1e256e27 2211 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a
ML
2212 amdgpu_device_wb_fini(adev);
2213 amdgpu_device_vram_scratch_fini(adev);
533aed27 2214 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2215 }
2216
a1255107 2217 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2218 /* XXX handle errors */
2c1a2784 2219 if (r) {
a1255107
AD
2220 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2221 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2222 }
a1255107
AD
2223 adev->ip_blocks[i].status.sw = false;
2224 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2225 }
2226
a6dcfd9c 2227 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2228 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2229 continue;
a1255107
AD
2230 if (adev->ip_blocks[i].version->funcs->late_fini)
2231 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2232 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2233 }
2234
c030f2e4 2235 amdgpu_ras_fini(adev);
2236
030308fc 2237 if (amdgpu_sriov_vf(adev))
24136135
ML
2238 if (amdgpu_virt_release_full_gpu(adev, false))
2239 DRM_ERROR("failed to release exclusive mode on fini\n");
2493664f 2240
d38ceaf9
AD
2241 return 0;
2242}
2243
e3ecdffa 2244/**
beff74bc 2245 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2246 *
1112a46b 2247 * @work: work_struct.
e3ecdffa 2248 */
beff74bc 2249static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2250{
2251 struct amdgpu_device *adev =
beff74bc 2252 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2253 int r;
2254
2255 r = amdgpu_ib_ring_tests(adev);
2256 if (r)
2257 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2258}
2259
1e317b99
RZ
2260static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2261{
2262 struct amdgpu_device *adev =
2263 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2264
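	/* GFXOFF is only entered once every request to keep GFX powered up has
	 * been dropped (gfx_off_req_count == 0) and GFXOFF is not already on. */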
2265 mutex_lock(&adev->gfx.gfx_off_mutex);
2266 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2267 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2268 adev->gfx.gfx_off_state = true;
2269 }
2270 mutex_unlock(&adev->gfx.gfx_off_mutex);
2271}
2272
e3ecdffa 2273/**
e7854a03 2274 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2275 *
2276 * @adev: amdgpu_device pointer
2277 *
2278 * Main suspend function for hardware IPs. The list of all the hardware
2279 * IPs that make up the asic is walked, clockgating is disabled and the
2280 * suspend callbacks are run. suspend puts the hardware and software state
2281 * in each IP into a state suitable for suspend.
2282 * Returns 0 on success, negative error code on failure.
2283 */
e7854a03
AD
2284static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2285{
2286 int i, r;
2287
05df1f01 2288 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271 2289 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2290
e7854a03
AD
2291 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2292 if (!adev->ip_blocks[i].status.valid)
2293 continue;
2294 /* displays are handled separately */
2295 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
e7854a03
AD
2296 /* XXX handle errors */
2297 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2298 /* XXX handle errors */
2299 if (r) {
2300 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2301 adev->ip_blocks[i].version->funcs->name, r);
482f0e53 2302 return r;
e7854a03 2303 }
482f0e53 2304 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
2305 }
2306 }
2307
e7854a03
AD
2308 return 0;
2309}
2310
2311/**
2312 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2313 *
2314 * @adev: amdgpu_device pointer
2315 *
2316 * Main suspend function for hardware IPs. The list of all the hardware
2317 * IPs that make up the asic is walked, clockgating is disabled and the
2318 * suspend callbacks are run. suspend puts the hardware and software state
2319 * in each IP into a state suitable for suspend.
2320 * Returns 0 on success, negative error code on failure.
2321 */
2322static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2323{
2324 int i, r;
2325
2326 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2327 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2328 continue;
e7854a03
AD
2329 /* displays are handled in phase1 */
2330 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2331 continue;
bff77e86
LM
2332 /* PSP lost connection when err_event_athub occurs */
2333 if (amdgpu_ras_intr_triggered() &&
2334 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2335 adev->ip_blocks[i].status.hw = false;
2336 continue;
2337 }
d38ceaf9 2338 /* XXX handle errors */
a1255107 2339 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 2340 /* XXX handle errors */
2c1a2784 2341 if (r) {
a1255107
AD
2342 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2343 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2344 }
876923fb 2345 adev->ip_blocks[i].status.hw = false;
a3a09142
AD
2346 /* handle putting the SMC in the appropriate state */
2347 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2348 if (is_support_sw_smu(adev)) {
0e0b89c0 2349 r = smu_set_mp1_state(&adev->smu, adev->mp1_state);
a3a09142 2350 } else if (adev->powerplay.pp_funcs &&
482f0e53 2351 adev->powerplay.pp_funcs->set_mp1_state) {
a3a09142
AD
2352 r = adev->powerplay.pp_funcs->set_mp1_state(
2353 adev->powerplay.pp_handle,
2354 adev->mp1_state);
0e0b89c0
EQ
2355 }
2356 if (r) {
2357 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
2358 adev->mp1_state, r);
2359 return r;
a3a09142
AD
2360 }
2361 }
b5507c7e
AG
2362
2363 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2364 }
2365
2366 return 0;
2367}
2368
e7854a03
AD
2369/**
2370 * amdgpu_device_ip_suspend - run suspend for hardware IPs
2371 *
2372 * @adev: amdgpu_device pointer
2373 *
2374 * Main suspend function for hardware IPs. The list of all the hardware
2375 * IPs that make up the asic is walked, clockgating is disabled and the
2376 * suspend callbacks are run. suspend puts the hardware and software state
2377 * in each IP into a state suitable for suspend.
2378 * Returns 0 on success, negative error code on failure.
2379 */
2380int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2381{
2382 int r;
2383
e7819644
YT
2384 if (amdgpu_sriov_vf(adev))
2385 amdgpu_virt_request_full_gpu(adev, false);
2386
e7854a03
AD
2387 r = amdgpu_device_ip_suspend_phase1(adev);
2388 if (r)
2389 return r;
2390 r = amdgpu_device_ip_suspend_phase2(adev);
2391
e7819644
YT
2392 if (amdgpu_sriov_vf(adev))
2393 amdgpu_virt_release_full_gpu(adev, false);
2394
e7854a03
AD
2395 return r;
2396}
2397
06ec9070 2398static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2399{
2400 int i, r;
2401
2cb681b6
ML
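	/* When re-initializing a VF (SR-IOV) the hardware must come back up in
	 * a fixed order; this table lists the blocks for the early pass. */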
2402 static enum amd_ip_block_type ip_order[] = {
2403 AMD_IP_BLOCK_TYPE_GMC,
2404 AMD_IP_BLOCK_TYPE_COMMON,
39186aef 2405 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
2406 AMD_IP_BLOCK_TYPE_IH,
2407 };
a90ad3c2 2408
2cb681b6
ML
2409 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2410 int j;
2411 struct amdgpu_ip_block *block;
a90ad3c2 2412
2cb681b6
ML
2413 for (j = 0; j < adev->num_ip_blocks; j++) {
2414 block = &adev->ip_blocks[j];
2415
482f0e53 2416 block->status.hw = false;
2cb681b6
ML
2417 if (block->version->type != ip_order[i] ||
2418 !block->status.valid)
2419 continue;
2420
2421 r = block->version->funcs->hw_init(adev);
0aaeefcc 2422 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
c41d1cf6
ML
2423 if (r)
2424 return r;
482f0e53 2425 block->status.hw = true;
a90ad3c2
ML
2426 }
2427 }
2428
2429 return 0;
2430}
2431
06ec9070 2432static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2433{
2434 int i, r;
2435
2cb681b6
ML
2436 static enum amd_ip_block_type ip_order[] = {
2437 AMD_IP_BLOCK_TYPE_SMC,
2438 AMD_IP_BLOCK_TYPE_DCE,
2439 AMD_IP_BLOCK_TYPE_GFX,
2440 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c
FM
2441 AMD_IP_BLOCK_TYPE_UVD,
2442 AMD_IP_BLOCK_TYPE_VCE
2cb681b6 2443 };
a90ad3c2 2444
2cb681b6
ML
2445 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2446 int j;
2447 struct amdgpu_ip_block *block;
a90ad3c2 2448
2cb681b6
ML
2449 for (j = 0; j < adev->num_ip_blocks; j++) {
2450 block = &adev->ip_blocks[j];
2451
2452 if (block->version->type != ip_order[i] ||
482f0e53
ML
2453 !block->status.valid ||
2454 block->status.hw)
2cb681b6
ML
2455 continue;
2456
2457 r = block->version->funcs->hw_init(adev);
0aaeefcc 2458 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
c41d1cf6
ML
2459 if (r)
2460 return r;
482f0e53 2461 block->status.hw = true;
a90ad3c2
ML
2462 }
2463 }
2464
2465 return 0;
2466}
2467
e3ecdffa
AD
2468/**
2469 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2470 *
2471 * @adev: amdgpu_device pointer
2472 *
2473 * First resume function for hardware IPs. The list of all the hardware
2474 * IPs that make up the asic is walked and the resume callbacks are run for
2475 * COMMON, GMC, and IH. resume puts the hardware into a functional state
2476 * after a suspend and updates the software state as necessary. This
2477 * function is also used for restoring the GPU after a GPU reset.
2478 * Returns 0 on success, negative error code on failure.
2479 */
06ec9070 2480static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
2481{
2482 int i, r;
2483
a90ad3c2 2484 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2485 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 2486 continue;
a90ad3c2 2487 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa
AD
2488 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2489 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
482f0e53 2490
fcf0649f
CZ
2491 r = adev->ip_blocks[i].version->funcs->resume(adev);
2492 if (r) {
2493 DRM_ERROR("resume of IP block <%s> failed %d\n",
2494 adev->ip_blocks[i].version->funcs->name, r);
2495 return r;
2496 }
482f0e53 2497 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
2498 }
2499 }
2500
2501 return 0;
2502}
2503
e3ecdffa
AD
2504/**
2505 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2506 *
2507 * @adev: amdgpu_device pointer
2508 *
2510 * Second resume function for hardware IPs. The list of all the hardware
2510 * IPs that make up the asic is walked and the resume callbacks are run for
2511 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
2512 * functional state after a suspend and updates the software state as
2513 * necessary. This function is also used for restoring the GPU after a GPU
2514 * reset.
2515 * Returns 0 on success, negative error code on failure.
2516 */
06ec9070 2517static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2518{
2519 int i, r;
2520
2521 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2522 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 2523 continue;
fcf0649f 2524 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 2525 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
2526 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2527 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 2528 continue;
a1255107 2529 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 2530 if (r) {
a1255107
AD
2531 DRM_ERROR("resume of IP block <%s> failed %d\n",
2532 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2533 return r;
2c1a2784 2534 }
482f0e53 2535 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
2536 }
2537
2538 return 0;
2539}
2540
e3ecdffa
AD
2541/**
2542 * amdgpu_device_ip_resume - run resume for hardware IPs
2543 *
2544 * @adev: amdgpu_device pointer
2545 *
2546 * Main resume function for hardware IPs. The hardware IPs
2547 * are split into two resume functions because they are
2548 * also used in recovering from a GPU reset and some additional
2549 * steps need to be taken between them. In this case (S3/S4) they are
2550 * run sequentially.
2551 * Returns 0 on success, negative error code on failure.
2552 */
06ec9070 2553static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
2554{
2555 int r;
2556
06ec9070 2557 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
2558 if (r)
2559 return r;
7a3e0bb2
RZ
2560
2561 r = amdgpu_device_fw_loading(adev);
2562 if (r)
2563 return r;
2564
06ec9070 2565 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
2566
2567 return r;
2568}
2569
e3ecdffa
AD
2570/**
2571 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2572 *
2573 * @adev: amdgpu_device pointer
2574 *
2575 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2576 */
4e99a44e 2577static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 2578{
6867e1b5
ML
2579 if (amdgpu_sriov_vf(adev)) {
2580 if (adev->is_atom_fw) {
2581 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2582 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2583 } else {
2584 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2585 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2586 }
2587
2588 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2589 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 2590 }
048765ad
AR
2591}
2592
e3ecdffa
AD
2593/**
2594 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2595 *
2596 * @asic_type: AMD asic type
2597 *
2598 * Check if there is DC (new modesetting infrastructure) support for an asic.
2599 * returns true if DC has support, false if not.
2600 */
4562236b
HW
2601bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2602{
2603 switch (asic_type) {
2604#if defined(CONFIG_DRM_AMD_DC)
2605 case CHIP_BONAIRE:
0d6fbccb 2606 case CHIP_KAVERI:
367e6687
AD
2607 case CHIP_KABINI:
2608 case CHIP_MULLINS:
d9fda248
HW
2609 /*
2610 * We have systems in the wild with these ASICs that require
2611 * LVDS and VGA support which is not supported with DC.
2612 *
2613 * Fallback to the non-DC driver here by default so as not to
2614 * cause regressions.
2615 */
2616 return amdgpu_dc > 0;
2617 case CHIP_HAWAII:
4562236b
HW
2618 case CHIP_CARRIZO:
2619 case CHIP_STONEY:
4562236b 2620 case CHIP_POLARIS10:
675fd32b 2621 case CHIP_POLARIS11:
2c8ad2d5 2622 case CHIP_POLARIS12:
675fd32b 2623 case CHIP_VEGAM:
4562236b
HW
2624 case CHIP_TONGA:
2625 case CHIP_FIJI:
42f8ffa1 2626 case CHIP_VEGA10:
dca7b401 2627 case CHIP_VEGA12:
c6034aa2 2628 case CHIP_VEGA20:
b86a1aa3 2629#if defined(CONFIG_DRM_AMD_DC_DCN)
fd187853 2630 case CHIP_RAVEN:
b4f199c7 2631 case CHIP_NAVI10:
8fceceb6 2632 case CHIP_NAVI14:
078655d9 2633 case CHIP_NAVI12:
e1c14c43 2634 case CHIP_RENOIR:
42f8ffa1 2635#endif
fd187853 2636 return amdgpu_dc != 0;
4562236b
HW
2637#endif
2638 default:
2639 return false;
2640 }
2641}
2642
2643/**
2644 * amdgpu_device_has_dc_support - check if dc is supported
2645 *
2646 * @adev: amdgpu_device pointer
2647 *
2648 * Returns true for supported, false for not supported
2649 */
2650bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2651{
2555039d
XY
2652 if (amdgpu_sriov_vf(adev))
2653 return false;
2654
4562236b
HW
2655 return amdgpu_device_asic_has_dc_support(adev->asic_type);
2656}
2657
d4535e2c
AG
2658
2659static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
2660{
2661 struct amdgpu_device *adev =
2662 container_of(__work, struct amdgpu_device, xgmi_reset_work);
2663
2664 adev->asic_reset_res = amdgpu_asic_reset(adev);
2665 if (adev->asic_reset_res)
fed184e9 2666 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
d4535e2c
AG
2667 adev->asic_reset_res, adev->ddev->unique);
2668}
2669
71f98027
AD
2670static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
2671{
2672 char *input = amdgpu_lockup_timeout;
2673 char *timeout_setting = NULL;
2674 int index = 0;
2675 long timeout;
2676 int ret = 0;
2677
2678 /*
2679 * By default the timeout for non-compute jobs is 10000 ms,
2680 * and there is no timeout enforced on compute jobs.
2681 * In SR-IOV or passthrough mode, the timeout for compute
2682 * jobs is also 10000 ms by default.
2683 */
2684 adev->gfx_timeout = msecs_to_jiffies(10000);
2685 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
2686 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
2687 adev->compute_timeout = adev->gfx_timeout;
2688 else
2689 adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
2690
f440ff44 2691 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 2692 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 2693 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
2694 ret = kstrtol(timeout_setting, 0, &timeout);
2695 if (ret)
2696 return ret;
2697
2698 if (timeout == 0) {
2699 index++;
2700 continue;
2701 } else if (timeout < 0) {
2702 timeout = MAX_SCHEDULE_TIMEOUT;
2703 } else {
2704 timeout = msecs_to_jiffies(timeout);
2705 }
2706
2707 switch (index++) {
2708 case 0:
2709 adev->gfx_timeout = timeout;
2710 break;
2711 case 1:
2712 adev->compute_timeout = timeout;
2713 break;
2714 case 2:
2715 adev->sdma_timeout = timeout;
2716 break;
2717 case 3:
2718 adev->video_timeout = timeout;
2719 break;
2720 default:
2721 break;
2722 }
2723 }
2724 /*
2725 * There is only one value specified and
2726 * it should apply to all non-compute jobs.
2727 */
bcccee89 2728 if (index == 1) {
71f98027 2729 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
2730 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
2731 adev->compute_timeout = adev->gfx_timeout;
2732 }
71f98027
AD
2733 }
2734
2735 return ret;
2736}
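/*
 * Illustrative example (values are arbitrary): amdgpu.lockup_timeout=10000,-1,10000,10000
 * parsed by the function above sets the gfx, sdma and video job timeouts to
 * 10000 ms and leaves compute jobs without a timeout; a single value such as
 * amdgpu.lockup_timeout=5000 applies to all non-compute queues (and also to
 * compute under SR-IOV or passthrough).
 */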
d4535e2c 2737
d38ceaf9
AD
2738/**
2739 * amdgpu_device_init - initialize the driver
2740 *
2741 * @adev: amdgpu_device pointer
87e3f136 2742 * @ddev: drm dev pointer
d38ceaf9
AD
2743 * @pdev: pci dev pointer
2744 * @flags: driver flags
2745 *
2746 * Initializes the driver info and hw (all asics).
2747 * Returns 0 for success or an error on failure.
2748 * Called at driver startup.
2749 */
2750int amdgpu_device_init(struct amdgpu_device *adev,
2751 struct drm_device *ddev,
2752 struct pci_dev *pdev,
2753 uint32_t flags)
2754{
2755 int r, i;
3840c5bc 2756 bool boco = false;
95844d20 2757 u32 max_MBps;
d38ceaf9
AD
2758
2759 adev->shutdown = false;
2760 adev->dev = &pdev->dev;
2761 adev->ddev = ddev;
2762 adev->pdev = pdev;
2763 adev->flags = flags;
4e66d7d2
YZ
2764
2765 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
2766 adev->asic_type = amdgpu_force_asic_type;
2767 else
2768 adev->asic_type = flags & AMD_ASIC_MASK;
2769
d38ceaf9 2770 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2
SL
2771 if (amdgpu_emu_mode == 1)
2772 adev->usec_timeout *= 2;
770d13b1 2773 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
2774 adev->accel_working = false;
2775 adev->num_rings = 0;
2776 adev->mman.buffer_funcs = NULL;
2777 adev->mman.buffer_funcs_ring = NULL;
2778 adev->vm_manager.vm_pte_funcs = NULL;
3798e9a6 2779 adev->vm_manager.vm_pte_num_rqs = 0;
132f34e4 2780 adev->gmc.gmc_funcs = NULL;
f54d1867 2781 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 2782 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
2783
2784 adev->smc_rreg = &amdgpu_invalid_rreg;
2785 adev->smc_wreg = &amdgpu_invalid_wreg;
2786 adev->pcie_rreg = &amdgpu_invalid_rreg;
2787 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
2788 adev->pciep_rreg = &amdgpu_invalid_rreg;
2789 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
2790 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
2791 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
2792 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
2793 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
2794 adev->didt_rreg = &amdgpu_invalid_rreg;
2795 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
2796 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
2797 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
2798 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
2799 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
2800
3e39ab90
AD
2801 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
2802 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
2803 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
2804
2805 /* mutex initializations are all done here so we
2806 * can recall these functions without locking issues */
d38ceaf9 2807 atomic_set(&adev->irq.ih.lock, 0);
0e5ca0d1 2808 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
2809 mutex_init(&adev->pm.mutex);
2810 mutex_init(&adev->gfx.gpu_clock_mutex);
2811 mutex_init(&adev->srbm_mutex);
b8866c26 2812 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 2813 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 2814 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 2815 mutex_init(&adev->mn_lock);
e23b74aa 2816 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 2817 hash_init(adev->mn_hash);
13a752e3 2818 mutex_init(&adev->lock_reset);
bb5a2bdf 2819 mutex_init(&adev->virt.dpm_mutex);
32eaeae0 2820 mutex_init(&adev->psp.mutex);
d38ceaf9 2821
912dfc84
EQ
2822 r = amdgpu_device_check_arguments(adev);
2823 if (r)
2824 return r;
d38ceaf9 2825
d38ceaf9
AD
2826 spin_lock_init(&adev->mmio_idx_lock);
2827 spin_lock_init(&adev->smc_idx_lock);
2828 spin_lock_init(&adev->pcie_idx_lock);
2829 spin_lock_init(&adev->uvd_ctx_idx_lock);
2830 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 2831 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 2832 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 2833 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 2834 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 2835
0c4e7fa5
CZ
2836 INIT_LIST_HEAD(&adev->shadow_list);
2837 mutex_init(&adev->shadow_list_lock);
2838
795f2813
AR
2839 INIT_LIST_HEAD(&adev->ring_lru_list);
2840 spin_lock_init(&adev->ring_lru_list_lock);
2841
beff74bc
AD
2842 INIT_DELAYED_WORK(&adev->delayed_init_work,
2843 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
2844 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
2845 amdgpu_device_delay_enable_gfx_off);
2dc80b00 2846
d4535e2c
AG
2847 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
2848
d23ee13f 2849 adev->gfx.gfx_off_req_count = 1;
b1ddf548
RZ
2850 adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false;
2851
0fa49558
AX
2852 /* Registers mapping */
2853 /* TODO: block userspace mapping of io register */
da69c161
KW
2854 if (adev->asic_type >= CHIP_BONAIRE) {
2855 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
2856 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
2857 } else {
2858 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
2859 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
2860 }
d38ceaf9 2861
d38ceaf9
AD
2862 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
2863 if (adev->rmmio == NULL) {
2864 return -ENOMEM;
2865 }
2866 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
2867 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
2868
d38ceaf9
AD
2869 /* io port mapping */
2870 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
2871 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
2872 adev->rio_mem_size = pci_resource_len(adev->pdev, i);
2873 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
2874 break;
2875 }
2876 }
2877 if (adev->rio_mem == NULL)
b64a18c5 2878 DRM_INFO("PCI I/O BAR is not found.\n");
d38ceaf9 2879
b2109d8e
JX
2880 /* enable PCIE atomic ops */
2881 r = pci_enable_atomic_ops_to_root(adev->pdev,
2882 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
2883 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
2884 if (r) {
2885 adev->have_atomics_support = false;
2886 DRM_INFO("PCIE atomic ops is not supported\n");
2887 } else {
2888 adev->have_atomics_support = true;
2889 }
2890
5494d864
AD
2891 amdgpu_device_get_pcie_info(adev);
2892
b239c017
JX
2893 if (amdgpu_mcbp)
2894 DRM_INFO("MCBP is enabled\n");
2895
5f84cc63
JX
2896 if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
2897 adev->enable_mes = true;
2898
f54eeab4 2899 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) {
a190d1c7
XY
2900 r = amdgpu_discovery_init(adev);
2901 if (r) {
2902 dev_err(adev->dev, "amdgpu_discovery_init failed\n");
2903 return r;
2904 }
2905 }
2906
d38ceaf9 2907 /* early init functions */
06ec9070 2908 r = amdgpu_device_ip_early_init(adev);
d38ceaf9
AD
2909 if (r)
2910 return r;
2911
df99ac0f
JZ
2912 r = amdgpu_device_get_job_timeout_settings(adev);
2913 if (r) {
2914 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
2915 return r;
2916 }
2917
6585661d
OZ
2918 /* doorbell bar mapping and doorbell index init*/
2919 amdgpu_device_doorbell_init(adev);
2920
d38ceaf9
AD
2921 /* if we have more than one VGA card, disable the amdgpu VGA resources */
2922 /* this will fail for cards that aren't VGA class devices, just
2923 * ignore it */
06ec9070 2924 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
d38ceaf9 2925
31af062a 2926 if (amdgpu_device_supports_boco(ddev))
3840c5bc
AD
2927 boco = true;
2928 if (amdgpu_has_atpx() &&
2929 (amdgpu_is_atpx_hybrid() ||
2930 amdgpu_has_atpx_dgpu_power_cntl()) &&
2931 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 2932 vga_switcheroo_register_client(adev->pdev,
3840c5bc
AD
2933 &amdgpu_switcheroo_ops, boco);
2934 if (boco)
d38ceaf9
AD
2935 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
2936
9475a943
SL
2937 if (amdgpu_emu_mode == 1) {
2938 /* post the asic on emulation mode */
2939 emu_soc_asic_init(adev);
bfca0289 2940 goto fence_driver_init;
9475a943 2941 }
bfca0289 2942
4e99a44e
ML
2943 /* detect if we are with an SRIOV vbios */
2944 amdgpu_device_detect_sriov_bios(adev);
048765ad 2945
95e8e59e
AD
2946 /* check if we need to reset the asic
2947 * E.g., driver was not cleanly unloaded previously, etc.
2948 */
f14899fd 2949 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
95e8e59e
AD
2950 r = amdgpu_asic_reset(adev);
2951 if (r) {
2952 dev_err(adev->dev, "asic reset on init failed\n");
2953 goto failed;
2954 }
2955 }
2956
d38ceaf9 2957 /* Post card if necessary */
39c640c0 2958 if (amdgpu_device_need_post(adev)) {
d38ceaf9 2959 if (!adev->bios) {
bec86378 2960 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
2961 r = -EINVAL;
2962 goto failed;
d38ceaf9 2963 }
bec86378 2964 DRM_INFO("GPU posting now...\n");
4e99a44e
ML
2965 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
2966 if (r) {
2967 dev_err(adev->dev, "gpu post error!\n");
2968 goto failed;
2969 }
d38ceaf9
AD
2970 }
2971
88b64e95
AD
2972 if (adev->is_atom_fw) {
2973 /* Initialize clocks */
2974 r = amdgpu_atomfirmware_get_clock_info(adev);
2975 if (r) {
2976 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 2977 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
2978 goto failed;
2979 }
2980 } else {
a5bde2f9
AD
2981 /* Initialize clocks */
2982 r = amdgpu_atombios_get_clock_info(adev);
2983 if (r) {
2984 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 2985 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 2986 goto failed;
a5bde2f9
AD
2987 }
2988 /* init i2c buses */
4562236b
HW
2989 if (!amdgpu_device_has_dc_support(adev))
2990 amdgpu_atombios_i2c_init(adev);
2c1a2784 2991 }
d38ceaf9 2992
bfca0289 2993fence_driver_init:
d38ceaf9
AD
2994 /* Fence driver */
2995 r = amdgpu_fence_driver_init(adev);
2c1a2784
AD
2996 if (r) {
2997 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
e23b74aa 2998 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 2999 goto failed;
2c1a2784 3000 }
d38ceaf9
AD
3001
3002 /* init the mode config */
3003 drm_mode_config_init(adev->ddev);
3004
06ec9070 3005 r = amdgpu_device_ip_init(adev);
d38ceaf9 3006 if (r) {
8840a387 3007 /* failed in exclusive mode due to timeout */
3008 if (amdgpu_sriov_vf(adev) &&
3009 !amdgpu_sriov_runtime(adev) &&
3010 amdgpu_virt_mmio_blocked(adev) &&
3011 !amdgpu_virt_wait_reset(adev)) {
3012 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
3013 /* Don't send request since VF is inactive. */
3014 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3015 adev->virt.ops = NULL;
8840a387 3016 r = -EAGAIN;
3017 goto failed;
3018 }
06ec9070 3019 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3020 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
83ba126a 3021 goto failed;
d38ceaf9
AD
3022 }
3023
3024 adev->accel_working = true;
3025
e59c0205
AX
3026 amdgpu_vm_check_compute_bug(adev);
3027
95844d20
MO
3028 /* Initialize the buffer migration limit. */
3029 if (amdgpu_moverate >= 0)
3030 max_MBps = amdgpu_moverate;
3031 else
3032 max_MBps = 8; /* Allow 8 MB/s. */
3033 /* Get a log2 for easy divisions. */
3034 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3035
9bc92b9c
ML
3036 amdgpu_fbdev_init(adev);
3037
e9bc1bf7
YT
3038 if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))
3039 amdgpu_pm_virt_sysfs_init(adev);
3040
d2f52ac8 3041 r = amdgpu_pm_sysfs_init(adev);
7c868b59
YT
3042 if (r) {
3043 adev->pm_sysfs_en = false;
d2f52ac8 3044 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
7c868b59
YT
3045 } else
3046 adev->pm_sysfs_en = true;
d2f52ac8 3047
5bb23532 3048 r = amdgpu_ucode_sysfs_init(adev);
7c868b59
YT
3049 if (r) {
3050 adev->ucode_sysfs_en = false;
5bb23532 3051 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
7c868b59
YT
3052 } else
3053 adev->ucode_sysfs_en = true;
5bb23532 3054
75758255 3055 r = amdgpu_debugfs_gem_init(adev);
3f14e623 3056 if (r)
d38ceaf9 3057 DRM_ERROR("registering gem debugfs failed (%d).\n", r);
d38ceaf9
AD
3058
3059 r = amdgpu_debugfs_regs_init(adev);
3f14e623 3060 if (r)
d38ceaf9 3061 DRM_ERROR("registering register debugfs failed (%d).\n", r);
d38ceaf9 3062
50ab2533 3063 r = amdgpu_debugfs_firmware_init(adev);
3f14e623 3064 if (r)
50ab2533 3065 DRM_ERROR("registering firmware debugfs failed (%d).\n", r);
50ab2533 3066
763efb6c 3067 r = amdgpu_debugfs_init(adev);
db95e218 3068 if (r)
763efb6c 3069 DRM_ERROR("Creating debugfs files failed (%d).\n", r);
db95e218 3070
d38ceaf9
AD
3071 if ((amdgpu_testing & 1)) {
3072 if (adev->accel_working)
3073 amdgpu_test_moves(adev);
3074 else
3075 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
3076 }
d38ceaf9
AD
3077 if (amdgpu_benchmarking) {
3078 if (adev->accel_working)
3079 amdgpu_benchmark(adev, amdgpu_benchmarking);
3080 else
3081 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
3082 }
3083
b0adca4d
EQ
3084 /*
3085 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3086 * Otherwise the mgpu fan boost feature will be skipped because the
3087 * gpu instance count would be too low.
3088 */
3089 amdgpu_register_gpu_instance(adev);
3090
d38ceaf9
AD
3091 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3092 * explicit gating rather than handling it automatically.
3093 */
06ec9070 3094 r = amdgpu_device_ip_late_init(adev);
2c1a2784 3095 if (r) {
06ec9070 3096 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
e23b74aa 3097 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
83ba126a 3098 goto failed;
2c1a2784 3099 }
d38ceaf9 3100
108c6a63 3101 /* must succeed. */
511fdbc3 3102 amdgpu_ras_resume(adev);
108c6a63 3103
beff74bc
AD
3104 queue_delayed_work(system_wq, &adev->delayed_init_work,
3105 msecs_to_jiffies(AMDGPU_RESUME_MS));
3106
dcea6e65
KR
3107 r = device_create_file(adev->dev, &dev_attr_pcie_replay_count);
3108 if (r) {
3109 dev_err(adev->dev, "Could not create pcie_replay_count");
3110 return r;
3111 }
108c6a63 3112
d155bef0
AB
3113 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3114 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
3115 if (r)
3116 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3117
d38ceaf9 3118 return 0;
83ba126a
AD
3119
3120failed:
89041940 3121 amdgpu_vf_error_trans_all(adev);
3840c5bc 3122 if (boco)
83ba126a 3123 vga_switcheroo_fini_domain_pm_ops(adev->dev);
8840a387 3124
83ba126a 3125 return r;
d38ceaf9
AD
3126}
3127
d38ceaf9
AD
3128/**
3129 * amdgpu_device_fini - tear down the driver
3130 *
3131 * @adev: amdgpu_device pointer
3132 *
3133 * Tear down the driver info (all asics).
3134 * Called at driver shutdown.
3135 */
3136void amdgpu_device_fini(struct amdgpu_device *adev)
3137{
3138 int r;
3139
3140 DRM_INFO("amdgpu: finishing device.\n");
9f875167 3141 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 3142 adev->shutdown = true;
9f875167 3143
e5b03032
ML
3144 /* disable all interrupts */
3145 amdgpu_irq_disable_all(adev);
ff97cba8
ML
3146 if (adev->mode_info.mode_config_initialized){
3147 if (!amdgpu_device_has_dc_support(adev))
c2d88e06 3148 drm_helper_force_disable_all(adev->ddev);
ff97cba8
ML
3149 else
3150 drm_atomic_helper_shutdown(adev->ddev);
3151 }
d38ceaf9 3152 amdgpu_fence_driver_fini(adev);
7c868b59
YT
3153 if (adev->pm_sysfs_en)
3154 amdgpu_pm_sysfs_fini(adev);
d38ceaf9 3155 amdgpu_fbdev_fini(adev);
06ec9070 3156 r = amdgpu_device_ip_fini(adev);
ab4fe3e1
HR
3157 if (adev->firmware.gpu_info_fw) {
3158 release_firmware(adev->firmware.gpu_info_fw);
3159 adev->firmware.gpu_info_fw = NULL;
3160 }
d38ceaf9
AD
3161 adev->accel_working = false;
3162 /* free i2c buses */
4562236b
HW
3163 if (!amdgpu_device_has_dc_support(adev))
3164 amdgpu_i2c_fini(adev);
bfca0289
SL
3165
3166 if (amdgpu_emu_mode != 1)
3167 amdgpu_atombios_fini(adev);
3168
d38ceaf9
AD
3169 kfree(adev->bios);
3170 adev->bios = NULL;
3840c5bc
AD
3171 if (amdgpu_has_atpx() &&
3172 (amdgpu_is_atpx_hybrid() ||
3173 amdgpu_has_atpx_dgpu_power_cntl()) &&
3174 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 3175 vga_switcheroo_unregister_client(adev->pdev);
3840c5bc 3176 if (amdgpu_device_supports_boco(adev->ddev))
83ba126a 3177 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d38ceaf9
AD
3178 vga_client_register(adev->pdev, NULL, NULL, NULL);
3179 if (adev->rio_mem)
3180 pci_iounmap(adev->pdev, adev->rio_mem);
3181 adev->rio_mem = NULL;
3182 iounmap(adev->rmmio);
3183 adev->rmmio = NULL;
06ec9070 3184 amdgpu_device_doorbell_fini(adev);
e9bc1bf7
YT
3185 if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))
3186 amdgpu_pm_virt_sysfs_fini(adev);
3187
d38ceaf9 3188 amdgpu_debugfs_regs_cleanup(adev);
dcea6e65 3189 device_remove_file(adev->dev, &dev_attr_pcie_replay_count);
7c868b59
YT
3190 if (adev->ucode_sysfs_en)
3191 amdgpu_ucode_sysfs_fini(adev);
d155bef0
AB
3192 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3193 amdgpu_pmu_fini(adev);
6698a3d0 3194 amdgpu_debugfs_preempt_cleanup(adev);
f54eeab4 3195 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
a190d1c7 3196 amdgpu_discovery_fini(adev);
d38ceaf9
AD
3197}
3198
3199
3200/*
3201 * Suspend & resume.
3202 */
3203/**
810ddc3a 3204 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 3205 *
87e3f136
DP
3206 * @dev: drm dev pointer
3207 * @fbcon: notify the fbdev of suspend
d38ceaf9
AD
3209 *
3210 * Puts the hw in the suspend state (all asics).
3211 * Returns 0 for success or an error on failure.
3212 * Called at driver suspend.
3213 */
de185019 3214int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3215{
3216 struct amdgpu_device *adev;
3217 struct drm_crtc *crtc;
3218 struct drm_connector *connector;
f8d2d39e 3219 struct drm_connector_list_iter iter;
5ceb54c6 3220 int r;
d38ceaf9
AD
3221
3222 if (dev == NULL || dev->dev_private == NULL) {
3223 return -ENODEV;
3224 }
3225
3226 adev = dev->dev_private;
3227
3228 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3229 return 0;
3230
44779b43 3231 adev->in_suspend = true;
d38ceaf9
AD
3232 drm_kms_helper_poll_disable(dev);
3233
5f818173
S
3234 if (fbcon)
3235 amdgpu_fbdev_set_suspend(adev, 1);
3236
beff74bc 3237 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 3238
4562236b
HW
3239 if (!amdgpu_device_has_dc_support(adev)) {
3240 /* turn off display hw */
3241 drm_modeset_lock_all(dev);
f8d2d39e
LP
3242 drm_connector_list_iter_begin(dev, &iter);
3243 drm_for_each_connector_iter(connector, &iter)
3244 drm_helper_connector_dpms(connector,
3245 DRM_MODE_DPMS_OFF);
3246 drm_connector_list_iter_end(&iter);
4562236b 3247 drm_modeset_unlock_all(dev);
fe1053b7
AD
3248 /* unpin the front buffers and cursors */
3249 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3250 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3251 struct drm_framebuffer *fb = crtc->primary->fb;
3252 struct amdgpu_bo *robj;
3253
91334223 3254 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3255 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3256 r = amdgpu_bo_reserve(aobj, true);
3257 if (r == 0) {
3258 amdgpu_bo_unpin(aobj);
3259 amdgpu_bo_unreserve(aobj);
3260 }
756e6880 3261 }
756e6880 3262
fe1053b7
AD
3263 if (fb == NULL || fb->obj[0] == NULL) {
3264 continue;
3265 }
3266 robj = gem_to_amdgpu_bo(fb->obj[0]);
3267 /* don't unpin kernel fb objects */
3268 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
3269 r = amdgpu_bo_reserve(robj, true);
3270 if (r == 0) {
3271 amdgpu_bo_unpin(robj);
3272 amdgpu_bo_unreserve(robj);
3273 }
d38ceaf9
AD
3274 }
3275 }
3276 }
fe1053b7
AD
3277
3278 amdgpu_amdkfd_suspend(adev);
3279
5e6932fe 3280 amdgpu_ras_suspend(adev);
3281
fe1053b7
AD
3282 r = amdgpu_device_ip_suspend_phase1(adev);
3283
d38ceaf9
AD
3284 /* evict vram memory */
3285 amdgpu_bo_evict_vram(adev);
3286
5ceb54c6 3287 amdgpu_fence_driver_suspend(adev);
d38ceaf9 3288
fe1053b7 3289 r = amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 3290
a0a71e49
AD
3291 /* evict remaining vram memory
3292 * This second call to evict vram is to evict the gart page table
3293 * using the CPU.
3294 */
d38ceaf9
AD
3295 amdgpu_bo_evict_vram(adev);
3296
d38ceaf9
AD
3297 return 0;
3298}
3299
3300/**
810ddc3a 3301 * amdgpu_device_resume - initiate device resume
d38ceaf9 3302 *
87e3f136
DP
3303 * @dev: drm dev pointer
3304 * @fbcon: notify the fbdev of resume
d38ceaf9
AD
3306 *
3307 * Bring the hw back to operating state (all asics).
3308 * Returns 0 for success or an error on failure.
3309 * Called at driver resume.
3310 */
de185019 3311int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3312{
3313 struct drm_connector *connector;
f8d2d39e 3314 struct drm_connector_list_iter iter;
d38ceaf9 3315 struct amdgpu_device *adev = dev->dev_private;
756e6880 3316 struct drm_crtc *crtc;
03161a6e 3317 int r = 0;
d38ceaf9
AD
3318
3319 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3320 return 0;
3321
d38ceaf9 3322 /* post card */
39c640c0 3323 if (amdgpu_device_need_post(adev)) {
74b0b157 3324 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3325 if (r)
3326 DRM_ERROR("amdgpu asic init failed\n");
3327 }
d38ceaf9 3328
06ec9070 3329 r = amdgpu_device_ip_resume(adev);
e6707218 3330 if (r) {
06ec9070 3331 DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
4d3b9ae5 3332 return r;
e6707218 3333 }
5ceb54c6
AD
3334 amdgpu_fence_driver_resume(adev);
3335
d38ceaf9 3336
06ec9070 3337 r = amdgpu_device_ip_late_init(adev);
03161a6e 3338 if (r)
4d3b9ae5 3339 return r;
d38ceaf9 3340
beff74bc
AD
3341 queue_delayed_work(system_wq, &adev->delayed_init_work,
3342 msecs_to_jiffies(AMDGPU_RESUME_MS));
3343
fe1053b7
AD
3344 if (!amdgpu_device_has_dc_support(adev)) {
3345 /* pin cursors */
3346 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3347 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3348
91334223 3349 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3350 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3351 r = amdgpu_bo_reserve(aobj, true);
3352 if (r == 0) {
3353 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
3354 if (r != 0)
3355 DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
3356 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
3357 amdgpu_bo_unreserve(aobj);
3358 }
756e6880
AD
3359 }
3360 }
3361 }
ba997709
YZ
3362 r = amdgpu_amdkfd_resume(adev);
3363 if (r)
3364 return r;
756e6880 3365
96a5d8d4 3366 /* Make sure IB tests have been flushed */
beff74bc 3367 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 3368
d38ceaf9
AD
3369 /* blat the mode back in */
3370 if (fbcon) {
4562236b
HW
3371 if (!amdgpu_device_has_dc_support(adev)) {
3372 /* pre DCE11 */
3373 drm_helper_resume_force_mode(dev);
3374
3375 /* turn on display hw */
3376 drm_modeset_lock_all(dev);
f8d2d39e
LP
3377
3378 drm_connector_list_iter_begin(dev, &iter);
3379 drm_for_each_connector_iter(connector, &iter)
3380 drm_helper_connector_dpms(connector,
3381 DRM_MODE_DPMS_ON);
3382 drm_connector_list_iter_end(&iter);
3383
4562236b 3384 drm_modeset_unlock_all(dev);
d38ceaf9 3385 }
4d3b9ae5 3386 amdgpu_fbdev_set_suspend(adev, 0);
d38ceaf9
AD
3387 }
3388
3389 drm_kms_helper_poll_enable(dev);
23a1a9e5 3390
5e6932fe 3391 amdgpu_ras_resume(adev);
3392
23a1a9e5
L
3393 /*
3394 * Most of the connector probing functions try to acquire runtime pm
3395 * refs to ensure that the GPU is powered on when connector polling is
3396 * performed. Since we're calling this from a runtime PM callback,
3397 * trying to acquire rpm refs will cause us to deadlock.
3398 *
3399 * Since we're guaranteed to be holding the rpm lock, it's safe to
3400 * temporarily disable the rpm helpers so this doesn't deadlock us.
3401 */
3402#ifdef CONFIG_PM
3403 dev->dev->power.disable_depth++;
3404#endif
4562236b
HW
3405 if (!amdgpu_device_has_dc_support(adev))
3406 drm_helper_hpd_irq_event(dev);
3407 else
3408 drm_kms_helper_hotplug_event(dev);
23a1a9e5
L
3409#ifdef CONFIG_PM
3410 dev->dev->power.disable_depth--;
3411#endif
44779b43
RZ
3412 adev->in_suspend = false;
3413
4d3b9ae5 3414 return 0;
d38ceaf9
AD
3415}
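
/*
 * Illustrative sketch (not part of the driver): how a system PM callback
 * might drive amdgpu_device_resume() defined above.  The callback name and
 * the drvdata lookup are assumptions for the example only.
 */
static int example_amdgpu_pm_resume(struct device *dev)
{
	struct drm_device *drm_dev = dev_get_drvdata(dev);

	/* fbcon = true: also notify/restore the fbdev console */
	return amdgpu_device_resume(drm_dev, true);
}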
3416
e3ecdffa
AD
3417/**
3418 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
3419 *
3420 * @adev: amdgpu_device pointer
3421 *
3422 * The list of all the hardware IPs that make up the asic is walked and
3423 * the check_soft_reset callbacks are run. check_soft_reset determines
3424 * if the asic is still hung or not.
3425 * Returns true if any of the IPs are still in a hung state, false if not.
3426 */
06ec9070 3427static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
3428{
3429 int i;
3430 bool asic_hang = false;
3431
f993d628
ML
3432 if (amdgpu_sriov_vf(adev))
3433 return true;
3434
8bc04c29
AD
3435 if (amdgpu_asic_need_full_reset(adev))
3436 return true;
3437
63fbf42f 3438 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3439 if (!adev->ip_blocks[i].status.valid)
63fbf42f 3440 continue;
a1255107
AD
3441 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
3442 adev->ip_blocks[i].status.hang =
3443 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
3444 if (adev->ip_blocks[i].status.hang) {
3445 DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
3446 asic_hang = true;
3447 }
3448 }
3449 return asic_hang;
3450}
3451
e3ecdffa
AD
3452/**
3453 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3454 *
3455 * @adev: amdgpu_device pointer
3456 *
3457 * The list of all the hardware IPs that make up the asic is walked and the
3458 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
3459 * handles any IP specific hardware or software state changes that are
3460 * necessary for a soft reset to succeed.
3461 * Returns 0 on success, negative error code on failure.
3462 */
06ec9070 3463static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
3464{
3465 int i, r = 0;
3466
3467 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3468 if (!adev->ip_blocks[i].status.valid)
d31a501e 3469 continue;
a1255107
AD
3470 if (adev->ip_blocks[i].status.hang &&
3471 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3472 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
3473 if (r)
3474 return r;
3475 }
3476 }
3477
3478 return 0;
3479}
3480
e3ecdffa
AD
3481/**
3482 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3483 *
3484 * @adev: amdgpu_device pointer
3485 *
3486 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
3487 * reset is necessary to recover.
3488 * Returns true if a full asic reset is required, false if not.
3489 */
06ec9070 3490static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 3491{
da146d3b
AD
3492 int i;
3493
8bc04c29
AD
3494 if (amdgpu_asic_need_full_reset(adev))
3495 return true;
3496
da146d3b 3497 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3498 if (!adev->ip_blocks[i].status.valid)
da146d3b 3499 continue;
a1255107
AD
3500 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3501 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3502 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
3503 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3504 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 3505 if (adev->ip_blocks[i].status.hang) {
da146d3b
AD
 3506 DRM_INFO("Some blocks need full reset!\n");
3507 return true;
3508 }
3509 }
35d782fe
CZ
3510 }
3511 return false;
3512}
3513
e3ecdffa
AD
3514/**
3515 * amdgpu_device_ip_soft_reset - do a soft reset
3516 *
3517 * @adev: amdgpu_device pointer
3518 *
3519 * The list of all the hardware IPs that make up the asic is walked and the
3520 * soft_reset callbacks are run if the block is hung. soft_reset handles any
3521 * IP specific hardware or software state changes that are necessary to soft
3522 * reset the IP.
3523 * Returns 0 on success, negative error code on failure.
3524 */
06ec9070 3525static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3526{
3527 int i, r = 0;
3528
3529 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3530 if (!adev->ip_blocks[i].status.valid)
35d782fe 3531 continue;
a1255107
AD
3532 if (adev->ip_blocks[i].status.hang &&
3533 adev->ip_blocks[i].version->funcs->soft_reset) {
3534 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
3535 if (r)
3536 return r;
3537 }
3538 }
3539
3540 return 0;
3541}
3542
e3ecdffa
AD
3543/**
3544 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
3545 *
3546 * @adev: amdgpu_device pointer
3547 *
3548 * The list of all the hardware IPs that make up the asic is walked and the
3549 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
3550 * handles any IP specific hardware or software state changes that are
3551 * necessary after the IP has been soft reset.
3552 * Returns 0 on success, negative error code on failure.
3553 */
06ec9070 3554static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3555{
3556 int i, r = 0;
3557
3558 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3559 if (!adev->ip_blocks[i].status.valid)
35d782fe 3560 continue;
a1255107
AD
3561 if (adev->ip_blocks[i].status.hang &&
3562 adev->ip_blocks[i].version->funcs->post_soft_reset)
3563 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
3564 if (r)
3565 return r;
3566 }
3567
3568 return 0;
3569}
3570
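
/*
 * Illustrative sketch (not part of the driver): the intended ordering of
 * the soft-reset helpers above, mirroring how amdgpu_device_pre_asic_reset()
 * combines them further down.  The function name is an assumption for the
 * example only.
 */
static int example_try_soft_reset(struct amdgpu_device *adev)
{
	int r;

	if (!amdgpu_device_ip_check_soft_reset(adev))
		return 0;	/* nothing is hung */

	if (amdgpu_device_ip_need_full_reset(adev))
		return -EAGAIN;	/* soft reset cannot recover these blocks */

	amdgpu_device_ip_pre_soft_reset(adev);
	r = amdgpu_device_ip_soft_reset(adev);
	amdgpu_device_ip_post_soft_reset(adev);

	/* still hung afterwards -> the caller has to do a full reset */
	if (r || amdgpu_device_ip_check_soft_reset(adev))
		return -EAGAIN;

	return 0;
}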
e3ecdffa 3571/**
c33adbc7 3572 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
3573 *
3574 * @adev: amdgpu_device pointer
3575 *
3576 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
3577 * restore things like GPUVM page tables after a GPU reset where
3578 * the contents of VRAM might be lost.
403009bf
CK
3579 *
3580 * Returns:
3581 * 0 on success, negative error code on failure.
e3ecdffa 3582 */
c33adbc7 3583static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 3584{
c41d1cf6 3585 struct dma_fence *fence = NULL, *next = NULL;
403009bf
CK
3586 struct amdgpu_bo *shadow;
3587 long r = 1, tmo;
c41d1cf6
ML
3588
3589 if (amdgpu_sriov_runtime(adev))
b045d3af 3590 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
3591 else
3592 tmo = msecs_to_jiffies(100);
3593
3594 DRM_INFO("recover vram bo from shadow start\n");
3595 mutex_lock(&adev->shadow_list_lock);
403009bf
CK
3596 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
3597
3598 /* No need to recover an evicted BO */
3599 if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
b575f10d 3600 shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
403009bf
CK
3601 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
3602 continue;
3603
3604 r = amdgpu_bo_restore_shadow(shadow, &next);
3605 if (r)
3606 break;
3607
c41d1cf6 3608 if (fence) {
1712fb1a 3609 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
3610 dma_fence_put(fence);
3611 fence = next;
1712fb1a 3612 if (tmo == 0) {
3613 r = -ETIMEDOUT;
c41d1cf6 3614 break;
1712fb1a 3615 } else if (tmo < 0) {
3616 r = tmo;
3617 break;
3618 }
403009bf
CK
3619 } else {
3620 fence = next;
c41d1cf6 3621 }
c41d1cf6
ML
3622 }
3623 mutex_unlock(&adev->shadow_list_lock);
3624
403009bf
CK
3625 if (fence)
3626 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
3627 dma_fence_put(fence);
3628
1712fb1a 3629 if (r < 0 || tmo <= 0) {
3630 DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
3631 return -EIO;
3632 }
c41d1cf6 3633
403009bf
CK
3634 DRM_INFO("recover vram bo from shadow done\n");
3635 return 0;
c41d1cf6
ML
3636}
3637
a90ad3c2 3638
e3ecdffa 3639/**
06ec9070 3640 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e
ML
3641 *
3642 * @adev: amdgpu device pointer
87e3f136 3643 * @from_hypervisor: request from hypervisor
5740682e
ML
3644 *
 3645 * Do a VF FLR and reinitialize the ASIC.
3f48c681 3646 * Returns 0 on success, negative error code on failure.
e3ecdffa
AD
3647 */
3648static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
3649 bool from_hypervisor)
5740682e
ML
3650{
3651 int r;
3652
3653 if (from_hypervisor)
3654 r = amdgpu_virt_request_full_gpu(adev, true);
3655 else
3656 r = amdgpu_virt_reset_gpu(adev);
3657 if (r)
3658 return r;
a90ad3c2 3659
f81e8d53
WL
3660 amdgpu_amdkfd_pre_reset(adev);
3661
a90ad3c2 3662 /* Resume IP prior to SMC */
06ec9070 3663 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
3664 if (r)
3665 goto error;
a90ad3c2
ML
3666
3667 /* we need recover gart prior to run SMC/CP/SDMA resume */
c1c7ce8f 3668 amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
a90ad3c2 3669
7a3e0bb2
RZ
3670 r = amdgpu_device_fw_loading(adev);
3671 if (r)
3672 return r;
3673
a90ad3c2 3674 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 3675 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
3676 if (r)
3677 goto error;
a90ad3c2
ML
3678
3679 amdgpu_irq_gpu_reset_resume_helper(adev);
5740682e 3680 r = amdgpu_ib_ring_tests(adev);
f81e8d53 3681 amdgpu_amdkfd_post_reset(adev);
a90ad3c2 3682
abc34253 3683error:
d3c117e5 3684 amdgpu_virt_init_data_exchange(adev);
abc34253 3685 amdgpu_virt_release_full_gpu(adev, true);
c41d1cf6 3686 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 3687 amdgpu_inc_vram_lost(adev);
c33adbc7 3688 r = amdgpu_device_recover_vram(adev);
a90ad3c2
ML
3689 }
3690
3691 return r;
3692}
3693
12938fad
CK
3694/**
3695 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
3696 *
3697 * @adev: amdgpu device pointer
3698 *
3699 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
3700 * a hung GPU.
3701 */
3702bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
3703{
3704 if (!amdgpu_device_ip_check_soft_reset(adev)) {
3705 DRM_INFO("Timeout, but no hardware hang detected.\n");
3706 return false;
3707 }
3708
3ba7b418
AG
3709 if (amdgpu_gpu_recovery == 0)
3710 goto disabled;
3711
3712 if (amdgpu_sriov_vf(adev))
3713 return true;
3714
3715 if (amdgpu_gpu_recovery == -1) {
3716 switch (adev->asic_type) {
fc42d47c
AG
3717 case CHIP_BONAIRE:
3718 case CHIP_HAWAII:
3ba7b418
AG
3719 case CHIP_TOPAZ:
3720 case CHIP_TONGA:
3721 case CHIP_FIJI:
3722 case CHIP_POLARIS10:
3723 case CHIP_POLARIS11:
3724 case CHIP_POLARIS12:
3725 case CHIP_VEGAM:
3726 case CHIP_VEGA20:
3727 case CHIP_VEGA10:
3728 case CHIP_VEGA12:
c43b849f 3729 case CHIP_RAVEN:
3ba7b418
AG
3730 break;
3731 default:
3732 goto disabled;
3733 }
12938fad
CK
3734 }
3735
3736 return true;
3ba7b418
AG
3737
3738disabled:
3739 DRM_INFO("GPU recovery disabled.\n");
3740 return false;
12938fad
CK
3741}
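
/*
 * Illustrative sketch (not part of the driver): a job timeout handler
 * typically gates recovery on the check above before calling
 * amdgpu_device_gpu_recover() (defined below).  The function name is an
 * assumption for the example only.
 */
static void example_handle_job_timeout(struct amdgpu_device *adev,
				       struct amdgpu_job *job)
{
	if (amdgpu_device_should_recover_gpu(adev))
		amdgpu_device_gpu_recover(adev, job);
	else
		DRM_INFO("Timeout handled without a GPU reset.\n");
}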
3742
5c6dd71e 3743
26bc5340
AG
3744static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
3745 struct amdgpu_job *job,
3746 bool *need_full_reset_arg)
3747{
3748 int i, r = 0;
3749 bool need_full_reset = *need_full_reset_arg;
71182665 3750
71182665 3751 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
3752 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3753 struct amdgpu_ring *ring = adev->rings[i];
3754
51687759 3755 if (!ring || !ring->sched.thread)
0875dc9e 3756 continue;
5740682e 3757
2f9d4084
ML
3758 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
3759 amdgpu_fence_driver_force_completion(ring);
0875dc9e 3760 }
d38ceaf9 3761
222b5f04
AG
 3762 if (job)
3763 drm_sched_increase_karma(&job->base);
3764
1d721ed6 3765 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
3766 if (!amdgpu_sriov_vf(adev)) {
3767
3768 if (!need_full_reset)
3769 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
3770
3771 if (!need_full_reset) {
3772 amdgpu_device_ip_pre_soft_reset(adev);
3773 r = amdgpu_device_ip_soft_reset(adev);
3774 amdgpu_device_ip_post_soft_reset(adev);
3775 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
3776 DRM_INFO("soft reset failed, will fallback to full reset!\n");
3777 need_full_reset = true;
3778 }
3779 }
3780
3781 if (need_full_reset)
3782 r = amdgpu_device_ip_suspend(adev);
3783
3784 *need_full_reset_arg = need_full_reset;
3785 }
3786
3787 return r;
3788}
3789
3790static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
3791 struct list_head *device_list_handle,
3792 bool *need_full_reset_arg)
3793{
3794 struct amdgpu_device *tmp_adev = NULL;
3795 bool need_full_reset = *need_full_reset_arg, vram_lost = false;
3796 int r = 0;
3797
3798 /*
 3799 * ASIC reset has to be done on all XGMI hive nodes ASAP
 3800 * to allow proper link negotiation in FW (within 1 sec)
3801 */
3802 if (need_full_reset) {
3803 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
d4535e2c
AG
3804 /* For XGMI run all resets in parallel to speed up the process */
3805 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
3806 if (!queue_work(system_highpri_wq, &tmp_adev->xgmi_reset_work))
3807 r = -EALREADY;
3808 } else
3809 r = amdgpu_asic_reset(tmp_adev);
3810
3811 if (r) {
fed184e9 3812 DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s",
26bc5340 3813 r, tmp_adev->ddev->unique);
d4535e2c
AG
3814 break;
3815 }
3816 }
3817
3818 /* For XGMI wait for all PSP resets to complete before proceed */
3819 if (!r) {
3820 list_for_each_entry(tmp_adev, device_list_handle,
3821 gmc.xgmi.head) {
3822 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
3823 flush_work(&tmp_adev->xgmi_reset_work);
3824 r = tmp_adev->asic_reset_res;
3825 if (r)
3826 break;
3827 }
3828 }
26bc5340
AG
3829 }
3830 }
3831
3832
3833 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3834 if (need_full_reset) {
3835 /* post card */
3836 if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
3837 DRM_WARN("asic atom init failed!");
3838
3839 if (!r) {
3840 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
3841 r = amdgpu_device_ip_resume_phase1(tmp_adev);
3842 if (r)
3843 goto out;
3844
3845 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
3846 if (vram_lost) {
77e7f829 3847 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 3848 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
3849 }
3850
3851 r = amdgpu_gtt_mgr_recover(
3852 &tmp_adev->mman.bdev.man[TTM_PL_TT]);
3853 if (r)
3854 goto out;
3855
3856 r = amdgpu_device_fw_loading(tmp_adev);
3857 if (r)
3858 return r;
3859
3860 r = amdgpu_device_ip_resume_phase2(tmp_adev);
3861 if (r)
3862 goto out;
3863
3864 if (vram_lost)
3865 amdgpu_device_fill_reset_magic(tmp_adev);
3866
fdafb359
EQ
3867 /*
 3868 * Add this ASIC back as tracked since the reset
 3869 * has already completed successfully.
3870 */
3871 amdgpu_register_gpu_instance(tmp_adev);
3872
7c04ca50 3873 r = amdgpu_device_ip_late_init(tmp_adev);
3874 if (r)
3875 goto out;
3876
e79a04d5 3877 /* must succeed. */
511fdbc3 3878 amdgpu_ras_resume(tmp_adev);
e79a04d5 3879
26bc5340
AG
3880 /* Update PSP FW topology after reset */
3881 if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
3882 r = amdgpu_xgmi_update_topology(hive, tmp_adev);
3883 }
3884 }
3885
3886
3887out:
3888 if (!r) {
3889 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
3890 r = amdgpu_ib_ring_tests(tmp_adev);
3891 if (r) {
3892 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
3893 r = amdgpu_device_ip_suspend(tmp_adev);
3894 need_full_reset = true;
3895 r = -EAGAIN;
3896 goto end;
3897 }
3898 }
3899
3900 if (!r)
3901 r = amdgpu_device_recover_vram(tmp_adev);
3902 else
3903 tmp_adev->asic_reset_res = r;
3904 }
3905
3906end:
3907 *need_full_reset_arg = need_full_reset;
3908 return r;
3909}
3910
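
/*
 * Illustrative sketch (not part of the driver): amdgpu_do_asic_reset()
 * returns -EAGAIN when the post-reset IB tests fail and another full reset
 * should be attempted; callers loop on it the same way
 * amdgpu_device_gpu_recover() does below.  The function name is an
 * assumption for the example only.
 */
static int example_reset_until_done(struct amdgpu_hive_info *hive,
				    struct list_head *device_list)
{
	bool need_full_reset = true;
	int r;

	do {
		r = amdgpu_do_asic_reset(hive, device_list, &need_full_reset);
	} while (r == -EAGAIN);

	return r;
}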
1d721ed6 3911static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
26bc5340 3912{
1d721ed6
AG
3913 if (trylock) {
3914 if (!mutex_trylock(&adev->lock_reset))
3915 return false;
3916 } else
3917 mutex_lock(&adev->lock_reset);
5740682e 3918
26bc5340
AG
3919 atomic_inc(&adev->gpu_reset_counter);
3920 adev->in_gpu_reset = 1;
a3a09142
AD
3921 switch (amdgpu_asic_reset_method(adev)) {
3922 case AMD_RESET_METHOD_MODE1:
3923 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
3924 break;
3925 case AMD_RESET_METHOD_MODE2:
3926 adev->mp1_state = PP_MP1_STATE_RESET;
3927 break;
3928 default:
3929 adev->mp1_state = PP_MP1_STATE_NONE;
3930 break;
3931 }
1d721ed6
AG
3932
3933 return true;
26bc5340 3934}
d38ceaf9 3935
26bc5340
AG
3936static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
3937{
89041940 3938 amdgpu_vf_error_trans_all(adev);
a3a09142 3939 adev->mp1_state = PP_MP1_STATE_NONE;
13a752e3
ML
3940 adev->in_gpu_reset = 0;
3941 mutex_unlock(&adev->lock_reset);
26bc5340
AG
3942}
3943
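
/*
 * Illustrative sketch (not part of the driver): the lock/unlock helpers
 * above always bracket the actual reset work, with trylock semantics when
 * another reset may already be in flight.  The function name is an
 * assumption for the example only.
 */
static int example_reset_one_adev(struct amdgpu_device *adev)
{
	int r;

	if (!amdgpu_device_lock_adev(adev, true))
		return 0;	/* another reset already owns this device */

	r = amdgpu_asic_reset(adev);

	amdgpu_device_unlock_adev(adev);
	return r;
}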
26bc5340
AG
3944/**
3945 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
3946 *
3947 * @adev: amdgpu device pointer
3948 * @job: which job trigger hang
3949 *
3950 * Attempt to reset the GPU if it has hung (all asics).
3951 * Attempt to do soft-reset or full-reset and reinitialize Asic
3952 * Returns 0 for success or an error on failure.
3953 */
3954
3955int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
3956 struct amdgpu_job *job)
3957{
1d721ed6
AG
3958 struct list_head device_list, *device_list_handle = NULL;
3959 bool need_full_reset, job_signaled;
26bc5340 3960 struct amdgpu_hive_info *hive = NULL;
26bc5340 3961 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 3962 int i, r = 0;
7c6e68c7 3963 bool in_ras_intr = amdgpu_ras_intr_triggered();
26bc5340 3964
d5ea093e
AG
3965 /*
3966 * Flush RAM to disk so that after reboot
 3967 * the user can read the log and see why the system rebooted.
3968 */
3969 if (in_ras_intr && amdgpu_ras_get_context(adev)->reboot) {
3970
3971 DRM_WARN("Emergency reboot.");
3972
3973 ksys_sync_helper();
3974 emergency_restart();
3975 }
3976
1d721ed6 3977 need_full_reset = job_signaled = false;
26bc5340
AG
3978 INIT_LIST_HEAD(&device_list);
3979
7c6e68c7 3980 dev_info(adev->dev, "GPU %s begin!\n", in_ras_intr ? "jobs stop":"reset");
26bc5340 3981
beff74bc 3982 cancel_delayed_work_sync(&adev->delayed_init_work);
c53e4db7 3983
1d721ed6
AG
3984 hive = amdgpu_get_xgmi_hive(adev, false);
3985
26bc5340 3986 /*
1d721ed6
AG
 3987 * Here we trylock to avoid a chain of resets executing from
 3988 * either jobs triggered on different adevs in the XGMI hive or jobs on
 3989 * different schedulers for the same device while this TO handler is running.
 3990 * We always reset all schedulers for a device and all devices in an XGMI
 3991 * hive, so that should take care of them too.
26bc5340 3992 */
1d721ed6
AG
3993
3994 if (hive && !mutex_trylock(&hive->reset_lock)) {
3995 DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
0b2d2c2e 3996 job ? job->base.id : -1, hive->hive_id);
26bc5340 3997 return 0;
1d721ed6 3998 }
26bc5340
AG
3999
4000 /* Start with adev pre asic reset first for soft reset check.*/
1d721ed6
AG
4001 if (!amdgpu_device_lock_adev(adev, !hive)) {
4002 DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
0b2d2c2e 4003 job ? job->base.id : -1);
1d721ed6 4004 return 0;
26bc5340
AG
4005 }
4006
7c6e68c7
AG
4007 /* Block kfd: SRIOV would do it separately */
4008 if (!amdgpu_sriov_vf(adev))
4009 amdgpu_amdkfd_pre_reset(adev);
4010
26bc5340 4011 /* Build list of devices to reset */
1d721ed6 4012 if (adev->gmc.xgmi.num_physical_nodes > 1) {
26bc5340 4013 if (!hive) {
7c6e68c7
AG
 4014 /* unlock kfd: SRIOV would do it separately */
4015 if (!amdgpu_sriov_vf(adev))
4016 amdgpu_amdkfd_post_reset(adev);
26bc5340
AG
4017 amdgpu_device_unlock_adev(adev);
4018 return -ENODEV;
4019 }
4020
4021 /*
4022 * In case we are in XGMI hive mode device reset is done for all the
4023 * nodes in the hive to retrain all XGMI links and hence the reset
4024 * sequence is executed in loop on all nodes.
4025 */
4026 device_list_handle = &hive->device_list;
4027 } else {
4028 list_add_tail(&adev->gmc.xgmi.head, &device_list);
4029 device_list_handle = &device_list;
4030 }
4031
1d721ed6
AG
4032 /* block all schedulers and reset given job's ring */
4033 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
7c6e68c7 4034 if (tmp_adev != adev) {
12ffa55d 4035 amdgpu_device_lock_adev(tmp_adev, false);
7c6e68c7
AG
4036 if (!amdgpu_sriov_vf(tmp_adev))
4037 amdgpu_amdkfd_pre_reset(tmp_adev);
4038 }
4039
12ffa55d
AG
4040 /*
 4041 * Mark these ASICs to be reset as untracked first,
 4042 * and add them back after the reset completes.
4043 */
4044 amdgpu_unregister_gpu_instance(tmp_adev);
4045
f1c1314b 4046 /* disable ras on ALL IPs */
7c6e68c7 4047 if (!in_ras_intr && amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 4048 amdgpu_ras_suspend(tmp_adev);
4049
1d721ed6
AG
4050 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4051 struct amdgpu_ring *ring = tmp_adev->rings[i];
4052
4053 if (!ring || !ring->sched.thread)
4054 continue;
4055
0b2d2c2e 4056 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7
AG
4057
4058 if (in_ras_intr)
4059 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6
AG
4060 }
4061 }
4062
4063
7c6e68c7
AG
4064 if (in_ras_intr)
4065 goto skip_sched_resume;
4066
1d721ed6
AG
4067 /*
4068 * Must check guilty signal here since after this point all old
4069 * HW fences are force signaled.
4070 *
4071 * job->base holds a reference to parent fence
4072 */
4073 if (job && job->base.s_fence->parent &&
4074 dma_fence_is_signaled(job->base.s_fence->parent))
4075 job_signaled = true;
4076
1d721ed6
AG
4077 if (job_signaled) {
4078 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
4079 goto skip_hw_reset;
4080 }
4081
4082
 4083 /* Guilty job will be freed after this */
0b2d2c2e 4084 r = amdgpu_device_pre_asic_reset(adev, job, &need_full_reset);
1d721ed6
AG
4085 if (r) {
4086 /*TODO Should we stop ?*/
4087 DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
4088 r, adev->ddev->unique);
4089 adev->asic_reset_res = r;
4090 }
4091
26bc5340
AG
4092retry: /* Rest of adevs pre asic reset from XGMI hive. */
4093 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4094
4095 if (tmp_adev == adev)
4096 continue;
4097
26bc5340
AG
4098 r = amdgpu_device_pre_asic_reset(tmp_adev,
4099 NULL,
4100 &need_full_reset);
4101 /*TODO Should we stop ?*/
4102 if (r) {
4103 DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
4104 r, tmp_adev->ddev->unique);
4105 tmp_adev->asic_reset_res = r;
4106 }
4107 }
4108
4109 /* Actual ASIC resets if needed.*/
4110 /* TODO Implement XGMI hive reset logic for SRIOV */
4111 if (amdgpu_sriov_vf(adev)) {
4112 r = amdgpu_device_reset_sriov(adev, job ? false : true);
4113 if (r)
4114 adev->asic_reset_res = r;
4115 } else {
4116 r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
 4117 if (r == -EAGAIN)
4118 goto retry;
4119 }
4120
1d721ed6
AG
4121skip_hw_reset:
4122
26bc5340
AG
4123 /* Post ASIC reset for all devs .*/
4124 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
7c6e68c7 4125
1d721ed6
AG
4126 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4127 struct amdgpu_ring *ring = tmp_adev->rings[i];
4128
4129 if (!ring || !ring->sched.thread)
4130 continue;
4131
 4132 /* No point in resubmitting jobs if we didn't HW reset */
4133 if (!tmp_adev->asic_reset_res && !job_signaled)
4134 drm_sched_resubmit_jobs(&ring->sched);
4135
4136 drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
4137 }
4138
4139 if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
4140 drm_helper_resume_force_mode(tmp_adev->ddev);
4141 }
4142
4143 tmp_adev->asic_reset_res = 0;
26bc5340
AG
4144
4145 if (r) {
4146 /* bad news, how to tell it to userspace ? */
12ffa55d 4147 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
4148 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
4149 } else {
12ffa55d 4150 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340 4151 }
7c6e68c7 4152 }
26bc5340 4153
7c6e68c7
AG
4154skip_sched_resume:
4155 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
 4156 /* unlock kfd: SRIOV would do it separately */
4157 if (!in_ras_intr && !amdgpu_sriov_vf(tmp_adev))
4158 amdgpu_amdkfd_post_reset(tmp_adev);
26bc5340
AG
4159 amdgpu_device_unlock_adev(tmp_adev);
4160 }
4161
1d721ed6 4162 if (hive)
22d6575b 4163 mutex_unlock(&hive->reset_lock);
26bc5340
AG
4164
4165 if (r)
4166 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
d38ceaf9
AD
4167 return r;
4168}
4169
e3ecdffa
AD
4170/**
 4171 * amdgpu_device_get_pcie_info - fetch PCIE info about the PCIE slot
4172 *
4173 * @adev: amdgpu_device pointer
4174 *
 4175 * Fetches and stores in the driver the PCIE capabilities (gen speed
4176 * and lanes) of the slot the device is in. Handles APUs and
4177 * virtualized environments where PCIE config space may not be available.
4178 */
5494d864 4179static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 4180{
5d9a6330 4181 struct pci_dev *pdev;
c5313457
HK
4182 enum pci_bus_speed speed_cap, platform_speed_cap;
4183 enum pcie_link_width platform_link_width;
d0dd7f0c 4184
cd474ba0
AD
4185 if (amdgpu_pcie_gen_cap)
4186 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 4187
cd474ba0
AD
4188 if (amdgpu_pcie_lane_cap)
4189 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 4190
cd474ba0
AD
4191 /* covers APUs as well */
4192 if (pci_is_root_bus(adev->pdev->bus)) {
4193 if (adev->pm.pcie_gen_mask == 0)
4194 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
4195 if (adev->pm.pcie_mlw_mask == 0)
4196 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 4197 return;
cd474ba0 4198 }
d0dd7f0c 4199
c5313457
HK
4200 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
4201 return;
4202
dbaa922b
AD
4203 pcie_bandwidth_available(adev->pdev, NULL,
4204 &platform_speed_cap, &platform_link_width);
c5313457 4205
cd474ba0 4206 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
4207 /* asic caps */
4208 pdev = adev->pdev;
4209 speed_cap = pcie_get_speed_cap(pdev);
4210 if (speed_cap == PCI_SPEED_UNKNOWN) {
4211 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
4212 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4213 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 4214 } else {
5d9a6330
AD
4215 if (speed_cap == PCIE_SPEED_16_0GT)
4216 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4217 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4218 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4219 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
4220 else if (speed_cap == PCIE_SPEED_8_0GT)
4221 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4222 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4223 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
4224 else if (speed_cap == PCIE_SPEED_5_0GT)
4225 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4226 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
4227 else
4228 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
4229 }
4230 /* platform caps */
c5313457 4231 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
4232 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4233 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4234 } else {
c5313457 4235 if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
4236 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4237 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4238 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4239 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 4240 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
4241 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4242 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4243 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 4244 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
4245 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4246 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4247 else
4248 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
4249
cd474ba0
AD
4250 }
4251 }
4252 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 4253 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
4254 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
4255 } else {
c5313457 4256 switch (platform_link_width) {
5d9a6330 4257 case PCIE_LNK_X32:
cd474ba0
AD
4258 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
4259 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4260 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4261 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4262 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4263 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4264 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4265 break;
5d9a6330 4266 case PCIE_LNK_X16:
cd474ba0
AD
4267 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4268 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4269 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4270 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4271 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4272 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4273 break;
5d9a6330 4274 case PCIE_LNK_X12:
cd474ba0
AD
4275 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4276 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4277 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4278 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4279 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4280 break;
5d9a6330 4281 case PCIE_LNK_X8:
cd474ba0
AD
4282 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4283 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4284 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4285 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4286 break;
5d9a6330 4287 case PCIE_LNK_X4:
cd474ba0
AD
4288 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4289 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4290 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4291 break;
5d9a6330 4292 case PCIE_LNK_X2:
cd474ba0
AD
4293 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4294 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4295 break;
5d9a6330 4296 case PCIE_LNK_X1:
cd474ba0
AD
4297 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
4298 break;
4299 default:
4300 break;
4301 }
d0dd7f0c
AD
4302 }
4303 }
4304}
d38ceaf9 4305
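
/*
 * Illustrative sketch (not part of the driver): consuming the capability
 * masks built by amdgpu_device_get_pcie_info() above.  The helper name is
 * an assumption for the example only; the CAIL_* flag is the same one used
 * in the function above.
 */
static bool example_platform_supports_pcie_gen3(struct amdgpu_device *adev)
{
	return (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) != 0;
}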
361dbd01
AD
4306int amdgpu_device_baco_enter(struct drm_device *dev)
4307{
4308 struct amdgpu_device *adev = dev->dev_private;
4309
4310 if (!amdgpu_device_supports_baco(adev->ddev))
4311 return -ENOTSUPP;
4312
4313 if (is_support_sw_smu(adev)) {
4314 struct smu_context *smu = &adev->smu;
4315 int ret;
4316
4317 ret = smu_baco_enter(smu);
4318 if (ret)
4319 return ret;
4320
4321 return 0;
4322 } else {
4323 void *pp_handle = adev->powerplay.pp_handle;
4324 const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
4325
 4326 if (!pp_funcs || !pp_funcs->get_asic_baco_state || !pp_funcs->set_asic_baco_state)
4327 return -ENOENT;
4328
4329 /* enter BACO state */
4330 if (pp_funcs->set_asic_baco_state(pp_handle, 1))
4331 return -EIO;
4332
4333 return 0;
4334 }
4335}
4336
4337int amdgpu_device_baco_exit(struct drm_device *dev)
4338{
4339 struct amdgpu_device *adev = dev->dev_private;
4340
4341 if (!amdgpu_device_supports_baco(adev->ddev))
4342 return -ENOTSUPP;
4343
4344 if (is_support_sw_smu(adev)) {
4345 struct smu_context *smu = &adev->smu;
4346 int ret;
4347
4348 ret = smu_baco_exit(smu);
4349 if (ret)
4350 return ret;
4351
4352 return 0;
4353 } else {
4354 void *pp_handle = adev->powerplay.pp_handle;
4355 const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
4356
 4357 if (!pp_funcs || !pp_funcs->get_asic_baco_state || !pp_funcs->set_asic_baco_state)
4358 return -ENOENT;
4359
4360 /* exit BACO state */
4361 if (pp_funcs->set_asic_baco_state(pp_handle, 0))
4362 return -EIO;
4363
4364 return 0;
4365 }
4366}
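
/*
 * Illustrative sketch (not part of the driver): a BACO-based reset is an
 * enter/exit pair on the same drm_device, using the two entry points
 * defined above.  The function name is an assumption for the example only.
 */
static int example_baco_reset(struct drm_device *dev)
{
	int r;

	r = amdgpu_device_baco_enter(dev);
	if (r)
		return r;

	/* leaving BACO brings the ASIC back up after the reset */
	return amdgpu_device_baco_exit(dev);
}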