drm/amdgpu: support full gpu reset workflow when ras err_event_athub occurs
[linux-2.6-block.git] drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
d38ceaf9
AD
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
b1ddf548 28#include <linux/power_supply.h>
0875dc9e 29#include <linux/kthread.h>
fdf2f6c5 30#include <linux/module.h>
d38ceaf9
AD
31#include <linux/console.h>
32#include <linux/slab.h>
fdf2f6c5 33
4562236b 34#include <drm/drm_atomic_helper.h>
fcd70cd3 35#include <drm/drm_probe_helper.h>
d38ceaf9
AD
36#include <drm/amdgpu_drm.h>
37#include <linux/vgaarb.h>
38#include <linux/vga_switcheroo.h>
39#include <linux/efi.h>
40#include "amdgpu.h"
f4b373f4 41#include "amdgpu_trace.h"
d38ceaf9
AD
42#include "amdgpu_i2c.h"
43#include "atom.h"
44#include "amdgpu_atombios.h"
a5bde2f9 45#include "amdgpu_atomfirmware.h"
d0dd7f0c 46#include "amd_pcie.h"
33f34802
KW
47#ifdef CONFIG_DRM_AMDGPU_SI
48#include "si.h"
49#endif
a2e73f56
AD
50#ifdef CONFIG_DRM_AMDGPU_CIK
51#include "cik.h"
52#endif
aaa36a97 53#include "vi.h"
460826e6 54#include "soc15.h"
0a5b8c7b 55#include "nv.h"
d38ceaf9 56#include "bif/bif_4_1_d.h"
9accf2fd 57#include <linux/pci.h>
bec86378 58#include <linux/firmware.h>
89041940 59#include "amdgpu_vf_error.h"
d38ceaf9 60
ba997709 61#include "amdgpu_amdkfd.h"
d2f52ac8 62#include "amdgpu_pm.h"
d38ceaf9 63
5183411b 64#include "amdgpu_xgmi.h"
c030f2e4 65#include "amdgpu_ras.h"
9c7c85f7 66#include "amdgpu_pmu.h"
5183411b 67
d5ea093e
AG
68#include <linux/suspend.h>
69
e2a75f88 70MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
3f76dced 71MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
2d2e5e7e 72MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
ad5a67a7 73MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
54c4d17e 74MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
65e60f6e 75MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
b51a26a0 76MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
23c6268e 77MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
ed42cfe1 78MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
42b325e5 79MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
e2a75f88 80
2dc80b00
S
81#define AMDGPU_RESUME_MS 2000
82
050091ab 83const char *amdgpu_asic_name[] = {
da69c161
KW
84 "TAHITI",
85 "PITCAIRN",
86 "VERDE",
87 "OLAND",
88 "HAINAN",
d38ceaf9
AD
89 "BONAIRE",
90 "KAVERI",
91 "KABINI",
92 "HAWAII",
93 "MULLINS",
94 "TOPAZ",
95 "TONGA",
48299f95 96 "FIJI",
d38ceaf9 97 "CARRIZO",
139f4917 98 "STONEY",
2cc0c0b5
FC
99 "POLARIS10",
100 "POLARIS11",
c4642a47 101 "POLARIS12",
48ff108d 102 "VEGAM",
d4196f01 103 "VEGA10",
8fab806a 104 "VEGA12",
956fcddc 105 "VEGA20",
2ca8a5d2 106 "RAVEN",
d6c3b24e 107 "ARCTURUS",
1eee4228 108 "RENOIR",
852a6626 109 "NAVI10",
87dbad02 110 "NAVI14",
9802f5d7 111 "NAVI12",
d38ceaf9
AD
112 "LAST",
113};
114
dcea6e65
KR
115/**
116 * DOC: pcie_replay_count
117 *
118 * The amdgpu driver provides a sysfs API for reporting the total number
119 * of PCIe replays (NAKs)
120 * The file pcie_replay_count is used for this and returns the total
121 * number of replays as a sum of the NAKs generated and NAKs received
122 */
123
124static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
125 struct device_attribute *attr, char *buf)
126{
127 struct drm_device *ddev = dev_get_drvdata(dev);
128 struct amdgpu_device *adev = ddev->dev_private;
129 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
130
131 return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
132}
133
134static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
135 amdgpu_device_get_pcie_replay_count, NULL);
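/*
 * Illustrative sketch (not part of the driver): reading this attribute from
 * user space. The sysfs path below assumes the GPU is card0; adjust the card
 * index for the actual system.
 *
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		unsigned long long replays;
 *		FILE *f = fopen("/sys/class/drm/card0/device/pcie_replay_count", "r");
 *
 *		if (!f)
 *			return 1;
 *		if (fscanf(f, "%llu", &replays) == 1)
 *			printf("PCIe replays: %llu\n", replays);
 *		fclose(f);
 *		return 0;
 *	}
 */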
136
5494d864
AD
137static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
138
e3ecdffa 139/**
31af062a 140 * amdgpu_device_supports_boco - Is the device a dGPU with HG/PX power control
e3ecdffa
AD
141 *
142 * @dev: drm_device pointer
143 *
144 * Returns true if the device is a dGPU with HG/PX power control,
145 * otherwise return false.
146 */
31af062a 147bool amdgpu_device_supports_boco(struct drm_device *dev)
d38ceaf9
AD
148{
149 struct amdgpu_device *adev = dev->dev_private;
150
2f7d10b3 151 if (adev->flags & AMD_IS_PX)
d38ceaf9
AD
152 return true;
153 return false;
154}
155
a69cba42
AD
156/**
157 * amdgpu_device_supports_baco - Does the device support BACO
158 *
159 * @dev: drm_device pointer
160 *
161 * Returns true if the device supports BACO,
162 * otherwise return false.
163 */
164bool amdgpu_device_supports_baco(struct drm_device *dev)
165{
166 struct amdgpu_device *adev = dev->dev_private;
167
168 return amdgpu_asic_supports_baco(adev);
169}
170
e35e2b11
TY
171/**
172 * VRAM access helper functions.
173 *
174 * amdgpu_device_vram_access - read/write a buffer in vram
175 *
176 * @adev: amdgpu_device pointer
177 * @pos: offset of the buffer in vram
178 * @buf: virtual address of the buffer in system memory
179 * @size: read/write size in bytes, the buffer at @buf must hold at least @size bytes
180 * @write: true - write to vram, otherwise - read from vram
181 */
182void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
183 uint32_t *buf, size_t size, bool write)
184{
185 uint64_t last;
186 unsigned long flags;
187
188 last = size - 4;
189 for (last += pos; pos <= last; pos += 4) {
190 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
191 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
192 WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31);
193 if (write)
194 WREG32_NO_KIQ(mmMM_DATA, *buf++);
195 else
196 *buf++ = RREG32_NO_KIQ(mmMM_DATA);
197 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
198 }
199}
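/*
 * Illustrative sketch (not part of the driver): reading the first few dwords
 * of VRAM through this helper. The offset and buffer size are arbitrary
 * example values.
 *
 *	uint32_t buf[4];
 *
 *	amdgpu_device_vram_access(adev, 0, buf, sizeof(buf), false);
 *	DRM_INFO("VRAM[0..3]: %08x %08x %08x %08x\n",
 *		 buf[0], buf[1], buf[2], buf[3]);
 */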
200
d38ceaf9
AD
201/*
202 * MMIO register access helper functions.
203 */
e3ecdffa
AD
204/**
205 * amdgpu_mm_rreg - read a memory mapped IO register
206 *
207 * @adev: amdgpu_device pointer
208 * @reg: dword aligned register offset
209 * @acc_flags: access flags which require special behavior
210 *
211 * Returns the 32 bit value from the offset specified.
212 */
d38ceaf9 213uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
15d72fd7 214 uint32_t acc_flags)
d38ceaf9 215{
f4b373f4
TSD
216 uint32_t ret;
217
43ca8efa 218 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
bc992ba5 219 return amdgpu_virt_kiq_rreg(adev, reg);
bc992ba5 220
15d72fd7 221 if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
f4b373f4 222 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
d38ceaf9
AD
223 else {
224 unsigned long flags;
d38ceaf9
AD
225
226 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
227 writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
228 ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
229 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
d38ceaf9 230 }
f4b373f4
TSD
231 trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
232 return ret;
d38ceaf9
AD
233}
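/*
 * Note: most callers do not use amdgpu_mm_rreg()/amdgpu_mm_wreg() directly;
 * they go through the RREG32()/WREG32() family of macros from amdgpu.h,
 * which (roughly) expand to these helpers with acc_flags of 0. A typical
 * read-modify-write sequence therefore looks like:
 *
 *	tmp = RREG32(reg);
 *	tmp &= ~mask;
 *	tmp |= value;
 *	WREG32(reg, tmp);
 */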
234
421a2a30
ML
235/*
236 * MMIO register byte read helper function
 237 * @offset: byte offset from MMIO start
 238 *
 239 */
240
e3ecdffa
AD
241/**
242 * amdgpu_mm_rreg8 - read a memory mapped IO register
243 *
244 * @adev: amdgpu_device pointer
245 * @offset: byte aligned register offset
246 *
247 * Returns the 8 bit value from the offset specified.
248 */
421a2a30
ML
249uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) {
250 if (offset < adev->rmmio_size)
251 return (readb(adev->rmmio + offset));
252 BUG();
253}
254
255/*
256 * MMIO register byte write helper function
 257 * @offset: byte offset from MMIO start
 258 * @value: the value to be written to the register
 259 *
 260 */
e3ecdffa
AD
261/**
262 * amdgpu_mm_wreg8 - write to a memory mapped IO register
263 *
264 * @adev: amdgpu_device pointer
265 * @offset: byte aligned register offset
266 * @value: 8 bit value to write
267 *
268 * Writes the value specified to the offset specified.
269 */
421a2a30
ML
270void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) {
271 if (offset < adev->rmmio_size)
272 writeb(value, adev->rmmio + offset);
273 else
274 BUG();
275}
276
e3ecdffa
AD
277/**
278 * amdgpu_mm_wreg - write to a memory mapped IO register
279 *
280 * @adev: amdgpu_device pointer
281 * @reg: dword aligned register offset
282 * @v: 32 bit value to write to the register
283 * @acc_flags: access flags which require special behavior
284 *
285 * Writes the value specified to the offset specified.
286 */
d38ceaf9 287void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
15d72fd7 288 uint32_t acc_flags)
d38ceaf9 289{
f4b373f4 290 trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);
4e99a44e 291
47ed4e1c
KW
292 if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
293 adev->last_mm_index = v;
294 }
295
43ca8efa 296 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
bc992ba5 297 return amdgpu_virt_kiq_wreg(adev, reg, v);
bc992ba5 298
15d72fd7 299 if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
d38ceaf9
AD
300 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
301 else {
302 unsigned long flags;
303
304 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
305 writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
306 writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
307 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
308 }
47ed4e1c
KW
309
310 if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
311 udelay(500);
312 }
d38ceaf9
AD
313}
314
e3ecdffa
AD
315/**
316 * amdgpu_io_rreg - read an IO register
317 *
318 * @adev: amdgpu_device pointer
319 * @reg: dword aligned register offset
320 *
321 * Returns the 32 bit value from the offset specified.
322 */
d38ceaf9
AD
323u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
324{
325 if ((reg * 4) < adev->rio_mem_size)
326 return ioread32(adev->rio_mem + (reg * 4));
327 else {
328 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
329 return ioread32(adev->rio_mem + (mmMM_DATA * 4));
330 }
331}
332
e3ecdffa
AD
333/**
334 * amdgpu_io_wreg - write to an IO register
335 *
336 * @adev: amdgpu_device pointer
337 * @reg: dword aligned register offset
338 * @v: 32 bit value to write to the register
339 *
340 * Writes the value specified to the offset specified.
341 */
d38ceaf9
AD
342void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
343{
47ed4e1c
KW
344 if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
345 adev->last_mm_index = v;
346 }
d38ceaf9
AD
347
348 if ((reg * 4) < adev->rio_mem_size)
349 iowrite32(v, adev->rio_mem + (reg * 4));
350 else {
351 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
352 iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
353 }
47ed4e1c
KW
354
355 if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
356 udelay(500);
357 }
d38ceaf9
AD
358}
359
360/**
361 * amdgpu_mm_rdoorbell - read a doorbell dword
362 *
363 * @adev: amdgpu_device pointer
364 * @index: doorbell index
365 *
366 * Returns the value in the doorbell aperture at the
367 * requested doorbell index (CIK).
368 */
369u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
370{
371 if (index < adev->doorbell.num_doorbells) {
372 return readl(adev->doorbell.ptr + index);
373 } else {
374 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
375 return 0;
376 }
377}
378
379/**
380 * amdgpu_mm_wdoorbell - write a doorbell dword
381 *
382 * @adev: amdgpu_device pointer
383 * @index: doorbell index
384 * @v: value to write
385 *
386 * Writes @v to the doorbell aperture at the
387 * requested doorbell index (CIK).
388 */
389void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
390{
391 if (index < adev->doorbell.num_doorbells) {
392 writel(v, adev->doorbell.ptr + index);
393 } else {
394 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
395 }
396}
397
832be404
KW
398/**
399 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
400 *
401 * @adev: amdgpu_device pointer
402 * @index: doorbell index
403 *
404 * Returns the value in the doorbell aperture at the
405 * requested doorbell index (VEGA10+).
406 */
407u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
408{
409 if (index < adev->doorbell.num_doorbells) {
410 return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
411 } else {
412 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
413 return 0;
414 }
415}
416
417/**
418 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
419 *
420 * @adev: amdgpu_device pointer
421 * @index: doorbell index
422 * @v: value to write
423 *
424 * Writes @v to the doorbell aperture at the
425 * requested doorbell index (VEGA10+).
426 */
427void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
428{
429 if (index < adev->doorbell.num_doorbells) {
430 atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
431 } else {
432 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
433 }
434}
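/*
 * Illustrative sketch (not part of the driver): ring code typically kicks the
 * hardware by writing its write pointer through these helpers, roughly
 * (64-bit doorbell case, assuming the ring was set up with a valid
 * doorbell_index):
 *
 *	if (ring->use_doorbell)
 *		WDOORBELL64(ring->doorbell_index, ring->wptr);
 */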
435
d38ceaf9
AD
436/**
437 * amdgpu_invalid_rreg - dummy reg read function
438 *
439 * @adev: amdgpu device pointer
440 * @reg: offset of register
441 *
442 * Dummy register read function. Used for register blocks
443 * that certain asics don't have (all asics).
444 * Returns the value in the register.
445 */
446static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
447{
448 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
449 BUG();
450 return 0;
451}
452
453/**
454 * amdgpu_invalid_wreg - dummy reg write function
455 *
456 * @adev: amdgpu device pointer
457 * @reg: offset of register
458 * @v: value to write to the register
459 *
460 * Dummy register write function. Used for register blocks
461 * that certain asics don't have (all asics).
462 */
463static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
464{
465 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
466 reg, v);
467 BUG();
468}
469
4fa1c6a6
TZ
470/**
471 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
472 *
473 * @adev: amdgpu device pointer
474 * @reg: offset of register
475 *
476 * Dummy register read function. Used for register blocks
477 * that certain asics don't have (all asics).
478 * Returns the value in the register.
479 */
480static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
481{
482 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
483 BUG();
484 return 0;
485}
486
487/**
488 * amdgpu_invalid_wreg64 - dummy reg write function
489 *
490 * @adev: amdgpu device pointer
491 * @reg: offset of register
492 * @v: value to write to the register
493 *
494 * Dummy register write function. Used for register blocks
495 * that certain asics don't have (all asics).
496 */
497static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
498{
499 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
500 reg, v);
501 BUG();
502}
503
d38ceaf9
AD
504/**
505 * amdgpu_block_invalid_rreg - dummy reg read function
506 *
507 * @adev: amdgpu device pointer
508 * @block: offset of instance
509 * @reg: offset of register
510 *
511 * Dummy register read function. Used for register blocks
512 * that certain asics don't have (all asics).
513 * Returns the value in the register.
514 */
515static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
516 uint32_t block, uint32_t reg)
517{
518 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
519 reg, block);
520 BUG();
521 return 0;
522}
523
524/**
525 * amdgpu_block_invalid_wreg - dummy reg write function
526 *
527 * @adev: amdgpu device pointer
528 * @block: offset of instance
529 * @reg: offset of register
530 * @v: value to write to the register
531 *
532 * Dummy register write function. Used for register blocks
533 * that certain asics don't have (all asics).
534 */
535static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
536 uint32_t block,
537 uint32_t reg, uint32_t v)
538{
539 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
540 reg, block, v);
541 BUG();
542}
543
e3ecdffa
AD
544/**
545 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
546 *
547 * @adev: amdgpu device pointer
548 *
549 * Allocates a scratch page of VRAM for use by various things in the
550 * driver.
551 */
06ec9070 552static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
d38ceaf9 553{
a4a02777
CK
554 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
555 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
556 &adev->vram_scratch.robj,
557 &adev->vram_scratch.gpu_addr,
558 (void **)&adev->vram_scratch.ptr);
d38ceaf9
AD
559}
560
e3ecdffa
AD
561/**
562 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
563 *
564 * @adev: amdgpu device pointer
565 *
566 * Frees the VRAM scratch page.
567 */
06ec9070 568static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 569{
078af1a3 570 amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
d38ceaf9
AD
571}
572
573/**
9c3f2b54 574 * amdgpu_device_program_register_sequence - program an array of registers.
d38ceaf9
AD
575 *
576 * @adev: amdgpu_device pointer
577 * @registers: pointer to the register array
578 * @array_size: size of the register array
579 *
580 * Programs an array of registers with AND and OR masks.
581 * This is a helper for setting golden registers.
582 */
9c3f2b54
AD
583void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
584 const u32 *registers,
585 const u32 array_size)
d38ceaf9
AD
586{
587 u32 tmp, reg, and_mask, or_mask;
588 int i;
589
590 if (array_size % 3)
591 return;
592
593 for (i = 0; i < array_size; i +=3) {
594 reg = registers[i + 0];
595 and_mask = registers[i + 1];
596 or_mask = registers[i + 2];
597
598 if (and_mask == 0xffffffff) {
599 tmp = or_mask;
600 } else {
601 tmp = RREG32(reg);
602 tmp &= ~and_mask;
e0d07657
HZ
603 if (adev->family >= AMDGPU_FAMILY_AI)
604 tmp |= (or_mask & and_mask);
605 else
606 tmp |= or_mask;
d38ceaf9
AD
607 }
608 WREG32(reg, tmp);
609 }
610}
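/*
 * Illustrative sketch (not part of the driver): a golden register list is a
 * flat array of {register offset, AND mask, OR mask} triplets; the offsets
 * and masks below are made-up placeholder values.
 *
 *	static const u32 example_golden_settings[] = {
 *		0x00002460, 0xffffffff, 0x00000001,
 *		0x000098f8, 0x0000000f, 0x00000004,
 *	};
 *
 *	amdgpu_device_program_register_sequence(adev, example_golden_settings,
 *						ARRAY_SIZE(example_golden_settings));
 */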
611
e3ecdffa
AD
612/**
613 * amdgpu_device_pci_config_reset - reset the GPU
614 *
615 * @adev: amdgpu_device pointer
616 *
617 * Resets the GPU using the pci config reset sequence.
618 * Only applicable to asics prior to vega10.
619 */
8111c387 620void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
621{
622 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
623}
624
625/*
626 * GPU doorbell aperture helpers function.
627 */
628/**
06ec9070 629 * amdgpu_device_doorbell_init - Init doorbell driver information.
d38ceaf9
AD
630 *
631 * @adev: amdgpu_device pointer
632 *
633 * Init doorbell driver information (CIK)
634 * Returns 0 on success, error on failure.
635 */
06ec9070 636static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
d38ceaf9 637{
6585661d 638
705e519e
CK
639 /* No doorbell on SI hardware generation */
640 if (adev->asic_type < CHIP_BONAIRE) {
641 adev->doorbell.base = 0;
642 adev->doorbell.size = 0;
643 adev->doorbell.num_doorbells = 0;
644 adev->doorbell.ptr = NULL;
645 return 0;
646 }
647
d6895ad3
CK
648 if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
649 return -EINVAL;
650
22357775
AD
651 amdgpu_asic_init_doorbell_index(adev);
652
d38ceaf9
AD
653 /* doorbell bar mapping */
654 adev->doorbell.base = pci_resource_start(adev->pdev, 2);
655 adev->doorbell.size = pci_resource_len(adev->pdev, 2);
656
edf600da 657 adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
9564f192 658 adev->doorbell_index.max_assignment+1);
d38ceaf9
AD
659 if (adev->doorbell.num_doorbells == 0)
660 return -EINVAL;
661
ec3db8a6 662 /* For Vega, reserve and map two pages on doorbell BAR since SDMA
88dc26e4
OZ
663 * paging queue doorbell uses the second page. The
664 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
665 * doorbells are in the first page. So with paging queue enabled,
666 * num_doorbells is extended by one extra page (0x400 dwords)
ec3db8a6
PY
667 */
668 if (adev->asic_type >= CHIP_VEGA10)
88dc26e4 669 adev->doorbell.num_doorbells += 0x400;
ec3db8a6 670
8972e5d2
CK
671 adev->doorbell.ptr = ioremap(adev->doorbell.base,
672 adev->doorbell.num_doorbells *
673 sizeof(u32));
674 if (adev->doorbell.ptr == NULL)
d38ceaf9 675 return -ENOMEM;
d38ceaf9
AD
676
677 return 0;
678}
679
680/**
06ec9070 681 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
d38ceaf9
AD
682 *
683 * @adev: amdgpu_device pointer
684 *
685 * Tear down doorbell driver information (CIK)
686 */
06ec9070 687static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
d38ceaf9
AD
688{
689 iounmap(adev->doorbell.ptr);
690 adev->doorbell.ptr = NULL;
691}
692
22cb0164 693
d38ceaf9
AD
694
695/*
06ec9070 696 * amdgpu_device_wb_*()
455a7bc2 697 * Writeback is the method by which the GPU updates special pages in memory
ea81a173 698 * with the status of certain GPU events (fences, ring pointers,etc.).
d38ceaf9
AD
699 */
700
701/**
06ec9070 702 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
703 *
704 * @adev: amdgpu_device pointer
705 *
706 * Disables Writeback and frees the Writeback memory (all asics).
707 * Used at driver shutdown.
708 */
06ec9070 709static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
d38ceaf9
AD
710{
711 if (adev->wb.wb_obj) {
a76ed485
AD
712 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
713 &adev->wb.gpu_addr,
714 (void **)&adev->wb.wb);
d38ceaf9
AD
715 adev->wb.wb_obj = NULL;
716 }
717}
718
719/**
06ec9070 720 * amdgpu_device_wb_init- Init Writeback driver info and allocate memory
d38ceaf9
AD
721 *
722 * @adev: amdgpu_device pointer
723 *
455a7bc2 724 * Initializes writeback and allocates writeback memory (all asics).
d38ceaf9
AD
725 * Used at driver startup.
726 * Returns 0 on success or a negative error code on failure.
727 */
06ec9070 728static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
729{
730 int r;
731
732 if (adev->wb.wb_obj == NULL) {
97407b63
AD
733 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
734 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
735 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
736 &adev->wb.wb_obj, &adev->wb.gpu_addr,
737 (void **)&adev->wb.wb);
d38ceaf9
AD
738 if (r) {
739 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
740 return r;
741 }
d38ceaf9
AD
742
743 adev->wb.num_wb = AMDGPU_MAX_WB;
744 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
745
746 /* clear wb memory */
73469585 747 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
748 }
749
750 return 0;
751}
752
753/**
131b4b36 754 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
755 *
756 * @adev: amdgpu_device pointer
757 * @wb: wb index
758 *
759 * Allocate a wb slot for use by the driver (all asics).
760 * Returns 0 on success or -EINVAL on failure.
761 */
131b4b36 762int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9
AD
763{
764 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 765
97407b63 766 if (offset < adev->wb.num_wb) {
7014285a 767 __set_bit(offset, adev->wb.used);
63ae07ca 768 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
769 return 0;
770 } else {
771 return -EINVAL;
772 }
773}
774
d38ceaf9 775/**
131b4b36 776 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
777 *
778 * @adev: amdgpu_device pointer
779 * @wb: wb index
780 *
781 * Free a wb slot allocated for use by the driver (all asics)
782 */
131b4b36 783void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 784{
73469585 785 wb >>= 3;
d38ceaf9 786 if (wb < adev->wb.num_wb)
73469585 787 __clear_bit(wb, adev->wb.used);
d38ceaf9
AD
788}
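/*
 * Illustrative sketch (not part of the driver): allocating a writeback slot,
 * deriving its GPU and CPU addresses, and releasing it again. Error handling
 * is trimmed to the minimum.
 *
 *	u32 wb;
 *	u64 wb_gpu_addr;
 *	volatile u32 *wb_cpu_addr;
 *
 *	if (amdgpu_device_wb_get(adev, &wb))
 *		return -EINVAL;
 *	wb_gpu_addr = adev->wb.gpu_addr + (wb * 4);
 *	wb_cpu_addr = &adev->wb.wb[wb];
 *	(hand wb_gpu_addr to the engine, poll *wb_cpu_addr from the CPU)
 *	amdgpu_device_wb_free(adev, wb);
 */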
789
d6895ad3
CK
790/**
791 * amdgpu_device_resize_fb_bar - try to resize FB BAR
792 *
793 * @adev: amdgpu_device pointer
794 *
795 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
796 * to fail, but if any of the BARs is not accessible after the resize we abort
797 * driver loading by returning -ENODEV.
798 */
799int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
800{
770d13b1 801 u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
d6895ad3 802 u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
31b8adab
CK
803 struct pci_bus *root;
804 struct resource *res;
805 unsigned i;
d6895ad3
CK
806 u16 cmd;
807 int r;
808
0c03b912 809 /* Bypass for VF */
810 if (amdgpu_sriov_vf(adev))
811 return 0;
812
31b8adab
CK
813 /* Check if the root BUS has 64bit memory resources */
814 root = adev->pdev->bus;
815 while (root->parent)
816 root = root->parent;
817
818 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 819 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
820 res->start > 0x100000000ull)
821 break;
822 }
823
824 /* Trying to resize is pointless without a root hub window above 4GB */
825 if (!res)
826 return 0;
827
d6895ad3
CK
828 /* Disable memory decoding while we change the BAR addresses and size */
829 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
830 pci_write_config_word(adev->pdev, PCI_COMMAND,
831 cmd & ~PCI_COMMAND_MEMORY);
832
833 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
06ec9070 834 amdgpu_device_doorbell_fini(adev);
d6895ad3
CK
835 if (adev->asic_type >= CHIP_BONAIRE)
836 pci_release_resource(adev->pdev, 2);
837
838 pci_release_resource(adev->pdev, 0);
839
840 r = pci_resize_resource(adev->pdev, 0, rbar_size);
841 if (r == -ENOSPC)
842 DRM_INFO("Not enough PCI address space for a large BAR.");
843 else if (r && r != -ENOTSUPP)
844 DRM_ERROR("Problem resizing BAR0 (%d).", r);
845
846 pci_assign_unassigned_bus_resources(adev->pdev->bus);
847
848 /* When the doorbell or fb BAR isn't available we have no chance of
849 * using the device.
850 */
06ec9070 851 r = amdgpu_device_doorbell_init(adev);
d6895ad3
CK
852 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
853 return -ENODEV;
854
855 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
856
857 return 0;
858}
a05502e5 859
d38ceaf9
AD
860/*
861 * GPU helpers function.
862 */
863/**
39c640c0 864 * amdgpu_device_need_post - check if the hw needs post or not
d38ceaf9
AD
865 *
866 * @adev: amdgpu_device pointer
867 *
c836fec5
JQ
868 * Check if the asic has been initialized (all asics) at driver startup,
 869 * or whether a post is needed because a hw reset was performed.
 870 * Returns true if post is needed or false if not.
d38ceaf9 871 */
39c640c0 872bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
873{
874 uint32_t reg;
875
bec86378
ML
876 if (amdgpu_sriov_vf(adev))
877 return false;
878
879 if (amdgpu_passthrough(adev)) {
1da2c326
ML
880 /* for FIJI: In the whole GPU pass-through virtualization case, after a VM
 881 * reboot some old SMC firmware still needs the driver to do a vPost,
 882 * otherwise the GPU hangs. SMC firmware above version 22.15 doesn't have
 883 * this flaw, so force vPost for SMC versions below 22.15.
bec86378
ML
884 */
885 if (adev->asic_type == CHIP_FIJI) {
886 int err;
887 uint32_t fw_ver;
888 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
889 /* force vPost if error occurred */
890 if (err)
891 return true;
892
893 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1da2c326
ML
894 if (fw_ver < 0x00160e00)
895 return true;
bec86378 896 }
bec86378 897 }
91fe77eb 898
899 if (adev->has_hw_reset) {
900 adev->has_hw_reset = false;
901 return true;
902 }
903
904 /* bios scratch used on CIK+ */
905 if (adev->asic_type >= CHIP_BONAIRE)
906 return amdgpu_atombios_scratch_need_asic_init(adev);
907
908 /* check MEM_SIZE for older asics */
909 reg = amdgpu_asic_get_config_memsize(adev);
910
911 if ((reg != 0) && (reg != 0xffffffff))
912 return false;
913
914 return true;
bec86378
ML
915}
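/*
 * Illustrative sketch (not part of the driver): the init path uses this check
 * to decide whether to execute the vBIOS ASIC init tables, roughly:
 *
 *	if (amdgpu_device_need_post(adev)) {
 *		r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
 *		if (r)
 *			dev_err(adev->dev, "gpu post error!\n");
 *	}
 */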
916
d38ceaf9
AD
917/* if we get transitioned to only one device, take VGA back */
918/**
06ec9070 919 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9
AD
920 *
921 * @cookie: amdgpu_device pointer
922 * @state: enable/disable vga decode
923 *
924 * Enable/disable vga decode (all asics).
925 * Returns VGA resource flags.
926 */
06ec9070 927static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
d38ceaf9
AD
928{
929 struct amdgpu_device *adev = cookie;
930 amdgpu_asic_set_vga_state(adev, state);
931 if (state)
932 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
933 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
934 else
935 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
936}
937
e3ecdffa
AD
938/**
939 * amdgpu_device_check_block_size - validate the vm block size
940 *
941 * @adev: amdgpu_device pointer
942 *
943 * Validates the vm block size specified via module parameter.
944 * The vm block size defines number of bits in page table versus page directory,
945 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
946 * page table and the remaining bits are in the page directory.
947 */
06ec9070 948static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
949{
950 /* defines number of bits in page table versus page directory,
951 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
952 * page table and the remaining bits are in the page directory */
bab4fee7
JZ
953 if (amdgpu_vm_block_size == -1)
954 return;
a1adf8be 955
bab4fee7 956 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
957 dev_warn(adev->dev, "VM page table size (%d) too small\n",
958 amdgpu_vm_block_size);
97489129 959 amdgpu_vm_block_size = -1;
a1adf8be 960 }
a1adf8be
CZ
961}
962
e3ecdffa
AD
963/**
964 * amdgpu_device_check_vm_size - validate the vm size
965 *
966 * @adev: amdgpu_device pointer
967 *
968 * Validates the vm size in GB specified via module parameter.
969 * The VM size is the size of the GPU virtual memory space in GB.
970 */
06ec9070 971static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 972{
64dab074
AD
973 /* no need to check the default value */
974 if (amdgpu_vm_size == -1)
975 return;
976
83ca145d
ZJ
977 if (amdgpu_vm_size < 1) {
978 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
979 amdgpu_vm_size);
f3368128 980 amdgpu_vm_size = -1;
83ca145d 981 }
83ca145d
ZJ
982}
983
7951e376
RZ
984static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
985{
986 struct sysinfo si;
987 bool is_os_64 = (sizeof(void *) == 8) ? true : false;
988 uint64_t total_memory;
989 uint64_t dram_size_seven_GB = 0x1B8000000;
990 uint64_t dram_size_three_GB = 0xB8000000;
991
992 if (amdgpu_smu_memory_pool_size == 0)
993 return;
994
995 if (!is_os_64) {
996 DRM_WARN("Not 64-bit OS, feature not supported\n");
997 goto def_value;
998 }
999 si_meminfo(&si);
1000 total_memory = (uint64_t)si.totalram * si.mem_unit;
1001
1002 if ((amdgpu_smu_memory_pool_size == 1) ||
1003 (amdgpu_smu_memory_pool_size == 2)) {
1004 if (total_memory < dram_size_three_GB)
1005 goto def_value1;
1006 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1007 (amdgpu_smu_memory_pool_size == 8)) {
1008 if (total_memory < dram_size_seven_GB)
1009 goto def_value1;
1010 } else {
1011 DRM_WARN("Smu memory pool size not supported\n");
1012 goto def_value;
1013 }
1014 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1015
1016 return;
1017
1018def_value1:
1019 DRM_WARN("No enough system memory\n");
1020def_value:
1021 adev->pm.smu_prv_buffer_size = 0;
1022}
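/*
 * Note: amdgpu_smu_memory_pool_size is the amdgpu.smu_memory_pool_size module
 * parameter; accepted values map to a pool of (value << 28) bytes, i.e.
 * 1 -> 256 MiB, 2 -> 512 MiB, 4 -> 1 GiB, 8 -> 2 GiB, and 0 disables the pool.
 * As checked above, values 1/2 additionally require roughly 3 GiB of system
 * DRAM and 4/8 require roughly 7 GiB.
 *
 * Example: booting with amdgpu.smu_memory_pool_size=2 on a machine with
 * enough RAM reserves a 512 MiB pool.
 */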
1023
d38ceaf9 1024/**
06ec9070 1025 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
1026 *
1027 * @adev: amdgpu_device pointer
1028 *
1029 * Validates certain module parameters and updates
1030 * the associated values used by the driver (all asics).
1031 */
912dfc84 1032static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 1033{
912dfc84
EQ
1034 int ret = 0;
1035
5b011235
CZ
1036 if (amdgpu_sched_jobs < 4) {
1037 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1038 amdgpu_sched_jobs);
1039 amdgpu_sched_jobs = 4;
76117507 1040 } else if (!is_power_of_2(amdgpu_sched_jobs)){
5b011235
CZ
1041 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1042 amdgpu_sched_jobs);
1043 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1044 }
d38ceaf9 1045
83e74db6 1046 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
1047 /* gart size must be greater or equal to 32M */
1048 dev_warn(adev->dev, "gart size (%d) too small\n",
1049 amdgpu_gart_size);
83e74db6 1050 amdgpu_gart_size = -1;
d38ceaf9
AD
1051 }
1052
36d38372 1053 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 1054 /* gtt size must be greater or equal to 32M */
36d38372
CK
1055 dev_warn(adev->dev, "gtt size (%d) too small\n",
1056 amdgpu_gtt_size);
1057 amdgpu_gtt_size = -1;
d38ceaf9
AD
1058 }
1059
d07f14be
RH
1060 /* valid range is between 4 and 9 inclusive */
1061 if (amdgpu_vm_fragment_size != -1 &&
1062 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1063 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1064 amdgpu_vm_fragment_size = -1;
1065 }
1066
7951e376
RZ
1067 amdgpu_device_check_smu_prv_buffer_size(adev);
1068
06ec9070 1069 amdgpu_device_check_vm_size(adev);
d38ceaf9 1070
06ec9070 1071 amdgpu_device_check_block_size(adev);
6a7f76e7 1072
19aede77 1073 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84
EQ
1074
1075 return ret;
d38ceaf9
AD
1076}
1077
1078/**
1079 * amdgpu_switcheroo_set_state - set switcheroo state
1080 *
1081 * @pdev: pci dev pointer
1694467b 1082 * @state: vga_switcheroo state
d38ceaf9
AD
1083 *
1084 * Callback for the switcheroo driver. Suspends or resumes the
1085 * asics before or after it is powered up using ACPI methods.
1086 */
1087static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
1088{
1089 struct drm_device *dev = pci_get_drvdata(pdev);
de185019 1090 int r;
d38ceaf9 1091
31af062a 1092 if (amdgpu_device_supports_boco(dev) && state == VGA_SWITCHEROO_OFF)
d38ceaf9
AD
1093 return;
1094
1095 if (state == VGA_SWITCHEROO_ON) {
7ca85295 1096 pr_info("amdgpu: switched on\n");
d38ceaf9
AD
1097 /* don't suspend or resume card normally */
1098 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1099
de185019
AD
1100 pci_set_power_state(dev->pdev, PCI_D0);
1101 pci_restore_state(dev->pdev);
1102 r = pci_enable_device(dev->pdev);
1103 if (r)
1104 DRM_WARN("pci_enable_device failed (%d)\n", r);
1105 amdgpu_device_resume(dev, true);
d38ceaf9 1106
d38ceaf9
AD
1107 dev->switch_power_state = DRM_SWITCH_POWER_ON;
1108 drm_kms_helper_poll_enable(dev);
1109 } else {
7ca85295 1110 pr_info("amdgpu: switched off\n");
d38ceaf9
AD
1111 drm_kms_helper_poll_disable(dev);
1112 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
de185019
AD
1113 amdgpu_device_suspend(dev, true);
1114 pci_save_state(dev->pdev);
1115 /* Shut down the device */
1116 pci_disable_device(dev->pdev);
1117 pci_set_power_state(dev->pdev, PCI_D3cold);
d38ceaf9
AD
1118 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1119 }
1120}
1121
1122/**
1123 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1124 *
1125 * @pdev: pci dev pointer
1126 *
1127 * Callback for the switcheroo driver. Check if the switcheroo
1128 * state can be changed.
1129 * Returns true if the state can be changed, false if not.
1130 */
1131static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1132{
1133 struct drm_device *dev = pci_get_drvdata(pdev);
1134
1135 /*
1136 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1137 * locking inversion with the driver load path. And the access here is
1138 * completely racy anyway. So don't bother with locking for now.
1139 */
1140 return dev->open_count == 0;
1141}
1142
1143static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1144 .set_gpu_state = amdgpu_switcheroo_set_state,
1145 .reprobe = NULL,
1146 .can_switch = amdgpu_switcheroo_can_switch,
1147};
1148
e3ecdffa
AD
1149/**
1150 * amdgpu_device_ip_set_clockgating_state - set the CG state
1151 *
87e3f136 1152 * @dev: amdgpu_device pointer
e3ecdffa
AD
1153 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1154 * @state: clockgating state (gate or ungate)
1155 *
1156 * Sets the requested clockgating state for all instances of
1157 * the hardware IP specified.
1158 * Returns the error code from the last instance.
1159 */
43fa561f 1160int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1161 enum amd_ip_block_type block_type,
1162 enum amd_clockgating_state state)
d38ceaf9 1163{
43fa561f 1164 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1165 int i, r = 0;
1166
1167 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1168 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1169 continue;
c722865a
RZ
1170 if (adev->ip_blocks[i].version->type != block_type)
1171 continue;
1172 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1173 continue;
1174 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1175 (void *)adev, state);
1176 if (r)
1177 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1178 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1179 }
1180 return r;
1181}
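/*
 * Illustrative sketch (not part of the driver): other components (e.g. the
 * powerplay/SMU code) use this helper to gate or ungate clocks for a single
 * IP type, for instance:
 *
 *	amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
 *					       AMD_CG_STATE_GATE);
 */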
1182
e3ecdffa
AD
1183/**
1184 * amdgpu_device_ip_set_powergating_state - set the PG state
1185 *
87e3f136 1186 * @dev: amdgpu_device pointer
e3ecdffa
AD
1187 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1188 * @state: powergating state (gate or ungate)
1189 *
1190 * Sets the requested powergating state for all instances of
1191 * the hardware IP specified.
1192 * Returns the error code from the last instance.
1193 */
43fa561f 1194int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1195 enum amd_ip_block_type block_type,
1196 enum amd_powergating_state state)
d38ceaf9 1197{
43fa561f 1198 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1199 int i, r = 0;
1200
1201 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1202 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1203 continue;
c722865a
RZ
1204 if (adev->ip_blocks[i].version->type != block_type)
1205 continue;
1206 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1207 continue;
1208 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1209 (void *)adev, state);
1210 if (r)
1211 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1212 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1213 }
1214 return r;
1215}
1216
e3ecdffa
AD
1217/**
1218 * amdgpu_device_ip_get_clockgating_state - get the CG state
1219 *
1220 * @adev: amdgpu_device pointer
1221 * @flags: clockgating feature flags
1222 *
1223 * Walks the list of IPs on the device and updates the clockgating
1224 * flags for each IP.
1225 * Updates @flags with the feature flags for each hardware IP where
1226 * clockgating is enabled.
1227 */
2990a1fc
AD
1228void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1229 u32 *flags)
6cb2d4e4
HR
1230{
1231 int i;
1232
1233 for (i = 0; i < adev->num_ip_blocks; i++) {
1234 if (!adev->ip_blocks[i].status.valid)
1235 continue;
1236 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1237 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1238 }
1239}
1240
e3ecdffa
AD
1241/**
1242 * amdgpu_device_ip_wait_for_idle - wait for idle
1243 *
1244 * @adev: amdgpu_device pointer
1245 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1246 *
1247 * Waits for the requested hardware IP to be idle.
1248 * Returns 0 for success or a negative error code on failure.
1249 */
2990a1fc
AD
1250int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1251 enum amd_ip_block_type block_type)
5dbbb60b
AD
1252{
1253 int i, r;
1254
1255 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1256 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1257 continue;
a1255107
AD
1258 if (adev->ip_blocks[i].version->type == block_type) {
1259 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1260 if (r)
1261 return r;
1262 break;
1263 }
1264 }
1265 return 0;
1266
1267}
1268
e3ecdffa
AD
1269/**
1270 * amdgpu_device_ip_is_idle - is the hardware IP idle
1271 *
1272 * @adev: amdgpu_device pointer
1273 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1274 *
1275 * Check if the hardware IP is idle or not.
1277 * Returns true if the IP is idle, false if not.
1277 */
2990a1fc
AD
1278bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1279 enum amd_ip_block_type block_type)
5dbbb60b
AD
1280{
1281 int i;
1282
1283 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1284 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1285 continue;
a1255107
AD
1286 if (adev->ip_blocks[i].version->type == block_type)
1287 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1288 }
1289 return true;
1290
1291}
1292
e3ecdffa
AD
1293/**
1294 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1295 *
1296 * @adev: amdgpu_device pointer
87e3f136 1297 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1298 *
1299 * Returns a pointer to the hardware IP block structure
1300 * if it exists for the asic, otherwise NULL.
1301 */
2990a1fc
AD
1302struct amdgpu_ip_block *
1303amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1304 enum amd_ip_block_type type)
d38ceaf9
AD
1305{
1306 int i;
1307
1308 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1309 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1310 return &adev->ip_blocks[i];
1311
1312 return NULL;
1313}
1314
1315/**
2990a1fc 1316 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1317 *
1318 * @adev: amdgpu_device pointer
5fc3aeeb 1319 * @type: enum amd_ip_block_type
d38ceaf9
AD
1320 * @major: major version
1321 * @minor: minor version
1322 *
1323 * return 0 if equal or greater
1324 * return 1 if smaller or the ip_block doesn't exist
1325 */
2990a1fc
AD
1326int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1327 enum amd_ip_block_type type,
1328 u32 major, u32 minor)
d38ceaf9 1329{
2990a1fc 1330 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1331
a1255107
AD
1332 if (ip_block && ((ip_block->version->major > major) ||
1333 ((ip_block->version->major == major) &&
1334 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1335 return 0;
1336
1337 return 1;
1338}
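/*
 * Illustrative sketch (not part of the driver): checking whether the asic
 * carries at least a given major/minor version of an IP block, e.g. VCE 3.0.
 * vce_3_0_or_newer_path() is a hypothetical placeholder.
 *
 *	if (amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_VCE,
 *					       3, 0) == 0)
 *		vce_3_0_or_newer_path();
 */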
1339
a1255107 1340/**
2990a1fc 1341 * amdgpu_device_ip_block_add
a1255107
AD
1342 *
1343 * @adev: amdgpu_device pointer
1344 * @ip_block_version: pointer to the IP to add
1345 *
1346 * Adds the IP block driver information to the collection of IPs
1347 * on the asic.
1348 */
2990a1fc
AD
1349int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1350 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1351{
1352 if (!ip_block_version)
1353 return -EINVAL;
1354
e966a725 1355 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1356 ip_block_version->funcs->name);
1357
a1255107
AD
1358 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1359
1360 return 0;
1361}
1362
e3ecdffa
AD
1363/**
1364 * amdgpu_device_enable_virtual_display - enable virtual display feature
1365 *
1366 * @adev: amdgpu_device pointer
1367 *
1368 * Enables the virtual display feature if the user has enabled it via
 1369 * the module parameter virtual_display. This feature provides virtual
 1370 * display hardware on headless boards or in virtualized environments.
 1371 * This function parses and validates the configuration string specified by
 1372 * the user and configures the virtual display configuration (number of
 1373 * virtual connectors, crtcs, etc.) specified.
1374 */
483ef985 1375static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1376{
1377 adev->enable_virtual_display = false;
1378
1379 if (amdgpu_virtual_display) {
1380 struct drm_device *ddev = adev->ddev;
1381 const char *pci_address_name = pci_name(ddev->pdev);
0f66356d 1382 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1383
1384 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1385 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1386 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1387 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1388 if (!strcmp("all", pciaddname)
1389 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1390 long num_crtc;
1391 int res = -1;
1392
9accf2fd 1393 adev->enable_virtual_display = true;
0f66356d
ED
1394
1395 if (pciaddname_tmp)
1396 res = kstrtol(pciaddname_tmp, 10,
1397 &num_crtc);
1398
1399 if (!res) {
1400 if (num_crtc < 1)
1401 num_crtc = 1;
1402 if (num_crtc > 6)
1403 num_crtc = 6;
1404 adev->mode_info.num_crtc = num_crtc;
1405 } else {
1406 adev->mode_info.num_crtc = 1;
1407 }
9accf2fd
ED
1408 break;
1409 }
1410 }
1411
0f66356d
ED
1412 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1413 amdgpu_virtual_display, pci_address_name,
1414 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
1415
1416 kfree(pciaddstr);
1417 }
1418}
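/*
 * Illustrative examples (not part of the driver) of the module parameter
 * string parsed above; entries are separated by ';' and each entry is a PCI
 * address (or "all") optionally followed by ",<num_crtc>" (clamped to 1..6):
 *
 *	amdgpu.virtual_display=0000:26:00.0,2
 *	amdgpu.virtual_display=all,1
 *	amdgpu.virtual_display=0000:26:00.0;0000:27:00.0,3
 */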
1419
e3ecdffa
AD
1420/**
1421 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1422 *
1423 * @adev: amdgpu_device pointer
1424 *
1425 * Parses the asic configuration parameters specified in the gpu info
1426 * firmware and makes them available to the driver for use in configuring
1427 * the asic.
1428 * Returns 0 on success, -EINVAL on failure.
1429 */
e2a75f88
AD
1430static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1431{
e2a75f88
AD
1432 const char *chip_name;
1433 char fw_name[30];
1434 int err;
1435 const struct gpu_info_firmware_header_v1_0 *hdr;
1436
ab4fe3e1
HR
1437 adev->firmware.gpu_info_fw = NULL;
1438
e2a75f88
AD
1439 switch (adev->asic_type) {
1440 case CHIP_TOPAZ:
1441 case CHIP_TONGA:
1442 case CHIP_FIJI:
e2a75f88 1443 case CHIP_POLARIS10:
cc07f18d 1444 case CHIP_POLARIS11:
e2a75f88 1445 case CHIP_POLARIS12:
cc07f18d 1446 case CHIP_VEGAM:
e2a75f88
AD
1447 case CHIP_CARRIZO:
1448 case CHIP_STONEY:
1449#ifdef CONFIG_DRM_AMDGPU_SI
1450 case CHIP_VERDE:
1451 case CHIP_TAHITI:
1452 case CHIP_PITCAIRN:
1453 case CHIP_OLAND:
1454 case CHIP_HAINAN:
1455#endif
1456#ifdef CONFIG_DRM_AMDGPU_CIK
1457 case CHIP_BONAIRE:
1458 case CHIP_HAWAII:
1459 case CHIP_KAVERI:
1460 case CHIP_KABINI:
1461 case CHIP_MULLINS:
1462#endif
27c0bc71 1463 case CHIP_VEGA20:
e2a75f88
AD
1464 default:
1465 return 0;
1466 case CHIP_VEGA10:
1467 chip_name = "vega10";
1468 break;
3f76dced
AD
1469 case CHIP_VEGA12:
1470 chip_name = "vega12";
1471 break;
2d2e5e7e 1472 case CHIP_RAVEN:
54c4d17e
FX
1473 if (adev->rev_id >= 8)
1474 chip_name = "raven2";
741deade
AD
1475 else if (adev->pdev->device == 0x15d8)
1476 chip_name = "picasso";
54c4d17e
FX
1477 else
1478 chip_name = "raven";
2d2e5e7e 1479 break;
65e60f6e
LM
1480 case CHIP_ARCTURUS:
1481 chip_name = "arcturus";
1482 break;
b51a26a0
HR
1483 case CHIP_RENOIR:
1484 chip_name = "renoir";
1485 break;
23c6268e
HR
1486 case CHIP_NAVI10:
1487 chip_name = "navi10";
1488 break;
ed42cfe1
XY
1489 case CHIP_NAVI14:
1490 chip_name = "navi14";
1491 break;
42b325e5
XY
1492 case CHIP_NAVI12:
1493 chip_name = "navi12";
1494 break;
e2a75f88
AD
1495 }
1496
1497 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
ab4fe3e1 1498 err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
e2a75f88
AD
1499 if (err) {
1500 dev_err(adev->dev,
1501 "Failed to load gpu_info firmware \"%s\"\n",
1502 fw_name);
1503 goto out;
1504 }
ab4fe3e1 1505 err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
e2a75f88
AD
1506 if (err) {
1507 dev_err(adev->dev,
1508 "Failed to validate gpu_info firmware \"%s\"\n",
1509 fw_name);
1510 goto out;
1511 }
1512
ab4fe3e1 1513 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
1514 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1515
1516 switch (hdr->version_major) {
1517 case 1:
1518 {
1519 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 1520 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
1521 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1522
ec51d3fa
XY
1523 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
1524 goto parse_soc_bounding_box;
1525
b5ab16bf
AD
1526 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1527 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1528 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1529 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 1530 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
1531 le32_to_cpu(gpu_info_fw->gc_num_tccs);
1532 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1533 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1534 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1535 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 1536 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
1537 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1538 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
1539 adev->gfx.cu_info.max_waves_per_simd =
1540 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1541 adev->gfx.cu_info.max_scratch_slots_per_cu =
1542 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1543 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 1544 if (hdr->version_minor >= 1) {
35c2e910
HZ
1545 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1546 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1547 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1548 adev->gfx.config.num_sc_per_sh =
1549 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1550 adev->gfx.config.num_packer_per_sc =
1551 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1552 }
ec51d3fa
XY
1553
1554parse_soc_bounding_box:
ec51d3fa
XY
1555 /*
1556 * soc bounding box info is not integrated in the discovery table,
1557 * we always need to parse it from gpu info firmware.
1558 */
48321c3d
HW
1559 if (hdr->version_minor == 2) {
1560 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1561 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1562 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1563 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1564 }
e2a75f88
AD
1565 break;
1566 }
1567 default:
1568 dev_err(adev->dev,
1569 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1570 err = -EINVAL;
1571 goto out;
1572 }
1573out:
e2a75f88
AD
1574 return err;
1575}
1576
e3ecdffa
AD
1577/**
1578 * amdgpu_device_ip_early_init - run early init for hardware IPs
1579 *
1580 * @adev: amdgpu_device pointer
1581 *
1582 * Early initialization pass for hardware IPs. The hardware IPs that make
1583 * up each asic are discovered and each IP's early_init callback is run. This
1584 * is the first stage in initializing the asic.
1585 * Returns 0 on success, negative error code on failure.
1586 */
06ec9070 1587static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 1588{
aaa36a97 1589 int i, r;
d38ceaf9 1590
483ef985 1591 amdgpu_device_enable_virtual_display(adev);
a6be7570 1592
d38ceaf9 1593 switch (adev->asic_type) {
aaa36a97
AD
1594 case CHIP_TOPAZ:
1595 case CHIP_TONGA:
48299f95 1596 case CHIP_FIJI:
2cc0c0b5 1597 case CHIP_POLARIS10:
32cc7e53 1598 case CHIP_POLARIS11:
c4642a47 1599 case CHIP_POLARIS12:
32cc7e53 1600 case CHIP_VEGAM:
aaa36a97 1601 case CHIP_CARRIZO:
39bb0c92
SL
1602 case CHIP_STONEY:
1603 if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
aaa36a97
AD
1604 adev->family = AMDGPU_FAMILY_CZ;
1605 else
1606 adev->family = AMDGPU_FAMILY_VI;
1607
1608 r = vi_set_ip_blocks(adev);
1609 if (r)
1610 return r;
1611 break;
33f34802
KW
1612#ifdef CONFIG_DRM_AMDGPU_SI
1613 case CHIP_VERDE:
1614 case CHIP_TAHITI:
1615 case CHIP_PITCAIRN:
1616 case CHIP_OLAND:
1617 case CHIP_HAINAN:
295d0daf 1618 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
1619 r = si_set_ip_blocks(adev);
1620 if (r)
1621 return r;
1622 break;
1623#endif
a2e73f56
AD
1624#ifdef CONFIG_DRM_AMDGPU_CIK
1625 case CHIP_BONAIRE:
1626 case CHIP_HAWAII:
1627 case CHIP_KAVERI:
1628 case CHIP_KABINI:
1629 case CHIP_MULLINS:
1630 if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII))
1631 adev->family = AMDGPU_FAMILY_CI;
1632 else
1633 adev->family = AMDGPU_FAMILY_KV;
1634
1635 r = cik_set_ip_blocks(adev);
1636 if (r)
1637 return r;
1638 break;
1639#endif
e48a3cd9
AD
1640 case CHIP_VEGA10:
1641 case CHIP_VEGA12:
e4bd8170 1642 case CHIP_VEGA20:
e48a3cd9 1643 case CHIP_RAVEN:
61cf44c1 1644 case CHIP_ARCTURUS:
b51a26a0
HR
1645 case CHIP_RENOIR:
1646 if (adev->asic_type == CHIP_RAVEN ||
1647 adev->asic_type == CHIP_RENOIR)
2ca8a5d2
CZ
1648 adev->family = AMDGPU_FAMILY_RV;
1649 else
1650 adev->family = AMDGPU_FAMILY_AI;
460826e6
KW
1651
1652 r = soc15_set_ip_blocks(adev);
1653 if (r)
1654 return r;
1655 break;
0a5b8c7b 1656 case CHIP_NAVI10:
7ecb5cd4 1657 case CHIP_NAVI14:
4808cf9c 1658 case CHIP_NAVI12:
0a5b8c7b
HR
1659 adev->family = AMDGPU_FAMILY_NV;
1660
1661 r = nv_set_ip_blocks(adev);
1662 if (r)
1663 return r;
1664 break;
d38ceaf9
AD
1665 default:
1666 /* FIXME: not supported yet */
1667 return -EINVAL;
1668 }
1669
e2a75f88
AD
1670 r = amdgpu_device_parse_gpu_info_fw(adev);
1671 if (r)
1672 return r;
1673
ec51d3fa
XY
1674 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
1675 amdgpu_discovery_get_gfx_info(adev);
1676
1884734a 1677 amdgpu_amdkfd_device_probe(adev);
1678
3149d9da
XY
1679 if (amdgpu_sriov_vf(adev)) {
1680 r = amdgpu_virt_request_full_gpu(adev, true);
1681 if (r)
5ffa61c1 1682 return -EAGAIN;
3149d9da
XY
1683 }
1684
3b94fb10 1685 adev->pm.pp_feature = amdgpu_pp_feature_mask;
a35ad98b 1686 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
00544006 1687 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
00f54b97 1688
d38ceaf9
AD
1689 for (i = 0; i < adev->num_ip_blocks; i++) {
1690 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
ed8cf00c
HR
1691 DRM_ERROR("disabled ip block: %d <%s>\n",
1692 i, adev->ip_blocks[i].version->funcs->name);
a1255107 1693 adev->ip_blocks[i].status.valid = false;
d38ceaf9 1694 } else {
a1255107
AD
1695 if (adev->ip_blocks[i].version->funcs->early_init) {
1696 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 1697 if (r == -ENOENT) {
a1255107 1698 adev->ip_blocks[i].status.valid = false;
2c1a2784 1699 } else if (r) {
a1255107
AD
1700 DRM_ERROR("early_init of IP block <%s> failed %d\n",
1701 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 1702 return r;
2c1a2784 1703 } else {
a1255107 1704 adev->ip_blocks[i].status.valid = true;
2c1a2784 1705 }
974e6b64 1706 } else {
a1255107 1707 adev->ip_blocks[i].status.valid = true;
d38ceaf9 1708 }
d38ceaf9 1709 }
21a249ca
AD
1710 /* get the vbios after the asic_funcs are set up */
1711 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
1712 /* Read BIOS */
1713 if (!amdgpu_get_bios(adev))
1714 return -EINVAL;
1715
1716 r = amdgpu_atombios_init(adev);
1717 if (r) {
1718 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
1719 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
1720 return r;
1721 }
1722 }
d38ceaf9
AD
1723 }
1724
395d1fb9
NH
1725 adev->cg_flags &= amdgpu_cg_mask;
1726 adev->pg_flags &= amdgpu_pg_mask;
1727
d38ceaf9
AD
1728 return 0;
1729}
1730
0a4f2520
RZ
1731static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
1732{
1733 int i, r;
1734
1735 for (i = 0; i < adev->num_ip_blocks; i++) {
1736 if (!adev->ip_blocks[i].status.sw)
1737 continue;
1738 if (adev->ip_blocks[i].status.hw)
1739 continue;
1740 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 1741 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
1742 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
1743 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1744 if (r) {
1745 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1746 adev->ip_blocks[i].version->funcs->name, r);
1747 return r;
1748 }
1749 adev->ip_blocks[i].status.hw = true;
1750 }
1751 }
1752
1753 return 0;
1754}
1755
1756static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
1757{
1758 int i, r;
1759
1760 for (i = 0; i < adev->num_ip_blocks; i++) {
1761 if (!adev->ip_blocks[i].status.sw)
1762 continue;
1763 if (adev->ip_blocks[i].status.hw)
1764 continue;
1765 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1766 if (r) {
1767 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1768 adev->ip_blocks[i].version->funcs->name, r);
1769 return r;
1770 }
1771 adev->ip_blocks[i].status.hw = true;
1772 }
1773
1774 return 0;
1775}
1776
7a3e0bb2
RZ
1777static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
1778{
1779 int r = 0;
1780 int i;
80f41f84 1781 uint32_t smu_version;
7a3e0bb2
RZ
1782
1783 if (adev->asic_type >= CHIP_VEGA10) {
1784 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53
ML
1785 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
1786 continue;
1787
1788 /* no need to do the fw loading again if already done*/
1789 if (adev->ip_blocks[i].status.hw == true)
1790 break;
1791
1792 if (adev->in_gpu_reset || adev->in_suspend) {
1793 r = adev->ip_blocks[i].version->funcs->resume(adev);
1794 if (r) {
1795 DRM_ERROR("resume of IP block <%s> failed %d\n",
7a3e0bb2 1796 adev->ip_blocks[i].version->funcs->name, r);
482f0e53
ML
1797 return r;
1798 }
1799 } else {
1800 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1801 if (r) {
1802 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1803 adev->ip_blocks[i].version->funcs->name, r);
1804 return r;
7a3e0bb2 1805 }
7a3e0bb2 1806 }
482f0e53
ML
1807
1808 adev->ip_blocks[i].status.hw = true;
1809 break;
7a3e0bb2
RZ
1810 }
1811 }
482f0e53 1812
80f41f84 1813 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 1814
80f41f84 1815 return r;
7a3e0bb2
RZ
1816}
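/*
 * Illustrative note (not from the original source): this helper runs
 * between hw init phase1 and phase2 (see amdgpu_device_ip_init below and
 * amdgpu_device_ip_resume), so the PSP block is brought up and the SMU
 * firmware is loaded before the remaining IP blocks are hardware
 * initialized. On reset/suspend paths the PSP block is resumed instead of
 * re-running hw_init.
 */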
1817
e3ecdffa
AD
1818/**
1819 * amdgpu_device_ip_init - run init for hardware IPs
1820 *
1821 * @adev: amdgpu_device pointer
1822 *
1823 * Main initialization pass for hardware IPs. The list of all the hardware
1824 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
1825 * are run. sw_init initializes the software state associated with each IP
1826 * and hw_init initializes the hardware associated with each IP.
1827 * Returns 0 on success, negative error code on failure.
1828 */
06ec9070 1829static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
1830{
1831 int i, r;
1832
c030f2e4 1833 r = amdgpu_ras_init(adev);
1834 if (r)
1835 return r;
1836
d38ceaf9 1837 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1838 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 1839 continue;
a1255107 1840 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 1841 if (r) {
a1255107
AD
1842 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
1843 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 1844 goto init_failed;
2c1a2784 1845 }
a1255107 1846 adev->ip_blocks[i].status.sw = true;
bfca0289 1847
d38ceaf9 1848 /* need to do gmc hw init early so we can allocate gpu mem */
a1255107 1849 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
06ec9070 1850 r = amdgpu_device_vram_scratch_init(adev);
2c1a2784
AD
1851 if (r) {
1852 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
72d3f592 1853 goto init_failed;
2c1a2784 1854 }
a1255107 1855 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
1856 if (r) {
1857 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 1858 goto init_failed;
2c1a2784 1859 }
06ec9070 1860 r = amdgpu_device_wb_init(adev);
2c1a2784 1861 if (r) {
06ec9070 1862 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 1863 goto init_failed;
2c1a2784 1864 }
a1255107 1865 adev->ip_blocks[i].status.hw = true;
2493664f
ML
1866
1867 /* right after GMC hw init, we create CSA */
f92d5c61 1868 if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
1e256e27
RZ
1869 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
1870 AMDGPU_GEM_DOMAIN_VRAM,
1871 AMDGPU_CSA_SIZE);
2493664f
ML
1872 if (r) {
1873 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 1874 goto init_failed;
2493664f
ML
1875 }
1876 }
d38ceaf9
AD
1877 }
1878 }
1879
533aed27
AG
1880 r = amdgpu_ib_pool_init(adev);
1881 if (r) {
1882 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
1883 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
1884 goto init_failed;
1885 }
1886
c8963ea4
RZ
1887 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
1888 if (r)
72d3f592 1889 goto init_failed;
0a4f2520
RZ
1890
1891 r = amdgpu_device_ip_hw_init_phase1(adev);
1892 if (r)
72d3f592 1893 goto init_failed;
0a4f2520 1894
7a3e0bb2
RZ
1895 r = amdgpu_device_fw_loading(adev);
1896 if (r)
72d3f592 1897 goto init_failed;
7a3e0bb2 1898
0a4f2520
RZ
1899 r = amdgpu_device_ip_hw_init_phase2(adev);
1900 if (r)
72d3f592 1901 goto init_failed;
d38ceaf9 1902
121a2bc6
AG
1903 /*
 1905 * retired pages will be loaded from eeprom and reserved here;
 1906 * this must be called after amdgpu_device_ip_hw_init_phase2 since,
 1907 * for some ASICs, the RAS EEPROM code relies on the SMU being fully
 1908 * functional for I2C communication, which is only true at this point.
 1909 * recovery_init may fail, but it can free all resources allocated by
 1910 * itself and its failure should not stop the amdgpu init process.
 1911 *
 1912 * Note: theoretically, this should be called before all VRAM allocations
 1913 * to protect the retired pages from being allocated and used.
1913 */
1914 amdgpu_ras_recovery_init(adev);
1915
3e2e2ab5
HZ
1916 if (adev->gmc.xgmi.num_physical_nodes > 1)
1917 amdgpu_xgmi_add_device(adev);
1884734a 1918 amdgpu_amdkfd_device_init(adev);
c6332b97 1919
72d3f592 1920init_failed:
d3c117e5 1921 if (amdgpu_sriov_vf(adev)) {
72d3f592
ED
1922 if (!r)
1923 amdgpu_virt_init_data_exchange(adev);
c6332b97 1924 amdgpu_virt_release_full_gpu(adev, true);
d3c117e5 1925 }
c6332b97 1926
72d3f592 1927 return r;
d38ceaf9
AD
1928}
1929
e3ecdffa
AD
1930/**
1931 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
1932 *
1933 * @adev: amdgpu_device pointer
1934 *
1935 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
1936 * this function before a GPU reset. If the value is retained after a
 1937 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
1938 */
06ec9070 1939static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
1940{
1941 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
1942}
1943
e3ecdffa
AD
1944/**
1945 * amdgpu_device_check_vram_lost - check if vram is valid
1946 *
1947 * @adev: amdgpu_device pointer
1948 *
1949 * Checks the reset magic value written to the gart pointer in VRAM.
 1950 * The driver calls this after a GPU reset to see if the contents of
 1951 * VRAM have been lost or not.
 1952 * Returns true if vram is lost, false if not.
1953 */
06ec9070 1954static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8
CZ
1955{
1956 return !!memcmp(adev->gart.ptr, adev->reset_magic,
1957 AMDGPU_RESET_MAGIC_NUM);
1958}
1959
e3ecdffa 1960/**
1112a46b 1961 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
1962 *
1963 * @adev: amdgpu_device pointer
b8b72130 1964 * @state: clockgating state (gate or ungate)
e3ecdffa 1965 *
e3ecdffa 1966 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
1967 * set_clockgating_state callbacks are run.
 1968 * During late init this pass enables clockgating for the hardware IPs;
 1969 * during fini or suspend it disables clockgating for them.
e3ecdffa
AD
1970 * Returns 0 on success, negative error code on failure.
1971 */
fdd34271 1972
1112a46b
RZ
1973static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
1974 enum amd_clockgating_state state)
d38ceaf9 1975{
1112a46b 1976 int i, j, r;
d38ceaf9 1977
4a2ba394
SL
1978 if (amdgpu_emu_mode == 1)
1979 return 0;
1980
1112a46b
RZ
1981 for (j = 0; j < adev->num_ip_blocks; j++) {
1982 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 1983 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 1984 continue;
4a446d55 1985 /* skip CG for VCE/UVD, it's handled specially */
a1255107 1986 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 1987 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 1988 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 1989 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
57716327 1990 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 1991 /* enable clockgating to save power */
a1255107 1992 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 1993 state);
4a446d55
AD
1994 if (r) {
1995 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 1996 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
1997 return r;
1998 }
b0b00ff1 1999 }
d38ceaf9 2000 }
06b18f61 2001
c9f96fd5
RZ
2002 return 0;
2003}
2004
1112a46b 2005static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
c9f96fd5 2006{
1112a46b 2007 int i, j, r;
06b18f61 2008
c9f96fd5
RZ
2009 if (amdgpu_emu_mode == 1)
2010 return 0;
2011
1112a46b
RZ
2012 for (j = 0; j < adev->num_ip_blocks; j++) {
2013 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 2014 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5
RZ
2015 continue;
2016 /* skip CG for VCE/UVD, it's handled specially */
2017 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2018 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2019 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
52f2e779 2020 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
c9f96fd5
RZ
2021 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2022 /* enable powergating to save power */
2023 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 2024 state);
c9f96fd5
RZ
2025 if (r) {
2026 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2027 adev->ip_blocks[i].version->funcs->name, r);
2028 return r;
2029 }
2030 }
2031 }
2dc80b00
S
2032 return 0;
2033}
2034
beff74bc
AD
2035static int amdgpu_device_enable_mgpu_fan_boost(void)
2036{
2037 struct amdgpu_gpu_instance *gpu_ins;
2038 struct amdgpu_device *adev;
2039 int i, ret = 0;
2040
2041 mutex_lock(&mgpu_info.mutex);
2042
2043 /*
2044 * MGPU fan boost feature should be enabled
2045 * only when there are two or more dGPUs in
2046 * the system
2047 */
2048 if (mgpu_info.num_dgpu < 2)
2049 goto out;
2050
2051 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2052 gpu_ins = &(mgpu_info.gpu_ins[i]);
2053 adev = gpu_ins->adev;
2054 if (!(adev->flags & AMD_IS_APU) &&
2055 !gpu_ins->mgpu_fan_enabled &&
2056 adev->powerplay.pp_funcs &&
2057 adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
2058 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2059 if (ret)
2060 break;
2061
2062 gpu_ins->mgpu_fan_enabled = 1;
2063 }
2064 }
2065
2066out:
2067 mutex_unlock(&mgpu_info.mutex);
2068
2069 return ret;
2070}
2071
e3ecdffa
AD
2072/**
2073 * amdgpu_device_ip_late_init - run late init for hardware IPs
2074 *
2075 * @adev: amdgpu_device pointer
2076 *
2077 * Late initialization pass for hardware IPs. The list of all the hardware
2078 * IPs that make up the asic is walked and the late_init callbacks are run.
2079 * late_init covers any special initialization that an IP requires
 2080 * after all of them have been initialized or something that needs to happen
2081 * late in the init process.
2082 * Returns 0 on success, negative error code on failure.
2083 */
06ec9070 2084static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00 2085{
60599a03 2086 struct amdgpu_gpu_instance *gpu_instance;
2dc80b00
S
2087 int i = 0, r;
2088
2089 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 2090 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
2091 continue;
2092 if (adev->ip_blocks[i].version->funcs->late_init) {
2093 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2094 if (r) {
2095 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2096 adev->ip_blocks[i].version->funcs->name, r);
2097 return r;
2098 }
2dc80b00 2099 }
73f847db 2100 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
2101 }
2102
1112a46b
RZ
2103 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2104 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 2105
06ec9070 2106 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 2107
beff74bc
AD
2108 r = amdgpu_device_enable_mgpu_fan_boost();
2109 if (r)
2110 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2111
60599a03
EQ
2112
2113 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2114 mutex_lock(&mgpu_info.mutex);
2115
2116 /*
 2117 * Reset the device p-state to low, as it was booted with the high p-state.
 2118 *
 2119 * This should be performed only after all devices from the same
 2120 * hive get initialized.
 2121 *
 2122 * However, the number of devices in a hive is not known in advance;
 2123 * it is counted up one by one as the devices initialize.
 2124 *
 2125 * So we wait until all XGMI-interlinked devices are initialized.
 2126 * This may add some delay, as those devices may come from
 2127 * different hives. But that should be OK.
2128 */
2129 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2130 for (i = 0; i < mgpu_info.num_gpu; i++) {
2131 gpu_instance = &(mgpu_info.gpu_ins[i]);
2132 if (gpu_instance->adev->flags & AMD_IS_APU)
2133 continue;
2134
2135 r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 0);
2136 if (r) {
2137 DRM_ERROR("pstate setting failed (%d).\n", r);
2138 break;
2139 }
2140 }
2141 }
2142
2143 mutex_unlock(&mgpu_info.mutex);
2144 }
2145
d38ceaf9
AD
2146 return 0;
2147}
2148
e3ecdffa
AD
2149/**
2150 * amdgpu_device_ip_fini - run fini for hardware IPs
2151 *
2152 * @adev: amdgpu_device pointer
2153 *
2154 * Main teardown pass for hardware IPs. The list of all the hardware
2155 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2156 * are run. hw_fini tears down the hardware associated with each IP
2157 * and sw_fini tears down any software state associated with each IP.
2158 * Returns 0 on success, negative error code on failure.
2159 */
06ec9070 2160static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
d38ceaf9
AD
2161{
2162 int i, r;
2163
c030f2e4 2164 amdgpu_ras_pre_fini(adev);
2165
a82400b5
AG
2166 if (adev->gmc.xgmi.num_physical_nodes > 1)
2167 amdgpu_xgmi_remove_device(adev);
2168
1884734a 2169 amdgpu_amdkfd_device_fini(adev);
05df1f01
RZ
2170
2171 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2172 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2173
3e96dbfd
AD
2174 /* need to disable SMC first */
2175 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2176 if (!adev->ip_blocks[i].status.hw)
3e96dbfd 2177 continue;
fdd34271 2178 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
a1255107 2179 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3e96dbfd
AD
2180 /* XXX handle errors */
2181 if (r) {
2182 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
a1255107 2183 adev->ip_blocks[i].version->funcs->name, r);
3e96dbfd 2184 }
a1255107 2185 adev->ip_blocks[i].status.hw = false;
3e96dbfd
AD
2186 break;
2187 }
2188 }
2189
d38ceaf9 2190 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2191 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2192 continue;
8201a67a 2193
a1255107 2194 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2195 /* XXX handle errors */
2c1a2784 2196 if (r) {
a1255107
AD
2197 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2198 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2199 }
8201a67a 2200
a1255107 2201 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2202 }
2203
9950cda2 2204
d38ceaf9 2205 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2206 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2207 continue;
c12aba3a
ML
2208
2209 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2210 amdgpu_ucode_free_bo(adev);
1e256e27 2211 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a
ML
2212 amdgpu_device_wb_fini(adev);
2213 amdgpu_device_vram_scratch_fini(adev);
533aed27 2214 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2215 }
2216
a1255107 2217 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2218 /* XXX handle errors */
2c1a2784 2219 if (r) {
a1255107
AD
2220 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2221 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2222 }
a1255107
AD
2223 adev->ip_blocks[i].status.sw = false;
2224 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2225 }
2226
a6dcfd9c 2227 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2228 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2229 continue;
a1255107
AD
2230 if (adev->ip_blocks[i].version->funcs->late_fini)
2231 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2232 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2233 }
2234
c030f2e4 2235 amdgpu_ras_fini(adev);
2236
030308fc 2237 if (amdgpu_sriov_vf(adev))
24136135
ML
2238 if (amdgpu_virt_release_full_gpu(adev, false))
2239 DRM_ERROR("failed to release exclusive mode on fini\n");
2493664f 2240
d38ceaf9
AD
2241 return 0;
2242}
2243
e3ecdffa 2244/**
beff74bc 2245 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2246 *
1112a46b 2247 * @work: work_struct.
e3ecdffa 2248 */
beff74bc 2249static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2250{
2251 struct amdgpu_device *adev =
beff74bc 2252 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2253 int r;
2254
2255 r = amdgpu_ib_ring_tests(adev);
2256 if (r)
2257 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2258}
2259
1e317b99
RZ
2260static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2261{
2262 struct amdgpu_device *adev =
2263 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2264
2265 mutex_lock(&adev->gfx.gfx_off_mutex);
2266 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2267 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2268 adev->gfx.gfx_off_state = true;
2269 }
2270 mutex_unlock(&adev->gfx.gfx_off_mutex);
2271}
2272
e3ecdffa 2273/**
e7854a03 2274 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2275 *
2276 * @adev: amdgpu_device pointer
2277 *
2278 * Main suspend function for hardware IPs. The list of all the hardware
2279 * IPs that make up the asic is walked, clockgating is disabled and the
2280 * suspend callbacks are run. suspend puts the hardware and software state
2281 * in each IP into a state suitable for suspend.
2282 * Returns 0 on success, negative error code on failure.
2283 */
e7854a03
AD
2284static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2285{
2286 int i, r;
2287
05df1f01 2288 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271 2289 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2290
e7854a03
AD
2291 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2292 if (!adev->ip_blocks[i].status.valid)
2293 continue;
2294 /* displays are handled separately */
2295 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
e7854a03
AD
2296 /* XXX handle errors */
2297 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2298 /* XXX handle errors */
2299 if (r) {
2300 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2301 adev->ip_blocks[i].version->funcs->name, r);
482f0e53 2302 return r;
e7854a03 2303 }
482f0e53 2304 adev->ip_blocks[i].status.hw = false;
e7854a03
AD
2305 }
2306 }
2307
e7854a03
AD
2308 return 0;
2309}
2310
2311/**
2312 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2313 *
2314 * @adev: amdgpu_device pointer
2315 *
2316 * Main suspend function for hardware IPs. The list of all the hardware
2317 * IPs that make up the asic is walked, clockgating is disabled and the
2318 * suspend callbacks are run. suspend puts the hardware and software state
2319 * in each IP into a state suitable for suspend.
2320 * Returns 0 on success, negative error code on failure.
2321 */
2322static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2323{
2324 int i, r;
2325
2326 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2327 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2328 continue;
e7854a03
AD
2329 /* displays are handled in phase1 */
2330 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2331 continue;
bff77e86
LM
2332 /* PSP lost connection when err_event_athub occurs */
2333 if (amdgpu_ras_intr_triggered() &&
2334 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2335 adev->ip_blocks[i].status.hw = false;
2336 continue;
2337 }
d38ceaf9 2338 /* XXX handle errors */
a1255107 2339 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 2340 /* XXX handle errors */
2c1a2784 2341 if (r) {
a1255107
AD
2342 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2343 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2344 }
876923fb 2345 adev->ip_blocks[i].status.hw = false;
a3a09142
AD
2346 /* handle putting the SMC in the appropriate state */
2347 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2348 if (is_support_sw_smu(adev)) {
0e0b89c0 2349 r = smu_set_mp1_state(&adev->smu, adev->mp1_state);
a3a09142 2350 } else if (adev->powerplay.pp_funcs &&
482f0e53 2351 adev->powerplay.pp_funcs->set_mp1_state) {
a3a09142
AD
2352 r = adev->powerplay.pp_funcs->set_mp1_state(
2353 adev->powerplay.pp_handle,
2354 adev->mp1_state);
0e0b89c0
EQ
2355 }
2356 if (r) {
2357 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
2358 adev->mp1_state, r);
2359 return r;
a3a09142
AD
2360 }
2361 }
b5507c7e
AG
2362
2363 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2364 }
2365
2366 return 0;
2367}
2368
e7854a03
AD
2369/**
2370 * amdgpu_device_ip_suspend - run suspend for hardware IPs
2371 *
2372 * @adev: amdgpu_device pointer
2373 *
2374 * Main suspend function for hardware IPs. The list of all the hardware
2375 * IPs that make up the asic is walked, clockgating is disabled and the
2376 * suspend callbacks are run. suspend puts the hardware and software state
2377 * in each IP into a state suitable for suspend.
2378 * Returns 0 on success, negative error code on failure.
2379 */
2380int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2381{
2382 int r;
2383
e7819644
YT
2384 if (amdgpu_sriov_vf(adev))
2385 amdgpu_virt_request_full_gpu(adev, false);
2386
e7854a03
AD
2387 r = amdgpu_device_ip_suspend_phase1(adev);
2388 if (r)
2389 return r;
2390 r = amdgpu_device_ip_suspend_phase2(adev);
2391
e7819644
YT
2392 if (amdgpu_sriov_vf(adev))
2393 amdgpu_virt_release_full_gpu(adev, false);
2394
e7854a03
AD
2395 return r;
2396}
2397
06ec9070 2398static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2399{
2400 int i, r;
2401
2cb681b6
ML
2402 static enum amd_ip_block_type ip_order[] = {
2403 AMD_IP_BLOCK_TYPE_GMC,
2404 AMD_IP_BLOCK_TYPE_COMMON,
39186aef 2405 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
2406 AMD_IP_BLOCK_TYPE_IH,
2407 };
a90ad3c2 2408
2cb681b6
ML
2409 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2410 int j;
2411 struct amdgpu_ip_block *block;
a90ad3c2 2412
2cb681b6
ML
2413 for (j = 0; j < adev->num_ip_blocks; j++) {
2414 block = &adev->ip_blocks[j];
2415
482f0e53 2416 block->status.hw = false;
2cb681b6
ML
2417 if (block->version->type != ip_order[i] ||
2418 !block->status.valid)
2419 continue;
2420
2421 r = block->version->funcs->hw_init(adev);
0aaeefcc 2422 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2423 if (r)
2424 return r;
482f0e53 2425 block->status.hw = true;
a90ad3c2
ML
2426 }
2427 }
2428
2429 return 0;
2430}
2431
06ec9070 2432static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2433{
2434 int i, r;
2435
2cb681b6
ML
2436 static enum amd_ip_block_type ip_order[] = {
2437 AMD_IP_BLOCK_TYPE_SMC,
2438 AMD_IP_BLOCK_TYPE_DCE,
2439 AMD_IP_BLOCK_TYPE_GFX,
2440 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c
FM
2441 AMD_IP_BLOCK_TYPE_UVD,
2442 AMD_IP_BLOCK_TYPE_VCE
2cb681b6 2443 };
a90ad3c2 2444
2cb681b6
ML
2445 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2446 int j;
2447 struct amdgpu_ip_block *block;
a90ad3c2 2448
2cb681b6
ML
2449 for (j = 0; j < adev->num_ip_blocks; j++) {
2450 block = &adev->ip_blocks[j];
2451
2452 if (block->version->type != ip_order[i] ||
482f0e53
ML
2453 !block->status.valid ||
2454 block->status.hw)
2cb681b6
ML
2455 continue;
2456
2457 r = block->version->funcs->hw_init(adev);
0aaeefcc 2458 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2459 if (r)
2460 return r;
482f0e53 2461 block->status.hw = true;
a90ad3c2
ML
2462 }
2463 }
2464
2465 return 0;
2466}
2467
e3ecdffa
AD
2468/**
2469 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2470 *
2471 * @adev: amdgpu_device pointer
2472 *
2473 * First resume function for hardware IPs. The list of all the hardware
2474 * IPs that make up the asic is walked and the resume callbacks are run for
2475 * COMMON, GMC, and IH. resume puts the hardware into a functional state
2476 * after a suspend and updates the software state as necessary. This
2477 * function is also used for restoring the GPU after a GPU reset.
2478 * Returns 0 on success, negative error code on failure.
2479 */
06ec9070 2480static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
2481{
2482 int i, r;
2483
a90ad3c2 2484 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2485 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
a90ad3c2 2486 continue;
a90ad3c2 2487 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa
AD
2488 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2489 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
482f0e53 2490
fcf0649f
CZ
2491 r = adev->ip_blocks[i].version->funcs->resume(adev);
2492 if (r) {
2493 DRM_ERROR("resume of IP block <%s> failed %d\n",
2494 adev->ip_blocks[i].version->funcs->name, r);
2495 return r;
2496 }
482f0e53 2497 adev->ip_blocks[i].status.hw = true;
a90ad3c2
ML
2498 }
2499 }
2500
2501 return 0;
2502}
2503
e3ecdffa
AD
2504/**
2505 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2506 *
2507 * @adev: amdgpu_device pointer
2508 *
 2509 * Second resume function for hardware IPs. The list of all the hardware
2510 * IPs that make up the asic is walked and the resume callbacks are run for
2511 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
2512 * functional state after a suspend and updates the software state as
2513 * necessary. This function is also used for restoring the GPU after a GPU
2514 * reset.
2515 * Returns 0 on success, negative error code on failure.
2516 */
06ec9070 2517static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2518{
2519 int i, r;
2520
2521 for (i = 0; i < adev->num_ip_blocks; i++) {
482f0e53 2522 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
d38ceaf9 2523 continue;
fcf0649f 2524 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 2525 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
2526 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2527 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 2528 continue;
a1255107 2529 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 2530 if (r) {
a1255107
AD
2531 DRM_ERROR("resume of IP block <%s> failed %d\n",
2532 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2533 return r;
2c1a2784 2534 }
482f0e53 2535 adev->ip_blocks[i].status.hw = true;
d38ceaf9
AD
2536 }
2537
2538 return 0;
2539}
2540
e3ecdffa
AD
2541/**
2542 * amdgpu_device_ip_resume - run resume for hardware IPs
2543 *
2544 * @adev: amdgpu_device pointer
2545 *
2546 * Main resume function for hardware IPs. The hardware IPs
 2547 * are split into two resume functions because they are
 2548 * also used in recovering from a GPU reset and some additional
 2549 * steps need to be taken between them. In this case (S3/S4) they are
2550 * run sequentially.
2551 * Returns 0 on success, negative error code on failure.
2552 */
06ec9070 2553static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
2554{
2555 int r;
2556
06ec9070 2557 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
2558 if (r)
2559 return r;
7a3e0bb2
RZ
2560
2561 r = amdgpu_device_fw_loading(adev);
2562 if (r)
2563 return r;
2564
06ec9070 2565 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
2566
2567 return r;
2568}
2569
e3ecdffa
AD
2570/**
2571 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2572 *
2573 * @adev: amdgpu_device pointer
2574 *
2575 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2576 */
4e99a44e 2577static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 2578{
6867e1b5
ML
2579 if (amdgpu_sriov_vf(adev)) {
2580 if (adev->is_atom_fw) {
2581 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2582 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2583 } else {
2584 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2585 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2586 }
2587
2588 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2589 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 2590 }
048765ad
AR
2591}
2592
e3ecdffa
AD
2593/**
2594 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2595 *
2596 * @asic_type: AMD asic type
2597 *
 2598 * Check if there is DC (new modesetting infrastructure) support for an asic.
 2599 * Returns true if DC has support, false if not.
2600 */
4562236b
HW
2601bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2602{
2603 switch (asic_type) {
2604#if defined(CONFIG_DRM_AMD_DC)
2605 case CHIP_BONAIRE:
0d6fbccb 2606 case CHIP_KAVERI:
367e6687
AD
2607 case CHIP_KABINI:
2608 case CHIP_MULLINS:
d9fda248
HW
2609 /*
2610 * We have systems in the wild with these ASICs that require
2611 * LVDS and VGA support which is not supported with DC.
2612 *
2613 * Fallback to the non-DC driver here by default so as not to
2614 * cause regressions.
2615 */
2616 return amdgpu_dc > 0;
2617 case CHIP_HAWAII:
4562236b
HW
2618 case CHIP_CARRIZO:
2619 case CHIP_STONEY:
4562236b 2620 case CHIP_POLARIS10:
675fd32b 2621 case CHIP_POLARIS11:
2c8ad2d5 2622 case CHIP_POLARIS12:
675fd32b 2623 case CHIP_VEGAM:
4562236b
HW
2624 case CHIP_TONGA:
2625 case CHIP_FIJI:
42f8ffa1 2626 case CHIP_VEGA10:
dca7b401 2627 case CHIP_VEGA12:
c6034aa2 2628 case CHIP_VEGA20:
b86a1aa3 2629#if defined(CONFIG_DRM_AMD_DC_DCN)
fd187853 2630 case CHIP_RAVEN:
b4f199c7 2631 case CHIP_NAVI10:
8fceceb6 2632 case CHIP_NAVI14:
078655d9 2633 case CHIP_NAVI12:
e1c14c43 2634 case CHIP_RENOIR:
42f8ffa1 2635#endif
fd187853 2636 return amdgpu_dc != 0;
4562236b
HW
2637#endif
2638 default:
2639 return false;
2640 }
2641}
2642
2643/**
2644 * amdgpu_device_has_dc_support - check if dc is supported
2645 *
 2646 * @adev: amdgpu_device pointer
2647 *
2648 * Returns true for supported, false for not supported
2649 */
2650bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2651{
2555039d
XY
2652 if (amdgpu_sriov_vf(adev))
2653 return false;
2654
4562236b
HW
2655 return amdgpu_device_asic_has_dc_support(adev->asic_type);
2656}
2657
d4535e2c
AG
2658
2659static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
2660{
2661 struct amdgpu_device *adev =
2662 container_of(__work, struct amdgpu_device, xgmi_reset_work);
2663
ce316fa5
LM
2664 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)
2665 adev->asic_reset_res = (adev->in_baco == false) ?
2666 amdgpu_device_baco_enter(adev->ddev) :
2667 amdgpu_device_baco_exit(adev->ddev);
2668 else
2669 adev->asic_reset_res = amdgpu_asic_reset(adev);
2670
d4535e2c 2671 if (adev->asic_reset_res)
fed184e9 2672 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
d4535e2c
AG
2673 adev->asic_reset_res, adev->ddev->unique);
2674}
2675
71f98027
AD
2676static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
2677{
2678 char *input = amdgpu_lockup_timeout;
2679 char *timeout_setting = NULL;
2680 int index = 0;
2681 long timeout;
2682 int ret = 0;
2683
2684 /*
 2685 * By default the timeout for non-compute jobs is 10000 ms
 2686 * and there is no timeout enforced on compute jobs.
 2687 * In SR-IOV or passthrough mode, the timeout for compute
 2688 * jobs is 10000 ms by default.
2689 */
2690 adev->gfx_timeout = msecs_to_jiffies(10000);
2691 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
2692 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
2693 adev->compute_timeout = adev->gfx_timeout;
2694 else
2695 adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
2696
f440ff44 2697 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027 2698 while ((timeout_setting = strsep(&input, ",")) &&
f440ff44 2699 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
71f98027
AD
2700 ret = kstrtol(timeout_setting, 0, &timeout);
2701 if (ret)
2702 return ret;
2703
2704 if (timeout == 0) {
2705 index++;
2706 continue;
2707 } else if (timeout < 0) {
2708 timeout = MAX_SCHEDULE_TIMEOUT;
2709 } else {
2710 timeout = msecs_to_jiffies(timeout);
2711 }
2712
2713 switch (index++) {
2714 case 0:
2715 adev->gfx_timeout = timeout;
2716 break;
2717 case 1:
2718 adev->compute_timeout = timeout;
2719 break;
2720 case 2:
2721 adev->sdma_timeout = timeout;
2722 break;
2723 case 3:
2724 adev->video_timeout = timeout;
2725 break;
2726 default:
2727 break;
2728 }
2729 }
2730 /*
 2731 * If there is only one value specified, it
 2732 * should apply to all non-compute jobs.
2733 */
bcccee89 2734 if (index == 1) {
71f98027 2735 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
bcccee89
ED
2736 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
2737 adev->compute_timeout = adev->gfx_timeout;
2738 }
71f98027
AD
2739 }
2740
2741 return ret;
2742}
d4535e2c 2743
d38ceaf9
AD
2744/**
2745 * amdgpu_device_init - initialize the driver
2746 *
2747 * @adev: amdgpu_device pointer
87e3f136 2748 * @ddev: drm dev pointer
d38ceaf9
AD
2749 * @pdev: pci dev pointer
2750 * @flags: driver flags
2751 *
2752 * Initializes the driver info and hw (all asics).
2753 * Returns 0 for success or an error on failure.
2754 * Called at driver startup.
2755 */
2756int amdgpu_device_init(struct amdgpu_device *adev,
2757 struct drm_device *ddev,
2758 struct pci_dev *pdev,
2759 uint32_t flags)
2760{
2761 int r, i;
3840c5bc 2762 bool boco = false;
95844d20 2763 u32 max_MBps;
d38ceaf9
AD
2764
2765 adev->shutdown = false;
2766 adev->dev = &pdev->dev;
2767 adev->ddev = ddev;
2768 adev->pdev = pdev;
2769 adev->flags = flags;
4e66d7d2
YZ
2770
2771 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
2772 adev->asic_type = amdgpu_force_asic_type;
2773 else
2774 adev->asic_type = flags & AMD_ASIC_MASK;
2775
d38ceaf9 2776 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2
SL
2777 if (amdgpu_emu_mode == 1)
2778 adev->usec_timeout *= 2;
770d13b1 2779 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
2780 adev->accel_working = false;
2781 adev->num_rings = 0;
2782 adev->mman.buffer_funcs = NULL;
2783 adev->mman.buffer_funcs_ring = NULL;
2784 adev->vm_manager.vm_pte_funcs = NULL;
3798e9a6 2785 adev->vm_manager.vm_pte_num_rqs = 0;
132f34e4 2786 adev->gmc.gmc_funcs = NULL;
f54d1867 2787 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 2788 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
2789
2790 adev->smc_rreg = &amdgpu_invalid_rreg;
2791 adev->smc_wreg = &amdgpu_invalid_wreg;
2792 adev->pcie_rreg = &amdgpu_invalid_rreg;
2793 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
2794 adev->pciep_rreg = &amdgpu_invalid_rreg;
2795 adev->pciep_wreg = &amdgpu_invalid_wreg;
4fa1c6a6
TZ
2796 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
2797 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
d38ceaf9
AD
2798 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
2799 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
2800 adev->didt_rreg = &amdgpu_invalid_rreg;
2801 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
2802 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
2803 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
2804 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
2805 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
2806
3e39ab90
AD
2807 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
2808 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
2809 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
2810
 2811 /* mutex initializations are all done here so we
 2812 * can recall functions without having locking issues */
d38ceaf9 2813 atomic_set(&adev->irq.ih.lock, 0);
0e5ca0d1 2814 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
2815 mutex_init(&adev->pm.mutex);
2816 mutex_init(&adev->gfx.gpu_clock_mutex);
2817 mutex_init(&adev->srbm_mutex);
b8866c26 2818 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 2819 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 2820 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 2821 mutex_init(&adev->mn_lock);
e23b74aa 2822 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 2823 hash_init(adev->mn_hash);
13a752e3 2824 mutex_init(&adev->lock_reset);
bb5a2bdf 2825 mutex_init(&adev->virt.dpm_mutex);
32eaeae0 2826 mutex_init(&adev->psp.mutex);
d38ceaf9 2827
912dfc84
EQ
2828 r = amdgpu_device_check_arguments(adev);
2829 if (r)
2830 return r;
d38ceaf9 2831
d38ceaf9
AD
2832 spin_lock_init(&adev->mmio_idx_lock);
2833 spin_lock_init(&adev->smc_idx_lock);
2834 spin_lock_init(&adev->pcie_idx_lock);
2835 spin_lock_init(&adev->uvd_ctx_idx_lock);
2836 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 2837 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 2838 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 2839 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 2840 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 2841
0c4e7fa5
CZ
2842 INIT_LIST_HEAD(&adev->shadow_list);
2843 mutex_init(&adev->shadow_list_lock);
2844
795f2813
AR
2845 INIT_LIST_HEAD(&adev->ring_lru_list);
2846 spin_lock_init(&adev->ring_lru_list_lock);
2847
beff74bc
AD
2848 INIT_DELAYED_WORK(&adev->delayed_init_work,
2849 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
2850 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
2851 amdgpu_device_delay_enable_gfx_off);
2dc80b00 2852
d4535e2c
AG
2853 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
2854
d23ee13f 2855 adev->gfx.gfx_off_req_count = 1;
b1ddf548
RZ
2856 adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false;
2857
0fa49558
AX
2858 /* Registers mapping */
2859 /* TODO: block userspace mapping of io register */
da69c161
KW
2860 if (adev->asic_type >= CHIP_BONAIRE) {
2861 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
2862 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
2863 } else {
2864 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
2865 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
2866 }
d38ceaf9 2867
d38ceaf9
AD
2868 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
2869 if (adev->rmmio == NULL) {
2870 return -ENOMEM;
2871 }
2872 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
2873 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
2874
d38ceaf9
AD
2875 /* io port mapping */
2876 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
2877 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
2878 adev->rio_mem_size = pci_resource_len(adev->pdev, i);
2879 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
2880 break;
2881 }
2882 }
2883 if (adev->rio_mem == NULL)
b64a18c5 2884 DRM_INFO("PCI I/O BAR is not found.\n");
d38ceaf9 2885
b2109d8e
JX
2886 /* enable PCIE atomic ops */
2887 r = pci_enable_atomic_ops_to_root(adev->pdev,
2888 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
2889 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
2890 if (r) {
2891 adev->have_atomics_support = false;
2892 DRM_INFO("PCIE atomic ops is not supported\n");
2893 } else {
2894 adev->have_atomics_support = true;
2895 }
2896
5494d864
AD
2897 amdgpu_device_get_pcie_info(adev);
2898
b239c017
JX
2899 if (amdgpu_mcbp)
2900 DRM_INFO("MCBP is enabled\n");
2901
5f84cc63
JX
2902 if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
2903 adev->enable_mes = true;
2904
f54eeab4 2905 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) {
a190d1c7
XY
2906 r = amdgpu_discovery_init(adev);
2907 if (r) {
2908 dev_err(adev->dev, "amdgpu_discovery_init failed\n");
2909 return r;
2910 }
2911 }
2912
d38ceaf9 2913 /* early init functions */
06ec9070 2914 r = amdgpu_device_ip_early_init(adev);
d38ceaf9
AD
2915 if (r)
2916 return r;
2917
df99ac0f
JZ
2918 r = amdgpu_device_get_job_timeout_settings(adev);
2919 if (r) {
2920 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
2921 return r;
2922 }
2923
6585661d
OZ
2924 /* doorbell bar mapping and doorbell index init*/
2925 amdgpu_device_doorbell_init(adev);
2926
d38ceaf9
AD
2927 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
2928 /* this will fail for cards that aren't VGA class devices, just
2929 * ignore it */
06ec9070 2930 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
d38ceaf9 2931
31af062a 2932 if (amdgpu_device_supports_boco(ddev))
3840c5bc
AD
2933 boco = true;
2934 if (amdgpu_has_atpx() &&
2935 (amdgpu_is_atpx_hybrid() ||
2936 amdgpu_has_atpx_dgpu_power_cntl()) &&
2937 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 2938 vga_switcheroo_register_client(adev->pdev,
3840c5bc
AD
2939 &amdgpu_switcheroo_ops, boco);
2940 if (boco)
d38ceaf9
AD
2941 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
2942
9475a943
SL
2943 if (amdgpu_emu_mode == 1) {
2944 /* post the asic on emulation mode */
2945 emu_soc_asic_init(adev);
bfca0289 2946 goto fence_driver_init;
9475a943 2947 }
bfca0289 2948
4e99a44e
ML
2949 /* detect if we are with an SRIOV vbios */
2950 amdgpu_device_detect_sriov_bios(adev);
048765ad 2951
95e8e59e
AD
2952 /* check if we need to reset the asic
2953 * E.g., driver was not cleanly unloaded previously, etc.
2954 */
f14899fd 2955 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
95e8e59e
AD
2956 r = amdgpu_asic_reset(adev);
2957 if (r) {
2958 dev_err(adev->dev, "asic reset on init failed\n");
2959 goto failed;
2960 }
2961 }
2962
d38ceaf9 2963 /* Post card if necessary */
39c640c0 2964 if (amdgpu_device_need_post(adev)) {
d38ceaf9 2965 if (!adev->bios) {
bec86378 2966 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
2967 r = -EINVAL;
2968 goto failed;
d38ceaf9 2969 }
bec86378 2970 DRM_INFO("GPU posting now...\n");
4e99a44e
ML
2971 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
2972 if (r) {
2973 dev_err(adev->dev, "gpu post error!\n");
2974 goto failed;
2975 }
d38ceaf9
AD
2976 }
2977
88b64e95
AD
2978 if (adev->is_atom_fw) {
2979 /* Initialize clocks */
2980 r = amdgpu_atomfirmware_get_clock_info(adev);
2981 if (r) {
2982 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 2983 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
2984 goto failed;
2985 }
2986 } else {
a5bde2f9
AD
2987 /* Initialize clocks */
2988 r = amdgpu_atombios_get_clock_info(adev);
2989 if (r) {
2990 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 2991 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 2992 goto failed;
a5bde2f9
AD
2993 }
2994 /* init i2c buses */
4562236b
HW
2995 if (!amdgpu_device_has_dc_support(adev))
2996 amdgpu_atombios_i2c_init(adev);
2c1a2784 2997 }
d38ceaf9 2998
bfca0289 2999fence_driver_init:
d38ceaf9
AD
3000 /* Fence driver */
3001 r = amdgpu_fence_driver_init(adev);
2c1a2784
AD
3002 if (r) {
3003 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
e23b74aa 3004 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 3005 goto failed;
2c1a2784 3006 }
d38ceaf9
AD
3007
3008 /* init the mode config */
3009 drm_mode_config_init(adev->ddev);
3010
06ec9070 3011 r = amdgpu_device_ip_init(adev);
d38ceaf9 3012 if (r) {
8840a387 3013 /* failed in exclusive mode due to timeout */
3014 if (amdgpu_sriov_vf(adev) &&
3015 !amdgpu_sriov_runtime(adev) &&
3016 amdgpu_virt_mmio_blocked(adev) &&
3017 !amdgpu_virt_wait_reset(adev)) {
3018 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
3019 /* Don't send request since VF is inactive. */
3020 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3021 adev->virt.ops = NULL;
8840a387 3022 r = -EAGAIN;
3023 goto failed;
3024 }
06ec9070 3025 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 3026 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
83ba126a 3027 goto failed;
d38ceaf9
AD
3028 }
3029
3030 adev->accel_working = true;
3031
e59c0205
AX
3032 amdgpu_vm_check_compute_bug(adev);
3033
95844d20
MO
3034 /* Initialize the buffer migration limit. */
3035 if (amdgpu_moverate >= 0)
3036 max_MBps = amdgpu_moverate;
3037 else
3038 max_MBps = 8; /* Allow 8 MB/s. */
3039 /* Get a log2 for easy divisions. */
3040 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3041
9bc92b9c
ML
3042 amdgpu_fbdev_init(adev);
3043
e9bc1bf7
YT
3044 if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))
3045 amdgpu_pm_virt_sysfs_init(adev);
3046
d2f52ac8 3047 r = amdgpu_pm_sysfs_init(adev);
7c868b59
YT
3048 if (r) {
3049 adev->pm_sysfs_en = false;
d2f52ac8 3050 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
7c868b59
YT
3051 } else
3052 adev->pm_sysfs_en = true;
d2f52ac8 3053
5bb23532 3054 r = amdgpu_ucode_sysfs_init(adev);
7c868b59
YT
3055 if (r) {
3056 adev->ucode_sysfs_en = false;
5bb23532 3057 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
7c868b59
YT
3058 } else
3059 adev->ucode_sysfs_en = true;
5bb23532 3060
75758255 3061 r = amdgpu_debugfs_gem_init(adev);
3f14e623 3062 if (r)
d38ceaf9 3063 DRM_ERROR("registering gem debugfs failed (%d).\n", r);
d38ceaf9
AD
3064
3065 r = amdgpu_debugfs_regs_init(adev);
3f14e623 3066 if (r)
d38ceaf9 3067 DRM_ERROR("registering register debugfs failed (%d).\n", r);
d38ceaf9 3068
50ab2533 3069 r = amdgpu_debugfs_firmware_init(adev);
3f14e623 3070 if (r)
50ab2533 3071 DRM_ERROR("registering firmware debugfs failed (%d).\n", r);
50ab2533 3072
763efb6c 3073 r = amdgpu_debugfs_init(adev);
db95e218 3074 if (r)
763efb6c 3075 DRM_ERROR("Creating debugfs files failed (%d).\n", r);
db95e218 3076
d38ceaf9
AD
3077 if ((amdgpu_testing & 1)) {
3078 if (adev->accel_working)
3079 amdgpu_test_moves(adev);
3080 else
3081 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
3082 }
d38ceaf9
AD
3083 if (amdgpu_benchmarking) {
3084 if (adev->accel_working)
3085 amdgpu_benchmark(adev, amdgpu_benchmarking);
3086 else
3087 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
3088 }
3089
b0adca4d
EQ
3090 /*
3091 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
 3092 * Otherwise the mgpu fan boost feature will be skipped because the
 3093 * gpu instance count would be too low.
3094 */
3095 amdgpu_register_gpu_instance(adev);
3096
d38ceaf9
AD
3097 /* enable clockgating, etc. after ib tests, etc. since some blocks require
3098 * explicit gating rather than handling it automatically.
3099 */
06ec9070 3100 r = amdgpu_device_ip_late_init(adev);
2c1a2784 3101 if (r) {
06ec9070 3102 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
e23b74aa 3103 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
83ba126a 3104 goto failed;
2c1a2784 3105 }
d38ceaf9 3106
108c6a63 3107 /* must succeed. */
511fdbc3 3108 amdgpu_ras_resume(adev);
108c6a63 3109
beff74bc
AD
3110 queue_delayed_work(system_wq, &adev->delayed_init_work,
3111 msecs_to_jiffies(AMDGPU_RESUME_MS));
3112
dcea6e65
KR
3113 r = device_create_file(adev->dev, &dev_attr_pcie_replay_count);
3114 if (r) {
3115 dev_err(adev->dev, "Could not create pcie_replay_count");
3116 return r;
3117 }
108c6a63 3118
d155bef0
AB
3119 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3120 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
3121 if (r)
3122 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3123
d38ceaf9 3124 return 0;
83ba126a
AD
3125
3126failed:
89041940 3127 amdgpu_vf_error_trans_all(adev);
3840c5bc 3128 if (boco)
83ba126a 3129 vga_switcheroo_fini_domain_pm_ops(adev->dev);
8840a387 3130
83ba126a 3131 return r;
d38ceaf9
AD
3132}
3133
d38ceaf9
AD
3134/**
3135 * amdgpu_device_fini - tear down the driver
3136 *
3137 * @adev: amdgpu_device pointer
3138 *
3139 * Tear down the driver info (all asics).
3140 * Called at driver shutdown.
3141 */
3142void amdgpu_device_fini(struct amdgpu_device *adev)
3143{
3144 int r;
3145
3146 DRM_INFO("amdgpu: finishing device.\n");
9f875167 3147 flush_delayed_work(&adev->delayed_init_work);
d0d13fe8 3148 adev->shutdown = true;
9f875167 3149
e5b03032
ML
3150 /* disable all interrupts */
3151 amdgpu_irq_disable_all(adev);
ff97cba8
ML
3152 if (adev->mode_info.mode_config_initialized){
3153 if (!amdgpu_device_has_dc_support(adev))
c2d88e06 3154 drm_helper_force_disable_all(adev->ddev);
ff97cba8
ML
3155 else
3156 drm_atomic_helper_shutdown(adev->ddev);
3157 }
d38ceaf9 3158 amdgpu_fence_driver_fini(adev);
7c868b59
YT
3159 if (adev->pm_sysfs_en)
3160 amdgpu_pm_sysfs_fini(adev);
d38ceaf9 3161 amdgpu_fbdev_fini(adev);
06ec9070 3162 r = amdgpu_device_ip_fini(adev);
ab4fe3e1
HR
3163 if (adev->firmware.gpu_info_fw) {
3164 release_firmware(adev->firmware.gpu_info_fw);
3165 adev->firmware.gpu_info_fw = NULL;
3166 }
d38ceaf9
AD
3167 adev->accel_working = false;
3168 /* free i2c buses */
4562236b
HW
3169 if (!amdgpu_device_has_dc_support(adev))
3170 amdgpu_i2c_fini(adev);
bfca0289
SL
3171
3172 if (amdgpu_emu_mode != 1)
3173 amdgpu_atombios_fini(adev);
3174
d38ceaf9
AD
3175 kfree(adev->bios);
3176 adev->bios = NULL;
3840c5bc
AD
3177 if (amdgpu_has_atpx() &&
3178 (amdgpu_is_atpx_hybrid() ||
3179 amdgpu_has_atpx_dgpu_power_cntl()) &&
3180 !pci_is_thunderbolt_attached(adev->pdev))
84c8b22e 3181 vga_switcheroo_unregister_client(adev->pdev);
3840c5bc 3182 if (amdgpu_device_supports_boco(adev->ddev))
83ba126a 3183 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d38ceaf9
AD
3184 vga_client_register(adev->pdev, NULL, NULL, NULL);
3185 if (adev->rio_mem)
3186 pci_iounmap(adev->pdev, adev->rio_mem);
3187 adev->rio_mem = NULL;
3188 iounmap(adev->rmmio);
3189 adev->rmmio = NULL;
06ec9070 3190 amdgpu_device_doorbell_fini(adev);
e9bc1bf7
YT
3191 if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))
3192 amdgpu_pm_virt_sysfs_fini(adev);
3193
d38ceaf9 3194 amdgpu_debugfs_regs_cleanup(adev);
dcea6e65 3195 device_remove_file(adev->dev, &dev_attr_pcie_replay_count);
7c868b59
YT
3196 if (adev->ucode_sysfs_en)
3197 amdgpu_ucode_sysfs_fini(adev);
d155bef0
AB
3198 if (IS_ENABLED(CONFIG_PERF_EVENTS))
3199 amdgpu_pmu_fini(adev);
6698a3d0 3200 amdgpu_debugfs_preempt_cleanup(adev);
f54eeab4 3201 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
a190d1c7 3202 amdgpu_discovery_fini(adev);
d38ceaf9
AD
3203}
3204
3205
3206/*
3207 * Suspend & resume.
3208 */
3209/**
810ddc3a 3210 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 3211 *
87e3f136
DP
3212 * @dev: drm dev pointer
 3214 * @fbcon: notify the fbdev of suspend
d38ceaf9
AD
3215 *
3216 * Puts the hw in the suspend state (all asics).
3217 * Returns 0 for success or an error on failure.
3218 * Called at driver suspend.
3219 */
de185019 3220int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3221{
3222 struct amdgpu_device *adev;
3223 struct drm_crtc *crtc;
3224 struct drm_connector *connector;
f8d2d39e 3225 struct drm_connector_list_iter iter;
5ceb54c6 3226 int r;
d38ceaf9
AD
3227
3228 if (dev == NULL || dev->dev_private == NULL) {
3229 return -ENODEV;
3230 }
3231
3232 adev = dev->dev_private;
3233
3234 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3235 return 0;
3236
44779b43 3237 adev->in_suspend = true;
d38ceaf9
AD
3238 drm_kms_helper_poll_disable(dev);
3239
5f818173
S
3240 if (fbcon)
3241 amdgpu_fbdev_set_suspend(adev, 1);
3242
beff74bc 3243 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 3244
4562236b
HW
3245 if (!amdgpu_device_has_dc_support(adev)) {
3246 /* turn off display hw */
3247 drm_modeset_lock_all(dev);
f8d2d39e
LP
3248 drm_connector_list_iter_begin(dev, &iter);
3249 drm_for_each_connector_iter(connector, &iter)
3250 drm_helper_connector_dpms(connector,
3251 DRM_MODE_DPMS_OFF);
3252 drm_connector_list_iter_end(&iter);
4562236b 3253 drm_modeset_unlock_all(dev);
fe1053b7
AD
3254 /* unpin the front buffers and cursors */
3255 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3256 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3257 struct drm_framebuffer *fb = crtc->primary->fb;
3258 struct amdgpu_bo *robj;
3259
91334223 3260 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3261 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3262 r = amdgpu_bo_reserve(aobj, true);
3263 if (r == 0) {
3264 amdgpu_bo_unpin(aobj);
3265 amdgpu_bo_unreserve(aobj);
3266 }
756e6880 3267 }
756e6880 3268
fe1053b7
AD
3269 if (fb == NULL || fb->obj[0] == NULL) {
3270 continue;
3271 }
3272 robj = gem_to_amdgpu_bo(fb->obj[0]);
3273 /* don't unpin kernel fb objects */
3274 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
3275 r = amdgpu_bo_reserve(robj, true);
3276 if (r == 0) {
3277 amdgpu_bo_unpin(robj);
3278 amdgpu_bo_unreserve(robj);
3279 }
d38ceaf9
AD
3280 }
3281 }
3282 }
fe1053b7
AD
3283
3284 amdgpu_amdkfd_suspend(adev);
3285
5e6932fe 3286 amdgpu_ras_suspend(adev);
3287
fe1053b7
AD
3288 r = amdgpu_device_ip_suspend_phase1(adev);
3289
d38ceaf9
AD
3290 /* evict vram memory */
3291 amdgpu_bo_evict_vram(adev);
3292
5ceb54c6 3293 amdgpu_fence_driver_suspend(adev);
d38ceaf9 3294
fe1053b7 3295 r = amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 3296
a0a71e49
AD
3297 /* evict remaining vram memory
3298 * This second call to evict vram is to evict the gart page table
3299 * using the CPU.
3300 */
d38ceaf9
AD
3301 amdgpu_bo_evict_vram(adev);
3302
d38ceaf9
AD
3303 return 0;
3304}
3305
3306/**
810ddc3a 3307 * amdgpu_device_resume - initiate device resume
d38ceaf9 3308 *
87e3f136
DP
3309 * @dev: drm dev pointer
 3311 * @fbcon: notify the fbdev of resume
d38ceaf9
AD
3312 *
3313 * Bring the hw back to operating state (all asics).
3314 * Returns 0 for success or an error on failure.
3315 * Called at driver resume.
3316 */
de185019 3317int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
d38ceaf9
AD
3318{
3319 struct drm_connector *connector;
f8d2d39e 3320 struct drm_connector_list_iter iter;
d38ceaf9 3321 struct amdgpu_device *adev = dev->dev_private;
756e6880 3322 struct drm_crtc *crtc;
03161a6e 3323 int r = 0;
d38ceaf9
AD
3324
3325 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3326 return 0;
3327
d38ceaf9 3328 /* post card */
39c640c0 3329 if (amdgpu_device_need_post(adev)) {
74b0b157 3330 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3331 if (r)
3332 DRM_ERROR("amdgpu asic init failed\n");
3333 }
d38ceaf9 3334
06ec9070 3335 r = amdgpu_device_ip_resume(adev);
e6707218 3336 if (r) {
06ec9070 3337 DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
4d3b9ae5 3338 return r;
e6707218 3339 }
5ceb54c6
AD
3340 amdgpu_fence_driver_resume(adev);
3341
d38ceaf9 3342
06ec9070 3343 r = amdgpu_device_ip_late_init(adev);
03161a6e 3344 if (r)
4d3b9ae5 3345 return r;
d38ceaf9 3346
beff74bc
AD
3347 queue_delayed_work(system_wq, &adev->delayed_init_work,
3348 msecs_to_jiffies(AMDGPU_RESUME_MS));
3349
fe1053b7
AD
3350 if (!amdgpu_device_has_dc_support(adev)) {
3351 /* pin cursors */
3352 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3353 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3354
91334223 3355 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3356 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3357 r = amdgpu_bo_reserve(aobj, true);
3358 if (r == 0) {
3359 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
3360 if (r != 0)
3361 DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
3362 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
3363 amdgpu_bo_unreserve(aobj);
3364 }
756e6880
AD
3365 }
3366 }
3367 }
ba997709
YZ
3368 r = amdgpu_amdkfd_resume(adev);
3369 if (r)
3370 return r;
756e6880 3371
96a5d8d4 3372	/* Make sure IB tests are flushed */
beff74bc 3373 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 3374
d38ceaf9
AD
3375 /* blat the mode back in */
3376 if (fbcon) {
4562236b
HW
3377 if (!amdgpu_device_has_dc_support(adev)) {
3378 /* pre DCE11 */
3379 drm_helper_resume_force_mode(dev);
3380
3381 /* turn on display hw */
3382 drm_modeset_lock_all(dev);
f8d2d39e
LP
3383
3384 drm_connector_list_iter_begin(dev, &iter);
3385 drm_for_each_connector_iter(connector, &iter)
3386 drm_helper_connector_dpms(connector,
3387 DRM_MODE_DPMS_ON);
3388 drm_connector_list_iter_end(&iter);
3389
4562236b 3390 drm_modeset_unlock_all(dev);
d38ceaf9 3391 }
4d3b9ae5 3392 amdgpu_fbdev_set_suspend(adev, 0);
d38ceaf9
AD
3393 }
3394
3395 drm_kms_helper_poll_enable(dev);
23a1a9e5 3396
5e6932fe 3397 amdgpu_ras_resume(adev);
3398
23a1a9e5
L
3399 /*
3400 * Most of the connector probing functions try to acquire runtime pm
3401 * refs to ensure that the GPU is powered on when connector polling is
3402 * performed. Since we're calling this from a runtime PM callback,
3403 * trying to acquire rpm refs will cause us to deadlock.
3404 *
3405 * Since we're guaranteed to be holding the rpm lock, it's safe to
3406 * temporarily disable the rpm helpers so this doesn't deadlock us.
3407 */
3408#ifdef CONFIG_PM
3409 dev->dev->power.disable_depth++;
3410#endif
4562236b
HW
3411 if (!amdgpu_device_has_dc_support(adev))
3412 drm_helper_hpd_irq_event(dev);
3413 else
3414 drm_kms_helper_hotplug_event(dev);
23a1a9e5
L
3415#ifdef CONFIG_PM
3416 dev->dev->power.disable_depth--;
3417#endif
44779b43
RZ
3418 adev->in_suspend = false;
3419
4d3b9ae5 3420 return 0;
d38ceaf9
AD
3421}
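/*
 * Usage sketch (illustrative only): a system PM resume callback is expected
 * to funnel into the helper above along these lines, where the
 * dev_get_drvdata() step is an assumption about how the callback recovers
 * the drm_device from its struct device:
 *
 *	struct drm_device *drm_dev = dev_get_drvdata(dev);
 *
 *	return amdgpu_device_resume(drm_dev, true);
 *
 * Passing true as the final argument asks the helper to notify fbdev/fbcon
 * once the hardware is back up.
 */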
3422
e3ecdffa
AD
3423/**
3424 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
3425 *
3426 * @adev: amdgpu_device pointer
3427 *
3428 * The list of all the hardware IPs that make up the asic is walked and
3429 * the check_soft_reset callbacks are run. check_soft_reset determines
3430 * if the asic is still hung or not.
3431 * Returns true if any of the IPs are still in a hung state, false if not.
3432 */
06ec9070 3433static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
3434{
3435 int i;
3436 bool asic_hang = false;
3437
f993d628
ML
3438 if (amdgpu_sriov_vf(adev))
3439 return true;
3440
8bc04c29
AD
3441 if (amdgpu_asic_need_full_reset(adev))
3442 return true;
3443
63fbf42f 3444 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3445 if (!adev->ip_blocks[i].status.valid)
63fbf42f 3446 continue;
a1255107
AD
3447 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
3448 adev->ip_blocks[i].status.hang =
3449 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
3450 if (adev->ip_blocks[i].status.hang) {
3451 DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
3452 asic_hang = true;
3453 }
3454 }
3455 return asic_hang;
3456}
3457
e3ecdffa
AD
3458/**
3459 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3460 *
3461 * @adev: amdgpu_device pointer
3462 *
3463 * The list of all the hardware IPs that make up the asic is walked and the
3464 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
3465 * handles any IP specific hardware or software state changes that are
3466 * necessary for a soft reset to succeed.
3467 * Returns 0 on success, negative error code on failure.
3468 */
06ec9070 3469static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
3470{
3471 int i, r = 0;
3472
3473 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3474 if (!adev->ip_blocks[i].status.valid)
d31a501e 3475 continue;
a1255107
AD
3476 if (adev->ip_blocks[i].status.hang &&
3477 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3478 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
3479 if (r)
3480 return r;
3481 }
3482 }
3483
3484 return 0;
3485}
3486
e3ecdffa
AD
3487/**
3488 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3489 *
3490 * @adev: amdgpu_device pointer
3491 *
3492 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
3493 * reset is necessary to recover.
3494 * Returns true if a full asic reset is required, false if not.
3495 */
06ec9070 3496static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 3497{
da146d3b
AD
3498 int i;
3499
8bc04c29
AD
3500 if (amdgpu_asic_need_full_reset(adev))
3501 return true;
3502
da146d3b 3503 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3504 if (!adev->ip_blocks[i].status.valid)
da146d3b 3505 continue;
a1255107
AD
3506 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3507 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3508 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
3509 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3510 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 3511 if (adev->ip_blocks[i].status.hang) {
da146d3b
AD
3512 DRM_INFO("Some block need full reset!\n");
3513 return true;
3514 }
3515 }
35d782fe
CZ
3516 }
3517 return false;
3518}
3519
e3ecdffa
AD
3520/**
3521 * amdgpu_device_ip_soft_reset - do a soft reset
3522 *
3523 * @adev: amdgpu_device pointer
3524 *
3525 * The list of all the hardware IPs that make up the asic is walked and the
3526 * soft_reset callbacks are run if the block is hung. soft_reset handles any
3527 * IP specific hardware or software state changes that are necessary to soft
3528 * reset the IP.
3529 * Returns 0 on success, negative error code on failure.
3530 */
06ec9070 3531static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3532{
3533 int i, r = 0;
3534
3535 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3536 if (!adev->ip_blocks[i].status.valid)
35d782fe 3537 continue;
a1255107
AD
3538 if (adev->ip_blocks[i].status.hang &&
3539 adev->ip_blocks[i].version->funcs->soft_reset) {
3540 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
3541 if (r)
3542 return r;
3543 }
3544 }
3545
3546 return 0;
3547}
3548
e3ecdffa
AD
3549/**
3550 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
3551 *
3552 * @adev: amdgpu_device pointer
3553 *
3554 * The list of all the hardware IPs that make up the asic is walked and the
3555 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
3556 * handles any IP specific hardware or software state changes that are
3557 * necessary after the IP has been soft reset.
3558 * Returns 0 on success, negative error code on failure.
3559 */
06ec9070 3560static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3561{
3562 int i, r = 0;
3563
3564 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3565 if (!adev->ip_blocks[i].status.valid)
35d782fe 3566 continue;
a1255107
AD
3567 if (adev->ip_blocks[i].status.hang &&
3568 adev->ip_blocks[i].version->funcs->post_soft_reset)
3569 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
3570 if (r)
3571 return r;
3572 }
3573
3574 return 0;
3575}
3576
e3ecdffa 3577/**
c33adbc7 3578 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
3579 *
3580 * @adev: amdgpu_device pointer
3581 *
3582 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
3583 * restore things like GPUVM page tables after a GPU reset where
3584 * the contents of VRAM might be lost.
403009bf
CK
3585 *
3586 * Returns:
3587 * 0 on success, negative error code on failure.
e3ecdffa 3588 */
c33adbc7 3589static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 3590{
c41d1cf6 3591 struct dma_fence *fence = NULL, *next = NULL;
403009bf
CK
3592 struct amdgpu_bo *shadow;
3593 long r = 1, tmo;
c41d1cf6
ML
3594
3595 if (amdgpu_sriov_runtime(adev))
b045d3af 3596 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
3597 else
3598 tmo = msecs_to_jiffies(100);
3599
3600 DRM_INFO("recover vram bo from shadow start\n");
3601 mutex_lock(&adev->shadow_list_lock);
403009bf
CK
3602 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
3603
3604 /* No need to recover an evicted BO */
3605 if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
b575f10d 3606 shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
403009bf
CK
3607 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
3608 continue;
3609
3610 r = amdgpu_bo_restore_shadow(shadow, &next);
3611 if (r)
3612 break;
3613
c41d1cf6 3614 if (fence) {
1712fb1a 3615 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
3616 dma_fence_put(fence);
3617 fence = next;
1712fb1a 3618 if (tmo == 0) {
3619 r = -ETIMEDOUT;
c41d1cf6 3620 break;
1712fb1a 3621 } else if (tmo < 0) {
3622 r = tmo;
3623 break;
3624 }
403009bf
CK
3625 } else {
3626 fence = next;
c41d1cf6 3627 }
c41d1cf6
ML
3628 }
3629 mutex_unlock(&adev->shadow_list_lock);
3630
403009bf
CK
3631 if (fence)
3632 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
3633 dma_fence_put(fence);
3634
1712fb1a 3635 if (r < 0 || tmo <= 0) {
3636 DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
3637 return -EIO;
3638 }
c41d1cf6 3639
403009bf
CK
3640 DRM_INFO("recover vram bo from shadow done\n");
3641 return 0;
c41d1cf6
ML
3642}
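/*
 * Note on the timeout handling above: dma_fence_wait_timeout() returns the
 * number of jiffies remaining, and that remainder is fed back into tmo, so
 * the 8 second (SR-IOV runtime) or 100 ms (bare metal) budget is shared
 * across all shadow restore fences rather than applied per buffer.
 */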
3643
a90ad3c2 3644
e3ecdffa 3645/**
06ec9070 3646 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e
ML
3647 *
3648 * @adev: amdgpu device pointer
87e3f136 3649 * @from_hypervisor: request from hypervisor
5740682e
ML
3650 *
3651 * Do VF FLR and reinitialize the ASIC.
3f48c681 3652 * Returns 0 on success, otherwise a negative error code.
e3ecdffa
AD
3653 */
3654static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
3655 bool from_hypervisor)
5740682e
ML
3656{
3657 int r;
3658
3659 if (from_hypervisor)
3660 r = amdgpu_virt_request_full_gpu(adev, true);
3661 else
3662 r = amdgpu_virt_reset_gpu(adev);
3663 if (r)
3664 return r;
a90ad3c2 3665
f81e8d53
WL
3666 amdgpu_amdkfd_pre_reset(adev);
3667
a90ad3c2 3668 /* Resume IP prior to SMC */
06ec9070 3669 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
3670 if (r)
3671 goto error;
a90ad3c2
ML
3672
3673 /* we need recover gart prior to run SMC/CP/SDMA resume */
c1c7ce8f 3674 amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
a90ad3c2 3675
7a3e0bb2
RZ
3676 r = amdgpu_device_fw_loading(adev);
3677 if (r)
3678 return r;
3679
a90ad3c2 3680 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 3681 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
3682 if (r)
3683 goto error;
a90ad3c2
ML
3684
3685 amdgpu_irq_gpu_reset_resume_helper(adev);
5740682e 3686 r = amdgpu_ib_ring_tests(adev);
f81e8d53 3687 amdgpu_amdkfd_post_reset(adev);
a90ad3c2 3688
abc34253 3689error:
d3c117e5 3690 amdgpu_virt_init_data_exchange(adev);
abc34253 3691 amdgpu_virt_release_full_gpu(adev, true);
c41d1cf6 3692 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
e3526257 3693 amdgpu_inc_vram_lost(adev);
c33adbc7 3694 r = amdgpu_device_recover_vram(adev);
a90ad3c2
ML
3695 }
3696
3697 return r;
3698}
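/*
 * Note: AMDGIM_FEATURE_GIM_FLR_VRAMLOST above indicates that the host GIM
 * reports VRAM contents as not surviving the VF FLR, so the vram-lost
 * counter is bumped and amdgpu_device_recover_vram() rebuilds page tables
 * and other shadowed buffers from their GTT copies.
 */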
3699
12938fad
CK
3700/**
3701 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
3702 *
3703 * @adev: amdgpu device pointer
3704 *
3705 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
3706 * a hung GPU.
3707 */
3708bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
3709{
3710 if (!amdgpu_device_ip_check_soft_reset(adev)) {
3711 DRM_INFO("Timeout, but no hardware hang detected.\n");
3712 return false;
3713 }
3714
3ba7b418
AG
3715 if (amdgpu_gpu_recovery == 0)
3716 goto disabled;
3717
3718 if (amdgpu_sriov_vf(adev))
3719 return true;
3720
3721 if (amdgpu_gpu_recovery == -1) {
3722 switch (adev->asic_type) {
fc42d47c
AG
3723 case CHIP_BONAIRE:
3724 case CHIP_HAWAII:
3ba7b418
AG
3725 case CHIP_TOPAZ:
3726 case CHIP_TONGA:
3727 case CHIP_FIJI:
3728 case CHIP_POLARIS10:
3729 case CHIP_POLARIS11:
3730 case CHIP_POLARIS12:
3731 case CHIP_VEGAM:
3732 case CHIP_VEGA20:
3733 case CHIP_VEGA10:
3734 case CHIP_VEGA12:
c43b849f 3735 case CHIP_RAVEN:
3ba7b418
AG
3736 break;
3737 default:
3738 goto disabled;
3739 }
12938fad
CK
3740 }
3741
3742 return true;
3ba7b418
AG
3743
3744disabled:
3745 DRM_INFO("GPU recovery disabled.\n");
3746 return false;
12938fad
CK
3747}
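/*
 * Usage sketch (illustrative, assuming the usual job timeout flow): a ring
 * timeout handler would typically gate recovery on this check, e.g.
 *
 *	if (amdgpu_device_should_recover_gpu(ring->adev))
 *		amdgpu_device_gpu_recover(ring->adev, job);
 *
 * so that a timeout without a detected hang, or an ASIC with recovery
 * disabled, does not trigger a full reset.
 */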
3748
5c6dd71e 3749
26bc5340
AG
3750static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
3751 struct amdgpu_job *job,
3752 bool *need_full_reset_arg)
3753{
3754 int i, r = 0;
3755 bool need_full_reset = *need_full_reset_arg;
71182665 3756
71182665 3757 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
3758 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3759 struct amdgpu_ring *ring = adev->rings[i];
3760
51687759 3761 if (!ring || !ring->sched.thread)
0875dc9e 3762 continue;
5740682e 3763
2f9d4084
ML
3764 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
3765 amdgpu_fence_driver_force_completion(ring);
0875dc9e 3766 }
d38ceaf9 3767
222b5f04
AG
3768	if (job)
3769 drm_sched_increase_karma(&job->base);
3770
1d721ed6 3771 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
3772 if (!amdgpu_sriov_vf(adev)) {
3773
3774 if (!need_full_reset)
3775 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
3776
3777 if (!need_full_reset) {
3778 amdgpu_device_ip_pre_soft_reset(adev);
3779 r = amdgpu_device_ip_soft_reset(adev);
3780 amdgpu_device_ip_post_soft_reset(adev);
3781 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
3782 DRM_INFO("soft reset failed, will fallback to full reset!\n");
3783 need_full_reset = true;
3784 }
3785 }
3786
3787 if (need_full_reset)
3788 r = amdgpu_device_ip_suspend(adev);
3789
3790 *need_full_reset_arg = need_full_reset;
3791 }
3792
3793 return r;
3794}
3795
ce316fa5
LM
3796static int amdgpu_do_asic_reset(struct amdgpu_device *adev,
3797 struct amdgpu_hive_info *hive,
26bc5340
AG
3798 struct list_head *device_list_handle,
3799 bool *need_full_reset_arg)
3800{
3801 struct amdgpu_device *tmp_adev = NULL;
3802 bool need_full_reset = *need_full_reset_arg, vram_lost = false;
3803 int r = 0;
ce316fa5
LM
3804 int cpu = smp_processor_id();
3805 bool use_baco =
3806 (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ?
3807 true : false;
26bc5340
AG
3808
3809 /*
3810 * ASIC reset has to be done on all XGMI hive nodes ASAP
3811 * to allow proper links negotiation in FW (within 1 sec)
3812 */
3813 if (need_full_reset) {
3814 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
ce316fa5
LM
3815 /*
3816 * For XGMI run all resets in parallel to speed up the
3817 * process by scheduling the highpri wq on different
3818 * cpus. For XGMI with baco reset, all nodes must enter
3819 * baco within close proximity before any of them exits.
3820 */
d4535e2c 3821 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
ce316fa5
LM
3822 if (!queue_work_on(cpu, system_highpri_wq,
3823 &tmp_adev->xgmi_reset_work))
d4535e2c 3824 r = -EALREADY;
ce316fa5 3825 cpu = cpumask_next(cpu, cpu_online_mask);
d4535e2c
AG
3826 } else
3827 r = amdgpu_asic_reset(tmp_adev);
ce316fa5 3828 if (r)
d4535e2c 3829 break;
d4535e2c
AG
3830 }
3831
ce316fa5 3832 /* For XGMI wait for all work to complete before proceed */
d4535e2c
AG
3833 if (!r) {
3834 list_for_each_entry(tmp_adev, device_list_handle,
3835 gmc.xgmi.head) {
3836 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
3837 flush_work(&tmp_adev->xgmi_reset_work);
3838 r = tmp_adev->asic_reset_res;
3839 if (r)
3840 break;
ce316fa5
LM
3841 if (use_baco)
3842 tmp_adev->in_baco = true;
d4535e2c
AG
3843 }
3844 }
26bc5340 3845 }
26bc5340 3846
ce316fa5
LM
3847 /*
3848 * For XGMI with baco reset, we need to exit the baco phase by scheduling
3849 * xgmi_reset_work one more time. PSP reset and sGPU skip this
3850 * phase. Do not assume that PSP reset and baco reset can
3851 * coexist within an XGMI hive.
3852 */
3853
3854 if (!r && use_baco) {
3855 cpu = smp_processor_id();
3856 list_for_each_entry(tmp_adev, device_list_handle,
3857 gmc.xgmi.head) {
3858 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
3859 if (!queue_work_on(cpu,
3860 system_highpri_wq,
3861 &tmp_adev->xgmi_reset_work))
3862 r = -EALREADY;
3863 if (r)
3864 break;
3865 cpu = cpumask_next(cpu, cpu_online_mask);
3866 }
3867 }
3868 }
3869
3870 if (!r && use_baco) {
3871 list_for_each_entry(tmp_adev, device_list_handle,
3872 gmc.xgmi.head) {
3873 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
3874 flush_work(&tmp_adev->xgmi_reset_work);
3875 r = tmp_adev->asic_reset_res;
3876 if (r)
3877 break;
3878 tmp_adev->in_baco = false;
3879 }
3880 }
3881 }
3882
3883 if (r) {
3884 DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s",
3885 r, tmp_adev->ddev->unique);
3886 goto end;
3887 }
3888 }
26bc5340
AG
3889
3890 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3891 if (need_full_reset) {
3892 /* post card */
3893 if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
3894 DRM_WARN("asic atom init failed!");
3895
3896 if (!r) {
3897 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
3898 r = amdgpu_device_ip_resume_phase1(tmp_adev);
3899 if (r)
3900 goto out;
3901
3902 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
3903 if (vram_lost) {
77e7f829 3904 DRM_INFO("VRAM is lost due to GPU reset!\n");
e3526257 3905 amdgpu_inc_vram_lost(tmp_adev);
26bc5340
AG
3906 }
3907
3908 r = amdgpu_gtt_mgr_recover(
3909 &tmp_adev->mman.bdev.man[TTM_PL_TT]);
3910 if (r)
3911 goto out;
3912
3913 r = amdgpu_device_fw_loading(tmp_adev);
3914 if (r)
3915 return r;
3916
3917 r = amdgpu_device_ip_resume_phase2(tmp_adev);
3918 if (r)
3919 goto out;
3920
3921 if (vram_lost)
3922 amdgpu_device_fill_reset_magic(tmp_adev);
3923
fdafb359
EQ
3924 /*
3925 * Add this ASIC back as tracked since the reset already
3926 * completed successfully.
3927 */
3928 amdgpu_register_gpu_instance(tmp_adev);
3929
7c04ca50 3930 r = amdgpu_device_ip_late_init(tmp_adev);
3931 if (r)
3932 goto out;
3933
e79a04d5 3934 /* must succeed. */
511fdbc3 3935 amdgpu_ras_resume(tmp_adev);
e79a04d5 3936
26bc5340
AG
3937 /* Update PSP FW topology after reset */
3938 if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
3939 r = amdgpu_xgmi_update_topology(hive, tmp_adev);
3940 }
3941 }
3942
3943
3944out:
3945 if (!r) {
3946 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
3947 r = amdgpu_ib_ring_tests(tmp_adev);
3948 if (r) {
3949 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
3950 r = amdgpu_device_ip_suspend(tmp_adev);
3951 need_full_reset = true;
3952 r = -EAGAIN;
3953 goto end;
3954 }
3955 }
3956
3957 if (!r)
3958 r = amdgpu_device_recover_vram(tmp_adev);
3959 else
3960 tmp_adev->asic_reset_res = r;
3961 }
3962
3963end:
3964 *need_full_reset_arg = need_full_reset;
3965 return r;
3966}
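/*
 * Note on the error contract above: if the post-reset IB ring tests fail,
 * the IP blocks are suspended again, need_full_reset is forced to true and
 * -EAGAIN is returned, which the caller (amdgpu_device_gpu_recover) uses to
 * retry the whole sequence with a full ASIC reset.
 */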
3967
1d721ed6 3968static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
26bc5340 3969{
1d721ed6
AG
3970 if (trylock) {
3971 if (!mutex_trylock(&adev->lock_reset))
3972 return false;
3973 } else
3974 mutex_lock(&adev->lock_reset);
5740682e 3975
26bc5340
AG
3976 atomic_inc(&adev->gpu_reset_counter);
3977 adev->in_gpu_reset = 1;
a3a09142
AD
3978 switch (amdgpu_asic_reset_method(adev)) {
3979 case AMD_RESET_METHOD_MODE1:
3980 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
3981 break;
3982 case AMD_RESET_METHOD_MODE2:
3983 adev->mp1_state = PP_MP1_STATE_RESET;
3984 break;
3985 default:
3986 adev->mp1_state = PP_MP1_STATE_NONE;
3987 break;
3988 }
1d721ed6
AG
3989
3990 return true;
26bc5340 3991}
d38ceaf9 3992
26bc5340
AG
3993static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
3994{
89041940 3995 amdgpu_vf_error_trans_all(adev);
a3a09142 3996 adev->mp1_state = PP_MP1_STATE_NONE;
13a752e3
ML
3997 adev->in_gpu_reset = 0;
3998 mutex_unlock(&adev->lock_reset);
26bc5340
AG
3999}
4000
26bc5340
AG
4001/**
4002 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
4003 *
4004 * @adev: amdgpu device pointer
4005 * @job: which job triggered the hang
4006 *
4007 * Attempt to reset the GPU if it has hung (all asics).
4008 * Attempt a soft reset or a full reset and reinitialize the ASIC.
4009 * Returns 0 for success or an error on failure.
4010 */
4011
4012int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
4013 struct amdgpu_job *job)
4014{
1d721ed6
AG
4015 struct list_head device_list, *device_list_handle = NULL;
4016 bool need_full_reset, job_signaled;
26bc5340 4017 struct amdgpu_hive_info *hive = NULL;
26bc5340 4018 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 4019 int i, r = 0;
7c6e68c7 4020 bool in_ras_intr = amdgpu_ras_intr_triggered();
b823821f
LM
4021 bool use_baco =
4022 (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ?
4023 true : false;
26bc5340 4024
d5ea093e
AG
4025 /*
4026 * Flush RAM to disk so that after reboot
4027 * the user can read the log and see why the system rebooted.
4028 */
b823821f 4029 if (in_ras_intr && !use_baco && amdgpu_ras_get_context(adev)->reboot) {
d5ea093e
AG
4030
4031 DRM_WARN("Emergency reboot.");
4032
4033 ksys_sync_helper();
4034 emergency_restart();
4035 }
4036
1d721ed6 4037 need_full_reset = job_signaled = false;
26bc5340
AG
4038 INIT_LIST_HEAD(&device_list);
4039
b823821f
LM
4040 dev_info(adev->dev, "GPU %s begin!\n",
4041 (in_ras_intr && !use_baco) ? "jobs stop":"reset");
26bc5340 4042
beff74bc 4043 cancel_delayed_work_sync(&adev->delayed_init_work);
c53e4db7 4044
1d721ed6
AG
4045 hive = amdgpu_get_xgmi_hive(adev, false);
4046
26bc5340 4047 /*
1d721ed6
AG
4048 * Here we trylock to avoid chain of resets executing from
4049 * either trigger by jobs on different adevs in XGMI hive or jobs on
4050 * different schedulers for same device while this TO handler is running.
4051 * We always reset all schedulers for device and all devices for XGMI
4052 * hive so that should take care of them too.
26bc5340 4053 */
1d721ed6
AG
4054
4055 if (hive && !mutex_trylock(&hive->reset_lock)) {
4056 DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
0b2d2c2e 4057 job ? job->base.id : -1, hive->hive_id);
26bc5340 4058 return 0;
1d721ed6 4059 }
26bc5340
AG
4060
4061 /* Start with adev pre asic reset first for soft reset check.*/
1d721ed6
AG
4062 if (!amdgpu_device_lock_adev(adev, !hive)) {
4063 DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
0b2d2c2e 4064 job ? job->base.id : -1);
1d721ed6 4065 return 0;
26bc5340
AG
4066 }
4067
7c6e68c7
AG
4068 /* Block kfd: SRIOV would do it separately */
4069 if (!amdgpu_sriov_vf(adev))
4070 amdgpu_amdkfd_pre_reset(adev);
4071
26bc5340 4072 /* Build list of devices to reset */
1d721ed6 4073 if (adev->gmc.xgmi.num_physical_nodes > 1) {
26bc5340 4074 if (!hive) {
7c6e68c7
AG
4075 /*unlock kfd: SRIOV would do it separately */
4076 if (!amdgpu_sriov_vf(adev))
4077 amdgpu_amdkfd_post_reset(adev);
26bc5340
AG
4078 amdgpu_device_unlock_adev(adev);
4079 return -ENODEV;
4080 }
4081
4082 /*
4083 * In case we are in XGMI hive mode, device reset is done for all the
4084 * nodes in the hive to retrain all XGMI links, and hence the reset
4085 * sequence is executed in a loop on all nodes.
4086 */
4087 device_list_handle = &hive->device_list;
4088 } else {
4089 list_add_tail(&adev->gmc.xgmi.head, &device_list);
4090 device_list_handle = &device_list;
4091 }
4092
1d721ed6
AG
4093 /* block all schedulers and reset given job's ring */
4094 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
7c6e68c7 4095 if (tmp_adev != adev) {
12ffa55d 4096 amdgpu_device_lock_adev(tmp_adev, false);
7c6e68c7
AG
4097 if (!amdgpu_sriov_vf(tmp_adev))
4098 amdgpu_amdkfd_pre_reset(tmp_adev);
4099 }
4100
12ffa55d
AG
4101 /*
4102 * Mark these ASICs to be reset as untracked first,
4103 * and add them back after the reset completes.
4104 */
4105 amdgpu_unregister_gpu_instance(tmp_adev);
4106
f1c1314b 4107 /* disable ras on ALL IPs */
b823821f
LM
4108 if (!(in_ras_intr && !use_baco) &&
4109 amdgpu_device_ip_need_full_reset(tmp_adev))
f1c1314b 4110 amdgpu_ras_suspend(tmp_adev);
4111
1d721ed6
AG
4112 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4113 struct amdgpu_ring *ring = tmp_adev->rings[i];
4114
4115 if (!ring || !ring->sched.thread)
4116 continue;
4117
0b2d2c2e 4118 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
7c6e68c7 4119
b823821f 4120 if (in_ras_intr && !use_baco)
7c6e68c7 4121 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
1d721ed6
AG
4122 }
4123 }
4124
4125
b823821f 4126 if (in_ras_intr && !use_baco)
7c6e68c7
AG
4127 goto skip_sched_resume;
4128
1d721ed6
AG
4129 /*
4130 * Must check guilty signal here since after this point all old
4131 * HW fences are force signaled.
4132 *
4133 * job->base holds a reference to parent fence
4134 */
4135 if (job && job->base.s_fence->parent &&
4136 dma_fence_is_signaled(job->base.s_fence->parent))
4137 job_signaled = true;
4138
1d721ed6
AG
4139 if (job_signaled) {
4140 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
4141 goto skip_hw_reset;
4142 }
4143
4144
4145	/* Guilty job will be freed after this */
0b2d2c2e 4146 r = amdgpu_device_pre_asic_reset(adev, job, &need_full_reset);
1d721ed6
AG
4147 if (r) {
4148 /*TODO Should we stop ?*/
4149 DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
4150 r, adev->ddev->unique);
4151 adev->asic_reset_res = r;
4152 }
4153
26bc5340
AG
4154retry: /* Rest of adevs pre asic reset from XGMI hive. */
4155 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4156
4157 if (tmp_adev == adev)
4158 continue;
4159
26bc5340
AG
4160 r = amdgpu_device_pre_asic_reset(tmp_adev,
4161 NULL,
4162 &need_full_reset);
4163 /*TODO Should we stop ?*/
4164 if (r) {
4165 DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
4166 r, tmp_adev->ddev->unique);
4167 tmp_adev->asic_reset_res = r;
4168 }
4169 }
4170
4171 /* Actual ASIC resets if needed.*/
4172 /* TODO Implement XGMI hive reset logic for SRIOV */
4173 if (amdgpu_sriov_vf(adev)) {
4174 r = amdgpu_device_reset_sriov(adev, job ? false : true);
4175 if (r)
4176 adev->asic_reset_res = r;
4177 } else {
ce316fa5
LM
4178 r = amdgpu_do_asic_reset(adev, hive, device_list_handle,
4179 &need_full_reset);
26bc5340
AG
4180 if (r && r == -EAGAIN)
4181 goto retry;
4182 }
4183
1d721ed6
AG
4184skip_hw_reset:
4185
26bc5340
AG
4186 /* Post ASIC reset for all devs .*/
4187 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
7c6e68c7 4188
1d721ed6
AG
4189 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4190 struct amdgpu_ring *ring = tmp_adev->rings[i];
4191
4192 if (!ring || !ring->sched.thread)
4193 continue;
4194
4195			/* No point in resubmitting jobs if we didn't HW reset */
4196 if (!tmp_adev->asic_reset_res && !job_signaled)
4197 drm_sched_resubmit_jobs(&ring->sched);
4198
4199 drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
4200 }
4201
4202 if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
4203 drm_helper_resume_force_mode(tmp_adev->ddev);
4204 }
4205
4206 tmp_adev->asic_reset_res = 0;
26bc5340
AG
4207
4208 if (r) {
4209 /* bad news, how to tell it to userspace ? */
12ffa55d 4210 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340
AG
4211 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
4212 } else {
12ffa55d 4213 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
26bc5340 4214 }
7c6e68c7 4215 }
26bc5340 4216
7c6e68c7
AG
4217skip_sched_resume:
4218 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
4219 /*unlock kfd: SRIOV would do it separately */
b823821f 4220 if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev))
7c6e68c7 4221 amdgpu_amdkfd_post_reset(tmp_adev);
26bc5340
AG
4222 amdgpu_device_unlock_adev(tmp_adev);
4223 }
4224
1d721ed6 4225 if (hive)
22d6575b 4226 mutex_unlock(&hive->reset_lock);
26bc5340
AG
4227
4228 if (r)
4229 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
d38ceaf9
AD
4230 return r;
4231}
4232
e3ecdffa
AD
4233/**
4234 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
4235 *
4236 * @adev: amdgpu_device pointer
4237 *
4238 * Fetches and stores in the driver the PCIE capabilities (gen speed
4239 * and lanes) of the slot the device is in. Handles APUs and
4240 * virtualized environments where PCIE config space may not be available.
4241 */
5494d864 4242static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 4243{
5d9a6330 4244 struct pci_dev *pdev;
c5313457
HK
4245 enum pci_bus_speed speed_cap, platform_speed_cap;
4246 enum pcie_link_width platform_link_width;
d0dd7f0c 4247
cd474ba0
AD
4248 if (amdgpu_pcie_gen_cap)
4249 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 4250
cd474ba0
AD
4251 if (amdgpu_pcie_lane_cap)
4252 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 4253
cd474ba0
AD
4254 /* covers APUs as well */
4255 if (pci_is_root_bus(adev->pdev->bus)) {
4256 if (adev->pm.pcie_gen_mask == 0)
4257 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
4258 if (adev->pm.pcie_mlw_mask == 0)
4259 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 4260 return;
cd474ba0 4261 }
d0dd7f0c 4262
c5313457
HK
4263 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
4264 return;
4265
dbaa922b
AD
4266 pcie_bandwidth_available(adev->pdev, NULL,
4267 &platform_speed_cap, &platform_link_width);
c5313457 4268
cd474ba0 4269 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
4270 /* asic caps */
4271 pdev = adev->pdev;
4272 speed_cap = pcie_get_speed_cap(pdev);
4273 if (speed_cap == PCI_SPEED_UNKNOWN) {
4274 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
4275 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4276 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 4277 } else {
5d9a6330
AD
4278 if (speed_cap == PCIE_SPEED_16_0GT)
4279 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4280 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4281 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4282 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
4283 else if (speed_cap == PCIE_SPEED_8_0GT)
4284 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4285 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4286 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
4287 else if (speed_cap == PCIE_SPEED_5_0GT)
4288 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4289 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
4290 else
4291 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
4292 }
4293 /* platform caps */
c5313457 4294 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
4295 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4296 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4297 } else {
c5313457 4298 if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
4299 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4300 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4301 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
4302 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 4303 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
4304 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4305 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
4306 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 4307 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
4308 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
4309 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
4310 else
4311 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
4312
cd474ba0
AD
4313 }
4314 }
4315 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 4316 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
4317 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
4318 } else {
c5313457 4319 switch (platform_link_width) {
5d9a6330 4320 case PCIE_LNK_X32:
cd474ba0
AD
4321 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
4322 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4323 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4324 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4325 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4326 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4327 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4328 break;
5d9a6330 4329 case PCIE_LNK_X16:
cd474ba0
AD
4330 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
4331 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4332 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4333 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4334 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4335 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4336 break;
5d9a6330 4337 case PCIE_LNK_X12:
cd474ba0
AD
4338 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
4339 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4340 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4341 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4342 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4343 break;
5d9a6330 4344 case PCIE_LNK_X8:
cd474ba0
AD
4345 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
4346 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4347 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4348 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4349 break;
5d9a6330 4350 case PCIE_LNK_X4:
cd474ba0
AD
4351 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
4352 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4353 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4354 break;
5d9a6330 4355 case PCIE_LNK_X2:
cd474ba0
AD
4356 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
4357 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
4358 break;
5d9a6330 4359 case PCIE_LNK_X1:
cd474ba0
AD
4360 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
4361 break;
4362 default:
4363 break;
4364 }
d0dd7f0c
AD
4365 }
4366 }
4367}
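/*
 * Worked example (illustrative): for a Gen3-capable dGPU sitting in a
 * Gen3 x16 slot, the code above would typically leave pcie_gen_mask with
 * both the CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1..GEN3 bits (ASIC side)
 * and the CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1..GEN3 bits (platform side)
 * set, and pcie_mlw_mask with the X1..X16 width bits set; the power
 * management code later consults these masks when selecting link speeds
 * and widths.
 */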
d38ceaf9 4368
361dbd01
AD
4369int amdgpu_device_baco_enter(struct drm_device *dev)
4370{
4371 struct amdgpu_device *adev = dev->dev_private;
7a22677b 4372 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01
AD
4373
4374 if (!amdgpu_device_supports_baco(adev->ddev))
4375 return -ENOTSUPP;
4376
7a22677b
LM
4377 if (ras && ras->supported)
4378 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
4379
361dbd01
AD
4380 if (is_support_sw_smu(adev)) {
4381 struct smu_context *smu = &adev->smu;
4382 int ret;
4383
4384 ret = smu_baco_enter(smu);
4385 if (ret)
4386 return ret;
361dbd01
AD
4387 } else {
4388 void *pp_handle = adev->powerplay.pp_handle;
4389 const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
4390
4391		if (!pp_funcs || !pp_funcs->get_asic_baco_state || !pp_funcs->set_asic_baco_state)
4392 return -ENOENT;
4393
4394 /* enter BACO state */
4395 if (pp_funcs->set_asic_baco_state(pp_handle, 1))
4396 return -EIO;
361dbd01 4397 }
7a22677b
LM
4398
4399 return 0;
361dbd01
AD
4400}
4401
4402int amdgpu_device_baco_exit(struct drm_device *dev)
4403{
4404 struct amdgpu_device *adev = dev->dev_private;
7a22677b 4405 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
361dbd01
AD
4406
4407 if (!amdgpu_device_supports_baco(adev->ddev))
4408 return -ENOTSUPP;
4409
4410 if (is_support_sw_smu(adev)) {
4411 struct smu_context *smu = &adev->smu;
4412 int ret;
4413
4414 ret = smu_baco_exit(smu);
4415 if (ret)
4416 return ret;
4417
361dbd01
AD
4418 } else {
4419 void *pp_handle = adev->powerplay.pp_handle;
4420 const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
4421
4422		if (!pp_funcs || !pp_funcs->get_asic_baco_state || !pp_funcs->set_asic_baco_state)
4423 return -ENOENT;
4424
4425 /* exit BACO state */
4426 if (pp_funcs->set_asic_baco_state(pp_handle, 0))
4427 return -EIO;
361dbd01 4428 }
7a22677b
LM
4429
4430 if (ras && ras->supported)
4431 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
4432
4433 return 0;
361dbd01 4434}
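/*
 * Usage sketch (illustrative only): a runtime-PM path that relies on BACO
 * could bracket the low-power window with these helpers, e.g.
 *
 *	r = amdgpu_device_baco_enter(drm_dev);
 *	if (r)
 *		return r;
 *	...device remains in BACO while runtime suspended...
 *	r = amdgpu_device_baco_exit(drm_dev);
 *
 * Both helpers return -ENOTSUPP when the device does not support BACO, so
 * callers are expected to fall back to another suspend method in that case.
 */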