drm/amdgpu: cancel late_init_work before gpu reset
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/console.h>
#include <linux/slab.h>
#include <drm/drmP.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "bif/bif_4_1_d.h"
#include <linux/pci.h>
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"

MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");

#define AMDGPU_RESUME_MS		2000

static const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"LAST",
};

/**
 * DOC: pcie_replay_count
 *
 * The amdgpu driver provides a sysfs API for reporting the total number
 * of PCIe replays (NAKs).
 * The file pcie_replay_count is used for this and returns the total
 * number of replays as a sum of the NAKs generated and NAKs received.
 */

static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;
	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);

	return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
}

static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
		amdgpu_device_get_pcie_replay_count, NULL);

static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);

/**
 * amdgpu_device_is_px - Is the device a dGPU with HG/PX power control
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with HG/PX power control,
 * otherwise returns false.
 */
132bool amdgpu_device_is_px(struct drm_device *dev)
133{
134 struct amdgpu_device *adev = dev->dev_private;
135
2f7d10b3 136 if (adev->flags & AMD_IS_PX)
d38ceaf9
AD
137 return true;
138 return false;
139}
140
141/*
142 * MMIO register access helper functions.
143 */
e3ecdffa
AD
144/**
145 * amdgpu_mm_rreg - read a memory mapped IO register
146 *
147 * @adev: amdgpu_device pointer
148 * @reg: dword aligned register offset
149 * @acc_flags: access flags which require special behavior
150 *
151 * Returns the 32 bit value from the offset specified.
152 */
d38ceaf9 153uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
15d72fd7 154 uint32_t acc_flags)
d38ceaf9 155{
f4b373f4
TSD
156 uint32_t ret;
157
43ca8efa 158 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
bc992ba5 159 return amdgpu_virt_kiq_rreg(adev, reg);
bc992ba5 160
15d72fd7 161 if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
f4b373f4 162 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
d38ceaf9
AD
163 else {
164 unsigned long flags;
d38ceaf9
AD
165
166 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
167 writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
168 ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
169 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
d38ceaf9 170 }
f4b373f4
TSD
171 trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
172 return ret;
d38ceaf9
AD
173}
174
/*
 * MMIO register read with byte offset helper
 * @offset: byte offset from the start of MMIO space
 */

e3ecdffa
AD
181/**
182 * amdgpu_mm_rreg8 - read a memory mapped IO register
183 *
184 * @adev: amdgpu_device pointer
185 * @offset: byte aligned register offset
186 *
187 * Returns the 8 bit value from the offset specified.
188 */
421a2a30
ML
189uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) {
190 if (offset < adev->rmmio_size)
191 return (readb(adev->rmmio + offset));
192 BUG();
193}
194
/*
 * MMIO register write with byte offset helper
 * @offset: byte offset from the start of MMIO space
 * @value: the value to be written to the register
 */
/**
 * amdgpu_mm_wreg8 - write to a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
210void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) {
211 if (offset < adev->rmmio_size)
212 writeb(value, adev->rmmio + offset);
213 else
214 BUG();
215}
216
e3ecdffa
AD
217/**
218 * amdgpu_mm_wreg - write to a memory mapped IO register
219 *
220 * @adev: amdgpu_device pointer
221 * @reg: dword aligned register offset
222 * @v: 32 bit value to write to the register
223 * @acc_flags: access flags which require special behavior
224 *
225 * Writes the value specified to the offset specified.
226 */
d38ceaf9 227void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
15d72fd7 228 uint32_t acc_flags)
d38ceaf9 229{
f4b373f4 230 trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);
4e99a44e 231
47ed4e1c
KW
232 if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
233 adev->last_mm_index = v;
234 }
235
43ca8efa 236 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
bc992ba5 237 return amdgpu_virt_kiq_wreg(adev, reg, v);
bc992ba5 238
15d72fd7 239 if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
d38ceaf9
AD
240 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
241 else {
242 unsigned long flags;
243
244 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
245 writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
246 writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
247 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
248 }
47ed4e1c
KW
249
250 if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
251 udelay(500);
252 }
d38ceaf9
AD
253}
254
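/*
 * Illustrative sketch (not part of the original file): a read-modify-write of
 * a dword register using the helpers above. Callers normally go through the
 * RREG32()/WREG32() macros, which wrap amdgpu_mm_rreg()/amdgpu_mm_wreg(); the
 * register offset and bit masks below are made up for the example, and the
 * block is compiled out.
 */
#if 0
static void amdgpu_example_rmw(struct amdgpu_device *adev)
{
	uint32_t reg = 0x1234;			/* hypothetical dword offset */
	uint32_t val;

	val = amdgpu_mm_rreg(adev, reg, 0);	/* plain read, no special flags */
	val &= ~0x00000003;			/* clear a hypothetical 2-bit field */
	val |= 0x00000001;			/* set the new field value */
	amdgpu_mm_wreg(adev, reg, val, 0);	/* write the result back */
}
#endif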
e3ecdffa
AD
255/**
256 * amdgpu_io_rreg - read an IO register
257 *
258 * @adev: amdgpu_device pointer
259 * @reg: dword aligned register offset
260 *
261 * Returns the 32 bit value from the offset specified.
262 */
d38ceaf9
AD
263u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
264{
265 if ((reg * 4) < adev->rio_mem_size)
266 return ioread32(adev->rio_mem + (reg * 4));
267 else {
268 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
269 return ioread32(adev->rio_mem + (mmMM_DATA * 4));
270 }
271}
272
e3ecdffa
AD
273/**
274 * amdgpu_io_wreg - write to an IO register
275 *
276 * @adev: amdgpu_device pointer
277 * @reg: dword aligned register offset
278 * @v: 32 bit value to write to the register
279 *
280 * Writes the value specified to the offset specified.
281 */
d38ceaf9
AD
282void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
283{
47ed4e1c
KW
284 if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
285 adev->last_mm_index = v;
286 }
d38ceaf9
AD
287
288 if ((reg * 4) < adev->rio_mem_size)
289 iowrite32(v, adev->rio_mem + (reg * 4));
290 else {
291 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
292 iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
293 }
47ed4e1c
KW
294
295 if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
296 udelay(500);
297 }
d38ceaf9
AD
298}
299
300/**
301 * amdgpu_mm_rdoorbell - read a doorbell dword
302 *
303 * @adev: amdgpu_device pointer
304 * @index: doorbell index
305 *
306 * Returns the value in the doorbell aperture at the
307 * requested doorbell index (CIK).
308 */
309u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
310{
311 if (index < adev->doorbell.num_doorbells) {
312 return readl(adev->doorbell.ptr + index);
313 } else {
314 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
315 return 0;
316 }
317}
318
319/**
320 * amdgpu_mm_wdoorbell - write a doorbell dword
321 *
322 * @adev: amdgpu_device pointer
323 * @index: doorbell index
324 * @v: value to write
325 *
326 * Writes @v to the doorbell aperture at the
327 * requested doorbell index (CIK).
328 */
329void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
330{
331 if (index < adev->doorbell.num_doorbells) {
332 writel(v, adev->doorbell.ptr + index);
333 } else {
334 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
335 }
336}
337
832be404
KW
338/**
339 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
340 *
341 * @adev: amdgpu_device pointer
342 * @index: doorbell index
343 *
344 * Returns the value in the doorbell aperture at the
345 * requested doorbell index (VEGA10+).
346 */
347u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
348{
349 if (index < adev->doorbell.num_doorbells) {
350 return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
351 } else {
352 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
353 return 0;
354 }
355}
356
357/**
358 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
359 *
360 * @adev: amdgpu_device pointer
361 * @index: doorbell index
362 * @v: value to write
363 *
364 * Writes @v to the doorbell aperture at the
365 * requested doorbell index (VEGA10+).
366 */
367void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
368{
369 if (index < adev->doorbell.num_doorbells) {
370 atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
371 } else {
372 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
373 }
374}
375
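/*
 * Illustrative sketch (not part of the original file): ringing a doorbell with
 * the helpers above. The doorbell index would normally come from the ring's
 * allocated doorbell slot; the values here are hypothetical and the block is
 * compiled out.
 */
#if 0
static void amdgpu_example_ring_doorbell(struct amdgpu_device *adev,
					 u32 doorbell_index, u64 wptr)
{
	/* VEGA10 and newer use 64 bit doorbells, older asics use 32 bit */
	if (adev->asic_type >= CHIP_VEGA10)
		amdgpu_mm_wdoorbell64(adev, doorbell_index, wptr);
	else
		amdgpu_mm_wdoorbell(adev, doorbell_index, (u32)wptr);
}
#endif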
d38ceaf9
AD
376/**
377 * amdgpu_invalid_rreg - dummy reg read function
378 *
379 * @adev: amdgpu device pointer
380 * @reg: offset of register
381 *
382 * Dummy register read function. Used for register blocks
383 * that certain asics don't have (all asics).
384 * Returns the value in the register.
385 */
386static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
387{
388 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
389 BUG();
390 return 0;
391}
392
/**
 * amdgpu_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
403static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
404{
405 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
406 reg, v);
407 BUG();
408}
409
410/**
411 * amdgpu_block_invalid_rreg - dummy reg read function
412 *
413 * @adev: amdgpu device pointer
414 * @block: offset of instance
415 * @reg: offset of register
416 *
417 * Dummy register read function. Used for register blocks
418 * that certain asics don't have (all asics).
419 * Returns the value in the register.
420 */
421static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
422 uint32_t block, uint32_t reg)
423{
424 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
425 reg, block);
426 BUG();
427 return 0;
428}
429
/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
441static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
442 uint32_t block,
443 uint32_t reg, uint32_t v)
444{
445 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
446 reg, block, v);
447 BUG();
448}
449
e3ecdffa
AD
450/**
451 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
452 *
453 * @adev: amdgpu device pointer
454 *
455 * Allocates a scratch page of VRAM for use by various things in the
456 * driver.
457 */
06ec9070 458static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
d38ceaf9 459{
a4a02777
CK
460 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
461 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
462 &adev->vram_scratch.robj,
463 &adev->vram_scratch.gpu_addr,
464 (void **)&adev->vram_scratch.ptr);
d38ceaf9
AD
465}
466
e3ecdffa
AD
467/**
468 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
469 *
470 * @adev: amdgpu device pointer
471 *
472 * Frees the VRAM scratch page.
473 */
06ec9070 474static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 475{
078af1a3 476 amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
d38ceaf9
AD
477}
478
/**
 * amdgpu_device_program_register_sequence - program an array of registers.
 *
 * @adev: amdgpu_device pointer
 * @registers: pointer to the register array
 * @array_size: size of the register array
 *
 * Programs an array of registers with AND and OR masks.
 * This is a helper for setting golden registers.
 */
489void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
490 const u32 *registers,
491 const u32 array_size)
d38ceaf9
AD
492{
493 u32 tmp, reg, and_mask, or_mask;
494 int i;
495
496 if (array_size % 3)
497 return;
498
499 for (i = 0; i < array_size; i +=3) {
500 reg = registers[i + 0];
501 and_mask = registers[i + 1];
502 or_mask = registers[i + 2];
503
504 if (and_mask == 0xffffffff) {
505 tmp = or_mask;
506 } else {
507 tmp = RREG32(reg);
508 tmp &= ~and_mask;
509 tmp |= or_mask;
510 }
511 WREG32(reg, tmp);
512 }
513}
514
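/*
 * Illustrative sketch (not part of the original file): a golden register list
 * as consumed by amdgpu_device_program_register_sequence(). Each entry is an
 * {offset, AND mask, OR mask} triple; the offsets and masks below are
 * hypothetical and the block is compiled out.
 */
#if 0
static const u32 example_golden_settings[] = {
	/* offset,  and_mask,    or_mask */
	0x1234,     0xffffffff,  0x00000001,	/* and_mask == ~0: plain write of or_mask */
	0x1238,     0x0000ff00,  0x00002100,	/* read, clear bits 8-15, then OR in new value */
};

static void amdgpu_example_init_golden_registers(struct amdgpu_device *adev)
{
	amdgpu_device_program_register_sequence(adev,
						example_golden_settings,
						ARRAY_SIZE(example_golden_settings));
}
#endif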
e3ecdffa
AD
515/**
516 * amdgpu_device_pci_config_reset - reset the GPU
517 *
518 * @adev: amdgpu_device pointer
519 *
520 * Resets the GPU using the pci config reset sequence.
521 * Only applicable to asics prior to vega10.
522 */
8111c387 523void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
524{
525 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
526}
527
528/*
529 * GPU doorbell aperture helpers function.
530 */
531/**
06ec9070 532 * amdgpu_device_doorbell_init - Init doorbell driver information.
d38ceaf9
AD
533 *
534 * @adev: amdgpu_device pointer
535 *
536 * Init doorbell driver information (CIK)
537 * Returns 0 on success, error on failure.
538 */
06ec9070 539static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
d38ceaf9 540{
6585661d 541
705e519e
CK
542 /* No doorbell on SI hardware generation */
543 if (adev->asic_type < CHIP_BONAIRE) {
544 adev->doorbell.base = 0;
545 adev->doorbell.size = 0;
546 adev->doorbell.num_doorbells = 0;
547 adev->doorbell.ptr = NULL;
548 return 0;
549 }
550
d6895ad3
CK
551 if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
552 return -EINVAL;
553
22357775
AD
554 amdgpu_asic_init_doorbell_index(adev);
555
d38ceaf9
AD
556 /* doorbell bar mapping */
557 adev->doorbell.base = pci_resource_start(adev->pdev, 2);
558 adev->doorbell.size = pci_resource_len(adev->pdev, 2);
559
edf600da 560 adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
9564f192 561 adev->doorbell_index.max_assignment+1);
d38ceaf9
AD
562 if (adev->doorbell.num_doorbells == 0)
563 return -EINVAL;
564
	/* For Vega, reserve and map two pages on the doorbell BAR since the SDMA
	 * paging queue doorbell uses the second page. The
	 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
	 * doorbells are in the first page. So with the paging queue enabled,
	 * the max num_doorbells should be increased by one page (0x400 in dwords).
	 */
571 if (adev->asic_type >= CHIP_VEGA10)
88dc26e4 572 adev->doorbell.num_doorbells += 0x400;
ec3db8a6 573
8972e5d2
CK
574 adev->doorbell.ptr = ioremap(adev->doorbell.base,
575 adev->doorbell.num_doorbells *
576 sizeof(u32));
577 if (adev->doorbell.ptr == NULL)
d38ceaf9 578 return -ENOMEM;
d38ceaf9
AD
579
580 return 0;
581}
582
583/**
06ec9070 584 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
d38ceaf9
AD
585 *
586 * @adev: amdgpu_device pointer
587 *
588 * Tear down doorbell driver information (CIK)
589 */
06ec9070 590static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
d38ceaf9
AD
591{
592 iounmap(adev->doorbell.ptr);
593 adev->doorbell.ptr = NULL;
594}
595
22cb0164 596
d38ceaf9
AD
597
/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */
603
604/**
06ec9070 605 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
606 *
607 * @adev: amdgpu_device pointer
608 *
609 * Disables Writeback and frees the Writeback memory (all asics).
610 * Used at driver shutdown.
611 */
06ec9070 612static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
d38ceaf9
AD
613{
614 if (adev->wb.wb_obj) {
a76ed485
AD
615 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
616 &adev->wb.gpu_addr,
617 (void **)&adev->wb.wb);
d38ceaf9
AD
618 adev->wb.wb_obj = NULL;
619 }
620}
621
622/**
06ec9070 623 * amdgpu_device_wb_init- Init Writeback driver info and allocate memory
d38ceaf9
AD
624 *
625 * @adev: amdgpu_device pointer
626 *
455a7bc2 627 * Initializes writeback and allocates writeback memory (all asics).
d38ceaf9
AD
628 * Used at driver startup.
629 * Returns 0 on success or an -error on failure.
630 */
06ec9070 631static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
632{
633 int r;
634
635 if (adev->wb.wb_obj == NULL) {
97407b63
AD
636 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
637 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
638 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
639 &adev->wb.wb_obj, &adev->wb.gpu_addr,
640 (void **)&adev->wb.wb);
d38ceaf9
AD
641 if (r) {
642 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
643 return r;
644 }
d38ceaf9
AD
645
646 adev->wb.num_wb = AMDGPU_MAX_WB;
647 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
648
649 /* clear wb memory */
73469585 650 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
651 }
652
653 return 0;
654}
655
656/**
131b4b36 657 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
658 *
659 * @adev: amdgpu_device pointer
660 * @wb: wb index
661 *
662 * Allocate a wb slot for use by the driver (all asics).
663 * Returns 0 on success or -EINVAL on failure.
664 */
131b4b36 665int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9
AD
666{
667 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 668
97407b63 669 if (offset < adev->wb.num_wb) {
7014285a 670 __set_bit(offset, adev->wb.used);
63ae07ca 671 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
672 return 0;
673 } else {
674 return -EINVAL;
675 }
676}
677
d38ceaf9 678/**
131b4b36 679 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
680 *
681 * @adev: amdgpu_device pointer
682 * @wb: wb index
683 *
684 * Free a wb slot allocated for use by the driver (all asics)
685 */
131b4b36 686void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 687{
73469585 688 wb >>= 3;
d38ceaf9 689 if (wb < adev->wb.num_wb)
73469585 690 __clear_bit(wb, adev->wb.used);
d38ceaf9
AD
691}
692
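/*
 * Illustrative sketch (not part of the original file): reserving a writeback
 * slot with the helpers above, assuming an initialized adev. The returned
 * value is a dword index into adev->wb.wb on the CPU side and into
 * adev->wb.gpu_addr on the GPU side. Compiled out.
 */
#if 0
static int amdgpu_example_wb_usage(struct amdgpu_device *adev)
{
	u32 wb;
	int r;

	r = amdgpu_device_wb_get(adev, &wb);	/* allocate a dword slot */
	if (r)
		return r;

	adev->wb.wb[wb] = 0;			/* CPU side: clear the slot */
	/* GPU side would reference adev->wb.gpu_addr + wb * 4 ... */

	amdgpu_device_wb_free(adev, wb);	/* release the slot again */
	return 0;
}
#endif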
/**
 * amdgpu_device_resize_fb_bar - try to resize FB BAR
 *
 * @adev: amdgpu_device pointer
 *
 * Try to resize the FB BAR to make all VRAM CPU accessible. We try very hard
 * not to fail, but if any of the BARs is not accessible after the resize we
 * abort driver loading by returning -ENODEV.
 */
702int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
703{
770d13b1 704 u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
d6895ad3 705 u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
31b8adab
CK
706 struct pci_bus *root;
707 struct resource *res;
708 unsigned i;
d6895ad3
CK
709 u16 cmd;
710 int r;
711
0c03b912 712 /* Bypass for VF */
713 if (amdgpu_sriov_vf(adev))
714 return 0;
715
31b8adab
CK
716 /* Check if the root BUS has 64bit memory resources */
717 root = adev->pdev->bus;
718 while (root->parent)
719 root = root->parent;
720
721 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 722 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
723 res->start > 0x100000000ull)
724 break;
725 }
726
727 /* Trying to resize is pointless without a root hub window above 4GB */
728 if (!res)
729 return 0;
730
d6895ad3
CK
731 /* Disable memory decoding while we change the BAR addresses and size */
732 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
733 pci_write_config_word(adev->pdev, PCI_COMMAND,
734 cmd & ~PCI_COMMAND_MEMORY);
735
736 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
06ec9070 737 amdgpu_device_doorbell_fini(adev);
d6895ad3
CK
738 if (adev->asic_type >= CHIP_BONAIRE)
739 pci_release_resource(adev->pdev, 2);
740
741 pci_release_resource(adev->pdev, 0);
742
743 r = pci_resize_resource(adev->pdev, 0, rbar_size);
744 if (r == -ENOSPC)
745 DRM_INFO("Not enough PCI address space for a large BAR.");
746 else if (r && r != -ENOTSUPP)
747 DRM_ERROR("Problem resizing BAR0 (%d).", r);
748
749 pci_assign_unassigned_bus_resources(adev->pdev->bus);
750
751 /* When the doorbell or fb BAR isn't available we have no chance of
752 * using the device.
753 */
06ec9070 754 r = amdgpu_device_doorbell_init(adev);
d6895ad3
CK
755 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
756 return -ENODEV;
757
758 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
759
760 return 0;
761}
a05502e5 762
d38ceaf9
AD
763/*
764 * GPU helpers function.
765 */
/**
 * amdgpu_device_need_post - check if the hw needs post or not
 *
 * @adev: amdgpu_device pointer
 *
 * Check if the asic has been initialized (all asics) at driver startup
 * or if post is needed after a hw reset is performed.
 * Returns true if post is needed, false if not.
 */
39c640c0 775bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
776{
777 uint32_t reg;
778
bec86378
ML
779 if (amdgpu_sriov_vf(adev))
780 return false;
781
782 if (amdgpu_passthrough(adev)) {
1da2c326
ML
783 /* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
784 * some old smc fw still need driver do vPost otherwise gpu hang, while
785 * those smc fw version above 22.15 doesn't have this flaw, so we force
786 * vpost executed for smc version below 22.15
bec86378
ML
787 */
788 if (adev->asic_type == CHIP_FIJI) {
789 int err;
790 uint32_t fw_ver;
791 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
			/* force vPost if error occurred */
793 if (err)
794 return true;
795
796 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1da2c326
ML
797 if (fw_ver < 0x00160e00)
798 return true;
bec86378 799 }
bec86378 800 }
91fe77eb 801
802 if (adev->has_hw_reset) {
803 adev->has_hw_reset = false;
804 return true;
805 }
806
807 /* bios scratch used on CIK+ */
808 if (adev->asic_type >= CHIP_BONAIRE)
809 return amdgpu_atombios_scratch_need_asic_init(adev);
810
811 /* check MEM_SIZE for older asics */
812 reg = amdgpu_asic_get_config_memsize(adev);
813
814 if ((reg != 0) && (reg != 0xffffffff))
815 return false;
816
817 return true;
bec86378
ML
818}
819
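/*
 * Illustrative sketch (not part of the original file): how a caller might act
 * on amdgpu_device_need_post(). Posting through the vbios as shown here is a
 * simplified version of what the init path does; the block is compiled out.
 */
#if 0
static void amdgpu_example_maybe_post(struct amdgpu_device *adev)
{
	if (amdgpu_device_need_post(adev)) {
		DRM_INFO("GPU not posted, posting through vbios\n");
		amdgpu_atom_asic_init(adev->mode_info.atom_context);
	}
}
#endif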
d38ceaf9
AD
820/* if we get transitioned to only one device, take VGA back */
821/**
06ec9070 822 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9
AD
823 *
824 * @cookie: amdgpu_device pointer
825 * @state: enable/disable vga decode
826 *
827 * Enable/disable vga decode (all asics).
828 * Returns VGA resource flags.
829 */
06ec9070 830static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
d38ceaf9
AD
831{
832 struct amdgpu_device *adev = cookie;
833 amdgpu_asic_set_vga_state(adev, state);
834 if (state)
835 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
836 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
837 else
838 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
839}
840
e3ecdffa
AD
841/**
842 * amdgpu_device_check_block_size - validate the vm block size
843 *
844 * @adev: amdgpu_device pointer
845 *
846 * Validates the vm block size specified via module parameter.
847 * The vm block size defines number of bits in page table versus page directory,
848 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
849 * page table and the remaining bits are in the page directory.
850 */
06ec9070 851static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
852{
853 /* defines number of bits in page table versus page directory,
854 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
855 * page table and the remaining bits are in the page directory */
bab4fee7
JZ
856 if (amdgpu_vm_block_size == -1)
857 return;
a1adf8be 858
bab4fee7 859 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
860 dev_warn(adev->dev, "VM page table size (%d) too small\n",
861 amdgpu_vm_block_size);
97489129 862 amdgpu_vm_block_size = -1;
a1adf8be 863 }
a1adf8be
CZ
864}
865
e3ecdffa
AD
866/**
867 * amdgpu_device_check_vm_size - validate the vm size
868 *
869 * @adev: amdgpu_device pointer
870 *
871 * Validates the vm size in GB specified via module parameter.
872 * The VM size is the size of the GPU virtual memory space in GB.
873 */
06ec9070 874static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 875{
64dab074
AD
876 /* no need to check the default value */
877 if (amdgpu_vm_size == -1)
878 return;
879
83ca145d
ZJ
880 if (amdgpu_vm_size < 1) {
881 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
882 amdgpu_vm_size);
f3368128 883 amdgpu_vm_size = -1;
83ca145d 884 }
83ca145d
ZJ
885}
886
7951e376
RZ
887static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
888{
889 struct sysinfo si;
890 bool is_os_64 = (sizeof(void *) == 8) ? true : false;
891 uint64_t total_memory;
892 uint64_t dram_size_seven_GB = 0x1B8000000;
893 uint64_t dram_size_three_GB = 0xB8000000;
894
895 if (amdgpu_smu_memory_pool_size == 0)
896 return;
897
898 if (!is_os_64) {
899 DRM_WARN("Not 64-bit OS, feature not supported\n");
900 goto def_value;
901 }
902 si_meminfo(&si);
903 total_memory = (uint64_t)si.totalram * si.mem_unit;
904
905 if ((amdgpu_smu_memory_pool_size == 1) ||
906 (amdgpu_smu_memory_pool_size == 2)) {
907 if (total_memory < dram_size_three_GB)
908 goto def_value1;
909 } else if ((amdgpu_smu_memory_pool_size == 4) ||
910 (amdgpu_smu_memory_pool_size == 8)) {
911 if (total_memory < dram_size_seven_GB)
912 goto def_value1;
913 } else {
914 DRM_WARN("Smu memory pool size not supported\n");
915 goto def_value;
916 }
917 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
918
919 return;
920
921def_value1:
	DRM_WARN("Not enough system memory\n");
923def_value:
924 adev->pm.smu_prv_buffer_size = 0;
925}
926
d38ceaf9 927/**
06ec9070 928 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
929 *
930 * @adev: amdgpu_device pointer
931 *
932 * Validates certain module parameters and updates
933 * the associated values used by the driver (all asics).
934 */
912dfc84 935static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 936{
912dfc84
EQ
937 int ret = 0;
938
5b011235
CZ
939 if (amdgpu_sched_jobs < 4) {
940 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
941 amdgpu_sched_jobs);
942 amdgpu_sched_jobs = 4;
76117507 943 } else if (!is_power_of_2(amdgpu_sched_jobs)){
5b011235
CZ
944 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
945 amdgpu_sched_jobs);
946 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
947 }
d38ceaf9 948
83e74db6 949 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
950 /* gart size must be greater or equal to 32M */
951 dev_warn(adev->dev, "gart size (%d) too small\n",
952 amdgpu_gart_size);
83e74db6 953 amdgpu_gart_size = -1;
d38ceaf9
AD
954 }
955
36d38372 956 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 957 /* gtt size must be greater or equal to 32M */
36d38372
CK
958 dev_warn(adev->dev, "gtt size (%d) too small\n",
959 amdgpu_gtt_size);
960 amdgpu_gtt_size = -1;
d38ceaf9
AD
961 }
962
d07f14be
RH
963 /* valid range is between 4 and 9 inclusive */
964 if (amdgpu_vm_fragment_size != -1 &&
965 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
966 dev_warn(adev->dev, "valid range is between 4 and 9\n");
967 amdgpu_vm_fragment_size = -1;
968 }
969
7951e376
RZ
970 amdgpu_device_check_smu_prv_buffer_size(adev);
971
06ec9070 972 amdgpu_device_check_vm_size(adev);
d38ceaf9 973
06ec9070 974 amdgpu_device_check_block_size(adev);
6a7f76e7 975
526bae37 976 if (amdgpu_vram_page_split != -1 && (amdgpu_vram_page_split < 16 ||
76117507 977 !is_power_of_2(amdgpu_vram_page_split))) {
6a7f76e7
CK
978 dev_warn(adev->dev, "invalid VRAM page split (%d)\n",
979 amdgpu_vram_page_split);
980 amdgpu_vram_page_split = 1024;
981 }
8854695a 982
912dfc84
EQ
983 ret = amdgpu_device_get_job_timeout_settings(adev);
984 if (ret) {
985 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
986 return ret;
8854695a 987 }
19aede77
AD
988
989 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84
EQ
990
991 return ret;
d38ceaf9
AD
992}
993
994/**
995 * amdgpu_switcheroo_set_state - set switcheroo state
996 *
997 * @pdev: pci dev pointer
1694467b 998 * @state: vga_switcheroo state
d38ceaf9
AD
999 *
 * Callback for the switcheroo driver. Suspends or resumes
 * the asics before or after it is powered up using ACPI methods.
1002 */
1003static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
1004{
1005 struct drm_device *dev = pci_get_drvdata(pdev);
1006
1007 if (amdgpu_device_is_px(dev) && state == VGA_SWITCHEROO_OFF)
1008 return;
1009
1010 if (state == VGA_SWITCHEROO_ON) {
7ca85295 1011 pr_info("amdgpu: switched on\n");
d38ceaf9
AD
1012 /* don't suspend or resume card normally */
1013 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1014
810ddc3a 1015 amdgpu_device_resume(dev, true, true);
d38ceaf9 1016
d38ceaf9
AD
1017 dev->switch_power_state = DRM_SWITCH_POWER_ON;
1018 drm_kms_helper_poll_enable(dev);
1019 } else {
7ca85295 1020 pr_info("amdgpu: switched off\n");
d38ceaf9
AD
1021 drm_kms_helper_poll_disable(dev);
1022 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
810ddc3a 1023 amdgpu_device_suspend(dev, true, true);
d38ceaf9
AD
1024 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1025 }
1026}
1027
1028/**
1029 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1030 *
1031 * @pdev: pci dev pointer
1032 *
 * Callback for the switcheroo driver. Check if the switcheroo
 * state can be changed.
1035 * Returns true if the state can be changed, false if not.
1036 */
1037static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1038{
1039 struct drm_device *dev = pci_get_drvdata(pdev);
1040
1041 /*
1042 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1043 * locking inversion with the driver load path. And the access here is
1044 * completely racy anyway. So don't bother with locking for now.
1045 */
1046 return dev->open_count == 0;
1047}
1048
1049static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1050 .set_gpu_state = amdgpu_switcheroo_set_state,
1051 .reprobe = NULL,
1052 .can_switch = amdgpu_switcheroo_can_switch,
1053};
1054
e3ecdffa
AD
1055/**
1056 * amdgpu_device_ip_set_clockgating_state - set the CG state
1057 *
87e3f136 1058 * @dev: amdgpu_device pointer
e3ecdffa
AD
1059 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1060 * @state: clockgating state (gate or ungate)
1061 *
1062 * Sets the requested clockgating state for all instances of
1063 * the hardware IP specified.
1064 * Returns the error code from the last instance.
1065 */
43fa561f 1066int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1067 enum amd_ip_block_type block_type,
1068 enum amd_clockgating_state state)
d38ceaf9 1069{
43fa561f 1070 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1071 int i, r = 0;
1072
1073 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1074 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1075 continue;
c722865a
RZ
1076 if (adev->ip_blocks[i].version->type != block_type)
1077 continue;
1078 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1079 continue;
1080 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1081 (void *)adev, state);
1082 if (r)
1083 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1084 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1085 }
1086 return r;
1087}
1088
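/*
 * Illustrative sketch (not part of the original file): gating the clocks of a
 * single IP type through the helper above. The block type and target state
 * are just examples; the block is compiled out.
 */
#if 0
static void amdgpu_example_gate_vce(struct amdgpu_device *adev)
{
	int r;

	r = amdgpu_device_ip_set_clockgating_state(adev,
						   AMD_IP_BLOCK_TYPE_VCE,
						   AMD_CG_STATE_GATE);
	if (r)
		DRM_WARN("failed to gate VCE clocks (%d)\n", r);
}
#endif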
e3ecdffa
AD
1089/**
1090 * amdgpu_device_ip_set_powergating_state - set the PG state
1091 *
87e3f136 1092 * @dev: amdgpu_device pointer
e3ecdffa
AD
1093 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1094 * @state: powergating state (gate or ungate)
1095 *
1096 * Sets the requested powergating state for all instances of
1097 * the hardware IP specified.
1098 * Returns the error code from the last instance.
1099 */
43fa561f 1100int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1101 enum amd_ip_block_type block_type,
1102 enum amd_powergating_state state)
d38ceaf9 1103{
43fa561f 1104 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1105 int i, r = 0;
1106
1107 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1108 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1109 continue;
c722865a
RZ
1110 if (adev->ip_blocks[i].version->type != block_type)
1111 continue;
1112 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1113 continue;
1114 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1115 (void *)adev, state);
1116 if (r)
1117 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1118 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1119 }
1120 return r;
1121}
1122
e3ecdffa
AD
1123/**
1124 * amdgpu_device_ip_get_clockgating_state - get the CG state
1125 *
1126 * @adev: amdgpu_device pointer
1127 * @flags: clockgating feature flags
1128 *
1129 * Walks the list of IPs on the device and updates the clockgating
1130 * flags for each IP.
1131 * Updates @flags with the feature flags for each hardware IP where
1132 * clockgating is enabled.
1133 */
2990a1fc
AD
1134void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1135 u32 *flags)
6cb2d4e4
HR
1136{
1137 int i;
1138
1139 for (i = 0; i < adev->num_ip_blocks; i++) {
1140 if (!adev->ip_blocks[i].status.valid)
1141 continue;
1142 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1143 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1144 }
1145}
1146
e3ecdffa
AD
1147/**
1148 * amdgpu_device_ip_wait_for_idle - wait for idle
1149 *
1150 * @adev: amdgpu_device pointer
1151 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1152 *
 * Waits for the requested hardware IP to be idle.
1154 * Returns 0 for success or a negative error code on failure.
1155 */
2990a1fc
AD
1156int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1157 enum amd_ip_block_type block_type)
5dbbb60b
AD
1158{
1159 int i, r;
1160
1161 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1162 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1163 continue;
a1255107
AD
1164 if (adev->ip_blocks[i].version->type == block_type) {
1165 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1166 if (r)
1167 return r;
1168 break;
1169 }
1170 }
1171 return 0;
1172
1173}
1174
e3ecdffa
AD
1175/**
1176 * amdgpu_device_ip_is_idle - is the hardware IP idle
1177 *
1178 * @adev: amdgpu_device pointer
1179 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1180 *
1181 * Check if the hardware IP is idle or not.
 * Returns true if the IP is idle, false if not.
1183 */
2990a1fc
AD
1184bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1185 enum amd_ip_block_type block_type)
5dbbb60b
AD
1186{
1187 int i;
1188
1189 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1190 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1191 continue;
a1255107
AD
1192 if (adev->ip_blocks[i].version->type == block_type)
1193 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1194 }
1195 return true;
1196
1197}
1198
e3ecdffa
AD
1199/**
1200 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1201 *
1202 * @adev: amdgpu_device pointer
87e3f136 1203 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1204 *
1205 * Returns a pointer to the hardware IP block structure
1206 * if it exists for the asic, otherwise NULL.
1207 */
2990a1fc
AD
1208struct amdgpu_ip_block *
1209amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1210 enum amd_ip_block_type type)
d38ceaf9
AD
1211{
1212 int i;
1213
1214 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1215 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1216 return &adev->ip_blocks[i];
1217
1218 return NULL;
1219}
1220
1221/**
2990a1fc 1222 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1223 *
1224 * @adev: amdgpu_device pointer
5fc3aeeb 1225 * @type: enum amd_ip_block_type
d38ceaf9
AD
1226 * @major: major version
1227 * @minor: minor version
1228 *
1229 * return 0 if equal or greater
1230 * return 1 if smaller or the ip_block doesn't exist
1231 */
2990a1fc
AD
1232int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1233 enum amd_ip_block_type type,
1234 u32 major, u32 minor)
d38ceaf9 1235{
2990a1fc 1236 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1237
a1255107
AD
1238 if (ip_block && ((ip_block->version->major > major) ||
1239 ((ip_block->version->major == major) &&
1240 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1241 return 0;
1242
1243 return 1;
1244}
1245
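/*
 * Illustrative sketch (not part of the original file): looking up an IP block
 * and checking its version with the two helpers above. The required
 * major/minor numbers are hypothetical; the block is compiled out.
 */
#if 0
static bool amdgpu_example_has_new_gfx(struct amdgpu_device *adev)
{
	struct amdgpu_ip_block *ip;

	ip = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
	if (!ip)
		return false;

	/* 0 means the GFX IP is at least version 8.1 */
	return amdgpu_device_ip_block_version_cmp(adev,
						  AMD_IP_BLOCK_TYPE_GFX,
						  8, 1) == 0;
}
#endif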
a1255107 1246/**
2990a1fc 1247 * amdgpu_device_ip_block_add
a1255107
AD
1248 *
1249 * @adev: amdgpu_device pointer
1250 * @ip_block_version: pointer to the IP to add
1251 *
1252 * Adds the IP block driver information to the collection of IPs
1253 * on the asic.
1254 */
2990a1fc
AD
1255int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1256 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1257{
1258 if (!ip_block_version)
1259 return -EINVAL;
1260
e966a725 1261 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1262 ip_block_version->funcs->name);
1263
a1255107
AD
1264 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1265
1266 return 0;
1267}
1268
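/*
 * Illustrative sketch (not part of the original file): how an asic specific
 * set_ip_blocks() function registers its IPs with the helper above. The
 * ip_block_version structures named here are hypothetical; the block is
 * compiled out.
 */
#if 0
extern const struct amdgpu_ip_block_version example_common_ip_block;
extern const struct amdgpu_ip_block_version example_gmc_ip_block;

static int example_set_ip_blocks(struct amdgpu_device *adev)
{
	int r;

	r = amdgpu_device_ip_block_add(adev, &example_common_ip_block);
	if (r)
		return r;

	r = amdgpu_device_ip_block_add(adev, &example_gmc_ip_block);
	if (r)
		return r;

	return 0;
}
#endif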
/**
 * amdgpu_device_enable_virtual_display - enable virtual display feature
 *
 * @adev: amdgpu_device pointer
 *
 * Enables the virtual display feature if the user has enabled it via
 * the module parameter virtual_display. This feature provides virtual
 * display hardware on headless boards or in virtualized environments.
 * This function parses and validates the configuration string specified by
 * the user and configures the virtual display configuration (number of
 * virtual connectors, crtcs, etc.) specified.
 */
483ef985 1281static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1282{
1283 adev->enable_virtual_display = false;
1284
1285 if (amdgpu_virtual_display) {
1286 struct drm_device *ddev = adev->ddev;
1287 const char *pci_address_name = pci_name(ddev->pdev);
0f66356d 1288 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1289
1290 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1291 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1292 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1293 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1294 if (!strcmp("all", pciaddname)
1295 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1296 long num_crtc;
1297 int res = -1;
1298
9accf2fd 1299 adev->enable_virtual_display = true;
0f66356d
ED
1300
1301 if (pciaddname_tmp)
1302 res = kstrtol(pciaddname_tmp, 10,
1303 &num_crtc);
1304
1305 if (!res) {
1306 if (num_crtc < 1)
1307 num_crtc = 1;
1308 if (num_crtc > 6)
1309 num_crtc = 6;
1310 adev->mode_info.num_crtc = num_crtc;
1311 } else {
1312 adev->mode_info.num_crtc = 1;
1313 }
9accf2fd
ED
1314 break;
1315 }
1316 }
1317
0f66356d
ED
1318 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1319 amdgpu_virtual_display, pci_address_name,
1320 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
1321
1322 kfree(pciaddstr);
1323 }
1324}
1325
/**
 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
 *
 * @adev: amdgpu_device pointer
 *
 * Parses the asic configuration parameters specified in the gpu info
 * firmware and makes them available to the driver for use in configuring
 * the asic.
 * Returns 0 on success, -EINVAL on failure.
 */
1336static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1337{
e2a75f88
AD
1338 const char *chip_name;
1339 char fw_name[30];
1340 int err;
1341 const struct gpu_info_firmware_header_v1_0 *hdr;
1342
ab4fe3e1
HR
1343 adev->firmware.gpu_info_fw = NULL;
1344
e2a75f88
AD
1345 switch (adev->asic_type) {
1346 case CHIP_TOPAZ:
1347 case CHIP_TONGA:
1348 case CHIP_FIJI:
e2a75f88 1349 case CHIP_POLARIS10:
cc07f18d 1350 case CHIP_POLARIS11:
e2a75f88 1351 case CHIP_POLARIS12:
cc07f18d 1352 case CHIP_VEGAM:
e2a75f88
AD
1353 case CHIP_CARRIZO:
1354 case CHIP_STONEY:
1355#ifdef CONFIG_DRM_AMDGPU_SI
1356 case CHIP_VERDE:
1357 case CHIP_TAHITI:
1358 case CHIP_PITCAIRN:
1359 case CHIP_OLAND:
1360 case CHIP_HAINAN:
1361#endif
1362#ifdef CONFIG_DRM_AMDGPU_CIK
1363 case CHIP_BONAIRE:
1364 case CHIP_HAWAII:
1365 case CHIP_KAVERI:
1366 case CHIP_KABINI:
1367 case CHIP_MULLINS:
1368#endif
27c0bc71 1369 case CHIP_VEGA20:
e2a75f88
AD
1370 default:
1371 return 0;
1372 case CHIP_VEGA10:
1373 chip_name = "vega10";
1374 break;
3f76dced
AD
1375 case CHIP_VEGA12:
1376 chip_name = "vega12";
1377 break;
2d2e5e7e 1378 case CHIP_RAVEN:
54c4d17e
FX
1379 if (adev->rev_id >= 8)
1380 chip_name = "raven2";
741deade
AD
1381 else if (adev->pdev->device == 0x15d8)
1382 chip_name = "picasso";
54c4d17e
FX
1383 else
1384 chip_name = "raven";
2d2e5e7e 1385 break;
e2a75f88
AD
1386 }
1387
1388 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
ab4fe3e1 1389 err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
e2a75f88
AD
1390 if (err) {
1391 dev_err(adev->dev,
1392 "Failed to load gpu_info firmware \"%s\"\n",
1393 fw_name);
1394 goto out;
1395 }
ab4fe3e1 1396 err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
e2a75f88
AD
1397 if (err) {
1398 dev_err(adev->dev,
1399 "Failed to validate gpu_info firmware \"%s\"\n",
1400 fw_name);
1401 goto out;
1402 }
1403
ab4fe3e1 1404 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
1405 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1406
1407 switch (hdr->version_major) {
1408 case 1:
1409 {
1410 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 1411 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
1412 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1413
b5ab16bf
AD
1414 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1415 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1416 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1417 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 1418 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
1419 le32_to_cpu(gpu_info_fw->gc_num_tccs);
1420 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1421 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1422 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1423 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 1424 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
1425 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1426 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
1427 adev->gfx.cu_info.max_waves_per_simd =
1428 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1429 adev->gfx.cu_info.max_scratch_slots_per_cu =
1430 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1431 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
e2a75f88
AD
1432 break;
1433 }
1434 default:
1435 dev_err(adev->dev,
1436 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1437 err = -EINVAL;
1438 goto out;
1439 }
1440out:
e2a75f88
AD
1441 return err;
1442}
1443
/**
 * amdgpu_device_ip_early_init - run early init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Early initialization pass for hardware IPs. The hardware IPs that make
 * up each asic are discovered, and each IP's early_init callback is run. This
 * is the first stage in initializing the asic.
 * Returns 0 on success, negative error code on failure.
 */
06ec9070 1454static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 1455{
aaa36a97 1456 int i, r;
d38ceaf9 1457
483ef985 1458 amdgpu_device_enable_virtual_display(adev);
a6be7570 1459
d38ceaf9 1460 switch (adev->asic_type) {
aaa36a97
AD
1461 case CHIP_TOPAZ:
1462 case CHIP_TONGA:
48299f95 1463 case CHIP_FIJI:
2cc0c0b5 1464 case CHIP_POLARIS10:
32cc7e53 1465 case CHIP_POLARIS11:
c4642a47 1466 case CHIP_POLARIS12:
32cc7e53 1467 case CHIP_VEGAM:
aaa36a97 1468 case CHIP_CARRIZO:
39bb0c92
SL
1469 case CHIP_STONEY:
1470 if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
aaa36a97
AD
1471 adev->family = AMDGPU_FAMILY_CZ;
1472 else
1473 adev->family = AMDGPU_FAMILY_VI;
1474
1475 r = vi_set_ip_blocks(adev);
1476 if (r)
1477 return r;
1478 break;
33f34802
KW
1479#ifdef CONFIG_DRM_AMDGPU_SI
1480 case CHIP_VERDE:
1481 case CHIP_TAHITI:
1482 case CHIP_PITCAIRN:
1483 case CHIP_OLAND:
1484 case CHIP_HAINAN:
295d0daf 1485 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
1486 r = si_set_ip_blocks(adev);
1487 if (r)
1488 return r;
1489 break;
1490#endif
a2e73f56
AD
1491#ifdef CONFIG_DRM_AMDGPU_CIK
1492 case CHIP_BONAIRE:
1493 case CHIP_HAWAII:
1494 case CHIP_KAVERI:
1495 case CHIP_KABINI:
1496 case CHIP_MULLINS:
1497 if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII))
1498 adev->family = AMDGPU_FAMILY_CI;
1499 else
1500 adev->family = AMDGPU_FAMILY_KV;
1501
1502 r = cik_set_ip_blocks(adev);
1503 if (r)
1504 return r;
1505 break;
1506#endif
e48a3cd9
AD
1507 case CHIP_VEGA10:
1508 case CHIP_VEGA12:
e4bd8170 1509 case CHIP_VEGA20:
e48a3cd9 1510 case CHIP_RAVEN:
741deade 1511 if (adev->asic_type == CHIP_RAVEN)
2ca8a5d2
CZ
1512 adev->family = AMDGPU_FAMILY_RV;
1513 else
1514 adev->family = AMDGPU_FAMILY_AI;
460826e6
KW
1515
1516 r = soc15_set_ip_blocks(adev);
1517 if (r)
1518 return r;
1519 break;
d38ceaf9
AD
1520 default:
1521 /* FIXME: not supported yet */
1522 return -EINVAL;
1523 }
1524
e2a75f88
AD
1525 r = amdgpu_device_parse_gpu_info_fw(adev);
1526 if (r)
1527 return r;
1528
1884734a 1529 amdgpu_amdkfd_device_probe(adev);
1530
3149d9da
XY
1531 if (amdgpu_sriov_vf(adev)) {
1532 r = amdgpu_virt_request_full_gpu(adev, true);
1533 if (r)
5ffa61c1 1534 return -EAGAIN;
78d48112
TH
1535
1536 /* query the reg access mode at the very beginning */
1537 amdgpu_virt_init_reg_access_mode(adev);
3149d9da
XY
1538 }
1539
3b94fb10 1540 adev->pm.pp_feature = amdgpu_pp_feature_mask;
00544006
HR
1541 if (amdgpu_sriov_vf(adev))
1542 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
00f54b97 1543
394e9a14
ED
1544 /* Read BIOS */
1545 if (!amdgpu_get_bios(adev))
1546 return -EINVAL;
1547
1548 r = amdgpu_atombios_init(adev);
1549 if (r) {
1550 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
1551 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
1552 return r;
1553 }
1554
d38ceaf9
AD
1555 for (i = 0; i < adev->num_ip_blocks; i++) {
1556 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
ed8cf00c
HR
1557 DRM_ERROR("disabled ip block: %d <%s>\n",
1558 i, adev->ip_blocks[i].version->funcs->name);
a1255107 1559 adev->ip_blocks[i].status.valid = false;
d38ceaf9 1560 } else {
a1255107
AD
1561 if (adev->ip_blocks[i].version->funcs->early_init) {
1562 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 1563 if (r == -ENOENT) {
a1255107 1564 adev->ip_blocks[i].status.valid = false;
2c1a2784 1565 } else if (r) {
a1255107
AD
1566 DRM_ERROR("early_init of IP block <%s> failed %d\n",
1567 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 1568 return r;
2c1a2784 1569 } else {
a1255107 1570 adev->ip_blocks[i].status.valid = true;
2c1a2784 1571 }
974e6b64 1572 } else {
a1255107 1573 adev->ip_blocks[i].status.valid = true;
d38ceaf9 1574 }
d38ceaf9
AD
1575 }
1576 }
1577
395d1fb9
NH
1578 adev->cg_flags &= amdgpu_cg_mask;
1579 adev->pg_flags &= amdgpu_pg_mask;
1580
d38ceaf9
AD
1581 return 0;
1582}
1583
0a4f2520
RZ
1584static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
1585{
1586 int i, r;
1587
1588 for (i = 0; i < adev->num_ip_blocks; i++) {
1589 if (!adev->ip_blocks[i].status.sw)
1590 continue;
1591 if (adev->ip_blocks[i].status.hw)
1592 continue;
1593 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 1594 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
1595 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
1596 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1597 if (r) {
1598 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1599 adev->ip_blocks[i].version->funcs->name, r);
1600 return r;
1601 }
1602 adev->ip_blocks[i].status.hw = true;
1603 }
1604 }
1605
1606 return 0;
1607}
1608
1609static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
1610{
1611 int i, r;
1612
1613 for (i = 0; i < adev->num_ip_blocks; i++) {
1614 if (!adev->ip_blocks[i].status.sw)
1615 continue;
1616 if (adev->ip_blocks[i].status.hw)
1617 continue;
1618 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1619 if (r) {
1620 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1621 adev->ip_blocks[i].version->funcs->name, r);
1622 return r;
1623 }
1624 adev->ip_blocks[i].status.hw = true;
1625 }
1626
1627 return 0;
1628}
1629
7a3e0bb2
RZ
1630static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
1631{
1632 int r = 0;
1633 int i;
1634
1635 if (adev->asic_type >= CHIP_VEGA10) {
1636 for (i = 0; i < adev->num_ip_blocks; i++) {
1637 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
1638 if (adev->in_gpu_reset || adev->in_suspend) {
1639 if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset)
1640 break; /* sriov gpu reset, psp need to do hw_init before IH because of hw limit */
1641 r = adev->ip_blocks[i].version->funcs->resume(adev);
1642 if (r) {
1643 DRM_ERROR("resume of IP block <%s> failed %d\n",
1644 adev->ip_blocks[i].version->funcs->name, r);
1645 return r;
1646 }
1647 } else {
1648 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1649 if (r) {
1650 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1651 adev->ip_blocks[i].version->funcs->name, r);
1652 return r;
1653 }
1654 }
1655 adev->ip_blocks[i].status.hw = true;
1656 }
1657 }
1658 }
1659
91eec27e 1660 if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->load_firmware) {
7a3e0bb2
RZ
1661 r = adev->powerplay.pp_funcs->load_firmware(adev->powerplay.pp_handle);
1662 if (r) {
1663 pr_err("firmware loading failed\n");
1664 return r;
1665 }
1666 }
1667
1668 return 0;
1669}
1670
e3ecdffa
AD
1671/**
1672 * amdgpu_device_ip_init - run init for hardware IPs
1673 *
1674 * @adev: amdgpu_device pointer
1675 *
1676 * Main initialization pass for hardware IPs. The list of all the hardware
1677 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
1678 * are run. sw_init initializes the software state associated with each IP
1679 * and hw_init initializes the hardware associated with each IP.
1680 * Returns 0 on success, negative error code on failure.
1681 */
06ec9070 1682static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
1683{
1684 int i, r;
1685
c030f2e4 1686 r = amdgpu_ras_init(adev);
1687 if (r)
1688 return r;
1689
d38ceaf9 1690 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1691 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 1692 continue;
a1255107 1693 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 1694 if (r) {
a1255107
AD
1695 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
1696 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 1697 goto init_failed;
2c1a2784 1698 }
a1255107 1699 adev->ip_blocks[i].status.sw = true;
bfca0289 1700
d38ceaf9 1701 /* need to do gmc hw init early so we can allocate gpu mem */
a1255107 1702 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
06ec9070 1703 r = amdgpu_device_vram_scratch_init(adev);
2c1a2784
AD
1704 if (r) {
1705 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
72d3f592 1706 goto init_failed;
2c1a2784 1707 }
a1255107 1708 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
1709 if (r) {
1710 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 1711 goto init_failed;
2c1a2784 1712 }
06ec9070 1713 r = amdgpu_device_wb_init(adev);
2c1a2784 1714 if (r) {
06ec9070 1715 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 1716 goto init_failed;
2c1a2784 1717 }
a1255107 1718 adev->ip_blocks[i].status.hw = true;
2493664f
ML
1719
1720 /* right after GMC hw init, we create CSA */
1721 if (amdgpu_sriov_vf(adev)) {
1e256e27
RZ
1722 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
1723 AMDGPU_GEM_DOMAIN_VRAM,
1724 AMDGPU_CSA_SIZE);
2493664f
ML
1725 if (r) {
1726 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 1727 goto init_failed;
2493664f
ML
1728 }
1729 }
d38ceaf9
AD
1730 }
1731 }
1732
533aed27
AG
1733 r = amdgpu_ib_pool_init(adev);
1734 if (r) {
1735 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
1736 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
1737 goto init_failed;
1738 }
1739
c8963ea4
RZ
1740 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
1741 if (r)
72d3f592 1742 goto init_failed;
0a4f2520
RZ
1743
1744 r = amdgpu_device_ip_hw_init_phase1(adev);
1745 if (r)
72d3f592 1746 goto init_failed;
0a4f2520 1747
7a3e0bb2
RZ
1748 r = amdgpu_device_fw_loading(adev);
1749 if (r)
72d3f592 1750 goto init_failed;
7a3e0bb2 1751
0a4f2520
RZ
1752 r = amdgpu_device_ip_hw_init_phase2(adev);
1753 if (r)
72d3f592 1754 goto init_failed;
d38ceaf9 1755
3e2e2ab5
HZ
1756 if (adev->gmc.xgmi.num_physical_nodes > 1)
1757 amdgpu_xgmi_add_device(adev);
1884734a 1758 amdgpu_amdkfd_device_init(adev);
c6332b97 1759
72d3f592 1760init_failed:
d3c117e5 1761 if (amdgpu_sriov_vf(adev)) {
72d3f592
ED
1762 if (!r)
1763 amdgpu_virt_init_data_exchange(adev);
c6332b97 1764 amdgpu_virt_release_full_gpu(adev, true);
d3c117e5 1765 }
c6332b97 1766
72d3f592 1767 return r;
d38ceaf9
AD
1768}
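
/*
 * Rough summary (annotation, not compiled driver code) of the init ordering
 * implemented above:
 *
 *	sw_init for every valid IP block (GMC also gets hw_init early so that
 *	VRAM scratch, writeback and the SR-IOV CSA can be allocated)
 *	amdgpu_ib_pool_init()
 *	amdgpu_ucode_create_bo()
 *	amdgpu_device_ip_hw_init_phase1()
 *	amdgpu_device_fw_loading()
 *	amdgpu_device_ip_hw_init_phase2()
 *	amdgpu_xgmi_add_device() / amdgpu_amdkfd_device_init()
 */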
1769
e3ecdffa
AD
1770/**
1771 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
1772 *
1773 * @adev: amdgpu_device pointer
1774 *
1775 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
1776 * this function before a GPU reset. If the value is retained after a
1777 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
1778 */
06ec9070 1779static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
1780{
1781 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
1782}
1783
e3ecdffa
AD
1784/**
1785 * amdgpu_device_check_vram_lost - check if vram is valid
1786 *
1787 * @adev: amdgpu_device pointer
1788 *
1789 * Checks the reset magic value written to the gart pointer in VRAM.
1790 * The driver calls this after a GPU reset to see if the contents of
1791 * VRAM have been lost or not.
1792 * Returns true if vram is lost, false if not.
1793 */
06ec9070 1794static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8
CZ
1795{
1796 return !!memcmp(adev->gart.ptr, adev->reset_magic,
1797 AMDGPU_RESET_MAGIC_NUM);
1798}
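
/*
 * Rough usage sketch (annotation, not compiled driver code): the reset-magic
 * pair above is used around an ASIC reset, see amdgpu_do_asic_reset():
 *
 *	amdgpu_device_fill_reset_magic(adev);        // done at late_init time
 *	... ASIC reset + amdgpu_device_ip_resume_phase1() ...
 *	if (amdgpu_device_check_vram_lost(adev)) {
 *		atomic_inc(&adev->vram_lost_counter);
 *		amdgpu_device_fill_reset_magic(adev);  // re-arm for the next reset
 *	}
 */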
1799
e3ecdffa 1800/**
1112a46b 1801 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
1802 *
1803 * @adev: amdgpu_device pointer
1804 *
e3ecdffa 1805 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
1806 * set_clockgating_state callbacks are run.
1807 * During late initialization this pass enables clockgating for the hardware IPs;
1808 * on fini or suspend the same pass is used to disable clockgating.
e3ecdffa
AD
1809 * Returns 0 on success, negative error code on failure.
1810 */
fdd34271 1811
1112a46b
RZ
1812static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
1813 enum amd_clockgating_state state)
d38ceaf9 1814{
1112a46b 1815 int i, j, r;
d38ceaf9 1816
4a2ba394
SL
1817 if (amdgpu_emu_mode == 1)
1818 return 0;
1819
1112a46b
RZ
1820 for (j = 0; j < adev->num_ip_blocks; j++) {
1821 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 1822 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 1823 continue;
4a446d55 1824 /* skip CG for VCE/UVD, it's handled specially */
a1255107 1825 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 1826 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 1827 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
57716327 1828 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 1829 /* enable clockgating to save power */
a1255107 1830 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 1831 state);
4a446d55
AD
1832 if (r) {
1833 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 1834 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
1835 return r;
1836 }
b0b00ff1 1837 }
d38ceaf9 1838 }
06b18f61 1839
c9f96fd5
RZ
1840 return 0;
1841}
1842
1112a46b 1843static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
c9f96fd5 1844{
1112a46b 1845 int i, j, r;
06b18f61 1846
c9f96fd5
RZ
1847 if (amdgpu_emu_mode == 1)
1848 return 0;
1849
1112a46b
RZ
1850 for (j = 0; j < adev->num_ip_blocks; j++) {
1851 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 1852 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5
RZ
1853 continue;
1854 /* skip PG for VCE/UVD, it's handled specially */
1855 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
1856 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
1857 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
1858 adev->ip_blocks[i].version->funcs->set_powergating_state) {
1859 /* enable powergating to save power */
1860 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 1861 state);
c9f96fd5
RZ
1862 if (r) {
1863 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
1864 adev->ip_blocks[i].version->funcs->name, r);
1865 return r;
1866 }
1867 }
1868 }
2dc80b00
S
1869 return 0;
1870}
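
/*
 * Note (annotation, not compiled driver code): the CG/PG setters above are
 * used in pairs. Late init gates both to save power, while fini and the
 * suspend path ungate them again before touching the hardware:
 *
 *	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
 *	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
 *	...
 *	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
 *	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
 */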
1871
e3ecdffa
AD
1872/**
1873 * amdgpu_device_ip_late_init - run late init for hardware IPs
1874 *
1875 * @adev: amdgpu_device pointer
1876 *
1877 * Late initialization pass for hardware IPs. The list of all the hardware
1878 * IPs that make up the asic is walked and the late_init callbacks are run.
1879 * late_init covers any special initialization that an IP requires
1880 * after all of the IP blocks have been initialized or something that needs
1881 * to happen late in the init process.
1882 * Returns 0 on success, negative error code on failure.
1883 */
06ec9070 1884static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00
S
1885{
1886 int i = 0, r;
1887
1888 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 1889 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
1890 continue;
1891 if (adev->ip_blocks[i].version->funcs->late_init) {
1892 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
1893 if (r) {
1894 DRM_ERROR("late_init of IP block <%s> failed %d\n",
1895 adev->ip_blocks[i].version->funcs->name, r);
1896 return r;
1897 }
2dc80b00 1898 }
73f847db 1899 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
1900 }
1901
1112a46b
RZ
1902 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
1903 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 1904
2c773de2
S
1905 queue_delayed_work(system_wq, &adev->late_init_work,
1906 msecs_to_jiffies(AMDGPU_RESUME_MS));
d38ceaf9 1907
06ec9070 1908 amdgpu_device_fill_reset_magic(adev);
d38ceaf9
AD
1909
1910 return 0;
1911}
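
/*
 * Note (annotation, not compiled driver code): late_init_work queued above
 * runs amdgpu_device_ip_late_init_func_handler() after AMDGPU_RESUME_MS.
 * Paths that tear the hardware down are expected to cancel or flush it first
 * so the deferred IB ring tests cannot race with the hardware going away,
 * e.g. amdgpu_device_fini() and amdgpu_device_suspend() both call:
 *
 *	cancel_delayed_work_sync(&adev->late_init_work);
 */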
1912
e3ecdffa
AD
1913/**
1914 * amdgpu_device_ip_fini - run fini for hardware IPs
1915 *
1916 * @adev: amdgpu_device pointer
1917 *
1918 * Main teardown pass for hardware IPs. The list of all the hardware
1919 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
1920 * are run. hw_fini tears down the hardware associated with each IP
1921 * and sw_fini tears down any software state associated with each IP.
1922 * Returns 0 on success, negative error code on failure.
1923 */
06ec9070 1924static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1925{
1926 int i, r;
1927
c030f2e4 1928 amdgpu_ras_pre_fini(adev);
1929
a82400b5
AG
1930 if (adev->gmc.xgmi.num_physical_nodes > 1)
1931 amdgpu_xgmi_remove_device(adev);
1932
1884734a 1933 amdgpu_amdkfd_device_fini(adev);
05df1f01
RZ
1934
1935 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
1936 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
1937
3e96dbfd
AD
1938 /* need to disable SMC first */
1939 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1940 if (!adev->ip_blocks[i].status.hw)
3e96dbfd 1941 continue;
fdd34271 1942 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
a1255107 1943 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3e96dbfd
AD
1944 /* XXX handle errors */
1945 if (r) {
1946 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
a1255107 1947 adev->ip_blocks[i].version->funcs->name, r);
3e96dbfd 1948 }
a1255107 1949 adev->ip_blocks[i].status.hw = false;
3e96dbfd
AD
1950 break;
1951 }
1952 }
1953
d38ceaf9 1954 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 1955 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 1956 continue;
8201a67a 1957
a1255107 1958 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 1959 /* XXX handle errors */
2c1a2784 1960 if (r) {
a1255107
AD
1961 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
1962 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 1963 }
8201a67a 1964
a1255107 1965 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
1966 }
1967
9950cda2 1968
d38ceaf9 1969 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 1970 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 1971 continue;
c12aba3a
ML
1972
1973 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 1974 amdgpu_ucode_free_bo(adev);
1e256e27 1975 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a
ML
1976 amdgpu_device_wb_fini(adev);
1977 amdgpu_device_vram_scratch_fini(adev);
533aed27 1978 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
1979 }
1980
a1255107 1981 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 1982 /* XXX handle errors */
2c1a2784 1983 if (r) {
a1255107
AD
1984 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
1985 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 1986 }
a1255107
AD
1987 adev->ip_blocks[i].status.sw = false;
1988 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
1989 }
1990
a6dcfd9c 1991 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 1992 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 1993 continue;
a1255107
AD
1994 if (adev->ip_blocks[i].version->funcs->late_fini)
1995 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
1996 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
1997 }
1998
c030f2e4 1999 amdgpu_ras_fini(adev);
2000
030308fc 2001 if (amdgpu_sriov_vf(adev))
24136135
ML
2002 if (amdgpu_virt_release_full_gpu(adev, false))
2003 DRM_ERROR("failed to release exclusive mode on fini\n");
2493664f 2004
d38ceaf9
AD
2005 return 0;
2006}
2007
b55c9e7a
EQ
2008static int amdgpu_device_enable_mgpu_fan_boost(void)
2009{
2010 struct amdgpu_gpu_instance *gpu_ins;
2011 struct amdgpu_device *adev;
2012 int i, ret = 0;
2013
2014 mutex_lock(&mgpu_info.mutex);
2015
2016 /*
2017 * MGPU fan boost feature should be enabled
2018 * only when there are two or more dGPUs in
2019 * the system
2020 */
2021 if (mgpu_info.num_dgpu < 2)
2022 goto out;
2023
2024 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2025 gpu_ins = &(mgpu_info.gpu_ins[i]);
2026 adev = gpu_ins->adev;
2027 if (!(adev->flags & AMD_IS_APU) &&
2028 !gpu_ins->mgpu_fan_enabled &&
2029 adev->powerplay.pp_funcs &&
2030 adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
2031 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2032 if (ret)
2033 break;
2034
2035 gpu_ins->mgpu_fan_enabled = 1;
2036 }
2037 }
2038
2039out:
2040 mutex_unlock(&mgpu_info.mutex);
2041
2042 return ret;
2043}
2044
e3ecdffa 2045/**
1112a46b 2046 * amdgpu_device_ip_late_init_func_handler - work handler for ib test
e3ecdffa 2047 *
1112a46b 2048 * @work: work_struct.
e3ecdffa 2049 */
06ec9070 2050static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
2dc80b00
S
2051{
2052 struct amdgpu_device *adev =
2053 container_of(work, struct amdgpu_device, late_init_work.work);
916ac57f
RZ
2054 int r;
2055
2056 r = amdgpu_ib_ring_tests(adev);
2057 if (r)
2058 DRM_ERROR("ib ring test failed (%d).\n", r);
b55c9e7a
EQ
2059
2060 r = amdgpu_device_enable_mgpu_fan_boost();
2061 if (r)
2062 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
df399b06 2063
2064 /* set to low pstate by default */
2065 amdgpu_xgmi_set_pstate(adev, 0);
2066
2dc80b00
S
2067}
2068
1e317b99
RZ
2069static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2070{
2071 struct amdgpu_device *adev =
2072 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2073
2074 mutex_lock(&adev->gfx.gfx_off_mutex);
2075 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2076 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2077 adev->gfx.gfx_off_state = true;
2078 }
2079 mutex_unlock(&adev->gfx.gfx_off_mutex);
2080}
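
/*
 * Note (annotation, not compiled driver code): adev->gfx.gfx_off_req_count
 * starts at 1 (see amdgpu_device_init()), so the delayed work above only asks
 * the SMU to enter GFXOFF once the last holder has dropped the request count
 * and GFXOFF is not already enabled.
 */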
2081
e3ecdffa 2082/**
e7854a03 2083 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2084 *
2085 * @adev: amdgpu_device pointer
2086 *
2087 * First suspend pass for hardware IPs. Clockgating and powergating are
2088 * disabled and the suspend callbacks are run for the display (DCE) hardware
2089 * IPs only. suspend puts the hardware and software state in each IP into a
2090 * state suitable for suspend.
2091 * Returns 0 on success, negative error code on failure.
2092 */
e7854a03
AD
2093static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2094{
2095 int i, r;
2096
05df1f01 2097 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271 2098 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2099
e7854a03
AD
2100 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2101 if (!adev->ip_blocks[i].status.valid)
2102 continue;
2103 /* displays are handled separately */
2104 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
e7854a03
AD
2105 /* XXX handle errors */
2106 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2107 /* XXX handle errors */
2108 if (r) {
2109 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2110 adev->ip_blocks[i].version->funcs->name, r);
2111 }
2112 }
2113 }
2114
e7854a03
AD
2115 return 0;
2116}
2117
2118/**
2119 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2120 *
2121 * @adev: amdgpu_device pointer
2122 *
2123 * Second suspend pass for hardware IPs. The list of all the hardware IPs
2124 * that make up the asic is walked and the suspend callbacks are run for
2125 * every IP except the displays (handled in phase 1). suspend puts the
2126 * hardware and software state in each IP into a state suitable for suspend.
2127 * Returns 0 on success, negative error code on failure.
2128 */
2129static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2130{
2131 int i, r;
2132
2133 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2134 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2135 continue;
e7854a03
AD
2136 /* displays are handled in phase1 */
2137 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2138 continue;
d38ceaf9 2139 /* XXX handle errors */
a1255107 2140 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 2141 /* XXX handle errors */
2c1a2784 2142 if (r) {
a1255107
AD
2143 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2144 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2145 }
d38ceaf9
AD
2146 }
2147
2148 return 0;
2149}
2150
e7854a03
AD
2151/**
2152 * amdgpu_device_ip_suspend - run suspend for hardware IPs
2153 *
2154 * @adev: amdgpu_device pointer
2155 *
2156 * Main suspend function for hardware IPs. The list of all the hardware
2157 * IPs that make up the asic is walked, clockgating is disabled and the
2158 * suspend callbacks are run. suspend puts the hardware and software state
2159 * in each IP into a state suitable for suspend.
2160 * Returns 0 on success, negative error code on failure.
2161 */
2162int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2163{
2164 int r;
2165
e7819644
YT
2166 if (amdgpu_sriov_vf(adev))
2167 amdgpu_virt_request_full_gpu(adev, false);
2168
e7854a03
AD
2169 r = amdgpu_device_ip_suspend_phase1(adev);
2170 if (r)
2171 return r;
2172 r = amdgpu_device_ip_suspend_phase2(adev);
2173
e7819644
YT
2174 if (amdgpu_sriov_vf(adev))
2175 amdgpu_virt_release_full_gpu(adev, false);
2176
e7854a03
AD
2177 return r;
2178}
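
/*
 * Note (annotation, not compiled driver code): the S3/S4 path in
 * amdgpu_device_suspend() interleaves work between the two phases, roughly:
 *
 *	amdgpu_device_ip_suspend_phase1(adev);   // displays
 *	amdgpu_bo_evict_vram(adev);
 *	amdgpu_fence_driver_suspend(adev);
 *	amdgpu_device_ip_suspend_phase2(adev);   // everything else
 *	amdgpu_bo_evict_vram(adev);              // GART page table, via CPU
 */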
2179
06ec9070 2180static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2181{
2182 int i, r;
2183
2cb681b6
ML
2184 static enum amd_ip_block_type ip_order[] = {
2185 AMD_IP_BLOCK_TYPE_GMC,
2186 AMD_IP_BLOCK_TYPE_COMMON,
39186aef 2187 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
2188 AMD_IP_BLOCK_TYPE_IH,
2189 };
a90ad3c2 2190
2cb681b6
ML
2191 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2192 int j;
2193 struct amdgpu_ip_block *block;
a90ad3c2 2194
2cb681b6
ML
2195 for (j = 0; j < adev->num_ip_blocks; j++) {
2196 block = &adev->ip_blocks[j];
2197
2198 if (block->version->type != ip_order[i] ||
2199 !block->status.valid)
2200 continue;
2201
2202 r = block->version->funcs->hw_init(adev);
0aaeefcc 2203 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
c41d1cf6
ML
2204 if (r)
2205 return r;
a90ad3c2
ML
2206 }
2207 }
2208
2209 return 0;
2210}
2211
06ec9070 2212static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2213{
2214 int i, r;
2215
2cb681b6
ML
2216 static enum amd_ip_block_type ip_order[] = {
2217 AMD_IP_BLOCK_TYPE_SMC,
2218 AMD_IP_BLOCK_TYPE_DCE,
2219 AMD_IP_BLOCK_TYPE_GFX,
2220 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c
FM
2221 AMD_IP_BLOCK_TYPE_UVD,
2222 AMD_IP_BLOCK_TYPE_VCE
2cb681b6 2223 };
a90ad3c2 2224
2cb681b6
ML
2225 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2226 int j;
2227 struct amdgpu_ip_block *block;
a90ad3c2 2228
2cb681b6
ML
2229 for (j = 0; j < adev->num_ip_blocks; j++) {
2230 block = &adev->ip_blocks[j];
2231
2232 if (block->version->type != ip_order[i] ||
2233 !block->status.valid)
2234 continue;
2235
2236 r = block->version->funcs->hw_init(adev);
0aaeefcc 2237 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
c41d1cf6
ML
2238 if (r)
2239 return r;
a90ad3c2
ML
2240 }
2241 }
2242
2243 return 0;
2244}
2245
e3ecdffa
AD
2246/**
2247 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2248 *
2249 * @adev: amdgpu_device pointer
2250 *
2251 * First resume function for hardware IPs. The list of all the hardware
2252 * IPs that make up the asic is walked and the resume callbacks are run for
2253 * COMMON, GMC, and IH. resume puts the hardware into a functional state
2254 * after a suspend and updates the software state as necessary. This
2255 * function is also used for restoring the GPU after a GPU reset.
2256 * Returns 0 on success, negative error code on failure.
2257 */
06ec9070 2258static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
2259{
2260 int i, r;
2261
a90ad3c2
ML
2262 for (i = 0; i < adev->num_ip_blocks; i++) {
2263 if (!adev->ip_blocks[i].status.valid)
2264 continue;
a90ad3c2 2265 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa
AD
2266 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2267 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
fcf0649f
CZ
2268 r = adev->ip_blocks[i].version->funcs->resume(adev);
2269 if (r) {
2270 DRM_ERROR("resume of IP block <%s> failed %d\n",
2271 adev->ip_blocks[i].version->funcs->name, r);
2272 return r;
2273 }
a90ad3c2
ML
2274 }
2275 }
2276
2277 return 0;
2278}
2279
e3ecdffa
AD
2280/**
2281 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2282 *
2283 * @adev: amdgpu_device pointer
2284 *
2285 * Second resume function for hardware IPs. The list of all the hardware
2286 * IPs that make up the asic is walked and the resume callbacks are run for
2287 * all blocks except COMMON, GMC, IH, and PSP. resume puts the hardware into a
2288 * functional state after a suspend and updates the software state as
2289 * necessary. This function is also used for restoring the GPU after a GPU
2290 * reset.
2291 * Returns 0 on success, negative error code on failure.
2292 */
06ec9070 2293static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2294{
2295 int i, r;
2296
2297 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2298 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2299 continue;
fcf0649f 2300 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 2301 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
2302 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2303 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 2304 continue;
a1255107 2305 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 2306 if (r) {
a1255107
AD
2307 DRM_ERROR("resume of IP block <%s> failed %d\n",
2308 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2309 return r;
2c1a2784 2310 }
d38ceaf9
AD
2311 }
2312
2313 return 0;
2314}
2315
e3ecdffa
AD
2316/**
2317 * amdgpu_device_ip_resume - run resume for hardware IPs
2318 *
2319 * @adev: amdgpu_device pointer
2320 *
2321 * Main resume function for hardware IPs. The hardware IPs
2322 * are split into two resume functions because they are
2323 * also used in recovering from a GPU reset and some additional
2324 * steps need to be taken between them. In this case (S3/S4) they are
2325 * run sequentially.
2326 * Returns 0 on success, negative error code on failure.
2327 */
06ec9070 2328static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
2329{
2330 int r;
2331
06ec9070 2332 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
2333 if (r)
2334 return r;
7a3e0bb2
RZ
2335
2336 r = amdgpu_device_fw_loading(adev);
2337 if (r)
2338 return r;
2339
06ec9070 2340 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
2341
2342 return r;
2343}
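
/*
 * Note (annotation, not compiled driver code): the GPU reset path does not
 * use this wrapper; amdgpu_do_asic_reset() calls the phases directly with
 * extra steps in between, roughly:
 *
 *	amdgpu_device_ip_resume_phase1(adev);
 *	amdgpu_device_check_vram_lost(adev);
 *	amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
 *	amdgpu_device_fw_loading(adev);
 *	amdgpu_device_ip_resume_phase2(adev);
 *	amdgpu_device_ip_late_init(adev);
 */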
2344
e3ecdffa
AD
2345/**
2346 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2347 *
2348 * @adev: amdgpu_device pointer
2349 *
2350 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2351 */
4e99a44e 2352static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 2353{
6867e1b5
ML
2354 if (amdgpu_sriov_vf(adev)) {
2355 if (adev->is_atom_fw) {
2356 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2357 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2358 } else {
2359 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2360 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2361 }
2362
2363 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2364 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 2365 }
048765ad
AR
2366}
2367
e3ecdffa
AD
2368/**
2369 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2370 *
2371 * @asic_type: AMD asic type
2372 *
2373 * Check if there is DC (new modesetting infrastructure) support for an asic.
2374 * Returns true if DC has support, false if not.
2375 */
4562236b
HW
2376bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2377{
2378 switch (asic_type) {
2379#if defined(CONFIG_DRM_AMD_DC)
2380 case CHIP_BONAIRE:
0d6fbccb 2381 case CHIP_KAVERI:
367e6687
AD
2382 case CHIP_KABINI:
2383 case CHIP_MULLINS:
d9fda248
HW
2384 /*
2385 * We have systems in the wild with these ASICs that require
2386 * LVDS and VGA support which is not supported with DC.
2387 *
2388 * Fallback to the non-DC driver here by default so as not to
2389 * cause regressions.
2390 */
2391 return amdgpu_dc > 0;
2392 case CHIP_HAWAII:
4562236b
HW
2393 case CHIP_CARRIZO:
2394 case CHIP_STONEY:
4562236b 2395 case CHIP_POLARIS10:
675fd32b 2396 case CHIP_POLARIS11:
2c8ad2d5 2397 case CHIP_POLARIS12:
675fd32b 2398 case CHIP_VEGAM:
4562236b
HW
2399 case CHIP_TONGA:
2400 case CHIP_FIJI:
42f8ffa1 2401 case CHIP_VEGA10:
dca7b401 2402 case CHIP_VEGA12:
c6034aa2 2403 case CHIP_VEGA20:
dc37a9a0 2404#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
fd187853 2405 case CHIP_RAVEN:
42f8ffa1 2406#endif
fd187853 2407 return amdgpu_dc != 0;
4562236b
HW
2408#endif
2409 default:
2410 return false;
2411 }
2412}
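
/*
 * Note (annotation, not compiled driver code): the amdgpu_dc module parameter
 * drives the choice above. On the LVDS/VGA-era ASICs (BONAIRE, KAVERI, KABINI,
 * MULLINS) DC is only used when amdgpu_dc > 0 is requested explicitly; on the
 * newer ASICs it is used unless amdgpu_dc is set to 0.
 */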
2413
2414/**
2415 * amdgpu_device_has_dc_support - check if dc is supported
2416 *
2417 * @adev: amdgpu_device pointer
2418 *
2419 * Returns true for supported, false for not supported
2420 */
2421bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2422{
2555039d
XY
2423 if (amdgpu_sriov_vf(adev))
2424 return false;
2425
4562236b
HW
2426 return amdgpu_device_asic_has_dc_support(adev->asic_type);
2427}
2428
d4535e2c
AG
2429
2430static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
2431{
2432 struct amdgpu_device *adev =
2433 container_of(__work, struct amdgpu_device, xgmi_reset_work);
2434
2435 adev->asic_reset_res = amdgpu_asic_reset(adev);
2436 if (adev->asic_reset_res)
fed184e9 2437 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
d4535e2c
AG
2438 adev->asic_reset_res, adev->ddev->unique);
2439}
2440
2441
d38ceaf9
AD
2442/**
2443 * amdgpu_device_init - initialize the driver
2444 *
2445 * @adev: amdgpu_device pointer
87e3f136 2446 * @ddev: drm dev pointer
d38ceaf9
AD
2447 * @pdev: pci dev pointer
2448 * @flags: driver flags
2449 *
2450 * Initializes the driver info and hw (all asics).
2451 * Returns 0 for success or an error on failure.
2452 * Called at driver startup.
2453 */
2454int amdgpu_device_init(struct amdgpu_device *adev,
2455 struct drm_device *ddev,
2456 struct pci_dev *pdev,
2457 uint32_t flags)
2458{
2459 int r, i;
2460 bool runtime = false;
95844d20 2461 u32 max_MBps;
d38ceaf9
AD
2462
2463 adev->shutdown = false;
2464 adev->dev = &pdev->dev;
2465 adev->ddev = ddev;
2466 adev->pdev = pdev;
2467 adev->flags = flags;
2f7d10b3 2468 adev->asic_type = flags & AMD_ASIC_MASK;
d38ceaf9 2469 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2
SL
2470 if (amdgpu_emu_mode == 1)
2471 adev->usec_timeout *= 2;
770d13b1 2472 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
2473 adev->accel_working = false;
2474 adev->num_rings = 0;
2475 adev->mman.buffer_funcs = NULL;
2476 adev->mman.buffer_funcs_ring = NULL;
2477 adev->vm_manager.vm_pte_funcs = NULL;
3798e9a6 2478 adev->vm_manager.vm_pte_num_rqs = 0;
132f34e4 2479 adev->gmc.gmc_funcs = NULL;
f54d1867 2480 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 2481 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
2482
2483 adev->smc_rreg = &amdgpu_invalid_rreg;
2484 adev->smc_wreg = &amdgpu_invalid_wreg;
2485 adev->pcie_rreg = &amdgpu_invalid_rreg;
2486 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
2487 adev->pciep_rreg = &amdgpu_invalid_rreg;
2488 adev->pciep_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
2489 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
2490 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
2491 adev->didt_rreg = &amdgpu_invalid_rreg;
2492 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
2493 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
2494 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
2495 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
2496 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
2497
3e39ab90
AD
2498 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
2499 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
2500 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
2501
2502 /* mutex initialization is all done here so we
2503 * can recall functions without locking issues */
d38ceaf9 2504 atomic_set(&adev->irq.ih.lock, 0);
0e5ca0d1 2505 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
2506 mutex_init(&adev->pm.mutex);
2507 mutex_init(&adev->gfx.gpu_clock_mutex);
2508 mutex_init(&adev->srbm_mutex);
b8866c26 2509 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 2510 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 2511 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 2512 mutex_init(&adev->mn_lock);
e23b74aa 2513 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 2514 hash_init(adev->mn_hash);
13a752e3 2515 mutex_init(&adev->lock_reset);
bb5a2bdf 2516 mutex_init(&adev->virt.dpm_mutex);
d38ceaf9 2517
912dfc84
EQ
2518 r = amdgpu_device_check_arguments(adev);
2519 if (r)
2520 return r;
d38ceaf9 2521
d38ceaf9
AD
2522 spin_lock_init(&adev->mmio_idx_lock);
2523 spin_lock_init(&adev->smc_idx_lock);
2524 spin_lock_init(&adev->pcie_idx_lock);
2525 spin_lock_init(&adev->uvd_ctx_idx_lock);
2526 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 2527 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 2528 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 2529 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 2530 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 2531
0c4e7fa5
CZ
2532 INIT_LIST_HEAD(&adev->shadow_list);
2533 mutex_init(&adev->shadow_list_lock);
2534
795f2813
AR
2535 INIT_LIST_HEAD(&adev->ring_lru_list);
2536 spin_lock_init(&adev->ring_lru_list_lock);
2537
06ec9070
AD
2538 INIT_DELAYED_WORK(&adev->late_init_work,
2539 amdgpu_device_ip_late_init_func_handler);
1e317b99
RZ
2540 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
2541 amdgpu_device_delay_enable_gfx_off);
2dc80b00 2542
d4535e2c
AG
2543 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
2544
d23ee13f 2545 adev->gfx.gfx_off_req_count = 1;
b1ddf548
RZ
2546 adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false;
2547
0fa49558
AX
2548 /* Registers mapping */
2549 /* TODO: block userspace mapping of io register */
da69c161
KW
2550 if (adev->asic_type >= CHIP_BONAIRE) {
2551 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
2552 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
2553 } else {
2554 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
2555 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
2556 }
d38ceaf9 2557
d38ceaf9
AD
2558 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
2559 if (adev->rmmio == NULL) {
2560 return -ENOMEM;
2561 }
2562 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
2563 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
2564
d38ceaf9
AD
2565 /* io port mapping */
2566 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
2567 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
2568 adev->rio_mem_size = pci_resource_len(adev->pdev, i);
2569 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
2570 break;
2571 }
2572 }
2573 if (adev->rio_mem == NULL)
b64a18c5 2574 DRM_INFO("PCI I/O BAR is not found.\n");
d38ceaf9 2575
5494d864
AD
2576 amdgpu_device_get_pcie_info(adev);
2577
d38ceaf9 2578 /* early init functions */
06ec9070 2579 r = amdgpu_device_ip_early_init(adev);
d38ceaf9
AD
2580 if (r)
2581 return r;
2582
6585661d
OZ
2583 /* doorbell bar mapping and doorbell index init*/
2584 amdgpu_device_doorbell_init(adev);
2585
d38ceaf9
AD
2586 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
2587 /* this will fail for cards that aren't VGA class devices, just
2588 * ignore it */
06ec9070 2589 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
d38ceaf9 2590
e9bef455 2591 if (amdgpu_device_is_px(ddev))
d38ceaf9 2592 runtime = true;
84c8b22e
LW
2593 if (!pci_is_thunderbolt_attached(adev->pdev))
2594 vga_switcheroo_register_client(adev->pdev,
2595 &amdgpu_switcheroo_ops, runtime);
d38ceaf9
AD
2596 if (runtime)
2597 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
2598
9475a943
SL
2599 if (amdgpu_emu_mode == 1) {
2600 /* post the asic on emulation mode */
2601 emu_soc_asic_init(adev);
bfca0289 2602 goto fence_driver_init;
9475a943 2603 }
bfca0289 2604
4e99a44e
ML
2605 /* detect if we are with an SRIOV vbios */
2606 amdgpu_device_detect_sriov_bios(adev);
048765ad 2607
95e8e59e
AD
2608 /* check if we need to reset the asic
2609 * E.g., driver was not cleanly unloaded previously, etc.
2610 */
f14899fd 2611 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
95e8e59e
AD
2612 r = amdgpu_asic_reset(adev);
2613 if (r) {
2614 dev_err(adev->dev, "asic reset on init failed\n");
2615 goto failed;
2616 }
2617 }
2618
d38ceaf9 2619 /* Post card if necessary */
39c640c0 2620 if (amdgpu_device_need_post(adev)) {
d38ceaf9 2621 if (!adev->bios) {
bec86378 2622 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
2623 r = -EINVAL;
2624 goto failed;
d38ceaf9 2625 }
bec86378 2626 DRM_INFO("GPU posting now...\n");
4e99a44e
ML
2627 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
2628 if (r) {
2629 dev_err(adev->dev, "gpu post error!\n");
2630 goto failed;
2631 }
d38ceaf9
AD
2632 }
2633
88b64e95
AD
2634 if (adev->is_atom_fw) {
2635 /* Initialize clocks */
2636 r = amdgpu_atomfirmware_get_clock_info(adev);
2637 if (r) {
2638 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 2639 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
2640 goto failed;
2641 }
2642 } else {
a5bde2f9
AD
2643 /* Initialize clocks */
2644 r = amdgpu_atombios_get_clock_info(adev);
2645 if (r) {
2646 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 2647 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 2648 goto failed;
a5bde2f9
AD
2649 }
2650 /* init i2c buses */
4562236b
HW
2651 if (!amdgpu_device_has_dc_support(adev))
2652 amdgpu_atombios_i2c_init(adev);
2c1a2784 2653 }
d38ceaf9 2654
bfca0289 2655fence_driver_init:
d38ceaf9
AD
2656 /* Fence driver */
2657 r = amdgpu_fence_driver_init(adev);
2c1a2784
AD
2658 if (r) {
2659 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
e23b74aa 2660 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 2661 goto failed;
2c1a2784 2662 }
d38ceaf9
AD
2663
2664 /* init the mode config */
2665 drm_mode_config_init(adev->ddev);
2666
06ec9070 2667 r = amdgpu_device_ip_init(adev);
d38ceaf9 2668 if (r) {
8840a387 2669 /* failed in exclusive mode due to timeout */
2670 if (amdgpu_sriov_vf(adev) &&
2671 !amdgpu_sriov_runtime(adev) &&
2672 amdgpu_virt_mmio_blocked(adev) &&
2673 !amdgpu_virt_wait_reset(adev)) {
2674 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
2675 /* Don't send request since VF is inactive. */
2676 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
2677 adev->virt.ops = NULL;
8840a387 2678 r = -EAGAIN;
2679 goto failed;
2680 }
06ec9070 2681 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 2682 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
72d3f592
ED
2683 if (amdgpu_virt_request_full_gpu(adev, false))
2684 amdgpu_virt_release_full_gpu(adev, false);
83ba126a 2685 goto failed;
d38ceaf9
AD
2686 }
2687
2688 adev->accel_working = true;
2689
e59c0205
AX
2690 amdgpu_vm_check_compute_bug(adev);
2691
95844d20
MO
2692 /* Initialize the buffer migration limit. */
2693 if (amdgpu_moverate >= 0)
2694 max_MBps = amdgpu_moverate;
2695 else
2696 max_MBps = 8; /* Allow 8 MB/s. */
2697 /* Get a log2 for easy divisions. */
2698 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
2699
9bc92b9c
ML
2700 amdgpu_fbdev_init(adev);
2701
d2f52ac8
RZ
2702 r = amdgpu_pm_sysfs_init(adev);
2703 if (r)
2704 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
2705
5bb23532
OM
2706 r = amdgpu_ucode_sysfs_init(adev);
2707 if (r)
2708 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
2709
75758255 2710 r = amdgpu_debugfs_gem_init(adev);
3f14e623 2711 if (r)
d38ceaf9 2712 DRM_ERROR("registering gem debugfs failed (%d).\n", r);
d38ceaf9
AD
2713
2714 r = amdgpu_debugfs_regs_init(adev);
3f14e623 2715 if (r)
d38ceaf9 2716 DRM_ERROR("registering register debugfs failed (%d).\n", r);
d38ceaf9 2717
50ab2533 2718 r = amdgpu_debugfs_firmware_init(adev);
3f14e623 2719 if (r)
50ab2533 2720 DRM_ERROR("registering firmware debugfs failed (%d).\n", r);
50ab2533 2721
763efb6c 2722 r = amdgpu_debugfs_init(adev);
db95e218 2723 if (r)
763efb6c 2724 DRM_ERROR("Creating debugfs files failed (%d).\n", r);
db95e218 2725
d38ceaf9
AD
2726 if ((amdgpu_testing & 1)) {
2727 if (adev->accel_working)
2728 amdgpu_test_moves(adev);
2729 else
2730 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
2731 }
d38ceaf9
AD
2732 if (amdgpu_benchmarking) {
2733 if (adev->accel_working)
2734 amdgpu_benchmark(adev, amdgpu_benchmarking);
2735 else
2736 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
2737 }
2738
2739 /* enable clockgating, etc. after ib tests, etc. since some blocks require
2740 * explicit gating rather than handling it automatically.
2741 */
06ec9070 2742 r = amdgpu_device_ip_late_init(adev);
2c1a2784 2743 if (r) {
06ec9070 2744 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
e23b74aa 2745 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
83ba126a 2746 goto failed;
2c1a2784 2747 }
d38ceaf9 2748
108c6a63 2749 /* must succeed. */
511fdbc3 2750 amdgpu_ras_resume(adev);
108c6a63 2751
dcea6e65
KR
2752 r = device_create_file(adev->dev, &dev_attr_pcie_replay_count);
2753 if (r) {
2754 dev_err(adev->dev, "Could not create pcie_replay_count");
2755 return r;
2756 }
108c6a63 2757
d38ceaf9 2758 return 0;
83ba126a
AD
2759
2760failed:
89041940 2761 amdgpu_vf_error_trans_all(adev);
83ba126a
AD
2762 if (runtime)
2763 vga_switcheroo_fini_domain_pm_ops(adev->dev);
8840a387 2764
83ba126a 2765 return r;
d38ceaf9
AD
2766}
2767
d38ceaf9
AD
2768/**
2769 * amdgpu_device_fini - tear down the driver
2770 *
2771 * @adev: amdgpu_device pointer
2772 *
2773 * Tear down the driver info (all asics).
2774 * Called at driver shutdown.
2775 */
2776void amdgpu_device_fini(struct amdgpu_device *adev)
2777{
2778 int r;
2779
2780 DRM_INFO("amdgpu: finishing device.\n");
2781 adev->shutdown = true;
e5b03032
ML
2782 /* disable all interrupts */
2783 amdgpu_irq_disable_all(adev);
ff97cba8
ML
2784 if (adev->mode_info.mode_config_initialized){
2785 if (!amdgpu_device_has_dc_support(adev))
c2d88e06 2786 drm_helper_force_disable_all(adev->ddev);
ff97cba8
ML
2787 else
2788 drm_atomic_helper_shutdown(adev->ddev);
2789 }
d38ceaf9 2790 amdgpu_fence_driver_fini(adev);
58e955d9 2791 amdgpu_pm_sysfs_fini(adev);
d38ceaf9 2792 amdgpu_fbdev_fini(adev);
06ec9070 2793 r = amdgpu_device_ip_fini(adev);
ab4fe3e1
HR
2794 if (adev->firmware.gpu_info_fw) {
2795 release_firmware(adev->firmware.gpu_info_fw);
2796 adev->firmware.gpu_info_fw = NULL;
2797 }
d38ceaf9 2798 adev->accel_working = false;
2dc80b00 2799 cancel_delayed_work_sync(&adev->late_init_work);
d38ceaf9 2800 /* free i2c buses */
4562236b
HW
2801 if (!amdgpu_device_has_dc_support(adev))
2802 amdgpu_i2c_fini(adev);
bfca0289
SL
2803
2804 if (amdgpu_emu_mode != 1)
2805 amdgpu_atombios_fini(adev);
2806
d38ceaf9
AD
2807 kfree(adev->bios);
2808 adev->bios = NULL;
84c8b22e
LW
2809 if (!pci_is_thunderbolt_attached(adev->pdev))
2810 vga_switcheroo_unregister_client(adev->pdev);
83ba126a
AD
2811 if (adev->flags & AMD_IS_PX)
2812 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d38ceaf9
AD
2813 vga_client_register(adev->pdev, NULL, NULL, NULL);
2814 if (adev->rio_mem)
2815 pci_iounmap(adev->pdev, adev->rio_mem);
2816 adev->rio_mem = NULL;
2817 iounmap(adev->rmmio);
2818 adev->rmmio = NULL;
06ec9070 2819 amdgpu_device_doorbell_fini(adev);
d38ceaf9 2820 amdgpu_debugfs_regs_cleanup(adev);
dcea6e65 2821 device_remove_file(adev->dev, &dev_attr_pcie_replay_count);
5bb23532 2822 amdgpu_ucode_sysfs_fini(adev);
d38ceaf9
AD
2823}
2824
2825
2826/*
2827 * Suspend & resume.
2828 */
2829/**
810ddc3a 2830 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 2831 *
87e3f136
DP
2832 * @dev: drm dev pointer
2833 * @suspend: suspend state
2834 * @fbcon : notify the fbdev of suspend
d38ceaf9
AD
2835 *
2836 * Puts the hw in the suspend state (all asics).
2837 * Returns 0 for success or an error on failure.
2838 * Called at driver suspend.
2839 */
810ddc3a 2840int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
d38ceaf9
AD
2841{
2842 struct amdgpu_device *adev;
2843 struct drm_crtc *crtc;
2844 struct drm_connector *connector;
5ceb54c6 2845 int r;
d38ceaf9
AD
2846
2847 if (dev == NULL || dev->dev_private == NULL) {
2848 return -ENODEV;
2849 }
2850
2851 adev = dev->dev_private;
2852
2853 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
2854 return 0;
2855
44779b43 2856 adev->in_suspend = true;
d38ceaf9
AD
2857 drm_kms_helper_poll_disable(dev);
2858
5f818173
S
2859 if (fbcon)
2860 amdgpu_fbdev_set_suspend(adev, 1);
2861
a5459475
RZ
2862 cancel_delayed_work_sync(&adev->late_init_work);
2863
4562236b
HW
2864 if (!amdgpu_device_has_dc_support(adev)) {
2865 /* turn off display hw */
2866 drm_modeset_lock_all(dev);
2867 list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
2868 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
2869 }
2870 drm_modeset_unlock_all(dev);
fe1053b7
AD
2871 /* unpin the front buffers and cursors */
2872 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
2873 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
2874 struct drm_framebuffer *fb = crtc->primary->fb;
2875 struct amdgpu_bo *robj;
2876
91334223 2877 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
2878 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
2879 r = amdgpu_bo_reserve(aobj, true);
2880 if (r == 0) {
2881 amdgpu_bo_unpin(aobj);
2882 amdgpu_bo_unreserve(aobj);
2883 }
756e6880 2884 }
756e6880 2885
fe1053b7
AD
2886 if (fb == NULL || fb->obj[0] == NULL) {
2887 continue;
2888 }
2889 robj = gem_to_amdgpu_bo(fb->obj[0]);
2890 /* don't unpin kernel fb objects */
2891 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
2892 r = amdgpu_bo_reserve(robj, true);
2893 if (r == 0) {
2894 amdgpu_bo_unpin(robj);
2895 amdgpu_bo_unreserve(robj);
2896 }
d38ceaf9
AD
2897 }
2898 }
2899 }
fe1053b7
AD
2900
2901 amdgpu_amdkfd_suspend(adev);
2902
5e6932fe 2903 amdgpu_ras_suspend(adev);
2904
fe1053b7
AD
2905 r = amdgpu_device_ip_suspend_phase1(adev);
2906
d38ceaf9
AD
2907 /* evict vram memory */
2908 amdgpu_bo_evict_vram(adev);
2909
5ceb54c6 2910 amdgpu_fence_driver_suspend(adev);
d38ceaf9 2911
fe1053b7 2912 r = amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 2913
a0a71e49
AD
2914 /* evict remaining vram memory
2915 * This second call to evict vram is to evict the gart page table
2916 * using the CPU.
2917 */
d38ceaf9
AD
2918 amdgpu_bo_evict_vram(adev);
2919
2920 pci_save_state(dev->pdev);
2921 if (suspend) {
2922 /* Shut down the device */
2923 pci_disable_device(dev->pdev);
2924 pci_set_power_state(dev->pdev, PCI_D3hot);
74b0b157 2925 } else {
2926 r = amdgpu_asic_reset(adev);
2927 if (r)
2928 DRM_ERROR("amdgpu asic reset failed\n");
d38ceaf9
AD
2929 }
2930
d38ceaf9
AD
2931 return 0;
2932}
2933
2934/**
810ddc3a 2935 * amdgpu_device_resume - initiate device resume
d38ceaf9 2936 *
87e3f136
DP
2937 * @dev: drm dev pointer
2938 * @resume: resume state
2939 * @fbcon : notify the fbdev of resume
d38ceaf9
AD
2940 *
2941 * Bring the hw back to operating state (all asics).
2942 * Returns 0 for success or an error on failure.
2943 * Called at driver resume.
2944 */
810ddc3a 2945int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
d38ceaf9
AD
2946{
2947 struct drm_connector *connector;
2948 struct amdgpu_device *adev = dev->dev_private;
756e6880 2949 struct drm_crtc *crtc;
03161a6e 2950 int r = 0;
d38ceaf9
AD
2951
2952 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
2953 return 0;
2954
d38ceaf9
AD
2955 if (resume) {
2956 pci_set_power_state(dev->pdev, PCI_D0);
2957 pci_restore_state(dev->pdev);
74b0b157 2958 r = pci_enable_device(dev->pdev);
03161a6e 2959 if (r)
4d3b9ae5 2960 return r;
d38ceaf9
AD
2961 }
2962
2963 /* post card */
39c640c0 2964 if (amdgpu_device_need_post(adev)) {
74b0b157 2965 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
2966 if (r)
2967 DRM_ERROR("amdgpu asic init failed\n");
2968 }
d38ceaf9 2969
06ec9070 2970 r = amdgpu_device_ip_resume(adev);
e6707218 2971 if (r) {
06ec9070 2972 DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
4d3b9ae5 2973 return r;
e6707218 2974 }
5ceb54c6
AD
2975 amdgpu_fence_driver_resume(adev);
2976
d38ceaf9 2977
06ec9070 2978 r = amdgpu_device_ip_late_init(adev);
03161a6e 2979 if (r)
4d3b9ae5 2980 return r;
d38ceaf9 2981
fe1053b7
AD
2982 if (!amdgpu_device_has_dc_support(adev)) {
2983 /* pin cursors */
2984 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
2985 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
2986
91334223 2987 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
2988 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
2989 r = amdgpu_bo_reserve(aobj, true);
2990 if (r == 0) {
2991 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
2992 if (r != 0)
2993 DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
2994 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
2995 amdgpu_bo_unreserve(aobj);
2996 }
756e6880
AD
2997 }
2998 }
2999 }
ba997709
YZ
3000 r = amdgpu_amdkfd_resume(adev);
3001 if (r)
3002 return r;
756e6880 3003
96a5d8d4
LL
3004 /* Make sure IB tests flushed */
3005 flush_delayed_work(&adev->late_init_work);
3006
d38ceaf9
AD
3007 /* blat the mode back in */
3008 if (fbcon) {
4562236b
HW
3009 if (!amdgpu_device_has_dc_support(adev)) {
3010 /* pre DCE11 */
3011 drm_helper_resume_force_mode(dev);
3012
3013 /* turn on display hw */
3014 drm_modeset_lock_all(dev);
3015 list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
3016 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
3017 }
3018 drm_modeset_unlock_all(dev);
d38ceaf9 3019 }
4d3b9ae5 3020 amdgpu_fbdev_set_suspend(adev, 0);
d38ceaf9
AD
3021 }
3022
3023 drm_kms_helper_poll_enable(dev);
23a1a9e5 3024
5e6932fe 3025 amdgpu_ras_resume(adev);
3026
23a1a9e5
L
3027 /*
3028 * Most of the connector probing functions try to acquire runtime pm
3029 * refs to ensure that the GPU is powered on when connector polling is
3030 * performed. Since we're calling this from a runtime PM callback,
3031 * trying to acquire rpm refs will cause us to deadlock.
3032 *
3033 * Since we're guaranteed to be holding the rpm lock, it's safe to
3034 * temporarily disable the rpm helpers so this doesn't deadlock us.
3035 */
3036#ifdef CONFIG_PM
3037 dev->dev->power.disable_depth++;
3038#endif
4562236b
HW
3039 if (!amdgpu_device_has_dc_support(adev))
3040 drm_helper_hpd_irq_event(dev);
3041 else
3042 drm_kms_helper_hotplug_event(dev);
23a1a9e5
L
3043#ifdef CONFIG_PM
3044 dev->dev->power.disable_depth--;
3045#endif
44779b43
RZ
3046 adev->in_suspend = false;
3047
4d3b9ae5 3048 return 0;
d38ceaf9
AD
3049}
3050
e3ecdffa
AD
3051/**
3052 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
3053 *
3054 * @adev: amdgpu_device pointer
3055 *
3056 * The list of all the hardware IPs that make up the asic is walked and
3057 * the check_soft_reset callbacks are run. check_soft_reset determines
3058 * if the asic is still hung or not.
3059 * Returns true if any of the IPs are still in a hung state, false if not.
3060 */
06ec9070 3061static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
3062{
3063 int i;
3064 bool asic_hang = false;
3065
f993d628
ML
3066 if (amdgpu_sriov_vf(adev))
3067 return true;
3068
8bc04c29
AD
3069 if (amdgpu_asic_need_full_reset(adev))
3070 return true;
3071
63fbf42f 3072 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3073 if (!adev->ip_blocks[i].status.valid)
63fbf42f 3074 continue;
a1255107
AD
3075 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
3076 adev->ip_blocks[i].status.hang =
3077 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
3078 if (adev->ip_blocks[i].status.hang) {
3079 DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
3080 asic_hang = true;
3081 }
3082 }
3083 return asic_hang;
3084}
3085
e3ecdffa
AD
3086/**
3087 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3088 *
3089 * @adev: amdgpu_device pointer
3090 *
3091 * The list of all the hardware IPs that make up the asic is walked and the
3092 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
3093 * handles any IP specific hardware or software state changes that are
3094 * necessary for a soft reset to succeed.
3095 * Returns 0 on success, negative error code on failure.
3096 */
06ec9070 3097static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
3098{
3099 int i, r = 0;
3100
3101 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3102 if (!adev->ip_blocks[i].status.valid)
d31a501e 3103 continue;
a1255107
AD
3104 if (adev->ip_blocks[i].status.hang &&
3105 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3106 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
3107 if (r)
3108 return r;
3109 }
3110 }
3111
3112 return 0;
3113}
3114
e3ecdffa
AD
3115/**
3116 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3117 *
3118 * @adev: amdgpu_device pointer
3119 *
3120 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
3121 * reset is necessary to recover.
3122 * Returns true if a full asic reset is required, false if not.
3123 */
06ec9070 3124static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 3125{
da146d3b
AD
3126 int i;
3127
8bc04c29
AD
3128 if (amdgpu_asic_need_full_reset(adev))
3129 return true;
3130
da146d3b 3131 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3132 if (!adev->ip_blocks[i].status.valid)
da146d3b 3133 continue;
a1255107
AD
3134 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3135 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3136 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
3137 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3138 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 3139 if (adev->ip_blocks[i].status.hang) {
da146d3b
AD
3140 DRM_INFO("Some block need full reset!\n");
3141 return true;
3142 }
3143 }
35d782fe
CZ
3144 }
3145 return false;
3146}
3147
e3ecdffa
AD
3148/**
3149 * amdgpu_device_ip_soft_reset - do a soft reset
3150 *
3151 * @adev: amdgpu_device pointer
3152 *
3153 * The list of all the hardware IPs that make up the asic is walked and the
3154 * soft_reset callbacks are run if the block is hung. soft_reset handles any
3155 * IP specific hardware or software state changes that are necessary to soft
3156 * reset the IP.
3157 * Returns 0 on success, negative error code on failure.
3158 */
06ec9070 3159static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3160{
3161 int i, r = 0;
3162
3163 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3164 if (!adev->ip_blocks[i].status.valid)
35d782fe 3165 continue;
a1255107
AD
3166 if (adev->ip_blocks[i].status.hang &&
3167 adev->ip_blocks[i].version->funcs->soft_reset) {
3168 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
3169 if (r)
3170 return r;
3171 }
3172 }
3173
3174 return 0;
3175}
3176
e3ecdffa
AD
3177/**
3178 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
3179 *
3180 * @adev: amdgpu_device pointer
3181 *
3182 * The list of all the hardware IPs that make up the asic is walked and the
3183 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
3184 * handles any IP specific hardware or software state changes that are
3185 * necessary after the IP has been soft reset.
3186 * Returns 0 on success, negative error code on failure.
3187 */
06ec9070 3188static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3189{
3190 int i, r = 0;
3191
3192 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3193 if (!adev->ip_blocks[i].status.valid)
35d782fe 3194 continue;
a1255107
AD
3195 if (adev->ip_blocks[i].status.hang &&
3196 adev->ip_blocks[i].version->funcs->post_soft_reset)
3197 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
3198 if (r)
3199 return r;
3200 }
3201
3202 return 0;
3203}
3204
e3ecdffa 3205/**
c33adbc7 3206 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
3207 *
3208 * @adev: amdgpu_device pointer
3209 *
3210 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
3211 * restore things like GPUVM page tables after a GPU reset where
3212 * the contents of VRAM might be lost.
403009bf
CK
3213 *
3214 * Returns:
3215 * 0 on success, negative error code on failure.
e3ecdffa 3216 */
c33adbc7 3217static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 3218{
c41d1cf6 3219 struct dma_fence *fence = NULL, *next = NULL;
403009bf
CK
3220 struct amdgpu_bo *shadow;
3221 long r = 1, tmo;
c41d1cf6
ML
3222
3223 if (amdgpu_sriov_runtime(adev))
b045d3af 3224 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
3225 else
3226 tmo = msecs_to_jiffies(100);
3227
3228 DRM_INFO("recover vram bo from shadow start\n");
3229 mutex_lock(&adev->shadow_list_lock);
403009bf
CK
3230 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
3231
3232 /* No need to recover an evicted BO */
3233 if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
b575f10d 3234 shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
403009bf
CK
3235 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
3236 continue;
3237
3238 r = amdgpu_bo_restore_shadow(shadow, &next);
3239 if (r)
3240 break;
3241
c41d1cf6 3242 if (fence) {
1712fb1a 3243 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
3244 dma_fence_put(fence);
3245 fence = next;
1712fb1a 3246 if (tmo == 0) {
3247 r = -ETIMEDOUT;
c41d1cf6 3248 break;
1712fb1a 3249 } else if (tmo < 0) {
3250 r = tmo;
3251 break;
3252 }
403009bf
CK
3253 } else {
3254 fence = next;
c41d1cf6 3255 }
c41d1cf6
ML
3256 }
3257 mutex_unlock(&adev->shadow_list_lock);
3258
403009bf
CK
3259 if (fence)
3260 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
3261 dma_fence_put(fence);
3262
1712fb1a 3263 if (r < 0 || tmo <= 0) {
3264 DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
3265 return -EIO;
3266 }
c41d1cf6 3267
403009bf
CK
3268 DRM_INFO("recover vram bo from shadow done\n");
3269 return 0;
c41d1cf6
ML
3270}
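
/*
 * Note (annotation, not compiled driver code): each buffer on
 * adev->shadow_list has a GTT shadow of a VRAM parent; the loop above queues
 * one restore per shadow and then waits on the chained fences, with a longer
 * timeout for SR-IOV runtime (8s) than for bare metal (100ms per wait).
 */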
3271
a90ad3c2 3272
e3ecdffa 3273/**
06ec9070 3274 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e
ML
3275 *
3276 * @adev: amdgpu device pointer
87e3f136 3277 * @from_hypervisor: request from hypervisor
5740682e
ML
3278 *
3279 * Do a VF FLR and reinitialize the ASIC.
3280 * Returns 0 on success, negative error code on failure.
e3ecdffa
AD
3281 */
3282static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
3283 bool from_hypervisor)
5740682e
ML
3284{
3285 int r;
3286
3287 if (from_hypervisor)
3288 r = amdgpu_virt_request_full_gpu(adev, true);
3289 else
3290 r = amdgpu_virt_reset_gpu(adev);
3291 if (r)
3292 return r;
a90ad3c2 3293
f81e8d53
WL
3294 amdgpu_amdkfd_pre_reset(adev);
3295
a90ad3c2 3296 /* Resume IP prior to SMC */
06ec9070 3297 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
3298 if (r)
3299 goto error;
a90ad3c2
ML
3300
3301 /* we need recover gart prior to run SMC/CP/SDMA resume */
c1c7ce8f 3302 amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
a90ad3c2 3303
7a3e0bb2
RZ
3304 r = amdgpu_device_fw_loading(adev);
3305 if (r)
3306 return r;
3307
a90ad3c2 3308 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 3309 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
3310 if (r)
3311 goto error;
a90ad3c2
ML
3312
3313 amdgpu_irq_gpu_reset_resume_helper(adev);
5740682e 3314 r = amdgpu_ib_ring_tests(adev);
f81e8d53 3315 amdgpu_amdkfd_post_reset(adev);
a90ad3c2 3316
abc34253 3317error:
d3c117e5 3318 amdgpu_virt_init_data_exchange(adev);
abc34253 3319 amdgpu_virt_release_full_gpu(adev, true);
c41d1cf6
ML
3320 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
3321 atomic_inc(&adev->vram_lost_counter);
c33adbc7 3322 r = amdgpu_device_recover_vram(adev);
a90ad3c2
ML
3323 }
3324
3325 return r;
3326}
3327
12938fad
CK
3328/**
3329 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
3330 *
3331 * @adev: amdgpu device pointer
3332 *
3333 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
3334 * a hung GPU.
3335 */
3336bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
3337{
3338 if (!amdgpu_device_ip_check_soft_reset(adev)) {
3339 DRM_INFO("Timeout, but no hardware hang detected.\n");
3340 return false;
3341 }
3342
3ba7b418
AG
3343 if (amdgpu_gpu_recovery == 0)
3344 goto disabled;
3345
3346 if (amdgpu_sriov_vf(adev))
3347 return true;
3348
3349 if (amdgpu_gpu_recovery == -1) {
3350 switch (adev->asic_type) {
fc42d47c
AG
3351 case CHIP_BONAIRE:
3352 case CHIP_HAWAII:
3ba7b418
AG
3353 case CHIP_TOPAZ:
3354 case CHIP_TONGA:
3355 case CHIP_FIJI:
3356 case CHIP_POLARIS10:
3357 case CHIP_POLARIS11:
3358 case CHIP_POLARIS12:
3359 case CHIP_VEGAM:
3360 case CHIP_VEGA20:
3361 case CHIP_VEGA10:
3362 case CHIP_VEGA12:
3363 break;
3364 default:
3365 goto disabled;
3366 }
12938fad
CK
3367 }
3368
3369 return true;
3ba7b418
AG
3370
3371disabled:
3372 DRM_INFO("GPU recovery disabled.\n");
3373 return false;
12938fad
CK
3374}
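
/*
 * Note (annotation, not compiled driver code): amdgpu_gpu_recovery therefore
 * behaves as: 0 disables recovery, -1 (auto) enables it only for the ASICs
 * listed above, and any other value enables it unconditionally; SR-IOV VFs
 * always attempt recovery.
 */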
3375
5c6dd71e 3376
26bc5340
AG
3377static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
3378 struct amdgpu_job *job,
3379 bool *need_full_reset_arg)
3380{
3381 int i, r = 0;
3382 bool need_full_reset = *need_full_reset_arg;
71182665 3383
71182665 3384 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
3385 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3386 struct amdgpu_ring *ring = adev->rings[i];
3387
51687759 3388 if (!ring || !ring->sched.thread)
0875dc9e 3389 continue;
5740682e 3390
3391 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
3392 amdgpu_fence_driver_force_completion(ring);
0875dc9e 3393 }
d38ceaf9 3394
 3395 if (job)
3396 drm_sched_increase_karma(&job->base);
3397
1d721ed6 3398 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
3399 if (!amdgpu_sriov_vf(adev)) {
3400
3401 if (!need_full_reset)
3402 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
3403
3404 if (!need_full_reset) {
3405 amdgpu_device_ip_pre_soft_reset(adev);
3406 r = amdgpu_device_ip_soft_reset(adev);
3407 amdgpu_device_ip_post_soft_reset(adev);
3408 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
3409 DRM_INFO("soft reset failed, will fallback to full reset!\n");
3410 need_full_reset = true;
3411 }
3412 }
3413
3414 if (need_full_reset)
3415 r = amdgpu_device_ip_suspend(adev);
3416
3417 *need_full_reset_arg = need_full_reset;
3418 }
3419
3420 return r;
3421}
3422
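/*
 * Usage sketch (hypothetical helper): need_full_reset is an in/out
 * parameter.  The caller seeds it (false unless a full reset was already
 * decided) and reads back whether the soft-reset probe escalated to a
 * full ASIC reset, which then drives amdgpu_do_asic_reset() below.
 */
static int __maybe_unused amdgpu_pre_reset_sketch(struct amdgpu_device *adev,
						  struct amdgpu_job *job)
{
	bool need_full_reset = false;
	int r;

	r = amdgpu_device_pre_asic_reset(adev, job, &need_full_reset);
	if (need_full_reset)
		dev_info(adev->dev, "soft reset not possible, full reset required\n");

	return r;
}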
3423static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
3424 struct list_head *device_list_handle,
3425 bool *need_full_reset_arg)
3426{
3427 struct amdgpu_device *tmp_adev = NULL;
3428 bool need_full_reset = *need_full_reset_arg, vram_lost = false;
3429 int r = 0;
3430
3431 /*
 3432 * ASIC reset has to be done on all XGMI hive nodes ASAP
 3433 * to allow proper link negotiation in FW (within 1 sec)
3434 */
3435 if (need_full_reset) {
3436 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3437 /* For XGMI run all resets in parallel to speed up the process */
3438 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
3439 if (!queue_work(system_highpri_wq, &tmp_adev->xgmi_reset_work))
3440 r = -EALREADY;
3441 } else
3442 r = amdgpu_asic_reset(tmp_adev);
3443
3444 if (r) {
fed184e9 3445 DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s",
26bc5340 3446 r, tmp_adev->ddev->unique);
3447 break;
3448 }
3449 }
3450
 3451 /* For XGMI wait for all PSP resets to complete before proceeding */
3452 if (!r) {
3453 list_for_each_entry(tmp_adev, device_list_handle,
3454 gmc.xgmi.head) {
3455 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
3456 flush_work(&tmp_adev->xgmi_reset_work);
3457 r = tmp_adev->asic_reset_res;
3458 if (r)
3459 break;
3460 }
3461 }
2be4c4a9 3462
3463 list_for_each_entry(tmp_adev, device_list_handle,
3464 gmc.xgmi.head) {
3465 amdgpu_ras_reserve_bad_pages(tmp_adev);
3466 }
3467 }
3468 }
3469
3470
3471 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3472 if (need_full_reset) {
3473 /* post card */
3474 if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
3475 DRM_WARN("asic atom init failed!");
3476
3477 if (!r) {
3478 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
3479 r = amdgpu_device_ip_resume_phase1(tmp_adev);
3480 if (r)
3481 goto out;
3482
3483 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
3484 if (vram_lost) {
77e7f829 3485 DRM_INFO("VRAM is lost due to GPU reset!\n");
3486 atomic_inc(&tmp_adev->vram_lost_counter);
3487 }
3488
3489 r = amdgpu_gtt_mgr_recover(
3490 &tmp_adev->mman.bdev.man[TTM_PL_TT]);
3491 if (r)
3492 goto out;
3493
3494 r = amdgpu_device_fw_loading(tmp_adev);
3495 if (r)
3496 return r;
3497
3498 r = amdgpu_device_ip_resume_phase2(tmp_adev);
3499 if (r)
3500 goto out;
3501
3502 if (vram_lost)
3503 amdgpu_device_fill_reset_magic(tmp_adev);
3504
7c04ca50 3505 r = amdgpu_device_ip_late_init(tmp_adev);
3506 if (r)
3507 goto out;
3508
e79a04d5 3509 /* must succeed. */
511fdbc3 3510 amdgpu_ras_resume(tmp_adev);
e79a04d5 3511
3512 /* Update PSP FW topology after reset */
3513 if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
3514 r = amdgpu_xgmi_update_topology(hive, tmp_adev);
3515 }
3516 }
3517
3518
3519out:
3520 if (!r) {
3521 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
3522 r = amdgpu_ib_ring_tests(tmp_adev);
3523 if (r) {
3524 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
3525 r = amdgpu_device_ip_suspend(tmp_adev);
3526 need_full_reset = true;
3527 r = -EAGAIN;
3528 goto end;
3529 }
3530 }
3531
3532 if (!r)
3533 r = amdgpu_device_recover_vram(tmp_adev);
3534 else
3535 tmp_adev->asic_reset_res = r;
3536 }
3537
3538end:
3539 *need_full_reset_arg = need_full_reset;
3540 return r;
3541}
3542
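/*
 * Sketch of the caller contract (hypothetical helper): amdgpu_do_asic_reset()
 * returns -EAGAIN when the post-reset IB tests fail and another full reset
 * is wanted; amdgpu_device_gpu_recover() below implements this retry via its
 * "retry:" label.  A standalone caller would loop the same way.
 */
static int __maybe_unused amdgpu_asic_reset_retry_sketch(struct amdgpu_hive_info *hive,
							 struct list_head *device_list,
							 bool *need_full_reset)
{
	int r;

	do {
		r = amdgpu_do_asic_reset(hive, device_list, need_full_reset);
	} while (r == -EAGAIN);

	return r;
}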
1d721ed6 3543static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
26bc5340 3544{
3545 if (trylock) {
3546 if (!mutex_trylock(&adev->lock_reset))
3547 return false;
3548 } else
3549 mutex_lock(&adev->lock_reset);
5740682e 3550
3551 atomic_inc(&adev->gpu_reset_counter);
3552 adev->in_gpu_reset = 1;
7b184b00 3553 /* Block kfd: SRIOV would do it separately */
3554 if (!amdgpu_sriov_vf(adev))
3555 amdgpu_amdkfd_pre_reset(adev);
3556
3557 return true;
26bc5340 3558}
d38ceaf9 3559
3560static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
3561{
7b184b00 3562 /*unlock kfd: SRIOV would do it separately */
3563 if (!amdgpu_sriov_vf(adev))
3564 amdgpu_amdkfd_post_reset(adev);
89041940 3565 amdgpu_vf_error_trans_all(adev);
3566 adev->in_gpu_reset = 0;
3567 mutex_unlock(&adev->lock_reset);
3568}
3569
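/*
 * Usage sketch (hypothetical): the lock/unlock helpers bracket the whole
 * recovery sequence so that a concurrent timeout handler on the same device
 * either waits or bails out instead of stacking resets.
 */
static void __maybe_unused amdgpu_locked_recovery_sketch(struct amdgpu_device *adev)
{
	if (!amdgpu_device_lock_adev(adev, true))
		return;	/* another reset is already in flight, back off */

	/* ... pre-reset, ASIC reset and resume would run here ... */

	amdgpu_device_unlock_adev(adev);
}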
3570
3571/**
3572 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
3573 *
3574 * @adev: amdgpu device pointer
 3575 * @job: which job triggered the hang
3576 *
3577 * Attempt to reset the GPU if it has hung (all asics).
 3578 * Attempt a soft reset, or a full reset and ASIC reinitialization if required.
3579 * Returns 0 for success or an error on failure.
3580 */
3581
3582int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
3583 struct amdgpu_job *job)
3584{
3585 struct list_head device_list, *device_list_handle = NULL;
3586 bool need_full_reset, job_signaled;
26bc5340 3587 struct amdgpu_hive_info *hive = NULL;
26bc5340 3588 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 3589 int i, r = 0;
26bc5340 3590
1d721ed6 3591 need_full_reset = job_signaled = false;
3592 INIT_LIST_HEAD(&device_list);
3593
3594 dev_info(adev->dev, "GPU reset begin!\n");
3595
c53e4db7 3596 cancel_delayed_work_sync(&adev->late_init_work);
3597
3598 hive = amdgpu_get_xgmi_hive(adev, false);
3599
26bc5340 3600 /*
 3601 * Here we trylock to avoid a chain of resets executing from
 3602 * either a trigger by jobs on different adevs in an XGMI hive or jobs on
 3603 * different schedulers for the same device while this TO handler is running.
 3604 * We always reset all schedulers for a device and all devices for the XGMI
 3605 * hive, so that should take care of them too.
26bc5340 3606 */
3607
3608 if (hive && !mutex_trylock(&hive->reset_lock)) {
3609 DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
3610 job->base.id, hive->hive_id);
26bc5340 3611 return 0;
1d721ed6 3612 }
3613
 3614 /* Start with adev pre asic reset first for soft reset check. */
3615 if (!amdgpu_device_lock_adev(adev, !hive)) {
3616 DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
3617 job->base.id);
3618 return 0;
3619 }
3620
3621 /* Build list of devices to reset */
1d721ed6 3622 if (adev->gmc.xgmi.num_physical_nodes > 1) {
3623 if (!hive) {
3624 amdgpu_device_unlock_adev(adev);
3625 return -ENODEV;
3626 }
3627
3628 /*
 3629 * In XGMI hive mode, the device reset is done for all the
 3630 * nodes in the hive to retrain all XGMI links, hence the reset
 3631 * sequence is executed in a loop on all nodes.
3632 */
3633 device_list_handle = &hive->device_list;
3634 } else {
3635 list_add_tail(&adev->gmc.xgmi.head, &device_list);
3636 device_list_handle = &device_list;
3637 }
3638
3639 /* block all schedulers and reset given job's ring */
3640 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3641 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3642 struct amdgpu_ring *ring = tmp_adev->rings[i];
3643
3644 if (!ring || !ring->sched.thread)
3645 continue;
3646
3647 drm_sched_stop(&ring->sched, &job->base);
3648 }
3649 }
3650
3651
3652 /*
3653 * Must check guilty signal here since after this point all old
3654 * HW fences are force signaled.
3655 *
3656 * job->base holds a reference to parent fence
3657 */
3658 if (job && job->base.s_fence->parent &&
3659 dma_fence_is_signaled(job->base.s_fence->parent))
3660 job_signaled = true;
3661
3662 if (!amdgpu_device_ip_need_full_reset(adev))
3663 device_list_handle = &device_list;
3664
3665 if (job_signaled) {
3666 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
3667 goto skip_hw_reset;
3668 }
3669
3670
 3671 /* Guilty job will be freed after this */
3672 r = amdgpu_device_pre_asic_reset(adev,
3673 job,
3674 &need_full_reset);
3675 if (r) {
3676 /*TODO Should we stop ?*/
3677 DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
3678 r, adev->ddev->unique);
3679 adev->asic_reset_res = r;
3680 }
3681
3682retry: /* Rest of adevs pre asic reset from XGMI hive. */
3683 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3684
3685 if (tmp_adev == adev)
3686 continue;
3687
1d721ed6 3688 amdgpu_device_lock_adev(tmp_adev, false);
3689 r = amdgpu_device_pre_asic_reset(tmp_adev,
3690 NULL,
3691 &need_full_reset);
3692 /*TODO Should we stop ?*/
3693 if (r) {
3694 DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
3695 r, tmp_adev->ddev->unique);
3696 tmp_adev->asic_reset_res = r;
3697 }
3698 }
3699
3700 /* Actual ASIC resets if needed.*/
3701 /* TODO Implement XGMI hive reset logic for SRIOV */
3702 if (amdgpu_sriov_vf(adev)) {
3703 r = amdgpu_device_reset_sriov(adev, job ? false : true);
3704 if (r)
3705 adev->asic_reset_res = r;
3706 } else {
3707 r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
3708 if (r && r == -EAGAIN)
3709 goto retry;
3710 }
3711
3712skip_hw_reset:
3713
 3714 /* Post ASIC reset for all devs. */
3715 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3716 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3717 struct amdgpu_ring *ring = tmp_adev->rings[i];
3718
3719 if (!ring || !ring->sched.thread)
3720 continue;
3721
 3722 /* No point in resubmitting jobs if we didn't do a HW reset */
3723 if (!tmp_adev->asic_reset_res && !job_signaled)
3724 drm_sched_resubmit_jobs(&ring->sched);
3725
3726 drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
3727 }
3728
3729 if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
3730 drm_helper_resume_force_mode(tmp_adev->ddev);
3731 }
3732
3733 tmp_adev->asic_reset_res = 0;
3734
3735 if (r) {
3736 /* bad news, how to tell it to userspace ? */
3737 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter));
3738 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
3739 } else {
3740 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&adev->gpu_reset_counter));
3741 }
3742
3743 amdgpu_device_unlock_adev(tmp_adev);
3744 }
3745
1d721ed6 3746 if (hive)
22d6575b 3747 mutex_unlock(&hive->reset_lock);
3748
3749 if (r)
3750 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
3751 return r;
3752}
3753
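/*
 * Illustrative sketch: a manual recovery trigger (for example a debugfs
 * hook) can drive the full recovery path with no guilty job, in which case
 * karma tracking and the job-signaled shortcut above are simply skipped.
 * Hypothetical helper name.
 */
static int __maybe_unused amdgpu_manual_recover_sketch(struct amdgpu_device *adev)
{
	return amdgpu_device_gpu_recover(adev, NULL);
}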
3754/**
 3755 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
3756 *
3757 * @adev: amdgpu_device pointer
3758 *
 3759 * Fetches and stores in the driver the PCIE capabilities (gen speed
3760 * and lanes) of the slot the device is in. Handles APUs and
3761 * virtualized environments where PCIE config space may not be available.
3762 */
5494d864 3763static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 3764{
5d9a6330 3765 struct pci_dev *pdev;
3766 enum pci_bus_speed speed_cap, platform_speed_cap;
3767 enum pcie_link_width platform_link_width;
d0dd7f0c 3768
3769 if (amdgpu_pcie_gen_cap)
3770 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 3771
3772 if (amdgpu_pcie_lane_cap)
3773 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 3774
3775 /* covers APUs as well */
3776 if (pci_is_root_bus(adev->pdev->bus)) {
3777 if (adev->pm.pcie_gen_mask == 0)
3778 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
3779 if (adev->pm.pcie_mlw_mask == 0)
3780 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 3781 return;
cd474ba0 3782 }
d0dd7f0c 3783
3784 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
3785 return;
3786
3787 pcie_bandwidth_available(adev->pdev, NULL,
3788 &platform_speed_cap, &platform_link_width);
c5313457 3789
cd474ba0 3790 if (adev->pm.pcie_gen_mask == 0) {
3791 /* asic caps */
3792 pdev = adev->pdev;
3793 speed_cap = pcie_get_speed_cap(pdev);
3794 if (speed_cap == PCI_SPEED_UNKNOWN) {
3795 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3796 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
3797 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 3798 } else {
3799 if (speed_cap == PCIE_SPEED_16_0GT)
3800 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3801 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
3802 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
3803 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
3804 else if (speed_cap == PCIE_SPEED_8_0GT)
3805 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3806 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
3807 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
3808 else if (speed_cap == PCIE_SPEED_5_0GT)
3809 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3810 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
3811 else
3812 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
3813 }
3814 /* platform caps */
c5313457 3815 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
3816 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3817 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
3818 } else {
c5313457 3819 if (platform_speed_cap == PCIE_SPEED_16_0GT)
3820 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3821 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
3822 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
3823 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 3824 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
3825 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3826 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
3827 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 3828 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
3829 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3830 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
3831 else
3832 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
3833
3834 }
3835 }
3836 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 3837 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
3838 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
3839 } else {
c5313457 3840 switch (platform_link_width) {
5d9a6330 3841 case PCIE_LNK_X32:
3842 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
3843 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
3844 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
3845 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
3846 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3847 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3848 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3849 break;
5d9a6330 3850 case PCIE_LNK_X16:
3851 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
3852 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
3853 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
3854 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3855 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3856 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3857 break;
5d9a6330 3858 case PCIE_LNK_X12:
3859 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
3860 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
3861 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3862 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3863 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3864 break;
5d9a6330 3865 case PCIE_LNK_X8:
3866 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
3867 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3868 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3869 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3870 break;
5d9a6330 3871 case PCIE_LNK_X4:
3872 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3873 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3874 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3875 break;
5d9a6330 3876 case PCIE_LNK_X2:
3877 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3878 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3879 break;
5d9a6330 3880 case PCIE_LNK_X1:
3881 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
3882 break;
3883 default:
3884 break;
3885 }
3886 }
3887 }
3888}
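/*
 * Illustrative sketch: consumers (e.g. powerplay/SMU code) test the cached
 * masks filled in above rather than re-reading PCI config space.
 * Hypothetical helper.
 */
static bool __maybe_unused amdgpu_pcie_gen3_supported_sketch(struct amdgpu_device *adev)
{
	return !!(adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
}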
d38ceaf9 3889