1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
b1ddf548 28#include <linux/power_supply.h>
0875dc9e 29#include <linux/kthread.h>
fdf2f6c5 30#include <linux/module.h>
31#include <linux/console.h>
32#include <linux/slab.h>
fdf2f6c5 33
4562236b 34#include <drm/drm_atomic_helper.h>
fcd70cd3 35#include <drm/drm_probe_helper.h>
36#include <drm/amdgpu_drm.h>
37#include <linux/vgaarb.h>
38#include <linux/vga_switcheroo.h>
39#include <linux/efi.h>
40#include "amdgpu.h"
f4b373f4 41#include "amdgpu_trace.h"
42#include "amdgpu_i2c.h"
43#include "atom.h"
44#include "amdgpu_atombios.h"
a5bde2f9 45#include "amdgpu_atomfirmware.h"
d0dd7f0c 46#include "amd_pcie.h"
47#ifdef CONFIG_DRM_AMDGPU_SI
48#include "si.h"
49#endif
50#ifdef CONFIG_DRM_AMDGPU_CIK
51#include "cik.h"
52#endif
aaa36a97 53#include "vi.h"
460826e6 54#include "soc15.h"
0a5b8c7b 55#include "nv.h"
d38ceaf9 56#include "bif/bif_4_1_d.h"
9accf2fd 57#include <linux/pci.h>
bec86378 58#include <linux/firmware.h>
89041940 59#include "amdgpu_vf_error.h"
d38ceaf9 60
ba997709 61#include "amdgpu_amdkfd.h"
d2f52ac8 62#include "amdgpu_pm.h"
d38ceaf9 63
5183411b 64#include "amdgpu_xgmi.h"
c030f2e4 65#include "amdgpu_ras.h"
9c7c85f7 66#include "amdgpu_pmu.h"
5183411b 67
e2a75f88 68MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
3f76dced 69MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
2d2e5e7e 70MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
ad5a67a7 71MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
54c4d17e 72MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
23c6268e 73MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
e2a75f88 74
75#define AMDGPU_RESUME_MS 2000
76
d38ceaf9 77static const char *amdgpu_asic_name[] = {
78 "TAHITI",
79 "PITCAIRN",
80 "VERDE",
81 "OLAND",
82 "HAINAN",
83 "BONAIRE",
84 "KAVERI",
85 "KABINI",
86 "HAWAII",
87 "MULLINS",
88 "TOPAZ",
89 "TONGA",
48299f95 90 "FIJI",
d38ceaf9 91 "CARRIZO",
139f4917 92 "STONEY",
93 "POLARIS10",
94 "POLARIS11",
c4642a47 95 "POLARIS12",
48ff108d 96 "VEGAM",
d4196f01 97 "VEGA10",
8fab806a 98 "VEGA12",
956fcddc 99 "VEGA20",
2ca8a5d2 100 "RAVEN",
852a6626 101 "NAVI10",
102 "LAST",
103};
104
105/**
106 * DOC: pcie_replay_count
107 *
108 * The amdgpu driver provides a sysfs API for reporting the total number
109 * of PCIe replays (NAKs).
110 * The file pcie_replay_count is used for this and returns the total
111 * number of replays as a sum of the NAKs generated and NAKs received.
112 */
113
114static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
115 struct device_attribute *attr, char *buf)
116{
117 struct drm_device *ddev = dev_get_drvdata(dev);
118 struct amdgpu_device *adev = ddev->dev_private;
119 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
120
121 return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
122}
123
124static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
125 amdgpu_device_get_pcie_replay_count, NULL);
126
127static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
128
129/**
130 * amdgpu_device_is_px - Is the device a dGPU with HG/PX power control
131 *
132 * @dev: drm_device pointer
133 *
134 * Returns true if the device is a dGPU with HG/PX power control,
135 * otherwise returns false.
136 */
137bool amdgpu_device_is_px(struct drm_device *dev)
138{
139 struct amdgpu_device *adev = dev->dev_private;
140
2f7d10b3 141 if (adev->flags & AMD_IS_PX)
142 return true;
143 return false;
144}
145
146/*
147 * MMIO register access helper functions.
148 */
149/**
150 * amdgpu_mm_rreg - read a memory mapped IO register
151 *
152 * @adev: amdgpu_device pointer
153 * @reg: dword aligned register offset
154 * @acc_flags: access flags which require special behavior
155 *
156 * Returns the 32 bit value from the offset specified.
157 */
d38ceaf9 158uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
15d72fd7 159 uint32_t acc_flags)
d38ceaf9 160{
161 uint32_t ret;
162
43ca8efa 163 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
bc992ba5 164 return amdgpu_virt_kiq_rreg(adev, reg);
bc992ba5 165
15d72fd7 166 if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
f4b373f4 167 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
168 else {
169 unsigned long flags;
170
171 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
172 writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
173 ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
174 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
d38ceaf9 175 }
176 trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
177 return ret;
178}
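/*
 * Example (illustrative sketch, not part of the driver): reading a register
 * through the helper above.  Offsets inside the mapped rmmio range are read
 * directly; anything beyond it goes through the mmMM_INDEX/mmMM_DATA window
 * under mmio_idx_lock, as implemented above.  The register name used here is
 * only a placeholder.
 *
 *	uint32_t val;
 *
 *	val = amdgpu_mm_rreg(adev, mmSOME_REGISTER, 0);
 *	// or, bypassing the KIQ path under SR-IOV:
 *	val = amdgpu_mm_rreg(adev, mmSOME_REGISTER, AMDGPU_REGS_NO_KIQ);
 */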
179
180/*
181 * MMIO register read with byte offset helper function
182 * @offset: byte offset from MMIO start
183 *
184 */
185
186/**
187 * amdgpu_mm_rreg8 - read a memory mapped IO register
188 *
189 * @adev: amdgpu_device pointer
190 * @offset: byte aligned register offset
191 *
192 * Returns the 8 bit value from the offset specified.
193 */
194uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) {
195 if (offset < adev->rmmio_size)
196 return (readb(adev->rmmio + offset));
197 BUG();
198}
199
200/*
201 * MMIO register write with byte offset helper function
202 * @offset: byte offset from MMIO start
203 * @value: the value to be written to the register
204 *
205 */
206/**
207 * amdgpu_mm_wreg8 - write to a memory mapped IO register
208 *
209 * @adev: amdgpu_device pointer
210 * @offset: byte aligned register offset
211 * @value: 8 bit value to write
212 *
213 * Writes the value specified to the offset specified.
214 */
215void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) {
216 if (offset < adev->rmmio_size)
217 writeb(value, adev->rmmio + offset);
218 else
219 BUG();
220}
221
222/**
223 * amdgpu_mm_wreg - write to a memory mapped IO register
224 *
225 * @adev: amdgpu_device pointer
226 * @reg: dword aligned register offset
227 * @v: 32 bit value to write to the register
228 * @acc_flags: access flags which require special behavior
229 *
230 * Writes the value specified to the offset specified.
231 */
d38ceaf9 232void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
15d72fd7 233 uint32_t acc_flags)
d38ceaf9 234{
f4b373f4 235 trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);
4e99a44e 236
237 if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
238 adev->last_mm_index = v;
239 }
240
43ca8efa 241 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
bc992ba5 242 return amdgpu_virt_kiq_wreg(adev, reg, v);
bc992ba5 243
15d72fd7 244 if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
245 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
246 else {
247 unsigned long flags;
248
249 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
250 writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
251 writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
252 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
253 }
254
255 if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
256 udelay(500);
257 }
258}
259
260/**
261 * amdgpu_io_rreg - read an IO register
262 *
263 * @adev: amdgpu_device pointer
264 * @reg: dword aligned register offset
265 *
266 * Returns the 32 bit value from the offset specified.
267 */
268u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
269{
270 if ((reg * 4) < adev->rio_mem_size)
271 return ioread32(adev->rio_mem + (reg * 4));
272 else {
273 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
274 return ioread32(adev->rio_mem + (mmMM_DATA * 4));
275 }
276}
277
278/**
279 * amdgpu_io_wreg - write to an IO register
280 *
281 * @adev: amdgpu_device pointer
282 * @reg: dword aligned register offset
283 * @v: 32 bit value to write to the register
284 *
285 * Writes the value specified to the offset specified.
286 */
287void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
288{
289 if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
290 adev->last_mm_index = v;
291 }
292
293 if ((reg * 4) < adev->rio_mem_size)
294 iowrite32(v, adev->rio_mem + (reg * 4));
295 else {
296 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
297 iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
298 }
299
300 if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
301 udelay(500);
302 }
303}
304
305/**
306 * amdgpu_mm_rdoorbell - read a doorbell dword
307 *
308 * @adev: amdgpu_device pointer
309 * @index: doorbell index
310 *
311 * Returns the value in the doorbell aperture at the
312 * requested doorbell index (CIK).
313 */
314u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
315{
316 if (index < adev->doorbell.num_doorbells) {
317 return readl(adev->doorbell.ptr + index);
318 } else {
319 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
320 return 0;
321 }
322}
323
324/**
325 * amdgpu_mm_wdoorbell - write a doorbell dword
326 *
327 * @adev: amdgpu_device pointer
328 * @index: doorbell index
329 * @v: value to write
330 *
331 * Writes @v to the doorbell aperture at the
332 * requested doorbell index (CIK).
333 */
334void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
335{
336 if (index < adev->doorbell.num_doorbells) {
337 writel(v, adev->doorbell.ptr + index);
338 } else {
339 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
340 }
341}
342
343/**
344 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
345 *
346 * @adev: amdgpu_device pointer
347 * @index: doorbell index
348 *
349 * Returns the value in the doorbell aperture at the
350 * requested doorbell index (VEGA10+).
351 */
352u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
353{
354 if (index < adev->doorbell.num_doorbells) {
355 return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
356 } else {
357 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
358 return 0;
359 }
360}
361
362/**
363 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
364 *
365 * @adev: amdgpu_device pointer
366 * @index: doorbell index
367 * @v: value to write
368 *
369 * Writes @v to the doorbell aperture at the
370 * requested doorbell index (VEGA10+).
371 */
372void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
373{
374 if (index < adev->doorbell.num_doorbells) {
375 atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
376 } else {
377 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
378 }
379}
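/*
 * Example (illustrative sketch, not part of the driver): a ring backend
 * would typically publish its new write pointer through these helpers,
 * e.g. with a 64-bit doorbell on VEGA10 and later:
 *
 *	if (ring->use_doorbell)
 *		amdgpu_mm_wdoorbell64(adev, ring->doorbell_index, ring->wptr);
 *
 * 'ring' is a hypothetical struct amdgpu_ring pointer here; the real
 * callers live in the individual ring/IP implementations.
 */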
380
381/**
382 * amdgpu_invalid_rreg - dummy reg read function
383 *
384 * @adev: amdgpu device pointer
385 * @reg: offset of register
386 *
387 * Dummy register read function. Used for register blocks
388 * that certain asics don't have (all asics).
389 * Returns the value in the register.
390 */
391static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
392{
393 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
394 BUG();
395 return 0;
396}
397
398/**
399 * amdgpu_invalid_wreg - dummy reg write function
400 *
401 * @adev: amdgpu device pointer
402 * @reg: offset of register
403 * @v: value to write to the register
404 *
405 * Dummy register write function. Used for register blocks
406 * that certain asics don't have (all asics).
407 */
408static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
409{
410 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
411 reg, v);
412 BUG();
413}
414
415/**
416 * amdgpu_block_invalid_rreg - dummy reg read function
417 *
418 * @adev: amdgpu device pointer
419 * @block: offset of instance
420 * @reg: offset of register
421 *
422 * Dummy register read function. Used for register blocks
423 * that certain asics don't have (all asics).
424 * Returns the value in the register.
425 */
426static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
427 uint32_t block, uint32_t reg)
428{
429 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
430 reg, block);
431 BUG();
432 return 0;
433}
434
435/**
436 * amdgpu_block_invalid_wreg - dummy reg write function
437 *
438 * @adev: amdgpu device pointer
439 * @block: offset of instance
440 * @reg: offset of register
441 * @v: value to write to the register
442 *
443 * Dummy register write function. Used for register blocks
444 * that certain asics don't have (all asics).
445 */
446static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
447 uint32_t block,
448 uint32_t reg, uint32_t v)
449{
450 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
451 reg, block, v);
452 BUG();
453}
454
455/**
456 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
457 *
458 * @adev: amdgpu device pointer
459 *
460 * Allocates a scratch page of VRAM for use by various things in the
461 * driver.
462 */
06ec9070 463static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
d38ceaf9 464{
465 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
466 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
467 &adev->vram_scratch.robj,
468 &adev->vram_scratch.gpu_addr,
469 (void **)&adev->vram_scratch.ptr);
470}
471
472/**
473 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
474 *
475 * @adev: amdgpu device pointer
476 *
477 * Frees the VRAM scratch page.
478 */
06ec9070 479static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 480{
078af1a3 481 amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
482}
483
484/**
9c3f2b54 485 * amdgpu_device_program_register_sequence - program an array of registers.
486 *
487 * @adev: amdgpu_device pointer
488 * @registers: pointer to the register array
489 * @array_size: size of the register array
490 *
491 * Programs an array of registers with AND and OR masks.
492 * This is a helper for setting golden registers.
493 */
494void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
495 const u32 *registers,
496 const u32 array_size)
497{
498 u32 tmp, reg, and_mask, or_mask;
499 int i;
500
501 if (array_size % 3)
502 return;
503
504 for (i = 0; i < array_size; i +=3) {
505 reg = registers[i + 0];
506 and_mask = registers[i + 1];
507 or_mask = registers[i + 2];
508
509 if (and_mask == 0xffffffff) {
510 tmp = or_mask;
511 } else {
512 tmp = RREG32(reg);
513 tmp &= ~and_mask;
514 if (adev->family >= AMDGPU_FAMILY_AI)
515 tmp |= (or_mask & and_mask);
516 else
517 tmp |= or_mask;
518 }
519 WREG32(reg, tmp);
520 }
521}
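/*
 * Example (illustrative sketch, not part of the driver): golden register
 * arrays are flat {offset, and_mask, or_mask} triplets.  An and_mask of
 * 0xffffffff means "write or_mask unconditionally"; otherwise the masked
 * bits are cleared first and or_mask is OR'ed in, as implemented above.
 * The register names below are placeholders.
 *
 *	static const u32 golden_settings_example[] = {
 *		mmSOME_REG_A, 0xffffffff, 0x00000100,
 *		mmSOME_REG_B, 0x0000000f, 0x00000002,
 *	};
 *
 *	amdgpu_device_program_register_sequence(adev,
 *						golden_settings_example,
 *						ARRAY_SIZE(golden_settings_example));
 */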
522
523/**
524 * amdgpu_device_pci_config_reset - reset the GPU
525 *
526 * @adev: amdgpu_device pointer
527 *
528 * Resets the GPU using the pci config reset sequence.
529 * Only applicable to asics prior to vega10.
530 */
8111c387 531void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
532{
533 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
534}
535
536/*
537 * GPU doorbell aperture helper functions.
538 */
539/**
06ec9070 540 * amdgpu_device_doorbell_init - Init doorbell driver information.
541 *
542 * @adev: amdgpu_device pointer
543 *
544 * Init doorbell driver information (CIK)
545 * Returns 0 on success, error on failure.
546 */
06ec9070 547static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
d38ceaf9 548{
6585661d 549
550 /* No doorbell on SI hardware generation */
551 if (adev->asic_type < CHIP_BONAIRE) {
552 adev->doorbell.base = 0;
553 adev->doorbell.size = 0;
554 adev->doorbell.num_doorbells = 0;
555 adev->doorbell.ptr = NULL;
556 return 0;
557 }
558
559 if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
560 return -EINVAL;
561
562 amdgpu_asic_init_doorbell_index(adev);
563
564 /* doorbell bar mapping */
565 adev->doorbell.base = pci_resource_start(adev->pdev, 2);
566 adev->doorbell.size = pci_resource_len(adev->pdev, 2);
567
edf600da 568 adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
9564f192 569 adev->doorbell_index.max_assignment+1);
570 if (adev->doorbell.num_doorbells == 0)
571 return -EINVAL;
572
ec3db8a6 573 /* For Vega, reserve and map two pages on doorbell BAR since SDMA
574 * paging queue doorbell uses the second page. The
575 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
576 * doorbells are in the first page, so with the paging queue enabled
577 * num_doorbells must be extended by one page (0x400 in dwords).
578 */
579 if (adev->asic_type >= CHIP_VEGA10)
88dc26e4 580 adev->doorbell.num_doorbells += 0x400;
ec3db8a6 581
582 adev->doorbell.ptr = ioremap(adev->doorbell.base,
583 adev->doorbell.num_doorbells *
584 sizeof(u32));
585 if (adev->doorbell.ptr == NULL)
d38ceaf9 586 return -ENOMEM;
587
588 return 0;
589}
590
591/**
06ec9070 592 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
d38ceaf9
AD
593 *
594 * @adev: amdgpu_device pointer
595 *
596 * Tear down doorbell driver information (CIK)
597 */
06ec9070 598static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
d38ceaf9
AD
599{
600 iounmap(adev->doorbell.ptr);
601 adev->doorbell.ptr = NULL;
602}
603
22cb0164 604
605
606/*
06ec9070 607 * amdgpu_device_wb_*()
455a7bc2 608 * Writeback is the method by which the GPU updates special pages in memory
ea81a173 609 * with the status of certain GPU events (fences, ring pointers, etc.).
610 */
611
612/**
06ec9070 613 * amdgpu_device_wb_fini - Disable Writeback and free memory
614 *
615 * @adev: amdgpu_device pointer
616 *
617 * Disables Writeback and frees the Writeback memory (all asics).
618 * Used at driver shutdown.
619 */
06ec9070 620static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
621{
622 if (adev->wb.wb_obj) {
623 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
624 &adev->wb.gpu_addr,
625 (void **)&adev->wb.wb);
626 adev->wb.wb_obj = NULL;
627 }
628}
629
630/**
06ec9070 631 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
632 *
633 * @adev: amdgpu_device pointer
634 *
455a7bc2 635 * Initializes writeback and allocates writeback memory (all asics).
636 * Used at driver startup.
637 * Returns 0 on success or a negative error code on failure.
638 */
06ec9070 639static int amdgpu_device_wb_init(struct amdgpu_device *adev)
640{
641 int r;
642
643 if (adev->wb.wb_obj == NULL) {
644 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
645 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
646 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
647 &adev->wb.wb_obj, &adev->wb.gpu_addr,
648 (void **)&adev->wb.wb);
649 if (r) {
650 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
651 return r;
652 }
653
654 adev->wb.num_wb = AMDGPU_MAX_WB;
655 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
656
657 /* clear wb memory */
73469585 658 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
659 }
660
661 return 0;
662}
663
664/**
131b4b36 665 * amdgpu_device_wb_get - Allocate a wb entry
666 *
667 * @adev: amdgpu_device pointer
668 * @wb: wb index
669 *
670 * Allocate a wb slot for use by the driver (all asics).
671 * Returns 0 on success or -EINVAL on failure.
672 */
131b4b36 673int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
674{
675 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 676
97407b63 677 if (offset < adev->wb.num_wb) {
7014285a 678 __set_bit(offset, adev->wb.used);
63ae07ca 679 *wb = offset << 3; /* convert to dw offset */
680 return 0;
681 } else {
682 return -EINVAL;
683 }
684}
685
d38ceaf9 686/**
131b4b36 687 * amdgpu_device_wb_free - Free a wb entry
688 *
689 * @adev: amdgpu_device pointer
690 * @wb: wb index
691 *
692 * Free a wb slot allocated for use by the driver (all asics)
693 */
131b4b36 694void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 695{
73469585 696 wb >>= 3;
d38ceaf9 697 if (wb < adev->wb.num_wb)
73469585 698 __clear_bit(wb, adev->wb.used);
699}
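/*
 * Example (illustrative sketch, not part of the driver): a typical user
 * allocates a writeback slot, lets the GPU write status into it, reads it
 * back through adev->wb.wb and frees it again.  The returned value is a
 * dword offset into the writeback page (the allocator hands out 256-bit
 * aligned slots, hence the << 3 / >> 3 conversion above).
 *
 *	u32 wb_idx;
 *	u64 wb_gpu_addr;
 *
 *	if (amdgpu_device_wb_get(adev, &wb_idx))
 *		return -EINVAL;
 *	wb_gpu_addr = adev->wb.gpu_addr + (wb_idx * 4);
 *	// ... point a fence/rptr at wb_gpu_addr, read adev->wb.wb[wb_idx] ...
 *	amdgpu_device_wb_free(adev, wb_idx);
 */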
700
701/**
702 * amdgpu_device_resize_fb_bar - try to resize FB BAR
703 *
704 * @adev: amdgpu_device pointer
705 *
706 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
707 * to fail, but if any of the BARs is not accessible after the resize we abort
708 * driver loading by returning -ENODEV.
709 */
710int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
711{
770d13b1 712 u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
d6895ad3 713 u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
714 struct pci_bus *root;
715 struct resource *res;
716 unsigned i;
717 u16 cmd;
718 int r;
719
0c03b912 720 /* Bypass for VF */
721 if (amdgpu_sriov_vf(adev))
722 return 0;
723
724 /* Check if the root BUS has 64bit memory resources */
725 root = adev->pdev->bus;
726 while (root->parent)
727 root = root->parent;
728
729 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 730 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
731 res->start > 0x100000000ull)
732 break;
733 }
734
735 /* Trying to resize is pointless without a root hub window above 4GB */
736 if (!res)
737 return 0;
738
739 /* Disable memory decoding while we change the BAR addresses and size */
740 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
741 pci_write_config_word(adev->pdev, PCI_COMMAND,
742 cmd & ~PCI_COMMAND_MEMORY);
743
744 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
06ec9070 745 amdgpu_device_doorbell_fini(adev);
746 if (adev->asic_type >= CHIP_BONAIRE)
747 pci_release_resource(adev->pdev, 2);
748
749 pci_release_resource(adev->pdev, 0);
750
751 r = pci_resize_resource(adev->pdev, 0, rbar_size);
752 if (r == -ENOSPC)
753 DRM_INFO("Not enough PCI address space for a large BAR.");
754 else if (r && r != -ENOTSUPP)
755 DRM_ERROR("Problem resizing BAR0 (%d).", r);
756
757 pci_assign_unassigned_bus_resources(adev->pdev->bus);
758
759 /* When the doorbell or fb BAR isn't available we have no chance of
760 * using the device.
761 */
06ec9070 762 r = amdgpu_device_doorbell_init(adev);
763 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
764 return -ENODEV;
765
766 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
767
768 return 0;
769}
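/*
 * Worked example (illustrative): for 8GB of VRAM, space_needed >> 20 is
 * 8192MB; OR-ing in 1 makes order_base_2() round up to 14, and subtracting
 * 1 yields an rbar_size of 13, i.e. a 2^13 MB = 8GB BAR in the resizable
 * BAR size encoding consumed by pci_resize_resource().
 */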
a05502e5 770
771/*
772 * GPU helpers function.
773 */
774/**
39c640c0 775 * amdgpu_device_need_post - check if the hw needs to be posted or not
776 *
777 * @adev: amdgpu_device pointer
778 *
779 * Check if the asic has been initialized (all asics) at driver startup
780 * or if a post is needed because a hw reset was performed.
781 * Returns true if post is needed, false if not.
d38ceaf9 782 */
39c640c0 783bool amdgpu_device_need_post(struct amdgpu_device *adev)
784{
785 uint32_t reg;
786
787 if (amdgpu_sriov_vf(adev))
788 return false;
789
790 if (amdgpu_passthrough(adev)) {
791 /* For FIJI: In the whole-GPU pass-through virtualization case, after a VM reboot
792 * some old SMC firmware still needs the driver to do a vPost, otherwise the GPU hangs;
793 * SMC firmware versions above 22.15 don't have this flaw, so we force
794 * a vPost only for SMC versions below 22.15.
795 */
796 if (adev->asic_type == CHIP_FIJI) {
797 int err;
798 uint32_t fw_ver;
799 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
800 /* force vPost if an error occurred */
801 if (err)
802 return true;
803
804 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
805 if (fw_ver < 0x00160e00)
806 return true;
bec86378 807 }
bec86378 808 }
91fe77eb 809
810 if (adev->has_hw_reset) {
811 adev->has_hw_reset = false;
812 return true;
813 }
814
815 /* bios scratch used on CIK+ */
816 if (adev->asic_type >= CHIP_BONAIRE)
817 return amdgpu_atombios_scratch_need_asic_init(adev);
818
819 /* check MEM_SIZE for older asics */
820 reg = amdgpu_asic_get_config_memsize(adev);
821
822 if ((reg != 0) && (reg != 0xffffffff))
823 return false;
824
825 return true;
826}
827
828/* if we get transitioned to only one device, take VGA back */
829/**
06ec9070 830 * amdgpu_device_vga_set_decode - enable/disable vga decode
831 *
832 * @cookie: amdgpu_device pointer
833 * @state: enable/disable vga decode
834 *
835 * Enable/disable vga decode (all asics).
836 * Returns VGA resource flags.
837 */
06ec9070 838static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
839{
840 struct amdgpu_device *adev = cookie;
841 amdgpu_asic_set_vga_state(adev, state);
842 if (state)
843 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
844 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
845 else
846 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
847}
848
849/**
850 * amdgpu_device_check_block_size - validate the vm block size
851 *
852 * @adev: amdgpu_device pointer
853 *
854 * Validates the vm block size specified via module parameter.
855 * The vm block size defines the number of bits in the page table versus the page directory,
856 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
857 * page table and the remaining bits are in the page directory.
858 */
06ec9070 859static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
860{
861 /* defines number of bits in page table versus page directory,
862 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
863 * page table and the remaining bits are in the page directory */
864 if (amdgpu_vm_block_size == -1)
865 return;
a1adf8be 866
bab4fee7 867 if (amdgpu_vm_block_size < 9) {
868 dev_warn(adev->dev, "VM page table size (%d) too small\n",
869 amdgpu_vm_block_size);
97489129 870 amdgpu_vm_block_size = -1;
a1adf8be 871 }
872}
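/*
 * Worked example (illustrative): with 4KB pages there is a 12-bit page
 * offset, so amdgpu_vm_block_size = 9 means each page table holds
 * 2^9 = 512 entries and maps 2^(12 + 9) = 2MB of VM address space, with
 * the remaining virtual address bits indexing the page directory.
 */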
873
874/**
875 * amdgpu_device_check_vm_size - validate the vm size
876 *
877 * @adev: amdgpu_device pointer
878 *
879 * Validates the vm size in GB specified via module parameter.
880 * The VM size is the size of the GPU virtual memory space in GB.
881 */
06ec9070 882static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 883{
884 /* no need to check the default value */
885 if (amdgpu_vm_size == -1)
886 return;
887
888 if (amdgpu_vm_size < 1) {
889 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
890 amdgpu_vm_size);
f3368128 891 amdgpu_vm_size = -1;
83ca145d 892 }
893}
894
895static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
896{
897 struct sysinfo si;
898 bool is_os_64 = (sizeof(void *) == 8) ? true : false;
899 uint64_t total_memory;
900 uint64_t dram_size_seven_GB = 0x1B8000000;
901 uint64_t dram_size_three_GB = 0xB8000000;
902
903 if (amdgpu_smu_memory_pool_size == 0)
904 return;
905
906 if (!is_os_64) {
907 DRM_WARN("Not 64-bit OS, feature not supported\n");
908 goto def_value;
909 }
910 si_meminfo(&si);
911 total_memory = (uint64_t)si.totalram * si.mem_unit;
912
913 if ((amdgpu_smu_memory_pool_size == 1) ||
914 (amdgpu_smu_memory_pool_size == 2)) {
915 if (total_memory < dram_size_three_GB)
916 goto def_value1;
917 } else if ((amdgpu_smu_memory_pool_size == 4) ||
918 (amdgpu_smu_memory_pool_size == 8)) {
919 if (total_memory < dram_size_seven_GB)
920 goto def_value1;
921 } else {
922 DRM_WARN("Smu memory pool size not supported\n");
923 goto def_value;
924 }
925 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
926
927 return;
928
929def_value1:
930 DRM_WARN("Not enough system memory\n");
931def_value:
932 adev->pm.smu_prv_buffer_size = 0;
933}
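/*
 * Worked example (illustrative): amdgpu_smu_memory_pool_size is given in
 * units of 256MB, so a module parameter value of 2 yields
 * 2 << 28 = 0x20000000 bytes (512MB) in adev->pm.smu_prv_buffer_size,
 * provided the system has at least ~3GB of RAM as checked above.
 */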
934
d38ceaf9 935/**
06ec9070 936 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
937 *
938 * @adev: amdgpu_device pointer
939 *
940 * Validates certain module parameters and updates
941 * the associated values used by the driver (all asics).
942 */
912dfc84 943static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 944{
912dfc84
EQ
945 int ret = 0;
946
5b011235
CZ
947 if (amdgpu_sched_jobs < 4) {
948 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
949 amdgpu_sched_jobs);
950 amdgpu_sched_jobs = 4;
76117507 951 } else if (!is_power_of_2(amdgpu_sched_jobs)){
5b011235
CZ
952 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
953 amdgpu_sched_jobs);
954 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
955 }
d38ceaf9 956
83e74db6 957 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
958 /* gart size must be greater or equal to 32M */
959 dev_warn(adev->dev, "gart size (%d) too small\n",
960 amdgpu_gart_size);
83e74db6 961 amdgpu_gart_size = -1;
d38ceaf9
AD
962 }
963
36d38372 964 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 965 /* gtt size must be greater or equal to 32M */
36d38372
CK
966 dev_warn(adev->dev, "gtt size (%d) too small\n",
967 amdgpu_gtt_size);
968 amdgpu_gtt_size = -1;
d38ceaf9
AD
969 }
970
d07f14be
RH
971 /* valid range is between 4 and 9 inclusive */
972 if (amdgpu_vm_fragment_size != -1 &&
973 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
974 dev_warn(adev->dev, "valid range is between 4 and 9\n");
975 amdgpu_vm_fragment_size = -1;
976 }
977
7951e376
RZ
978 amdgpu_device_check_smu_prv_buffer_size(adev);
979
06ec9070 980 amdgpu_device_check_vm_size(adev);
d38ceaf9 981
06ec9070 982 amdgpu_device_check_block_size(adev);
6a7f76e7 983
912dfc84
EQ
984 ret = amdgpu_device_get_job_timeout_settings(adev);
985 if (ret) {
986 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
987 return ret;
8854695a 988 }
19aede77
AD
989
990 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84
EQ
991
992 return ret;
d38ceaf9
AD
993}
994
995/**
996 * amdgpu_switcheroo_set_state - set switcheroo state
997 *
998 * @pdev: pci dev pointer
1694467b 999 * @state: vga_switcheroo state
d38ceaf9
AD
1000 *
1001 * Callback for the switcheroo driver. Suspends or resumes
1002 * the asics before or after it is powered up using ACPI methods.
1003 */
1004static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
1005{
1006 struct drm_device *dev = pci_get_drvdata(pdev);
1007
1008 if (amdgpu_device_is_px(dev) && state == VGA_SWITCHEROO_OFF)
1009 return;
1010
1011 if (state == VGA_SWITCHEROO_ON) {
7ca85295 1012 pr_info("amdgpu: switched on\n");
d38ceaf9
AD
1013 /* don't suspend or resume card normally */
1014 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1015
810ddc3a 1016 amdgpu_device_resume(dev, true, true);
d38ceaf9 1017
d38ceaf9
AD
1018 dev->switch_power_state = DRM_SWITCH_POWER_ON;
1019 drm_kms_helper_poll_enable(dev);
1020 } else {
7ca85295 1021 pr_info("amdgpu: switched off\n");
d38ceaf9
AD
1022 drm_kms_helper_poll_disable(dev);
1023 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
810ddc3a 1024 amdgpu_device_suspend(dev, true, true);
d38ceaf9
AD
1025 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1026 }
1027}
1028
1029/**
1030 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1031 *
1032 * @pdev: pci dev pointer
1033 *
1034 * Callback for the switcheroo driver. Check if the switcheroo
1035 * state can be changed.
1036 * Returns true if the state can be changed, false if not.
1037 */
1038static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1039{
1040 struct drm_device *dev = pci_get_drvdata(pdev);
1041
1042 /*
1043 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1044 * locking inversion with the driver load path. And the access here is
1045 * completely racy anyway. So don't bother with locking for now.
1046 */
1047 return dev->open_count == 0;
1048}
1049
1050static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1051 .set_gpu_state = amdgpu_switcheroo_set_state,
1052 .reprobe = NULL,
1053 .can_switch = amdgpu_switcheroo_can_switch,
1054};
1055
e3ecdffa
AD
1056/**
1057 * amdgpu_device_ip_set_clockgating_state - set the CG state
1058 *
87e3f136 1059 * @dev: amdgpu_device pointer
e3ecdffa
AD
1060 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1061 * @state: clockgating state (gate or ungate)
1062 *
1063 * Sets the requested clockgating state for all instances of
1064 * the hardware IP specified.
1065 * Returns the error code from the last instance.
1066 */
43fa561f 1067int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1068 enum amd_ip_block_type block_type,
1069 enum amd_clockgating_state state)
d38ceaf9 1070{
43fa561f 1071 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1072 int i, r = 0;
1073
1074 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1075 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1076 continue;
c722865a
RZ
1077 if (adev->ip_blocks[i].version->type != block_type)
1078 continue;
1079 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1080 continue;
1081 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1082 (void *)adev, state);
1083 if (r)
1084 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1085 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1086 }
1087 return r;
1088}
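/*
 * Example (illustrative sketch, not part of the driver): an IP driver or
 * the power-management code can gate or ungate clocks for one IP type in
 * a single call, e.g.:
 *
 *	r = amdgpu_device_ip_set_clockgating_state(adev,
 *						   AMD_IP_BLOCK_TYPE_GFX,
 *						   AMD_CG_STATE_GATE);
 *	if (r)
 *		DRM_WARN("failed to gate GFX clocks (%d)\n", r);
 */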
1089
e3ecdffa
AD
1090/**
1091 * amdgpu_device_ip_set_powergating_state - set the PG state
1092 *
87e3f136 1093 * @dev: amdgpu_device pointer
e3ecdffa
AD
1094 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1095 * @state: powergating state (gate or ungate)
1096 *
1097 * Sets the requested powergating state for all instances of
1098 * the hardware IP specified.
1099 * Returns the error code from the last instance.
1100 */
43fa561f 1101int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1102 enum amd_ip_block_type block_type,
1103 enum amd_powergating_state state)
d38ceaf9 1104{
43fa561f 1105 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1106 int i, r = 0;
1107
1108 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1109 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1110 continue;
c722865a
RZ
1111 if (adev->ip_blocks[i].version->type != block_type)
1112 continue;
1113 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1114 continue;
1115 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1116 (void *)adev, state);
1117 if (r)
1118 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1119 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1120 }
1121 return r;
1122}
1123
e3ecdffa
AD
1124/**
1125 * amdgpu_device_ip_get_clockgating_state - get the CG state
1126 *
1127 * @adev: amdgpu_device pointer
1128 * @flags: clockgating feature flags
1129 *
1130 * Walks the list of IPs on the device and updates the clockgating
1131 * flags for each IP.
1132 * Updates @flags with the feature flags for each hardware IP where
1133 * clockgating is enabled.
1134 */
2990a1fc
AD
1135void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1136 u32 *flags)
6cb2d4e4
HR
1137{
1138 int i;
1139
1140 for (i = 0; i < adev->num_ip_blocks; i++) {
1141 if (!adev->ip_blocks[i].status.valid)
1142 continue;
1143 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1144 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1145 }
1146}
1147
e3ecdffa
AD
1148/**
1149 * amdgpu_device_ip_wait_for_idle - wait for idle
1150 *
1151 * @adev: amdgpu_device pointer
1152 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1153 *
1154 * Waits for the requested hardware IP to be idle.
1155 * Returns 0 for success or a negative error code on failure.
1156 */
2990a1fc
AD
1157int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1158 enum amd_ip_block_type block_type)
5dbbb60b
AD
1159{
1160 int i, r;
1161
1162 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1163 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1164 continue;
a1255107
AD
1165 if (adev->ip_blocks[i].version->type == block_type) {
1166 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1167 if (r)
1168 return r;
1169 break;
1170 }
1171 }
1172 return 0;
1173
1174}
1175
e3ecdffa
AD
1176/**
1177 * amdgpu_device_ip_is_idle - is the hardware IP idle
1178 *
1179 * @adev: amdgpu_device pointer
1180 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1181 *
1182 * Check if the hardware IP is idle or not.
1183 * Returns true if the IP is idle, false if not.
1184 */
2990a1fc
AD
1185bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1186 enum amd_ip_block_type block_type)
5dbbb60b
AD
1187{
1188 int i;
1189
1190 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1191 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1192 continue;
a1255107
AD
1193 if (adev->ip_blocks[i].version->type == block_type)
1194 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1195 }
1196 return true;
1197
1198}
1199
e3ecdffa
AD
1200/**
1201 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1202 *
1203 * @adev: amdgpu_device pointer
87e3f136 1204 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1205 *
1206 * Returns a pointer to the hardware IP block structure
1207 * if it exists for the asic, otherwise NULL.
1208 */
2990a1fc
AD
1209struct amdgpu_ip_block *
1210amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1211 enum amd_ip_block_type type)
d38ceaf9
AD
1212{
1213 int i;
1214
1215 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1216 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1217 return &adev->ip_blocks[i];
1218
1219 return NULL;
1220}
1221
1222/**
2990a1fc 1223 * amdgpu_device_ip_block_version_cmp
d38ceaf9
AD
1224 *
1225 * @adev: amdgpu_device pointer
5fc3aeeb 1226 * @type: enum amd_ip_block_type
d38ceaf9
AD
1227 * @major: major version
1228 * @minor: minor version
1229 *
1230 * return 0 if equal or greater
1231 * return 1 if smaller or the ip_block doesn't exist
1232 */
2990a1fc
AD
1233int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1234 enum amd_ip_block_type type,
1235 u32 major, u32 minor)
d38ceaf9 1236{
2990a1fc 1237 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1238
a1255107
AD
1239 if (ip_block && ((ip_block->version->major > major) ||
1240 ((ip_block->version->major == major) &&
1241 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1242 return 0;
1243
1244 return 1;
1245}
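/*
 * Example (illustrative sketch, not part of the driver): callers usually
 * combine the two helpers above to branch on the IP version present on
 * the asic, e.g. "is GFX v8.1 or newer available?":
 *
 *	if (amdgpu_device_ip_block_version_cmp(adev,
 *					       AMD_IP_BLOCK_TYPE_GFX, 8, 1) == 0) {
 *		// at least GFX 8.1
 *	} else {
 *		// older GFX block, or no GFX block at all
 *	}
 */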
1246
a1255107 1247/**
2990a1fc 1248 * amdgpu_device_ip_block_add
a1255107
AD
1249 *
1250 * @adev: amdgpu_device pointer
1251 * @ip_block_version: pointer to the IP to add
1252 *
1253 * Adds the IP block driver information to the collection of IPs
1254 * on the asic.
1255 */
2990a1fc
AD
1256int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1257 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1258{
1259 if (!ip_block_version)
1260 return -EINVAL;
1261
e966a725 1262 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1263 ip_block_version->funcs->name);
1264
a1255107
AD
1265 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1266
1267 return 0;
1268}
1269
e3ecdffa
AD
1270/**
1271 * amdgpu_device_enable_virtual_display - enable virtual display feature
1272 *
1273 * @adev: amdgpu_device pointer
1274 *
1275 * Enables the virtual display feature if the user has enabled it via
1276 * the module parameter virtual_display. This feature provides virtual
1277 * display hardware on headless boards or in virtualized environments.
1278 * This function parses and validates the configuration string specified by
1279 * the user and configures the virtual display configuration (number of
1280 * virtual connectors, crtcs, etc.) specified.
1281 */
483ef985 1282static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1283{
1284 adev->enable_virtual_display = false;
1285
1286 if (amdgpu_virtual_display) {
1287 struct drm_device *ddev = adev->ddev;
1288 const char *pci_address_name = pci_name(ddev->pdev);
0f66356d 1289 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1290
1291 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1292 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1293 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1294 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1295 if (!strcmp("all", pciaddname)
1296 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1297 long num_crtc;
1298 int res = -1;
1299
9accf2fd 1300 adev->enable_virtual_display = true;
0f66356d
ED
1301
1302 if (pciaddname_tmp)
1303 res = kstrtol(pciaddname_tmp, 10,
1304 &num_crtc);
1305
1306 if (!res) {
1307 if (num_crtc < 1)
1308 num_crtc = 1;
1309 if (num_crtc > 6)
1310 num_crtc = 6;
1311 adev->mode_info.num_crtc = num_crtc;
1312 } else {
1313 adev->mode_info.num_crtc = 1;
1314 }
9accf2fd
ED
1315 break;
1316 }
1317 }
1318
0f66356d
ED
1319 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1320 amdgpu_virtual_display, pci_address_name,
1321 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
1322
1323 kfree(pciaddstr);
1324 }
1325}
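/*
 * Example (illustrative): amdgpu_virtual_display is a semicolon separated
 * list of "pci-address,crtc-count" entries, parsed by the strsep() loop
 * above.  A string such as
 *
 *	"0000:26:00.0,2;0000:43:00.0"
 *
 * enables two virtual crtcs on the first device and one (the default) on
 * the second, while "all" matches every amdgpu device.  The PCI addresses
 * shown are placeholders.
 */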
1326
e3ecdffa
AD
1327/**
1328 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1329 *
1330 * @adev: amdgpu_device pointer
1331 *
1332 * Parses the asic configuration parameters specified in the gpu info
1333 * firmware and makes them available to the driver for use in configuring
1334 * the asic.
1335 * Returns 0 on success, -EINVAL on failure.
1336 */
e2a75f88
AD
1337static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1338{
e2a75f88
AD
1339 const char *chip_name;
1340 char fw_name[30];
1341 int err;
1342 const struct gpu_info_firmware_header_v1_0 *hdr;
1343
ab4fe3e1
HR
1344 adev->firmware.gpu_info_fw = NULL;
1345
e2a75f88
AD
1346 switch (adev->asic_type) {
1347 case CHIP_TOPAZ:
1348 case CHIP_TONGA:
1349 case CHIP_FIJI:
e2a75f88 1350 case CHIP_POLARIS10:
cc07f18d 1351 case CHIP_POLARIS11:
e2a75f88 1352 case CHIP_POLARIS12:
cc07f18d 1353 case CHIP_VEGAM:
e2a75f88
AD
1354 case CHIP_CARRIZO:
1355 case CHIP_STONEY:
1356#ifdef CONFIG_DRM_AMDGPU_SI
1357 case CHIP_VERDE:
1358 case CHIP_TAHITI:
1359 case CHIP_PITCAIRN:
1360 case CHIP_OLAND:
1361 case CHIP_HAINAN:
1362#endif
1363#ifdef CONFIG_DRM_AMDGPU_CIK
1364 case CHIP_BONAIRE:
1365 case CHIP_HAWAII:
1366 case CHIP_KAVERI:
1367 case CHIP_KABINI:
1368 case CHIP_MULLINS:
1369#endif
27c0bc71 1370 case CHIP_VEGA20:
e2a75f88
AD
1371 default:
1372 return 0;
1373 case CHIP_VEGA10:
1374 chip_name = "vega10";
1375 break;
3f76dced
AD
1376 case CHIP_VEGA12:
1377 chip_name = "vega12";
1378 break;
2d2e5e7e 1379 case CHIP_RAVEN:
54c4d17e
FX
1380 if (adev->rev_id >= 8)
1381 chip_name = "raven2";
741deade
AD
1382 else if (adev->pdev->device == 0x15d8)
1383 chip_name = "picasso";
54c4d17e
FX
1384 else
1385 chip_name = "raven";
2d2e5e7e 1386 break;
23c6268e
HR
1387 case CHIP_NAVI10:
1388 chip_name = "navi10";
1389 break;
e2a75f88
AD
1390 }
1391
1392 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
ab4fe3e1 1393 err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
e2a75f88
AD
1394 if (err) {
1395 dev_err(adev->dev,
1396 "Failed to load gpu_info firmware \"%s\"\n",
1397 fw_name);
1398 goto out;
1399 }
ab4fe3e1 1400 err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
e2a75f88
AD
1401 if (err) {
1402 dev_err(adev->dev,
1403 "Failed to validate gpu_info firmware \"%s\"\n",
1404 fw_name);
1405 goto out;
1406 }
1407
ab4fe3e1 1408 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
1409 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1410
1411 switch (hdr->version_major) {
1412 case 1:
1413 {
1414 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 1415 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
1416 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1417
b5ab16bf
AD
1418 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1419 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1420 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1421 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 1422 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
1423 le32_to_cpu(gpu_info_fw->gc_num_tccs);
1424 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1425 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1426 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1427 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 1428 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
1429 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1430 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
1431 adev->gfx.cu_info.max_waves_per_simd =
1432 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1433 adev->gfx.cu_info.max_scratch_slots_per_cu =
1434 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1435 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 1436 if (hdr->version_minor >= 1) {
35c2e910
HZ
1437 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1438 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1439 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1440 adev->gfx.config.num_sc_per_sh =
1441 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1442 adev->gfx.config.num_packer_per_sc =
1443 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1444 }
48321c3d
HW
1445#ifdef CONFIG_DRM_AMD_DC_DCN2_0
1446 if (hdr->version_minor == 2) {
1447 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1448 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1449 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1450 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1451 }
1452#endif
e2a75f88
AD
1453 break;
1454 }
1455 default:
1456 dev_err(adev->dev,
1457 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1458 err = -EINVAL;
1459 goto out;
1460 }
1461out:
e2a75f88
AD
1462 return err;
1463}
1464
e3ecdffa
AD
1465/**
1466 * amdgpu_device_ip_early_init - run early init for hardware IPs
1467 *
1468 * @adev: amdgpu_device pointer
1469 *
1470 * Early initialization pass for hardware IPs. The hardware IPs that make
1471 * up each asic are discovered and each IP's early_init callback is run. This
1472 * is the first stage in initializing the asic.
1473 * Returns 0 on success, negative error code on failure.
1474 */
06ec9070 1475static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 1476{
aaa36a97 1477 int i, r;
d38ceaf9 1478
483ef985 1479 amdgpu_device_enable_virtual_display(adev);
a6be7570 1480
d38ceaf9 1481 switch (adev->asic_type) {
aaa36a97
AD
1482 case CHIP_TOPAZ:
1483 case CHIP_TONGA:
48299f95 1484 case CHIP_FIJI:
2cc0c0b5 1485 case CHIP_POLARIS10:
32cc7e53 1486 case CHIP_POLARIS11:
c4642a47 1487 case CHIP_POLARIS12:
32cc7e53 1488 case CHIP_VEGAM:
aaa36a97 1489 case CHIP_CARRIZO:
39bb0c92
SL
1490 case CHIP_STONEY:
1491 if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
aaa36a97
AD
1492 adev->family = AMDGPU_FAMILY_CZ;
1493 else
1494 adev->family = AMDGPU_FAMILY_VI;
1495
1496 r = vi_set_ip_blocks(adev);
1497 if (r)
1498 return r;
1499 break;
33f34802
KW
1500#ifdef CONFIG_DRM_AMDGPU_SI
1501 case CHIP_VERDE:
1502 case CHIP_TAHITI:
1503 case CHIP_PITCAIRN:
1504 case CHIP_OLAND:
1505 case CHIP_HAINAN:
295d0daf 1506 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
1507 r = si_set_ip_blocks(adev);
1508 if (r)
1509 return r;
1510 break;
1511#endif
a2e73f56
AD
1512#ifdef CONFIG_DRM_AMDGPU_CIK
1513 case CHIP_BONAIRE:
1514 case CHIP_HAWAII:
1515 case CHIP_KAVERI:
1516 case CHIP_KABINI:
1517 case CHIP_MULLINS:
1518 if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII))
1519 adev->family = AMDGPU_FAMILY_CI;
1520 else
1521 adev->family = AMDGPU_FAMILY_KV;
1522
1523 r = cik_set_ip_blocks(adev);
1524 if (r)
1525 return r;
1526 break;
1527#endif
e48a3cd9
AD
1528 case CHIP_VEGA10:
1529 case CHIP_VEGA12:
e4bd8170 1530 case CHIP_VEGA20:
e48a3cd9 1531 case CHIP_RAVEN:
741deade 1532 if (adev->asic_type == CHIP_RAVEN)
2ca8a5d2
CZ
1533 adev->family = AMDGPU_FAMILY_RV;
1534 else
1535 adev->family = AMDGPU_FAMILY_AI;
460826e6
KW
1536
1537 r = soc15_set_ip_blocks(adev);
1538 if (r)
1539 return r;
1540 break;
0a5b8c7b
HR
1541 case CHIP_NAVI10:
1542 adev->family = AMDGPU_FAMILY_NV;
1543
1544 r = nv_set_ip_blocks(adev);
1545 if (r)
1546 return r;
1547 break;
d38ceaf9
AD
1548 default:
1549 /* FIXME: not supported yet */
1550 return -EINVAL;
1551 }
1552
e2a75f88
AD
1553 r = amdgpu_device_parse_gpu_info_fw(adev);
1554 if (r)
1555 return r;
1556
1884734a 1557 amdgpu_amdkfd_device_probe(adev);
1558
3149d9da
XY
1559 if (amdgpu_sriov_vf(adev)) {
1560 r = amdgpu_virt_request_full_gpu(adev, true);
1561 if (r)
5ffa61c1 1562 return -EAGAIN;
78d48112
TH
1563
1564 /* query the reg access mode at the very beginning */
1565 amdgpu_virt_init_reg_access_mode(adev);
3149d9da
XY
1566 }
1567
3b94fb10 1568 adev->pm.pp_feature = amdgpu_pp_feature_mask;
00544006
HR
1569 if (amdgpu_sriov_vf(adev))
1570 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
00f54b97 1571
d38ceaf9
AD
1572 for (i = 0; i < adev->num_ip_blocks; i++) {
1573 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
ed8cf00c
HR
1574 DRM_ERROR("disabled ip block: %d <%s>\n",
1575 i, adev->ip_blocks[i].version->funcs->name);
a1255107 1576 adev->ip_blocks[i].status.valid = false;
d38ceaf9 1577 } else {
a1255107
AD
1578 if (adev->ip_blocks[i].version->funcs->early_init) {
1579 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 1580 if (r == -ENOENT) {
a1255107 1581 adev->ip_blocks[i].status.valid = false;
2c1a2784 1582 } else if (r) {
a1255107
AD
1583 DRM_ERROR("early_init of IP block <%s> failed %d\n",
1584 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 1585 return r;
2c1a2784 1586 } else {
a1255107 1587 adev->ip_blocks[i].status.valid = true;
2c1a2784 1588 }
974e6b64 1589 } else {
a1255107 1590 adev->ip_blocks[i].status.valid = true;
d38ceaf9 1591 }
d38ceaf9 1592 }
21a249ca
AD
1593 /* get the vbios after the asic_funcs are set up */
1594 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
1595 /* Read BIOS */
1596 if (!amdgpu_get_bios(adev))
1597 return -EINVAL;
1598
1599 r = amdgpu_atombios_init(adev);
1600 if (r) {
1601 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
1602 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
1603 return r;
1604 }
1605 }
d38ceaf9
AD
1606 }
1607
395d1fb9
NH
1608 adev->cg_flags &= amdgpu_cg_mask;
1609 adev->pg_flags &= amdgpu_pg_mask;
1610
d38ceaf9
AD
1611 return 0;
1612}
1613
0a4f2520
RZ
1614static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
1615{
1616 int i, r;
1617
1618 for (i = 0; i < adev->num_ip_blocks; i++) {
1619 if (!adev->ip_blocks[i].status.sw)
1620 continue;
1621 if (adev->ip_blocks[i].status.hw)
1622 continue;
1623 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 1624 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
1625 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
1626 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1627 if (r) {
1628 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1629 adev->ip_blocks[i].version->funcs->name, r);
1630 return r;
1631 }
1632 adev->ip_blocks[i].status.hw = true;
1633 }
1634 }
1635
1636 return 0;
1637}
1638
1639static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
1640{
1641 int i, r;
1642
1643 for (i = 0; i < adev->num_ip_blocks; i++) {
1644 if (!adev->ip_blocks[i].status.sw)
1645 continue;
1646 if (adev->ip_blocks[i].status.hw)
1647 continue;
1648 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1649 if (r) {
1650 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1651 adev->ip_blocks[i].version->funcs->name, r);
1652 return r;
1653 }
1654 adev->ip_blocks[i].status.hw = true;
1655 }
1656
1657 return 0;
1658}
1659
7a3e0bb2
RZ
1660static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
1661{
1662 int r = 0;
1663 int i;
80f41f84 1664 uint32_t smu_version;
7a3e0bb2
RZ
1665
1666 if (adev->asic_type >= CHIP_VEGA10) {
1667 for (i = 0; i < adev->num_ip_blocks; i++) {
1668 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
1669 if (adev->in_gpu_reset || adev->in_suspend) {
1670 if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset)
1671 break; /* sriov gpu reset, psp need to do hw_init before IH because of hw limit */
1672 r = adev->ip_blocks[i].version->funcs->resume(adev);
1673 if (r) {
1674 DRM_ERROR("resume of IP block <%s> failed %d\n",
1675 adev->ip_blocks[i].version->funcs->name, r);
1676 return r;
1677 }
1678 } else {
1679 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1680 if (r) {
1681 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1682 adev->ip_blocks[i].version->funcs->name, r);
1683 return r;
1684 }
1685 }
1686 adev->ip_blocks[i].status.hw = true;
1687 }
1688 }
1689 }
80f41f84 1690 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 1691
80f41f84 1692 return r;
7a3e0bb2
RZ
1693}
1694
e3ecdffa
AD
1695/**
1696 * amdgpu_device_ip_init - run init for hardware IPs
1697 *
1698 * @adev: amdgpu_device pointer
1699 *
1700 * Main initialization pass for hardware IPs. The list of all the hardware
1701 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
1702 * are run. sw_init initializes the software state associated with each IP
1703 * and hw_init initializes the hardware associated with each IP.
1704 * Returns 0 on success, negative error code on failure.
1705 */
06ec9070 1706static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
1707{
1708 int i, r;
1709
c030f2e4 1710 r = amdgpu_ras_init(adev);
1711 if (r)
1712 return r;
1713
d38ceaf9 1714 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1715 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 1716 continue;
a1255107 1717 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 1718 if (r) {
a1255107
AD
1719 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
1720 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 1721 goto init_failed;
2c1a2784 1722 }
a1255107 1723 adev->ip_blocks[i].status.sw = true;
bfca0289 1724
d38ceaf9 1725 /* need to do gmc hw init early so we can allocate gpu mem */
a1255107 1726 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
06ec9070 1727 r = amdgpu_device_vram_scratch_init(adev);
2c1a2784
AD
1728 if (r) {
1729 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
72d3f592 1730 goto init_failed;
2c1a2784 1731 }
a1255107 1732 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
1733 if (r) {
1734 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 1735 goto init_failed;
2c1a2784 1736 }
06ec9070 1737 r = amdgpu_device_wb_init(adev);
2c1a2784 1738 if (r) {
06ec9070 1739 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 1740 goto init_failed;
2c1a2784 1741 }
a1255107 1742 adev->ip_blocks[i].status.hw = true;
2493664f
ML
1743
1744 /* right after GMC hw init, we create CSA */
f92d5c61 1745 if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
1e256e27
RZ
1746 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
1747 AMDGPU_GEM_DOMAIN_VRAM,
1748 AMDGPU_CSA_SIZE);
2493664f
ML
1749 if (r) {
1750 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 1751 goto init_failed;
2493664f
ML
1752 }
1753 }
d38ceaf9
AD
1754 }
1755 }
1756
533aed27
AG
1757 r = amdgpu_ib_pool_init(adev);
1758 if (r) {
1759 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
1760 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
1761 goto init_failed;
1762 }
1763
c8963ea4
RZ
1764 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
1765 if (r)
72d3f592 1766 goto init_failed;
0a4f2520
RZ
1767
1768 r = amdgpu_device_ip_hw_init_phase1(adev);
1769 if (r)
72d3f592 1770 goto init_failed;
0a4f2520 1771
7a3e0bb2
RZ
1772 r = amdgpu_device_fw_loading(adev);
1773 if (r)
72d3f592 1774 goto init_failed;
7a3e0bb2 1775
0a4f2520
RZ
1776 r = amdgpu_device_ip_hw_init_phase2(adev);
1777 if (r)
72d3f592 1778 goto init_failed;
d38ceaf9 1779
3e2e2ab5
HZ
1780 if (adev->gmc.xgmi.num_physical_nodes > 1)
1781 amdgpu_xgmi_add_device(adev);
1884734a 1782 amdgpu_amdkfd_device_init(adev);
c6332b97 1783
72d3f592 1784init_failed:
d3c117e5 1785 if (amdgpu_sriov_vf(adev)) {
72d3f592
ED
1786 if (!r)
1787 amdgpu_virt_init_data_exchange(adev);
c6332b97 1788 amdgpu_virt_release_full_gpu(adev, true);
d3c117e5 1789 }
c6332b97 1790
72d3f592 1791 return r;
d38ceaf9
AD
1792}
1793
e3ecdffa
AD
1794/**
1795 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
1796 *
1797 * @adev: amdgpu_device pointer
1798 *
1799 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
1800 * this function before a GPU reset. If the value is retained after a
1801 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
1802 */
06ec9070 1803static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
1804{
1805 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
1806}
1807
e3ecdffa
AD
1808/**
1809 * amdgpu_device_check_vram_lost - check if vram is valid
1810 *
1811 * @adev: amdgpu_device pointer
1812 *
1813 * Checks the reset magic value written to the gart pointer in VRAM.
1814 * The driver calls this after a GPU reset to see if the contents of
1815 * VRAM have been lost or not.
1816 * Returns true if VRAM is lost, false if not.
1817 */
06ec9070 1818static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8
CZ
1819{
1820 return !!memcmp(adev->gart.ptr, adev->reset_magic,
1821 AMDGPU_RESET_MAGIC_NUM);
1822}
1823
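The two helpers above are meant to be used as a pair around a reset: the magic is written before the reset and compared afterwards. A minimal illustrative sketch follows; it is not part of the driver, and amdgpu_do_reset() is a hypothetical stand-in for whatever reset sequence the caller actually performs.

/* Illustrative sketch only: amdgpu_do_reset() is a hypothetical stand-in. */
static bool example_reset_and_check_vram(struct amdgpu_device *adev)
{
	amdgpu_device_fill_reset_magic(adev);	/* stash the magic via the GART pointer */

	amdgpu_do_reset(adev);			/* hypothetical ASIC reset */

	/* true means the magic did not survive and VRAM contents must be restored */
	return amdgpu_device_check_vram_lost(adev);
}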
e3ecdffa 1824/**
1112a46b 1825 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
1826 *
1827 * @adev: amdgpu_device pointer
1828 *
e3ecdffa 1829 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
1830 * set_clockgating_state callbacks are run.
1831 * On late init, this pass enables clockgating for the hardware IPs;
1832 * on fini or suspend, it disables clockgating for the hardware IPs.
e3ecdffa
AD
1833 * Returns 0 on success, negative error code on failure.
1834 */
fdd34271 1835
1112a46b
RZ
1836static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
1837 enum amd_clockgating_state state)
d38ceaf9 1838{
1112a46b 1839 int i, j, r;
d38ceaf9 1840
4a2ba394
SL
1841 if (amdgpu_emu_mode == 1)
1842 return 0;
1843
1112a46b
RZ
1844 for (j = 0; j < adev->num_ip_blocks; j++) {
1845 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 1846 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 1847 continue;
4a446d55 1848 /* skip CG for VCE/UVD, it's handled specially */
a1255107 1849 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 1850 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 1851 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
57716327 1852 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 1853 /* enable clockgating to save power */
a1255107 1854 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 1855 state);
4a446d55
AD
1856 if (r) {
1857 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 1858 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
1859 return r;
1860 }
b0b00ff1 1861 }
d38ceaf9 1862 }
06b18f61 1863
c9f96fd5
RZ
1864 return 0;
1865}
1866
1112a46b 1867static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
c9f96fd5 1868{
1112a46b 1869 int i, j, r;
06b18f61 1870
c9f96fd5
RZ
1871 if (amdgpu_emu_mode == 1)
1872 return 0;
1873
1112a46b
RZ
1874 for (j = 0; j < adev->num_ip_blocks; j++) {
1875 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 1876 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5
RZ
1877 continue;
1878 /* skip CG for VCE/UVD, it's handled specially */
1879 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
1880 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
1881 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
1882 adev->ip_blocks[i].version->funcs->set_powergating_state) {
1883 /* enable powergating to save power */
1884 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 1885 state);
c9f96fd5
RZ
1886 if (r) {
1887 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
1888 adev->ip_blocks[i].version->funcs->name, r);
1889 return r;
1890 }
1891 }
1892 }
2dc80b00
S
1893 return 0;
1894}
1895
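The two walkers above are always used as a symmetric bracket: gating is enabled only after every IP has finished late init, and removed again before any IP is suspended or torn down. A compact sketch of that ordering, mirroring the calls made later by late_init and fini/suspend in this file (illustrative only; return values are ignored for brevity):

/* Illustrative only: the gate/ungate bracket around normal operation. */
static void example_gating_bracket(struct amdgpu_device *adev)
{
	/* after late init: clockgating first, then powergating */
	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);

	/* ... device in normal operation ... */

	/* before fini or suspend: undo in the reverse order */
	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
}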
beff74bc
AD
1896static int amdgpu_device_enable_mgpu_fan_boost(void)
1897{
1898 struct amdgpu_gpu_instance *gpu_ins;
1899 struct amdgpu_device *adev;
1900 int i, ret = 0;
1901
1902 mutex_lock(&mgpu_info.mutex);
1903
1904 /*
1905 * MGPU fan boost feature should be enabled
1906 * only when there are two or more dGPUs in
1907 * the system
1908 */
1909 if (mgpu_info.num_dgpu < 2)
1910 goto out;
1911
1912 for (i = 0; i < mgpu_info.num_dgpu; i++) {
1913 gpu_ins = &(mgpu_info.gpu_ins[i]);
1914 adev = gpu_ins->adev;
1915 if (!(adev->flags & AMD_IS_APU) &&
1916 !gpu_ins->mgpu_fan_enabled &&
1917 adev->powerplay.pp_funcs &&
1918 adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
1919 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
1920 if (ret)
1921 break;
1922
1923 gpu_ins->mgpu_fan_enabled = 1;
1924 }
1925 }
1926
1927out:
1928 mutex_unlock(&mgpu_info.mutex);
1929
1930 return ret;
1931}
1932
e3ecdffa
AD
1933/**
1934 * amdgpu_device_ip_late_init - run late init for hardware IPs
1935 *
1936 * @adev: amdgpu_device pointer
1937 *
1938 * Late initialization pass for hardware IPs. The list of all the hardware
1939 * IPs that make up the asic is walked and the late_init callbacks are run.
1940 * late_init covers any special initialization that an IP requires
1941 * after all of the IPs have been initialized or something that needs to happen
1942 * late in the init process.
1943 * Returns 0 on success, negative error code on failure.
1944 */
06ec9070 1945static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00
S
1946{
1947 int i = 0, r;
1948
1949 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 1950 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
1951 continue;
1952 if (adev->ip_blocks[i].version->funcs->late_init) {
1953 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
1954 if (r) {
1955 DRM_ERROR("late_init of IP block <%s> failed %d\n",
1956 adev->ip_blocks[i].version->funcs->name, r);
1957 return r;
1958 }
2dc80b00 1959 }
73f847db 1960 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
1961 }
1962
1112a46b
RZ
1963 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
1964 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 1965
06ec9070 1966 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 1967
beff74bc
AD
1968 r = amdgpu_device_enable_mgpu_fan_boost();
1969 if (r)
1970 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
1971
1972 /* set to low pstate by default */
1973 amdgpu_xgmi_set_pstate(adev, 0);
1974
d38ceaf9
AD
1975 return 0;
1976}
1977
e3ecdffa
AD
1978/**
1979 * amdgpu_device_ip_fini - run fini for hardware IPs
1980 *
1981 * @adev: amdgpu_device pointer
1982 *
1983 * Main teardown pass for hardware IPs. The list of all the hardware
1984 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
1985 * are run. hw_fini tears down the hardware associated with each IP
1986 * and sw_fini tears down any software state associated with each IP.
1987 * Returns 0 on success, negative error code on failure.
1988 */
06ec9070 1989static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1990{
1991 int i, r;
1992
c030f2e4 1993 amdgpu_ras_pre_fini(adev);
1994
a82400b5
AG
1995 if (adev->gmc.xgmi.num_physical_nodes > 1)
1996 amdgpu_xgmi_remove_device(adev);
1997
1884734a 1998 amdgpu_amdkfd_device_fini(adev);
05df1f01
RZ
1999
2000 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2001 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2002
3e96dbfd
AD
2003 /* need to disable SMC first */
2004 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2005 if (!adev->ip_blocks[i].status.hw)
3e96dbfd 2006 continue;
fdd34271 2007 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
a1255107 2008 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3e96dbfd
AD
2009 /* XXX handle errors */
2010 if (r) {
2011 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
a1255107 2012 adev->ip_blocks[i].version->funcs->name, r);
3e96dbfd 2013 }
a1255107 2014 adev->ip_blocks[i].status.hw = false;
3e96dbfd
AD
2015 break;
2016 }
2017 }
2018
d38ceaf9 2019 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2020 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2021 continue;
8201a67a 2022
a1255107 2023 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2024 /* XXX handle errors */
2c1a2784 2025 if (r) {
a1255107
AD
2026 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2027 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2028 }
8201a67a 2029
a1255107 2030 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2031 }
2032
9950cda2 2033
d38ceaf9 2034 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2035 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2036 continue;
c12aba3a
ML
2037
2038 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2039 amdgpu_ucode_free_bo(adev);
1e256e27 2040 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a
ML
2041 amdgpu_device_wb_fini(adev);
2042 amdgpu_device_vram_scratch_fini(adev);
533aed27 2043 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2044 }
2045
a1255107 2046 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2047 /* XXX handle errors */
2c1a2784 2048 if (r) {
a1255107
AD
2049 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2050 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2051 }
a1255107
AD
2052 adev->ip_blocks[i].status.sw = false;
2053 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2054 }
2055
a6dcfd9c 2056 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2057 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2058 continue;
a1255107
AD
2059 if (adev->ip_blocks[i].version->funcs->late_fini)
2060 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2061 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2062 }
2063
c030f2e4 2064 amdgpu_ras_fini(adev);
2065
030308fc 2066 if (amdgpu_sriov_vf(adev))
24136135
ML
2067 if (amdgpu_virt_release_full_gpu(adev, false))
2068 DRM_ERROR("failed to release exclusive mode on fini\n");
2493664f 2069
d38ceaf9
AD
2070 return 0;
2071}
2072
e3ecdffa 2073/**
beff74bc 2074 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2075 *
1112a46b 2076 * @work: work_struct.
e3ecdffa 2077 */
beff74bc 2078static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2079{
2080 struct amdgpu_device *adev =
beff74bc 2081 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2082 int r;
2083
2084 r = amdgpu_ib_ring_tests(adev);
2085 if (r)
2086 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2087}
2088
1e317b99
RZ
2089static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2090{
2091 struct amdgpu_device *adev =
2092 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2093
2094 mutex_lock(&adev->gfx.gfx_off_mutex);
2095 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2096 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2097 adev->gfx.gfx_off_state = true;
2098 }
2099 mutex_unlock(&adev->gfx.gfx_off_mutex);
2100}
2101
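This delayed work only enables GFXOFF once every outstanding disable request has been dropped. A rough sketch of the request/release side that pairs with it is shown below; the real entry point is amdgpu_gfx_off_ctrl() in amdgpu_gfx.c, so this is a simplified illustration rather than the actual implementation, and the 100 ms delay is an assumption made for the example.

/* Simplified illustration of the request/release counterpart; see
 * amdgpu_gfx_off_ctrl() in amdgpu_gfx.c for the real logic. */
static void example_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
{
	mutex_lock(&adev->gfx.gfx_off_mutex);

	if (!enable)
		adev->gfx.gfx_off_req_count++;
	else if (adev->gfx.gfx_off_req_count > 0)
		adev->gfx.gfx_off_req_count--;

	if (enable && !adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
		/* nobody objects any more: let the delayed work turn GFXOFF on */
		schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
				      msecs_to_jiffies(100));
	} else if (!enable && adev->gfx.gfx_off_state) {
		/* a new request arrived while GFXOFF is active: leave it now */
		if (!amdgpu_dpm_set_powergating_by_smu(adev,
						       AMD_IP_BLOCK_TYPE_GFX, false))
			adev->gfx.gfx_off_state = false;
	}

	mutex_unlock(&adev->gfx.gfx_off_mutex);
}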
e3ecdffa 2102/**
e7854a03 2103 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2104 *
2105 * @adev: amdgpu_device pointer
2106 *
2107 * First phase of the main suspend sequence for hardware IPs. Clockgating and
2108 * powergating are disabled and the suspend callbacks are run for the display
2109 * (DCE) blocks, which are the ones handled in this phase. suspend puts the
2110 * hardware and software state in each IP into a state suitable for suspend.
2111 * Returns 0 on success, negative error code on failure.
2112 */
e7854a03
AD
2113static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2114{
2115 int i, r;
2116
05df1f01 2117 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271 2118 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2119
e7854a03
AD
2120 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2121 if (!adev->ip_blocks[i].status.valid)
2122 continue;
2123 /* displays are handled separately */
2124 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
e7854a03
AD
2125 /* XXX handle errors */
2126 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2127 /* XXX handle errors */
2128 if (r) {
2129 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2130 adev->ip_blocks[i].version->funcs->name, r);
2131 }
2132 }
2133 }
2134
e7854a03
AD
2135 return 0;
2136}
2137
2138/**
2139 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2140 *
2141 * @adev: amdgpu_device pointer
2142 *
2143 * Second phase of the main suspend sequence for hardware IPs. The list of all
2144 * the hardware IPs that make up the asic is walked and the suspend callbacks
2145 * are run for every block except the displays (handled in phase 1). suspend
2146 * puts the hardware and software state in each IP into a state suitable for suspend.
2147 * Returns 0 on success, negative error code on failure.
2148 */
2149static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2150{
2151 int i, r;
2152
2153 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2154 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2155 continue;
e7854a03
AD
2156 /* displays are handled in phase1 */
2157 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2158 continue;
d38ceaf9 2159 /* XXX handle errors */
a1255107 2160 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 2161 /* XXX handle errors */
2c1a2784 2162 if (r) {
a1255107
AD
2163 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2164 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2165 }
d38ceaf9
AD
2166 }
2167
2168 return 0;
2169}
2170
e7854a03
AD
2171/**
2172 * amdgpu_device_ip_suspend - run suspend for hardware IPs
2173 *
2174 * @adev: amdgpu_device pointer
2175 *
2176 * Main suspend function for hardware IPs. The list of all the hardware
2177 * IPs that make up the asic is walked, clockgating is disabled and the
2178 * suspend callbacks are run. suspend puts the hardware and software state
2179 * in each IP into a state suitable for suspend.
2180 * Returns 0 on success, negative error code on failure.
2181 */
2182int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2183{
2184 int r;
2185
e7819644
YT
2186 if (amdgpu_sriov_vf(adev))
2187 amdgpu_virt_request_full_gpu(adev, false);
2188
e7854a03
AD
2189 r = amdgpu_device_ip_suspend_phase1(adev);
2190 if (r)
2191 return r;
2192 r = amdgpu_device_ip_suspend_phase2(adev);
2193
e7819644
YT
2194 if (amdgpu_sriov_vf(adev))
2195 amdgpu_virt_release_full_gpu(adev, false);
2196
e7854a03
AD
2197 return r;
2198}
2199
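amdgpu_device_ip_suspend() is the non-static entry point used by callers outside this file to quiesce every IP in one shot, for example before a manually triggered reset. A minimal, hypothetical caller is sketched below (the function name and the surrounding error handling are assumptions made for illustration):

/* Hypothetical caller, for illustration only. */
static int example_quiesce_before_reset(struct amdgpu_device *adev)
{
	int r;

	r = amdgpu_device_ip_suspend(adev);	/* phase 1 (displays), then phase 2 (the rest) */
	if (r)
		dev_err(adev->dev, "failed to suspend IPs before reset (%d)\n", r);

	return r;
}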
06ec9070 2200static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2201{
2202 int i, r;
2203
2cb681b6
ML
2204 static enum amd_ip_block_type ip_order[] = {
2205 AMD_IP_BLOCK_TYPE_GMC,
2206 AMD_IP_BLOCK_TYPE_COMMON,
39186aef 2207 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
2208 AMD_IP_BLOCK_TYPE_IH,
2209 };
a90ad3c2 2210
2cb681b6
ML
2211 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2212 int j;
2213 struct amdgpu_ip_block *block;
a90ad3c2 2214
2cb681b6
ML
2215 for (j = 0; j < adev->num_ip_blocks; j++) {
2216 block = &adev->ip_blocks[j];
2217
2218 if (block->version->type != ip_order[i] ||
2219 !block->status.valid)
2220 continue;
2221
2222 r = block->version->funcs->hw_init(adev);
0aaeefcc 2223 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2224 if (r)
2225 return r;
a90ad3c2
ML
2226 }
2227 }
2228
2229 return 0;
2230}
2231
06ec9070 2232static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2233{
2234 int i, r;
2235
2cb681b6
ML
2236 static enum amd_ip_block_type ip_order[] = {
2237 AMD_IP_BLOCK_TYPE_SMC,
2238 AMD_IP_BLOCK_TYPE_DCE,
2239 AMD_IP_BLOCK_TYPE_GFX,
2240 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c
FM
2241 AMD_IP_BLOCK_TYPE_UVD,
2242 AMD_IP_BLOCK_TYPE_VCE
2cb681b6 2243 };
a90ad3c2 2244
2cb681b6
ML
2245 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2246 int j;
2247 struct amdgpu_ip_block *block;
a90ad3c2 2248
2cb681b6
ML
2249 for (j = 0; j < adev->num_ip_blocks; j++) {
2250 block = &adev->ip_blocks[j];
2251
2252 if (block->version->type != ip_order[i] ||
2253 !block->status.valid)
2254 continue;
2255
2256 r = block->version->funcs->hw_init(adev);
0aaeefcc 2257 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2258 if (r)
2259 return r;
a90ad3c2
ML
2260 }
2261 }
2262
2263 return 0;
2264}
2265
e3ecdffa
AD
2266/**
2267 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2268 *
2269 * @adev: amdgpu_device pointer
2270 *
2271 * First resume function for hardware IPs. The list of all the hardware
2272 * IPs that make up the asic is walked and the resume callbacks are run for
2273 * COMMON, GMC, and IH. resume puts the hardware into a functional state
2274 * after a suspend and updates the software state as necessary. This
2275 * function is also used for restoring the GPU after a GPU reset.
2276 * Returns 0 on success, negative error code on failure.
2277 */
06ec9070 2278static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
2279{
2280 int i, r;
2281
a90ad3c2
ML
2282 for (i = 0; i < adev->num_ip_blocks; i++) {
2283 if (!adev->ip_blocks[i].status.valid)
2284 continue;
a90ad3c2 2285 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa
AD
2286 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2287 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
fcf0649f
CZ
2288 r = adev->ip_blocks[i].version->funcs->resume(adev);
2289 if (r) {
2290 DRM_ERROR("resume of IP block <%s> failed %d\n",
2291 adev->ip_blocks[i].version->funcs->name, r);
2292 return r;
2293 }
a90ad3c2
ML
2294 }
2295 }
2296
2297 return 0;
2298}
2299
e3ecdffa
AD
2300/**
2301 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2302 *
2303 * @adev: amdgpu_device pointer
2304 *
2305 * Second resume function for hardware IPs. The list of all the hardware
2306 * IPs that make up the asic is walked and the resume callbacks are run for
2307 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
2308 * functional state after a suspend and updates the software state as
2309 * necessary. This function is also used for restoring the GPU after a GPU
2310 * reset.
2311 * Returns 0 on success, negative error code on failure.
2312 */
06ec9070 2313static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2314{
2315 int i, r;
2316
2317 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2318 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2319 continue;
fcf0649f 2320 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 2321 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
2322 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2323 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 2324 continue;
a1255107 2325 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 2326 if (r) {
a1255107
AD
2327 DRM_ERROR("resume of IP block <%s> failed %d\n",
2328 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2329 return r;
2c1a2784 2330 }
d38ceaf9
AD
2331 }
2332
2333 return 0;
2334}
2335
e3ecdffa
AD
2336/**
2337 * amdgpu_device_ip_resume - run resume for hardware IPs
2338 *
2339 * @adev: amdgpu_device pointer
2340 *
2341 * Main resume function for hardware IPs. The hardware IPs
2342 * are split into two resume functions because they are
2343 * also used in recovering from a GPU reset and some additional
2344 * steps need to be taken between them. In this case (S3/S4) they are
2345 * run sequentially.
2346 * Returns 0 on success, negative error code on failure.
2347 */
06ec9070 2348static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
2349{
2350 int r;
2351
06ec9070 2352 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
2353 if (r)
2354 return r;
7a3e0bb2
RZ
2355
2356 r = amdgpu_device_fw_loading(adev);
2357 if (r)
2358 return r;
2359
06ec9070 2360 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
2361
2362 return r;
2363}
2364
e3ecdffa
AD
2365/**
2366 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2367 *
2368 * @adev: amdgpu_device pointer
2369 *
2370 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2371 */
4e99a44e 2372static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 2373{
6867e1b5
ML
2374 if (amdgpu_sriov_vf(adev)) {
2375 if (adev->is_atom_fw) {
2376 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2377 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2378 } else {
2379 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2380 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2381 }
2382
2383 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2384 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 2385 }
048765ad
AR
2386}
2387
e3ecdffa
AD
2388/**
2389 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2390 *
2391 * @asic_type: AMD asic type
2392 *
2393 * Check if there is DC (the new display modesetting infrastructure) support for an asic.
2394 * Returns true if DC has support, false if not.
2395 */
4562236b
HW
2396bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2397{
2398 switch (asic_type) {
2399#if defined(CONFIG_DRM_AMD_DC)
2400 case CHIP_BONAIRE:
0d6fbccb 2401 case CHIP_KAVERI:
367e6687
AD
2402 case CHIP_KABINI:
2403 case CHIP_MULLINS:
d9fda248
HW
2404 /*
2405 * We have systems in the wild with these ASICs that require
2406 * LVDS and VGA support which is not supported with DC.
2407 *
2408 * Fallback to the non-DC driver here by default so as not to
2409 * cause regressions.
2410 */
2411 return amdgpu_dc > 0;
2412 case CHIP_HAWAII:
4562236b
HW
2413 case CHIP_CARRIZO:
2414 case CHIP_STONEY:
4562236b 2415 case CHIP_POLARIS10:
675fd32b 2416 case CHIP_POLARIS11:
2c8ad2d5 2417 case CHIP_POLARIS12:
675fd32b 2418 case CHIP_VEGAM:
4562236b
HW
2419 case CHIP_TONGA:
2420 case CHIP_FIJI:
42f8ffa1 2421 case CHIP_VEGA10:
dca7b401 2422 case CHIP_VEGA12:
c6034aa2 2423 case CHIP_VEGA20:
dc37a9a0 2424#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
fd187853 2425 case CHIP_RAVEN:
b4f199c7
HW
2426#endif
2427#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
2428 case CHIP_NAVI10:
42f8ffa1 2429#endif
fd187853 2430 return amdgpu_dc != 0;
4562236b
HW
2431#endif
2432 default:
2433 return false;
2434 }
2435}
2436
2437/**
2438 * amdgpu_device_has_dc_support - check if dc is supported
2439 *
2440 * @adev: amdgpu_device pointer
2441 *
2442 * Returns true for supported, false for not supported
2443 */
2444bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2445{
2555039d
XY
2446 if (amdgpu_sriov_vf(adev))
2447 return false;
2448
4562236b
HW
2449 return amdgpu_device_asic_has_dc_support(adev->asic_type);
2450}
2451
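Code elsewhere in the driver branches on these helpers to choose between the legacy display path and DC, as the init path later in this file does for the i2c buses. A small hypothetical example (the function name and messages are assumptions made for illustration):

/* Hypothetical example of branching on DC support; real callers are spread
 * across the display init/fini and suspend/resume paths in this file. */
static void example_pick_display_path(struct amdgpu_device *adev)
{
	if (amdgpu_device_has_dc_support(adev))
		DRM_INFO("using the DC (atomic) display path\n");
	else
		DRM_INFO("using the legacy (non-DC) display path\n");
}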
d4535e2c
AG
2452
2453static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
2454{
2455 struct amdgpu_device *adev =
2456 container_of(__work, struct amdgpu_device, xgmi_reset_work);
2457
2458 adev->asic_reset_res = amdgpu_asic_reset(adev);
2459 if (adev->asic_reset_res)
fed184e9 2460 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
d4535e2c
AG
2461 adev->asic_reset_res, adev->ddev->unique);
2462}
2463
2464
d38ceaf9
AD
2465/**
2466 * amdgpu_device_init - initialize the driver
2467 *
2468 * @adev: amdgpu_device pointer
87e3f136 2469 * @ddev: drm dev pointer
d38ceaf9
AD
2470 * @pdev: pci dev pointer
2471 * @flags: driver flags
2472 *
2473 * Initializes the driver info and hw (all asics).
2474 * Returns 0 for success or an error on failure.
2475 * Called at driver startup.
2476 */
2477int amdgpu_device_init(struct amdgpu_device *adev,
2478 struct drm_device *ddev,
2479 struct pci_dev *pdev,
2480 uint32_t flags)
2481{
2482 int r, i;
2483 bool runtime = false;
95844d20 2484 u32 max_MBps;
d38ceaf9
AD
2485
2486 adev->shutdown = false;
2487 adev->dev = &pdev->dev;
2488 adev->ddev = ddev;
2489 adev->pdev = pdev;
2490 adev->flags = flags;
2f7d10b3 2491 adev->asic_type = flags & AMD_ASIC_MASK;
d38ceaf9 2492 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2
SL
2493 if (amdgpu_emu_mode == 1)
2494 adev->usec_timeout *= 2;
770d13b1 2495 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
2496 adev->accel_working = false;
2497 adev->num_rings = 0;
2498 adev->mman.buffer_funcs = NULL;
2499 adev->mman.buffer_funcs_ring = NULL;
2500 adev->vm_manager.vm_pte_funcs = NULL;
3798e9a6 2501 adev->vm_manager.vm_pte_num_rqs = 0;
132f34e4 2502 adev->gmc.gmc_funcs = NULL;
f54d1867 2503 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 2504 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
2505
2506 adev->smc_rreg = &amdgpu_invalid_rreg;
2507 adev->smc_wreg = &amdgpu_invalid_wreg;
2508 adev->pcie_rreg = &amdgpu_invalid_rreg;
2509 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
2510 adev->pciep_rreg = &amdgpu_invalid_rreg;
2511 adev->pciep_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
2512 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
2513 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
2514 adev->didt_rreg = &amdgpu_invalid_rreg;
2515 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
2516 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
2517 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
2518 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
2519 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
2520
3e39ab90
AD
2521 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
2522 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
2523 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
2524
2525	/* mutex initializations are all done here so we
2526	 * can recall functions without having locking issues */
d38ceaf9 2527 atomic_set(&adev->irq.ih.lock, 0);
0e5ca0d1 2528 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
2529 mutex_init(&adev->pm.mutex);
2530 mutex_init(&adev->gfx.gpu_clock_mutex);
2531 mutex_init(&adev->srbm_mutex);
b8866c26 2532 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 2533 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 2534 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 2535 mutex_init(&adev->mn_lock);
e23b74aa 2536 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 2537 hash_init(adev->mn_hash);
13a752e3 2538 mutex_init(&adev->lock_reset);
bb5a2bdf 2539 mutex_init(&adev->virt.dpm_mutex);
d38ceaf9 2540
912dfc84
EQ
2541 r = amdgpu_device_check_arguments(adev);
2542 if (r)
2543 return r;
d38ceaf9 2544
d38ceaf9
AD
2545 spin_lock_init(&adev->mmio_idx_lock);
2546 spin_lock_init(&adev->smc_idx_lock);
2547 spin_lock_init(&adev->pcie_idx_lock);
2548 spin_lock_init(&adev->uvd_ctx_idx_lock);
2549 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 2550 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 2551 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 2552 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 2553 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 2554
0c4e7fa5
CZ
2555 INIT_LIST_HEAD(&adev->shadow_list);
2556 mutex_init(&adev->shadow_list_lock);
2557
795f2813
AR
2558 INIT_LIST_HEAD(&adev->ring_lru_list);
2559 spin_lock_init(&adev->ring_lru_list_lock);
2560
beff74bc
AD
2561 INIT_DELAYED_WORK(&adev->delayed_init_work,
2562 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
2563 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
2564 amdgpu_device_delay_enable_gfx_off);
2dc80b00 2565
d4535e2c
AG
2566 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
2567
d23ee13f 2568 adev->gfx.gfx_off_req_count = 1;
b1ddf548
RZ
2569 adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false;
2570
0fa49558
AX
2571 /* Registers mapping */
2572 /* TODO: block userspace mapping of io register */
da69c161
KW
2573 if (adev->asic_type >= CHIP_BONAIRE) {
2574 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
2575 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
2576 } else {
2577 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
2578 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
2579 }
d38ceaf9 2580
d38ceaf9
AD
2581 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
2582 if (adev->rmmio == NULL) {
2583 return -ENOMEM;
2584 }
2585 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
2586 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
2587
d38ceaf9
AD
2588 /* io port mapping */
2589 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
2590 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
2591 adev->rio_mem_size = pci_resource_len(adev->pdev, i);
2592 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
2593 break;
2594 }
2595 }
2596 if (adev->rio_mem == NULL)
b64a18c5 2597 DRM_INFO("PCI I/O BAR is not found.\n");
d38ceaf9 2598
5494d864
AD
2599 amdgpu_device_get_pcie_info(adev);
2600
b239c017
JX
2601 if (amdgpu_mcbp)
2602 DRM_INFO("MCBP is enabled\n");
2603
5f84cc63
JX
2604 if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
2605 adev->enable_mes = true;
2606
a190d1c7
XY
2607 if (amdgpu_discovery) {
2608 r = amdgpu_discovery_init(adev);
2609 if (r) {
2610 dev_err(adev->dev, "amdgpu_discovery_init failed\n");
2611 return r;
2612 }
2613 }
2614
d38ceaf9 2615 /* early init functions */
06ec9070 2616 r = amdgpu_device_ip_early_init(adev);
d38ceaf9
AD
2617 if (r)
2618 return r;
2619
6585661d
OZ
2620 /* doorbell bar mapping and doorbell index init*/
2621 amdgpu_device_doorbell_init(adev);
2622
d38ceaf9
AD
2623 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
2624 /* this will fail for cards that aren't VGA class devices, just
2625 * ignore it */
06ec9070 2626 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
d38ceaf9 2627
e9bef455 2628 if (amdgpu_device_is_px(ddev))
d38ceaf9 2629 runtime = true;
84c8b22e
LW
2630 if (!pci_is_thunderbolt_attached(adev->pdev))
2631 vga_switcheroo_register_client(adev->pdev,
2632 &amdgpu_switcheroo_ops, runtime);
d38ceaf9
AD
2633 if (runtime)
2634 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
2635
9475a943
SL
2636 if (amdgpu_emu_mode == 1) {
2637 /* post the asic on emulation mode */
2638 emu_soc_asic_init(adev);
bfca0289 2639 goto fence_driver_init;
9475a943 2640 }
bfca0289 2641
4e99a44e
ML
2642 /* detect if we are with an SRIOV vbios */
2643 amdgpu_device_detect_sriov_bios(adev);
048765ad 2644
95e8e59e
AD
2645 /* check if we need to reset the asic
2646 * E.g., driver was not cleanly unloaded previously, etc.
2647 */
f14899fd 2648 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
95e8e59e
AD
2649 r = amdgpu_asic_reset(adev);
2650 if (r) {
2651 dev_err(adev->dev, "asic reset on init failed\n");
2652 goto failed;
2653 }
2654 }
2655
d38ceaf9 2656 /* Post card if necessary */
39c640c0 2657 if (amdgpu_device_need_post(adev)) {
d38ceaf9 2658 if (!adev->bios) {
bec86378 2659 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
2660 r = -EINVAL;
2661 goto failed;
d38ceaf9 2662 }
bec86378 2663 DRM_INFO("GPU posting now...\n");
4e99a44e
ML
2664 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
2665 if (r) {
2666 dev_err(adev->dev, "gpu post error!\n");
2667 goto failed;
2668 }
d38ceaf9
AD
2669 }
2670
88b64e95
AD
2671 if (adev->is_atom_fw) {
2672 /* Initialize clocks */
2673 r = amdgpu_atomfirmware_get_clock_info(adev);
2674 if (r) {
2675 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 2676 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
2677 goto failed;
2678 }
2679 } else {
a5bde2f9
AD
2680 /* Initialize clocks */
2681 r = amdgpu_atombios_get_clock_info(adev);
2682 if (r) {
2683 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 2684 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 2685 goto failed;
a5bde2f9
AD
2686 }
2687 /* init i2c buses */
4562236b
HW
2688 if (!amdgpu_device_has_dc_support(adev))
2689 amdgpu_atombios_i2c_init(adev);
2c1a2784 2690 }
d38ceaf9 2691
bfca0289 2692fence_driver_init:
d38ceaf9
AD
2693 /* Fence driver */
2694 r = amdgpu_fence_driver_init(adev);
2c1a2784
AD
2695 if (r) {
2696 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
e23b74aa 2697 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 2698 goto failed;
2c1a2784 2699 }
d38ceaf9
AD
2700
2701 /* init the mode config */
2702 drm_mode_config_init(adev->ddev);
2703
06ec9070 2704 r = amdgpu_device_ip_init(adev);
d38ceaf9 2705 if (r) {
8840a387 2706 /* failed in exclusive mode due to timeout */
2707 if (amdgpu_sriov_vf(adev) &&
2708 !amdgpu_sriov_runtime(adev) &&
2709 amdgpu_virt_mmio_blocked(adev) &&
2710 !amdgpu_virt_wait_reset(adev)) {
2711 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
2712 /* Don't send request since VF is inactive. */
2713 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
2714 adev->virt.ops = NULL;
8840a387 2715 r = -EAGAIN;
2716 goto failed;
2717 }
06ec9070 2718 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 2719 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
72d3f592
ED
2720 if (amdgpu_virt_request_full_gpu(adev, false))
2721 amdgpu_virt_release_full_gpu(adev, false);
83ba126a 2722 goto failed;
d38ceaf9
AD
2723 }
2724
2725 adev->accel_working = true;
2726
e59c0205
AX
2727 amdgpu_vm_check_compute_bug(adev);
2728
95844d20
MO
2729 /* Initialize the buffer migration limit. */
2730 if (amdgpu_moverate >= 0)
2731 max_MBps = amdgpu_moverate;
2732 else
2733 max_MBps = 8; /* Allow 8 MB/s. */
2734 /* Get a log2 for easy divisions. */
2735 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
2736
9bc92b9c
ML
2737 amdgpu_fbdev_init(adev);
2738
e9bc1bf7
YT
2739 if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))
2740 amdgpu_pm_virt_sysfs_init(adev);
2741
d2f52ac8
RZ
2742 r = amdgpu_pm_sysfs_init(adev);
2743 if (r)
2744 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
2745
5bb23532
OM
2746 r = amdgpu_ucode_sysfs_init(adev);
2747 if (r)
2748 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
2749
75758255 2750 r = amdgpu_debugfs_gem_init(adev);
3f14e623 2751 if (r)
d38ceaf9 2752 DRM_ERROR("registering gem debugfs failed (%d).\n", r);
d38ceaf9
AD
2753
2754 r = amdgpu_debugfs_regs_init(adev);
3f14e623 2755 if (r)
d38ceaf9 2756 DRM_ERROR("registering register debugfs failed (%d).\n", r);
d38ceaf9 2757
50ab2533 2758 r = amdgpu_debugfs_firmware_init(adev);
3f14e623 2759 if (r)
50ab2533 2760 DRM_ERROR("registering firmware debugfs failed (%d).\n", r);
50ab2533 2761
763efb6c 2762 r = amdgpu_debugfs_init(adev);
db95e218 2763 if (r)
763efb6c 2764 DRM_ERROR("Creating debugfs files failed (%d).\n", r);
db95e218 2765
d38ceaf9
AD
2766 if ((amdgpu_testing & 1)) {
2767 if (adev->accel_working)
2768 amdgpu_test_moves(adev);
2769 else
2770 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
2771 }
d38ceaf9
AD
2772 if (amdgpu_benchmarking) {
2773 if (adev->accel_working)
2774 amdgpu_benchmark(adev, amdgpu_benchmarking);
2775 else
2776 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
2777 }
2778
2779 /* enable clockgating, etc. after ib tests, etc. since some blocks require
2780 * explicit gating rather than handling it automatically.
2781 */
06ec9070 2782 r = amdgpu_device_ip_late_init(adev);
2c1a2784 2783 if (r) {
06ec9070 2784 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
e23b74aa 2785 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
83ba126a 2786 goto failed;
2c1a2784 2787 }
d38ceaf9 2788
108c6a63 2789 /* must succeed. */
511fdbc3 2790 amdgpu_ras_resume(adev);
108c6a63 2791
beff74bc
AD
2792 queue_delayed_work(system_wq, &adev->delayed_init_work,
2793 msecs_to_jiffies(AMDGPU_RESUME_MS));
2794
dcea6e65
KR
2795 r = device_create_file(adev->dev, &dev_attr_pcie_replay_count);
2796 if (r) {
2797 dev_err(adev->dev, "Could not create pcie_replay_count");
2798 return r;
2799 }
108c6a63 2800
9c7c85f7
JK
2801 r = amdgpu_pmu_init(adev);
2802 if (r)
2803 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
2804
d38ceaf9 2805 return 0;
83ba126a
AD
2806
2807failed:
89041940 2808 amdgpu_vf_error_trans_all(adev);
83ba126a
AD
2809 if (runtime)
2810 vga_switcheroo_fini_domain_pm_ops(adev->dev);
8840a387 2811
83ba126a 2812 return r;
d38ceaf9
AD
2813}
2814
d38ceaf9
AD
2815/**
2816 * amdgpu_device_fini - tear down the driver
2817 *
2818 * @adev: amdgpu_device pointer
2819 *
2820 * Tear down the driver info (all asics).
2821 * Called at driver shutdown.
2822 */
2823void amdgpu_device_fini(struct amdgpu_device *adev)
2824{
2825 int r;
2826
2827 DRM_INFO("amdgpu: finishing device.\n");
2828 adev->shutdown = true;
e5b03032
ML
2829 /* disable all interrupts */
2830 amdgpu_irq_disable_all(adev);
ff97cba8
ML
2831 if (adev->mode_info.mode_config_initialized){
2832 if (!amdgpu_device_has_dc_support(adev))
c2d88e06 2833 drm_helper_force_disable_all(adev->ddev);
ff97cba8
ML
2834 else
2835 drm_atomic_helper_shutdown(adev->ddev);
2836 }
d38ceaf9 2837 amdgpu_fence_driver_fini(adev);
58e955d9 2838 amdgpu_pm_sysfs_fini(adev);
d38ceaf9 2839 amdgpu_fbdev_fini(adev);
06ec9070 2840 r = amdgpu_device_ip_fini(adev);
ab4fe3e1
HR
2841 if (adev->firmware.gpu_info_fw) {
2842 release_firmware(adev->firmware.gpu_info_fw);
2843 adev->firmware.gpu_info_fw = NULL;
2844 }
d38ceaf9 2845 adev->accel_working = false;
beff74bc 2846 cancel_delayed_work_sync(&adev->delayed_init_work);
d38ceaf9 2847 /* free i2c buses */
4562236b
HW
2848 if (!amdgpu_device_has_dc_support(adev))
2849 amdgpu_i2c_fini(adev);
bfca0289
SL
2850
2851 if (amdgpu_emu_mode != 1)
2852 amdgpu_atombios_fini(adev);
2853
d38ceaf9
AD
2854 kfree(adev->bios);
2855 adev->bios = NULL;
84c8b22e
LW
2856 if (!pci_is_thunderbolt_attached(adev->pdev))
2857 vga_switcheroo_unregister_client(adev->pdev);
83ba126a
AD
2858 if (adev->flags & AMD_IS_PX)
2859 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d38ceaf9
AD
2860 vga_client_register(adev->pdev, NULL, NULL, NULL);
2861 if (adev->rio_mem)
2862 pci_iounmap(adev->pdev, adev->rio_mem);
2863 adev->rio_mem = NULL;
2864 iounmap(adev->rmmio);
2865 adev->rmmio = NULL;
06ec9070 2866 amdgpu_device_doorbell_fini(adev);
e9bc1bf7
YT
2867 if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))
2868 amdgpu_pm_virt_sysfs_fini(adev);
2869
d38ceaf9 2870 amdgpu_debugfs_regs_cleanup(adev);
dcea6e65 2871 device_remove_file(adev->dev, &dev_attr_pcie_replay_count);
5bb23532 2872 amdgpu_ucode_sysfs_fini(adev);
9c7c85f7 2873 amdgpu_pmu_fini(adev);
6698a3d0 2874 amdgpu_debugfs_preempt_cleanup(adev);
a190d1c7
XY
2875 if (amdgpu_discovery)
2876 amdgpu_discovery_fini(adev);
d38ceaf9
AD
2877}
2878
2879
2880/*
2881 * Suspend & resume.
2882 */
2883/**
810ddc3a 2884 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 2885 *
87e3f136
DP
2886 * @dev: drm dev pointer
2887 * @suspend: suspend state
2888 * @fbcon : notify the fbdev of suspend
d38ceaf9
AD
2889 *
2890 * Puts the hw in the suspend state (all asics).
2891 * Returns 0 for success or an error on failure.
2892 * Called at driver suspend.
2893 */
810ddc3a 2894int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
d38ceaf9
AD
2895{
2896 struct amdgpu_device *adev;
2897 struct drm_crtc *crtc;
2898 struct drm_connector *connector;
5ceb54c6 2899 int r;
d38ceaf9
AD
2900
2901 if (dev == NULL || dev->dev_private == NULL) {
2902 return -ENODEV;
2903 }
2904
2905 adev = dev->dev_private;
2906
2907 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
2908 return 0;
2909
44779b43 2910 adev->in_suspend = true;
d38ceaf9
AD
2911 drm_kms_helper_poll_disable(dev);
2912
5f818173
S
2913 if (fbcon)
2914 amdgpu_fbdev_set_suspend(adev, 1);
2915
beff74bc 2916 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 2917
4562236b
HW
2918 if (!amdgpu_device_has_dc_support(adev)) {
2919 /* turn off display hw */
2920 drm_modeset_lock_all(dev);
2921 list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
2922 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
2923 }
2924 drm_modeset_unlock_all(dev);
fe1053b7
AD
2925 /* unpin the front buffers and cursors */
2926 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
2927 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
2928 struct drm_framebuffer *fb = crtc->primary->fb;
2929 struct amdgpu_bo *robj;
2930
91334223 2931 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
2932 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
2933 r = amdgpu_bo_reserve(aobj, true);
2934 if (r == 0) {
2935 amdgpu_bo_unpin(aobj);
2936 amdgpu_bo_unreserve(aobj);
2937 }
756e6880 2938 }
756e6880 2939
fe1053b7
AD
2940 if (fb == NULL || fb->obj[0] == NULL) {
2941 continue;
2942 }
2943 robj = gem_to_amdgpu_bo(fb->obj[0]);
2944 /* don't unpin kernel fb objects */
2945 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
2946 r = amdgpu_bo_reserve(robj, true);
2947 if (r == 0) {
2948 amdgpu_bo_unpin(robj);
2949 amdgpu_bo_unreserve(robj);
2950 }
d38ceaf9
AD
2951 }
2952 }
2953 }
fe1053b7
AD
2954
2955 amdgpu_amdkfd_suspend(adev);
2956
5e6932fe 2957 amdgpu_ras_suspend(adev);
2958
fe1053b7
AD
2959 r = amdgpu_device_ip_suspend_phase1(adev);
2960
d38ceaf9
AD
2961 /* evict vram memory */
2962 amdgpu_bo_evict_vram(adev);
2963
5ceb54c6 2964 amdgpu_fence_driver_suspend(adev);
d38ceaf9 2965
fe1053b7 2966 r = amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 2967
a0a71e49
AD
2968 /* evict remaining vram memory
2969 * This second call to evict vram is to evict the gart page table
2970 * using the CPU.
2971 */
d38ceaf9
AD
2972 amdgpu_bo_evict_vram(adev);
2973
2974 pci_save_state(dev->pdev);
2975 if (suspend) {
2976 /* Shut down the device */
2977 pci_disable_device(dev->pdev);
2978 pci_set_power_state(dev->pdev, PCI_D3hot);
74b0b157 2979 } else {
2980 r = amdgpu_asic_reset(adev);
2981 if (r)
2982 DRM_ERROR("amdgpu asic reset failed\n");
d38ceaf9
AD
2983 }
2984
d38ceaf9
AD
2985 return 0;
2986}
2987
2988/**
810ddc3a 2989 * amdgpu_device_resume - initiate device resume
d38ceaf9 2990 *
87e3f136
DP
2991 * @dev: drm dev pointer
2992 * @resume: resume state
2993 * @fbcon : notify the fbdev of resume
d38ceaf9
AD
2994 *
2995 * Bring the hw back to operating state (all asics).
2996 * Returns 0 for success or an error on failure.
2997 * Called at driver resume.
2998 */
810ddc3a 2999int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
d38ceaf9
AD
3000{
3001 struct drm_connector *connector;
3002 struct amdgpu_device *adev = dev->dev_private;
756e6880 3003 struct drm_crtc *crtc;
03161a6e 3004 int r = 0;
d38ceaf9
AD
3005
3006 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3007 return 0;
3008
d38ceaf9
AD
3009 if (resume) {
3010 pci_set_power_state(dev->pdev, PCI_D0);
3011 pci_restore_state(dev->pdev);
74b0b157 3012 r = pci_enable_device(dev->pdev);
03161a6e 3013 if (r)
4d3b9ae5 3014 return r;
d38ceaf9
AD
3015 }
3016
3017 /* post card */
39c640c0 3018 if (amdgpu_device_need_post(adev)) {
74b0b157 3019 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3020 if (r)
3021 DRM_ERROR("amdgpu asic init failed\n");
3022 }
d38ceaf9 3023
06ec9070 3024 r = amdgpu_device_ip_resume(adev);
e6707218 3025 if (r) {
06ec9070 3026 DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
4d3b9ae5 3027 return r;
e6707218 3028 }
5ceb54c6
AD
3029 amdgpu_fence_driver_resume(adev);
3030
d38ceaf9 3031
06ec9070 3032 r = amdgpu_device_ip_late_init(adev);
03161a6e 3033 if (r)
4d3b9ae5 3034 return r;
d38ceaf9 3035
beff74bc
AD
3036 queue_delayed_work(system_wq, &adev->delayed_init_work,
3037 msecs_to_jiffies(AMDGPU_RESUME_MS));
3038
fe1053b7
AD
3039 if (!amdgpu_device_has_dc_support(adev)) {
3040 /* pin cursors */
3041 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3042 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3043
91334223 3044 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3045 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3046 r = amdgpu_bo_reserve(aobj, true);
3047 if (r == 0) {
3048 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
3049 if (r != 0)
3050 DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
3051 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
3052 amdgpu_bo_unreserve(aobj);
3053 }
756e6880
AD
3054 }
3055 }
3056 }
ba997709
YZ
3057 r = amdgpu_amdkfd_resume(adev);
3058 if (r)
3059 return r;
756e6880 3060
96a5d8d4 3061 /* Make sure IB tests flushed */
beff74bc 3062 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 3063
d38ceaf9
AD
3064 /* blat the mode back in */
3065 if (fbcon) {
4562236b
HW
3066 if (!amdgpu_device_has_dc_support(adev)) {
3067 /* pre DCE11 */
3068 drm_helper_resume_force_mode(dev);
3069
3070 /* turn on display hw */
3071 drm_modeset_lock_all(dev);
3072 list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
3073 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
3074 }
3075 drm_modeset_unlock_all(dev);
d38ceaf9 3076 }
4d3b9ae5 3077 amdgpu_fbdev_set_suspend(adev, 0);
d38ceaf9
AD
3078 }
3079
3080 drm_kms_helper_poll_enable(dev);
23a1a9e5 3081
5e6932fe 3082 amdgpu_ras_resume(adev);
3083
23a1a9e5
L
3084 /*
3085 * Most of the connector probing functions try to acquire runtime pm
3086 * refs to ensure that the GPU is powered on when connector polling is
3087 * performed. Since we're calling this from a runtime PM callback,
3088 * trying to acquire rpm refs will cause us to deadlock.
3089 *
3090 * Since we're guaranteed to be holding the rpm lock, it's safe to
3091 * temporarily disable the rpm helpers so this doesn't deadlock us.
3092 */
3093#ifdef CONFIG_PM
3094 dev->dev->power.disable_depth++;
3095#endif
4562236b
HW
3096 if (!amdgpu_device_has_dc_support(adev))
3097 drm_helper_hpd_irq_event(dev);
3098 else
3099 drm_kms_helper_hotplug_event(dev);
23a1a9e5
L
3100#ifdef CONFIG_PM
3101 dev->dev->power.disable_depth--;
3102#endif
44779b43
RZ
3103 adev->in_suspend = false;
3104
4d3b9ae5 3105 return 0;
d38ceaf9
AD
3106}
3107
e3ecdffa
AD
3108/**
3109 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
3110 *
3111 * @adev: amdgpu_device pointer
3112 *
3113 * The list of all the hardware IPs that make up the asic is walked and
3114 * the check_soft_reset callbacks are run. check_soft_reset determines
3115 * if the asic is still hung or not.
3116 * Returns true if any of the IPs are still in a hung state, false if not.
3117 */
06ec9070 3118static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
3119{
3120 int i;
3121 bool asic_hang = false;
3122
f993d628
ML
3123 if (amdgpu_sriov_vf(adev))
3124 return true;
3125
8bc04c29
AD
3126 if (amdgpu_asic_need_full_reset(adev))
3127 return true;
3128
63fbf42f 3129 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3130 if (!adev->ip_blocks[i].status.valid)
63fbf42f 3131 continue;
a1255107
AD
3132 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
3133 adev->ip_blocks[i].status.hang =
3134 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
3135 if (adev->ip_blocks[i].status.hang) {
3136 DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
3137 asic_hang = true;
3138 }
3139 }
3140 return asic_hang;
3141}
3142
e3ecdffa
AD
3143/**
3144 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3145 *
3146 * @adev: amdgpu_device pointer
3147 *
3148 * The list of all the hardware IPs that make up the asic is walked and the
3149 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
3150 * handles any IP specific hardware or software state changes that are
3151 * necessary for a soft reset to succeed.
3152 * Returns 0 on success, negative error code on failure.
3153 */
06ec9070 3154static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
3155{
3156 int i, r = 0;
3157
3158 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3159 if (!adev->ip_blocks[i].status.valid)
d31a501e 3160 continue;
a1255107
AD
3161 if (adev->ip_blocks[i].status.hang &&
3162 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3163 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
3164 if (r)
3165 return r;
3166 }
3167 }
3168
3169 return 0;
3170}
3171
e3ecdffa
AD
3172/**
3173 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3174 *
3175 * @adev: amdgpu_device pointer
3176 *
3177 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
3178 * reset is necessary to recover.
3179 * Returns true if a full asic reset is required, false if not.
3180 */
06ec9070 3181static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 3182{
da146d3b
AD
3183 int i;
3184
8bc04c29
AD
3185 if (amdgpu_asic_need_full_reset(adev))
3186 return true;
3187
da146d3b 3188 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3189 if (!adev->ip_blocks[i].status.valid)
da146d3b 3190 continue;
a1255107
AD
3191 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3192 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3193 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
3194 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3195 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 3196 if (adev->ip_blocks[i].status.hang) {
da146d3b
AD
3197 DRM_INFO("Some block need full reset!\n");
3198 return true;
3199 }
3200 }
35d782fe
CZ
3201 }
3202 return false;
3203}
3204
e3ecdffa
AD
3205/**
3206 * amdgpu_device_ip_soft_reset - do a soft reset
3207 *
3208 * @adev: amdgpu_device pointer
3209 *
3210 * The list of all the hardware IPs that make up the asic is walked and the
3211 * soft_reset callbacks are run if the block is hung. soft_reset handles any
3212 * IP specific hardware or software state changes that are necessary to soft
3213 * reset the IP.
3214 * Returns 0 on success, negative error code on failure.
3215 */
06ec9070 3216static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3217{
3218 int i, r = 0;
3219
3220 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3221 if (!adev->ip_blocks[i].status.valid)
35d782fe 3222 continue;
a1255107
AD
3223 if (adev->ip_blocks[i].status.hang &&
3224 adev->ip_blocks[i].version->funcs->soft_reset) {
3225 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
3226 if (r)
3227 return r;
3228 }
3229 }
3230
3231 return 0;
3232}
3233
e3ecdffa
AD
3234/**
3235 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
3236 *
3237 * @adev: amdgpu_device pointer
3238 *
3239 * The list of all the hardware IPs that make up the asic is walked and the
3240 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
3241 * handles any IP specific hardware or software state changes that are
3242 * necessary after the IP has been soft reset.
3243 * Returns 0 on success, negative error code on failure.
3244 */
06ec9070 3245static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3246{
3247 int i, r = 0;
3248
3249 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3250 if (!adev->ip_blocks[i].status.valid)
35d782fe 3251 continue;
a1255107
AD
3252 if (adev->ip_blocks[i].status.hang &&
3253 adev->ip_blocks[i].version->funcs->post_soft_reset)
3254 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
3255 if (r)
3256 return r;
3257 }
3258
3259 return 0;
3260}
3261
e3ecdffa 3262/**
c33adbc7 3263 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
3264 *
3265 * @adev: amdgpu_device pointer
3266 *
3267 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
3268 * restore things like GPUVM page tables after a GPU reset where
3269 * the contents of VRAM might be lost.
403009bf
CK
3270 *
3271 * Returns:
3272 * 0 on success, negative error code on failure.
e3ecdffa 3273 */
c33adbc7 3274static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 3275{
c41d1cf6 3276 struct dma_fence *fence = NULL, *next = NULL;
403009bf
CK
3277 struct amdgpu_bo *shadow;
3278 long r = 1, tmo;
c41d1cf6
ML
3279
3280 if (amdgpu_sriov_runtime(adev))
b045d3af 3281 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
3282 else
3283 tmo = msecs_to_jiffies(100);
3284
3285 DRM_INFO("recover vram bo from shadow start\n");
3286 mutex_lock(&adev->shadow_list_lock);
403009bf
CK
3287 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
3288
3289 /* No need to recover an evicted BO */
3290 if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
b575f10d 3291 shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
403009bf
CK
3292 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
3293 continue;
3294
3295 r = amdgpu_bo_restore_shadow(shadow, &next);
3296 if (r)
3297 break;
3298
c41d1cf6 3299 if (fence) {
1712fb1a 3300 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
3301 dma_fence_put(fence);
3302 fence = next;
1712fb1a 3303 if (tmo == 0) {
3304 r = -ETIMEDOUT;
c41d1cf6 3305 break;
1712fb1a 3306 } else if (tmo < 0) {
3307 r = tmo;
3308 break;
3309 }
403009bf
CK
3310 } else {
3311 fence = next;
c41d1cf6 3312 }
c41d1cf6
ML
3313 }
3314 mutex_unlock(&adev->shadow_list_lock);
3315
403009bf
CK
3316 if (fence)
3317 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
3318 dma_fence_put(fence);
3319
1712fb1a 3320 if (r < 0 || tmo <= 0) {
3321 DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
3322 return -EIO;
3323 }
c41d1cf6 3324
403009bf
CK
3325 DRM_INFO("recover vram bo from shadow done\n");
3326 return 0;
c41d1cf6
ML
3327}
3328
a90ad3c2 3329
e3ecdffa 3330/**
06ec9070 3331 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e
ML
3332 *
3333 * @adev: amdgpu device pointer
87e3f136 3334 * @from_hypervisor: request from hypervisor
5740682e
ML
3335 *
3336 * Do a VF FLR and reinitialize the ASIC.
3f48c681 3337 * Returns 0 on success, negative error code on failure.
e3ecdffa
AD
3338 */
3339static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
3340 bool from_hypervisor)
5740682e
ML
3341{
3342 int r;
3343
3344 if (from_hypervisor)
3345 r = amdgpu_virt_request_full_gpu(adev, true);
3346 else
3347 r = amdgpu_virt_reset_gpu(adev);
3348 if (r)
3349 return r;
a90ad3c2 3350
f81e8d53
WL
3351 amdgpu_amdkfd_pre_reset(adev);
3352
a90ad3c2 3353 /* Resume IP prior to SMC */
06ec9070 3354 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
3355 if (r)
3356 goto error;
a90ad3c2
ML
3357
3358 /* we need recover gart prior to run SMC/CP/SDMA resume */
c1c7ce8f 3359 amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
a90ad3c2 3360
7a3e0bb2
RZ
3361 r = amdgpu_device_fw_loading(adev);
3362 if (r)
3363 return r;
3364
a90ad3c2 3365 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 3366 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
3367 if (r)
3368 goto error;
a90ad3c2
ML
3369
3370 amdgpu_irq_gpu_reset_resume_helper(adev);
5740682e 3371 r = amdgpu_ib_ring_tests(adev);
f81e8d53 3372 amdgpu_amdkfd_post_reset(adev);
a90ad3c2 3373
abc34253 3374error:
d3c117e5 3375 amdgpu_virt_init_data_exchange(adev);
abc34253 3376 amdgpu_virt_release_full_gpu(adev, true);
3377 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
3378 atomic_inc(&adev->vram_lost_counter);
c33adbc7 3379 r = amdgpu_device_recover_vram(adev);
3380 }
3381
3382 return r;
3383}
3384
3385/**
3386 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
3387 *
3388 * @adev: amdgpu device pointer
3389 *
3390 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
3391 * a hung GPU.
3392 */
3393bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
3394{
3395 if (!amdgpu_device_ip_check_soft_reset(adev)) {
3396 DRM_INFO("Timeout, but no hardware hang detected.\n");
3397 return false;
3398 }
3399
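	/*
	 * amdgpu_gpu_recovery semantics: 0 disables recovery, -1 (auto) limits
	 * it to the ASICs listed below, any other value forces it on. SR-IOV
	 * VFs attempt recovery whenever it is not explicitly disabled.
	 */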
3400 if (amdgpu_gpu_recovery == 0)
3401 goto disabled;
3402
3403 if (amdgpu_sriov_vf(adev))
3404 return true;
3405
3406 if (amdgpu_gpu_recovery == -1) {
3407 switch (adev->asic_type) {
3408 case CHIP_BONAIRE:
3409 case CHIP_HAWAII:
3410 case CHIP_TOPAZ:
3411 case CHIP_TONGA:
3412 case CHIP_FIJI:
3413 case CHIP_POLARIS10:
3414 case CHIP_POLARIS11:
3415 case CHIP_POLARIS12:
3416 case CHIP_VEGAM:
3417 case CHIP_VEGA20:
3418 case CHIP_VEGA10:
3419 case CHIP_VEGA12:
3420 break;
3421 default:
3422 goto disabled;
3423 }
3424 }
3425
3426 return true;
3427
3428disabled:
3429 DRM_INFO("GPU recovery disabled.\n");
3430 return false;
3431}
3432
5c6dd71e 3433
3434static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
3435 struct amdgpu_job *job,
3436 bool *need_full_reset_arg)
3437{
3438 int i, r = 0;
3439 bool need_full_reset = *need_full_reset_arg;
71182665 3440
71182665 3441 /* block all schedulers and reset given job's ring */
3442 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3443 struct amdgpu_ring *ring = adev->rings[i];
3444
51687759 3445 if (!ring || !ring->sched.thread)
0875dc9e 3446 continue;
5740682e 3447
3448 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
3449 amdgpu_fence_driver_force_completion(ring);
0875dc9e 3450 }
d38ceaf9 3451
3452 if (job)
3453 drm_sched_increase_karma(&job->base);
3454
1d721ed6 3455 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
3456 if (!amdgpu_sriov_vf(adev)) {
3457
3458 if (!need_full_reset)
3459 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
3460
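		/* Try a per-IP soft reset first; fall back to a full ASIC reset if it fails or is insufficient. */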
3461 if (!need_full_reset) {
3462 amdgpu_device_ip_pre_soft_reset(adev);
3463 r = amdgpu_device_ip_soft_reset(adev);
3464 amdgpu_device_ip_post_soft_reset(adev);
3465 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
3466 DRM_INFO("soft reset failed, will fallback to full reset!\n");
3467 need_full_reset = true;
3468 }
3469 }
3470
3471 if (need_full_reset)
3472 r = amdgpu_device_ip_suspend(adev);
3473
3474 *need_full_reset_arg = need_full_reset;
3475 }
3476
3477 return r;
3478}
3479
3480static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
3481 struct list_head *device_list_handle,
3482 bool *need_full_reset_arg)
3483{
3484 struct amdgpu_device *tmp_adev = NULL;
3485 bool need_full_reset = *need_full_reset_arg, vram_lost = false;
3486 int r = 0;
3487
3488 /*
3489 * ASIC reset has to be done on all XGMI hive nodes ASAP
3490 * to allow proper link negotiation in FW (within 1 sec)
3491 */
3492 if (need_full_reset) {
3493 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3494 /* For XGMI run all resets in parallel to speed up the process */
3495 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
3496 if (!queue_work(system_highpri_wq, &tmp_adev->xgmi_reset_work))
3497 r = -EALREADY;
3498 } else
3499 r = amdgpu_asic_reset(tmp_adev);
3500
3501 if (r) {
fed184e9 3502 DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s",
26bc5340 3503 r, tmp_adev->ddev->unique);
3504 break;
3505 }
3506 }
3507
3508 /* For XGMI wait for all PSP resets to complete before proceed */
3509 if (!r) {
3510 list_for_each_entry(tmp_adev, device_list_handle,
3511 gmc.xgmi.head) {
3512 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
3513 flush_work(&tmp_adev->xgmi_reset_work);
3514 r = tmp_adev->asic_reset_res;
3515 if (r)
3516 break;
3517 }
3518 }
2be4c4a9 3519
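		/* Re-reserve RAS bad pages on each node, presumably so retired VRAM pages stay out of use after reset. */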
3520 list_for_each_entry(tmp_adev, device_list_handle,
3521 gmc.xgmi.head) {
3522 amdgpu_ras_reserve_bad_pages(tmp_adev);
3523 }
3524 }
3525 }
3526
3527
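	/*
	 * Reinitialize each device after the reset: post the card via the VBIOS,
	 * resume IPs in two phases, recover the GTT manager, reload firmware,
	 * then run late init and resume RAS.
	 */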
3528 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3529 if (need_full_reset) {
3530 /* post card */
3531 if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
3532 DRM_WARN("asic atom init failed!");
3533
3534 if (!r) {
3535 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
3536 r = amdgpu_device_ip_resume_phase1(tmp_adev);
3537 if (r)
3538 goto out;
3539
3540 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
3541 if (vram_lost) {
77e7f829 3542 DRM_INFO("VRAM is lost due to GPU reset!\n");
3543 atomic_inc(&tmp_adev->vram_lost_counter);
3544 }
3545
3546 r = amdgpu_gtt_mgr_recover(
3547 &tmp_adev->mman.bdev.man[TTM_PL_TT]);
3548 if (r)
3549 goto out;
3550
3551 r = amdgpu_device_fw_loading(tmp_adev);
3552 if (r)
3553 return r;
3554
3555 r = amdgpu_device_ip_resume_phase2(tmp_adev);
3556 if (r)
3557 goto out;
3558
3559 if (vram_lost)
3560 amdgpu_device_fill_reset_magic(tmp_adev);
3561
7c04ca50 3562 r = amdgpu_device_ip_late_init(tmp_adev);
3563 if (r)
3564 goto out;
3565
e79a04d5 3566 /* must succeed. */
511fdbc3 3567 amdgpu_ras_resume(tmp_adev);
e79a04d5 3568
3569 /* Update PSP FW topology after reset */
3570 if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
3571 r = amdgpu_xgmi_update_topology(hive, tmp_adev);
3572 }
3573 }
3574
3575
3576out:
3577 if (!r) {
3578 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
3579 r = amdgpu_ib_ring_tests(tmp_adev);
3580 if (r) {
3581 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
3582 r = amdgpu_device_ip_suspend(tmp_adev);
3583 need_full_reset = true;
3584 r = -EAGAIN;
3585 goto end;
3586 }
3587 }
3588
3589 if (!r)
3590 r = amdgpu_device_recover_vram(tmp_adev);
3591 else
3592 tmp_adev->asic_reset_res = r;
3593 }
3594
3595end:
3596 *need_full_reset_arg = need_full_reset;
3597 return r;
3598}
3599
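/*
 * Serialize resets on a single device. With trylock, a concurrent reset
 * bails out instead of blocking on lock_reset.
 */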
1d721ed6 3600static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
26bc5340 3601{
3602 if (trylock) {
3603 if (!mutex_trylock(&adev->lock_reset))
3604 return false;
3605 } else
3606 mutex_lock(&adev->lock_reset);
5740682e 3607
3608 atomic_inc(&adev->gpu_reset_counter);
3609 adev->in_gpu_reset = 1;
7b184b00 3610 /* Block kfd: SRIOV would do it separately */
3611 if (!amdgpu_sriov_vf(adev))
3612 amdgpu_amdkfd_pre_reset(adev);
3613
3614 return true;
26bc5340 3615}
d38ceaf9 3616
3617static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
3618{
7b184b00 3619 /*unlock kfd: SRIOV would do it separately */
3620 if (!amdgpu_sriov_vf(adev))
3621 amdgpu_amdkfd_post_reset(adev);
89041940 3622 amdgpu_vf_error_trans_all(adev);
3623 adev->in_gpu_reset = 0;
3624 mutex_unlock(&adev->lock_reset);
3625}
3626
3627
3628/**
3629 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
3630 *
3631 * @adev: amdgpu device pointer
3632 * @job: the job which triggered the hang
3633 *
3634 * Attempt to reset the GPU if it has hung (all ASICs).
3635 * Attempts a soft reset or a full reset and reinitializes the ASIC.
3636 * Returns 0 for success or an error on failure.
3637 */
3638
3639int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
3640 struct amdgpu_job *job)
3641{
3642 struct list_head device_list, *device_list_handle = NULL;
3643 bool need_full_reset, job_signaled;
26bc5340 3644 struct amdgpu_hive_info *hive = NULL;
26bc5340 3645 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 3646 int i, r = 0;
26bc5340 3647
1d721ed6 3648 need_full_reset = job_signaled = false;
3649 INIT_LIST_HEAD(&device_list);
3650
3651 dev_info(adev->dev, "GPU reset begin!\n");
3652
beff74bc 3653 cancel_delayed_work_sync(&adev->delayed_init_work);
c53e4db7 3654
3655 hive = amdgpu_get_xgmi_hive(adev, false);
3656
26bc5340 3657 /*
3658 * Here we trylock to avoid a chain of resets executing, triggered
3659 * either by jobs on different adevs in an XGMI hive or by jobs on
3660 * different schedulers for the same device, while this TO handler is running.
3661 * We always reset all schedulers for a device and all devices in an XGMI
3662 * hive, so that should take care of them too.
26bc5340 3663 */
3664
3665 if (hive && !mutex_trylock(&hive->reset_lock)) {
3666 DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
3667 job->base.id, hive->hive_id);
26bc5340 3668 return 0;
1d721ed6 3669 }
3670
3671 /* Start with adev pre asic reset first for soft reset check.*/
3672 if (!amdgpu_device_lock_adev(adev, !hive)) {
3673 DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
3674 job->base.id);
3675 return 0;
3676 }
3677
3678 /* Build list of devices to reset */
1d721ed6 3679 if (adev->gmc.xgmi.num_physical_nodes > 1) {
3680 if (!hive) {
3681 amdgpu_device_unlock_adev(adev);
3682 return -ENODEV;
3683 }
3684
3685 /*
3686 * In case we are in XGMI hive mode, device reset is done for all
3687 * nodes in the hive to retrain all XGMI links; hence the reset
3688 * sequence is executed in a loop on all nodes.
3689 */
3690 device_list_handle = &hive->device_list;
3691 } else {
3692 list_add_tail(&adev->gmc.xgmi.head, &device_list);
3693 device_list_handle = &device_list;
3694 }
3695
3696 /* block all schedulers and reset given job's ring */
3697 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3698 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3699 struct amdgpu_ring *ring = tmp_adev->rings[i];
3700
3701 if (!ring || !ring->sched.thread)
3702 continue;
3703
3704 drm_sched_stop(&ring->sched, &job->base);
3705 }
3706 }
3707
3708
3709 /*
3710 * Must check guilty signal here since after this point all old
3711 * HW fences are force signaled.
3712 *
3713 * job->base holds a reference to parent fence
3714 */
3715 if (job && job->base.s_fence->parent &&
3716 dma_fence_is_signaled(job->base.s_fence->parent))
3717 job_signaled = true;
3718
3719 if (!amdgpu_device_ip_need_full_reset(adev))
3720 device_list_handle = &device_list;
3721
3722 if (job_signaled) {
3723 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
3724 goto skip_hw_reset;
3725 }
3726
3727
3728 /* Guilty job will be freed after this */
3729 r = amdgpu_device_pre_asic_reset(adev,
3730 job,
3731 &need_full_reset);
3732 if (r) {
3733 /* TODO: should we stop? */
3734 DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
3735 r, adev->ddev->unique);
3736 adev->asic_reset_res = r;
3737 }
3738
3739retry: /* Rest of adevs pre asic reset from XGMI hive. */
3740 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3741
3742 if (tmp_adev == adev)
3743 continue;
3744
1d721ed6 3745 amdgpu_device_lock_adev(tmp_adev, false);
3746 r = amdgpu_device_pre_asic_reset(tmp_adev,
3747 NULL,
3748 &need_full_reset);
3749 /* TODO: should we stop? */
3750 if (r) {
3751 DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
3752 r, tmp_adev->ddev->unique);
3753 tmp_adev->asic_reset_res = r;
3754 }
3755 }
3756
3757 /* Actual ASIC resets if needed.*/
3758 /* TODO Implement XGMI hive reset logic for SRIOV */
3759 if (amdgpu_sriov_vf(adev)) {
3760 r = amdgpu_device_reset_sriov(adev, job ? false : true);
3761 if (r)
3762 adev->asic_reset_res = r;
3763 } else {
3764 r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
3765 if (r && r == -EAGAIN)
3766 goto retry;
3767 }
3768
3769skip_hw_reset:
3770
3771 /* Post ASIC reset for all devs. */
3772 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3773 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3774 struct amdgpu_ring *ring = tmp_adev->rings[i];
3775
3776 if (!ring || !ring->sched.thread)
3777 continue;
3778
3779 /* No point in resubmitting jobs if we didn't do a HW reset */
3780 if (!tmp_adev->asic_reset_res && !job_signaled)
3781 drm_sched_resubmit_jobs(&ring->sched);
3782
3783 drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
3784 }
3785
3786 if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
3787 drm_helper_resume_force_mode(tmp_adev->ddev);
3788 }
3789
3790 tmp_adev->asic_reset_res = 0;
3791
3792 if (r) {
3793 /* bad news, how to tell it to userspace ? */
3794 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter));
3795 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
3796 } else {
3797 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&adev->gpu_reset_counter));
3798 }
3799
3800 amdgpu_device_unlock_adev(tmp_adev);
3801 }
3802
1d721ed6 3803 if (hive)
22d6575b 3804 mutex_unlock(&hive->reset_lock);
3805
3806 if (r)
3807 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
3808 return r;
3809}
3810
3811/**
3812 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
3813 *
3814 * @adev: amdgpu_device pointer
3815 *
3816 * Fetches and stores in the driver the PCIE capabilities (gen speed
3817 * and lanes) of the slot the device is in. Handles APUs and
3818 * virtualized environments where PCIE config space may not be available.
3819 */
5494d864 3820static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 3821{
5d9a6330 3822 struct pci_dev *pdev;
3823 enum pci_bus_speed speed_cap, platform_speed_cap;
3824 enum pcie_link_width platform_link_width;
d0dd7f0c 3825
3826 if (amdgpu_pcie_gen_cap)
3827 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 3828
3829 if (amdgpu_pcie_lane_cap)
3830 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 3831
3832 /* covers APUs as well */
3833 if (pci_is_root_bus(adev->pdev->bus)) {
3834 if (adev->pm.pcie_gen_mask == 0)
3835 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
3836 if (adev->pm.pcie_mlw_mask == 0)
3837 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 3838 return;
cd474ba0 3839 }
d0dd7f0c 3840
3841 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
3842 return;
3843
3844 pcie_bandwidth_available(adev->pdev, NULL,
3845 &platform_speed_cap, &platform_link_width);
c5313457 3846
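	/*
	 * The gen mask combines two halves: what the ASIC itself supports
	 * (pcie_get_speed_cap) and what the platform above it supports
	 * (pcie_bandwidth_available).
	 */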
cd474ba0 3847 if (adev->pm.pcie_gen_mask == 0) {
3848 /* asic caps */
3849 pdev = adev->pdev;
3850 speed_cap = pcie_get_speed_cap(pdev);
3851 if (speed_cap == PCI_SPEED_UNKNOWN) {
3852 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3853 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
3854 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 3855 } else {
3856 if (speed_cap == PCIE_SPEED_16_0GT)
3857 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3858 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
3859 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
3860 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
3861 else if (speed_cap == PCIE_SPEED_8_0GT)
3862 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3863 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
3864 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
3865 else if (speed_cap == PCIE_SPEED_5_0GT)
3866 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3867 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
3868 else
3869 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
3870 }
3871 /* platform caps */
c5313457 3872 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
3873 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3874 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
3875 } else {
c5313457 3876 if (platform_speed_cap == PCIE_SPEED_16_0GT)
3877 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3878 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
3879 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
3880 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 3881 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
3882 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3883 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
3884 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 3885 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
3886 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3887 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
3888 else
3889 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
3890
3891 }
3892 }
3893 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 3894 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
3895 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
3896 } else {
c5313457 3897 switch (platform_link_width) {
5d9a6330 3898 case PCIE_LNK_X32:
3899 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
3900 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
3901 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
3902 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
3903 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3904 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3905 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3906 break;
5d9a6330 3907 case PCIE_LNK_X16:
3908 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
3909 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
3910 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
3911 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3912 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3913 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3914 break;
5d9a6330 3915 case PCIE_LNK_X12:
3916 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
3917 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
3918 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3919 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3920 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3921 break;
5d9a6330 3922 case PCIE_LNK_X8:
3923 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
3924 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3925 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3926 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3927 break;
5d9a6330 3928 case PCIE_LNK_X4:
3929 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3930 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3931 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3932 break;
5d9a6330 3933 case PCIE_LNK_X2:
3934 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3935 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3936 break;
5d9a6330 3937 case PCIE_LNK_X1:
3938 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
3939 break;
3940 default:
3941 break;
3942 }
3943 }
3944 }
3945}
d38ceaf9 3946