drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/console.h>
#include <linux/slab.h>

#include <drm/drm_atomic_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "nv.h"
#include "bif/bif_4_1_d.h"
#include <linux/pci.h>
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
#include "amdgpu_pmu.h"

MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");

#define AMDGPU_RESUME_MS		2000

static const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"NAVI10",
	"LAST",
};

/**
 * DOC: pcie_replay_count
 *
 * The amdgpu driver provides a sysfs API for reporting the total number
 * of PCIe replays (NAKs).
 * The file pcie_replay_count is used for this and returns the total
 * number of replays as the sum of the NAKs generated and NAKs received.
 */

static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;
	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);

	return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
}

static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
		amdgpu_device_get_pcie_replay_count, NULL);

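/*
 * Illustrative usage sketch (not part of the driver): the attribute above is
 * exposed through sysfs on the GPU's PCI device, so userspace can read the
 * replay count with something like the following (the exact card index is
 * an assumption for this example):
 *
 *	cat /sys/class/drm/card0/device/pcie_replay_count
 */
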
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);

/**
 * amdgpu_device_is_px - Is the device a dGPU with HG/PX power control
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with HG/PX power control,
 * otherwise returns false.
 */
bool amdgpu_device_is_px(struct drm_device *dev)
{
	struct amdgpu_device *adev = dev->dev_private;

	if (adev->flags & AMD_IS_PX)
		return true;
	return false;
}

146/*
147 * MMIO register access helper functions.
148 */
e3ecdffa
AD
149/**
150 * amdgpu_mm_rreg - read a memory mapped IO register
151 *
152 * @adev: amdgpu_device pointer
153 * @reg: dword aligned register offset
154 * @acc_flags: access flags which require special behavior
155 *
156 * Returns the 32 bit value from the offset specified.
157 */
d38ceaf9 158uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
15d72fd7 159 uint32_t acc_flags)
d38ceaf9 160{
f4b373f4
TSD
161 uint32_t ret;
162
43ca8efa 163 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
bc992ba5 164 return amdgpu_virt_kiq_rreg(adev, reg);
bc992ba5 165
15d72fd7 166 if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
f4b373f4 167 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
d38ceaf9
AD
168 else {
169 unsigned long flags;
d38ceaf9
AD
170
171 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
172 writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
173 ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
174 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
d38ceaf9 175 }
f4b373f4
TSD
176 trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
177 return ret;
d38ceaf9
AD
178}
179
/*
 * MMIO register read with bytes helper functions
 * @offset: byte offset from MMIO start
 */

/**
 * amdgpu_mm_rreg8 - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 *
 * Returns the 8 bit value from the offset specified.
 */
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) {
	if (offset < adev->rmmio_size)
		return (readb(adev->rmmio + offset));
	BUG();
}

/*
 * MMIO register write with bytes helper functions
 * @offset: byte offset from MMIO start
 * @value: the value to be written to the register
 */
/**
 * amdgpu_mm_wreg8 - write to a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) {
	if (offset < adev->rmmio_size)
		writeb(value, adev->rmmio + offset);
	else
		BUG();
}

222/**
223 * amdgpu_mm_wreg - write to a memory mapped IO register
224 *
225 * @adev: amdgpu_device pointer
226 * @reg: dword aligned register offset
227 * @v: 32 bit value to write to the register
228 * @acc_flags: access flags which require special behavior
229 *
230 * Writes the value specified to the offset specified.
231 */
d38ceaf9 232void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
15d72fd7 233 uint32_t acc_flags)
d38ceaf9 234{
f4b373f4 235 trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);
4e99a44e 236
47ed4e1c
KW
237 if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
238 adev->last_mm_index = v;
239 }
240
43ca8efa 241 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
bc992ba5 242 return amdgpu_virt_kiq_wreg(adev, reg, v);
bc992ba5 243
15d72fd7 244 if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
d38ceaf9
AD
245 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
246 else {
247 unsigned long flags;
248
249 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
250 writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
251 writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
252 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
253 }
47ed4e1c
KW
254
255 if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
256 udelay(500);
257 }
d38ceaf9
AD
258}
259
e3ecdffa
AD
260/**
261 * amdgpu_io_rreg - read an IO register
262 *
263 * @adev: amdgpu_device pointer
264 * @reg: dword aligned register offset
265 *
266 * Returns the 32 bit value from the offset specified.
267 */
d38ceaf9
AD
268u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
269{
270 if ((reg * 4) < adev->rio_mem_size)
271 return ioread32(adev->rio_mem + (reg * 4));
272 else {
273 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
274 return ioread32(adev->rio_mem + (mmMM_DATA * 4));
275 }
276}
277
e3ecdffa
AD
278/**
279 * amdgpu_io_wreg - write to an IO register
280 *
281 * @adev: amdgpu_device pointer
282 * @reg: dword aligned register offset
283 * @v: 32 bit value to write to the register
284 *
285 * Writes the value specified to the offset specified.
286 */
d38ceaf9
AD
287void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
288{
47ed4e1c
KW
289 if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
290 adev->last_mm_index = v;
291 }
d38ceaf9
AD
292
293 if ((reg * 4) < adev->rio_mem_size)
294 iowrite32(v, adev->rio_mem + (reg * 4));
295 else {
296 iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
297 iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
298 }
47ed4e1c
KW
299
300 if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
301 udelay(500);
302 }
d38ceaf9
AD
303}
304
305/**
306 * amdgpu_mm_rdoorbell - read a doorbell dword
307 *
308 * @adev: amdgpu_device pointer
309 * @index: doorbell index
310 *
311 * Returns the value in the doorbell aperture at the
312 * requested doorbell index (CIK).
313 */
314u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
315{
316 if (index < adev->doorbell.num_doorbells) {
317 return readl(adev->doorbell.ptr + index);
318 } else {
319 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
320 return 0;
321 }
322}
323
324/**
325 * amdgpu_mm_wdoorbell - write a doorbell dword
326 *
327 * @adev: amdgpu_device pointer
328 * @index: doorbell index
329 * @v: value to write
330 *
331 * Writes @v to the doorbell aperture at the
332 * requested doorbell index (CIK).
333 */
334void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
335{
336 if (index < adev->doorbell.num_doorbells) {
337 writel(v, adev->doorbell.ptr + index);
338 } else {
339 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
340 }
341}
342
832be404
KW
343/**
344 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
345 *
346 * @adev: amdgpu_device pointer
347 * @index: doorbell index
348 *
349 * Returns the value in the doorbell aperture at the
350 * requested doorbell index (VEGA10+).
351 */
352u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
353{
354 if (index < adev->doorbell.num_doorbells) {
355 return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
356 } else {
357 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
358 return 0;
359 }
360}
361
362/**
363 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
364 *
365 * @adev: amdgpu_device pointer
366 * @index: doorbell index
367 * @v: value to write
368 *
369 * Writes @v to the doorbell aperture at the
370 * requested doorbell index (VEGA10+).
371 */
372void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
373{
374 if (index < adev->doorbell.num_doorbells) {
375 atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
376 } else {
377 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
378 }
379}
380
d38ceaf9
AD
381/**
382 * amdgpu_invalid_rreg - dummy reg read function
383 *
384 * @adev: amdgpu device pointer
385 * @reg: offset of register
386 *
387 * Dummy register read function. Used for register blocks
388 * that certain asics don't have (all asics).
389 * Returns the value in the register.
390 */
391static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
392{
393 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
394 BUG();
395 return 0;
396}
397
/**
 * amdgpu_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
408static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
409{
410 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
411 reg, v);
412 BUG();
413}
414
415/**
416 * amdgpu_block_invalid_rreg - dummy reg read function
417 *
418 * @adev: amdgpu device pointer
419 * @block: offset of instance
420 * @reg: offset of register
421 *
422 * Dummy register read function. Used for register blocks
423 * that certain asics don't have (all asics).
424 * Returns the value in the register.
425 */
426static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
427 uint32_t block, uint32_t reg)
428{
429 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
430 reg, block);
431 BUG();
432 return 0;
433}
434
/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
446static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
447 uint32_t block,
448 uint32_t reg, uint32_t v)
449{
450 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
451 reg, block, v);
452 BUG();
453}
454
e3ecdffa
AD
455/**
456 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
457 *
458 * @adev: amdgpu device pointer
459 *
460 * Allocates a scratch page of VRAM for use by various things in the
461 * driver.
462 */
06ec9070 463static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
d38ceaf9 464{
a4a02777
CK
465 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
466 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
467 &adev->vram_scratch.robj,
468 &adev->vram_scratch.gpu_addr,
469 (void **)&adev->vram_scratch.ptr);
d38ceaf9
AD
470}
471
e3ecdffa
AD
472/**
473 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
474 *
475 * @adev: amdgpu device pointer
476 *
477 * Frees the VRAM scratch page.
478 */
06ec9070 479static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
d38ceaf9 480{
078af1a3 481 amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
d38ceaf9
AD
482}
483
/**
 * amdgpu_device_program_register_sequence - program an array of registers.
 *
 * @adev: amdgpu_device pointer
 * @registers: pointer to the register array
 * @array_size: size of the register array
 *
 * Programs an array of registers with AND and OR masks.
 * This is a helper for setting golden registers.
 */
494void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
495 const u32 *registers,
496 const u32 array_size)
d38ceaf9
AD
497{
498 u32 tmp, reg, and_mask, or_mask;
499 int i;
500
501 if (array_size % 3)
502 return;
503
504 for (i = 0; i < array_size; i +=3) {
505 reg = registers[i + 0];
506 and_mask = registers[i + 1];
507 or_mask = registers[i + 2];
508
509 if (and_mask == 0xffffffff) {
510 tmp = or_mask;
511 } else {
512 tmp = RREG32(reg);
513 tmp &= ~and_mask;
e0d07657
HZ
514 if (adev->family >= AMDGPU_FAMILY_AI)
515 tmp |= (or_mask & and_mask);
516 else
517 tmp |= or_mask;
d38ceaf9
AD
518 }
519 WREG32(reg, tmp);
520 }
521}
522
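/*
 * Illustrative sketch (not taken from the driver): callers pass a flat array
 * of (register, and_mask, or_mask) triplets, typically generated golden
 * settings. The register offsets below are made up purely for illustration:
 *
 *	static const u32 example_golden_settings[] = {
 *		0x1234, 0xffffffff, 0x00000001,	// and_mask of all ones: full overwrite
 *		0x5678, 0x0000ff00, 0x00002100,	// otherwise: read-modify-write
 *	};
 *
 *	amdgpu_device_program_register_sequence(adev, example_golden_settings,
 *						ARRAY_SIZE(example_golden_settings));
 */
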
e3ecdffa
AD
523/**
524 * amdgpu_device_pci_config_reset - reset the GPU
525 *
526 * @adev: amdgpu_device pointer
527 *
528 * Resets the GPU using the pci config reset sequence.
529 * Only applicable to asics prior to vega10.
530 */
8111c387 531void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
d38ceaf9
AD
532{
533 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
534}
535
536/*
537 * GPU doorbell aperture helpers function.
538 */
539/**
06ec9070 540 * amdgpu_device_doorbell_init - Init doorbell driver information.
d38ceaf9
AD
541 *
542 * @adev: amdgpu_device pointer
543 *
544 * Init doorbell driver information (CIK)
545 * Returns 0 on success, error on failure.
546 */
06ec9070 547static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
d38ceaf9 548{
6585661d 549
705e519e
CK
550 /* No doorbell on SI hardware generation */
551 if (adev->asic_type < CHIP_BONAIRE) {
552 adev->doorbell.base = 0;
553 adev->doorbell.size = 0;
554 adev->doorbell.num_doorbells = 0;
555 adev->doorbell.ptr = NULL;
556 return 0;
557 }
558
d6895ad3
CK
559 if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
560 return -EINVAL;
561
22357775
AD
562 amdgpu_asic_init_doorbell_index(adev);
563
d38ceaf9
AD
564 /* doorbell bar mapping */
565 adev->doorbell.base = pci_resource_start(adev->pdev, 2);
566 adev->doorbell.size = pci_resource_len(adev->pdev, 2);
567
edf600da 568 adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
9564f192 569 adev->doorbell_index.max_assignment+1);
d38ceaf9
AD
570 if (adev->doorbell.num_doorbells == 0)
571 return -EINVAL;
572
	/* For Vega, reserve and map two pages on the doorbell BAR since the SDMA
	 * paging queue doorbell uses the second page. The
	 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
	 * doorbells are in the first page. So with the paging queue enabled,
	 * the max num_doorbells is extended by one page (0x400 dwords).
	 */
579 if (adev->asic_type >= CHIP_VEGA10)
88dc26e4 580 adev->doorbell.num_doorbells += 0x400;
ec3db8a6 581
8972e5d2
CK
582 adev->doorbell.ptr = ioremap(adev->doorbell.base,
583 adev->doorbell.num_doorbells *
584 sizeof(u32));
585 if (adev->doorbell.ptr == NULL)
d38ceaf9 586 return -ENOMEM;
d38ceaf9
AD
587
588 return 0;
589}
590
591/**
06ec9070 592 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
d38ceaf9
AD
593 *
594 * @adev: amdgpu_device pointer
595 *
596 * Tear down doorbell driver information (CIK)
597 */
06ec9070 598static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
d38ceaf9
AD
599{
600 iounmap(adev->doorbell.ptr);
601 adev->doorbell.ptr = NULL;
602}
603
22cb0164 604
605
/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */
611
612/**
06ec9070 613 * amdgpu_device_wb_fini - Disable Writeback and free memory
d38ceaf9
AD
614 *
615 * @adev: amdgpu_device pointer
616 *
617 * Disables Writeback and frees the Writeback memory (all asics).
618 * Used at driver shutdown.
619 */
06ec9070 620static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
d38ceaf9
AD
621{
622 if (adev->wb.wb_obj) {
a76ed485
AD
623 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
624 &adev->wb.gpu_addr,
625 (void **)&adev->wb.wb);
d38ceaf9
AD
626 adev->wb.wb_obj = NULL;
627 }
628}
629
/**
 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
 *
 * @adev: amdgpu_device pointer
 *
 * Initializes writeback and allocates writeback memory (all asics).
 * Used at driver startup.
 * Returns 0 on success or a negative error code on failure.
 */
06ec9070 639static int amdgpu_device_wb_init(struct amdgpu_device *adev)
d38ceaf9
AD
640{
641 int r;
642
643 if (adev->wb.wb_obj == NULL) {
97407b63
AD
644 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
645 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
a76ed485
AD
646 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
647 &adev->wb.wb_obj, &adev->wb.gpu_addr,
648 (void **)&adev->wb.wb);
d38ceaf9
AD
649 if (r) {
650 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
651 return r;
652 }
d38ceaf9
AD
653
654 adev->wb.num_wb = AMDGPU_MAX_WB;
655 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
656
657 /* clear wb memory */
73469585 658 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
d38ceaf9
AD
659 }
660
661 return 0;
662}
663
664/**
131b4b36 665 * amdgpu_device_wb_get - Allocate a wb entry
d38ceaf9
AD
666 *
667 * @adev: amdgpu_device pointer
668 * @wb: wb index
669 *
670 * Allocate a wb slot for use by the driver (all asics).
671 * Returns 0 on success or -EINVAL on failure.
672 */
131b4b36 673int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
d38ceaf9
AD
674{
675 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
d38ceaf9 676
97407b63 677 if (offset < adev->wb.num_wb) {
7014285a 678 __set_bit(offset, adev->wb.used);
63ae07ca 679 *wb = offset << 3; /* convert to dw offset */
0915fdbc
ML
680 return 0;
681 } else {
682 return -EINVAL;
683 }
684}
685
d38ceaf9 686/**
131b4b36 687 * amdgpu_device_wb_free - Free a wb entry
d38ceaf9
AD
688 *
689 * @adev: amdgpu_device pointer
690 * @wb: wb index
691 *
692 * Free a wb slot allocated for use by the driver (all asics)
693 */
131b4b36 694void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
d38ceaf9 695{
73469585 696 wb >>= 3;
d38ceaf9 697 if (wb < adev->wb.num_wb)
73469585 698 __clear_bit(wb, adev->wb.used);
d38ceaf9
AD
699}
700
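/*
 * Illustrative usage sketch (assumed caller, not part of this file): a ring
 * or IP block typically grabs a writeback slot, lets the GPU write status
 * into it, reads it back through adev->wb.wb[], and releases it on teardown:
 *
 *	u32 wb_idx;
 *
 *	if (!amdgpu_device_wb_get(adev, &wb_idx)) {
 *		u32 status = adev->wb.wb[wb_idx];	// CPU view of the slot
 *		amdgpu_device_wb_free(adev, wb_idx);
 *	}
 */
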
d6895ad3
CK
701/**
702 * amdgpu_device_resize_fb_bar - try to resize FB BAR
703 *
704 * @adev: amdgpu_device pointer
705 *
706 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
707 * to fail, but if any of the BARs is not accessible after the size we abort
708 * driver loading by returning -ENODEV.
709 */
710int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
711{
770d13b1 712 u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
d6895ad3 713 u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
31b8adab
CK
714 struct pci_bus *root;
715 struct resource *res;
716 unsigned i;
d6895ad3
CK
717 u16 cmd;
718 int r;
719
0c03b912 720 /* Bypass for VF */
721 if (amdgpu_sriov_vf(adev))
722 return 0;
723
31b8adab
CK
724 /* Check if the root BUS has 64bit memory resources */
725 root = adev->pdev->bus;
726 while (root->parent)
727 root = root->parent;
728
729 pci_bus_for_each_resource(root, res, i) {
0ebb7c54 730 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
31b8adab
CK
731 res->start > 0x100000000ull)
732 break;
733 }
734
735 /* Trying to resize is pointless without a root hub window above 4GB */
736 if (!res)
737 return 0;
738
d6895ad3
CK
739 /* Disable memory decoding while we change the BAR addresses and size */
740 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
741 pci_write_config_word(adev->pdev, PCI_COMMAND,
742 cmd & ~PCI_COMMAND_MEMORY);
743
744 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
06ec9070 745 amdgpu_device_doorbell_fini(adev);
d6895ad3
CK
746 if (adev->asic_type >= CHIP_BONAIRE)
747 pci_release_resource(adev->pdev, 2);
748
749 pci_release_resource(adev->pdev, 0);
750
751 r = pci_resize_resource(adev->pdev, 0, rbar_size);
752 if (r == -ENOSPC)
753 DRM_INFO("Not enough PCI address space for a large BAR.");
754 else if (r && r != -ENOTSUPP)
755 DRM_ERROR("Problem resizing BAR0 (%d).", r);
756
757 pci_assign_unassigned_bus_resources(adev->pdev->bus);
758
759 /* When the doorbell or fb BAR isn't available we have no chance of
760 * using the device.
761 */
06ec9070 762 r = amdgpu_device_doorbell_init(adev);
d6895ad3
CK
763 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
764 return -ENODEV;
765
766 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
767
768 return 0;
769}
a05502e5 770
/*
 * GPU helper functions.
 */
/**
 * amdgpu_device_need_post - check if the hw needs post or not
 *
 * @adev: amdgpu_device pointer
 *
 * Check if the asic has been initialized (all asics) at driver startup,
 * or if post is needed after a hw reset.
 * Returns true if post is needed, false if not.
 */
39c640c0 783bool amdgpu_device_need_post(struct amdgpu_device *adev)
d38ceaf9
AD
784{
785 uint32_t reg;
786
bec86378
ML
787 if (amdgpu_sriov_vf(adev))
788 return false;
789
790 if (amdgpu_passthrough(adev)) {
		/* for FIJI: in the whole-GPU pass-through virtualization case, after a VM
		 * reboot some old SMC firmware still needs the driver to do vPost,
		 * otherwise the GPU hangs. SMC firmware versions above 22.15 don't have
		 * this flaw, so we force vPost for SMC versions below 22.15.
		 */
		if (adev->asic_type == CHIP_FIJI) {
			int err;
			uint32_t fw_ver;
			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
			/* force vPost if an error occurred */
			if (err)
				return true;
803
804 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1da2c326
ML
805 if (fw_ver < 0x00160e00)
806 return true;
bec86378 807 }
bec86378 808 }
91fe77eb 809
810 if (adev->has_hw_reset) {
811 adev->has_hw_reset = false;
812 return true;
813 }
814
815 /* bios scratch used on CIK+ */
816 if (adev->asic_type >= CHIP_BONAIRE)
817 return amdgpu_atombios_scratch_need_asic_init(adev);
818
819 /* check MEM_SIZE for older asics */
820 reg = amdgpu_asic_get_config_memsize(adev);
821
822 if ((reg != 0) && (reg != 0xffffffff))
823 return false;
824
825 return true;
bec86378
ML
826}
827
d38ceaf9
AD
828/* if we get transitioned to only one device, take VGA back */
829/**
06ec9070 830 * amdgpu_device_vga_set_decode - enable/disable vga decode
d38ceaf9
AD
831 *
832 * @cookie: amdgpu_device pointer
833 * @state: enable/disable vga decode
834 *
835 * Enable/disable vga decode (all asics).
836 * Returns VGA resource flags.
837 */
06ec9070 838static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
d38ceaf9
AD
839{
840 struct amdgpu_device *adev = cookie;
841 amdgpu_asic_set_vga_state(adev, state);
842 if (state)
843 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
844 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
845 else
846 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
847}
848
e3ecdffa
AD
849/**
850 * amdgpu_device_check_block_size - validate the vm block size
851 *
852 * @adev: amdgpu_device pointer
853 *
854 * Validates the vm block size specified via module parameter.
855 * The vm block size defines number of bits in page table versus page directory,
856 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
857 * page table and the remaining bits are in the page directory.
858 */
06ec9070 859static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
a1adf8be
CZ
860{
861 /* defines number of bits in page table versus page directory,
862 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
863 * page table and the remaining bits are in the page directory */
bab4fee7
JZ
864 if (amdgpu_vm_block_size == -1)
865 return;
a1adf8be 866
bab4fee7 867 if (amdgpu_vm_block_size < 9) {
a1adf8be
CZ
868 dev_warn(adev->dev, "VM page table size (%d) too small\n",
869 amdgpu_vm_block_size);
97489129 870 amdgpu_vm_block_size = -1;
a1adf8be 871 }
a1adf8be
CZ
872}
873
e3ecdffa
AD
874/**
875 * amdgpu_device_check_vm_size - validate the vm size
876 *
877 * @adev: amdgpu_device pointer
878 *
879 * Validates the vm size in GB specified via module parameter.
880 * The VM size is the size of the GPU virtual memory space in GB.
881 */
06ec9070 882static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
83ca145d 883{
64dab074
AD
884 /* no need to check the default value */
885 if (amdgpu_vm_size == -1)
886 return;
887
83ca145d
ZJ
888 if (amdgpu_vm_size < 1) {
889 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
890 amdgpu_vm_size);
f3368128 891 amdgpu_vm_size = -1;
83ca145d 892 }
83ca145d
ZJ
893}
894
7951e376
RZ
895static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
896{
897 struct sysinfo si;
898 bool is_os_64 = (sizeof(void *) == 8) ? true : false;
899 uint64_t total_memory;
900 uint64_t dram_size_seven_GB = 0x1B8000000;
901 uint64_t dram_size_three_GB = 0xB8000000;
902
903 if (amdgpu_smu_memory_pool_size == 0)
904 return;
905
906 if (!is_os_64) {
907 DRM_WARN("Not 64-bit OS, feature not supported\n");
908 goto def_value;
909 }
910 si_meminfo(&si);
911 total_memory = (uint64_t)si.totalram * si.mem_unit;
912
913 if ((amdgpu_smu_memory_pool_size == 1) ||
914 (amdgpu_smu_memory_pool_size == 2)) {
915 if (total_memory < dram_size_three_GB)
916 goto def_value1;
917 } else if ((amdgpu_smu_memory_pool_size == 4) ||
918 (amdgpu_smu_memory_pool_size == 8)) {
919 if (total_memory < dram_size_seven_GB)
920 goto def_value1;
921 } else {
922 DRM_WARN("Smu memory pool size not supported\n");
923 goto def_value;
924 }
925 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
926
927 return;
928
929def_value1:
	DRM_WARN("Not enough system memory\n");
931def_value:
932 adev->pm.smu_prv_buffer_size = 0;
933}
934
d38ceaf9 935/**
06ec9070 936 * amdgpu_device_check_arguments - validate module params
d38ceaf9
AD
937 *
938 * @adev: amdgpu_device pointer
939 *
940 * Validates certain module parameters and updates
941 * the associated values used by the driver (all asics).
942 */
912dfc84 943static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
d38ceaf9 944{
912dfc84
EQ
945 int ret = 0;
946
5b011235
CZ
947 if (amdgpu_sched_jobs < 4) {
948 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
949 amdgpu_sched_jobs);
950 amdgpu_sched_jobs = 4;
76117507 951 } else if (!is_power_of_2(amdgpu_sched_jobs)){
5b011235
CZ
952 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
953 amdgpu_sched_jobs);
954 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
955 }
d38ceaf9 956
83e74db6 957 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
f9321cc4
CK
958 /* gart size must be greater or equal to 32M */
959 dev_warn(adev->dev, "gart size (%d) too small\n",
960 amdgpu_gart_size);
83e74db6 961 amdgpu_gart_size = -1;
d38ceaf9
AD
962 }
963
36d38372 964 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
c4e1a13a 965 /* gtt size must be greater or equal to 32M */
36d38372
CK
966 dev_warn(adev->dev, "gtt size (%d) too small\n",
967 amdgpu_gtt_size);
968 amdgpu_gtt_size = -1;
d38ceaf9
AD
969 }
970
d07f14be
RH
971 /* valid range is between 4 and 9 inclusive */
972 if (amdgpu_vm_fragment_size != -1 &&
973 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
974 dev_warn(adev->dev, "valid range is between 4 and 9\n");
975 amdgpu_vm_fragment_size = -1;
976 }
977
7951e376
RZ
978 amdgpu_device_check_smu_prv_buffer_size(adev);
979
06ec9070 980 amdgpu_device_check_vm_size(adev);
d38ceaf9 981
06ec9070 982 amdgpu_device_check_block_size(adev);
6a7f76e7 983
912dfc84
EQ
984 ret = amdgpu_device_get_job_timeout_settings(adev);
985 if (ret) {
986 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
987 return ret;
8854695a 988 }
19aede77
AD
989
990 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
912dfc84
EQ
991
992 return ret;
d38ceaf9
AD
993}
994
/**
 * amdgpu_switcheroo_set_state - set switcheroo state
 *
 * @pdev: pci dev pointer
 * @state: vga_switcheroo state
 *
 * Callback for the switcheroo driver. Suspends or resumes the
 * asic before or after it is powered up using ACPI methods.
 */
1004static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
1005{
1006 struct drm_device *dev = pci_get_drvdata(pdev);
1007
1008 if (amdgpu_device_is_px(dev) && state == VGA_SWITCHEROO_OFF)
1009 return;
1010
1011 if (state == VGA_SWITCHEROO_ON) {
7ca85295 1012 pr_info("amdgpu: switched on\n");
d38ceaf9
AD
1013 /* don't suspend or resume card normally */
1014 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1015
810ddc3a 1016 amdgpu_device_resume(dev, true, true);
d38ceaf9 1017
d38ceaf9
AD
1018 dev->switch_power_state = DRM_SWITCH_POWER_ON;
1019 drm_kms_helper_poll_enable(dev);
1020 } else {
7ca85295 1021 pr_info("amdgpu: switched off\n");
d38ceaf9
AD
1022 drm_kms_helper_poll_disable(dev);
1023 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
810ddc3a 1024 amdgpu_device_suspend(dev, true, true);
d38ceaf9
AD
1025 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1026 }
1027}
1028
/**
 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
 *
 * @pdev: pci dev pointer
 *
 * Callback for the switcheroo driver. Check if the switcheroo
 * state can be changed.
 * Returns true if the state can be changed, false if not.
 */
1038static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1039{
1040 struct drm_device *dev = pci_get_drvdata(pdev);
1041
1042 /*
1043 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1044 * locking inversion with the driver load path. And the access here is
1045 * completely racy anyway. So don't bother with locking for now.
1046 */
1047 return dev->open_count == 0;
1048}
1049
1050static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1051 .set_gpu_state = amdgpu_switcheroo_set_state,
1052 .reprobe = NULL,
1053 .can_switch = amdgpu_switcheroo_can_switch,
1054};
1055
e3ecdffa
AD
1056/**
1057 * amdgpu_device_ip_set_clockgating_state - set the CG state
1058 *
87e3f136 1059 * @dev: amdgpu_device pointer
e3ecdffa
AD
1060 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1061 * @state: clockgating state (gate or ungate)
1062 *
1063 * Sets the requested clockgating state for all instances of
1064 * the hardware IP specified.
1065 * Returns the error code from the last instance.
1066 */
43fa561f 1067int amdgpu_device_ip_set_clockgating_state(void *dev,
2990a1fc
AD
1068 enum amd_ip_block_type block_type,
1069 enum amd_clockgating_state state)
d38ceaf9 1070{
43fa561f 1071 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1072 int i, r = 0;
1073
1074 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1075 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1076 continue;
c722865a
RZ
1077 if (adev->ip_blocks[i].version->type != block_type)
1078 continue;
1079 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1080 continue;
1081 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1082 (void *)adev, state);
1083 if (r)
1084 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1085 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1086 }
1087 return r;
1088}
1089
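/*
 * Illustrative call sketch (assumed caller, not part of this file): IP code
 * elsewhere in the driver gates or ungates clocks for a whole IP type, e.g.:
 *
 *	amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
 *					       AMD_CG_STATE_GATE);
 */
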
e3ecdffa
AD
1090/**
1091 * amdgpu_device_ip_set_powergating_state - set the PG state
1092 *
87e3f136 1093 * @dev: amdgpu_device pointer
e3ecdffa
AD
1094 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1095 * @state: powergating state (gate or ungate)
1096 *
1097 * Sets the requested powergating state for all instances of
1098 * the hardware IP specified.
1099 * Returns the error code from the last instance.
1100 */
43fa561f 1101int amdgpu_device_ip_set_powergating_state(void *dev,
2990a1fc
AD
1102 enum amd_ip_block_type block_type,
1103 enum amd_powergating_state state)
d38ceaf9 1104{
43fa561f 1105 struct amdgpu_device *adev = dev;
d38ceaf9
AD
1106 int i, r = 0;
1107
1108 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1109 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1110 continue;
c722865a
RZ
1111 if (adev->ip_blocks[i].version->type != block_type)
1112 continue;
1113 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1114 continue;
1115 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1116 (void *)adev, state);
1117 if (r)
1118 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1119 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9
AD
1120 }
1121 return r;
1122}
1123
e3ecdffa
AD
1124/**
1125 * amdgpu_device_ip_get_clockgating_state - get the CG state
1126 *
1127 * @adev: amdgpu_device pointer
1128 * @flags: clockgating feature flags
1129 *
1130 * Walks the list of IPs on the device and updates the clockgating
1131 * flags for each IP.
1132 * Updates @flags with the feature flags for each hardware IP where
1133 * clockgating is enabled.
1134 */
2990a1fc
AD
1135void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1136 u32 *flags)
6cb2d4e4
HR
1137{
1138 int i;
1139
1140 for (i = 0; i < adev->num_ip_blocks; i++) {
1141 if (!adev->ip_blocks[i].status.valid)
1142 continue;
1143 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1144 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1145 }
1146}
1147
/**
 * amdgpu_device_ip_wait_for_idle - wait for idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Waits for the requested hardware IP to be idle.
 * Returns 0 for success or a negative error code on failure.
 */
2990a1fc
AD
1157int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1158 enum amd_ip_block_type block_type)
5dbbb60b
AD
1159{
1160 int i, r;
1161
1162 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1163 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1164 continue;
a1255107
AD
1165 if (adev->ip_blocks[i].version->type == block_type) {
1166 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
5dbbb60b
AD
1167 if (r)
1168 return r;
1169 break;
1170 }
1171 }
1172 return 0;
1173
1174}
1175
/**
 * amdgpu_device_ip_is_idle - is the hardware IP idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Check if the hardware IP is idle or not.
 * Returns true if the IP is idle, false if not.
 */
2990a1fc
AD
1185bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1186 enum amd_ip_block_type block_type)
5dbbb60b
AD
1187{
1188 int i;
1189
1190 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1191 if (!adev->ip_blocks[i].status.valid)
9ecbe7f5 1192 continue;
a1255107
AD
1193 if (adev->ip_blocks[i].version->type == block_type)
1194 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
5dbbb60b
AD
1195 }
1196 return true;
1197
1198}
1199
e3ecdffa
AD
1200/**
1201 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1202 *
1203 * @adev: amdgpu_device pointer
87e3f136 1204 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
e3ecdffa
AD
1205 *
1206 * Returns a pointer to the hardware IP block structure
1207 * if it exists for the asic, otherwise NULL.
1208 */
2990a1fc
AD
1209struct amdgpu_ip_block *
1210amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1211 enum amd_ip_block_type type)
d38ceaf9
AD
1212{
1213 int i;
1214
1215 for (i = 0; i < adev->num_ip_blocks; i++)
a1255107 1216 if (adev->ip_blocks[i].version->type == type)
d38ceaf9
AD
1217 return &adev->ip_blocks[i];
1218
1219 return NULL;
1220}
1221
/**
 * amdgpu_device_ip_block_version_cmp
 *
 * @adev: amdgpu_device pointer
 * @type: enum amd_ip_block_type
 * @major: major version
 * @minor: minor version
 *
 * Returns 0 if the IP block's version is equal to or greater than the
 * requested version, or 1 if it is smaller or the ip_block doesn't exist.
 */
2990a1fc
AD
1233int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1234 enum amd_ip_block_type type,
1235 u32 major, u32 minor)
d38ceaf9 1236{
2990a1fc 1237 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
d38ceaf9 1238
a1255107
AD
1239 if (ip_block && ((ip_block->version->major > major) ||
1240 ((ip_block->version->major == major) &&
1241 (ip_block->version->minor >= minor))))
d38ceaf9
AD
1242 return 0;
1243
1244 return 1;
1245}
1246
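/*
 * Illustrative call sketch (assumed caller, not part of this file): check
 * whether, for example, the GFX IP block on this asic is at least v8.1:
 *
 *	if (amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GFX, 8, 1) == 0) {
 *		// GFX 8.1 or newer is present
 *	}
 */
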
a1255107 1247/**
2990a1fc 1248 * amdgpu_device_ip_block_add
a1255107
AD
1249 *
1250 * @adev: amdgpu_device pointer
1251 * @ip_block_version: pointer to the IP to add
1252 *
1253 * Adds the IP block driver information to the collection of IPs
1254 * on the asic.
1255 */
2990a1fc
AD
1256int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1257 const struct amdgpu_ip_block_version *ip_block_version)
a1255107
AD
1258{
1259 if (!ip_block_version)
1260 return -EINVAL;
1261
e966a725 1262 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
a0bae357
HR
1263 ip_block_version->funcs->name);
1264
a1255107
AD
1265 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1266
1267 return 0;
1268}
1269
/**
 * amdgpu_device_enable_virtual_display - enable virtual display feature
 *
 * @adev: amdgpu_device pointer
 *
 * Enables the virtual display feature if the user has enabled it via
 * the module parameter virtual_display. This feature provides virtual
 * display hardware on headless boards or in virtualized environments.
 * This function parses and validates the configuration string specified by
 * the user and configures the virtual display configuration (number of
 * virtual connectors, crtcs, etc.) specified.
 */
483ef985 1282static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
9accf2fd
ED
1283{
1284 adev->enable_virtual_display = false;
1285
1286 if (amdgpu_virtual_display) {
1287 struct drm_device *ddev = adev->ddev;
1288 const char *pci_address_name = pci_name(ddev->pdev);
0f66356d 1289 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
9accf2fd
ED
1290
1291 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1292 pciaddstr_tmp = pciaddstr;
0f66356d
ED
1293 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1294 pciaddname = strsep(&pciaddname_tmp, ",");
967de2a9
YT
1295 if (!strcmp("all", pciaddname)
1296 || !strcmp(pci_address_name, pciaddname)) {
0f66356d
ED
1297 long num_crtc;
1298 int res = -1;
1299
9accf2fd 1300 adev->enable_virtual_display = true;
0f66356d
ED
1301
1302 if (pciaddname_tmp)
1303 res = kstrtol(pciaddname_tmp, 10,
1304 &num_crtc);
1305
1306 if (!res) {
1307 if (num_crtc < 1)
1308 num_crtc = 1;
1309 if (num_crtc > 6)
1310 num_crtc = 6;
1311 adev->mode_info.num_crtc = num_crtc;
1312 } else {
1313 adev->mode_info.num_crtc = 1;
1314 }
9accf2fd
ED
1315 break;
1316 }
1317 }
1318
0f66356d
ED
1319 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1320 amdgpu_virtual_display, pci_address_name,
1321 adev->enable_virtual_display, adev->mode_info.num_crtc);
9accf2fd
ED
1322
1323 kfree(pciaddstr);
1324 }
1325}
1326
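/*
 * Illustrative sketch (assumed kernel command line, not part of this file):
 * based on the parsing above, the option takes "address,crtcs" entries
 * separated by semicolons, with "all" matching every device, e.g.:
 *
 *	amdgpu.virtual_display=0000:01:00.0,2
 *	amdgpu.virtual_display=all,1
 */
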
/**
 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
 *
 * @adev: amdgpu_device pointer
 *
 * Parses the asic configuration parameters specified in the gpu info
 * firmware and makes them available to the driver for use in configuring
 * the asic.
 * Returns 0 on success, -EINVAL on failure.
 */
e2a75f88
AD
1337static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1338{
e2a75f88
AD
1339 const char *chip_name;
1340 char fw_name[30];
1341 int err;
1342 const struct gpu_info_firmware_header_v1_0 *hdr;
1343
ab4fe3e1
HR
1344 adev->firmware.gpu_info_fw = NULL;
1345
e2a75f88
AD
1346 switch (adev->asic_type) {
1347 case CHIP_TOPAZ:
1348 case CHIP_TONGA:
1349 case CHIP_FIJI:
e2a75f88 1350 case CHIP_POLARIS10:
cc07f18d 1351 case CHIP_POLARIS11:
e2a75f88 1352 case CHIP_POLARIS12:
cc07f18d 1353 case CHIP_VEGAM:
e2a75f88
AD
1354 case CHIP_CARRIZO:
1355 case CHIP_STONEY:
1356#ifdef CONFIG_DRM_AMDGPU_SI
1357 case CHIP_VERDE:
1358 case CHIP_TAHITI:
1359 case CHIP_PITCAIRN:
1360 case CHIP_OLAND:
1361 case CHIP_HAINAN:
1362#endif
1363#ifdef CONFIG_DRM_AMDGPU_CIK
1364 case CHIP_BONAIRE:
1365 case CHIP_HAWAII:
1366 case CHIP_KAVERI:
1367 case CHIP_KABINI:
1368 case CHIP_MULLINS:
1369#endif
27c0bc71 1370 case CHIP_VEGA20:
e2a75f88
AD
1371 default:
1372 return 0;
1373 case CHIP_VEGA10:
1374 chip_name = "vega10";
1375 break;
3f76dced
AD
1376 case CHIP_VEGA12:
1377 chip_name = "vega12";
1378 break;
2d2e5e7e 1379 case CHIP_RAVEN:
54c4d17e
FX
1380 if (adev->rev_id >= 8)
1381 chip_name = "raven2";
741deade
AD
1382 else if (adev->pdev->device == 0x15d8)
1383 chip_name = "picasso";
54c4d17e
FX
1384 else
1385 chip_name = "raven";
2d2e5e7e 1386 break;
23c6268e
HR
1387 case CHIP_NAVI10:
1388 chip_name = "navi10";
1389 break;
e2a75f88
AD
1390 }
1391
1392 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
ab4fe3e1 1393 err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
e2a75f88
AD
1394 if (err) {
1395 dev_err(adev->dev,
1396 "Failed to load gpu_info firmware \"%s\"\n",
1397 fw_name);
1398 goto out;
1399 }
ab4fe3e1 1400 err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
e2a75f88
AD
1401 if (err) {
1402 dev_err(adev->dev,
1403 "Failed to validate gpu_info firmware \"%s\"\n",
1404 fw_name);
1405 goto out;
1406 }
1407
ab4fe3e1 1408 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
e2a75f88
AD
1409 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
1410
1411 switch (hdr->version_major) {
1412 case 1:
1413 {
1414 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
ab4fe3e1 1415 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
e2a75f88
AD
1416 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1417
b5ab16bf
AD
1418 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
1419 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
1420 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
1421 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
e2a75f88 1422 adev->gfx.config.max_texture_channel_caches =
b5ab16bf
AD
1423 le32_to_cpu(gpu_info_fw->gc_num_tccs);
1424 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
1425 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
1426 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
1427 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
e2a75f88 1428 adev->gfx.config.double_offchip_lds_buf =
b5ab16bf
AD
1429 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
1430 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
51fd0370
HZ
1431 adev->gfx.cu_info.max_waves_per_simd =
1432 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
1433 adev->gfx.cu_info.max_scratch_slots_per_cu =
1434 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
1435 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
48321c3d 1436 if (hdr->version_minor >= 1) {
35c2e910
HZ
1437 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
1438 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
1439 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1440 adev->gfx.config.num_sc_per_sh =
1441 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
1442 adev->gfx.config.num_packer_per_sc =
1443 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
1444 }
48321c3d
HW
1445#ifdef CONFIG_DRM_AMD_DC_DCN2_0
1446 if (hdr->version_minor == 2) {
1447 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
1448 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
1449 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1450 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
1451 }
1452#endif
e2a75f88
AD
1453 break;
1454 }
1455 default:
1456 dev_err(adev->dev,
1457 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
1458 err = -EINVAL;
1459 goto out;
1460 }
1461out:
e2a75f88
AD
1462 return err;
1463}
1464
/**
 * amdgpu_device_ip_early_init - run early init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Early initialization pass for hardware IPs. The hardware IPs that make
 * up each asic are discovered and each IP's early_init callback is run. This
 * is the first stage in initializing the asic.
 * Returns 0 on success, negative error code on failure.
 */
06ec9070 1475static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
d38ceaf9 1476{
aaa36a97 1477 int i, r;
d38ceaf9 1478
483ef985 1479 amdgpu_device_enable_virtual_display(adev);
a6be7570 1480
d38ceaf9 1481 switch (adev->asic_type) {
aaa36a97
AD
1482 case CHIP_TOPAZ:
1483 case CHIP_TONGA:
48299f95 1484 case CHIP_FIJI:
2cc0c0b5 1485 case CHIP_POLARIS10:
32cc7e53 1486 case CHIP_POLARIS11:
c4642a47 1487 case CHIP_POLARIS12:
32cc7e53 1488 case CHIP_VEGAM:
aaa36a97 1489 case CHIP_CARRIZO:
39bb0c92
SL
1490 case CHIP_STONEY:
1491 if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
aaa36a97
AD
1492 adev->family = AMDGPU_FAMILY_CZ;
1493 else
1494 adev->family = AMDGPU_FAMILY_VI;
1495
1496 r = vi_set_ip_blocks(adev);
1497 if (r)
1498 return r;
1499 break;
33f34802
KW
1500#ifdef CONFIG_DRM_AMDGPU_SI
1501 case CHIP_VERDE:
1502 case CHIP_TAHITI:
1503 case CHIP_PITCAIRN:
1504 case CHIP_OLAND:
1505 case CHIP_HAINAN:
295d0daf 1506 adev->family = AMDGPU_FAMILY_SI;
33f34802
KW
1507 r = si_set_ip_blocks(adev);
1508 if (r)
1509 return r;
1510 break;
1511#endif
a2e73f56
AD
1512#ifdef CONFIG_DRM_AMDGPU_CIK
1513 case CHIP_BONAIRE:
1514 case CHIP_HAWAII:
1515 case CHIP_KAVERI:
1516 case CHIP_KABINI:
1517 case CHIP_MULLINS:
1518 if ((adev->asic_type == CHIP_BONAIRE) || (adev->asic_type == CHIP_HAWAII))
1519 adev->family = AMDGPU_FAMILY_CI;
1520 else
1521 adev->family = AMDGPU_FAMILY_KV;
1522
1523 r = cik_set_ip_blocks(adev);
1524 if (r)
1525 return r;
1526 break;
1527#endif
e48a3cd9
AD
1528 case CHIP_VEGA10:
1529 case CHIP_VEGA12:
e4bd8170 1530 case CHIP_VEGA20:
e48a3cd9 1531 case CHIP_RAVEN:
741deade 1532 if (adev->asic_type == CHIP_RAVEN)
2ca8a5d2
CZ
1533 adev->family = AMDGPU_FAMILY_RV;
1534 else
1535 adev->family = AMDGPU_FAMILY_AI;
460826e6
KW
1536
1537 r = soc15_set_ip_blocks(adev);
1538 if (r)
1539 return r;
1540 break;
0a5b8c7b
HR
1541 case CHIP_NAVI10:
1542 adev->family = AMDGPU_FAMILY_NV;
1543
1544 r = nv_set_ip_blocks(adev);
1545 if (r)
1546 return r;
1547 break;
d38ceaf9
AD
1548 default:
1549 /* FIXME: not supported yet */
1550 return -EINVAL;
1551 }
1552
e2a75f88
AD
1553 r = amdgpu_device_parse_gpu_info_fw(adev);
1554 if (r)
1555 return r;
1556
1884734a 1557 amdgpu_amdkfd_device_probe(adev);
1558
3149d9da
XY
1559 if (amdgpu_sriov_vf(adev)) {
1560 r = amdgpu_virt_request_full_gpu(adev, true);
1561 if (r)
5ffa61c1 1562 return -EAGAIN;
78d48112
TH
1563
1564 /* query the reg access mode at the very beginning */
1565 amdgpu_virt_init_reg_access_mode(adev);
3149d9da
XY
1566 }
1567
3b94fb10 1568 adev->pm.pp_feature = amdgpu_pp_feature_mask;
00544006
HR
1569 if (amdgpu_sriov_vf(adev))
1570 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
00f54b97 1571
d38ceaf9
AD
1572 for (i = 0; i < adev->num_ip_blocks; i++) {
1573 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
ed8cf00c
HR
1574 DRM_ERROR("disabled ip block: %d <%s>\n",
1575 i, adev->ip_blocks[i].version->funcs->name);
a1255107 1576 adev->ip_blocks[i].status.valid = false;
d38ceaf9 1577 } else {
a1255107
AD
1578 if (adev->ip_blocks[i].version->funcs->early_init) {
1579 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2c1a2784 1580 if (r == -ENOENT) {
a1255107 1581 adev->ip_blocks[i].status.valid = false;
2c1a2784 1582 } else if (r) {
a1255107
AD
1583 DRM_ERROR("early_init of IP block <%s> failed %d\n",
1584 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 1585 return r;
2c1a2784 1586 } else {
a1255107 1587 adev->ip_blocks[i].status.valid = true;
2c1a2784 1588 }
974e6b64 1589 } else {
a1255107 1590 adev->ip_blocks[i].status.valid = true;
d38ceaf9 1591 }
d38ceaf9 1592 }
21a249ca
AD
1593 /* get the vbios after the asic_funcs are set up */
1594 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
1595 /* Read BIOS */
1596 if (!amdgpu_get_bios(adev))
1597 return -EINVAL;
1598
1599 r = amdgpu_atombios_init(adev);
1600 if (r) {
1601 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
1602 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
1603 return r;
1604 }
1605 }
d38ceaf9
AD
1606 }
1607
395d1fb9
NH
1608 adev->cg_flags &= amdgpu_cg_mask;
1609 adev->pg_flags &= amdgpu_pg_mask;
1610
d38ceaf9
AD
1611 return 0;
1612}
1613
0a4f2520
RZ
1614static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
1615{
1616 int i, r;
1617
1618 for (i = 0; i < adev->num_ip_blocks; i++) {
1619 if (!adev->ip_blocks[i].status.sw)
1620 continue;
1621 if (adev->ip_blocks[i].status.hw)
1622 continue;
1623 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2d11fd3f 1624 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
0a4f2520
RZ
1625 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
1626 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1627 if (r) {
1628 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1629 adev->ip_blocks[i].version->funcs->name, r);
1630 return r;
1631 }
1632 adev->ip_blocks[i].status.hw = true;
1633 }
1634 }
1635
1636 return 0;
1637}
1638
1639static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
1640{
1641 int i, r;
1642
1643 for (i = 0; i < adev->num_ip_blocks; i++) {
1644 if (!adev->ip_blocks[i].status.sw)
1645 continue;
1646 if (adev->ip_blocks[i].status.hw)
1647 continue;
1648 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1649 if (r) {
1650 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1651 adev->ip_blocks[i].version->funcs->name, r);
1652 return r;
1653 }
1654 adev->ip_blocks[i].status.hw = true;
1655 }
1656
1657 return 0;
1658}
1659
7a3e0bb2
RZ
1660static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
1661{
1662 int r = 0;
1663 int i;
80f41f84 1664 uint32_t smu_version;
7a3e0bb2
RZ
1665
1666 if (adev->asic_type >= CHIP_VEGA10) {
1667 for (i = 0; i < adev->num_ip_blocks; i++) {
1668 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
1669 if (adev->in_gpu_reset || adev->in_suspend) {
1670 if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset)
1671 break; /* sriov gpu reset, psp need to do hw_init before IH because of hw limit */
1672 r = adev->ip_blocks[i].version->funcs->resume(adev);
1673 if (r) {
1674 DRM_ERROR("resume of IP block <%s> failed %d\n",
1675 adev->ip_blocks[i].version->funcs->name, r);
1676 return r;
1677 }
1678 } else {
1679 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
1680 if (r) {
1681 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
1682 adev->ip_blocks[i].version->funcs->name, r);
1683 return r;
1684 }
1685 }
1686 adev->ip_blocks[i].status.hw = true;
1687 }
1688 }
1689 }
80f41f84 1690 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
7a3e0bb2 1691
80f41f84 1692 return r;
7a3e0bb2
RZ
1693}
1694
e3ecdffa
AD
1695/**
1696 * amdgpu_device_ip_init - run init for hardware IPs
1697 *
1698 * @adev: amdgpu_device pointer
1699 *
1700 * Main initialization pass for hardware IPs. The list of all the hardware
1701 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
1702 * are run. sw_init initializes the software state associated with each IP
1703 * and hw_init initializes the hardware associated with each IP.
1704 * Returns 0 on success, negative error code on failure.
1705 */
06ec9070 1706static int amdgpu_device_ip_init(struct amdgpu_device *adev)
d38ceaf9
AD
1707{
1708 int i, r;
1709
c030f2e4 1710 r = amdgpu_ras_init(adev);
1711 if (r)
1712 return r;
1713
d38ceaf9 1714 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 1715 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 1716 continue;
a1255107 1717 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2c1a2784 1718 if (r) {
a1255107
AD
1719 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
1720 adev->ip_blocks[i].version->funcs->name, r);
72d3f592 1721 goto init_failed;
2c1a2784 1722 }
a1255107 1723 adev->ip_blocks[i].status.sw = true;
bfca0289 1724
d38ceaf9 1725 /* need to do gmc hw init early so we can allocate gpu mem */
a1255107 1726 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
06ec9070 1727 r = amdgpu_device_vram_scratch_init(adev);
2c1a2784
AD
1728 if (r) {
1729 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
72d3f592 1730 goto init_failed;
2c1a2784 1731 }
a1255107 1732 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2c1a2784
AD
1733 if (r) {
1734 DRM_ERROR("hw_init %d failed %d\n", i, r);
72d3f592 1735 goto init_failed;
2c1a2784 1736 }
06ec9070 1737 r = amdgpu_device_wb_init(adev);
2c1a2784 1738 if (r) {
06ec9070 1739 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
72d3f592 1740 goto init_failed;
2c1a2784 1741 }
a1255107 1742 adev->ip_blocks[i].status.hw = true;
2493664f
ML
1743
1744 /* right after GMC hw init, we create CSA */
f92d5c61 1745 if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
1e256e27
RZ
1746 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
1747 AMDGPU_GEM_DOMAIN_VRAM,
1748 AMDGPU_CSA_SIZE);
2493664f
ML
1749 if (r) {
1750 DRM_ERROR("allocate CSA failed %d\n", r);
72d3f592 1751 goto init_failed;
2493664f
ML
1752 }
1753 }
d38ceaf9
AD
1754 }
1755 }
1756
533aed27
AG
1757 r = amdgpu_ib_pool_init(adev);
1758 if (r) {
1759 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
1760 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
1761 goto init_failed;
1762 }
1763
c8963ea4
RZ
 1764 	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete */
1765 if (r)
72d3f592 1766 goto init_failed;
0a4f2520
RZ
1767
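	/*
	 * Hardware init is staged: phase 1 brings up the early blocks (e.g.
	 * COMMON and IH), firmware is then loaded through PSP/SMU, and phase 2
	 * initializes the remaining blocks.
	 */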
1768 r = amdgpu_device_ip_hw_init_phase1(adev);
1769 if (r)
72d3f592 1770 goto init_failed;
0a4f2520 1771
7a3e0bb2
RZ
1772 r = amdgpu_device_fw_loading(adev);
1773 if (r)
72d3f592 1774 goto init_failed;
7a3e0bb2 1775
0a4f2520
RZ
1776 r = amdgpu_device_ip_hw_init_phase2(adev);
1777 if (r)
72d3f592 1778 goto init_failed;
d38ceaf9 1779
3e2e2ab5
HZ
1780 if (adev->gmc.xgmi.num_physical_nodes > 1)
1781 amdgpu_xgmi_add_device(adev);
1884734a 1782 amdgpu_amdkfd_device_init(adev);
c6332b97 1783
72d3f592 1784init_failed:
d3c117e5 1785 if (amdgpu_sriov_vf(adev)) {
72d3f592
ED
1786 if (!r)
1787 amdgpu_virt_init_data_exchange(adev);
c6332b97 1788 amdgpu_virt_release_full_gpu(adev, true);
d3c117e5 1789 }
c6332b97 1790
72d3f592 1791 return r;
d38ceaf9
AD
1792}
1793
e3ecdffa
AD
1794/**
1795 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
1796 *
1797 * @adev: amdgpu_device pointer
1798 *
1799 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
1800 * this function before a GPU reset. If the value is retained after a
 1801  * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
1802 */
06ec9070 1803static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
0c49e0b8
CZ
1804{
1805 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
1806}
1807
e3ecdffa
AD
1808/**
1809 * amdgpu_device_check_vram_lost - check if vram is valid
1810 *
1811 * @adev: amdgpu_device pointer
1812 *
1813 * Checks the reset magic value written to the gart pointer in VRAM.
1814 * The driver calls this after a GPU reset to see if the contents of
 1815  * VRAM have been lost or not.
 1816  * Returns true if VRAM is lost, false if not.
1817 */
06ec9070 1818static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
0c49e0b8
CZ
1819{
1820 return !!memcmp(adev->gart.ptr, adev->reset_magic,
1821 AMDGPU_RESET_MAGIC_NUM);
1822}
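
/*
 * Minimal usage sketch for the two helpers above (illustrative only; the
 * wrapper below is hypothetical and not part of the driver): write the magic
 * before an ASIC reset and compare it afterwards to decide whether VRAM
 * contents need to be restored.
 */
#if 0
static int amdgpu_device_reset_and_check_vram(struct amdgpu_device *adev)
{
	int r;

	amdgpu_device_fill_reset_magic(adev);		/* snapshot before reset */
	r = amdgpu_asic_reset(adev);			/* full ASIC reset */
	if (r)
		return r;
	if (amdgpu_device_check_vram_lost(adev))	/* magic gone -> VRAM lost */
		r = amdgpu_device_recover_vram(adev);	/* restore from GTT shadows */
	return r;
}
#endif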
1823
e3ecdffa 1824/**
1112a46b 1825 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
e3ecdffa
AD
1826 *
1827 * @adev: amdgpu_device pointer
1828 *
e3ecdffa 1829 * The list of all the hardware IPs that make up the asic is walked and the
1112a46b
RZ
1830 * set_clockgating_state callbacks are run.
 1831  * The late initialization pass enables clockgating for the hardware IPs;
 1832  * the fini and suspend passes disable it.
e3ecdffa
AD
1833 * Returns 0 on success, negative error code on failure.
1834 */
fdd34271 1835
1112a46b
RZ
1836static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
1837 enum amd_clockgating_state state)
d38ceaf9 1838{
1112a46b 1839 int i, j, r;
d38ceaf9 1840
4a2ba394
SL
1841 if (amdgpu_emu_mode == 1)
1842 return 0;
1843
1112a46b
RZ
1844 for (j = 0; j < adev->num_ip_blocks; j++) {
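		/* gate in init order, ungate in the reverse (teardown) order */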
1845 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 1846 if (!adev->ip_blocks[i].status.late_initialized)
d38ceaf9 1847 continue;
4a446d55 1848 /* skip CG for VCE/UVD, it's handled specially */
a1255107 1849 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
57716327 1850 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
34319b32 1851 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
57716327 1852 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
4a446d55 1853 /* enable clockgating to save power */
a1255107 1854 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
1112a46b 1855 state);
4a446d55
AD
1856 if (r) {
1857 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
a1255107 1858 adev->ip_blocks[i].version->funcs->name, r);
4a446d55
AD
1859 return r;
1860 }
b0b00ff1 1861 }
d38ceaf9 1862 }
06b18f61 1863
c9f96fd5
RZ
1864 return 0;
1865}
1866
1112a46b 1867static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
c9f96fd5 1868{
1112a46b 1869 int i, j, r;
06b18f61 1870
c9f96fd5
RZ
1871 if (amdgpu_emu_mode == 1)
1872 return 0;
1873
1112a46b
RZ
1874 for (j = 0; j < adev->num_ip_blocks; j++) {
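		/* gate in init order, ungate in the reverse (teardown) order */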
1875 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
a2d31dc3 1876 if (!adev->ip_blocks[i].status.late_initialized)
c9f96fd5
RZ
1877 continue;
 1878 		/* skip PG for VCE/UVD, it's handled specially */
1879 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
1880 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
1881 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
1882 adev->ip_blocks[i].version->funcs->set_powergating_state) {
1883 /* enable powergating to save power */
1884 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
1112a46b 1885 state);
c9f96fd5
RZ
1886 if (r) {
1887 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
1888 adev->ip_blocks[i].version->funcs->name, r);
1889 return r;
1890 }
1891 }
1892 }
2dc80b00
S
1893 return 0;
1894}
1895
beff74bc
AD
1896static int amdgpu_device_enable_mgpu_fan_boost(void)
1897{
1898 struct amdgpu_gpu_instance *gpu_ins;
1899 struct amdgpu_device *adev;
1900 int i, ret = 0;
1901
1902 mutex_lock(&mgpu_info.mutex);
1903
1904 /*
1905 * MGPU fan boost feature should be enabled
1906 * only when there are two or more dGPUs in
1907 * the system
1908 */
1909 if (mgpu_info.num_dgpu < 2)
1910 goto out;
1911
1912 for (i = 0; i < mgpu_info.num_dgpu; i++) {
1913 gpu_ins = &(mgpu_info.gpu_ins[i]);
1914 adev = gpu_ins->adev;
1915 if (!(adev->flags & AMD_IS_APU) &&
1916 !gpu_ins->mgpu_fan_enabled &&
1917 adev->powerplay.pp_funcs &&
1918 adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
1919 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
1920 if (ret)
1921 break;
1922
1923 gpu_ins->mgpu_fan_enabled = 1;
1924 }
1925 }
1926
1927out:
1928 mutex_unlock(&mgpu_info.mutex);
1929
1930 return ret;
1931}
1932
e3ecdffa
AD
1933/**
1934 * amdgpu_device_ip_late_init - run late init for hardware IPs
1935 *
1936 * @adev: amdgpu_device pointer
1937 *
1938 * Late initialization pass for hardware IPs. The list of all the hardware
1939 * IPs that make up the asic is walked and the late_init callbacks are run.
1940 * late_init covers any special initialization that an IP requires
 1941  * after all of the IPs have been initialized or something that needs to happen
1942 * late in the init process.
1943 * Returns 0 on success, negative error code on failure.
1944 */
06ec9070 1945static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2dc80b00
S
1946{
1947 int i = 0, r;
1948
1949 for (i = 0; i < adev->num_ip_blocks; i++) {
73f847db 1950 if (!adev->ip_blocks[i].status.hw)
2dc80b00
S
1951 continue;
1952 if (adev->ip_blocks[i].version->funcs->late_init) {
1953 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
1954 if (r) {
1955 DRM_ERROR("late_init of IP block <%s> failed %d\n",
1956 adev->ip_blocks[i].version->funcs->name, r);
1957 return r;
1958 }
2dc80b00 1959 }
73f847db 1960 adev->ip_blocks[i].status.late_initialized = true;
2dc80b00
S
1961 }
1962
1112a46b
RZ
1963 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
1964 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
916ac57f 1965
06ec9070 1966 amdgpu_device_fill_reset_magic(adev);
d38ceaf9 1967
beff74bc
AD
1968 r = amdgpu_device_enable_mgpu_fan_boost();
1969 if (r)
1970 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
1971
1972 /* set to low pstate by default */
1973 amdgpu_xgmi_set_pstate(adev, 0);
1974
d38ceaf9
AD
1975 return 0;
1976}
1977
e3ecdffa
AD
1978/**
1979 * amdgpu_device_ip_fini - run fini for hardware IPs
1980 *
1981 * @adev: amdgpu_device pointer
1982 *
1983 * Main teardown pass for hardware IPs. The list of all the hardware
1984 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
1985 * are run. hw_fini tears down the hardware associated with each IP
1986 * and sw_fini tears down any software state associated with each IP.
1987 * Returns 0 on success, negative error code on failure.
1988 */
06ec9070 1989static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
d38ceaf9
AD
1990{
1991 int i, r;
1992
c030f2e4 1993 amdgpu_ras_pre_fini(adev);
1994
a82400b5
AG
1995 if (adev->gmc.xgmi.num_physical_nodes > 1)
1996 amdgpu_xgmi_remove_device(adev);
1997
1884734a 1998 amdgpu_amdkfd_device_fini(adev);
05df1f01
RZ
1999
2000 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271
RZ
2001 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2002
3e96dbfd
AD
2003 /* need to disable SMC first */
2004 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2005 if (!adev->ip_blocks[i].status.hw)
3e96dbfd 2006 continue;
fdd34271 2007 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
a1255107 2008 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3e96dbfd
AD
2009 /* XXX handle errors */
2010 if (r) {
2011 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
a1255107 2012 adev->ip_blocks[i].version->funcs->name, r);
3e96dbfd 2013 }
a1255107 2014 adev->ip_blocks[i].status.hw = false;
3e96dbfd
AD
2015 break;
2016 }
2017 }
2018
d38ceaf9 2019 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2020 if (!adev->ip_blocks[i].status.hw)
d38ceaf9 2021 continue;
8201a67a 2022
a1255107 2023 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
d38ceaf9 2024 /* XXX handle errors */
2c1a2784 2025 if (r) {
a1255107
AD
2026 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2027 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2028 }
8201a67a 2029
a1255107 2030 adev->ip_blocks[i].status.hw = false;
d38ceaf9
AD
2031 }
2032
9950cda2 2033
d38ceaf9 2034 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2035 if (!adev->ip_blocks[i].status.sw)
d38ceaf9 2036 continue;
c12aba3a
ML
2037
2038 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
c8963ea4 2039 amdgpu_ucode_free_bo(adev);
1e256e27 2040 amdgpu_free_static_csa(&adev->virt.csa_obj);
c12aba3a
ML
2041 amdgpu_device_wb_fini(adev);
2042 amdgpu_device_vram_scratch_fini(adev);
533aed27 2043 amdgpu_ib_pool_fini(adev);
c12aba3a
ML
2044 }
2045
a1255107 2046 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
d38ceaf9 2047 /* XXX handle errors */
2c1a2784 2048 if (r) {
a1255107
AD
2049 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2050 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2051 }
a1255107
AD
2052 adev->ip_blocks[i].status.sw = false;
2053 adev->ip_blocks[i].status.valid = false;
d38ceaf9
AD
2054 }
2055
a6dcfd9c 2056 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2057 if (!adev->ip_blocks[i].status.late_initialized)
8a2eef1d 2058 continue;
a1255107
AD
2059 if (adev->ip_blocks[i].version->funcs->late_fini)
2060 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2061 adev->ip_blocks[i].status.late_initialized = false;
a6dcfd9c
ML
2062 }
2063
c030f2e4 2064 amdgpu_ras_fini(adev);
2065
030308fc 2066 if (amdgpu_sriov_vf(adev))
24136135
ML
2067 if (amdgpu_virt_release_full_gpu(adev, false))
2068 DRM_ERROR("failed to release exclusive mode on fini\n");
2493664f 2069
d38ceaf9
AD
2070 return 0;
2071}
2072
e3ecdffa 2073/**
beff74bc 2074 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
e3ecdffa 2075 *
1112a46b 2076 * @work: work_struct.
e3ecdffa 2077 */
beff74bc 2078static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2dc80b00
S
2079{
2080 struct amdgpu_device *adev =
beff74bc 2081 container_of(work, struct amdgpu_device, delayed_init_work.work);
916ac57f
RZ
2082 int r;
2083
2084 r = amdgpu_ib_ring_tests(adev);
2085 if (r)
2086 DRM_ERROR("ib ring test failed (%d).\n", r);
2dc80b00
S
2087}
2088
1e317b99
RZ
2089static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2090{
2091 struct amdgpu_device *adev =
2092 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2093
2094 mutex_lock(&adev->gfx.gfx_off_mutex);
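	/* enter GFXOFF only if it is not already on and nothing holds a disable request */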
2095 if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
2096 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2097 adev->gfx.gfx_off_state = true;
2098 }
2099 mutex_unlock(&adev->gfx.gfx_off_mutex);
2100}
2101
e3ecdffa 2102/**
e7854a03 2103 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
e3ecdffa
AD
2104 *
2105 * @adev: amdgpu_device pointer
2106 *
 2107  * First phase of the main suspend path. Clockgating and powergating are
 2108  * disabled, the list of hardware IPs is walked and the suspend callbacks
 2109  * are run for the display (DCE) blocks only. suspend puts the hardware and
 2110  * software state in each IP into a state suitable for suspend.
2111 * Returns 0 on success, negative error code on failure.
2112 */
e7854a03
AD
2113static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2114{
2115 int i, r;
2116
05df1f01 2117 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
fdd34271 2118 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
05df1f01 2119
e7854a03
AD
2120 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2121 if (!adev->ip_blocks[i].status.valid)
2122 continue;
2123 /* displays are handled separately */
2124 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
e7854a03
AD
2125 /* XXX handle errors */
2126 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2127 /* XXX handle errors */
2128 if (r) {
2129 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2130 adev->ip_blocks[i].version->funcs->name, r);
2131 }
2132 }
2133 }
2134
e7854a03
AD
2135 return 0;
2136}
2137
2138/**
2139 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
2140 *
2141 * @adev: amdgpu_device pointer
2142 *
 2143  * Second phase of the main suspend path. The list of hardware IPs is
 2144  * walked and the suspend callbacks are run for every block except the
 2145  * displays, which were already handled in phase 1. suspend puts the hardware
 2146  * and software state in each IP into a state suitable for suspend.
2147 * Returns 0 on success, negative error code on failure.
2148 */
2149static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2150{
2151 int i, r;
2152
2153 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
a1255107 2154 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2155 continue;
e7854a03
AD
2156 /* displays are handled in phase1 */
2157 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2158 continue;
d38ceaf9 2159 /* XXX handle errors */
a1255107 2160 r = adev->ip_blocks[i].version->funcs->suspend(adev);
d38ceaf9 2161 /* XXX handle errors */
2c1a2784 2162 if (r) {
a1255107
AD
2163 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2164 adev->ip_blocks[i].version->funcs->name, r);
2c1a2784 2165 }
d38ceaf9
AD
2166 }
2167
2168 return 0;
2169}
2170
e7854a03
AD
2171/**
2172 * amdgpu_device_ip_suspend - run suspend for hardware IPs
2173 *
2174 * @adev: amdgpu_device pointer
2175 *
2176 * Main suspend function for hardware IPs. The list of all the hardware
2177 * IPs that make up the asic is walked, clockgating is disabled and the
2178 * suspend callbacks are run. suspend puts the hardware and software state
2179 * in each IP into a state suitable for suspend.
2180 * Returns 0 on success, negative error code on failure.
2181 */
2182int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
2183{
2184 int r;
2185
e7819644
YT
2186 if (amdgpu_sriov_vf(adev))
2187 amdgpu_virt_request_full_gpu(adev, false);
2188
e7854a03
AD
2189 r = amdgpu_device_ip_suspend_phase1(adev);
2190 if (r)
2191 return r;
2192 r = amdgpu_device_ip_suspend_phase2(adev);
2193
e7819644
YT
2194 if (amdgpu_sriov_vf(adev))
2195 amdgpu_virt_release_full_gpu(adev, false);
2196
e7854a03
AD
2197 return r;
2198}
2199
06ec9070 2200static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2201{
2202 int i, r;
2203
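	/* minimal set of IP blocks, re-initialized in this fixed order right after a VF FLR */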
2cb681b6
ML
2204 static enum amd_ip_block_type ip_order[] = {
2205 AMD_IP_BLOCK_TYPE_GMC,
2206 AMD_IP_BLOCK_TYPE_COMMON,
39186aef 2207 AMD_IP_BLOCK_TYPE_PSP,
2cb681b6
ML
2208 AMD_IP_BLOCK_TYPE_IH,
2209 };
a90ad3c2 2210
2cb681b6
ML
2211 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2212 int j;
2213 struct amdgpu_ip_block *block;
a90ad3c2 2214
2cb681b6
ML
2215 for (j = 0; j < adev->num_ip_blocks; j++) {
2216 block = &adev->ip_blocks[j];
2217
2218 if (block->version->type != ip_order[i] ||
2219 !block->status.valid)
2220 continue;
2221
2222 r = block->version->funcs->hw_init(adev);
0aaeefcc 2223 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2224 if (r)
2225 return r;
a90ad3c2
ML
2226 }
2227 }
2228
2229 return 0;
2230}
2231
06ec9070 2232static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
a90ad3c2
ML
2233{
2234 int i, r;
2235
2cb681b6
ML
2236 static enum amd_ip_block_type ip_order[] = {
2237 AMD_IP_BLOCK_TYPE_SMC,
2238 AMD_IP_BLOCK_TYPE_DCE,
2239 AMD_IP_BLOCK_TYPE_GFX,
2240 AMD_IP_BLOCK_TYPE_SDMA,
257deb8c
FM
2241 AMD_IP_BLOCK_TYPE_UVD,
2242 AMD_IP_BLOCK_TYPE_VCE
2cb681b6 2243 };
a90ad3c2 2244
2cb681b6
ML
2245 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
2246 int j;
2247 struct amdgpu_ip_block *block;
a90ad3c2 2248
2cb681b6
ML
2249 for (j = 0; j < adev->num_ip_blocks; j++) {
2250 block = &adev->ip_blocks[j];
2251
2252 if (block->version->type != ip_order[i] ||
2253 !block->status.valid)
2254 continue;
2255
2256 r = block->version->funcs->hw_init(adev);
0aaeefcc 2257 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
c41d1cf6
ML
2258 if (r)
2259 return r;
a90ad3c2
ML
2260 }
2261 }
2262
2263 return 0;
2264}
2265
e3ecdffa
AD
2266/**
2267 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
2268 *
2269 * @adev: amdgpu_device pointer
2270 *
2271 * First resume function for hardware IPs. The list of all the hardware
2272 * IPs that make up the asic is walked and the resume callbacks are run for
2273 * COMMON, GMC, and IH. resume puts the hardware into a functional state
2274 * after a suspend and updates the software state as necessary. This
2275 * function is also used for restoring the GPU after a GPU reset.
2276 * Returns 0 on success, negative error code on failure.
2277 */
06ec9070 2278static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
d38ceaf9
AD
2279{
2280 int i, r;
2281
a90ad3c2
ML
2282 for (i = 0; i < adev->num_ip_blocks; i++) {
2283 if (!adev->ip_blocks[i].status.valid)
2284 continue;
a90ad3c2 2285 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa
AD
2286 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
2287 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
fcf0649f
CZ
2288 r = adev->ip_blocks[i].version->funcs->resume(adev);
2289 if (r) {
2290 DRM_ERROR("resume of IP block <%s> failed %d\n",
2291 adev->ip_blocks[i].version->funcs->name, r);
2292 return r;
2293 }
a90ad3c2
ML
2294 }
2295 }
2296
2297 return 0;
2298}
2299
e3ecdffa
AD
2300/**
2301 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
2302 *
2303 * @adev: amdgpu_device pointer
2304 *
 2305  * Second resume function for hardware IPs. The list of all the hardware
2306 * IPs that make up the asic is walked and the resume callbacks are run for
2307 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
2308 * functional state after a suspend and updates the software state as
2309 * necessary. This function is also used for restoring the GPU after a GPU
2310 * reset.
2311 * Returns 0 on success, negative error code on failure.
2312 */
06ec9070 2313static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
d38ceaf9
AD
2314{
2315 int i, r;
2316
2317 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 2318 if (!adev->ip_blocks[i].status.valid)
d38ceaf9 2319 continue;
fcf0649f 2320 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
e3ecdffa 2321 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
7a3e0bb2
RZ
2322 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
2323 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
fcf0649f 2324 continue;
a1255107 2325 r = adev->ip_blocks[i].version->funcs->resume(adev);
2c1a2784 2326 if (r) {
a1255107
AD
2327 DRM_ERROR("resume of IP block <%s> failed %d\n",
2328 adev->ip_blocks[i].version->funcs->name, r);
d38ceaf9 2329 return r;
2c1a2784 2330 }
d38ceaf9
AD
2331 }
2332
2333 return 0;
2334}
2335
e3ecdffa
AD
2336/**
2337 * amdgpu_device_ip_resume - run resume for hardware IPs
2338 *
2339 * @adev: amdgpu_device pointer
2340 *
2341 * Main resume function for hardware IPs. The hardware IPs
2342 * are split into two resume functions because they are
 2343  * also used in recovering from a GPU reset and some additional
 2344  * steps need to be taken between them. In this case (S3/S4) they are
2345 * run sequentially.
2346 * Returns 0 on success, negative error code on failure.
2347 */
06ec9070 2348static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
fcf0649f
CZ
2349{
2350 int r;
2351
06ec9070 2352 r = amdgpu_device_ip_resume_phase1(adev);
fcf0649f
CZ
2353 if (r)
2354 return r;
7a3e0bb2
RZ
2355
2356 r = amdgpu_device_fw_loading(adev);
2357 if (r)
2358 return r;
2359
06ec9070 2360 r = amdgpu_device_ip_resume_phase2(adev);
fcf0649f
CZ
2361
2362 return r;
2363}
2364
e3ecdffa
AD
2365/**
2366 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
2367 *
2368 * @adev: amdgpu_device pointer
2369 *
2370 * Query the VBIOS data tables to determine if the board supports SR-IOV.
2371 */
4e99a44e 2372static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
048765ad 2373{
6867e1b5
ML
2374 if (amdgpu_sriov_vf(adev)) {
2375 if (adev->is_atom_fw) {
2376 if (amdgpu_atomfirmware_gpu_supports_virtualization(adev))
2377 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2378 } else {
2379 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
2380 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
2381 }
2382
2383 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
2384 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
a5bde2f9 2385 }
048765ad
AR
2386}
2387
e3ecdffa
AD
2388/**
2389 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
2390 *
2391 * @asic_type: AMD asic type
2392 *
 2393  * Check if there is DC (new modesetting infrastructure) support for an asic.
 2394  * Returns true if DC has support, false if not.
2395 */
4562236b
HW
2396bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
2397{
2398 switch (asic_type) {
2399#if defined(CONFIG_DRM_AMD_DC)
2400 case CHIP_BONAIRE:
0d6fbccb 2401 case CHIP_KAVERI:
367e6687
AD
2402 case CHIP_KABINI:
2403 case CHIP_MULLINS:
d9fda248
HW
2404 /*
2405 * We have systems in the wild with these ASICs that require
2406 * LVDS and VGA support which is not supported with DC.
2407 *
2408 * Fallback to the non-DC driver here by default so as not to
2409 * cause regressions.
2410 */
2411 return amdgpu_dc > 0;
2412 case CHIP_HAWAII:
4562236b
HW
2413 case CHIP_CARRIZO:
2414 case CHIP_STONEY:
4562236b 2415 case CHIP_POLARIS10:
675fd32b 2416 case CHIP_POLARIS11:
2c8ad2d5 2417 case CHIP_POLARIS12:
675fd32b 2418 case CHIP_VEGAM:
4562236b
HW
2419 case CHIP_TONGA:
2420 case CHIP_FIJI:
42f8ffa1 2421 case CHIP_VEGA10:
dca7b401 2422 case CHIP_VEGA12:
c6034aa2 2423 case CHIP_VEGA20:
dc37a9a0 2424#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
fd187853 2425 case CHIP_RAVEN:
b4f199c7
HW
2426#endif
2427#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
2428 case CHIP_NAVI10:
42f8ffa1 2429#endif
fd187853 2430 return amdgpu_dc != 0;
4562236b
HW
2431#endif
2432 default:
2433 return false;
2434 }
2435}
2436
2437/**
2438 * amdgpu_device_has_dc_support - check if dc is supported
2439 *
 2440  * @adev: amdgpu_device pointer
2441 *
2442 * Returns true for supported, false for not supported
2443 */
2444bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2445{
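	/* DC is currently not used for SR-IOV virtual functions */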
2555039d
XY
2446 if (amdgpu_sriov_vf(adev))
2447 return false;
2448
4562236b
HW
2449 return amdgpu_device_asic_has_dc_support(adev->asic_type);
2450}
2451
d4535e2c
AG
2452
2453static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
2454{
2455 struct amdgpu_device *adev =
2456 container_of(__work, struct amdgpu_device, xgmi_reset_work);
2457
2458 adev->asic_reset_res = amdgpu_asic_reset(adev);
2459 if (adev->asic_reset_res)
fed184e9 2460 		DRM_WARN("ASIC reset failed with error %d for drm dev %s",
d4535e2c
AG
2461 adev->asic_reset_res, adev->ddev->unique);
2462}
2463
2464
d38ceaf9
AD
2465/**
2466 * amdgpu_device_init - initialize the driver
2467 *
2468 * @adev: amdgpu_device pointer
87e3f136 2469 * @ddev: drm dev pointer
d38ceaf9
AD
2470 * @pdev: pci dev pointer
2471 * @flags: driver flags
2472 *
2473 * Initializes the driver info and hw (all asics).
2474 * Returns 0 for success or an error on failure.
2475 * Called at driver startup.
2476 */
2477int amdgpu_device_init(struct amdgpu_device *adev,
2478 struct drm_device *ddev,
2479 struct pci_dev *pdev,
2480 uint32_t flags)
2481{
2482 int r, i;
2483 bool runtime = false;
95844d20 2484 u32 max_MBps;
d38ceaf9
AD
2485
2486 adev->shutdown = false;
2487 adev->dev = &pdev->dev;
2488 adev->ddev = ddev;
2489 adev->pdev = pdev;
2490 adev->flags = flags;
2f7d10b3 2491 adev->asic_type = flags & AMD_ASIC_MASK;
d38ceaf9 2492 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
593aa2d2
SL
2493 if (amdgpu_emu_mode == 1)
2494 adev->usec_timeout *= 2;
770d13b1 2495 adev->gmc.gart_size = 512 * 1024 * 1024;
d38ceaf9
AD
2496 adev->accel_working = false;
2497 adev->num_rings = 0;
2498 adev->mman.buffer_funcs = NULL;
2499 adev->mman.buffer_funcs_ring = NULL;
2500 adev->vm_manager.vm_pte_funcs = NULL;
3798e9a6 2501 adev->vm_manager.vm_pte_num_rqs = 0;
132f34e4 2502 adev->gmc.gmc_funcs = NULL;
f54d1867 2503 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
b8866c26 2504 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
d38ceaf9
AD
2505
2506 adev->smc_rreg = &amdgpu_invalid_rreg;
2507 adev->smc_wreg = &amdgpu_invalid_wreg;
2508 adev->pcie_rreg = &amdgpu_invalid_rreg;
2509 adev->pcie_wreg = &amdgpu_invalid_wreg;
36b9a952
HR
2510 adev->pciep_rreg = &amdgpu_invalid_rreg;
2511 adev->pciep_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
2512 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
2513 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
2514 adev->didt_rreg = &amdgpu_invalid_rreg;
2515 adev->didt_wreg = &amdgpu_invalid_wreg;
ccdbb20a
RZ
2516 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
2517 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
d38ceaf9
AD
2518 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
2519 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
2520
3e39ab90
AD
2521 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
2522 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
2523 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
d38ceaf9
AD
2524
 2525 	/* mutex initialization is all done here so we
 2526 	 * can call these functions later without locking issues */
d38ceaf9 2527 atomic_set(&adev->irq.ih.lock, 0);
0e5ca0d1 2528 mutex_init(&adev->firmware.mutex);
d38ceaf9
AD
2529 mutex_init(&adev->pm.mutex);
2530 mutex_init(&adev->gfx.gpu_clock_mutex);
2531 mutex_init(&adev->srbm_mutex);
b8866c26 2532 mutex_init(&adev->gfx.pipe_reserve_mutex);
d23ee13f 2533 mutex_init(&adev->gfx.gfx_off_mutex);
d38ceaf9 2534 mutex_init(&adev->grbm_idx_mutex);
d38ceaf9 2535 mutex_init(&adev->mn_lock);
e23b74aa 2536 mutex_init(&adev->virt.vf_errors.lock);
d38ceaf9 2537 hash_init(adev->mn_hash);
13a752e3 2538 mutex_init(&adev->lock_reset);
bb5a2bdf 2539 mutex_init(&adev->virt.dpm_mutex);
d38ceaf9 2540
912dfc84
EQ
2541 r = amdgpu_device_check_arguments(adev);
2542 if (r)
2543 return r;
d38ceaf9 2544
d38ceaf9
AD
2545 spin_lock_init(&adev->mmio_idx_lock);
2546 spin_lock_init(&adev->smc_idx_lock);
2547 spin_lock_init(&adev->pcie_idx_lock);
2548 spin_lock_init(&adev->uvd_ctx_idx_lock);
2549 spin_lock_init(&adev->didt_idx_lock);
ccdbb20a 2550 spin_lock_init(&adev->gc_cac_idx_lock);
16abb5d2 2551 spin_lock_init(&adev->se_cac_idx_lock);
d38ceaf9 2552 spin_lock_init(&adev->audio_endpt_idx_lock);
95844d20 2553 spin_lock_init(&adev->mm_stats.lock);
d38ceaf9 2554
0c4e7fa5
CZ
2555 INIT_LIST_HEAD(&adev->shadow_list);
2556 mutex_init(&adev->shadow_list_lock);
2557
795f2813
AR
2558 INIT_LIST_HEAD(&adev->ring_lru_list);
2559 spin_lock_init(&adev->ring_lru_list_lock);
2560
beff74bc
AD
2561 INIT_DELAYED_WORK(&adev->delayed_init_work,
2562 amdgpu_device_delayed_init_work_handler);
1e317b99
RZ
2563 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
2564 amdgpu_device_delay_enable_gfx_off);
2dc80b00 2565
d4535e2c
AG
2566 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
2567
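	/* a non-zero gfx_off_req_count keeps GFXOFF disabled, see amdgpu_device_delay_enable_gfx_off() */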
d23ee13f 2568 adev->gfx.gfx_off_req_count = 1;
b1ddf548
RZ
2569 adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false;
2570
0fa49558
AX
2571 /* Registers mapping */
2572 /* TODO: block userspace mapping of io register */
da69c161
KW
2573 if (adev->asic_type >= CHIP_BONAIRE) {
2574 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
2575 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
2576 } else {
2577 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
2578 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
2579 }
d38ceaf9 2580
d38ceaf9
AD
2581 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
2582 if (adev->rmmio == NULL) {
2583 return -ENOMEM;
2584 }
2585 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
2586 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
2587
d38ceaf9
AD
2588 /* io port mapping */
2589 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
2590 if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
2591 adev->rio_mem_size = pci_resource_len(adev->pdev, i);
2592 adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
2593 break;
2594 }
2595 }
2596 if (adev->rio_mem == NULL)
b64a18c5 2597 DRM_INFO("PCI I/O BAR is not found.\n");
d38ceaf9 2598
b2109d8e
JX
2599 /* enable PCIE atomic ops */
2600 r = pci_enable_atomic_ops_to_root(adev->pdev,
2601 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
2602 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
2603 if (r) {
2604 adev->have_atomics_support = false;
 2605 		DRM_INFO("PCIE atomic ops are not supported\n");
2606 } else {
2607 adev->have_atomics_support = true;
2608 }
2609
5494d864
AD
2610 amdgpu_device_get_pcie_info(adev);
2611
b239c017
JX
2612 if (amdgpu_mcbp)
2613 DRM_INFO("MCBP is enabled\n");
2614
5f84cc63
JX
2615 if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
2616 adev->enable_mes = true;
2617
f54eeab4 2618 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) {
a190d1c7
XY
2619 r = amdgpu_discovery_init(adev);
2620 if (r) {
2621 dev_err(adev->dev, "amdgpu_discovery_init failed\n");
2622 return r;
2623 }
2624 }
2625
d38ceaf9 2626 /* early init functions */
06ec9070 2627 r = amdgpu_device_ip_early_init(adev);
d38ceaf9
AD
2628 if (r)
2629 return r;
2630
6585661d
OZ
2631 /* doorbell bar mapping and doorbell index init*/
2632 amdgpu_device_doorbell_init(adev);
2633
d38ceaf9
AD
2634 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
2635 /* this will fail for cards that aren't VGA class devices, just
2636 * ignore it */
06ec9070 2637 vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
d38ceaf9 2638
e9bef455 2639 if (amdgpu_device_is_px(ddev))
d38ceaf9 2640 runtime = true;
84c8b22e
LW
2641 if (!pci_is_thunderbolt_attached(adev->pdev))
2642 vga_switcheroo_register_client(adev->pdev,
2643 &amdgpu_switcheroo_ops, runtime);
d38ceaf9
AD
2644 if (runtime)
2645 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
2646
9475a943
SL
2647 if (amdgpu_emu_mode == 1) {
2648 /* post the asic on emulation mode */
2649 emu_soc_asic_init(adev);
bfca0289 2650 goto fence_driver_init;
9475a943 2651 }
bfca0289 2652
4e99a44e
ML
2653 /* detect if we are with an SRIOV vbios */
2654 amdgpu_device_detect_sriov_bios(adev);
048765ad 2655
95e8e59e
AD
2656 /* check if we need to reset the asic
2657 * E.g., driver was not cleanly unloaded previously, etc.
2658 */
f14899fd 2659 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
95e8e59e
AD
2660 r = amdgpu_asic_reset(adev);
2661 if (r) {
2662 dev_err(adev->dev, "asic reset on init failed\n");
2663 goto failed;
2664 }
2665 }
2666
d38ceaf9 2667 /* Post card if necessary */
39c640c0 2668 if (amdgpu_device_need_post(adev)) {
d38ceaf9 2669 if (!adev->bios) {
bec86378 2670 dev_err(adev->dev, "no vBIOS found\n");
83ba126a
AD
2671 r = -EINVAL;
2672 goto failed;
d38ceaf9 2673 }
bec86378 2674 DRM_INFO("GPU posting now...\n");
4e99a44e
ML
2675 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
2676 if (r) {
2677 dev_err(adev->dev, "gpu post error!\n");
2678 goto failed;
2679 }
d38ceaf9
AD
2680 }
2681
88b64e95
AD
2682 if (adev->is_atom_fw) {
2683 /* Initialize clocks */
2684 r = amdgpu_atomfirmware_get_clock_info(adev);
2685 if (r) {
2686 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
e23b74aa 2687 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
88b64e95
AD
2688 goto failed;
2689 }
2690 } else {
a5bde2f9
AD
2691 /* Initialize clocks */
2692 r = amdgpu_atombios_get_clock_info(adev);
2693 if (r) {
2694 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
e23b74aa 2695 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
89041940 2696 goto failed;
a5bde2f9
AD
2697 }
2698 /* init i2c buses */
4562236b
HW
2699 if (!amdgpu_device_has_dc_support(adev))
2700 amdgpu_atombios_i2c_init(adev);
2c1a2784 2701 }
d38ceaf9 2702
bfca0289 2703fence_driver_init:
d38ceaf9
AD
2704 /* Fence driver */
2705 r = amdgpu_fence_driver_init(adev);
2c1a2784
AD
2706 if (r) {
2707 dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
e23b74aa 2708 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
83ba126a 2709 goto failed;
2c1a2784 2710 }
d38ceaf9
AD
2711
2712 /* init the mode config */
2713 drm_mode_config_init(adev->ddev);
2714
06ec9070 2715 r = amdgpu_device_ip_init(adev);
d38ceaf9 2716 if (r) {
8840a387 2717 /* failed in exclusive mode due to timeout */
2718 if (amdgpu_sriov_vf(adev) &&
2719 !amdgpu_sriov_runtime(adev) &&
2720 amdgpu_virt_mmio_blocked(adev) &&
2721 !amdgpu_virt_wait_reset(adev)) {
2722 dev_err(adev->dev, "VF exclusive mode timeout\n");
1daee8b4
PD
2723 /* Don't send request since VF is inactive. */
2724 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
2725 adev->virt.ops = NULL;
8840a387 2726 r = -EAGAIN;
2727 goto failed;
2728 }
06ec9070 2729 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
e23b74aa 2730 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
72d3f592
ED
2731 if (amdgpu_virt_request_full_gpu(adev, false))
2732 amdgpu_virt_release_full_gpu(adev, false);
83ba126a 2733 goto failed;
d38ceaf9
AD
2734 }
2735
2736 adev->accel_working = true;
2737
e59c0205
AX
2738 amdgpu_vm_check_compute_bug(adev);
2739
95844d20
MO
2740 /* Initialize the buffer migration limit. */
2741 if (amdgpu_moverate >= 0)
2742 max_MBps = amdgpu_moverate;
2743 else
2744 max_MBps = 8; /* Allow 8 MB/s. */
2745 /* Get a log2 for easy divisions. */
2746 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
2747
9bc92b9c
ML
2748 amdgpu_fbdev_init(adev);
2749
e9bc1bf7
YT
2750 if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))
2751 amdgpu_pm_virt_sysfs_init(adev);
2752
d2f52ac8
RZ
2753 r = amdgpu_pm_sysfs_init(adev);
2754 if (r)
2755 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
2756
5bb23532
OM
2757 r = amdgpu_ucode_sysfs_init(adev);
2758 if (r)
2759 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
2760
75758255 2761 r = amdgpu_debugfs_gem_init(adev);
3f14e623 2762 if (r)
d38ceaf9 2763 DRM_ERROR("registering gem debugfs failed (%d).\n", r);
d38ceaf9
AD
2764
2765 r = amdgpu_debugfs_regs_init(adev);
3f14e623 2766 if (r)
d38ceaf9 2767 DRM_ERROR("registering register debugfs failed (%d).\n", r);
d38ceaf9 2768
50ab2533 2769 r = amdgpu_debugfs_firmware_init(adev);
3f14e623 2770 if (r)
50ab2533 2771 DRM_ERROR("registering firmware debugfs failed (%d).\n", r);
50ab2533 2772
763efb6c 2773 r = amdgpu_debugfs_init(adev);
db95e218 2774 if (r)
763efb6c 2775 DRM_ERROR("Creating debugfs files failed (%d).\n", r);
db95e218 2776
d38ceaf9
AD
2777 if ((amdgpu_testing & 1)) {
2778 if (adev->accel_working)
2779 amdgpu_test_moves(adev);
2780 else
2781 DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
2782 }
d38ceaf9
AD
2783 if (amdgpu_benchmarking) {
2784 if (adev->accel_working)
2785 amdgpu_benchmark(adev, amdgpu_benchmarking);
2786 else
2787 DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
2788 }
2789
2790 /* enable clockgating, etc. after ib tests, etc. since some blocks require
2791 * explicit gating rather than handling it automatically.
2792 */
06ec9070 2793 r = amdgpu_device_ip_late_init(adev);
2c1a2784 2794 if (r) {
06ec9070 2795 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
e23b74aa 2796 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
83ba126a 2797 goto failed;
2c1a2784 2798 }
d38ceaf9 2799
108c6a63 2800 /* must succeed. */
511fdbc3 2801 amdgpu_ras_resume(adev);
108c6a63 2802
beff74bc
AD
2803 queue_delayed_work(system_wq, &adev->delayed_init_work,
2804 msecs_to_jiffies(AMDGPU_RESUME_MS));
2805
dcea6e65
KR
2806 r = device_create_file(adev->dev, &dev_attr_pcie_replay_count);
2807 if (r) {
2808 dev_err(adev->dev, "Could not create pcie_replay_count");
2809 return r;
2810 }
108c6a63 2811
d155bef0
AB
2812 if (IS_ENABLED(CONFIG_PERF_EVENTS))
2813 r = amdgpu_pmu_init(adev);
9c7c85f7
JK
2814 if (r)
2815 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
2816
d38ceaf9 2817 return 0;
83ba126a
AD
2818
2819failed:
89041940 2820 amdgpu_vf_error_trans_all(adev);
83ba126a
AD
2821 if (runtime)
2822 vga_switcheroo_fini_domain_pm_ops(adev->dev);
8840a387 2823
83ba126a 2824 return r;
d38ceaf9
AD
2825}
2826
d38ceaf9
AD
2827/**
2828 * amdgpu_device_fini - tear down the driver
2829 *
2830 * @adev: amdgpu_device pointer
2831 *
2832 * Tear down the driver info (all asics).
2833 * Called at driver shutdown.
2834 */
2835void amdgpu_device_fini(struct amdgpu_device *adev)
2836{
2837 int r;
2838
2839 DRM_INFO("amdgpu: finishing device.\n");
2840 adev->shutdown = true;
e5b03032
ML
2841 /* disable all interrupts */
2842 amdgpu_irq_disable_all(adev);
ff97cba8
ML
2843 if (adev->mode_info.mode_config_initialized){
2844 if (!amdgpu_device_has_dc_support(adev))
c2d88e06 2845 drm_helper_force_disable_all(adev->ddev);
ff97cba8
ML
2846 else
2847 drm_atomic_helper_shutdown(adev->ddev);
2848 }
d38ceaf9 2849 amdgpu_fence_driver_fini(adev);
58e955d9 2850 amdgpu_pm_sysfs_fini(adev);
d38ceaf9 2851 amdgpu_fbdev_fini(adev);
06ec9070 2852 r = amdgpu_device_ip_fini(adev);
ab4fe3e1
HR
2853 if (adev->firmware.gpu_info_fw) {
2854 release_firmware(adev->firmware.gpu_info_fw);
2855 adev->firmware.gpu_info_fw = NULL;
2856 }
d38ceaf9 2857 adev->accel_working = false;
beff74bc 2858 cancel_delayed_work_sync(&adev->delayed_init_work);
d38ceaf9 2859 /* free i2c buses */
4562236b
HW
2860 if (!amdgpu_device_has_dc_support(adev))
2861 amdgpu_i2c_fini(adev);
bfca0289
SL
2862
2863 if (amdgpu_emu_mode != 1)
2864 amdgpu_atombios_fini(adev);
2865
d38ceaf9
AD
2866 kfree(adev->bios);
2867 adev->bios = NULL;
84c8b22e
LW
2868 if (!pci_is_thunderbolt_attached(adev->pdev))
2869 vga_switcheroo_unregister_client(adev->pdev);
83ba126a
AD
2870 if (adev->flags & AMD_IS_PX)
2871 vga_switcheroo_fini_domain_pm_ops(adev->dev);
d38ceaf9
AD
2872 vga_client_register(adev->pdev, NULL, NULL, NULL);
2873 if (adev->rio_mem)
2874 pci_iounmap(adev->pdev, adev->rio_mem);
2875 adev->rio_mem = NULL;
2876 iounmap(adev->rmmio);
2877 adev->rmmio = NULL;
06ec9070 2878 amdgpu_device_doorbell_fini(adev);
e9bc1bf7
YT
2879 if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))
2880 amdgpu_pm_virt_sysfs_fini(adev);
2881
d38ceaf9 2882 amdgpu_debugfs_regs_cleanup(adev);
dcea6e65 2883 device_remove_file(adev->dev, &dev_attr_pcie_replay_count);
5bb23532 2884 amdgpu_ucode_sysfs_fini(adev);
d155bef0
AB
2885 if (IS_ENABLED(CONFIG_PERF_EVENTS))
2886 amdgpu_pmu_fini(adev);
6698a3d0 2887 amdgpu_debugfs_preempt_cleanup(adev);
f54eeab4 2888 if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
a190d1c7 2889 amdgpu_discovery_fini(adev);
d38ceaf9
AD
2890}
2891
2892
2893/*
2894 * Suspend & resume.
2895 */
2896/**
810ddc3a 2897 * amdgpu_device_suspend - initiate device suspend
d38ceaf9 2898 *
87e3f136
DP
2899 * @dev: drm dev pointer
2900 * @suspend: suspend state
 2901  * @fbcon: notify the fbdev of suspend
d38ceaf9
AD
2902 *
2903 * Puts the hw in the suspend state (all asics).
2904 * Returns 0 for success or an error on failure.
2905 * Called at driver suspend.
2906 */
810ddc3a 2907int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
d38ceaf9
AD
2908{
2909 struct amdgpu_device *adev;
2910 struct drm_crtc *crtc;
2911 struct drm_connector *connector;
5ceb54c6 2912 int r;
d38ceaf9
AD
2913
2914 if (dev == NULL || dev->dev_private == NULL) {
2915 return -ENODEV;
2916 }
2917
2918 adev = dev->dev_private;
2919
2920 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
2921 return 0;
2922
44779b43 2923 adev->in_suspend = true;
d38ceaf9
AD
2924 drm_kms_helper_poll_disable(dev);
2925
5f818173
S
2926 if (fbcon)
2927 amdgpu_fbdev_set_suspend(adev, 1);
2928
beff74bc 2929 cancel_delayed_work_sync(&adev->delayed_init_work);
a5459475 2930
4562236b
HW
2931 if (!amdgpu_device_has_dc_support(adev)) {
2932 /* turn off display hw */
2933 drm_modeset_lock_all(dev);
2934 list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
2935 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
2936 }
2937 drm_modeset_unlock_all(dev);
fe1053b7
AD
2938 /* unpin the front buffers and cursors */
2939 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
2940 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
2941 struct drm_framebuffer *fb = crtc->primary->fb;
2942 struct amdgpu_bo *robj;
2943
91334223 2944 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
2945 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
2946 r = amdgpu_bo_reserve(aobj, true);
2947 if (r == 0) {
2948 amdgpu_bo_unpin(aobj);
2949 amdgpu_bo_unreserve(aobj);
2950 }
756e6880 2951 }
756e6880 2952
fe1053b7
AD
2953 if (fb == NULL || fb->obj[0] == NULL) {
2954 continue;
2955 }
2956 robj = gem_to_amdgpu_bo(fb->obj[0]);
2957 /* don't unpin kernel fb objects */
2958 if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
2959 r = amdgpu_bo_reserve(robj, true);
2960 if (r == 0) {
2961 amdgpu_bo_unpin(robj);
2962 amdgpu_bo_unreserve(robj);
2963 }
d38ceaf9
AD
2964 }
2965 }
2966 }
fe1053b7
AD
2967
2968 amdgpu_amdkfd_suspend(adev);
2969
5e6932fe 2970 amdgpu_ras_suspend(adev);
2971
fe1053b7
AD
2972 r = amdgpu_device_ip_suspend_phase1(adev);
2973
d38ceaf9
AD
2974 /* evict vram memory */
2975 amdgpu_bo_evict_vram(adev);
2976
5ceb54c6 2977 amdgpu_fence_driver_suspend(adev);
d38ceaf9 2978
fe1053b7 2979 r = amdgpu_device_ip_suspend_phase2(adev);
d38ceaf9 2980
a0a71e49
AD
2981 /* evict remaining vram memory
2982 * This second call to evict vram is to evict the gart page table
2983 * using the CPU.
2984 */
d38ceaf9
AD
2985 amdgpu_bo_evict_vram(adev);
2986
2987 pci_save_state(dev->pdev);
2988 if (suspend) {
2989 /* Shut down the device */
2990 pci_disable_device(dev->pdev);
2991 pci_set_power_state(dev->pdev, PCI_D3hot);
74b0b157 2992 } else {
2993 r = amdgpu_asic_reset(adev);
2994 if (r)
2995 DRM_ERROR("amdgpu asic reset failed\n");
d38ceaf9
AD
2996 }
2997
d38ceaf9
AD
2998 return 0;
2999}
3000
3001/**
810ddc3a 3002 * amdgpu_device_resume - initiate device resume
d38ceaf9 3003 *
87e3f136
DP
3004 * @dev: drm dev pointer
3005 * @resume: resume state
 3006  * @fbcon: notify the fbdev of resume
d38ceaf9
AD
3007 *
3008 * Bring the hw back to operating state (all asics).
3009 * Returns 0 for success or an error on failure.
3010 * Called at driver resume.
3011 */
810ddc3a 3012int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
d38ceaf9
AD
3013{
3014 struct drm_connector *connector;
3015 struct amdgpu_device *adev = dev->dev_private;
756e6880 3016 struct drm_crtc *crtc;
03161a6e 3017 int r = 0;
d38ceaf9
AD
3018
3019 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
3020 return 0;
3021
d38ceaf9
AD
3022 if (resume) {
3023 pci_set_power_state(dev->pdev, PCI_D0);
3024 pci_restore_state(dev->pdev);
74b0b157 3025 r = pci_enable_device(dev->pdev);
03161a6e 3026 if (r)
4d3b9ae5 3027 return r;
d38ceaf9
AD
3028 }
3029
3030 /* post card */
39c640c0 3031 if (amdgpu_device_need_post(adev)) {
74b0b157 3032 r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
3033 if (r)
3034 DRM_ERROR("amdgpu asic init failed\n");
3035 }
d38ceaf9 3036
06ec9070 3037 r = amdgpu_device_ip_resume(adev);
e6707218 3038 if (r) {
06ec9070 3039 DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
4d3b9ae5 3040 return r;
e6707218 3041 }
5ceb54c6
AD
3042 amdgpu_fence_driver_resume(adev);
3043
d38ceaf9 3044
06ec9070 3045 r = amdgpu_device_ip_late_init(adev);
03161a6e 3046 if (r)
4d3b9ae5 3047 return r;
d38ceaf9 3048
beff74bc
AD
3049 queue_delayed_work(system_wq, &adev->delayed_init_work,
3050 msecs_to_jiffies(AMDGPU_RESUME_MS));
3051
fe1053b7
AD
3052 if (!amdgpu_device_has_dc_support(adev)) {
3053 /* pin cursors */
3054 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
3055 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
3056
91334223 3057 if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
fe1053b7
AD
3058 struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
3059 r = amdgpu_bo_reserve(aobj, true);
3060 if (r == 0) {
3061 r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
3062 if (r != 0)
3063 DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
3064 amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
3065 amdgpu_bo_unreserve(aobj);
3066 }
756e6880
AD
3067 }
3068 }
3069 }
ba997709
YZ
3070 r = amdgpu_amdkfd_resume(adev);
3071 if (r)
3072 return r;
756e6880 3073
96a5d8d4 3074 /* Make sure IB tests flushed */
beff74bc 3075 flush_delayed_work(&adev->delayed_init_work);
96a5d8d4 3076
d38ceaf9
AD
3077 /* blat the mode back in */
3078 if (fbcon) {
4562236b
HW
3079 if (!amdgpu_device_has_dc_support(adev)) {
3080 /* pre DCE11 */
3081 drm_helper_resume_force_mode(dev);
3082
3083 /* turn on display hw */
3084 drm_modeset_lock_all(dev);
3085 list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
3086 drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
3087 }
3088 drm_modeset_unlock_all(dev);
d38ceaf9 3089 }
4d3b9ae5 3090 amdgpu_fbdev_set_suspend(adev, 0);
d38ceaf9
AD
3091 }
3092
3093 drm_kms_helper_poll_enable(dev);
23a1a9e5 3094
5e6932fe 3095 amdgpu_ras_resume(adev);
3096
23a1a9e5
L
3097 /*
3098 * Most of the connector probing functions try to acquire runtime pm
3099 * refs to ensure that the GPU is powered on when connector polling is
3100 * performed. Since we're calling this from a runtime PM callback,
3101 * trying to acquire rpm refs will cause us to deadlock.
3102 *
3103 * Since we're guaranteed to be holding the rpm lock, it's safe to
3104 * temporarily disable the rpm helpers so this doesn't deadlock us.
3105 */
3106#ifdef CONFIG_PM
3107 dev->dev->power.disable_depth++;
3108#endif
4562236b
HW
3109 if (!amdgpu_device_has_dc_support(adev))
3110 drm_helper_hpd_irq_event(dev);
3111 else
3112 drm_kms_helper_hotplug_event(dev);
23a1a9e5
L
3113#ifdef CONFIG_PM
3114 dev->dev->power.disable_depth--;
3115#endif
44779b43
RZ
3116 adev->in_suspend = false;
3117
4d3b9ae5 3118 return 0;
d38ceaf9
AD
3119}
3120
e3ecdffa
AD
3121/**
3122 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
3123 *
3124 * @adev: amdgpu_device pointer
3125 *
3126 * The list of all the hardware IPs that make up the asic is walked and
3127 * the check_soft_reset callbacks are run. check_soft_reset determines
3128 * if the asic is still hung or not.
3129 * Returns true if any of the IPs are still in a hung state, false if not.
3130 */
06ec9070 3131static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
63fbf42f
CZ
3132{
3133 int i;
3134 bool asic_hang = false;
3135
f993d628
ML
3136 if (amdgpu_sriov_vf(adev))
3137 return true;
3138
8bc04c29
AD
3139 if (amdgpu_asic_need_full_reset(adev))
3140 return true;
3141
63fbf42f 3142 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3143 if (!adev->ip_blocks[i].status.valid)
63fbf42f 3144 continue;
a1255107
AD
3145 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
3146 adev->ip_blocks[i].status.hang =
3147 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
3148 if (adev->ip_blocks[i].status.hang) {
3149 DRM_INFO("IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
63fbf42f
CZ
3150 asic_hang = true;
3151 }
3152 }
3153 return asic_hang;
3154}
3155
e3ecdffa
AD
3156/**
3157 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
3158 *
3159 * @adev: amdgpu_device pointer
3160 *
3161 * The list of all the hardware IPs that make up the asic is walked and the
3162 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
3163 * handles any IP specific hardware or software state changes that are
3164 * necessary for a soft reset to succeed.
3165 * Returns 0 on success, negative error code on failure.
3166 */
06ec9070 3167static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
d31a501e
CZ
3168{
3169 int i, r = 0;
3170
3171 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3172 if (!adev->ip_blocks[i].status.valid)
d31a501e 3173 continue;
a1255107
AD
3174 if (adev->ip_blocks[i].status.hang &&
3175 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
3176 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
d31a501e
CZ
3177 if (r)
3178 return r;
3179 }
3180 }
3181
3182 return 0;
3183}
3184
e3ecdffa
AD
3185/**
3186 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
3187 *
3188 * @adev: amdgpu_device pointer
3189 *
3190 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
3191 * reset is necessary to recover.
3192 * Returns true if a full asic reset is required, false if not.
3193 */
06ec9070 3194static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
35d782fe 3195{
da146d3b
AD
3196 int i;
3197
8bc04c29
AD
3198 if (amdgpu_asic_need_full_reset(adev))
3199 return true;
3200
da146d3b 3201 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3202 if (!adev->ip_blocks[i].status.valid)
da146d3b 3203 continue;
a1255107
AD
3204 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
3205 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
3206 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
98512bb8
KW
3207 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
3208 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
a1255107 3209 if (adev->ip_blocks[i].status.hang) {
da146d3b
AD
3210 DRM_INFO("Some block need full reset!\n");
3211 return true;
3212 }
3213 }
35d782fe
CZ
3214 }
3215 return false;
3216}
3217
e3ecdffa
AD
3218/**
3219 * amdgpu_device_ip_soft_reset - do a soft reset
3220 *
3221 * @adev: amdgpu_device pointer
3222 *
3223 * The list of all the hardware IPs that make up the asic is walked and the
3224 * soft_reset callbacks are run if the block is hung. soft_reset handles any
3225 * IP specific hardware or software state changes that are necessary to soft
3226 * reset the IP.
3227 * Returns 0 on success, negative error code on failure.
3228 */
06ec9070 3229static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3230{
3231 int i, r = 0;
3232
3233 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3234 if (!adev->ip_blocks[i].status.valid)
35d782fe 3235 continue;
a1255107
AD
3236 if (adev->ip_blocks[i].status.hang &&
3237 adev->ip_blocks[i].version->funcs->soft_reset) {
3238 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
35d782fe
CZ
3239 if (r)
3240 return r;
3241 }
3242 }
3243
3244 return 0;
3245}
3246
e3ecdffa
AD
3247/**
3248 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
3249 *
3250 * @adev: amdgpu_device pointer
3251 *
3252 * The list of all the hardware IPs that make up the asic is walked and the
3253 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
3254 * handles any IP specific hardware or software state changes that are
3255 * necessary after the IP has been soft reset.
3256 * Returns 0 on success, negative error code on failure.
3257 */
06ec9070 3258static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
35d782fe
CZ
3259{
3260 int i, r = 0;
3261
3262 for (i = 0; i < adev->num_ip_blocks; i++) {
a1255107 3263 if (!adev->ip_blocks[i].status.valid)
35d782fe 3264 continue;
a1255107
AD
3265 if (adev->ip_blocks[i].status.hang &&
3266 adev->ip_blocks[i].version->funcs->post_soft_reset)
3267 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
35d782fe
CZ
3268 if (r)
3269 return r;
3270 }
3271
3272 return 0;
3273}
3274
e3ecdffa 3275/**
c33adbc7 3276 * amdgpu_device_recover_vram - Recover some VRAM contents
e3ecdffa
AD
3277 *
3278 * @adev: amdgpu_device pointer
3279 *
3280 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
3281 * restore things like GPUVM page tables after a GPU reset where
3282 * the contents of VRAM might be lost.
403009bf
CK
3283 *
3284 * Returns:
3285 * 0 on success, negative error code on failure.
e3ecdffa 3286 */
c33adbc7 3287static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
c41d1cf6 3288{
c41d1cf6 3289 struct dma_fence *fence = NULL, *next = NULL;
403009bf
CK
3290 struct amdgpu_bo *shadow;
3291 long r = 1, tmo;
c41d1cf6
ML
3292
3293 if (amdgpu_sriov_runtime(adev))
b045d3af 3294 tmo = msecs_to_jiffies(8000);
c41d1cf6
ML
3295 else
3296 tmo = msecs_to_jiffies(100);
3297
3298 DRM_INFO("recover vram bo from shadow start\n");
3299 mutex_lock(&adev->shadow_list_lock);
403009bf
CK
3300 list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
3301
3302 /* No need to recover an evicted BO */
3303 if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
b575f10d 3304 shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
403009bf
CK
3305 shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
3306 continue;
3307
3308 r = amdgpu_bo_restore_shadow(shadow, &next);
3309 if (r)
3310 break;
3311
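		/*
		 * Wait for the previously scheduled restore while the next one is
		 * already queued; tmo carries the remaining timeout across iterations.
		 */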
c41d1cf6 3312 if (fence) {
1712fb1a 3313 tmo = dma_fence_wait_timeout(fence, false, tmo);
403009bf
CK
3314 dma_fence_put(fence);
3315 fence = next;
1712fb1a 3316 if (tmo == 0) {
3317 r = -ETIMEDOUT;
c41d1cf6 3318 break;
1712fb1a 3319 } else if (tmo < 0) {
3320 r = tmo;
3321 break;
3322 }
403009bf
CK
3323 } else {
3324 fence = next;
c41d1cf6 3325 }
c41d1cf6
ML
3326 }
3327 mutex_unlock(&adev->shadow_list_lock);
3328
403009bf
CK
3329 if (fence)
3330 tmo = dma_fence_wait_timeout(fence, false, tmo);
c41d1cf6
ML
3331 dma_fence_put(fence);
3332
1712fb1a 3333 if (r < 0 || tmo <= 0) {
3334 DRM_ERROR("recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
403009bf
CK
3335 return -EIO;
3336 }
c41d1cf6 3337
403009bf
CK
3338 DRM_INFO("recover vram bo from shadow done\n");
3339 return 0;
c41d1cf6
ML
3340}
3341
a90ad3c2 3342
e3ecdffa 3343/**
06ec9070 3344 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5740682e
ML
3345 *
3346 * @adev: amdgpu device pointer
87e3f136 3347 * @from_hypervisor: request from hypervisor
5740682e
ML
3348 *
 3349  * Do a VF FLR and reinitialize the ASIC.
3f48c681 3350  * Returns 0 on success, otherwise an error code.
e3ecdffa
AD
3351 */
3352static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
3353 bool from_hypervisor)
5740682e
ML
3354{
3355 int r;
3356
3357 if (from_hypervisor)
3358 r = amdgpu_virt_request_full_gpu(adev, true);
3359 else
3360 r = amdgpu_virt_reset_gpu(adev);
3361 if (r)
3362 return r;
a90ad3c2 3363
f81e8d53
WL
3364 amdgpu_amdkfd_pre_reset(adev);
3365
a90ad3c2 3366 /* Resume IP prior to SMC */
06ec9070 3367 r = amdgpu_device_ip_reinit_early_sriov(adev);
5740682e
ML
3368 if (r)
3369 goto error;
a90ad3c2
ML
3370
3371 /* we need to recover the GART prior to resuming SMC/CP/SDMA */
c1c7ce8f 3372 amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
a90ad3c2 3373
7a3e0bb2
RZ
3374 r = amdgpu_device_fw_loading(adev);
3375 if (r)
3376 return r;
3377
a90ad3c2 3378 /* now we are okay to resume SMC/CP/SDMA */
06ec9070 3379 r = amdgpu_device_ip_reinit_late_sriov(adev);
5740682e
ML
3380 if (r)
3381 goto error;
a90ad3c2
ML
3382
3383 amdgpu_irq_gpu_reset_resume_helper(adev);
5740682e 3384 r = amdgpu_ib_ring_tests(adev);
f81e8d53 3385 amdgpu_amdkfd_post_reset(adev);
a90ad3c2 3386
abc34253 3387error:
d3c117e5 3388 amdgpu_virt_init_data_exchange(adev);
abc34253 3389 amdgpu_virt_release_full_gpu(adev, true);
c41d1cf6
ML
3390 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
3391 atomic_inc(&adev->vram_lost_counter);
c33adbc7 3392 r = amdgpu_device_recover_vram(adev);
a90ad3c2
ML
3393 }
3394
3395 return r;
3396}
3397
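/*
 * Illustrative sketch (not part of the upstream driver): note that the
 * error: label above is reached on the success path as well, so
 * amdgpu_virt_release_full_gpu() always pairs with the earlier
 * request/reset of the GPU from the host. The helpers below are
 * hypothetical stand-ins for that acquire/work/release shape.
 */
int example_acquire_gpu(void);
int example_reinit_early(void);
int example_reinit_late(void);
void example_release_gpu(void);

static int example_vf_reset(void)
{
	int r;

	r = example_acquire_gpu();
	if (r)
		return r;

	r = example_reinit_early();
	if (r)
		goto release;

	r = example_reinit_late();

release:
	/* Reached on success and on failure alike, mirroring error: above. */
	example_release_gpu();
	return r;
}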
12938fad
CK
3398/**
3399 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
3400 *
3401 * @adev: amdgpu device pointer
3402 *
3403 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
3404 * a hung GPU.
3405 */
3406bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
3407{
3408 if (!amdgpu_device_ip_check_soft_reset(adev)) {
3409 DRM_INFO("Timeout, but no hardware hang detected.\n");
3410 return false;
3411 }
3412
3ba7b418
AG
3413 if (amdgpu_gpu_recovery == 0)
3414 goto disabled;
3415
3416 if (amdgpu_sriov_vf(adev))
3417 return true;
3418
3419 if (amdgpu_gpu_recovery == -1) {
3420 switch (adev->asic_type) {
fc42d47c
AG
3421 case CHIP_BONAIRE:
3422 case CHIP_HAWAII:
3ba7b418
AG
3423 case CHIP_TOPAZ:
3424 case CHIP_TONGA:
3425 case CHIP_FIJI:
3426 case CHIP_POLARIS10:
3427 case CHIP_POLARIS11:
3428 case CHIP_POLARIS12:
3429 case CHIP_VEGAM:
3430 case CHIP_VEGA20:
3431 case CHIP_VEGA10:
3432 case CHIP_VEGA12:
3433 break;
3434 default:
3435 goto disabled;
3436 }
12938fad
CK
3437 }
3438
3439 return true;
3ba7b418
AG
3440
3441disabled:
3442 DRM_INFO("GPU recovery disabled.\n");
3443 return false;
12938fad
CK
3444}
3445
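/*
 * Illustrative sketch (not part of the upstream driver): a job-timeout
 * (TDR) handler would typically consult this helper before kicking off
 * recovery. Only amdgpu_device_should_recover_gpu() and
 * amdgpu_device_gpu_recover() are real functions from this file; the
 * handler shape itself is simplified for illustration.
 */
static void example_job_timedout(struct amdgpu_device *adev,
				 struct amdgpu_job *job)
{
	if (amdgpu_device_should_recover_gpu(adev))
		amdgpu_device_gpu_recover(adev, job);
	else
		DRM_INFO("Timeout ignored, GPU recovery not attempted.\n");
}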
5c6dd71e 3446
26bc5340
AG
3447static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
3448 struct amdgpu_job *job,
3449 bool *need_full_reset_arg)
3450{
3451 int i, r = 0;
3452 bool need_full_reset = *need_full_reset_arg;
71182665 3453
71182665 3454 /* block all schedulers and reset given job's ring */
0875dc9e
CZ
3455 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3456 struct amdgpu_ring *ring = adev->rings[i];
3457
51687759 3458 if (!ring || !ring->sched.thread)
0875dc9e 3459 continue;
5740682e 3460
2f9d4084
ML
3461 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
3462 amdgpu_fence_driver_force_completion(ring);
0875dc9e 3463 }
d38ceaf9 3464
222b5f04
AG
3465 if (job)
3466 drm_sched_increase_karma(&job->base);
3467
1d721ed6 3468 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
26bc5340
AG
3469 if (!amdgpu_sriov_vf(adev)) {
3470
3471 if (!need_full_reset)
3472 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
3473
3474 if (!need_full_reset) {
3475 amdgpu_device_ip_pre_soft_reset(adev);
3476 r = amdgpu_device_ip_soft_reset(adev);
3477 amdgpu_device_ip_post_soft_reset(adev);
3478 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
3479 DRM_INFO("soft reset failed, will fallback to full reset!\n");
3480 need_full_reset = true;
3481 }
3482 }
3483
3484 if (need_full_reset)
3485 r = amdgpu_device_ip_suspend(adev);
3486
3487 *need_full_reset_arg = need_full_reset;
3488 }
3489
3490 return r;
3491}
3492
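/*
 * Illustrative sketch (not part of the upstream driver): the pre-reset
 * path above tries a targeted soft reset first and only escalates to a
 * full ASIC reset when the soft reset fails or the blocks still look
 * hung. The probe/reset helpers below are hypothetical stand-ins.
 */
bool example_soft_reset_possible(void);
void example_pre_soft_reset(void);
int example_soft_reset(void);
void example_post_soft_reset(void);
bool example_still_hung(void);

static bool example_needs_full_reset(void)
{
	int r;

	if (!example_soft_reset_possible())
		return true;

	example_pre_soft_reset();		/* quiesce the hung blocks */
	r = example_soft_reset();
	example_post_soft_reset();		/* restore per-block state */

	/* Escalate if the soft reset failed or something still hangs. */
	return r || example_still_hung();
}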
3493static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
3494 struct list_head *device_list_handle,
3495 bool *need_full_reset_arg)
3496{
3497 struct amdgpu_device *tmp_adev = NULL;
3498 bool need_full_reset = *need_full_reset_arg, vram_lost = false;
3499 int r = 0;
3500
3501 /*
3502 * ASIC reset has to be done on all XGMI hive nodes ASAP
3503 * to allow proper link negotiation in FW (within 1 sec)
3504 */
3505 if (need_full_reset) {
3506 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
d4535e2c
AG
3507 /* For XGMI run all resets in parallel to speed up the process */
3508 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
3509 if (!queue_work(system_highpri_wq, &tmp_adev->xgmi_reset_work))
3510 r = -EALREADY;
3511 } else
3512 r = amdgpu_asic_reset(tmp_adev);
3513
3514 if (r) {
fed184e9 3515 DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s",
26bc5340 3516 r, tmp_adev->ddev->unique);
d4535e2c
AG
3517 break;
3518 }
3519 }
3520
3521 /* For XGMI wait for all PSP resets to complete before proceed */
3522 if (!r) {
3523 list_for_each_entry(tmp_adev, device_list_handle,
3524 gmc.xgmi.head) {
3525 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
3526 flush_work(&tmp_adev->xgmi_reset_work);
3527 r = tmp_adev->asic_reset_res;
3528 if (r)
3529 break;
3530 }
3531 }
2be4c4a9 3532
3533 list_for_each_entry(tmp_adev, device_list_handle,
3534 gmc.xgmi.head) {
3535 amdgpu_ras_reserve_bad_pages(tmp_adev);
3536 }
26bc5340
AG
3537 }
3538 }
3539
3540
3541 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3542 if (need_full_reset) {
3543 /* post card */
3544 if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
3545 DRM_WARN("asic atom init failed!");
3546
3547 if (!r) {
3548 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
3549 r = amdgpu_device_ip_resume_phase1(tmp_adev);
3550 if (r)
3551 goto out;
3552
3553 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
3554 if (vram_lost) {
77e7f829 3555 DRM_INFO("VRAM is lost due to GPU reset!\n");
26bc5340
AG
3556 atomic_inc(&tmp_adev->vram_lost_counter);
3557 }
3558
3559 r = amdgpu_gtt_mgr_recover(
3560 &tmp_adev->mman.bdev.man[TTM_PL_TT]);
3561 if (r)
3562 goto out;
3563
3564 r = amdgpu_device_fw_loading(tmp_adev);
3565 if (r)
3566 return r;
3567
3568 r = amdgpu_device_ip_resume_phase2(tmp_adev);
3569 if (r)
3570 goto out;
3571
3572 if (vram_lost)
3573 amdgpu_device_fill_reset_magic(tmp_adev);
3574
fdafb359
EQ
3575 /*
3576 * Add this ASIC back as tracked since the reset has
3577 * already completed successfully.
3578 */
3579 amdgpu_register_gpu_instance(tmp_adev);
3580
7c04ca50 3581 r = amdgpu_device_ip_late_init(tmp_adev);
3582 if (r)
3583 goto out;
3584
e79a04d5 3585 /* must succeed. */
511fdbc3 3586 amdgpu_ras_resume(tmp_adev);
e79a04d5 3587
26bc5340
AG
3588 /* Update PSP FW topology after reset */
3589 if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
3590 r = amdgpu_xgmi_update_topology(hive, tmp_adev);
3591 }
3592 }
3593
3594
3595out:
3596 if (!r) {
3597 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
3598 r = amdgpu_ib_ring_tests(tmp_adev);
3599 if (r) {
3600 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
3601 r = amdgpu_device_ip_suspend(tmp_adev);
3602 need_full_reset = true;
3603 r = -EAGAIN;
3604 goto end;
3605 }
3606 }
3607
3608 if (!r)
3609 r = amdgpu_device_recover_vram(tmp_adev);
3610 else
3611 tmp_adev->asic_reset_res = r;
3612 }
3613
3614end:
3615 *need_full_reset_arg = need_full_reset;
3616 return r;
3617}
3618
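/*
 * Illustrative sketch (not part of the upstream driver): for XGMI hives
 * the loop above fans each node's ASIC reset out to system_highpri_wq and
 * then fans back in with flush_work(), so all nodes reset in parallel and
 * their links can renegotiate within the firmware's window. The per-node
 * context below is hypothetical; the driver keeps its xgmi_reset_work
 * inside struct amdgpu_device instead.
 */
struct example_node_reset {
	struct work_struct work;
	int result;
};

static void example_node_reset_fn(struct work_struct *work)
{
	struct example_node_reset *nr =
		container_of(work, struct example_node_reset, work);

	nr->result = 0;		/* stand-in for amdgpu_asic_reset() */
}

static int example_reset_all_nodes(struct example_node_reset *nodes, int count)
{
	int i, r = 0;

	for (i = 0; i < count; i++) {
		INIT_WORK(&nodes[i].work, example_node_reset_fn);
		if (!queue_work(system_highpri_wq, &nodes[i].work))
			return -EALREADY;	/* work was already queued */
	}

	for (i = 0; i < count; i++) {
		flush_work(&nodes[i].work);
		if (!r)
			r = nodes[i].result;
	}

	return r;
}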
1d721ed6 3619static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
26bc5340 3620{
1d721ed6
AG
3621 if (trylock) {
3622 if (!mutex_trylock(&adev->lock_reset))
3623 return false;
3624 } else
3625 mutex_lock(&adev->lock_reset);
5740682e 3626
26bc5340
AG
3627 atomic_inc(&adev->gpu_reset_counter);
3628 adev->in_gpu_reset = 1;
7b184b00 3629 /* Block kfd: SRIOV would do it separately */
3630 if (!amdgpu_sriov_vf(adev))
3631 amdgpu_amdkfd_pre_reset(adev);
1d721ed6
AG
3632
3633 return true;
26bc5340 3634}
d38ceaf9 3635
26bc5340
AG
3636static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
3637{
7b184b00 3638 /*unlock kfd: SRIOV would do it separately */
3639 if (!amdgpu_sriov_vf(adev))
3640 amdgpu_amdkfd_post_reset(adev);
89041940 3641 amdgpu_vf_error_trans_all(adev);
13a752e3
ML
3642 adev->in_gpu_reset = 0;
3643 mutex_unlock(&adev->lock_reset);
26bc5340
AG
3644}
3645
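/*
 * Illustrative sketch (not part of the upstream driver): callers that can
 * race with another reset take the lock with trylock and simply bail out
 * when a reset is already in flight rather than queueing behind it, which
 * is how the recovery entry point below uses these two helpers.
 */
static void example_try_recovery(struct amdgpu_device *adev)
{
	if (!amdgpu_device_lock_adev(adev, true))
		return;		/* another reset owns the device */

	/* ... reset and recovery work would run here ... */

	amdgpu_device_unlock_adev(adev);
}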
3646
3647/**
3648 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
3649 *
3650 * @adev: amdgpu device pointer
3651 * @job: which job trigger hang
3652 *
3653 * Attempt to reset the GPU if it has hung (all asics).
3654 * Attempt a soft reset or a full reset and reinitialize the ASIC.
3655 * Returns 0 for success or an error on failure.
3656 */
3657
3658int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
3659 struct amdgpu_job *job)
3660{
1d721ed6
AG
3661 struct list_head device_list, *device_list_handle = NULL;
3662 bool need_full_reset, job_signaled;
26bc5340 3663 struct amdgpu_hive_info *hive = NULL;
26bc5340 3664 struct amdgpu_device *tmp_adev = NULL;
1d721ed6 3665 int i, r = 0;
26bc5340 3666
1d721ed6 3667 need_full_reset = job_signaled = false;
26bc5340
AG
3668 INIT_LIST_HEAD(&device_list);
3669
3670 dev_info(adev->dev, "GPU reset begin!\n");
3671
beff74bc 3672 cancel_delayed_work_sync(&adev->delayed_init_work);
c53e4db7 3673
1d721ed6
AG
3674 hive = amdgpu_get_xgmi_hive(adev, false);
3675
26bc5340 3676 /*
1d721ed6
AG
3677 * Here we trylock to avoid a chain of resets executing, triggered
3678 * either by jobs on different adevs in an XGMI hive or by jobs on
3679 * different schedulers for the same device, while this TO handler is running.
3680 * We always reset all schedulers for a device and all devices in an XGMI
3681 * hive, so that should take care of them too.
26bc5340 3682 */
1d721ed6
AG
3683
3684 if (hive && !mutex_trylock(&hive->reset_lock)) {
3685 DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
3686 job->base.id, hive->hive_id);
26bc5340 3687 return 0;
1d721ed6 3688 }
26bc5340
AG
3689
3690 /* Start with adev pre asic reset first for soft reset check.*/
1d721ed6
AG
3691 if (!amdgpu_device_lock_adev(adev, !hive)) {
3692 DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
3693 job->base.id);
3694 return 0;
26bc5340
AG
3695 }
3696
3697 /* Build list of devices to reset */
1d721ed6 3698 if (adev->gmc.xgmi.num_physical_nodes > 1) {
26bc5340
AG
3699 if (!hive) {
3700 amdgpu_device_unlock_adev(adev);
3701 return -ENODEV;
3702 }
3703
3704 /*
3705 * In case we are in XGMI hive mode, device reset is done for all the
3706 * nodes in the hive to retrain all XGMI links, and hence the reset
3707 * sequence is executed in a loop on all nodes.
3708 */
3709 device_list_handle = &hive->device_list;
3710 } else {
3711 list_add_tail(&adev->gmc.xgmi.head, &device_list);
3712 device_list_handle = &device_list;
3713 }
3714
fdafb359
EQ
3715 /*
3716 * Mark these ASICs to be reset as untracked first,
3717 * and add them back after the reset has completed.
3718 */
3719 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head)
3720 amdgpu_unregister_gpu_instance(tmp_adev);
3721
1d721ed6
AG
3722 /* block all schedulers and reset given job's ring */
3723 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
f1c1314b 3724 /* disable ras on ALL IPs */
3725 if (amdgpu_device_ip_need_full_reset(tmp_adev))
3726 amdgpu_ras_suspend(tmp_adev);
3727
1d721ed6
AG
3728 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3729 struct amdgpu_ring *ring = tmp_adev->rings[i];
3730
3731 if (!ring || !ring->sched.thread)
3732 continue;
3733
3734 drm_sched_stop(&ring->sched, &job->base);
3735 }
3736 }
3737
3738
3739 /*
3740 * Must check guilty signal here since after this point all old
3741 * HW fences are force signaled.
3742 *
3743 * job->base holds a reference to parent fence
3744 */
3745 if (job && job->base.s_fence->parent &&
3746 dma_fence_is_signaled(job->base.s_fence->parent))
3747 job_signaled = true;
3748
3749 if (!amdgpu_device_ip_need_full_reset(adev))
3750 device_list_handle = &device_list;
3751
3752 if (job_signaled) {
3753 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
3754 goto skip_hw_reset;
3755 }
3756
3757
3758 /* Guilty job will be freed after this*/
3759 r = amdgpu_device_pre_asic_reset(adev,
3760 job,
3761 &need_full_reset);
3762 if (r) {
3763 /*TODO Should we stop ?*/
3764 DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
3765 r, adev->ddev->unique);
3766 adev->asic_reset_res = r;
3767 }
3768
26bc5340
AG
3769retry: /* Rest of adevs pre asic reset from XGMI hive. */
3770 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3771
3772 if (tmp_adev == adev)
3773 continue;
3774
1d721ed6 3775 amdgpu_device_lock_adev(tmp_adev, false);
26bc5340
AG
3776 r = amdgpu_device_pre_asic_reset(tmp_adev,
3777 NULL,
3778 &need_full_reset);
3779 /*TODO Should we stop ?*/
3780 if (r) {
3781 DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
3782 r, tmp_adev->ddev->unique);
3783 tmp_adev->asic_reset_res = r;
3784 }
3785 }
3786
3787 /* Actual ASIC resets if needed.*/
3788 /* TODO Implement XGMI hive reset logic for SRIOV */
3789 if (amdgpu_sriov_vf(adev)) {
3790 r = amdgpu_device_reset_sriov(adev, job ? false : true);
3791 if (r)
3792 adev->asic_reset_res = r;
3793 } else {
3794 r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
3795 if (r && r == -EAGAIN)
3796 goto retry;
3797 }
3798
1d721ed6
AG
3799skip_hw_reset:
3800
26bc5340
AG
3801 /* Post ASIC reset for all devs. */
3802 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
1d721ed6
AG
3803 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
3804 struct amdgpu_ring *ring = tmp_adev->rings[i];
3805
3806 if (!ring || !ring->sched.thread)
3807 continue;
3808
3809 /* No point in resubmitting jobs if we didn't HW reset */
3810 if (!tmp_adev->asic_reset_res && !job_signaled)
3811 drm_sched_resubmit_jobs(&ring->sched);
3812
3813 drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
3814 }
3815
3816 if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
3817 drm_helper_resume_force_mode(tmp_adev->ddev);
3818 }
3819
3820 tmp_adev->asic_reset_res = 0;
26bc5340
AG
3821
3822 if (r) {
3823 /* bad news, how to tell it to userspace ? */
3824 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter));
3825 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
3826 } else {
3827 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&adev->gpu_reset_counter));
3828 }
3829
3830 amdgpu_device_unlock_adev(tmp_adev);
3831 }
3832
1d721ed6 3833 if (hive)
22d6575b 3834 mutex_unlock(&hive->reset_lock);
26bc5340
AG
3835
3836 if (r)
3837 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
d38ceaf9
AD
3838 return r;
3839}
3840
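/*
 * Illustrative sketch (not part of the upstream driver):
 * amdgpu_do_asic_reset() returns -EAGAIN when the IB ring tests fail
 * after a reset, asking the caller to run another pass with
 * need_full_reset forced on - which is what the retry: label above
 * implements. The prepare/perform helpers below are hypothetical
 * stand-ins for that contract.
 */
int example_prepare_reset(bool *need_full_reset);
int example_perform_reset(bool *need_full_reset);

static int example_reset_until_done(void)
{
	bool need_full_reset = false;
	int r;

	do {
		example_prepare_reset(&need_full_reset);
		r = example_perform_reset(&need_full_reset);
		/* -EAGAIN: the previous pass did not stick; loop again with
		 * the (now updated) need_full_reset flag. */
	} while (r == -EAGAIN);

	return r;
}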
e3ecdffa
AD
3841/**
3842 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
3843 *
3844 * @adev: amdgpu_device pointer
3845 *
3846 * Fetches and stores in the driver the PCIE capabilities (gen speed
3847 * and lanes) of the slot the device is in. Handles APUs and
3848 * virtualized environments where PCIE config space may not be available.
3849 */
5494d864 3850static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
d0dd7f0c 3851{
5d9a6330 3852 struct pci_dev *pdev;
c5313457
HK
3853 enum pci_bus_speed speed_cap, platform_speed_cap;
3854 enum pcie_link_width platform_link_width;
d0dd7f0c 3855
cd474ba0
AD
3856 if (amdgpu_pcie_gen_cap)
3857 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
d0dd7f0c 3858
cd474ba0
AD
3859 if (amdgpu_pcie_lane_cap)
3860 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
d0dd7f0c 3861
cd474ba0
AD
3862 /* covers APUs as well */
3863 if (pci_is_root_bus(adev->pdev->bus)) {
3864 if (adev->pm.pcie_gen_mask == 0)
3865 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
3866 if (adev->pm.pcie_mlw_mask == 0)
3867 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
d0dd7f0c 3868 return;
cd474ba0 3869 }
d0dd7f0c 3870
c5313457
HK
3871 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
3872 return;
3873
dbaa922b
AD
3874 pcie_bandwidth_available(adev->pdev, NULL,
3875 &platform_speed_cap, &platform_link_width);
c5313457 3876
cd474ba0 3877 if (adev->pm.pcie_gen_mask == 0) {
5d9a6330
AD
3878 /* asic caps */
3879 pdev = adev->pdev;
3880 speed_cap = pcie_get_speed_cap(pdev);
3881 if (speed_cap == PCI_SPEED_UNKNOWN) {
3882 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
cd474ba0
AD
3883 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
3884 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
cd474ba0 3885 } else {
5d9a6330
AD
3886 if (speed_cap == PCIE_SPEED_16_0GT)
3887 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3888 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
3889 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
3890 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
3891 else if (speed_cap == PCIE_SPEED_8_0GT)
3892 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3893 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
3894 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
3895 else if (speed_cap == PCIE_SPEED_5_0GT)
3896 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3897 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
3898 else
3899 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
3900 }
3901 /* platform caps */
c5313457 3902 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5d9a6330
AD
3903 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3904 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
3905 } else {
c5313457 3906 if (platform_speed_cap == PCIE_SPEED_16_0GT)
5d9a6330
AD
3907 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3908 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
3909 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
3910 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
c5313457 3911 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5d9a6330
AD
3912 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3913 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
3914 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
c5313457 3915 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5d9a6330
AD
3916 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
3917 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
3918 else
3919 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
3920
cd474ba0
AD
3921 }
3922 }
3923 if (adev->pm.pcie_mlw_mask == 0) {
c5313457 3924 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5d9a6330
AD
3925 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
3926 } else {
c5313457 3927 switch (platform_link_width) {
5d9a6330 3928 case PCIE_LNK_X32:
cd474ba0
AD
3929 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
3930 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
3931 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
3932 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
3933 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3934 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3935 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3936 break;
5d9a6330 3937 case PCIE_LNK_X16:
cd474ba0
AD
3938 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
3939 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
3940 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
3941 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3942 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3943 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3944 break;
5d9a6330 3945 case PCIE_LNK_X12:
cd474ba0
AD
3946 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
3947 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
3948 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3949 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3950 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3951 break;
5d9a6330 3952 case PCIE_LNK_X8:
cd474ba0
AD
3953 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
3954 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3955 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3956 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3957 break;
5d9a6330 3958 case PCIE_LNK_X4:
cd474ba0
AD
3959 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
3960 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3961 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3962 break;
5d9a6330 3963 case PCIE_LNK_X2:
cd474ba0
AD
3964 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
3965 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
3966 break;
5d9a6330 3967 case PCIE_LNK_X1:
cd474ba0
AD
3968 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
3969 break;
3970 default:
3971 break;
3972 }
d0dd7f0c
AD
3973 }
3974 }
3975}
d38ceaf9 3976
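/*
 * Illustrative sketch (not part of the upstream driver): the function
 * above turns a pci_bus_speed capability into a cumulative CAIL gen mask,
 * where each supported speed implies all slower generations. A compact,
 * hypothetical helper with the same mapping for the ASIC caps:
 */
static u32 example_speed_cap_to_gen_mask(enum pci_bus_speed speed_cap)
{
	u32 mask = CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;

	if (speed_cap == PCIE_SPEED_16_0GT)
		mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
			CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
			CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4;
	else if (speed_cap == PCIE_SPEED_8_0GT)
		mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
			CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3;
	else if (speed_cap == PCIE_SPEED_5_0GT)
		mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2;
	else if (speed_cap == PCI_SPEED_UNKNOWN)
		mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
			CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3;

	return mask;
}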